aboutsummaryrefslogtreecommitdiff
path: root/src/openvic-dataloader/v2script
diff options
context:
space:
mode:
author Spartan322 <Megacake1234@gmail.com> 2024-05-09 16:06:02 +0200
committer Spartan322 <Megacake1234@gmail.com> 2024-06-18 01:31:12 +0200
commit b0c3ba3f91926b0c95625bdbf4aab69269130b13 (patch)
tree f15ebc47d6bf370031af28e4bb4814ae30ef46e1 /src/openvic-dataloader/v2script
parent 7b521d6023113372cf6b02e562828273c4040f0e (diff)
Add runtime encoding detection and conversion [fix/char-detection]

Win-1251/1252 detection is a reduced C++ version of https://github.com/hsivonen/chardetng

Add manually-specified encoding fallback
Add default system encoding fallback
Add error recovery to v2script
Add unknown encoding detection warning
Remove csv::Parser templating
Fix lua files dropping data
Update lexy to foonathan/lexy@1e5d99fa3826b1c3c8628d3a11117fb4fb4cc0d0
Remove exclusive reliance on lexy::default_encoding for v2script
Move internal concepts to src/openvic-detail/InternalConcepts.hpp
Move contents of DetectUtf8.hpp to src/detail/Detect.hpp
Move openvic-dataloader/AbstractSyntaxTree.hpp to src
Move DiagnosticLogger.hpp to src
Move File.hpp to src
Move openvic-dataloader/detail/utility files to openvic-dataloader/detail
Add ovdl::utility::type_concat
Add ovdl::utility::type_prepend
Add ovdl::utility::is_instance_of
Overhaul parse error messages
Diffstat (limited to 'src/openvic-dataloader/v2script')
-rw-r--r-- src/openvic-dataloader/v2script/AbstractSyntaxTree.cpp 53
-rw-r--r-- src/openvic-dataloader/v2script/EventGrammar.hpp 8
-rw-r--r-- src/openvic-dataloader/v2script/LuaDefinesGrammar.hpp 133
-rw-r--r-- src/openvic-dataloader/v2script/ModifierGrammar.hpp 14
-rw-r--r-- src/openvic-dataloader/v2script/ParseState.hpp 19
-rw-r--r-- src/openvic-dataloader/v2script/Parser.cpp 195
-rw-r--r-- src/openvic-dataloader/v2script/SimpleGrammar.hpp 307
7 files changed, 461 insertions, 268 deletions
diff --git a/src/openvic-dataloader/v2script/AbstractSyntaxTree.cpp b/src/openvic-dataloader/v2script/AbstractSyntaxTree.cpp
index abade40..5a98b40 100644
--- a/src/openvic-dataloader/v2script/AbstractSyntaxTree.cpp
+++ b/src/openvic-dataloader/v2script/AbstractSyntaxTree.cpp
@@ -1,8 +1,7 @@
-#include <stddef.h>
-
-#include <openvic-dataloader/v2script/AbstractSyntaxTree.hpp>
+#include "openvic-dataloader/v2script/AbstractSyntaxTree.hpp"
#include <lexy/dsl/option.hpp>
+#include <lexy/encoding.hpp>
#include <lexy/input_location.hpp>
#include <dryad/node.hpp>
@@ -23,6 +22,15 @@ ListValue::ListValue(dryad::node_ctor ctor, StatementList statements)
}
}
+ListValue::ListValue(dryad::node_ctor ctor, AssignStatementList statements) : node_base(ctor) {
+ insert_child_list_after(nullptr, statements);
+ if (statements.empty()) {
+ _last_statement = nullptr;
+ } else {
+ _last_statement = statements.back();
+ }
+}
+
FileTree::FileTree(dryad::node_ctor ctor, StatementList statements) : node_base(ctor) {
insert_child_list_after(nullptr, statements);
if (statements.empty()) {
@@ -32,29 +40,22 @@ FileTree::FileTree(dryad::node_ctor ctor, StatementList statements) : node_base(
}
}
-// static void _handle_string_characters(std::string& string, bool allow_newline) {
-// size_t position = 0;
-// for (auto& c : string) {
-// switch (c) {
-// case '\r':
-// case '\n':
-// if (allow_newline) goto END_LOOP;
-// c = ' ';
-// break;
-// default: break;
-// }
-// END_LOOP:
-// position++;
-// }
-// }
-
-std::string AbstractSyntaxTree::make_list_visualizer() const {
+FileTree::FileTree(dryad::node_ctor ctor, AssignStatementList statements) : node_base(ctor) {
+ insert_child_list_after(nullptr, statements);
+ if (statements.empty()) {
+ _last_node = nullptr;
+ } else {
+ _last_node = statements.back();
+ }
+}
+
+std::string FileAbstractSyntaxTree::make_list_visualizer() const {
const int INDENT_SIZE = 2;
std::string result;
unsigned int level = 0;
- for (auto [event, node] : dryad::traverse(_tree)) {
+ for (auto [event, node] : dryad::traverse(this->_tree)) {
if (event == dryad::traverse_event::exit) {
--level;
continue;
@@ -66,7 +67,7 @@ std::string AbstractSyntaxTree::make_list_visualizer() const {
dryad::visit_node(
node,
[&](const FlatValue* value) {
- result.append(value->value(_symbol_interner));
+ result.append(value->value(this->_symbol_interner));
},
[&](const ListValue* value) {
},
@@ -89,19 +90,19 @@ std::string AbstractSyntaxTree::make_list_visualizer() const {
return result;
}
-std::string AbstractSyntaxTree::make_native_visualizer() const {
+std::string FileAbstractSyntaxTree::make_native_visualizer() const {
constexpr int INDENT_SIZE = 2;
std::string result;
unsigned int level = 0;
dryad::visit_tree(
- _tree,
+ this->_tree,
[&](const IdentifierValue* value) {
- result.append(value->value(_symbol_interner));
+ result.append(value->value(this->_symbol_interner));
},
[&](const StringValue* value) {
- result.append(1, '"').append(value->value(_symbol_interner)).append(1, '"');
+ result.append(1, '"').append(value->value(this->_symbol_interner)).append(1, '"');
},
[&](dryad::child_visitor<NodeKind> visitor, const ValueStatement* statement) {
visitor(statement->value());
diff --git a/src/openvic-dataloader/v2script/EventGrammar.hpp b/src/openvic-dataloader/v2script/EventGrammar.hpp
index 27f6459..130a233 100644
--- a/src/openvic-dataloader/v2script/EventGrammar.hpp
+++ b/src/openvic-dataloader/v2script/EventGrammar.hpp
@@ -11,8 +11,8 @@
#include "openvic-dataloader/NodeLocation.hpp"
-#include "ParseState.hpp"
#include "SimpleGrammar.hpp"
+#include "detail/InternalConcepts.hpp"
#include "detail/dsl.hpp"
#include "v2script/AiBehaviorGrammar.hpp"
#include "v2script/EffectGrammar.hpp"
@@ -28,7 +28,7 @@ namespace ovdl::v2script::grammar {
struct MonthValue {
static constexpr auto rule = lexy::dsl::p<Identifier<StringEscapeOption>>;
static constexpr auto value = dsl::callback<ast::IdentifierValue*>(
- [](ast::ParseState& state, ast::IdentifierValue* value) {
+ [](detail::IsParseState auto& state, ast::IdentifierValue* value) {
bool is_number = true;
for (auto* current = value->value(state.ast().symbol_interner()); *current; current++) {
is_number = is_number && std::isdigit(*current);
@@ -94,7 +94,7 @@ namespace ovdl::v2script::grammar {
static constexpr auto value =
dsl::callback<ast::EventStatement*>(
- [](ast::ParseState& state, NodeLocation loc, ast::IdentifierValue* name, ast::ListValue* list) {
+ [](detail::IsParseState auto& state, NodeLocation loc, ast::IdentifierValue* name, ast::ListValue* list) {
static auto country_decl = state.ast().intern_cstr("country_event");
static auto province_decl = state.ast().intern_cstr("province_event");
@@ -104,7 +104,7 @@ namespace ovdl::v2script::grammar {
.finish();
}
- return state.ast().create<ast::EventStatement>(loc, name->value(state.ast().symbol_interner()) == province_decl, list);
+ return state.ast().template create<ast::EventStatement>(loc, name->value(state.ast().symbol_interner()) == province_decl, list);
});
};
diff --git a/src/openvic-dataloader/v2script/LuaDefinesGrammar.hpp b/src/openvic-dataloader/v2script/LuaDefinesGrammar.hpp
index 96cce99..885413c 100644
--- a/src/openvic-dataloader/v2script/LuaDefinesGrammar.hpp
+++ b/src/openvic-dataloader/v2script/LuaDefinesGrammar.hpp
@@ -4,9 +4,12 @@
#include <lexy/_detail/config.hpp>
#include <lexy/dsl.hpp>
+#include <lexy/dsl/delimited.hpp>
+#include <lexy/dsl/recover.hpp>
+#include <lexy/dsl/unicode.hpp>
-#include "ParseState.hpp"
#include "SimpleGrammar.hpp"
+#include "detail/InternalConcepts.hpp"
#include "detail/dsl.hpp"
namespace ovdl::v2script::lua::grammar {
@@ -21,90 +24,118 @@ namespace ovdl::v2script::lua::grammar {
template<typename T>
constexpr auto construct_list = v2script::grammar::construct_list<T>;
- struct ParseOptions {
- };
-
- template<ParseOptions Options>
struct StatementListBlock;
static constexpr auto comment_specifier = LEXY_LIT("--") >> lexy::dsl::until(lexy::dsl::newline).or_eof();
- template<ParseOptions Options>
struct Identifier {
static constexpr auto rule = lexy::dsl::identifier(lexy::dsl::ascii::alpha_underscore, lexy::dsl::ascii::alpha_digit_underscore);
- static constexpr auto value = callback<ast::IdentifierValue*>(
- [](ast::ParseState& state, auto lexeme) {
- auto value = state.ast().intern(lexeme.data(), lexeme.size());
- return state.ast().create<ast::IdentifierValue>(lexeme.begin(), lexeme.end(), value);
- });
+ static constexpr auto value =
+ callback<ast::IdentifierValue*>(
+ [](detail::IsParseState auto& state, auto lexeme) {
+ auto value = state.ast().intern(lexeme.data(), lexeme.size());
+ return state.ast().template create<ast::IdentifierValue>(lexeme.begin(), lexeme.end(), value);
+ });
};
- template<ParseOptions Options>
struct Value {
static constexpr auto rule = lexy::dsl::identifier(lexy::dsl::ascii::digit / lexy::dsl::lit_c<'.'> / lexy::dsl::lit_c<'-'>);
- static constexpr auto value = callback<ast::IdentifierValue*>(
- [](ast::ParseState& state, auto lexeme) {
- auto value = state.ast().intern(lexeme.data(), lexeme.size());
- return state.ast().create<ast::IdentifierValue>(lexeme.begin(), lexeme.end(), value);
- });
- };
-
- template<ParseOptions Options>
- struct String {
- static constexpr auto rule = [] {
- // Arbitrary code points that aren't control characters.
- auto c = dsl::make_range<0x20, 0xFF>() - lexy::dsl::ascii::control;
-
- return lexy::dsl::delimited(lexy::dsl::position(lexy::dsl::lit_b<'"'>))(c) | lexy::dsl::delimited(lexy::dsl::position(lexy::dsl::lit_b<'\''>))(c);
- }();
-
static constexpr auto value =
- lexy::as_string<std::string> >>
- callback<ast::StringValue*>(
- [](ast::ParseState& state, const char* begin, const std::string& str, const char* end) {
- auto value = state.ast().intern(str.data(), str.length());
- return state.ast().create<ast::StringValue>(begin, end, value);
+ callback<ast::IdentifierValue*>(
+ [](detail::IsParseState auto& state, auto lexeme) {
+ auto value = state.ast().intern(lexeme.data(), lexeme.size());
+ return state.ast().template create<ast::IdentifierValue>(lexeme.begin(), lexeme.end(), value);
});
};
- template<ParseOptions Options>
+ struct String : lexy::scan_production<ast::StringValue*>,
+ lexy::token_production {
+ template<typename Context, typename Reader>
+ static constexpr scan_result scan(lexy::rule_scanner<Context, Reader>& scanner, detail::IsParseState auto& state) {
+ using encoding = typename Reader::encoding;
+
+ constexpr auto c = [] {
+ if constexpr (std::same_as<encoding, lexy::default_encoding> || std::same_as<encoding, lexy::byte_encoding>) {
+ // Arbitrary code points that aren't control characters.
+ return dsl::lit_b_range<0x20, 0xFF> - lexy::dsl::ascii::control;
+ } else {
+ return -lexy::dsl::unicode::control;
+ }
+ }();
+ auto rule = lexy::dsl::quoted(c) | lexy::dsl::single_quoted(c);
+ auto begin = scanner.position();
+ lexy::scan_result<std::string> str_result;
+ scanner.parse(str_result, rule);
+ if (!scanner || !str_result)
+ return lexy::scan_failed;
+ auto end = scanner.position();
+ auto str = str_result.value();
+ auto value = state.ast().intern(str.data(), str.size());
+ return state.ast().template create<ast::StringValue>(begin, end, value);
+ }
+
+ static constexpr auto rule = lexy::dsl::peek(lexy::dsl::quoted.open() | lexy::dsl::single_quoted.open()) >> lexy::dsl::scan;
+ static constexpr auto value = ovdl::v2script::grammar::convert_as_string<std::string> >> lexy::forward<ast::StringValue*>;
+ };
+
struct Expression {
- static constexpr auto rule = lexy::dsl::p<Value<Options>> | lexy::dsl::p<String<Options>>;
+ static constexpr auto rule = lexy::dsl::p<Value> | lexy::dsl::p<String>;
static constexpr auto value = lexy::forward<ast::Value*>;
};
- template<ParseOptions Options>
struct AssignmentStatement {
- static constexpr auto rule =
- dsl::p<Identifier<Options>> >>
- lexy::dsl::equal_sign >>
- (lexy::dsl::p<Expression<Options>> | lexy::dsl::recurse_branch<StatementListBlock<Options>>);
+ static constexpr auto rule = [] {
+ auto right_brace = lexy::dsl::lit_c<'}'>;
+
+ auto expression = lexy::dsl::p<Expression>;
+ auto statement_list = lexy::dsl::recurse_branch<StatementListBlock>;
+
+ auto rhs_recover = lexy::dsl::recover(expression, statement_list).limit(right_brace);
+ auto rhs_try = lexy::dsl::try_(expression | statement_list, rhs_recover);
+
+ auto identifier = dsl::p<Identifier> >> lexy::dsl::equal_sign + rhs_try;
+
+ auto recover = lexy::dsl::recover(identifier).limit(right_brace);
+ return lexy::dsl::try_(identifier, recover);
+ }();
static constexpr auto value = callback<ast::AssignStatement*>(
- [](ast::ParseState& state, const char* pos, ast::IdentifierValue* name, ast::Value* initializer) {
- return state.ast().create<ast::AssignStatement>(pos, name, initializer);
+ [](detail::IsParseState auto& state, const char* pos, ast::IdentifierValue* name, ast::Value* initializer) -> ast::AssignStatement* {
+ if (initializer == nullptr) return nullptr;
+ return state.ast().template create<ast::AssignStatement>(pos, name, initializer);
+ },
+ [](detail::IsParseState auto& state, ast::Value*) {
+ return nullptr;
+ },
+ [](detail::IsParseState auto& state) {
+ return nullptr;
});
};
- template<ParseOptions Options>
struct StatementListBlock {
- static constexpr auto rule =
- dsl::curly_bracketed(
- lexy::dsl::opt(
- lexy::dsl::list(
- lexy::dsl::recurse_branch<AssignmentStatement<Options>>,
- lexy::dsl::trailing_sep(lexy::dsl::lit_c<','>))));
+ static constexpr auto rule = [] {
+ auto right_brace = lexy::dsl::lit_c<'}'>;
+ auto comma = lexy::dsl::lit_c<','>;
+
+ auto assign_statement = lexy::dsl::recurse_branch<AssignmentStatement>;
+ auto assign_try = lexy::dsl::try_(assign_statement);
+
+ auto curly_bracket = dsl::curly_bracketed.opt_list(
+ assign_try,
+ lexy::dsl::trailing_sep(comma));
+
+ return lexy::dsl::try_(curly_bracket, lexy::dsl::find(right_brace));
+ }();
static constexpr auto value =
lexy::as_list<ast::AssignStatementList> >> construct_list<ast::ListValue>;
};
- template<ParseOptions Options = ParseOptions {}>
struct File {
// Allow arbitrary spaces between individual tokens.
static constexpr auto whitespace = ovdl::v2script::grammar::whitespace_specifier | comment_specifier;
- static constexpr auto rule = lexy::dsl::position + lexy::dsl::terminator(lexy::dsl::eof).opt_list(lexy::dsl::p<AssignmentStatement<Options>>);
+ static constexpr auto rule = lexy::dsl::position + lexy::dsl::terminator(lexy::dsl::eof).opt_list(lexy::dsl::p<AssignmentStatement>);
static constexpr auto value = lexy::as_list<ast::AssignStatementList> >> construct<ast::FileTree>;
};
diff --git a/src/openvic-dataloader/v2script/ModifierGrammar.hpp b/src/openvic-dataloader/v2script/ModifierGrammar.hpp
index 22592d4..122a8c7 100644
--- a/src/openvic-dataloader/v2script/ModifierGrammar.hpp
+++ b/src/openvic-dataloader/v2script/ModifierGrammar.hpp
@@ -10,9 +10,9 @@
#include "openvic-dataloader/NodeLocation.hpp"
-#include "ParseState.hpp"
#include "SimpleGrammar.hpp"
#include "TriggerGrammar.hpp"
+#include "detail/InternalConcepts.hpp"
#include "detail/dsl.hpp"
namespace ovdl::v2script::grammar {
@@ -22,9 +22,9 @@ namespace ovdl::v2script::grammar {
struct FactorStatement {
static constexpr auto rule = lexy::dsl::position(factor_keyword) >> (lexy::dsl::equal_sign + lexy::dsl::p<Identifier<StringEscapeOption>>);
static constexpr auto value = dsl::callback<ast::AssignStatement*>(
- [](ast::ParseState& state, NodeLocation loc, ast::IdentifierValue* value) {
- auto* factor = state.ast().create<ast::IdentifierValue>(loc, state.ast().intern("factor"));
- return state.ast().create<ast::AssignStatement>(loc, factor, value);
+ [](detail::IsParseState auto& state, NodeLocation loc, ast::IdentifierValue* value) {
+ auto* factor = state.ast().template create<ast::IdentifierValue>(loc, state.ast().intern("factor"));
+ return state.ast().template create<ast::AssignStatement>(loc, factor, value);
});
};
@@ -49,9 +49,9 @@ namespace ovdl::v2script::grammar {
lexy::dsl::position(modifier_keyword) >> lexy::dsl::equal_sign >> lexy::dsl::p<ModifierList>;
static constexpr auto value = dsl::callback<ast::AssignStatement*>(
- [](ast::ParseState& state, NodeLocation loc, ast::ListValue* list) {
- auto* factor = state.ast().create<ast::IdentifierValue>(loc, state.ast().intern("modifier"));
- return state.ast().create<ast::AssignStatement>(loc, factor, list);
+ [](detail::IsParseState auto& state, NodeLocation loc, ast::ListValue* list) {
+ auto* factor = state.ast().template create<ast::IdentifierValue>(loc, state.ast().intern("modifier"));
+ return state.ast().template create<ast::AssignStatement>(loc, factor, list);
});
};
}
\ No newline at end of file
diff --git a/src/openvic-dataloader/v2script/ParseState.hpp b/src/openvic-dataloader/v2script/ParseState.hpp
index 8e29bf5..954e39d 100644
--- a/src/openvic-dataloader/v2script/ParseState.hpp
+++ b/src/openvic-dataloader/v2script/ParseState.hpp
@@ -1,23 +1,24 @@
#pragma once
-#include <openvic-dataloader/File.hpp>
-#include <openvic-dataloader/ParseState.hpp>
#include <openvic-dataloader/v2script/AbstractSyntaxTree.hpp>
#include <lexy/encoding.hpp>
+#include "../openvic-dataloader/ParseState.hpp"
+#include "AbstractSyntaxTree.hpp"
+#include "File.hpp"
+#include "detail/InternalConcepts.hpp"
+
namespace ovdl::v2script::ast {
- using File = ovdl::BasicFile<lexy::default_encoding, Node>;
- struct AbstractSyntaxTree : ovdl::BasicAbstractSyntaxTree<File, FileTree> {
- using BasicAbstractSyntaxTree::BasicAbstractSyntaxTree;
+
+ struct FileAbstractSyntaxTree : ovdl::BasicAbstractSyntaxTree<ovdl::BasicFile<Node>, FileTree> {
+ using ovdl::BasicAbstractSyntaxTree<ovdl::BasicFile<Node>, FileTree>::BasicAbstractSyntaxTree;
std::string make_list_visualizer() const;
std::string make_native_visualizer() const;
};
- using ParseState = ovdl::ParseState<AbstractSyntaxTree>;
+ using ParseState = ovdl::ParseState<FileAbstractSyntaxTree>;
- static_assert(IsFile<ast::File>, "File failed IsFile concept");
- static_assert(IsAst<ast::AbstractSyntaxTree>, "AbstractSyntaxTree failed IsAst concept");
- static_assert(IsParseState<ast::ParseState>, "ParseState failed IsParseState concept");
+ static_assert(detail::IsParseState<ast::ParseState>, "ParseState failed IsParseState concept");
}
\ No newline at end of file
diff --git a/src/openvic-dataloader/v2script/Parser.cpp b/src/openvic-dataloader/v2script/Parser.cpp
index eb491d5..23dada7 100644
--- a/src/openvic-dataloader/v2script/Parser.cpp
+++ b/src/openvic-dataloader/v2script/Parser.cpp
@@ -4,16 +4,15 @@
#include <iostream>
#include <optional>
#include <string>
+#include <type_traits>
#include <utility>
-#include <openvic-dataloader/DiagnosticLogger.hpp>
+#include <openvic-dataloader/Error.hpp>
#include <openvic-dataloader/NodeLocation.hpp>
-#include <openvic-dataloader/ParseError.hpp>
-#include <openvic-dataloader/ParseWarning.hpp>
-#include <openvic-dataloader/detail/LexyReportError.hpp>
+#include <openvic-dataloader/detail/Concepts.hpp>
+#include <openvic-dataloader/detail/Encoding.hpp>
#include <openvic-dataloader/detail/OStreamOutputIterator.hpp>
-#include <openvic-dataloader/detail/utility/Concepts.hpp>
-#include <openvic-dataloader/detail/utility/Utility.hpp>
+#include <openvic-dataloader/detail/Utility.hpp>
#include <openvic-dataloader/v2script/AbstractSyntaxTree.hpp>
#include <lexy/action/parse.hpp>
@@ -29,10 +28,8 @@
#include <fmt/core.h>
-#include "openvic-dataloader/Error.hpp"
-
+#include "DiagnosticLogger.hpp"
#include "ParseState.hpp"
-#include "detail/DetectUtf8.hpp"
#include "detail/NullBuff.hpp"
#include "detail/ParseHandler.hpp"
#include "detail/Warnings.hpp"
@@ -44,29 +41,46 @@
using namespace ovdl;
using namespace ovdl::v2script;
-/// BufferHandler ///
+/// ParseHandler ///
struct Parser::ParseHandler final : detail::BasicStateParseHandler<v2script::ast::ParseState> {
- constexpr bool is_exclusive_utf8() const {
- return detail::is_utf8_no_ascii(buffer());
- }
-
template<typename Node>
std::optional<DiagnosticLogger::error_range> parse() {
- auto result = lexy::parse<Node>(buffer(), *_parse_state, _parse_state->logger().error_callback());
+ if (parse_state().encoding() == ovdl::detail::Encoding::Utf8) {
+ parse_state().logger().warning(warnings::make_utf8_warning(path()));
+ }
+
+ auto result = [&] {
+ switch (parse_state().encoding()) {
+ using enum detail::Encoding;
+ case Ascii:
+ case Utf8:
+ return lexy::parse<Node>(buffer<lexy::utf8_char_encoding>(), parse_state(), parse_state().logger().error_callback());
+ case Unknown:
+ case Windows1251:
+ case Windows1252:
+ return lexy::parse<Node>(buffer<lexy::default_encoding>(), parse_state(), parse_state().logger().error_callback());
+ default:
+ ovdl::detail::unreachable();
+ }
+ }();
if (!result) {
- return _parse_state->logger().get_errors();
+ return parse_state().logger().get_errors();
}
- _parse_state->ast().set_root(result.value());
+ parse_state().ast().set_root(result.value());
return std::nullopt;
}
ast::FileTree* root() {
- return _parse_state->ast().root();
+ return parse_state().ast().root();
+ }
+
+ Parser::error_range get_errors() {
+ return parse_state().logger().get_errors();
}
};
-/// BufferHandler ///
+/// ParseHandler ///
Parser::Parser()
: _parse_handler(std::make_unique<ParseHandler>()) {
@@ -82,29 +96,29 @@ Parser::Parser(Parser&&) = default;
Parser& Parser::operator=(Parser&&) = default;
Parser::~Parser() = default;
-Parser Parser::from_buffer(const char* data, std::size_t size) {
+Parser Parser::from_buffer(const char* data, std::size_t size, std::optional<detail::Encoding> encoding_fallback) {
Parser result;
- return std::move(result.load_from_buffer(data, size));
+ return std::move(result.load_from_buffer(data, size, encoding_fallback));
}
-Parser Parser::from_buffer(const char* start, const char* end) {
+Parser Parser::from_buffer(const char* start, const char* end, std::optional<detail::Encoding> encoding_fallback) {
Parser result;
- return std::move(result.load_from_buffer(start, end));
+ return std::move(result.load_from_buffer(start, end, encoding_fallback));
}
-Parser Parser::from_string(const std::string_view string) {
+Parser Parser::from_string(const std::string_view string, std::optional<detail::Encoding> encoding_fallback) {
Parser result;
- return std::move(result.load_from_string(string));
+ return std::move(result.load_from_string(string, encoding_fallback));
}
-Parser Parser::from_file(const char* path) {
+Parser Parser::from_file(const char* path, std::optional<detail::Encoding> encoding_fallback) {
Parser result;
- return std::move(result.load_from_file(path));
+ return std::move(result.load_from_file(path, encoding_fallback));
}
-Parser Parser::from_file(const std::filesystem::path& path) {
+Parser Parser::from_file(const std::filesystem::path& path, std::optional<detail::Encoding> encoding_fallback) {
Parser result;
- return std::move(result.load_from_file(path));
+ return std::move(result.load_from_file(path, encoding_fallback));
}
///
@@ -128,38 +142,38 @@ constexpr void Parser::_run_load_func(detail::LoadCallback<Parser::ParseHandler*
if (!error_message.empty()) {
_has_error = true;
_has_fatal_error = true;
- _parse_handler->parse_state().logger().create_log<error::BufferError>(DiagnosticLogger::DiagnosticKind::error, fmt::runtime(error_message));
+ _parse_handler->parse_state().logger().template create_log<error::BufferError>(DiagnosticLogger::DiagnosticKind::error, fmt::runtime(error_message));
}
if (has_error() && &_error_stream.get() != &detail::cnull) {
print_errors_to(_error_stream.get());
}
}
-constexpr Parser& Parser::load_from_buffer(const char* data, std::size_t size) {
+constexpr Parser& Parser::load_from_buffer(const char* data, std::size_t size, std::optional<detail::Encoding> encoding_fallback) {
// Type can't be deduced?
- _run_load_func(std::mem_fn(&ParseHandler::load_buffer_size), data, size);
+ _run_load_func(std::mem_fn(&ParseHandler::load_buffer_size), data, size, encoding_fallback);
return *this;
}
-constexpr Parser& Parser::load_from_buffer(const char* start, const char* end) {
+constexpr Parser& Parser::load_from_buffer(const char* start, const char* end, std::optional<detail::Encoding> encoding_fallback) {
// Type can't be deduced?
- _run_load_func(std::mem_fn(&ParseHandler::load_buffer), start, end);
+ _run_load_func(std::mem_fn(&ParseHandler::load_buffer), start, end, encoding_fallback);
return *this;
}
-constexpr Parser& Parser::load_from_string(const std::string_view string) {
- return load_from_buffer(string.data(), string.size());
+constexpr Parser& Parser::load_from_string(const std::string_view string, std::optional<detail::Encoding> encoding_fallback) {
+ return load_from_buffer(string.data(), string.size(), encoding_fallback);
}
-Parser& Parser::load_from_file(const char* path) {
+Parser& Parser::load_from_file(const char* path, std::optional<detail::Encoding> encoding_fallback) {
set_file_path(path);
// Type can be deduced??
- _run_load_func(std::mem_fn(&ParseHandler::load_file), path);
+ _run_load_func(std::mem_fn(&ParseHandler::load_file), get_file_path().data(), encoding_fallback);
return *this;
}
-Parser& Parser::load_from_file(const std::filesystem::path& path) {
- return load_from_file(path.string().c_str());
+Parser& Parser::load_from_file(const std::filesystem::path& path, std::optional<detail::Encoding> encoding_fallback) {
+ return load_from_file(path.string().c_str(), encoding_fallback);
}
/* REQUIREMENTS:
@@ -173,11 +187,7 @@ bool Parser::simple_parse() {
return false;
}
- if (_parse_handler->is_exclusive_utf8()) {
- _parse_handler->parse_state().logger().warning(warnings::make_utf8_warning(_file_path));
- }
-
- auto errors = _parse_handler->parse<grammar::File<grammar::NoStringEscapeOption>>();
+ std::optional<DiagnosticLogger::error_range> errors = _parse_handler->parse<grammar::File>();
_has_error = _parse_handler->parse_state().logger().errored();
_has_warning = _parse_handler->parse_state().logger().warned();
if (!_parse_handler->root()) {
@@ -196,14 +206,11 @@ bool Parser::event_parse() {
return false;
}
- if (_parse_handler->is_exclusive_utf8()) {
- _parse_handler->parse_state().logger().warning(warnings::make_utf8_warning(_file_path));
- }
-
- auto errors = _parse_handler->parse<grammar::EventFile>();
+ std::optional<DiagnosticLogger::error_range> errors = _parse_handler->parse<grammar::EventFile>();
_has_error = _parse_handler->parse_state().logger().errored();
_has_warning = _parse_handler->parse_state().logger().warned();
if (!_parse_handler->root()) {
+ _has_error = true;
_has_fatal_error = true;
if (&_error_stream.get() != &detail::cnull) {
print_errors_to(_error_stream);
@@ -218,14 +225,11 @@ bool Parser::decision_parse() {
return false;
}
- if (_parse_handler->is_exclusive_utf8()) {
- _parse_handler->parse_state().logger().warning(warnings::make_utf8_warning(_file_path));
- }
-
- auto errors = _parse_handler->parse<grammar::DecisionFile>();
+ std::optional<DiagnosticLogger::error_range> errors = _parse_handler->parse<grammar::DecisionFile>();
_has_error = _parse_handler->parse_state().logger().errored();
_has_warning = _parse_handler->parse_state().logger().warned();
if (!_parse_handler->root()) {
+ _has_error = true;
_has_fatal_error = true;
if (&_error_stream.get() != &detail::cnull) {
print_errors_to(_error_stream);
@@ -240,14 +244,11 @@ bool Parser::lua_defines_parse() {
return false;
}
- if (_parse_handler->is_exclusive_utf8()) {
- _parse_handler->parse_state().logger().warning(warnings::make_utf8_warning(_file_path));
- }
-
- auto errors = _parse_handler->parse<lua::grammar::File<>>();
+ std::optional<DiagnosticLogger::error_range> errors = _parse_handler->parse<lua::grammar::File>();
_has_error = _parse_handler->parse_state().logger().errored();
_has_warning = _parse_handler->parse_state().logger().warned();
if (!_parse_handler->root()) {
+ _has_error = true;
_has_fatal_error = true;
if (&_error_stream.get() != &detail::cnull) {
print_errors_to(_error_stream);
@@ -273,48 +274,66 @@ std::string Parser::make_list_string() const {
return _parse_handler->parse_state().ast().make_list_visualizer();
}
+// TODO: Remove reinterpret_cast
+// WARNING: This almost certainly breaks on utf16 and utf32 encodings, luckily we don't parse in that format
+// This is purely to silence the node_location errors because char8_t is useless
+#define REINTERPRET_IT(IT) reinterpret_cast<const std::decay_t<decltype(buffer)>::encoding::char_type*>((IT))
+
const FilePosition Parser::get_position(const ast::Node* node) const {
if (!node || !node->is_linked_in_tree()) {
return {};
}
- auto node_location = _parse_handler->parse_state().ast().location_of(node);
+
+ NodeLocation node_location;
+
+ node_location = _parse_handler->parse_state().ast().location_of(node);
+
if (node_location.is_synthesized()) {
- return {};
+ return FilePosition {};
}
- auto loc_begin = lexy::get_input_location(_parse_handler->buffer(), node_location.begin());
- FilePosition result { loc_begin.line_nr(), loc_begin.line_nr(), loc_begin.column_nr(), loc_begin.column_nr() };
- if (node_location.begin() < node_location.end()) {
- auto loc_end = lexy::get_input_location(_parse_handler->buffer(), node_location.end(), loc_begin.anchor());
- result.end_line = loc_end.line_nr();
- result.end_column = loc_end.column_nr();
- }
- return result;
+ return _parse_handler->parse_state().ast().file().visit_buffer(
+ [&](auto&& buffer) -> FilePosition {
+ auto loc_begin = lexy::get_input_location(buffer, REINTERPRET_IT(node_location.begin()));
+ FilePosition result { loc_begin.line_nr(), loc_begin.line_nr(), loc_begin.column_nr(), loc_begin.column_nr() };
+ if (node_location.begin() < node_location.end()) {
+ auto loc_end = lexy::get_input_location(buffer, REINTERPRET_IT(node_location.end()), loc_begin.anchor());
+ result.end_line = loc_end.line_nr();
+ result.end_column = loc_end.column_nr();
+ }
+ return result;
+ });
}
Parser::error_range Parser::get_errors() const {
- return _parse_handler->parse_state().logger().get_errors();
+ return _parse_handler->get_errors();
}
const FilePosition Parser::get_error_position(const error::Error* error) const {
if (!error || !error->is_linked_in_tree()) {
return {};
}
+
auto err_location = _parse_handler->parse_state().logger().location_of(error);
if (err_location.is_synthesized()) {
- return {};
+ return FilePosition {};
}
- auto loc_begin = lexy::get_input_location(_parse_handler->buffer(), err_location.begin());
- FilePosition result { loc_begin.line_nr(), loc_begin.line_nr(), loc_begin.column_nr(), loc_begin.column_nr() };
- if (err_location.begin() < err_location.end()) {
- auto loc_end = lexy::get_input_location(_parse_handler->buffer(), err_location.end(), loc_begin.anchor());
- result.end_line = loc_end.line_nr();
- result.end_column = loc_end.column_nr();
- }
- return result;
+ return _parse_handler->parse_state().ast().file().visit_buffer(
+ [&](auto&& buffer) -> FilePosition {
+ auto loc_begin = lexy::get_input_location(buffer, REINTERPRET_IT(err_location.begin()));
+ FilePosition result { loc_begin.line_nr(), loc_begin.line_nr(), loc_begin.column_nr(), loc_begin.column_nr() };
+ if (err_location.begin() < err_location.end()) {
+ auto loc_end = lexy::get_input_location(buffer, REINTERPRET_IT(err_location.end()), loc_begin.anchor());
+ result.end_line = loc_end.line_nr();
+ result.end_column = loc_end.column_nr();
+ }
+ return result;
+ });
}
+#undef REINTERPRET_IT
+
void Parser::print_errors_to(std::basic_ostream<char>& stream) const {
auto errors = get_errors();
if (errors.empty()) return;
@@ -324,19 +343,9 @@ void Parser::print_errors_to(std::basic_ostream<char>& stream) const {
[&](const error::BufferError* buffer_error) {
stream << "buffer error: " << buffer_error->message() << '\n';
},
- [&](const error::ParseError* parse_error) {
- auto position = get_error_position(parse_error);
- std::string pos_str = fmt::format(":{}:{}: ", position.start_line, position.start_column);
- stream << _file_path << pos_str << "parse error for '" << parse_error->production_name() << "': " << parse_error->message() << '\n';
- },
- [&](dryad::child_visitor<error::ErrorKind> visitor, const error::Semantic* semantic) {
- auto position = get_error_position(semantic);
- std::string pos_str = ": ";
- if (!position.is_empty()) {
- pos_str = fmt::format(":{}:{}: ", position.start_line, position.start_column);
- }
- stream << _file_path << pos_str << semantic->message() << '\n';
- auto annotations = semantic->annotations();
+ [&](dryad::child_visitor<error::ErrorKind> visitor, const error::AnnotatedError* annotated_error) {
+ stream << annotated_error->message() << '\n';
+ auto annotations = annotated_error->annotations();
for (auto annotation : annotations) {
visitor(annotation);
}
diff --git a/src/openvic-dataloader/v2script/SimpleGrammar.hpp b/src/openvic-dataloader/v2script/SimpleGrammar.hpp
index 37e295f..d42ce07 100644
--- a/src/openvic-dataloader/v2script/SimpleGrammar.hpp
+++ b/src/openvic-dataloader/v2script/SimpleGrammar.hpp
@@ -5,10 +5,22 @@
#include <lexy/callback.hpp>
#include <lexy/dsl.hpp>
+#include <lexy/dsl/any.hpp>
#include <lexy/dsl/identifier.hpp>
+#include <lexy/dsl/option.hpp>
+#include <lexy/dsl/peek.hpp>
+#include <lexy/dsl/punctuator.hpp>
+#include <lexy/dsl/recover.hpp>
+#include <lexy/dsl/scan.hpp>
#include <lexy/dsl/symbol.hpp>
-
-#include "ParseState.hpp"
+#include <lexy/dsl/unicode.hpp>
+#include <lexy/encoding.hpp>
+#include <lexy/input/base.hpp>
+#include <lexy/input/buffer.hpp>
+#include <lexy/lexeme.hpp>
+
+#include "detail/Convert.hpp"
+#include "detail/InternalConcepts.hpp"
#include "detail/dsl.hpp"
// Grammar Definitions //
@@ -23,17 +35,28 @@
*/
namespace ovdl::v2script::grammar {
template<typename T>
- constexpr auto construct = dsl::construct<ast::ParseState, T>;
+ constexpr auto construct = dsl::construct<T>; // shorthand used as a production's `value` (see SimpleAssignmentStatement)
template<typename T, bool DisableEmpty = false, typename ListType = ast::AssignStatementList>
- constexpr auto construct_list = dsl::construct_list<ast::ParseState, T, ListType, DisableEmpty>;
+ constexpr auto construct_list = dsl::construct_list<T, ListType, DisableEmpty>; // list variant; argument order here differs from this template's parameter order
+
+ struct ConvertErrorHandler { // handler type passed to convert::convert_as_string as its second template argument
+ static constexpr void on_invalid_character(detail::IsStateType auto& state, auto reader) { // reports through state.logger() as a warning
+ state.logger().warning("invalid character value '{}' found.", static_cast<int>(reader.peek())) //
+ .primary(BasicNodeLocation { reader.position() }, "here") // primary annotation anchored at the reader's current position
+ .finish();
+ }
+ };
+
+ template<typename String>
+ constexpr auto convert_as_string = convert::convert_as_string<String, ConvertErrorHandler>; // convert::convert_as_string with ConvertErrorHandler fixed as the handler
struct ParseOptions {
/// @brief Makes string parsing avoid string escapes
bool NoStringEscape;
};
- static constexpr ParseOptions NoStringEscapeOption = ParseOptions { true };
- static constexpr ParseOptions StringEscapeOption = ParseOptions { false };
+ static constexpr auto NoStringEscapeOption = ParseOptions { true }; // NoStringEscape = true
+ static constexpr auto StringEscapeOption = ParseOptions { false }; // NoStringEscape = false
/* REQUIREMENTS: DAT-630 */
static constexpr auto whitespace_specifier = lexy::dsl::ascii::blank / lexy::dsl::ascii::newline;
@@ -50,24 +73,28 @@ namespace ovdl::v2script::grammar {
ascii /
lexy::dsl::lit_b<0x8A> / lexy::dsl::lit_b<0x8C> / lexy::dsl::lit_b<0x8E> /
lexy::dsl::lit_b<0x92> / lexy::dsl::lit_b<0x97> / lexy::dsl::lit_b<0x9A> / lexy::dsl::lit_b<0x9C> /
- dsl::make_range<0x9E, 0x9F>() /
- dsl::make_range<0xC0, 0xD6>() /
- dsl::make_range<0xD8, 0xF6>() /
- dsl::make_range<0xF8, 0xFF>();
+ dsl::lit_b_range<0x9E, 0x9F> /
+ dsl::lit_b_range<0xC0, 0xD6> /
+ dsl::lit_b_range<0xD8, 0xF6> /
+ dsl::lit_b_range<0xF8, 0xFF>;
static constexpr auto windows_1251_data_specifier_additions =
- dsl::make_range<0x80, 0x81>() / lexy::dsl::lit_b<0x83> / lexy::dsl::lit_b<0x8D> / lexy::dsl::lit_b<0x8F> /
+ dsl::lit_b_range<0x80, 0x81> / lexy::dsl::lit_b<0x83> / lexy::dsl::lit_b<0x8D> / lexy::dsl::lit_b<0x8F> / // these bytes are combined with windows_1252_data_specifier to form data_specifier
lexy::dsl::lit_b<0x90> / lexy::dsl::lit_b<0x9D> / lexy::dsl::lit_b<0x9F> /
- dsl::make_range<0xA1, 0xA3>() / lexy::dsl::lit_b<0xA5> / lexy::dsl::lit_b<0xA8> / lexy::dsl::lit_b<0xAA> /
+ dsl::lit_b_range<0xA1, 0xA3> / lexy::dsl::lit_b<0xA5> / lexy::dsl::lit_b<0xA8> / lexy::dsl::lit_b<0xAA> /
lexy::dsl::lit_b<0xAF> /
- dsl::make_range<0xB2, 0xB4>() / lexy::dsl::lit_b<0xB8> / lexy::dsl::lit_b<0xBA> /
- dsl::make_range<0xBC, 0xBF>() /
+ dsl::lit_b_range<0xB2, 0xB4> / lexy::dsl::lit_b<0xB8> / lexy::dsl::lit_b<0xBA> /
+ dsl::lit_b_range<0xBC, 0xBF> /
lexy::dsl::lit_b<0xD7> / lexy::dsl::lit_b<0xF7>;
static constexpr auto data_specifier = windows_1252_data_specifier / windows_1251_data_specifier_additions;
static constexpr auto data_char_class = LEXY_CHAR_CLASS("DataSpecifier", data_specifier);
+ static constexpr auto utf_data_specifier = lexy::dsl::unicode::xid_continue / LEXY_ASCII_ONE_OF("+:@%&'-."); // XID_Continue code points plus the listed ASCII punctuation
+
+ static constexpr auto utf_char_class = LEXY_CHAR_CLASS("DataSpecifier", utf_data_specifier); // NOTE(review): same "DataSpecifier" label as data_char_class — confirm the duplicate name is intended
+
static constexpr auto escaped_symbols = lexy::symbol_table<char> //
.map<'"'>('"')
.map<'\''>('\'')
@@ -79,50 +106,121 @@ namespace ovdl::v2script::grammar {
.map<'r'>('\r')
.map<'t'>('\t');
- static constexpr auto id = lexy::dsl::identifier(data_char_class);
+ static constexpr auto id = lexy::dsl::identifier(ascii); // identifier rule over `ascii`; handed to keyword_rule / fkeyword_rule
template<ParseOptions Options>
struct SimpleGrammar {
struct StatementListBlock;
- struct Identifier {
- static constexpr auto rule = lexy::dsl::identifier(data_char_class);
- static constexpr auto value = dsl::callback<ast::IdentifierValue*>(
- [](ast::ParseState& state, auto lexeme) {
- auto value = state.ast().intern(lexeme.data(), lexeme.size());
- return state.ast().create<ast::IdentifierValue>(ovdl::NodeLocation::make_from(lexeme.begin(), lexeme.end()), value);
- });
+ struct Identifier : lexy::scan_production<ast::IdentifierValue*>, // hand-written scanner production yielding ast::IdentifierValue*
+ lexy::token_production {
+
+ template<typename Context, typename Reader>
+ static constexpr scan_result scan(lexy::rule_scanner<Context, Reader>& scanner, detail::IsParseState auto& state) {
+ using encoding = typename Reader::encoding;
+ using char_type = typename encoding::char_type;
+
+ std::basic_string<char_type> value_result; // accumulated identifier text, in the reader's char_type
+
+ auto content_begin = scanner.position(); // start of the identifier, used for the node location
+ do {
+ if constexpr (std::same_as<encoding, lexy::default_encoding> || std::same_as<encoding, lexy::byte_encoding>) { // default/byte encodings: non-ascii characters go through try_parse_map
+ if (lexy::scan_result<lexy::lexeme<Reader>> ascii_result; scanner.branch(ascii_result, lexy::dsl::identifier(ascii))) { // consume a run of `ascii` identifier characters and copy it unchanged
+ value_result.append(ascii_result.value().begin(), ascii_result.value().end());
+ continue;
+ }
+
+ char_type char_array[] { *scanner.position(), char_type {} }; // NOTE(review): position is dereferenced without an end-of-input check — confirm a readable element is guaranteed here
+ auto input = lexy::range_input(&char_array[0], &char_array[1]); // one-element input holding only the current character
+ auto reader = input.reader();
+ convert::map_value val = convert::try_parse_map(state.encoding(), reader); // look the character up for state.encoding()
+
+ if (val.is_invalid()) {
+ ConvertErrorHandler::on_invalid_character(state, reader);
+ continue; // NOTE(review): the scanner has not advanced here, so the next iteration sees the same character — confirm this cannot loop forever
+ }
+
+ if (!val.is_pass()) {
+ // non-pass characters are not valid ascii and are mapped to utf8 values
+ value_result.append(val._value); // append the mapped text instead of the source character
+ scanner.parse(data_char_class); // advance the scanner over the source character
+ } else {
+ break; // pass-through character that the `ascii` branch did not accept: stop collecting
+ }
+ } else {
+ auto lexeme_result = scanner.template parse<lexy::lexeme<Reader>>(lexy::dsl::identifier(utf_char_class)); // other encodings: parse the identifier in one step over utf_char_class
+ if (lexeme_result) {
+ value_result.append(lexeme_result.value().begin(), lexeme_result.value().size());
+ break;
+ }
+ }
+ } while (scanner); // keep going while the scanner converts to true (presumably: not in a failed state)
+ auto content_end = scanner.position();
+
+ if (value_result.empty()) { // nothing was collected: report a scan failure
+ return lexy::scan_failed;
+ }
+
+ auto value = state.ast().intern(value_result);
+ return state.ast().template create<ast::IdentifierValue>(ovdl::NodeLocation::make_from(content_begin, content_end), value); // node spans begin..end of the scanned input
+ }
+
+ static constexpr auto rule = dsl::peek(data_char_class, utf_char_class) >> lexy::dsl::scan; // peek first, then run scan(); presumably dsl::peek picks the class by encoding — confirm in detail/dsl.hpp
};
/* REQUIREMENTS:
* DAT-633
* DAT-634
*/
- struct StringExpression {
- static constexpr auto rule = [] {
- if constexpr (Options.NoStringEscape) {
- auto c = dsl::make_range<0x20, 0xFF>() / lexy::dsl::lit_b<0x07> / lexy::dsl::lit_b<0x09> / lexy::dsl::lit_b<0x0A> / lexy::dsl::lit_b<0x0D>;
- return lexy::dsl::delimited(lexy::dsl::position(lexy::dsl::lit_b<'"'>))(c);
- } else {
- // Arbitrary code points that aren't control characters.
- auto c = dsl::make_range<0x20, 0xFF>() - lexy::dsl::ascii::control;
-
- // Escape sequences start with a backlash.
- // They either map one of the symbols,
- // or a Unicode code point of the form uXXXX.
- auto escape = lexy::dsl::backslash_escape //
- .symbol<escaped_symbols>();
- return lexy::dsl::delimited(lexy::dsl::position(lexy::dsl::lit_b<'"'>))(c, escape);
- }
- }();
-
- static constexpr auto value =
- lexy::as_string<std::string> >>
- dsl::callback<ast::StringValue*>(
- [](ast::ParseState& state, const char* begin, auto&& str, const char* end) {
- auto value = state.ast().intern(str.data(), str.length());
- return state.ast().create<ast::StringValue>(ovdl::NodeLocation::make_from(begin, end), value);
- });
+ struct StringExpression : lexy::scan_production<ast::StringValue*>, // hand-written scanner production yielding ast::StringValue*
+ lexy::token_production {
+
+ template<typename Context, typename Reader>
+ static constexpr scan_result scan(lexy::rule_scanner<Context, Reader>& scanner, detail::IsParseState auto& state) {
+ using encoding = typename Reader::encoding;
+
+ constexpr auto rule = [] { // quoted-string rule chosen at compile time from Options and the reader's encoding
+ if constexpr (Options.NoStringEscape) { // no escape rule is attached in this branch
+ auto c = [] {
+ if constexpr (std::same_as<encoding, lexy::default_encoding> || std::same_as<encoding, lexy::byte_encoding>) {
+ return dsl::lit_b_range<0x20, 0xFF> / lexy::dsl::lit_b<0x07> / lexy::dsl::lit_b<0x09> / lexy::dsl::lit_b<0x0A> / lexy::dsl::lit_b<0x0D>; // bytes 0x20-0xFF plus 0x07, 0x09, 0x0A and 0x0D
+ } else {
+ return -lexy::dsl::unicode::control; // complement of the Unicode control class
+ }
+ }();
+ return lexy::dsl::quoted(c);
+ } else {
+ // Arbitrary code points that aren't control characters.
+ auto c = [] {
+ if constexpr (std::same_as<encoding, lexy::default_encoding> || std::same_as<encoding, lexy::byte_encoding>) {
+ return dsl::lit_b_range<0x20, 0xFF> - lexy::dsl::ascii::control; // bytes 0x20-0xFF minus ASCII control characters
+ } else {
+ return -lexy::dsl::unicode::control;
+ }
+ }();
+
+ // Escape sequences start with a backslash.
+ // They map one of the symbols in escaped_symbols.
+ // NOTE(review): no uXXXX code point rule is attached here, only .symbol<escaped_symbols>().
+ auto escape = lexy::dsl::backslash_escape //
+ .symbol<escaped_symbols>();
+ return lexy::dsl::quoted(c, escape);
+ }
+ }();
+
+ auto begin = scanner.position(); // position before the quoted string, used for the node location
+ lexy::scan_result<std::string> str_result;
+ scanner.parse(str_result, rule);
+ if (!scanner || !str_result) // fail when the scanner converts to false or no string value was produced
+ return lexy::scan_failed;
+ auto end = scanner.position();
+ auto str = str_result.value();
+ auto value = state.ast().intern(str.data(), str.size());
+ return state.ast().template create<ast::StringValue>(ovdl::NodeLocation::make_from(begin, end), value);
+ }
+
+ static constexpr auto rule = lexy::dsl::peek(lexy::dsl::quoted.open()) >> lexy::dsl::scan; // branch on an opening quote, then run scan()
+ static constexpr auto value = convert_as_string<std::string> >> lexy::forward<ast::StringValue*>; // presumably supplies the string sink used by the quoted rule inside scan() — confirm
};
/* REQUIREMENTS: DAT-638 */
@@ -132,59 +230,112 @@ namespace ovdl::v2script::grammar {
};
struct SimpleAssignmentStatement {
- static constexpr auto rule =
- dsl::p<Identifier> >>
- (lexy::dsl::equal_sign >>
- (lexy::dsl::p<ValueExpression> | lexy::dsl::recurse_branch<StatementListBlock>));
+ static constexpr auto rule = [] {
+ auto right_brace = lexy::dsl::lit_c<'}'>; // used as the limit for both recovery rules below
+
+ auto value_expression = lexy::dsl::p<ValueExpression>;
+ auto statement_list_expression = lexy::dsl::recurse_branch<StatementListBlock>;
+
+ auto rhs_recover = lexy::dsl::recover(value_expression, statement_list_expression).limit(right_brace); // recovery for the right-hand side: resume at a value or block, limited by '}'
+ auto rhs_try = lexy::dsl::try_(value_expression | statement_list_expression, rhs_recover); // right-hand side wrapped so a failure triggers rhs_recover
+
+ auto identifier =
+ dsl::p<Identifier> >>
+ (lexy::dsl::equal_sign >> rhs_try);
+
+ auto recover = lexy::dsl::recover(identifier).limit(right_brace); // recovery for the whole statement: resume at the next identifier, limited by '}'
+ return lexy::dsl::try_(identifier, recover);
+ }();
static constexpr auto value = construct<ast::AssignStatement>;
};
/* REQUIREMENTS: DAT-639 */
struct AssignmentStatement {
- static constexpr auto rule =
- dsl::p<Identifier> >>
+ static constexpr auto rule = [] {
+ auto right_brace = lexy::dsl::lit_c<'}'>; // limit for the right-hand-side recovery rule
+
+ auto value_expression = lexy::dsl::p<ValueExpression>;
+ auto statement_list_expression = lexy::dsl::recurse_branch<StatementListBlock>;
+
+ auto rhs_recover = lexy::dsl::recover(value_expression, statement_list_expression).limit(right_brace); // resume at a value or block, limited by '}'
+ auto rhs_try = lexy::dsl::try_(value_expression | statement_list_expression, rhs_recover);
+
+ auto identifier =
+ dsl::p<Identifier> >>
(lexy::dsl::equal_sign >>
- (lexy::dsl::p<ValueExpression> | lexy::dsl::recurse_branch<StatementListBlock>) |
- lexy::dsl::else_ >> lexy::dsl::return_) |
- dsl::p<StringExpression> |
- lexy::dsl::recurse_branch<StatementListBlock>;
+ rhs_try |
+ lexy::dsl::else_ >> lexy::dsl::return_); // no '=' after the identifier: return from the production with the identifier alone
+
+ auto string_expression = dsl::p<StringExpression>;
+ auto statement_list = lexy::dsl::recurse_branch<StatementListBlock>;
+
+ return identifier | string_expression | statement_list; // a statement is an identifier form, a bare string, or a bare block
+ }();
static constexpr auto value = dsl::callback<ast::Statement*>(
- [](ast::ParseState& state, const char* pos, ast::IdentifierValue* name, ast::Value* initializer) {
- return state.ast().create<ast::AssignStatement>(pos, name, initializer);
+ [](detail::IsParseState auto& state, const char* pos, ast::IdentifierValue* name, ast::Value* initializer) { // name = initializer
+ return state.ast().template create<ast::AssignStatement>(pos, name, initializer);
},
- [](ast::ParseState& state, const char* pos, ast::Value* left, lexy::nullopt = {}) {
- return state.ast().create<ast::ValueStatement>(pos, left);
+ [](detail::IsParseState auto& state, bool&, const char* pos, ast::IdentifierValue* name, ast::Value* initializer) { // NOTE(review): the leading bool& arguments are not explained in this file — presumably forwarded by the try_/recover rules; confirm
+ return state.ast().template create<ast::AssignStatement>(pos, name, initializer);
},
- [](ast::ParseState& state, ast::Value* left) {
- return state.ast().create<ast::ValueStatement>(state.ast().location_of(left), left);
+ [](detail::IsParseState auto& state, bool&, bool&, const char* pos, ast::IdentifierValue* name, ast::Value* initializer) { // same result as above, with two ignored bool& arguments
+ return state.ast().template create<ast::AssignStatement>(pos, name, initializer);
+ },
+ [](detail::IsParseState auto& state, bool&, bool&, const char* pos, ast::Value* name) { // value-only form with two ignored bool& arguments
+ return state.ast().template create<ast::ValueStatement>(pos, name);
+ },
+ [](detail::IsParseState auto& state, const char* pos, ast::Value* left, lexy::nullopt = {}) { // lone value, optionally followed by nullopt
+ return state.ast().template create<ast::ValueStatement>(pos, left);
+ },
+ [](detail::IsParseState auto& state, bool&, const char* pos, ast::Value* left, lexy::nullopt = {}) {
+ return state.ast().template create<ast::ValueStatement>(pos, left);
+ },
+ [](detail::IsParseState auto& state, ast::Value* left) -> ast::ValueStatement* { // value without an explicit position: location is taken from the value node
+ if (left == nullptr) return nullptr; // a null value produces no statement
+ return state.ast().template create<ast::ValueStatement>(state.ast().location_of(left), left);
+ },
+ [](detail::IsParseState auto& state, bool&, ast::Value* left) -> ast::ValueStatement* {
+ if (left == nullptr) return nullptr; // a null value produces no statement
+ return state.ast().template create<ast::ValueStatement>(state.ast().location_of(left), left);
});
};
/* REQUIREMENTS: DAT-640 */
struct StatementListBlock {
- static constexpr auto rule =
- dsl::curly_bracketed(
- (lexy::dsl::opt(lexy::dsl::list(lexy::dsl::recurse_branch<AssignmentStatement>)) +
- lexy::dsl::opt(lexy::dsl::semicolon)));
+ static constexpr auto rule = [] {
+ auto right_brace = lexy::dsl::lit_c<'}'>; // token the block-level recovery searches for
+
+ auto assign_statement = lexy::dsl::recurse_branch<AssignmentStatement>;
+
+ auto assign_try = lexy::dsl::try_(assign_statement); // try_ with no explicit recovery rule
+ auto assign_opt = lexy::dsl::opt(lexy::dsl::list(assign_try)); // zero or more statements
+
+ auto curly_bracket = dsl::curly_bracketed(assign_opt + lexy::dsl::opt(lexy::dsl::semicolon)); // '{' statements [';'] '}'
+
+ return lexy::dsl::try_(curly_bracket, lexy::dsl::find(right_brace)); // if the block fails, recover by searching for '}'
+ }();
static constexpr auto value =
lexy::as_list<ast::StatementList> >>
dsl::callback<ast::ListValue*>(
- [](ast::ParseState& state, const char* begin, auto&& list, const char* end) {
+ [](detail::IsParseState auto& state, const char* begin, auto&& list, const char* end) { // block without a semicolon argument
if constexpr (std::is_same_v<std::decay_t<decltype(list)>, lexy::nullopt>) {
- return state.ast().create<ast::ListValue>(ovdl::NodeLocation::make_from(begin, end));
+ return state.ast().template create<ast::ListValue>(ovdl::NodeLocation::make_from(begin, end)); // nullopt list: ListValue created from the location only
} else {
- return state.ast().create<ast::ListValue>(ovdl::NodeLocation::make_from(begin, end), LEXY_MOV(list));
+ return state.ast().template create<ast::ListValue>(ovdl::NodeLocation::make_from(begin, end), LEXY_MOV(list)); // list is moved into the node
}
},
- [](ast::ParseState& state, const char* begin, auto&& list, auto&& semicolon, const char* end) {
+ [](detail::IsParseState auto& state, const char* begin, auto&& list, auto&& semicolon, const char* end) { // same as above; the semicolon argument is unused
if constexpr (std::is_same_v<std::decay_t<decltype(list)>, lexy::nullopt>) {
- return state.ast().create<ast::ListValue>(ovdl::NodeLocation::make_from(begin, end));
+ return state.ast().template create<ast::ListValue>(ovdl::NodeLocation::make_from(begin, end));
} else {
- return state.ast().create<ast::ListValue>(ovdl::NodeLocation::make_from(begin, end), LEXY_MOV(list));
+ return state.ast().template create<ast::ListValue>(ovdl::NodeLocation::make_from(begin, end), LEXY_MOV(list));
}
+ },
+ [](detail::IsParseState auto& state, lexy::nullopt fail = {}) { // overload taking only nullopt (presumably the recovered case) passes it straight through
+ return fail;
});
};
};
@@ -198,22 +349,20 @@ namespace ovdl::v2script::grammar {
template<ParseOptions Options>
using SAssignStatement = typename SimpleGrammar<Options>::SimpleAssignmentStatement;
- template<ovdl::detail::string_literal Keyword, auto Production, auto Value = dsl::default_kw_value<ast::ParseState, ast::IdentifierValue, Keyword>>
+ template<ovdl::detail::string_literal Keyword, auto Production, auto Value = dsl::default_kw_value<ast::IdentifierValue, Keyword>> // Value defaults to dsl::default_kw_value for this Keyword
using keyword_rule = dsl::keyword_rule<
- ast::ParseState,
id,
ast::AssignStatement,
Keyword, Production, Value>;
- template<ovdl::detail::string_literal Keyword, auto Production, auto Value = dsl::default_kw_value<ast::ParseState, ast::IdentifierValue, Keyword>>
+ template<ovdl::detail::string_literal Keyword, auto Production, auto Value = dsl::default_kw_value<ast::IdentifierValue, Keyword>> // Value defaults to dsl::default_kw_value for this Keyword
using fkeyword_rule = dsl::fkeyword_rule<
- ast::ParseState,
id,
ast::AssignStatement,
Keyword, Production, Value>;
template<ParseOptions Options>
- struct File {
+ struct BasicFile {
// Allow arbitrary spaces between individual tokens.
static constexpr auto whitespace = whitespace_specifier | comment_specifier;
@@ -223,4 +372,6 @@ namespace ovdl::v2script::grammar {
static constexpr auto value = lexy::as_list<ast::StatementList> >> construct<ast::FileTree>;
};
+
+ using File = BasicFile<NoStringEscapeOption>; // File is BasicFile instantiated with NoStringEscape = true
}