diff options
author | Spartan322 <Megacake1234@gmail.com> | 2024-05-09 16:06:02 +0200 |
---|---|---|
committer | Spartan322 <Megacake1234@gmail.com> | 2024-06-18 01:31:12 +0200 |
commit | b0c3ba3f91926b0c95625bdbf4aab69269130b13 (patch) | |
tree | f15ebc47d6bf370031af28e4bb4814ae30ef46e1 /src/openvic-dataloader/v2script/LuaDefinesGrammar.hpp | |
parent | 7b521d6023113372cf6b02e562828273c4040f0e (diff) |
Add runtime encoding detection and conversion (fix/char-detection)
Win-1251/1252 detection is a reduced C++ version of https://github.com/hsivonen/chardetng
Add manually-specified encoding fallback
Add default system encoding fallback
Add error recovery to v2script
Add unknown encoding detection warning
Remove csv::Parser templating
Fix lua files dropping data
Update lexy to foonathan/lexy@1e5d99fa3826b1c3c8628d3a11117fb4fb4cc0d0
Remove exclusive reliance on lexy::default_encoding for v2script
Move internal concepts to src/openvic-detail/InternalConcepts.hpp
Move contents of DetectUtf8.hpp to src/detail/Detect.hpp
Move openvic-dataloader/AbstractSyntaxTree.hpp to src
Move DiagnosticLogger.hpp to src
Move File.hpp to src
Move openvic-dataloader/detail/utility files to openvic-dataloader/detail
Add ovdl::utility::type_concat
Add ovdl::utility::type_prepend
Add ovdl::utility::is_instance_of
Overhaul parse error messages
Diffstat (limited to 'src/openvic-dataloader/v2script/LuaDefinesGrammar.hpp')
-rw-r--r-- | src/openvic-dataloader/v2script/LuaDefinesGrammar.hpp | 133 |
1 files changed, 82 insertions, 51 deletions
diff --git a/src/openvic-dataloader/v2script/LuaDefinesGrammar.hpp b/src/openvic-dataloader/v2script/LuaDefinesGrammar.hpp index 96cce99..885413c 100644 --- a/src/openvic-dataloader/v2script/LuaDefinesGrammar.hpp +++ b/src/openvic-dataloader/v2script/LuaDefinesGrammar.hpp @@ -4,9 +4,12 @@ #include <lexy/_detail/config.hpp> #include <lexy/dsl.hpp> +#include <lexy/dsl/delimited.hpp> +#include <lexy/dsl/recover.hpp> +#include <lexy/dsl/unicode.hpp> -#include "ParseState.hpp" #include "SimpleGrammar.hpp" +#include "detail/InternalConcepts.hpp" #include "detail/dsl.hpp" namespace ovdl::v2script::lua::grammar { @@ -21,90 +24,118 @@ namespace ovdl::v2script::lua::grammar { template<typename T> constexpr auto construct_list = v2script::grammar::construct_list<T>; - struct ParseOptions { - }; - - template<ParseOptions Options> struct StatementListBlock; static constexpr auto comment_specifier = LEXY_LIT("--") >> lexy::dsl::until(lexy::dsl::newline).or_eof(); - template<ParseOptions Options> struct Identifier { static constexpr auto rule = lexy::dsl::identifier(lexy::dsl::ascii::alpha_underscore, lexy::dsl::ascii::alpha_digit_underscore); - static constexpr auto value = callback<ast::IdentifierValue*>( - [](ast::ParseState& state, auto lexeme) { - auto value = state.ast().intern(lexeme.data(), lexeme.size()); - return state.ast().create<ast::IdentifierValue>(lexeme.begin(), lexeme.end(), value); - }); + static constexpr auto value = + callback<ast::IdentifierValue*>( + [](detail::IsParseState auto& state, auto lexeme) { + auto value = state.ast().intern(lexeme.data(), lexeme.size()); + return state.ast().template create<ast::IdentifierValue>(lexeme.begin(), lexeme.end(), value); + }); }; - template<ParseOptions Options> struct Value { static constexpr auto rule = lexy::dsl::identifier(lexy::dsl::ascii::digit / lexy::dsl::lit_c<'.'> / lexy::dsl::lit_c<'-'>); - static constexpr auto value = callback<ast::IdentifierValue*>( - [](ast::ParseState& state, auto lexeme) { - auto 
value = state.ast().intern(lexeme.data(), lexeme.size()); - return state.ast().create<ast::IdentifierValue>(lexeme.begin(), lexeme.end(), value); - }); - }; - - template<ParseOptions Options> - struct String { - static constexpr auto rule = [] { - // Arbitrary code points that aren't control characters. - auto c = dsl::make_range<0x20, 0xFF>() - lexy::dsl::ascii::control; - - return lexy::dsl::delimited(lexy::dsl::position(lexy::dsl::lit_b<'"'>))(c) | lexy::dsl::delimited(lexy::dsl::position(lexy::dsl::lit_b<'\''>))(c); - }(); - static constexpr auto value = - lexy::as_string<std::string> >> - callback<ast::StringValue*>( - [](ast::ParseState& state, const char* begin, const std::string& str, const char* end) { - auto value = state.ast().intern(str.data(), str.length()); - return state.ast().create<ast::StringValue>(begin, end, value); + callback<ast::IdentifierValue*>( + [](detail::IsParseState auto& state, auto lexeme) { + auto value = state.ast().intern(lexeme.data(), lexeme.size()); + return state.ast().template create<ast::IdentifierValue>(lexeme.begin(), lexeme.end(), value); }); }; - template<ParseOptions Options> + struct String : lexy::scan_production<ast::StringValue*>, + lexy::token_production { + template<typename Context, typename Reader> + static constexpr scan_result scan(lexy::rule_scanner<Context, Reader>& scanner, detail::IsParseState auto& state) { + using encoding = typename Reader::encoding; + + constexpr auto c = [] { + if constexpr (std::same_as<encoding, lexy::default_encoding> || std::same_as<encoding, lexy::byte_encoding>) { + // Arbitrary code points that aren't control characters. 
+ return dsl::lit_b_range<0x20, 0xFF> - lexy::dsl::ascii::control; + } else { + return -lexy::dsl::unicode::control; + } + }(); + auto rule = lexy::dsl::quoted(c) | lexy::dsl::single_quoted(c); + auto begin = scanner.position(); + lexy::scan_result<std::string> str_result; + scanner.parse(str_result, rule); + if (!scanner || !str_result) + return lexy::scan_failed; + auto end = scanner.position(); + auto str = str_result.value(); + auto value = state.ast().intern(str.data(), str.size()); + return state.ast().template create<ast::StringValue>(begin, end, value); + } + + static constexpr auto rule = lexy::dsl::peek(lexy::dsl::quoted.open() | lexy::dsl::single_quoted.open()) >> lexy::dsl::scan; + static constexpr auto value = ovdl::v2script::grammar::convert_as_string<std::string> >> lexy::forward<ast::StringValue*>; + }; + struct Expression { - static constexpr auto rule = lexy::dsl::p<Value<Options>> | lexy::dsl::p<String<Options>>; + static constexpr auto rule = lexy::dsl::p<Value> | lexy::dsl::p<String>; static constexpr auto value = lexy::forward<ast::Value*>; }; - template<ParseOptions Options> struct AssignmentStatement { - static constexpr auto rule = - dsl::p<Identifier<Options>> >> - lexy::dsl::equal_sign >> - (lexy::dsl::p<Expression<Options>> | lexy::dsl::recurse_branch<StatementListBlock<Options>>); + static constexpr auto rule = [] { + auto right_brace = lexy::dsl::lit_c<'}'>; + + auto expression = lexy::dsl::p<Expression>; + auto statement_list = lexy::dsl::recurse_branch<StatementListBlock>; + + auto rhs_recover = lexy::dsl::recover(expression, statement_list).limit(right_brace); + auto rhs_try = lexy::dsl::try_(expression | statement_list, rhs_recover); + + auto identifier = dsl::p<Identifier> >> lexy::dsl::equal_sign + rhs_try; + + auto recover = lexy::dsl::recover(identifier).limit(right_brace); + return lexy::dsl::try_(identifier, recover); + }(); static constexpr auto value = callback<ast::AssignStatement*>( - [](ast::ParseState& state, const 
char* pos, ast::IdentifierValue* name, ast::Value* initializer) { - return state.ast().create<ast::AssignStatement>(pos, name, initializer); + [](detail::IsParseState auto& state, const char* pos, ast::IdentifierValue* name, ast::Value* initializer) -> ast::AssignStatement* { + if (initializer == nullptr) return nullptr; + return state.ast().template create<ast::AssignStatement>(pos, name, initializer); + }, + [](detail::IsParseState auto& state, ast::Value*) { + return nullptr; + }, + [](detail::IsParseState auto& state) { + return nullptr; }); }; - template<ParseOptions Options> struct StatementListBlock { - static constexpr auto rule = - dsl::curly_bracketed( - lexy::dsl::opt( - lexy::dsl::list( - lexy::dsl::recurse_branch<AssignmentStatement<Options>>, - lexy::dsl::trailing_sep(lexy::dsl::lit_c<','>)))); + static constexpr auto rule = [] { + auto right_brace = lexy::dsl::lit_c<'}'>; + auto comma = lexy::dsl::lit_c<','>; + + auto assign_statement = lexy::dsl::recurse_branch<AssignmentStatement>; + auto assign_try = lexy::dsl::try_(assign_statement); + + auto curly_bracket = dsl::curly_bracketed.opt_list( + assign_try, + lexy::dsl::trailing_sep(comma)); + + return lexy::dsl::try_(curly_bracket, lexy::dsl::find(right_brace)); + }(); static constexpr auto value = lexy::as_list<ast::AssignStatementList> >> construct_list<ast::ListValue>; }; - template<ParseOptions Options = ParseOptions {}> struct File { // Allow arbitrary spaces between individual tokens. static constexpr auto whitespace = ovdl::v2script::grammar::whitespace_specifier | comment_specifier; - static constexpr auto rule = lexy::dsl::position + lexy::dsl::terminator(lexy::dsl::eof).opt_list(lexy::dsl::p<AssignmentStatement<Options>>); + static constexpr auto rule = lexy::dsl::position + lexy::dsl::terminator(lexy::dsl::eof).opt_list(lexy::dsl::p<AssignmentStatement>); static constexpr auto value = lexy::as_list<ast::AssignStatementList> >> construct<ast::FileTree>; }; |