diff options
Diffstat (limited to 'src/openvic-dataloader/v2script/SimpleGrammar.hpp')
-rw-r--r-- | src/openvic-dataloader/v2script/SimpleGrammar.hpp | 262 |
1 files changed, 140 insertions, 122 deletions
diff --git a/src/openvic-dataloader/v2script/SimpleGrammar.hpp b/src/openvic-dataloader/v2script/SimpleGrammar.hpp index 7a59123..bd4adaa 100644 --- a/src/openvic-dataloader/v2script/SimpleGrammar.hpp +++ b/src/openvic-dataloader/v2script/SimpleGrammar.hpp @@ -1,14 +1,12 @@ #pragma once -#include <string> -#include <vector> - +#include <openvic-dataloader/NodeLocation.hpp> #include <openvic-dataloader/v2script/AbstractSyntaxTree.hpp> #include <lexy/callback.hpp> #include <lexy/dsl.hpp> -#include "detail/LexyLitRange.hpp" +#include "detail/dsl.hpp" // Grammar Definitions // /* REQUIREMENTS: @@ -21,6 +19,11 @@ * DAT-643 */ namespace ovdl::v2script::grammar { + template<typename T> + constexpr auto construct = dsl::construct<ast::ParseState, T>; + template<typename T, bool DisableEmpty = false, typename ListType = ast::AssignStatementList> + constexpr auto construct_list = dsl::construct_list<ast::ParseState, T, ListType, DisableEmpty>; + struct ParseOptions { /// @brief Makes string parsing avoid string escapes bool NoStringEscape; @@ -29,11 +32,6 @@ namespace ovdl::v2script::grammar { static constexpr ParseOptions NoStringEscapeOption = ParseOptions { true }; static constexpr ParseOptions StringEscapeOption = ParseOptions { false }; - template<ParseOptions Options> - struct StatementListBlock; - template<ParseOptions Options> - struct AssignmentStatement; - /* REQUIREMENTS: DAT-630 */ static constexpr auto whitespace_specifier = lexy::dsl::ascii::blank / lexy::dsl::ascii::newline; /* REQUIREMENTS: DAT-631 */ @@ -47,18 +45,18 @@ namespace ovdl::v2script::grammar { lexy::dsl::ascii::alpha_digit_underscore / LEXY_ASCII_ONE_OF("+:@%&'-.") / lexy::dsl::lit_b<0x8A> / lexy::dsl::lit_b<0x8C> / lexy::dsl::lit_b<0x8E> / lexy::dsl::lit_b<0x92> / lexy::dsl::lit_b<0x97> / lexy::dsl::lit_b<0x9A> / lexy::dsl::lit_b<0x9C> / - detail::lexydsl::make_range<0x9E, 0x9F>() / - detail::lexydsl::make_range<0xC0, 0xD6>() / - detail::lexydsl::make_range<0xD8, 0xF6>() / - detail::lexydsl::make_range<0xF8, 0xFF>(); + dsl::make_range<0x9E, 0x9F>() / + dsl::make_range<0xC0, 0xD6>() / + dsl::make_range<0xD8, 0xF6>() / + dsl::make_range<0xF8, 0xFF>(); static constexpr auto windows_1251_data_specifier_additions = - detail::lexydsl::make_range<0x80, 0x81>() / lexy::dsl::lit_b<0x83> / lexy::dsl::lit_b<0x8D> / lexy::dsl::lit_b<0x8F> / + dsl::make_range<0x80, 0x81>() / lexy::dsl::lit_b<0x83> / lexy::dsl::lit_b<0x8D> / lexy::dsl::lit_b<0x8F> / lexy::dsl::lit_b<0x90> / lexy::dsl::lit_b<0x9D> / lexy::dsl::lit_b<0x9F> / - detail::lexydsl::make_range<0xA1, 0xA3>() / lexy::dsl::lit_b<0xA5> / lexy::dsl::lit_b<0xA8> / lexy::dsl::lit_b<0xAA> / + dsl::make_range<0xA1, 0xA3>() / lexy::dsl::lit_b<0xA5> / lexy::dsl::lit_b<0xA8> / lexy::dsl::lit_b<0xAA> / lexy::dsl::lit_b<0xAF> / - detail::lexydsl::make_range<0xB2, 0xB4>() / lexy::dsl::lit_b<0xB8> / lexy::dsl::lit_b<0xBA> / - detail::lexydsl::make_range<0xBC, 0xBF>() / + dsl::make_range<0xB2, 0xB4>() / lexy::dsl::lit_b<0xB8> / lexy::dsl::lit_b<0xBA> / + dsl::make_range<0xBC, 0xBF>() / lexy::dsl::lit_b<0xD7> / lexy::dsl::lit_b<0xF7>; static constexpr auto data_specifier = windows_1252_data_specifier / windows_1251_data_specifier_additions; @@ -77,125 +75,145 @@ namespace ovdl::v2script::grammar { .map<'t'>('\t'); template<ParseOptions Options> - struct Identifier { - static constexpr auto rule = lexy::dsl::identifier(data_char_class); - static constexpr auto value = lexy::callback<ast::NodePtr>( - [](auto lexeme) { - std::string str(lexeme.data(), lexeme.size()); - return ast::make_node_ptr<ast::IdentifierNode>(ast::NodeLocation { lexeme.begin(), lexeme.end() }, LEXY_MOV(str)); - }); - }; - - /* REQUIREMENTS: - * DAT-633 - * DAT-634 - */ - template<ParseOptions Options> - struct StringExpression { - static constexpr auto rule = [] { - if constexpr (Options.NoStringEscape) { - auto c = ovdl::detail::lexydsl::make_range<0x20, 0xFF>() / lexy::dsl::lit_b<0x07> / lexy::dsl::lit_b<0x09> / lexy::dsl::lit_b<0x0A> / lexy::dsl::lit_b<0x0D>; - return lexy::dsl::delimited(lexy::dsl::position(lexy::dsl::lit_b<'"'>))(c); - } else { - // Arbitrary code points that aren't control characters. - auto c = ovdl::detail::lexydsl::make_range<0x20, 0xFF>() - lexy::dsl::ascii::control; - - // Escape sequences start with a backlash. - // They either map one of the symbols, - // or a Unicode code point of the form uXXXX. - auto escape = lexy::dsl::backslash_escape // - .symbol<escaped_symbols>(); - return lexy::dsl::delimited(lexy::dsl::position(lexy::dsl::lit_b<'"'>))(c, escape); - } - }(); - - static constexpr auto value = - lexy::as_string<std::string> >> - lexy::callback<ast::NodePtr>( - [](const char* begin, auto&& str, const char* end) { - return ast::make_node_ptr<ast::StringNode>(ast::NodeLocation::make_from(begin, end), LEXY_MOV(str), Options.NoStringEscape); + struct SimpleGrammar { + struct StatementListBlock; + + struct Identifier { + static constexpr auto rule = lexy::dsl::identifier(data_char_class); + static constexpr auto value = dsl::callback<ast::IdentifierValue*>( + [](ast::ParseState& state, auto lexeme) { + auto value = state.ast().intern(lexeme.data(), lexeme.size()); + return state.ast().create<ast::IdentifierValue>(ovdl::NodeLocation::make_from(lexeme.begin(), lexeme.end()), value); }); - }; - - /* REQUIREMENTS: DAT-638 */ - template<ParseOptions Options> - struct ValueExpression { - static constexpr auto rule = lexy::dsl::p<Identifier<Options>> | lexy::dsl::p<StringExpression<Options>>; - static constexpr auto value = lexy::forward<ast::NodePtr>; + }; + + /* REQUIREMENTS: + * DAT-633 + * DAT-634 + */ + struct StringExpression { + static constexpr auto rule = [] { + if constexpr (Options.NoStringEscape) { + auto c = dsl::make_range<0x20, 0xFF>() / lexy::dsl::lit_b<0x07> / lexy::dsl::lit_b<0x09> / lexy::dsl::lit_b<0x0A> / lexy::dsl::lit_b<0x0D>; + return lexy::dsl::delimited(lexy::dsl::position(lexy::dsl::lit_b<'"'>))(c); + } else { + // Arbitrary code points that aren't control characters. + auto c = dsl::make_range<0x20, 0xFF>() - lexy::dsl::ascii::control; + + // Escape sequences start with a backlash. + // They either map one of the symbols, + // or a Unicode code point of the form uXXXX. + auto escape = lexy::dsl::backslash_escape // + .symbol<escaped_symbols>(); + return lexy::dsl::delimited(lexy::dsl::position(lexy::dsl::lit_b<'"'>))(c, escape); + } + }(); + + static constexpr auto value = + lexy::as_string<std::string> >> + dsl::callback<ast::StringValue*>( + [](ast::ParseState& state, const char* begin, auto&& str, const char* end) { + auto value = state.ast().intern(str.data(), str.length()); + return state.ast().create<ast::StringValue>(ovdl::NodeLocation::make_from(begin, end), value); + }); + }; + + /* REQUIREMENTS: DAT-638 */ + struct ValueExpression { + static constexpr auto rule = lexy::dsl::p<Identifier> | lexy::dsl::p<StringExpression>; + static constexpr auto value = lexy::forward<ast::Value*>; + }; + + struct SimpleAssignmentStatement { + static constexpr auto rule = + dsl::p<Identifier> >> + (lexy::dsl::equal_sign >> + (lexy::dsl::p<ValueExpression> | lexy::dsl::recurse_branch<StatementListBlock>)); + + static constexpr auto value = construct<ast::AssignStatement>; + }; + + /* REQUIREMENTS: DAT-639 */ + struct AssignmentStatement { + static constexpr auto rule = + dsl::p<Identifier> >> + (lexy::dsl::equal_sign >> + (lexy::dsl::p<ValueExpression> | lexy::dsl::recurse_branch<StatementListBlock>) | + lexy::dsl::else_ >> lexy::dsl::return_) | + dsl::p<StringExpression> | + lexy::dsl::recurse_branch<StatementListBlock>; + + static constexpr auto value = dsl::callback<ast::Statement*>( + [](ast::ParseState& state, const char* pos, ast::IdentifierValue* name, ast::Value* initializer) { + return state.ast().create<ast::AssignStatement>(pos, name, initializer); + }, + [](ast::ParseState& state, const char* pos, ast::Value* left, lexy::nullopt = {}) { + return state.ast().create<ast::ValueStatement>(pos, left); + }, + [](ast::ParseState& state, ast::Value* left) { + return state.ast().create<ast::ValueStatement>(state.ast().location_of(left), left); + }); + }; + + /* REQUIREMENTS: DAT-640 */ + struct StatementListBlock { + static constexpr auto rule = + dsl::curly_bracketed( + (lexy::dsl::opt(lexy::dsl::list(lexy::dsl::recurse_branch<AssignmentStatement>)) + + lexy::dsl::opt(lexy::dsl::semicolon))); + + static constexpr auto value = + lexy::as_list<ast::StatementList> >> + dsl::callback<ast::ListValue*>( + [](ast::ParseState& state, const char* begin, auto&& list, const char* end) { + if constexpr (std::is_same_v<std::decay_t<decltype(list)>, lexy::nullopt>) { + return state.ast().create<ast::ListValue>(ovdl::NodeLocation::make_from(begin, end)); + } else { + return state.ast().create<ast::ListValue>(ovdl::NodeLocation::make_from(begin, end), LEXY_MOV(list)); + } + }, + [](ast::ParseState& state, const char* begin, auto&& list, auto&& semicolon, const char* end) { + if constexpr (std::is_same_v<std::decay_t<decltype(list)>, lexy::nullopt>) { + return state.ast().create<ast::ListValue>(ovdl::NodeLocation::make_from(begin, end)); + } else { + return state.ast().create<ast::ListValue>(ovdl::NodeLocation::make_from(begin, end), LEXY_MOV(list)); + } + }); + }; }; template<ParseOptions Options> - struct SimpleAssignmentStatement { - static constexpr auto rule = - lexy::dsl::position(lexy::dsl::p<Identifier<Options>>) >> - (lexy::dsl::equal_sign + - (lexy::dsl::p<ValueExpression<Options>> | lexy::dsl::recurse_branch<StatementListBlock<Options>>)); - - static constexpr auto value = lexy::callback<ast::NodePtr>( - [](const char* pos, auto name, auto&& initalizer) { - return ast::make_node_ptr<ast::AssignNode>(pos, LEXY_MOV(name), LEXY_MOV(initalizer)); - }); - }; + using StringExpression = typename SimpleGrammar<Options>::StringExpression; - /* REQUIREMENTS: DAT-639 */ template<ParseOptions Options> - struct AssignmentStatement { - static constexpr auto rule = - lexy::dsl::position(lexy::dsl::p<Identifier<Options>>) >> - (lexy::dsl::equal_sign >> - (lexy::dsl::p<ValueExpression<Options>> | lexy::dsl::recurse_branch<StatementListBlock<Options>>) | - lexy::dsl::else_ >> lexy::dsl::return_) | - lexy::dsl::p<StringExpression<Options>> | - lexy::dsl::recurse_branch<StatementListBlock<Options>>; - - static constexpr auto value = lexy::callback<ast::NodePtr>( - [](const char* pos, auto name, lexy::nullopt = {}) { - return LEXY_MOV(name); - }, - [](auto name, lexy::nullopt = {}) { - return LEXY_MOV(name); - }, - [](const char* pos, auto name, auto&& initalizer) { - return ast::make_node_ptr<ast::AssignNode>(pos, LEXY_MOV(name), LEXY_MOV(initalizer)); - }); - }; + using Identifier = typename SimpleGrammar<Options>::Identifier; - /* REQUIREMENTS: DAT-640 */ template<ParseOptions Options> - struct StatementListBlock { - static constexpr auto rule = - lexy::dsl::position(lexy::dsl::curly_bracketed.open()) >> - (lexy::dsl::opt(lexy::dsl::list(lexy::dsl::recurse_branch<AssignmentStatement<Options>>)) + - lexy::dsl::opt(lexy::dsl::semicolon)) >> - lexy::dsl::position(lexy::dsl::curly_bracketed.close()); - - static constexpr auto value = - lexy::as_list<std::vector<ast::NodePtr>> >> - lexy::callback<ast::NodePtr>( - [](const char* begin, lexy::nullopt, const char* end) { - return ast::make_node_ptr<ast::ListNode>(ast::NodeLocation::make_from(begin, end)); - }, - [](const char* begin, auto&& list, const char* end) { - return ast::make_node_ptr<ast::ListNode>(ast::NodeLocation::make_from(begin, end), LEXY_MOV(list)); - }, - [](const char* begin, lexy::nullopt, lexy::nullopt, const char* end) { - return ast::make_node_ptr<ast::ListNode>(ast::NodeLocation::make_from(begin, end)); - }, - [](const char* begin, auto&& list, lexy::nullopt, const char* end) { - return ast::make_node_ptr<ast::ListNode>(ast::NodeLocation::make_from(begin, end), LEXY_MOV(list)); - }, - [](const char* begin, auto& list, const char* end) { - return ast::make_node_ptr<ast::ListNode>(ast::NodeLocation::make_from(begin, end), list); - }); - }; + using SAssignStatement = typename SimpleGrammar<Options>::SimpleAssignmentStatement; + + template<ovdl::detail::string_literal Keyword, auto Production, auto Value = dsl::default_kw_value<ast::ParseState, ast::IdentifierValue, Keyword>> + using keyword_rule = dsl::keyword_rule< + ast::ParseState, + Identifier<StringEscapeOption>, + ast::AssignStatement, + Keyword, Production, Value>; + + template<ovdl::detail::string_literal Keyword, auto Production, auto Value = dsl::default_kw_value<ast::ParseState, ast::IdentifierValue, Keyword>> + using fkeyword_rule = dsl::fkeyword_rule< + ast::ParseState, + Identifier<StringEscapeOption>, + ast::AssignStatement, + Keyword, Production, Value>; template<ParseOptions Options> struct File { // Allow arbitrary spaces between individual tokens. static constexpr auto whitespace = whitespace_specifier | comment_specifier; - static constexpr auto rule = lexy::dsl::position + lexy::dsl::terminator(lexy::dsl::eof).opt_list(lexy::dsl::p<AssignmentStatement<Options>>); + static constexpr auto rule = lexy::dsl::position( + lexy::dsl::terminator(lexy::dsl::eof) + .opt_list(lexy::dsl::p<typename SimpleGrammar<Options>::AssignmentStatement>)); - static constexpr auto value = lexy::as_list<std::vector<ast::NodePtr>> >> lexy::new_<ast::FileNode, ast::NodePtr>; + static constexpr auto value = lexy::as_list<ast::StatementList> >> construct<ast::FileTree>; }; } |