diff options
author | Spartan322 <Megacake1234@gmail.com> | 2024-07-21 03:02:30 +0200 |
---|---|---|
committer | Spartan322 <Megacake1234@gmail.com> | 2024-07-22 02:49:25 +0200 |
commit | a07c64148eb60b886f92caa46a9c687240ec420c (patch) | |
tree | e43aae271fe5aa75f1b0c918c1428e7f766a14f8 /src | |
parent | ba8addc72595607206da654bc79c994121b7a3ae (diff) |
Add backslash identifier support to v2script [add/backslash-identifier]
Add buffer::char_type size multiplier to max file size of string intern buffer
Fix list grammar segfaults
Fix diagnostic logger intern segfaults from buffer reallocation
Fix non-string-supported CSV parser not supporting Victoria 2 CSV escaping behavior
Diffstat (limited to 'src')
-rw-r--r-- | src/openvic-dataloader/AbstractSyntaxTree.hpp | 5 | ||||
-rw-r--r-- | src/openvic-dataloader/DiagnosticLogger.hpp | 26 | ||||
-rw-r--r-- | src/openvic-dataloader/csv/CsvGrammar.hpp | 28 | ||||
-rw-r--r-- | src/openvic-dataloader/csv/Parser.cpp | 12 | ||||
-rw-r--r-- | src/openvic-dataloader/v2script/Parser.cpp | 12 | ||||
-rw-r--r-- | src/openvic-dataloader/v2script/SimpleGrammar.hpp | 34 |
6 files changed, 61 insertions, 56 deletions
diff --git a/src/openvic-dataloader/AbstractSyntaxTree.hpp b/src/openvic-dataloader/AbstractSyntaxTree.hpp index ade1c82..f9f5796 100644 --- a/src/openvic-dataloader/AbstractSyntaxTree.hpp +++ b/src/openvic-dataloader/AbstractSyntaxTree.hpp @@ -3,6 +3,7 @@ #include <concepts> #include <cstdio> #include <string_view> +#include <type_traits> #include <utility> #include <openvic-dataloader/NodeLocation.hpp> @@ -51,12 +52,12 @@ namespace ovdl { using node_type = typename file_type::node_type; explicit BasicAbstractSyntaxTree(file_type&& file) - : AbstractSyntaxTree(file.size()), + : AbstractSyntaxTree(file.size() * file.visit_buffer([](auto&& buffer) -> size_t { return sizeof(typename std::decay_t<decltype(buffer)>::char_type); })), _file { std::move(file) } {} template<typename Encoding, typename MemoryResource = void> explicit BasicAbstractSyntaxTree(lexy::buffer<Encoding, MemoryResource>&& buffer) - : AbstractSyntaxTree(buffer.size()), + : AbstractSyntaxTree(buffer.size() * sizeof(Encoding::char_type)), _file { std::move(buffer) } {} void set_location(const node_type* n, NodeLocation loc) { diff --git a/src/openvic-dataloader/DiagnosticLogger.hpp b/src/openvic-dataloader/DiagnosticLogger.hpp index 9810e1e..8c491ca 100644 --- a/src/openvic-dataloader/DiagnosticLogger.hpp +++ b/src/openvic-dataloader/DiagnosticLogger.hpp @@ -2,6 +2,7 @@ #include <concepts> // IWYU pragma: keep #include <cstdio> +#include <iostream> #include <ostream> #include <string> #include <type_traits> @@ -37,12 +38,7 @@ namespace ovdl { template<typename ParseState> struct BasicDiagnosticLogger; - struct DiagnosticLogger { - struct SymbolId; - using index_type = std::uint32_t; - using symbol_type = dryad::symbol<SymbolId, index_type>; - using symbol_interner_type = dryad::symbol_interner<SymbolId, char, index_type>; - + struct DiagnosticLogger : error::ErrorSymbolInterner { using AnnotationKind = lexy_ext::annotation_kind; using DiagnosticKind = lexy_ext::diagnostic_kind; @@ -116,23 +112,23 @@ 
namespace ovdl { } result = writer.error(); } else { - auto production = _logger.intern_cstr(production_name); + auto production = production_name; if constexpr (std::is_same_v<Tag, lexy::expected_literal>) { auto string = lexy::_detail::make_literal_lexeme<typename Reader::encoding>(error.string(), error.length()); NodeLocation loc = NodeLocation::make_from(context.position(), error.position() - 1); - auto message = _logger.intern_cstr(fmt::format("expected '{}'", string.data())); + auto message = _logger.intern(fmt::format("expected '{}'", string.data())); result = _logger.template create<error::ExpectedLiteral>(loc, message, production); } else if constexpr (std::is_same_v<Tag, lexy::expected_keyword>) { auto string = lexy::_detail::make_literal_lexeme<typename Reader::encoding>(error.string(), error.length()); NodeLocation loc = NodeLocation::make_from(context.position(), error.position() - 1); - auto message = _logger.intern_cstr(fmt::format("expected keyword '{}'", string.data())); + auto message = _logger.intern(fmt::format("expected keyword '{}'", string.data())); result = _logger.template create<error::ExpectedKeyword>(loc, message, production); } else if constexpr (std::is_same_v<Tag, lexy::expected_char_class>) { - auto message = _logger.intern_cstr(fmt::format("expected {}", error.name())); + auto message = _logger.intern(fmt::format("expected {}", error.name())); result = _logger.template create<error::ExpectedCharClass>(error.position(), message, production); } else { NodeLocation loc = NodeLocation::make_from(error.begin(), error.end()); - auto message = _logger.intern_cstr(error.message()); + auto message = _logger.intern(error.message()); result = _logger.template create<error::GenericParseError>(loc, message, production); } } @@ -361,7 +357,7 @@ namespace ovdl { }); error::Annotation* annotation; - auto message = _logger.intern_cstr(output); + auto message = _logger.intern(output); switch (kind) { case AnnotationKind::primary: annotation = 
_logger.create<error::PrimaryAnnotation>(loc, message); @@ -404,7 +400,7 @@ namespace ovdl { }); impl.write_path(iter, file().path()); - auto message = intern_cstr(output); + auto message = intern(output); error->_set_message(message); if (!error->is_linked_in_tree()) insert(error); @@ -422,8 +418,8 @@ namespace ovdl { }); impl.write_path(iter, file().path()); - auto production = intern_cstr(production_name); - auto message = intern_cstr(output); + auto production = production_name; + auto message = intern(output); auto* error = [&] { if constexpr (std::is_same_v<Tag, lexy::expected_literal>) { return create<error::ExpectedLiteral>(loc, message, production); diff --git a/src/openvic-dataloader/csv/CsvGrammar.hpp b/src/openvic-dataloader/csv/CsvGrammar.hpp index 19aee54..91226c7 100644 --- a/src/openvic-dataloader/csv/CsvGrammar.hpp +++ b/src/openvic-dataloader/csv/CsvGrammar.hpp @@ -11,10 +11,7 @@ #include <lexy/_detail/config.hpp> #include <lexy/callback.hpp> -#include <lexy/callback/string.hpp> #include <lexy/dsl.hpp> -#include <lexy/dsl/ascii.hpp> -#include <lexy/dsl/option.hpp> #include <lexy/encoding.hpp> #include "detail/Convert.hpp" @@ -73,6 +70,9 @@ namespace ovdl::csv::grammar { constexpr auto escaped_quote = lexy::symbol_table<char> // .map<'"'>('"'); + constexpr auto escaped_newline = lexy::symbol_table<char> // + .map<'n'>('\n'); + template<ParseOptions Options> struct CsvGrammar { struct StringValue : lexy::scan_production<std::string>, @@ -118,6 +118,11 @@ namespace ovdl::csv::grammar { template<auto character> static constexpr auto _escape_check = character - (lexy::dsl::lit_b<Options.SepChar> / lexy::dsl::ascii::newline); + struct Backslash { + static constexpr auto rule = LEXY_LIT("\\n"); + static constexpr auto value = lexy::constant('\n'); + }; + template<typename Context, typename Reader> static constexpr scan_result scan(lexy::rule_scanner<Context, Reader>& scanner, detail::IsFileParseState auto& state) { using encoding = typename 
Reader::encoding; @@ -134,13 +139,16 @@ namespace ovdl::csv::grammar { if constexpr (Options.SupportStrings) { return lexy::dsl::identifier(character - (lexy::dsl::lit_b<Options.SepChar> / lexy::dsl::ascii::newline)); } else { - auto escape_check_char = _escape_check<character>; - auto id_check_char = escape_check_char - lexy::dsl::lit_b<'\\'>; - auto id_segment = lexy::dsl::identifier(id_check_char); - auto escape_segement = lexy::dsl::token(escape_check_char); - auto escape_sym = lexy::dsl::symbol<escaped_symbols>(escape_segement); - auto escape_rule = lexy::dsl::lit_b<'\\'> >> escape_sym; - return lexy::dsl::list(id_segment | escape_rule); + constexpr auto backslash = lexy::dsl::lit_b<'\\'>; + + constexpr auto escape_check_char = _escape_check<character>; + constexpr auto escape_rule = lexy::dsl::p<Backslash>; + + return lexy::dsl::list( + lexy::dsl::identifier(escape_check_char - backslash) | + escape_rule | + lexy::dsl::capture(escape_check_char) // + ); } }(); diff --git a/src/openvic-dataloader/csv/Parser.cpp b/src/openvic-dataloader/csv/Parser.cpp index 8fe8b17..bbd1be4 100644 --- a/src/openvic-dataloader/csv/Parser.cpp +++ b/src/openvic-dataloader/csv/Parser.cpp @@ -195,6 +195,10 @@ typename Parser::error_range Parser::get_errors() const { return _parse_handler->get_errors(); } +std::string_view Parser::error(const ovdl::error::Error* error) const { + return error->message(_parse_handler->parse_state().logger().symbol_interner()); +} + const FilePosition Parser::get_error_position(const error::Error* error) const { if (!error || !error->is_linked_in_tree()) { return {}; @@ -231,20 +235,20 @@ void Parser::print_errors_to(std::basic_ostream<char>& stream) const { dryad::visit_tree( error, [&](const error::BufferError* buffer_error) { - stream << "buffer error: " << buffer_error->message() << '\n'; + stream << "buffer error: " << this->error(buffer_error) << '\n'; }, [&](dryad::child_visitor<error::ErrorKind> visitor, const error::AnnotatedError* 
annotated_error) { - stream << annotated_error->message() << '\n'; + stream << this->error(annotated_error) << '\n'; auto annotations = annotated_error->annotations(); for (auto annotation : annotations) { visitor(annotation); } }, [&](const error::PrimaryAnnotation* primary) { - stream << primary->message() << '\n'; + stream << this->error(primary) << '\n'; }, [&](const error::SecondaryAnnotation* secondary) { - stream << secondary->message() << '\n'; + stream << this->error(secondary) << '\n'; }); } }
\ No newline at end of file diff --git a/src/openvic-dataloader/v2script/Parser.cpp b/src/openvic-dataloader/v2script/Parser.cpp index 2375a1a..e5234d6 100644 --- a/src/openvic-dataloader/v2script/Parser.cpp +++ b/src/openvic-dataloader/v2script/Parser.cpp @@ -320,6 +320,10 @@ Parser::error_range Parser::get_errors() const { return _parse_handler->get_errors(); } +std::string_view Parser::error(const ovdl::error::Error* error) const { + return error->message(_parse_handler->parse_state().logger().symbol_interner()); +} + const FilePosition Parser::get_error_position(const error::Error* error) const { if (!error || !error->is_linked_in_tree()) { return {}; @@ -352,20 +356,20 @@ void Parser::print_errors_to(std::basic_ostream<char>& stream) const { dryad::visit_tree( error, [&](const error::BufferError* buffer_error) { - stream << "buffer error: " << buffer_error->message() << '\n'; + stream << "buffer error: " << this->error(buffer_error) << '\n'; }, [&](dryad::child_visitor<error::ErrorKind> visitor, const error::AnnotatedError* annotated_error) { - stream << annotated_error->message() << '\n'; + stream << this->error(annotated_error) << '\n'; auto annotations = annotated_error->annotations(); for (auto annotation : annotations) { visitor(annotation); } }, [&](const error::PrimaryAnnotation* primary) { - stream << primary->message() << '\n'; + stream << this->error(primary) << '\n'; }, [&](const error::SecondaryAnnotation* secondary) { - stream << secondary->message() << '\n'; + stream << this->error(secondary) << '\n'; }); } }
\ No newline at end of file diff --git a/src/openvic-dataloader/v2script/SimpleGrammar.hpp b/src/openvic-dataloader/v2script/SimpleGrammar.hpp index c47b243..5474c79 100644 --- a/src/openvic-dataloader/v2script/SimpleGrammar.hpp +++ b/src/openvic-dataloader/v2script/SimpleGrammar.hpp @@ -5,15 +5,6 @@ #include <lexy/callback.hpp> #include <lexy/dsl.hpp> -#include <lexy/dsl/any.hpp> -#include <lexy/dsl/identifier.hpp> -#include <lexy/dsl/option.hpp> -#include <lexy/dsl/peek.hpp> -#include <lexy/dsl/punctuator.hpp> -#include <lexy/dsl/recover.hpp> -#include <lexy/dsl/scan.hpp> -#include <lexy/dsl/symbol.hpp> -#include <lexy/dsl/unicode.hpp> #include <lexy/encoding.hpp> #include <lexy/input/base.hpp> #include <lexy/input/buffer.hpp> @@ -63,7 +54,7 @@ namespace ovdl::v2script::grammar { /* REQUIREMENTS: DAT-631 */ static constexpr auto comment_specifier = LEXY_LIT("#") >> lexy::dsl::until(lexy::dsl::newline).or_eof(); - static constexpr auto ascii = lexy::dsl::ascii::alpha_digit_underscore / LEXY_ASCII_ONE_OF("+:@%&'-."); + static constexpr auto ascii = lexy::dsl::ascii::alpha_digit_underscore / LEXY_ASCII_ONE_OF("+:@%&'-.\\"); /* REQUIREMENTS: * DAT-632 @@ -91,7 +82,7 @@ namespace ovdl::v2script::grammar { static constexpr auto data_char_class = LEXY_CHAR_CLASS("DataSpecifier", data_specifier); - static constexpr auto utf_data_specifier = lexy::dsl::unicode::xid_continue / LEXY_ASCII_ONE_OF("+:@%&'-."); + static constexpr auto utf_data_specifier = lexy::dsl::unicode::xid_continue / LEXY_ASCII_ONE_OF("+:@%&'-.\\"); static constexpr auto utf_char_class = LEXY_CHAR_CLASS("DataSpecifier", utf_data_specifier); @@ -196,9 +187,9 @@ namespace ovdl::v2script::grammar { if constexpr (Options.NoStringEscape) { auto c = [] { if constexpr (std::same_as<encoding, lexy::default_encoding> || std::same_as<encoding, lexy::byte_encoding>) { - return dsl::lit_b_range<0x20, 0xFF> / lexy::dsl::lit_b<0x07> / lexy::dsl::lit_b<0x09> / lexy::dsl::lit_b<0x0A> / lexy::dsl::lit_b<0x0D>; + return 
dsl::lit_b_range<0x01, 0xFF>; } else { - return -lexy::dsl::unicode::control; + return lexy::dsl::unicode::character; } }(); return lexy::dsl::quoted(c); @@ -287,7 +278,7 @@ namespace ovdl::v2script::grammar { }(); static constexpr auto value = dsl::callback<ast::Statement*>( - [](detail::IsParseState auto& state, const char* pos, ast::IdentifierValue* name, ast::Value* initializer) { + [](detail::IsParseState auto& state, const char* pos, ast::IdentifierValue* name, ast::Value* initializer) -> ast::AssignStatement* { return state.ast().template create<ast::AssignStatement>(pos, name, initializer); }, [](detail::IsParseState auto& state, bool&, const char* pos, ast::IdentifierValue* name, ast::Value* initializer) { @@ -306,11 +297,15 @@ namespace ovdl::v2script::grammar { return state.ast().template create<ast::ValueStatement>(pos, left); }, [](detail::IsParseState auto& state, ast::Value* left) -> ast::ValueStatement* { - if (left == nullptr) return nullptr; + if (left == nullptr) { // May no longer be neccessary + return nullptr; + } return state.ast().template create<ast::ValueStatement>(state.ast().location_of(left), left); }, [](detail::IsParseState auto& state, bool&, ast::Value* left) -> ast::ValueStatement* { - if (left == nullptr) return nullptr; + if (left == nullptr) { // May no longer be neccessary + return nullptr; + } return state.ast().template create<ast::ValueStatement>(state.ast().location_of(left), left); }); }; @@ -322,12 +317,12 @@ namespace ovdl::v2script::grammar { auto assign_statement = lexy::dsl::recurse_branch<AssignmentStatement>; - auto assign_try = lexy::dsl::try_(assign_statement); + auto assign_try = lexy::dsl::try_(assign_statement, lexy::dsl::nullopt); auto assign_opt = lexy::dsl::opt(lexy::dsl::list(assign_try)); auto curly_bracket = dsl::curly_bracketed(assign_opt + lexy::dsl::opt(lexy::dsl::semicolon)); - return lexy::dsl::try_(curly_bracket, lexy::dsl::find(right_brace)); + return curly_bracket; }(); static constexpr auto value 
= @@ -346,9 +341,6 @@ namespace ovdl::v2script::grammar { } else { return state.ast().template create<ast::ListValue>(ovdl::NodeLocation::make_from(begin, end), LEXY_MOV(list)); } - }, - [](detail::IsParseState auto& state, lexy::nullopt fail = {}) { - return fail; }); }; }; |