From a07c64148eb60b886f92caa46a9c687240ec420c Mon Sep 17 00:00:00 2001 From: Spartan322 Date: Sat, 20 Jul 2024 21:02:30 -0400 Subject: Add backslash identifier support to v2script Add buffer::char_type size multiplier to max file size of string intern buffer Fix list grammar segfaults Fix diagnostic logger intern segfaults from buffer reallocation Fix non-string-supported CSV parser not supporting Victoria 2 CSV escaping behavior --- src/openvic-dataloader/csv/CsvGrammar.hpp | 28 ++++++++++++++++++---------- src/openvic-dataloader/csv/Parser.cpp | 12 ++++++++---- 2 files changed, 26 insertions(+), 14 deletions(-) (limited to 'src/openvic-dataloader/csv') diff --git a/src/openvic-dataloader/csv/CsvGrammar.hpp b/src/openvic-dataloader/csv/CsvGrammar.hpp index 19aee54..91226c7 100644 --- a/src/openvic-dataloader/csv/CsvGrammar.hpp +++ b/src/openvic-dataloader/csv/CsvGrammar.hpp @@ -11,10 +11,7 @@ #include #include -#include #include -#include -#include #include #include "detail/Convert.hpp" @@ -73,6 +70,9 @@ namespace ovdl::csv::grammar { constexpr auto escaped_quote = lexy::symbol_table // .map<'"'>('"'); + constexpr auto escaped_newline = lexy::symbol_table // + .map<'n'>('\n'); + template struct CsvGrammar { struct StringValue : lexy::scan_production, @@ -118,6 +118,11 @@ namespace ovdl::csv::grammar { template static constexpr auto _escape_check = character - (lexy::dsl::lit_b / lexy::dsl::ascii::newline); + struct Backslash { + static constexpr auto rule = LEXY_LIT("\\n"); + static constexpr auto value = lexy::constant('\n'); + }; + template static constexpr scan_result scan(lexy::rule_scanner& scanner, detail::IsFileParseState auto& state) { using encoding = typename Reader::encoding; @@ -134,13 +139,16 @@ namespace ovdl::csv::grammar { if constexpr (Options.SupportStrings) { return lexy::dsl::identifier(character - (lexy::dsl::lit_b / lexy::dsl::ascii::newline)); } else { - auto escape_check_char = _escape_check; - auto id_check_char = escape_check_char - lexy::dsl::lit_b<'\\'>; - auto id_segment = lexy::dsl::identifier(id_check_char); - auto escape_segement = lexy::dsl::token(escape_check_char); - auto escape_sym = lexy::dsl::symbol(escape_segement); - auto escape_rule = lexy::dsl::lit_b<'\\'> >> escape_sym; - return lexy::dsl::list(id_segment | escape_rule); + constexpr auto backslash = lexy::dsl::lit_b<'\\'>; + + constexpr auto escape_check_char = _escape_check; + constexpr auto escape_rule = lexy::dsl::p; + + return lexy::dsl::list( + lexy::dsl::identifier(escape_check_char - backslash) | + escape_rule | + lexy::dsl::capture(escape_check_char) // + ); } }(); diff --git a/src/openvic-dataloader/csv/Parser.cpp b/src/openvic-dataloader/csv/Parser.cpp index 8fe8b17..bbd1be4 100644 --- a/src/openvic-dataloader/csv/Parser.cpp +++ b/src/openvic-dataloader/csv/Parser.cpp @@ -195,6 +195,10 @@ typename Parser::error_range Parser::get_errors() const { return _parse_handler->get_errors(); } +std::string_view Parser::error(const ovdl::error::Error* error) const { + return error->message(_parse_handler->parse_state().logger().symbol_interner()); +} + const FilePosition Parser::get_error_position(const error::Error* error) const { if (!error || !error->is_linked_in_tree()) { return {}; @@ -231,20 +235,20 @@ void Parser::print_errors_to(std::basic_ostream& stream) const { dryad::visit_tree( error, [&](const error::BufferError* buffer_error) { - stream << "buffer error: " << buffer_error->message() << '\n'; + stream << "buffer error: " << this->error(buffer_error) << '\n'; }, [&](dryad::child_visitor visitor, const error::AnnotatedError* annotated_error) { - stream << annotated_error->message() << '\n'; + stream << this->error(annotated_error) << '\n'; auto annotations = annotated_error->annotations(); for (auto annotation : annotations) { visitor(annotation); } }, [&](const error::PrimaryAnnotation* primary) { - stream << primary->message() << '\n'; + stream << this->error(primary) << '\n'; }, [&](const error::SecondaryAnnotation* secondary) { - stream << secondary->message() << '\n'; + stream << this->error(secondary) << '\n'; }); } } \ No newline at end of file -- cgit v1.2.3-56-ga3b1