about | summary | refs | log | tree | commit | diff
path: root/src/openvic-dataloader/csv
diff options
context:
space:
mode:
author Spartan322 <Megacake1234@gmail.com> 2024-07-21 03:02:30 +0200
committer Spartan322 <Megacake1234@gmail.com> 2024-07-22 02:49:25 +0200
commit a07c64148eb60b886f92caa46a9c687240ec420c (patch)
tree e43aae271fe5aa75f1b0c918c1428e7f766a14f8 /src/openvic-dataloader/csv
parent ba8addc72595607206da654bc79c994121b7a3ae (diff)
Add backslash identifier support to v2script [add/backslash-identifier]

- Add buffer::char_type size multiplier to max file size of string intern buffer
- Fix list grammar segfaults
- Fix diagnostic logger intern segfaults from buffer reallocation
- Fix non-string-supported CSV parser not supporting Victoria 2 CSV escaping behavior
Diffstat (limited to 'src/openvic-dataloader/csv')
-rw-r--r-- src/openvic-dataloader/csv/CsvGrammar.hpp 28
-rw-r--r-- src/openvic-dataloader/csv/Parser.cpp 12
2 files changed, 26 insertions, 14 deletions
diff --git a/src/openvic-dataloader/csv/CsvGrammar.hpp b/src/openvic-dataloader/csv/CsvGrammar.hpp
index 19aee54..91226c7 100644
--- a/src/openvic-dataloader/csv/CsvGrammar.hpp
+++ b/src/openvic-dataloader/csv/CsvGrammar.hpp
@@ -11,10 +11,7 @@
#include <lexy/_detail/config.hpp>
#include <lexy/callback.hpp>
-#include <lexy/callback/string.hpp>
#include <lexy/dsl.hpp>
-#include <lexy/dsl/ascii.hpp>
-#include <lexy/dsl/option.hpp>
#include <lexy/encoding.hpp>
#include "detail/Convert.hpp"
@@ -73,6 +70,9 @@ namespace ovdl::csv::grammar {
constexpr auto escaped_quote = lexy::symbol_table<char> //
.map<'"'>('"');
+ constexpr auto escaped_newline = lexy::symbol_table<char> //
+ .map<'n'>('\n');
+
template<ParseOptions Options>
struct CsvGrammar {
struct StringValue : lexy::scan_production<std::string>,
@@ -118,6 +118,11 @@ namespace ovdl::csv::grammar {
template<auto character>
static constexpr auto _escape_check = character - (lexy::dsl::lit_b<Options.SepChar> / lexy::dsl::ascii::newline);
+ struct Backslash {
+ static constexpr auto rule = LEXY_LIT("\\n");
+ static constexpr auto value = lexy::constant('\n');
+ };
+
template<typename Context, typename Reader>
static constexpr scan_result scan(lexy::rule_scanner<Context, Reader>& scanner, detail::IsFileParseState auto& state) {
using encoding = typename Reader::encoding;
@@ -134,13 +139,16 @@ namespace ovdl::csv::grammar {
if constexpr (Options.SupportStrings) {
return lexy::dsl::identifier(character - (lexy::dsl::lit_b<Options.SepChar> / lexy::dsl::ascii::newline));
} else {
- auto escape_check_char = _escape_check<character>;
- auto id_check_char = escape_check_char - lexy::dsl::lit_b<'\\'>;
- auto id_segment = lexy::dsl::identifier(id_check_char);
- auto escape_segement = lexy::dsl::token(escape_check_char);
- auto escape_sym = lexy::dsl::symbol<escaped_symbols>(escape_segement);
- auto escape_rule = lexy::dsl::lit_b<'\\'> >> escape_sym;
- return lexy::dsl::list(id_segment | escape_rule);
+ constexpr auto backslash = lexy::dsl::lit_b<'\\'>;
+
+ constexpr auto escape_check_char = _escape_check<character>;
+ constexpr auto escape_rule = lexy::dsl::p<Backslash>;
+
+ return lexy::dsl::list(
+ lexy::dsl::identifier(escape_check_char - backslash) |
+ escape_rule |
+ lexy::dsl::capture(escape_check_char) //
+ );
}
}();
diff --git a/src/openvic-dataloader/csv/Parser.cpp b/src/openvic-dataloader/csv/Parser.cpp
index 8fe8b17..bbd1be4 100644
--- a/src/openvic-dataloader/csv/Parser.cpp
+++ b/src/openvic-dataloader/csv/Parser.cpp
@@ -195,6 +195,10 @@ typename Parser::error_range Parser::get_errors() const {
return _parse_handler->get_errors();
}
+std::string_view Parser::error(const ovdl::error::Error* error) const {
+ return error->message(_parse_handler->parse_state().logger().symbol_interner());
+}
+
const FilePosition Parser::get_error_position(const error::Error* error) const {
if (!error || !error->is_linked_in_tree()) {
return {};
@@ -231,20 +235,20 @@ void Parser::print_errors_to(std::basic_ostream<char>& stream) const {
dryad::visit_tree(
error,
[&](const error::BufferError* buffer_error) {
- stream << "buffer error: " << buffer_error->message() << '\n';
+ stream << "buffer error: " << this->error(buffer_error) << '\n';
},
[&](dryad::child_visitor<error::ErrorKind> visitor, const error::AnnotatedError* annotated_error) {
- stream << annotated_error->message() << '\n';
+ stream << this->error(annotated_error) << '\n';
auto annotations = annotated_error->annotations();
for (auto annotation : annotations) {
visitor(annotation);
}
},
[&](const error::PrimaryAnnotation* primary) {
- stream << primary->message() << '\n';
+ stream << this->error(primary) << '\n';
},
[&](const error::SecondaryAnnotation* secondary) {
- stream << secondary->message() << '\n';
+ stream << this->error(secondary) << '\n';
});
}
}
\ No newline at end of file