From 7440a5d1433eec4bf87e3723022db187e7f61b1a Mon Sep 17 00:00:00 2001 From: Spartan322 Date: Fri, 28 Jul 2023 00:52:00 -0400 Subject: Rework Grammar and Parser Add proper headless binary construction: Includes basic validation Add Error and Warning structs to Parser Add FileNode pointer getter to Parser Change all `char8_t*` and `const char8_t` to `const char*` in Parser Add Parser move operators and Parser destructor Add BufferHandler PIMPL object to Parser Add UTF-8 file Warning to v2script Add proper Grammar value retrieval Add AbstractSyntaxTree for v2script data parser: Has compile-time embedded type information accessible at compile-time and runtime Has Tab-based print functionality Fix wrong environment reference for headless construction in SConstruct Add error retrieval Add BasicCallbackOStreamBuffer for callback streaming Add CallbackStreamBuffer for char Add CallbackWStreamBuffer for wchar_t Add BasicCallbackStream Add CallbackStream for char Add CallbackWStream for wchar_t Add grammar for events and decisions Add event_parse to Parser Add decision_parse to Parser Add .clang-format Ignore dirty lexy module Add CSV parser and grammar: Creates std::vector for a list of lines Add BasicParser and BasicBufferHandler to reduce code duplication --- src/openvic-dataloader/csv/CsvGrammar.hpp | 129 ++++++++++++++++++++++++++++++ 1 file changed, 129 insertions(+) create mode 100644 src/openvic-dataloader/csv/CsvGrammar.hpp (limited to 'src/openvic-dataloader/csv/CsvGrammar.hpp') diff --git a/src/openvic-dataloader/csv/CsvGrammar.hpp b/src/openvic-dataloader/csv/CsvGrammar.hpp new file mode 100644 index 0000000..edce97b --- /dev/null +++ b/src/openvic-dataloader/csv/CsvGrammar.hpp @@ -0,0 +1,129 @@ +#pragma once + +#include +#include +#include +#include +#include + +#include + +#include +#include + +// Grammar Definitions // +namespace ovdl::csv::grammar { + struct StringValue { + static constexpr auto escaped_symbols = lexy::symbol_table // + .map<'"'>('"') + 
.map<'\''>('\'') + .map<'\\'>('\\') + .map<'/'>('/') + .map<'b'>('\b') + .map<'f'>('\f') + .map<'n'>('\n') + .map<'r'>('\r') + .map<'t'>('\t'); + /// This doesn't actually do anything, so this might need to be manually parsed if vic2's CSV parser creates a " from "" + static constexpr auto escaped_quote = lexy::symbol_table // + .map<'"'>('"'); + static constexpr auto rule = [] { + // Arbitrary code points + auto c = -lexy::dsl::lit_c<'"'>; + + auto back_escape = lexy::dsl::backslash_escape // + .symbol(); + + auto quote_escape = lexy::dsl::escape(lexy::dsl::lit_c<'"'>) // + .symbol(); + + return lexy::dsl::quoted(c, back_escape, quote_escape); + }(); + + static constexpr auto value = lexy::as_string; + }; + + template + struct PlainValue { + static constexpr auto rule = lexy::dsl::identifier(-(Sep / lexy::dsl::lit_c<'\n'>)); + static constexpr auto value = lexy::as_string; + }; + + template + struct Value { + static constexpr auto rule = lexy::dsl::p | lexy::dsl::p>; + static constexpr auto value = lexy::forward; + }; + + template + struct SeperatorCount { + static constexpr auto rule = lexy::dsl::list(Sep); + static constexpr auto value = lexy::count; + }; + + template + struct LineEnd { + static constexpr auto rule = lexy::dsl::list(lexy::dsl::p>, lexy::dsl::trailing_sep(lexy::dsl::p>)); + static constexpr auto value = lexy::fold_inplace( + std::initializer_list {}, + [](csv::LineObject& result, auto&& arg) { + if constexpr (std::is_same_v, std::size_t>) { + // Count separators, adds to previous value, making it a position + using position_type = csv::LineObject::position_type; + result.emplace_back(static_cast(arg + std::get<0>(result.back())), ""); + } else { + if (result.empty()) result.emplace_back(0u, LEXY_MOV(arg)); + else { + auto& [pos, value] = result.back(); + value = arg; + } + } + }); + }; + + template + struct Line { + + static constexpr auto suffix_setter(csv::LineObject& line) { + auto& [position, value] = line.back(); + if (value.empty()) { + 
line.set_suffix_end(position); + line.pop_back(); + } else { + line.set_suffix_end(position + 1); + } + }; + + static constexpr auto rule = lexy::dsl::p> | lexy::dsl::p> >> lexy::dsl::p>; + static constexpr auto value = + lexy::callback( + [](csv::LineObject&& line) { + suffix_setter(line); + return LEXY_MOV(line); + }, + [](std::size_t prefix_count, csv::LineObject&& line) { + line.set_prefix_end(prefix_count); + // position needs to be adjusted to prefix + for (auto& [position, value] : line) { + position += prefix_count; + } + suffix_setter(line); + return LEXY_MOV(line); + }); + }; + + template + struct File { + static constexpr auto rule = + lexy::dsl::whitespace(lexy::dsl::newline) + + lexy::dsl::opt(lexy::dsl::list(lexy::dsl::p>, lexy::dsl::trailing_sep(lexy::dsl::eol))); + + static constexpr auto value = lexy::as_list>; + }; + + using CommaFile = File>; + using ColonFile = File>; + using SemiColonFile = File>; + using TabFile = File>; + using BarFile = File>; +} \ No newline at end of file -- cgit v1.2.3-56-ga3b1