#pragma once

// NOTE(review): this copy of the header appears damaged. The header names of
// the first twelve #include directives, and many template parameter/argument
// lists (for example after `static_cast`, `template`, `lexy::dsl::p`,
// `lexy::forward` and `File`), are absent. Every token below is reproduced
// exactly as found; restore the missing text from version control before
// attempting to build.
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include

#include "detail/Convert.hpp"
#include "detail/InternalConcepts.hpp"
#include "detail/dsl.hpp"

// Grammar Definitions //
namespace ovdl::csv::grammar {
    /// @brief Settings that select one CSV dialect.
    struct ParseOptions {
        /// @brief Separator character
        char SepChar;
        /// @brief Determines whether StringValue is supported
        bool SupportStrings;
        /// @brief Paradox-style localization escape characters
        /// @note Is ignored if SupportStrings is true
        char EscapeChar;
    };

    /// @brief Handler type supplied as the second template argument of
    /// convert::convert_as_string (see convert_as_string below).
    struct ConvertErrorHandler {
        /// @brief Emits a warning through the state's logger for an invalid character.
        /// @param state Object providing logger(); only the logger is used here.
        /// @param reader Reader whose peek() value is put into the message and whose
        /// position() is attached as the primary annotation labelled "here".
        static constexpr void on_invalid_character(detail::IsStateType auto& state, auto reader) {
            // NOTE(review): the target type of static_cast is missing from this copy.
            state.logger().warning("invalid character value '{}' found", static_cast(reader.peek())) //
                .primary(BasicNodeLocation { reader.position() }, "here")
                .finish();
        }
    };

    /// @brief Compile-time comparison against lexy::utf8_char_encoding.
    /// @param encoding Only named in the signature; the visible body does not
    /// reference it.
    /// NOTE(review): the first operand of std::same_as is missing here; presumably
    /// it is derived from the type of `encoding` - TODO confirm.
    constexpr bool IsUtf8(auto encoding) {
        return std::same_as, lexy::utf8_char_encoding>;
    }

    /// @brief Shorthand for convert::convert_as_string with ConvertErrorHandler
    /// as the handler.
    /// NOTE(review): the template parameter list is missing; the body uses a
    /// parameter named `String`.
    template constexpr auto convert_as_string = convert::convert_as_string<
        String,
        ConvertErrorHandler>;

    /// @brief Any ASCII character, or any single byte in the range 0x80-0xFF.
    constexpr auto ansi_character = lexy::dsl::ascii::character / dsl::lit_b_range<0x80, 0xFF>;
    /// @brief ASCII control characters plus the bytes 0x81, 0x8D, 0x8F, 0x90 and 0x9D.
    /// NOTE(review): presumably the byte values Windows-1252 leaves unassigned -
    /// confirm the intended code page.
    constexpr auto ansi_control =
        lexy::dsl::ascii::control /
        lexy::dsl::lit_b<0x81> / lexy::dsl::lit_b<0x8D> / lexy::dsl::lit_b<0x8F> /
        lexy::dsl::lit_b<0x90> / lexy::dsl::lit_b<0x9D>;

    /// @brief Any Unicode character.
    constexpr auto utf_character = lexy::dsl::unicode::character;
    /// @brief Unicode control characters.
    constexpr auto utf_control = lexy::dsl::unicode::control;

    /// @brief Symbol table mapping an escape letter to the character it stands for
    /// (quote, apostrophe, backslash, slash, \b, \f, \n, \r, \t).
    /// NOTE(review): the template argument of lexy::symbol_table is missing in
    /// this and the two tables below.
    constexpr auto escaped_symbols = lexy::symbol_table //
                                         .map<'"'>('"')
                                         .map<'\''>('\'')
                                         .map<'\\'>('\\')
                                         .map<'/'>('/')
                                         .map<'b'>('\b')
                                         .map<'f'>('\f')
                                         .map<'n'>('\n')
                                         .map<'r'>('\r')
                                         .map<'t'>('\t');

    /// @brief Symbol table containing only the double quote, mapped to itself.
    constexpr auto escaped_quote = lexy::symbol_table //
                                       .map<'"'>('"');

    /// @brief Symbol table mapping the letter 'n' to a newline character.
    constexpr auto escaped_newline = lexy::symbol_table //
                                         .map<'n'>('\n');

    /// @brief Productions for a single CSV dialect.
    /// NOTE(review): the template parameter list is missing; the members read
    /// `Options.SupportStrings`, so the parameter is presumably a ParseOptions
    /// value named `Options` - TODO confirm.
    template struct CsvGrammar {
        /// @brief Token production for a value enclosed in double quotes.
        struct StringValue : lexy::scan_production,
                             lexy::token_production {

            /// @brief Hand-written scan step that parses one quoted value.
            /// @param scanner Scanner the delimited rule is parsed with.
            /// @param state Not referenced in this body.
            /// @return lexy::scan_failed if the scanner is in a failed state or no
            /// result was produced, otherwise the parsed value.
            template static constexpr scan_result scan(lexy::rule_scanner& scanner, detail::IsFileParseState auto& state) {
                using encoding = typename Reader::encoding;

                constexpr auto rule = [] {
                    // Arbitrary code points
                    // One branch uses the ANSI classes, the other the Unicode
                    // classes; in both, control characters are excluded.
                    // NOTE(review): the operands of both std::same_as checks are
                    // missing from this copy.
                    auto c = [] {
                        if constexpr (std::same_as || std::same_as) {
                            return ansi_character - ansi_control;
                        } else {
                            return utf_character - utf_control;
                        }
                    }();

                    // NOTE(review): the symbol-table argument of .symbol is missing
                    // here (presumably escaped_symbols).
                    auto back_escape = lexy::dsl::backslash_escape //
                                           .symbol();

                    // Escape sequence introduced by a double quote.
                    // NOTE(review): symbol-table argument missing (presumably
                    // escaped_quote).
                    auto quote_escape = lexy::dsl::escape(lexy::dsl::lit_c<'"'>) //
                                            .template symbol();

                    // Opens on '"'; closes on a '"' that is not followed by another
                    // '"', so a doubled quote inside the value does not end it.
                    return lexy::dsl::delimited(lexy::dsl::lit_c<'"'>, lexy::dsl::not_followed_by(lexy::dsl::lit_c<'"'>, lexy::dsl::lit_c<'"'>))(c, back_escape, quote_escape);
                }();

                lexy::scan_result str_result = scanner.template parse(rule);
                if (!scanner || !str_result)
                    return lexy::scan_failed;
                return str_result.value();
            }

            /// Only entered when the next character is a double quote.
            static constexpr auto rule = lexy::dsl::peek(lexy::dsl::lit_c<'"'>) >> lexy::dsl::scan;

            /// Runs the scanned value through convert_as_string, then forwards it.
            static constexpr auto value = convert_as_string >> lexy::forward;
        };

        /// @brief Token production for a value that is not enclosed in quotes.
        struct PlainValue : lexy::scan_production,
                            lexy::token_production {

            /// @brief `character` with one byte literal and ASCII newlines removed.
            /// NOTE(review): the template parameter list and the lit_b argument are
            /// missing; the excluded byte is presumably Options.SepChar - TODO confirm.
            template static constexpr auto _escape_check = character - (lexy::dsl::lit_b / lexy::dsl::ascii::newline);

            /// @brief Matches the two characters backslash + 'n' and yields '\n'.
            struct Backslash {
                static constexpr auto rule = LEXY_LIT("\\n");
                static constexpr auto value = lexy::constant('\n');
            };

            /// @brief Hand-written scan step that parses one unquoted value.
            /// @param scanner Scanner the rule is parsed with.
            /// @param state Not referenced in this body.
            /// @return lexy::scan_failed if the scanner is in a failed state or no
            /// result was produced, otherwise the value as a string.
            template static constexpr scan_result scan(lexy::rule_scanner& scanner, detail::IsFileParseState auto& state) {
                using encoding = typename Reader::encoding;

                constexpr auto rule = [] {
                    // Character class chosen from the reader's encoding.
                    // NOTE(review): the std::same_as operands are missing here.
                    constexpr auto character = [] {
                        if constexpr (std::same_as || std::same_as) {
                            return ansi_character;
                        } else {
                            return utf_character;
                        }
                    }();

                    if constexpr (Options.SupportStrings) {
                        // One run of characters, excluding a byte literal (argument
                        // missing) and newlines; no escape handling in this mode.
                        return lexy::dsl::identifier(character - (lexy::dsl::lit_b / lexy::dsl::ascii::newline));
                    } else {
                        constexpr auto backslash = lexy::dsl::lit_b<'\\'>;

                        // NOTE(review): the arguments of _escape_check and
                        // lexy::dsl::p are missing (presumably `character` and
                        // Backslash respectively).
                        constexpr auto escape_check_char = _escape_check;
                        constexpr auto escape_rule = lexy::dsl::p;

                        // Alternatives tried in this order: a run of allowed
                        // characters containing no backslash, the escape
                        // production, or one captured allowed character.
                        return lexy::dsl::list(
                            lexy::dsl::identifier(escape_check_char - backslash) |
                            escape_rule |
                            lexy::dsl::capture(escape_check_char) //
                        );
                    }
                }();

                if constexpr (Options.SupportStrings) {
                    // The parse result is copied into a std::string via begin()/end().
                    // NOTE(review): the template argument of parse is truncated here.
                    auto lexeme_result = scanner.template parse>(rule);
                    if (!scanner || !lexeme_result)
                        return lexy::scan_failed;
                    return std::string { lexeme_result.value().begin(), lexeme_result.value().end() };
                } else {
                    lexy::scan_result str_result = scanner.template parse(rule);
                    if (!scanner || !str_result)
                        return lexy::scan_failed;
                    return str_result.value();
                }
            }

            /// Peeks with two _escape_check classes before scanning.
            /// NOTE(review): both template arguments are missing from this copy.
            static constexpr auto rule =
                dsl::peek(
                    _escape_check,
                    _escape_check) >>
                lexy::dsl::scan;

            /// Runs the scanned value through convert_as_string, then forwards it.
            static constexpr auto value = convert_as_string >> lexy::forward;
        };

        /// @brief A single cell value.
        /// With SupportStrings the rule is a choice of two productions, otherwise a
        /// single production.
        /// NOTE(review): the production names passed to lexy::dsl::p are missing.
        struct Value {
            static constexpr auto rule = [] {
                if constexpr (Options.SupportStrings) {
                    return lexy::dsl::p | lexy::dsl::p;
                } else {
                    return lexy::dsl::p;
                }
            }();
            static constexpr auto value = lexy::forward;
        };

        /// @brief Matches one byte literal and yields the constant 1.
        /// NOTE(review): the lit_b argument is missing (presumably Options.SepChar).
        struct SepConst {
            static constexpr auto rule = lexy::dsl::lit_b;
            static constexpr auto value = lexy::constant(1);
        };

        /// @brief A list of one production (name missing in this copy); the value
        /// is the item count via lexy::count.
        struct Seperator {
            static constexpr auto rule = lexy::dsl::list(lexy::dsl::p);
            static constexpr auto value = lexy::count;
        };

        /// @brief A list of items with a separator production between them; a
        /// trailing separator is accepted. The results are folded into a LineObject.
        /// NOTE(review): the production names passed to lexy::dsl::p and the
        /// element type of std::initializer_list are missing from this copy.
        struct LineEnd {
            static constexpr auto rule = lexy::dsl::list(lexy::dsl::p, lexy::dsl::trailing_sep(lexy::dsl::p));
            static constexpr auto value = lexy::fold_inplace(
                std::initializer_list {},
                [](ovdl::csv::LineObject& result, std::size_t&& arg) {
                    // Count separators, adds to previous value, making it a position
                    // Reads result.back(), so this relies on an entry already being
                    // present when it is invoked.
                    using position_type = ovdl::csv::LineObject::position_type;
                    result.emplace_back(static_cast(arg + result.back().first), "");
                },
                [](ovdl::csv::LineObject& result, std::string&& arg) {
                    if (result.empty()) {
                        // First value of the line: stored at position 0.
                        result.emplace_back(0u, LEXY_MOV(arg));
                    } else {
                        // Fills the value of the most recently added entry.
                        auto& [pos, value] = result.back();
                        value = LEXY_MOV(arg);
                    }
                });
        };

        /// @brief One complete line of the file.
        struct Line {
            /// @brief Sets the line's suffix end from its last entry.
            /// If the last entry's value is empty, the suffix end is that entry's
            /// position and the entry is removed; otherwise the suffix end is the
            /// position plus one. Reads line.back(), so the line must not be empty.
            static constexpr auto suffix_setter(ovdl::csv::LineObject& line) {
                auto& [position, value] = line.back();
                if (value.empty()) {
                    line.set_suffix_end(position);
                    line.pop_back();
                } else {
                    line.set_suffix_end(position + 1);
                }
            };

            /// NOTE(review): the three production names passed to lexy::dsl::p are
            /// missing from this copy.
            static constexpr auto rule = lexy::dsl::p | lexy::dsl::p >> lexy::dsl::opt(lexy::dsl::p);
            /// Three overloads:
            ///  - (LineObject): apply suffix_setter and return the line;
            ///  - (count, LineObject): record the count as the prefix end, shift
            ///    every entry position by it, then apply suffix_setter;
            ///  - (count, nullopt): build LineObject(0, {}, count + 1).
            static constexpr auto value =
                lexy::callback(
                    [](ovdl::csv::LineObject&& line) {
                        suffix_setter(line);
                        return LEXY_MOV(line);
                    },
                    [](std::size_t prefix_count, ovdl::csv::LineObject&& line) {
                        line.set_prefix_end(prefix_count);
                        // position needs to be adjusted to prefix
                        for (auto& [position, value] : line) {
                            position += prefix_count;
                        }
                        suffix_setter(line);
                        return LEXY_MOV(line);
                    },
                    [](std::size_t suffix_count, lexy::nullopt = {}) {
                        return ovdl::csv::LineObject(0, {}, suffix_count + 1);
                    });
        };
    };

    /// @brief Top-level production: an optional list of Line productions or bare
    /// newlines, terminated by end of file; the value is collected with
    /// lexy::as_list.
    /// NOTE(review): the template parameter list, the qualifier before `::Line`
    /// and the container type of lexy::as_list are missing or truncated here.
    template struct File {
        static constexpr auto rule =
            lexy::dsl::terminator(lexy::dsl::eof).opt_list(lexy::dsl::p::Line> | lexy::dsl::newline);

        static constexpr auto value = lexy::as_list>;
    };

    // Named file grammars, one per separator (by name: comma, colon, semicolon,
    // tab, bar).
    // NOTE(review): the template arguments that select each dialect are missing.
    using CommaFile = File;
    using ColonFile = File;
    using SemiColonFile = File;
    using TabFile = File;
    using BarFile = File;

    // Same set of aliases in a nested namespace; presumably the variants with
    // ParseOptions::SupportStrings enabled - TODO confirm once the arguments are
    // restored.
    namespace strings {
        using CommaFile = File;
        using ColonFile = File;
        using SemiColonFile = File;
        using TabFile = File;
        using BarFile = File;
    }
}