From 757114a3c5b748567b42f273c7b78ca039ae983c Mon Sep 17 00:00:00 2001 From: Spartan322 Date: Tue, 28 Nov 2023 05:09:26 -0500 Subject: Add `deps/dryad` -> https://github.com/Spartan322/dryad Add `deps/fmt` -> https://github.com/fmtlib/fmt Add `deps/range-v3` -> https://github.com/ericniebler/range-v3 Improve parser error and warning support Update .clang-format Update `deps/SCsub` --- src/openvic-dataloader/csv/CsvGrammar.hpp | 201 +++++++++++++-------------- src/openvic-dataloader/csv/CsvParseState.hpp | 28 ++++ src/openvic-dataloader/csv/Parser.cpp | 161 +++++++++++++-------- 3 files changed, 231 insertions(+), 159 deletions(-) create mode 100644 src/openvic-dataloader/csv/CsvParseState.hpp (limited to 'src/openvic-dataloader/csv') diff --git a/src/openvic-dataloader/csv/CsvGrammar.hpp b/src/openvic-dataloader/csv/CsvGrammar.hpp index 712bddc..5451f26 100644 --- a/src/openvic-dataloader/csv/CsvGrammar.hpp +++ b/src/openvic-dataloader/csv/CsvGrammar.hpp @@ -7,14 +7,17 @@ #include #include +#include #include #include -#include "detail/LexyLitRange.hpp" +#include "detail/dsl.hpp" // Grammar Definitions // namespace ovdl::csv::grammar { + using EncodingType = ovdl::csv::EncodingType; + template concept ParseChars = requires() { { T::character }; @@ -51,123 +54,117 @@ namespace ovdl::csv::grammar { .map<'"'>('"'); template - struct StringValue { - static constexpr auto rule = [] { - // Arbitrary code points - auto c = Options.character - Options.control; - - auto back_escape = lexy::dsl::backslash_escape // - .symbol(); + struct CsvGrammar { + struct StringValue { + static constexpr auto rule = [] { + // Arbitrary code points + auto c = Options.character - Options.control; - auto quote_escape = lexy::dsl::escape(lexy::dsl::lit_c<'"'>) // - .template symbol(); + auto back_escape = lexy::dsl::backslash_escape // + .symbol(); - return lexy::dsl::delimited(lexy::dsl::lit_c<'"'>, lexy::dsl::not_followed_by(lexy::dsl::lit_c<'"'>, lexy::dsl::lit_c<'"'>))(c, back_escape, quote_escape); - }(); - - static constexpr auto value = lexy::as_string; - }; + auto quote_escape = lexy::dsl::escape(lexy::dsl::lit_c<'"'>) // + .template symbol(); - template - struct PlainValue { - static constexpr auto rule = [] { - if constexpr (Options.SupportStrings) { - return lexy::dsl::identifier(Options.character - (lexy::dsl::lit_b / lexy::dsl::ascii::newline)); - } else { - auto escape_check_char = Options.character - (lexy::dsl::lit_b / lexy::dsl::ascii::newline); - auto id_check_char = escape_check_char - lexy::dsl::lit_b<'\\'>; - auto id_segment = lexy::dsl::identifier(id_check_char); - auto escape_segement = lexy::dsl::token(escape_check_char); - auto escape_sym = lexy::dsl::symbol(escape_segement); - auto escape_rule = lexy::dsl::lit_b<'\\'> >> escape_sym; - return lexy::dsl::list(id_segment | escape_rule); - } - }(); - static constexpr auto value = lexy::as_string; - }; + return lexy::dsl::delimited(lexy::dsl::lit_c<'"'>, lexy::dsl::not_followed_by(lexy::dsl::lit_c<'"'>, lexy::dsl::lit_c<'"'>))(c, back_escape, quote_escape); + }(); - template - struct Value { - static constexpr auto rule = [] { - if constexpr (Options.SupportStrings) { - return lexy::dsl::p> | lexy::dsl::p>; - } else { - return lexy::dsl::p>; - } - }(); - static constexpr auto value = lexy::forward; - }; - - template - struct SepConst { - static constexpr auto rule = lexy::dsl::lit_b; - static constexpr auto value = lexy::constant(1); - }; + static constexpr auto value = lexy::as_string; + }; - template - struct Seperator { - static constexpr auto rule = lexy::dsl::list(lexy::dsl::p>); - static constexpr auto value = lexy::count; - }; + struct PlainValue { + static constexpr auto rule = [] { + if constexpr (Options.SupportStrings) { + return lexy::dsl::identifier(Options.character - (lexy::dsl::lit_b / lexy::dsl::ascii::newline)); + } else { + auto escape_check_char = Options.character - (lexy::dsl::lit_b / lexy::dsl::ascii::newline); + auto id_check_char = escape_check_char - lexy::dsl::lit_b<'\\'>; + auto id_segment = lexy::dsl::identifier(id_check_char); + auto escape_segement = lexy::dsl::token(escape_check_char); + auto escape_sym = lexy::dsl::symbol(escape_segement); + auto escape_rule = lexy::dsl::lit_b<'\\'> >> escape_sym; + return lexy::dsl::list(id_segment | escape_rule); + } + }(); + static constexpr auto value = lexy::as_string; + }; - template - struct LineEnd { - static constexpr auto rule = lexy::dsl::list(lexy::dsl::p>, lexy::dsl::trailing_sep(lexy::dsl::p>)); - static constexpr auto value = lexy::fold_inplace( - std::initializer_list {}, - [](ovdl::csv::LineObject& result, auto&& arg) { - if constexpr (std::is_same_v, std::size_t>) { - // Count seperators, adds to previous value, making it a position - using position_type = ovdl::csv::LineObject::position_type; - result.emplace_back(static_cast(arg + result.back().first), ""); + struct Value { + static constexpr auto rule = [] { + if constexpr (Options.SupportStrings) { + return lexy::dsl::p | lexy::dsl::p; } else { - if (result.empty()) result.emplace_back(0u, LEXY_MOV(arg)); - else { - auto& [pos, value] = result.back(); - value = arg; - } + return lexy::dsl::p; } - }); - }; + }(); + static constexpr auto value = lexy::forward; + }; - template - struct Line { - static constexpr auto suffix_setter(ovdl::csv::LineObject& line) { - auto& [position, value] = line.back(); - if (value.empty()) { - line.set_suffix_end(position); - line.pop_back(); - } else { - line.set_suffix_end(position + 1); - } + struct SepConst { + static constexpr auto rule = lexy::dsl::lit_b; + static constexpr auto value = lexy::constant(1); }; - static constexpr auto rule = lexy::dsl::p> | lexy::dsl::p> >> lexy::dsl::opt(lexy::dsl::p>); - static constexpr auto value = - lexy::callback( - [](ovdl::csv::LineObject&& line) { - suffix_setter(line); - return LEXY_MOV(line); - }, - [](std::size_t prefix_count, ovdl::csv::LineObject&& line) { - line.set_prefix_end(prefix_count); - // position needs to be adjusted to prefix - for (auto& [position, value] : line) { - position += prefix_count; + struct Seperator { + static constexpr auto rule = lexy::dsl::list(lexy::dsl::p); + static constexpr auto value = lexy::count; + }; + + struct LineEnd { + static constexpr auto rule = lexy::dsl::list(lexy::dsl::p, lexy::dsl::trailing_sep(lexy::dsl::p)); + static constexpr auto value = lexy::fold_inplace( + std::initializer_list {}, + [](ovdl::csv::LineObject& result, auto&& arg) { + if constexpr (std::is_same_v, std::size_t>) { + // Count seperators, adds to previous value, making it a position + using position_type = ovdl::csv::LineObject::position_type; + result.emplace_back(static_cast(arg + result.back().first), ""); + } else { + if (result.empty()) result.emplace_back(0u, LEXY_MOV(arg)); + else { + auto& [pos, value] = result.back(); + value = arg; + } } - suffix_setter(line); - return LEXY_MOV(line); - }, - [](std::size_t suffix_count, lexy::nullopt = {}) { - return ovdl::csv::LineObject(0, {}, suffix_count + 1); }); + }; + + struct Line { + static constexpr auto suffix_setter(ovdl::csv::LineObject& line) { + auto& [position, value] = line.back(); + if (value.empty()) { + line.set_suffix_end(position); + line.pop_back(); + } else { + line.set_suffix_end(position + 1); + } + }; + + static constexpr auto rule = lexy::dsl::p | lexy::dsl::p >> lexy::dsl::opt(lexy::dsl::p); + static constexpr auto value = + lexy::callback( + [](ovdl::csv::LineObject&& line) { + suffix_setter(line); + return LEXY_MOV(line); + }, + [](std::size_t prefix_count, ovdl::csv::LineObject&& line) { + line.set_prefix_end(prefix_count); + // position needs to be adjusted to prefix + for (auto& [position, value] : line) { + position += prefix_count; + } + suffix_setter(line); + return LEXY_MOV(line); + }, + [](std::size_t suffix_count, lexy::nullopt = {}) { + return ovdl::csv::LineObject(0, {}, suffix_count + 1); + }); + }; }; template struct File { - static constexpr auto rule = lexy::dsl::terminator(lexy::dsl::eof).opt_list( - lexy::dsl::p> | lexy::dsl::newline - ); + static constexpr auto rule = lexy::dsl::terminator(lexy::dsl::eof).opt_list(lexy::dsl::p::Line> | lexy::dsl::newline); static constexpr auto value = lexy::as_list>; }; @@ -199,7 +196,7 @@ namespace ovdl::csv::grammar { namespace ovdl::csv::grammar::windows1252 { struct windows1252_t { - static constexpr auto character = detail::lexydsl::make_range<0x01, 0xFF>(); + static constexpr auto character = dsl::make_range<0x01, 0xFF>(); static constexpr auto control = lexy::dsl::ascii::control / lexy::dsl::lit_b<0x81> / lexy::dsl::lit_b<0x8D> / lexy::dsl::lit_b<0x8F> / diff --git a/src/openvic-dataloader/csv/CsvParseState.hpp b/src/openvic-dataloader/csv/CsvParseState.hpp new file mode 100644 index 0000000..2390453 --- /dev/null +++ b/src/openvic-dataloader/csv/CsvParseState.hpp @@ -0,0 +1,28 @@ +#pragma once + +#include +#include +#include +#include + +#include + +template +struct LexyEncodingFrom { +}; + +template<> +struct LexyEncodingFrom { + using encoding = lexy::default_encoding; +}; + +template<> +struct LexyEncodingFrom { + using encoding = lexy::utf8_char_encoding; +}; + +template +using CsvFile = ovdl::BasicFile::encoding, std::vector>; + +template +using CsvParseState = ovdl::FileParseState>; \ No newline at end of file diff --git a/src/openvic-dataloader/csv/Parser.cpp b/src/openvic-dataloader/csv/Parser.cpp index 0ca3402..849ea05 100644 --- a/src/openvic-dataloader/csv/Parser.cpp +++ b/src/openvic-dataloader/csv/Parser.cpp @@ -1,47 +1,34 @@ -#include #include +#include #include #include -#include +#include +#include +#include #include #include #include #include -#include "csv/CsvGrammar.hpp" -#include "detail/BasicBufferHandler.hpp" -#include "detail/LexyReportError.hpp" -#include "detail/OStreamOutputIterator.hpp" +#include "CsvGrammar.hpp" +#include "CsvParseState.hpp" +#include "detail/NullBuff.hpp" +#include "detail/ParseHandler.hpp" using namespace ovdl; using namespace ovdl::csv; -/// BufferHandler /// - -template -struct LexyEncodingFrom { -}; - -template<> -struct LexyEncodingFrom { - using encoding = lexy::default_encoding; -}; - -template<> -struct LexyEncodingFrom { - using encoding = lexy::utf8_char_encoding; -}; +/// ParseHandler /// template -class Parser::BufferHandler final : public detail::BasicBufferHandler::encoding> { -public: - template - std::optional> parse(const ErrorCallback& callback) { - auto result = lexy::parse(this->_buffer, callback); +struct Parser::ParseHandler final : detail::BasicFileParseHandler> { + template + std::optional parse() { + auto result = lexy::parse(this->buffer(), *this->_parse_state, this->_parse_state->logger().error_callback()); if (!result) { - return result.errors(); + return this->_parse_state->logger().get_errors(); } _lines = std::move(result.value()); return std::nullopt; @@ -59,8 +46,14 @@ private: template Parser::Parser() - : _buffer_handler(std::make_unique()) { - set_error_log_to_stderr(); + : _parse_handler(std::make_unique()) { + set_error_log_to_null(); +} + +template +Parser::Parser(std::basic_ostream& error_stream) + : _parse_handler(std::make_unique()) { + set_error_log_to(error_stream); } template @@ -115,28 +108,31 @@ Parser Parser::from_file(const std::filesystem::path& path) /// template template -constexpr void Parser::_run_load_func(detail::LoadCallback auto func, Args... args) { - _warnings.clear(); - _errors.clear(); +constexpr void Parser::_run_load_func(detail::LoadCallback auto func, Args... args) { _has_fatal_error = false; - if (auto error = func(_buffer_handler.get(), std::forward(args)...); error) { - _has_fatal_error = error.value().type == ParseError::Type::Fatal; - _errors.push_back(error.value()); - _error_stream.get() << "Error: " << _errors.back().message << '\n'; + auto error = func(_parse_handler.get(), std::forward(args)...); + auto error_message = _parse_handler->make_error_from(error); + if (!error_message.empty()) { + _has_error = true; + _has_fatal_error = true; + _parse_handler->parse_state().logger().template create_log(DiagnosticLogger::DiagnosticKind::error, fmt::runtime(error_message)); + } + if (has_error() && &_error_stream.get() != &detail::cnull) { + print_errors_to(_error_stream.get()); } } template constexpr Parser& Parser::load_from_buffer(const char* data, std::size_t size) { // Type can't be deduced? - _run_load_func(std::mem_fn(&BufferHandler::load_buffer_size), data, size); + _run_load_func(std::mem_fn(&ParseHandler::load_buffer_size), data, size); return *this; } template constexpr Parser& Parser::load_from_buffer(const char* start, const char* end) { // Type can't be deduced? - _run_load_func(std::mem_fn(&BufferHandler::load_buffer), start, end); + _run_load_func(std::mem_fn(&ParseHandler::load_buffer), start, end); return *this; } @@ -149,7 +145,7 @@ template constexpr Parser& Parser::load_from_file(const char* path) { _file_path = path; // Type can be deduced?? - _run_load_func(std::mem_fn(&BufferHandler::load_file), path); + _run_load_func(std::mem_fn(&ParseHandler::load_file), path); return *this; } @@ -158,39 +154,35 @@ Parser& Parser::load_from_file(const std::filesystem::path& return load_from_file(path.string().c_str()); } -template -constexpr Parser& Parser::load_from_file(const detail::Has_c_str auto& path) { - return load_from_file(path.c_str()); -} - template bool Parser::parse_csv(bool handle_strings) { - if (!_buffer_handler->is_valid()) { + if (!_parse_handler->is_valid()) { return false; } - std::optional> errors; - auto report_error = ovdl::detail::ReporError.path(_file_path).to(detail::OStreamOutputIterator { _error_stream }); + std::optional::error_range> errors; + // auto report_error = ovdl::detail::ReporError.path(_file_path).to(detail::OStreamOutputIterator { _error_stream }); if constexpr (Encoding == EncodingType::Windows1252) { if (handle_strings) - errors = _buffer_handler->template parse(report_error); + errors = _parse_handler->template parse(); else - errors = _buffer_handler->template parse(report_error); + errors = _parse_handler->template parse(); } else { if (handle_strings) - errors = _buffer_handler->template parse(report_error); + errors = _parse_handler->template parse(); else - errors = _buffer_handler->template parse(report_error); + errors = _parse_handler->template parse(); } - if (errors) { - _errors.reserve(errors->size()); - for (auto& err : errors.value()) { - _has_fatal_error |= err.type == ParseError::Type::Fatal; - _errors.push_back(err); + _has_error = _parse_handler->parse_state().logger().errored(); + _has_warning = _parse_handler->parse_state().logger().warned(); + if (!errors->empty()) { + _has_fatal_error = true; + if (&_error_stream.get() != &detail::cnull) { + print_errors_to(_error_stream); } return false; } - _lines = std::move(_buffer_handler->get_lines()); + _lines = std::move(_parse_handler->get_lines()); return true; } @@ -199,5 +191,60 @@ const std::vector& Parser::get_lines() const { return _lines; } +template +typename Parser::error_range Parser::get_errors() const { + return _parse_handler->parse_state().logger().get_errors(); +} + +template +const FilePosition Parser::get_error_position(const error::Error* error) const { + if (!error || !error->is_linked_in_tree()) { + return {}; + } + auto err_location = _parse_handler->parse_state().logger().location_of(error); + if (err_location.is_synthesized()) { + return {}; + } + + auto loc_begin = lexy::get_input_location(_parse_handler->buffer(), err_location.begin()); + FilePosition result { loc_begin.line_nr(), loc_begin.line_nr(), loc_begin.column_nr(), loc_begin.column_nr() }; + if (err_location.begin() < err_location.end()) { + auto loc_end = lexy::get_input_location(_parse_handler->buffer(), err_location.end(), loc_begin.anchor()); + result.end_line = loc_end.line_nr(); + result.end_column = loc_end.column_nr(); + } + return result; +} + +template +void Parser::print_errors_to(std::basic_ostream& stream) const { + auto errors = get_errors(); + if (errors.empty()) return; + for (const auto error : errors) { + dryad::visit_tree( + error, + [&](const error::BufferError* buffer_error) { + stream << buffer_error->message() << '\n'; + }, + [&](const error::ParseError* parse_error) { + stream << parse_error->message() << '\n'; + }, + [&](dryad::child_visitor visitor, const error::Semantic* semantic) { + stream << semantic->message() << '\n'; + auto annotations = semantic->annotations(); + if (annotations.empty()) return; + for (auto annotation : annotations) { + visitor(annotation); + } + }, + [&](const error::PrimaryAnnotation* primary) { + stream << primary->message() << '\n'; + }, + [&](const error::SecondaryAnnotation* secondary) { + stream << secondary->message() << '\n'; + }); + } +} + template class ovdl::csv::Parser; template class ovdl::csv::Parser; \ No newline at end of file -- cgit v1.2.3-56-ga3b1