diff options
author | Spartan322 <Megacake1234@gmail.com> | 2023-07-28 06:52:00 +0200 |
---|---|---|
committer | Spartan322 <Megacake1234@gmail.com> | 2023-09-02 14:28:21 +0200 |
commit | 7440a5d1433eec4bf87e3723022db187e7f61b1a (patch) | |
tree | 2bb062c320fa2227b18956617b94d0e8800420d8 /src/openvic-dataloader/detail | |
parent | e941573f47fb867ff75c8a2cf78302b754ffbeee (diff) |
Rework Grammar and Parser
Add proper headless binary construction:
Includes basic validation
Add Error and Warning structs to Parser
Add FileNode pointer getter to Parser
Change all `char8_t*` and `const char8_t` to `const char*` in Parser
Add Parser move operators and Parser destructor
Add BufferHandler PIMPL object to Parser
Add UTF-8 file Warning to v2script
Add proper Grammar value retrieval
Add AbstractSyntaxTree for v2script data parser:
Has compile-time embedded type information accessible at compile-time and runtime
Has Tab-based print functionality
Fix wrong environment reference for headless construction in SConstruct
Add error retrieval
Add BasicCallbackOStreamBuffer for callback streaming
Add CallbackStreamBuffer for char
Add CallbackWStreamBuffer for wchar_t
Add BasicCallbackStream
Add CallbackStream for char
Add CallbackWStream for wchar_t
Add grammar for events and decisions
Add event_parse to Parser
Add decision_parse to Parser
Add .clang-format
Ignore dirty lexy module
Add CSV parser and grammar:
Creates std::vector<csv::LineObject> for a list of lines
Add BasicParser and BasicBufferHandler to reduce code duplication
Diffstat (limited to 'src/openvic-dataloader/detail')
-rw-r--r-- | src/openvic-dataloader/detail/BasicBufferHandler.hpp | 44 | ||||
-rw-r--r-- | src/openvic-dataloader/detail/BasicParser.cpp | 47 | ||||
-rw-r--r-- | src/openvic-dataloader/detail/DetectUtf8.hpp | 53 | ||||
-rw-r--r-- | src/openvic-dataloader/detail/Errors.hpp | 23 | ||||
-rw-r--r-- | src/openvic-dataloader/detail/LexyLitRange.hpp | 16 | ||||
-rw-r--r-- | src/openvic-dataloader/detail/LexyReportError.hpp | 102 | ||||
-rw-r--r-- | src/openvic-dataloader/detail/NullBuff.hpp | 30 | ||||
-rw-r--r-- | src/openvic-dataloader/detail/OStreamOutputIterator.hpp | 21 | ||||
-rw-r--r-- | src/openvic-dataloader/detail/Warnings.hpp | 21 |
9 files changed, 357 insertions, 0 deletions
diff --git a/src/openvic-dataloader/detail/BasicBufferHandler.hpp b/src/openvic-dataloader/detail/BasicBufferHandler.hpp new file mode 100644 index 0000000..ba2cef9 --- /dev/null +++ b/src/openvic-dataloader/detail/BasicBufferHandler.hpp @@ -0,0 +1,44 @@ +#pragma once + +#include <optional> + +#include <openvic-dataloader/ParseError.hpp> + +#include <lexy/encoding.hpp> +#include <lexy/input/buffer.hpp> +#include <lexy/input/file.hpp> + +#include "detail/Errors.hpp" + +namespace ovdl::detail { + template<typename Encoding = lexy::default_encoding, typename MemoryResource = void> + class BasicBufferHandler { + public: + constexpr bool is_valid() const { + return _buffer.size() != 0; + } + + constexpr std::optional<ovdl::ParseError> load_buffer_size(const char* data, std::size_t size) { + _buffer = lexy::buffer<Encoding, MemoryResource>(data, size); + return std::nullopt; + } + + constexpr std::optional<ovdl::ParseError> load_buffer(const char* start, const char* end) { + _buffer = lexy::buffer<Encoding, MemoryResource>(start, end); + return std::nullopt; + } + + std::optional<ovdl::ParseError> load_file(const char* path) { + auto file = lexy::read_file<Encoding, lexy::encoding_endianness::bom, MemoryResource>(path); + if (!file) { + return ovdl::errors::make_no_file_error(path); + } + + _buffer = file.buffer(); + return std::nullopt; + } + + protected: + lexy::buffer<Encoding, MemoryResource> _buffer; + }; +}
\ No newline at end of file diff --git a/src/openvic-dataloader/detail/BasicParser.cpp b/src/openvic-dataloader/detail/BasicParser.cpp new file mode 100644 index 0000000..ee1b516 --- /dev/null +++ b/src/openvic-dataloader/detail/BasicParser.cpp @@ -0,0 +1,47 @@ +#include <iostream> +#include <ostream> + +#include <openvic-dataloader/detail/BasicParser.hpp> + +#include "detail/NullBuff.hpp" + +using namespace ovdl; +using namespace ovdl::detail; + +BasicParser::BasicParser() : _error_stream(detail::cnull) {} + +void BasicParser::set_error_log_to_null() { + set_error_log_to(detail::cnull); +} + +void BasicParser::set_error_log_to_stderr() { + set_error_log_to(std::cerr); +} + +void BasicParser::set_error_log_to_stdout() { + set_error_log_to(std::cout); +} + +void BasicParser::set_error_log_to(std::basic_ostream<char>& stream) { + _error_stream = stream; +} + +bool BasicParser::has_error() const { + return !_errors.empty(); +} + +bool BasicParser::has_fatal_error() const { + return _has_fatal_error; +} + +bool BasicParser::has_warning() const { + return !_warnings.empty(); +} + +const std::vector<ovdl::ParseError>& BasicParser::get_errors() const { + return _errors; +} + +const std::vector<ovdl::ParseWarning>& BasicParser::get_warnings() const { + return _warnings; +}
\ No newline at end of file diff --git a/src/openvic-dataloader/detail/DetectUtf8.hpp b/src/openvic-dataloader/detail/DetectUtf8.hpp new file mode 100644 index 0000000..2045b3c --- /dev/null +++ b/src/openvic-dataloader/detail/DetectUtf8.hpp @@ -0,0 +1,53 @@ +#pragma once + +#include <lexy/action/match.hpp> +#include <lexy/dsl.hpp> + +#include "detail/LexyLitRange.hpp" + +namespace ovdl::detail { + namespace detect_utf8 { + + template<bool INCLUDE_ASCII> + struct DetectUtf8 { + struct not_utf8 { + static constexpr auto name = "not utf8"; + }; + + static constexpr auto rule = [] { + constexpr auto is_not_ascii_flag = lexy::dsl::context_flag<DetectUtf8>; + + // & 0b10000000 == 0b00000000 + constexpr auto ascii_values = lexydsl::make_range<0b00000000, 0b01111111>(); + // & 0b11100000 == 0b11000000 + constexpr auto two_byte = lexydsl::make_range<0b11000000, 0b11011111>(); + // & 0b11110000 == 0b11100000 + constexpr auto three_byte = lexydsl::make_range<0b11100000, 0b11101111>(); + // & 0b11111000 == 0b11110000 + constexpr auto four_byte = lexydsl::make_range<0b11110000, 0b11110111>(); + // & 0b11000000 == 0b10000000 + constexpr auto check_bytes = lexydsl::make_range<0b10000000, 0b10111111>(); + + constexpr auto utf8_check = + ((four_byte >> lexy::dsl::times<3>(check_bytes)) | + (three_byte >> lexy::dsl::times<2>(check_bytes)) | + (two_byte >> lexy::dsl::times<1>(check_bytes))) >> + is_not_ascii_flag.set(); + + return is_not_ascii_flag.template create<INCLUDE_ASCII>() + + lexy::dsl::while_(utf8_check | ascii_values) + + lexy::dsl::must(is_not_ascii_flag.is_set()).template error<not_utf8>; + }(); + }; + } + + template<typename Input> + constexpr bool is_utf8_no_ascii(const Input& input) { + return lexy::match<detect_utf8::DetectUtf8<false>>(input); + } + + template<typename Input> + constexpr bool is_utf8(const Input& input) { + return lexy::match<detect_utf8::DetectUtf8<true>>(input); + } +}
\ No newline at end of file diff --git a/src/openvic-dataloader/detail/Errors.hpp b/src/openvic-dataloader/detail/Errors.hpp new file mode 100644 index 0000000..f53bedc --- /dev/null +++ b/src/openvic-dataloader/detail/Errors.hpp @@ -0,0 +1,23 @@ +#pragma once + +#include "openvic-dataloader/v2script/Parser.hpp" + +namespace ovdl::errors { + inline const ParseError make_no_file_error(const char* file_path) { + std::string message; + if (!file_path) { + message = "File path not specified."; + } else { + message = "File '" + std::string(file_path) + "' was not found."; + } + + return ParseError { ParseError::Type::Fatal, message, 1 }; + } +} + +namespace ovdl::v2script::errors { + +} + +namespace ovdl::ovscript::errors { +}
\ No newline at end of file diff --git a/src/openvic-dataloader/detail/LexyLitRange.hpp b/src/openvic-dataloader/detail/LexyLitRange.hpp new file mode 100644 index 0000000..a6761a8 --- /dev/null +++ b/src/openvic-dataloader/detail/LexyLitRange.hpp @@ -0,0 +1,16 @@ +#pragma once + +#include <lexy/dsl/literal.hpp> + +namespace ovdl::detail::lexydsl { + template<unsigned char LOW, unsigned char HIGH> + consteval auto make_range() { + if constexpr (LOW == HIGH) { + return lexy::dsl::lit_c<LOW>; + } else if constexpr (LOW == (HIGH - 1)) { + return lexy::dsl::lit_c<LOW> / lexy::dsl::lit_c<HIGH>; + } else { + return lexy::dsl::lit_c<LOW> / make_range<LOW + 1, HIGH>(); + } + } +}
\ No newline at end of file diff --git a/src/openvic-dataloader/detail/LexyReportError.hpp b/src/openvic-dataloader/detail/LexyReportError.hpp new file mode 100644 index 0000000..684b5db --- /dev/null +++ b/src/openvic-dataloader/detail/LexyReportError.hpp @@ -0,0 +1,102 @@ +#pragma once + +#include <cstddef> +#include <sstream> +#include <string> +#include <utility> +#include <vector> + +#include <openvic-dataloader/ParseData.hpp> +#include <openvic-dataloader/ParseError.hpp> + +#include <lexy/input_location.hpp> +#include <lexy/visualize.hpp> + +#include <lexy_ext/report_error.hpp> + +namespace ovdl::detail { + template<typename OutputIterator> + struct _ReportError { + OutputIterator _iter; + lexy::visualization_options _opts; + const char* _path; + + struct _sink { + OutputIterator _iter; + lexy::visualization_options _opts; + const char* _path; + std::size_t _count; + std::vector<ParseError> _errors; + + using return_type = std::vector<ParseError>; + + template<typename Input, typename Reader, typename Tag> + void operator()(const lexy::error_context<Input>& context, const lexy::error<Reader, Tag>& error) { + _iter = lexy_ext::_detail::write_error(_iter, context, error, _opts, _path); + ++_count; + + // Convert the context location and error location into line/column information. + auto context_location = lexy::get_input_location(context.input(), context.position()); + auto location = lexy::get_input_location(context.input(), error.position(), context_location.anchor()); + + std::basic_stringstream<typename Reader::encoding::char_type> message; + + // Write the main annotation. 
+ if constexpr (std::is_same_v<Tag, lexy::expected_literal>) { + auto string = lexy::_detail::make_literal_lexeme<typename Reader::encoding>(error.string(), error.length()); + + message << "expected '" << string.data() << '\''; + } else if constexpr (std::is_same_v<Tag, lexy::expected_keyword>) { + auto string = lexy::_detail::make_literal_lexeme<typename Reader::encoding>(error.string(), error.length()); + + message << "expected keyword '" << string.data() << '\''; + } else if constexpr (std::is_same_v<Tag, lexy::expected_char_class>) { + message << "expected " << error.name(); + } else { + message << error.message(); + } + + _errors.push_back( + ParseError { + ParseError::Type::Fatal, // TODO: distinguish recoverable errors from fatal errors + std::move(message.str()), + 0, // TODO: implement proper error codes + ParseData { + context.production(), + context_location.line_nr(), + context_location.column_nr(), + }, + location.line_nr(), + location.column_nr(), + }); + } + + return_type finish() && { + if (_count != 0) + *_iter++ = '\n'; + return _errors; + } + }; + constexpr auto sink() const { + return _sink { _iter, _opts, _path, 0 }; + } + + /// Specifies a path that will be printed alongside the diagnostic. + constexpr _ReportError path(const char* path) const { + return { _iter, _opts, path }; + } + + /// Specifies an output iterator where the errors are written to. + template<typename OI> + constexpr _ReportError<OI> to(OI out) const { + return { out, _opts, _path }; + } + + /// Overrides visualization options. + constexpr _ReportError opts(lexy::visualization_options opts) const { + return { _iter, opts, _path }; + } + }; + + constexpr auto ReporError = _ReportError<lexy::stderr_output_iterator> {}; +}
\ No newline at end of file diff --git a/src/openvic-dataloader/detail/NullBuff.hpp b/src/openvic-dataloader/detail/NullBuff.hpp new file mode 100644 index 0000000..baf9e1b --- /dev/null +++ b/src/openvic-dataloader/detail/NullBuff.hpp @@ -0,0 +1,30 @@ +#pragma once + +#include <ostream> + +namespace ovdl::detail { + template<class cT, class traits = std::char_traits<cT>> + class basic_nullbuf : public std::basic_streambuf<cT, traits> { + typename traits::int_type overflow(typename traits::int_type c) { + return traits::not_eof(c); // indicate success + } + }; + + template<class cT, class traits = std::char_traits<cT>> + class basic_onullstream : public std::basic_ostream<cT, traits> { + public: + basic_onullstream() : std::basic_ios<cT, traits>(&m_sbuf), + std::basic_ostream<cT, traits>(&m_sbuf) { + std::basic_ios<cT, traits>::init(&m_sbuf); + } + + private: + basic_nullbuf<cT, traits> m_sbuf; + }; + + typedef basic_onullstream<char> onullstream; + typedef basic_onullstream<wchar_t> wonullstream; + + inline onullstream cnull; + inline onullstream wcnull; +}
\ No newline at end of file diff --git a/src/openvic-dataloader/detail/OStreamOutputIterator.hpp b/src/openvic-dataloader/detail/OStreamOutputIterator.hpp new file mode 100644 index 0000000..81f6c89 --- /dev/null +++ b/src/openvic-dataloader/detail/OStreamOutputIterator.hpp @@ -0,0 +1,21 @@ +#pragma once + +#include <ostream> + +namespace ovdl::detail { + struct OStreamOutputIterator { + std::reference_wrapper<std::ostream> _stream; + + auto operator*() const noexcept { + return *this; + } + auto operator++(int) const noexcept { + return *this; + } + + OStreamOutputIterator& operator=(char c) { + _stream.get().put(c); + return *this; + } + }; +}
\ No newline at end of file diff --git a/src/openvic-dataloader/detail/Warnings.hpp b/src/openvic-dataloader/detail/Warnings.hpp new file mode 100644 index 0000000..fc0fbed --- /dev/null +++ b/src/openvic-dataloader/detail/Warnings.hpp @@ -0,0 +1,21 @@ +#pragma once + +#include "openvic-dataloader/v2script/Parser.hpp" + +namespace ovdl::v2script::warnings { + inline const ParseWarning make_utf8_warning(const char* file_path) { + constexpr std::string_view message_suffix = "This may cause problems. Prefer Windows-1252 encoding."; + + std::string message; + if (!file_path) { + message = "Buffer is a UTF-8 encoded string. " + std::string(message_suffix); + } else { + message = "File '" + std::string(file_path) + "' is a UTF-8 encoded file. " + std::string(message_suffix); + } + + return ParseWarning { message, 1 }; + } +} + +namespace ovdl::ovscript::warnings { +}
\ No newline at end of file |