aboutsummaryrefslogtreecommitdiff
path: root/src/openvic-dataloader/detail
diff options
context:
space:
mode:
author Spartan322 <Megacake1234@gmail.com>2023-07-28 06:52:00 +0200
committer Spartan322 <Megacake1234@gmail.com>2023-09-02 14:28:21 +0200
commit7440a5d1433eec4bf87e3723022db187e7f61b1a (patch)
tree2bb062c320fa2227b18956617b94d0e8800420d8 /src/openvic-dataloader/detail
parente941573f47fb867ff75c8a2cf78302b754ffbeee (diff)
Rework Grammar and Parser
Add proper headless binary construction (includes basic validation); Add Error and Warning structs to Parser; Add FileNode pointer getter to Parser; Change all `char8_t*` and `const char8_t` to `const char*` in Parser; Add Parser move operators and Parser destructor; Add BufferHandler PIMPL object to Parser; Add UTF-8 file Warning to v2script; Add proper Grammar value retrieval; Add AbstractSyntaxTree for v2script data parser (has compile-time embedded type information accessible at compile-time and runtime; has Tab-based print functionality); Fix wrong environment reference for headless construction in SConstruct; Add error retrieval; Add BasicCallbackOStreamBuffer for callback streaming; Add CallbackStreamBuffer for char; Add CallbackWStreamBuffer for wchar_t; Add BasicCallbackStream; Add CallbackStream for char; Add CallbackWStream for wchar_t; Add grammar for events and decisions; Add event_parse to Parser; Add decision_parse to Parser; Add .clang-format; Ignore dirty lexy module; Add CSV parser and grammar (creates std::vector<csv::LineObject> for a list of lines); Add BasicParser and BasicBufferHandler to reduce code duplication
Diffstat (limited to 'src/openvic-dataloader/detail')
-rw-r--r--src/openvic-dataloader/detail/BasicBufferHandler.hpp44
-rw-r--r--src/openvic-dataloader/detail/BasicParser.cpp47
-rw-r--r--src/openvic-dataloader/detail/DetectUtf8.hpp53
-rw-r--r--src/openvic-dataloader/detail/Errors.hpp23
-rw-r--r--src/openvic-dataloader/detail/LexyLitRange.hpp16
-rw-r--r--src/openvic-dataloader/detail/LexyReportError.hpp102
-rw-r--r--src/openvic-dataloader/detail/NullBuff.hpp30
-rw-r--r--src/openvic-dataloader/detail/OStreamOutputIterator.hpp21
-rw-r--r--src/openvic-dataloader/detail/Warnings.hpp21
9 files changed, 357 insertions, 0 deletions
diff --git a/src/openvic-dataloader/detail/BasicBufferHandler.hpp b/src/openvic-dataloader/detail/BasicBufferHandler.hpp
new file mode 100644
index 0000000..ba2cef9
--- /dev/null
+++ b/src/openvic-dataloader/detail/BasicBufferHandler.hpp
@@ -0,0 +1,44 @@
+#pragma once
+
+#include <cstddef>
+#include <optional>
+#include <utility>
+
+#include <openvic-dataloader/ParseError.hpp>
+
+#include <lexy/encoding.hpp>
+#include <lexy/input/buffer.hpp>
+#include <lexy/input/file.hpp>
+
+#include "detail/Errors.hpp"
+
+namespace ovdl::detail {
+ /// Holds a lexy::buffer and provides the different ways of filling it
+ /// (from a pointer + size, from a pointer range, or from a file on disk).
+ ///
+ /// @tparam Encoding       lexy encoding of the buffer contents.
+ /// @tparam MemoryResource memory resource type forwarded to lexy::buffer.
+ template<typename Encoding = lexy::default_encoding, typename MemoryResource = void>
+ class BasicBufferHandler {
+ public:
+     /// @return true when the held buffer contains at least one code unit.
+     constexpr bool is_valid() const {
+         return _buffer.size() != 0;
+     }
+
+     /// Replaces the buffer with a copy of `size` code units starting at `data`.
+     /// @return always std::nullopt.
+     constexpr std::optional<ovdl::ParseError> load_buffer_size(const char* data, std::size_t size) {
+         _buffer = lexy::buffer<Encoding, MemoryResource>(data, size);
+         return std::nullopt;
+     }
+
+     /// Replaces the buffer with a copy of the range [start, end).
+     /// @return always std::nullopt.
+     constexpr std::optional<ovdl::ParseError> load_buffer(const char* start, const char* end) {
+         _buffer = lexy::buffer<Encoding, MemoryResource>(start, end);
+         return std::nullopt;
+     }
+
+     /// Reads the file at `path` into the buffer.
+     /// @param path file to read; a null pointer is reported as an error.
+     /// @return std::nullopt on success, otherwise the error produced by
+     ///         ovdl::errors::make_no_file_error(path).
+     std::optional<ovdl::ParseError> load_file(const char* path) {
+         // make_no_file_error already has a dedicated message for a missing path,
+         // so reject it here instead of handing a null pointer to the file API.
+         if (!path) {
+             return ovdl::errors::make_no_file_error(path);
+         }
+
+         auto file = lexy::read_file<Encoding, lexy::encoding_endianness::bom, MemoryResource>(path);
+         if (!file) {
+             return ovdl::errors::make_no_file_error(path);
+         }
+
+         // `file` is not used afterwards: take its buffer by move rather than copying the contents.
+         _buffer = std::move(file).buffer();
+         return std::nullopt;
+     }
+
+ protected:
+     lexy::buffer<Encoding, MemoryResource> _buffer;
+ };
+} \ No newline at end of file
diff --git a/src/openvic-dataloader/detail/BasicParser.cpp b/src/openvic-dataloader/detail/BasicParser.cpp
new file mode 100644
index 0000000..ee1b516
--- /dev/null
+++ b/src/openvic-dataloader/detail/BasicParser.cpp
@@ -0,0 +1,47 @@
+#include <iostream>
+#include <ostream>
+
+#include <openvic-dataloader/detail/BasicParser.hpp>
+
+#include "detail/NullBuff.hpp"
+
+using namespace ovdl;
+using namespace ovdl::detail;
+
+// Constructs a parser whose error stream initially refers to detail::cnull.
+BasicParser::BasicParser() : _error_stream(detail::cnull) {}
+
+// Routes the error log to detail::cnull.
+void BasicParser::set_error_log_to_null() {
+ set_error_log_to(detail::cnull);
+}
+
+// Routes the error log to std::cerr.
+void BasicParser::set_error_log_to_stderr() {
+ set_error_log_to(std::cerr);
+}
+
+// Routes the error log to std::cout.
+void BasicParser::set_error_log_to_stdout() {
+ set_error_log_to(std::cout);
+}
+
+// Routes the error log to the given stream by assigning it to _error_stream.
+// NOTE(review): _error_stream is declared in BasicParser.hpp (not shown here); presumably it only
+// refers to `stream`, in which case `stream` must outlive the parser's use of it — confirm.
+void BasicParser::set_error_log_to(std::basic_ostream<char>& stream) {
+ _error_stream = stream;
+}
+
+// Returns true when at least one error has been collected in _errors.
+bool BasicParser::has_error() const {
+ return !_errors.empty();
+}
+
+// Returns the _has_fatal_error flag.
+bool BasicParser::has_fatal_error() const {
+ return _has_fatal_error;
+}
+
+// Returns true when at least one warning has been collected in _warnings.
+bool BasicParser::has_warning() const {
+ return !_warnings.empty();
+}
+
+// Returns a read-only reference to the collected errors.
+const std::vector<ovdl::ParseError>& BasicParser::get_errors() const {
+ return _errors;
+}
+
+// Returns a read-only reference to the collected warnings.
+const std::vector<ovdl::ParseWarning>& BasicParser::get_warnings() const {
+ return _warnings;
+} \ No newline at end of file
diff --git a/src/openvic-dataloader/detail/DetectUtf8.hpp b/src/openvic-dataloader/detail/DetectUtf8.hpp
new file mode 100644
index 0000000..2045b3c
--- /dev/null
+++ b/src/openvic-dataloader/detail/DetectUtf8.hpp
@@ -0,0 +1,53 @@
+#pragma once
+
+#include <lexy/action/match.hpp>
+#include <lexy/dsl.hpp>
+
+#include "detail/LexyLitRange.hpp"
+
+namespace ovdl::detail {
+ namespace detect_utf8 {
+
+ /// lexy production that checks whether an input is structurally UTF-8.
+ ///
+ /// The whole input has to consist of ASCII bytes and well-formed multi-byte
+ /// sequences (lead byte followed by the right number of continuation bytes).
+ /// A context flag records whether a multi-byte sequence was seen:
+ /// - INCLUDE_ASCII == true:  the flag starts set, so pure-ASCII input matches.
+ /// - INCLUDE_ASCII == false: the flag starts cleared, so at least one
+ ///   multi-byte sequence is required.
+ ///
+ /// Only lead/continuation byte structure is checked; overlong three/four-byte
+ /// forms and surrogate code points are not rejected.
+ template<bool INCLUDE_ASCII>
+ struct DetectUtf8 {
+     struct not_utf8 {
+         static constexpr auto name = "not utf8";
+     };
+
+     static constexpr auto rule = [] {
+         constexpr auto is_not_ascii_flag = lexy::dsl::context_flag<DetectUtf8>;
+
+         // 0x00-0x7F: single-byte (ASCII) code points.
+         constexpr auto ascii_values = lexydsl::make_range<0b00000000, 0b01111111>();
+         // 0xC2-0xDF: two-byte lead bytes. 0xC0 and 0xC1 share the 110xxxxx prefix
+         // but can only start overlong encodings, so they never occur in UTF-8.
+         constexpr auto two_byte = lexydsl::make_range<0b11000010, 0b11011111>();
+         // 0xE0-0xEF: three-byte lead bytes.
+         constexpr auto three_byte = lexydsl::make_range<0b11100000, 0b11101111>();
+         // 0xF0-0xF4: four-byte lead bytes. 0xF5-0xF7 share the 11110xxx prefix
+         // but would encode code points above U+10FFFF.
+         constexpr auto four_byte = lexydsl::make_range<0b11110000, 0b11110100>();
+         // 0x80-0xBF: continuation bytes.
+         constexpr auto check_bytes = lexydsl::make_range<0b10000000, 0b10111111>();
+
+         // Once a lead byte is taken, the matching number of continuation bytes is mandatory.
+         constexpr auto utf8_check =
+             ((four_byte >> lexy::dsl::times<3>(check_bytes)) |
+                 (three_byte >> lexy::dsl::times<2>(check_bytes)) |
+                 (two_byte >> lexy::dsl::times<1>(check_bytes))) >>
+             is_not_ascii_flag.set();
+
+         // The trailing eof makes a byte that is neither ASCII nor a valid lead byte
+         // fail the match; without it the loop would simply stop and the input would
+         // still be accepted.
+         return is_not_ascii_flag.template create<INCLUDE_ASCII>() +
+                lexy::dsl::while_(utf8_check | ascii_values) +
+                lexy::dsl::must(is_not_ascii_flag.is_set()).template error<not_utf8> +
+                lexy::dsl::eof;
+     }();
+ };
+ }
+
+ /// Matches `input` against DetectUtf8<false>. That production starts with its
+ /// flag cleared, so the match can only succeed if at least one multi-byte
+ /// sequence was matched.
+ /// @return the result of lexy::match.
+ template<typename Input>
+ constexpr bool is_utf8_no_ascii(const Input& input) {
+     using Production = detect_utf8::DetectUtf8<false>;
+     return lexy::match<Production>(input);
+ }
+
+ /// Matches `input` against DetectUtf8<true>. That production starts with its
+ /// flag already set, so no multi-byte sequence is required for a match.
+ /// @return the result of lexy::match.
+ template<typename Input>
+ constexpr bool is_utf8(const Input& input) {
+     using Production = detect_utf8::DetectUtf8<true>;
+     return lexy::match<Production>(input);
+ }
+} \ No newline at end of file
diff --git a/src/openvic-dataloader/detail/Errors.hpp b/src/openvic-dataloader/detail/Errors.hpp
new file mode 100644
index 0000000..f53bedc
--- /dev/null
+++ b/src/openvic-dataloader/detail/Errors.hpp
@@ -0,0 +1,23 @@
+#pragma once
+
+#include "openvic-dataloader/v2script/Parser.hpp"
+
+namespace ovdl::errors {
+ /// Builds the fatal error reported when a file could not be loaded.
+ /// @param file_path path that was requested; may be null, in which case the
+ ///                  message states that no path was specified.
+ /// @return a ParseError of type Fatal with error value 1.
+ ///
+ /// Returned as a non-const value so callers can move it (for example into a
+ /// std::optional<ParseError>) instead of copying the message string.
+ inline ParseError make_no_file_error(const char* file_path) {
+     if (!file_path) {
+         return ParseError { ParseError::Type::Fatal, std::string("File path not specified."), 1 };
+     }
+
+     return ParseError { ParseError::Type::Fatal, "File '" + std::string(file_path) + "' was not found.", 1 };
+ }
+}
+
+namespace ovdl::v2script::errors {
+
+}
+
+namespace ovdl::ovscript::errors {
+} \ No newline at end of file
diff --git a/src/openvic-dataloader/detail/LexyLitRange.hpp b/src/openvic-dataloader/detail/LexyLitRange.hpp
new file mode 100644
index 0000000..a6761a8
--- /dev/null
+++ b/src/openvic-dataloader/detail/LexyLitRange.hpp
@@ -0,0 +1,16 @@
+#pragma once
+
+#include <lexy/dsl/literal.hpp>
+
+namespace ovdl::detail::lexydsl {
+ /// Builds a lexy literal set matching any single byte in [LOW, HIGH] by
+ /// chaining lexy::dsl::lit_c alternatives, one per value.
+ /// @tparam LOW  first byte value of the range (inclusive).
+ /// @tparam HIGH last byte value of the range (inclusive); must not be below LOW.
+ template<unsigned char LOW, unsigned char HIGH>
+ consteval auto make_range() {
+     // A reversed range would otherwise recurse upwards until LOW + 1 no longer
+     // fits in unsigned char; fail early with a readable message instead.
+     static_assert(LOW <= HIGH, "make_range requires LOW <= HIGH");
+
+     if constexpr (LOW == HIGH) {
+         return lexy::dsl::lit_c<LOW>;
+     } else if constexpr (LOW == (HIGH - 1)) {
+         return lexy::dsl::lit_c<LOW> / lexy::dsl::lit_c<HIGH>;
+     } else {
+         return lexy::dsl::lit_c<LOW> / make_range<LOW + 1, HIGH>();
+     }
+ }
+} \ No newline at end of file
diff --git a/src/openvic-dataloader/detail/LexyReportError.hpp b/src/openvic-dataloader/detail/LexyReportError.hpp
new file mode 100644
index 0000000..684b5db
--- /dev/null
+++ b/src/openvic-dataloader/detail/LexyReportError.hpp
@@ -0,0 +1,102 @@
+#pragma once
+
+#include <cstddef>
+#include <sstream>
+#include <string>
+#include <utility>
+#include <vector>
+
+#include <openvic-dataloader/ParseData.hpp>
+#include <openvic-dataloader/ParseError.hpp>
+
+#include <lexy/input_location.hpp>
+#include <lexy/visualize.hpp>
+
+#include <lexy_ext/report_error.hpp>
+
+namespace ovdl::detail {
+ /// Error callback for lexy parse actions. Every reported error is written to
+ /// an output iterator and also recorded as an ovdl::ParseError; the recorded
+ /// list is the callback's result.
+ /// @tparam OutputIterator iterator the textual diagnostics are written to.
+ template<typename OutputIterator>
+ struct _ReportError {
+     OutputIterator _iter;
+     lexy::visualization_options _opts;
+     const char* _path;
+
+     /// Sink created once per parse; accumulates the errors of that parse.
+     struct _sink {
+         OutputIterator _iter;
+         lexy::visualization_options _opts;
+         const char* _path;
+         std::size_t _count;
+         std::vector<ParseError> _errors;
+
+         using return_type = std::vector<ParseError>;
+
+         /// Handles one error: writes the diagnostic through `_iter` and appends a ParseError to `_errors`.
+         template<typename Input, typename Reader, typename Tag>
+         void operator()(const lexy::error_context<Input>& context, const lexy::error<Reader, Tag>& error) {
+             _iter = lexy_ext::_detail::write_error(_iter, context, error, _opts, _path);
+             ++_count;
+
+             // Convert the context location and error location into line/column information.
+             auto context_location = lexy::get_input_location(context.input(), context.position());
+             auto location = lexy::get_input_location(context.input(), error.position(), context_location.anchor());
+
+             std::basic_stringstream<typename Reader::encoding::char_type> message;
+
+             // Write the main annotation.
+             // NOTE(review): string.data() is streamed as a C string; this assumes the literal
+             // behind error.string() is NUL-terminated — confirm.
+             if constexpr (std::is_same_v<Tag, lexy::expected_literal>) {
+                 auto string = lexy::_detail::make_literal_lexeme<typename Reader::encoding>(error.string(), error.length());
+
+                 message << "expected '" << string.data() << '\'';
+             } else if constexpr (std::is_same_v<Tag, lexy::expected_keyword>) {
+                 auto string = lexy::_detail::make_literal_lexeme<typename Reader::encoding>(error.string(), error.length());
+
+                 message << "expected keyword '" << string.data() << '\'';
+             } else if constexpr (std::is_same_v<Tag, lexy::expected_char_class>) {
+                 message << "expected " << error.name();
+             } else {
+                 message << error.message();
+             }
+
+             _errors.push_back(
+                 ParseError {
+                     ParseError::Type::Fatal, // TODO: distinguish recoverable errors from fatal errors
+                     message.str(), // already a temporary, no std::move needed
+                     0, // TODO: implement proper error codes
+                     ParseData {
+                         context.production(),
+                         context_location.line_nr(),
+                         context_location.column_nr(),
+                     },
+                     location.line_nr(),
+                     location.column_nr(),
+                 });
+         }
+
+         /// Ends the sink: writes a trailing newline if anything was reported and
+         /// hands the collected errors to the caller.
+         return_type finish() && {
+             if (_count != 0)
+                 *_iter++ = '\n';
+             // The sink is an rvalue here, but a member is never moved implicitly;
+             // move explicitly so the vector is not deep-copied.
+             return std::move(_errors);
+         }
+     };
+
+     /// Creates a fresh sink with an error count of zero and no recorded errors.
+     constexpr auto sink() const {
+         return _sink { _iter, _opts, _path, 0 };
+     }
+
+     /// Specifies a path that will be printed alongside the diagnostic.
+     constexpr _ReportError path(const char* path) const {
+         return { _iter, _opts, path };
+     }
+
+     /// Specifies an output iterator where the errors are written to.
+     template<typename OI>
+     constexpr _ReportError<OI> to(OI out) const {
+         return { out, _opts, _path };
+     }
+
+     /// Overrides visualization options.
+     constexpr _ReportError opts(lexy::visualization_options opts) const {
+         return { _iter, opts, _path };
+     }
+ };
+
+ // Default reporter instance: a value-initialized _ReportError writing to lexy::stderr_output_iterator.
+ // NOTE(review): the identifier is spelled `ReporError` (missing 't'); it is left unchanged here
+ // because renaming it would require updating any users outside this file.
+ constexpr auto ReporError = _ReportError<lexy::stderr_output_iterator> {};
+} \ No newline at end of file
diff --git a/src/openvic-dataloader/detail/NullBuff.hpp b/src/openvic-dataloader/detail/NullBuff.hpp
new file mode 100644
index 0000000..baf9e1b
--- /dev/null
+++ b/src/openvic-dataloader/detail/NullBuff.hpp
@@ -0,0 +1,30 @@
+#pragma once
+
+#include <ostream>
+
+namespace ovdl::detail {
+ /// Stream buffer that discards everything written to it.
+ template<class cT, class traits = std::char_traits<cT>>
+ class basic_nullbuf : public std::basic_streambuf<cT, traits> {
+     /// Called when a character cannot be stored; reports success without storing it.
+     typename traits::int_type overflow(typename traits::int_type c) override {
+         const auto success_value = traits::not_eof(c);
+         return success_value;
+     }
+ };
+
+ /// Output stream that owns a basic_nullbuf and therefore drops all output.
+ template<class cT, class traits = std::char_traits<cT>>
+ class basic_onullstream : public std::basic_ostream<cT, traits> {
+     using ios_type = std::basic_ios<cT, traits>;
+     using ostream_type = std::basic_ostream<cT, traits>;
+
+ public:
+     // Only the address of m_sbuf is handed to the bases here; the buffer itself
+     // is constructed afterwards, and the stream is re-initialized with it in the body.
+     basic_onullstream() : ios_type(&m_sbuf), ostream_type(&m_sbuf) {
+         this->init(&m_sbuf);
+     }
+
+ private:
+     basic_nullbuf<cT, traits> m_sbuf;
+ };
+
+ /// Narrow- and wide-character null stream types.
+ using onullstream = basic_onullstream<char>;
+ using wonullstream = basic_onullstream<wchar_t>;
+
+ /// Shared null streams: everything written to them is discarded.
+ inline onullstream cnull;
+ // The wide stream has to use the wchar_t specialization; it was previously declared as the narrow type.
+ inline wonullstream wcnull;
+} \ No newline at end of file
diff --git a/src/openvic-dataloader/detail/OStreamOutputIterator.hpp b/src/openvic-dataloader/detail/OStreamOutputIterator.hpp
new file mode 100644
index 0000000..81f6c89
--- /dev/null
+++ b/src/openvic-dataloader/detail/OStreamOutputIterator.hpp
@@ -0,0 +1,21 @@
+#pragma once
+
+#include <ostream>
+
+namespace ovdl::detail {
+ /// Output-iterator style adaptor: every char assigned to it is forwarded to
+ /// the referenced std::ostream via put().
+ struct OStreamOutputIterator {
+     std::reference_wrapper<std::ostream> _stream;
+
+     /// Dereferencing yields a copy of the iterator, so `*it = c` ends up in operator=(char).
+     OStreamOutputIterator operator*() const noexcept {
+         return *this;
+     }
+
+     /// Post-increment has no position to advance; it returns a copy of the iterator.
+     OStreamOutputIterator operator++(int) const noexcept {
+         return *this;
+     }
+
+     /// Writes one character to the referenced stream.
+     OStreamOutputIterator& operator=(char c) {
+         std::ostream& target = _stream;
+         target.put(c);
+         return *this;
+     }
+ };
+} \ No newline at end of file
diff --git a/src/openvic-dataloader/detail/Warnings.hpp b/src/openvic-dataloader/detail/Warnings.hpp
new file mode 100644
index 0000000..fc0fbed
--- /dev/null
+++ b/src/openvic-dataloader/detail/Warnings.hpp
@@ -0,0 +1,21 @@
+#pragma once
+
+#include "openvic-dataloader/v2script/Parser.hpp"
+
+namespace ovdl::v2script::warnings {
+ /// Builds the warning emitted when the input turns out to be UTF-8 encoded.
+ /// @param file_path path of the offending file; may be null, in which case
+ ///                  the message talks about a buffer instead of a file.
+ /// @return a ParseWarning with warning value 1.
+ ///
+ /// Returned as a non-const value so callers can move it instead of copying
+ /// the message string.
+ inline ParseWarning make_utf8_warning(const char* file_path) {
+     constexpr std::string_view message_suffix = "This may cause problems. Prefer Windows-1252 encoding.";
+
+     if (!file_path) {
+         return ParseWarning { "Buffer is a UTF-8 encoded string. " + std::string(message_suffix), 1 };
+     }
+
+     return ParseWarning { "File '" + std::string(file_path) + "' is a UTF-8 encoded file. " + std::string(message_suffix), 1 };
+ }
+}
+
+namespace ovdl::ovscript::warnings {
+} \ No newline at end of file