aboutsummaryrefslogtreecommitdiff
path: root/src/openvic-dataloader/detail
diff options
context:
space:
mode:
author Spartan322 <Megacake1234@gmail.com>2023-07-28 06:52:00 +0200
committer Spartan322 <Megacake1234@gmail.com>2023-08-17 09:04:56 +0200
commit90f15b582788a9aab0dfe6c81fc4cbbe1d4d3308 (patch)
treedb58100ed696c992addee1a9113b5415f55615ad /src/openvic-dataloader/detail
parente941573f47fb867ff75c8a2cf78302b754ffbeee (diff)
Rework Grammar and Parser
Properly construct headless binary with basic validation and print functionality Add Error and Warning structs to Parser Add FileNode pointer getter to Parser Change all `char8_t*` and `const char8_t` to `const char*` in Parser Add Parser move operators and Parser deconstructor Add BufferHandler PIMPL object to Parser Add UTF-8 file Warning Add proper Grammar value retrieval Simplify AST node resolution for Grammar Add AbstractSyntaxTree for v2script data parser: Has compile-time embedded type information accessible at compile-time and runtime Optionally compiled AST print functionality Add detail/TypeName.hpp Add detail/SelfType.hpp Add detail/DetectUtf8.hpp Add detail/Errors.hpp Add detail/Warnings.hpp Add `OPENVIC_DATALOADER_PRINT_NODES` for headless construction Fix wrong environment reference for headless construction in SConstruct
Diffstat (limited to 'src/openvic-dataloader/detail')
-rw-r--r--src/openvic-dataloader/detail/DetectUtf8.hpp52
-rw-r--r--src/openvic-dataloader/detail/Errors.hpp19
-rw-r--r--src/openvic-dataloader/detail/LexyLitRange.hpp16
-rw-r--r--src/openvic-dataloader/detail/NullBuff.hpp30
-rw-r--r--src/openvic-dataloader/detail/Warnings.hpp21
5 files changed, 138 insertions, 0 deletions
diff --git a/src/openvic-dataloader/detail/DetectUtf8.hpp b/src/openvic-dataloader/detail/DetectUtf8.hpp
new file mode 100644
index 0000000..b2969ce
--- /dev/null
+++ b/src/openvic-dataloader/detail/DetectUtf8.hpp
@@ -0,0 +1,52 @@
+#pragma once
+
+#include "detail/LexyLitRange.hpp"
+#include <lexy/action/match.hpp>
+#include <lexy/dsl.hpp>
+
+namespace ovdl::detail {
+ namespace detect_utf8 {
+
+ template<bool INCLUDE_ASCII>
+ struct DetectUtf8 {
+ struct not_utf8 {
+ static constexpr auto name = "not utf8";
+ };
+
+ static constexpr auto rule = [] {
+ constexpr auto is_not_ascii_flag = lexy::dsl::context_flag<DetectUtf8>;
+
+ // & 0b10000000 == 0b00000000
+ constexpr auto ascii_values = lexydsl::make_range<0b00000000, 0b01111111>();
+ // & 0b11100000 == 0b11000000
+ constexpr auto two_byte = lexydsl::make_range<0b11000000, 0b11011111>();
+ // & 0b11110000 == 0b11100000
+ constexpr auto three_byte = lexydsl::make_range<0b11100000, 0b11101111>();
+ // & 0b11111000 == 0b11110000
+ constexpr auto four_byte = lexydsl::make_range<0b11110000, 0b11110111>();
+ // & 0b11000000 == 0b10000000
+ constexpr auto check_bytes = lexydsl::make_range<0b10000000, 0b10111111>();
+
+ constexpr auto utf8_check =
+ ((four_byte >> lexy::dsl::times<3>(check_bytes)) |
+ (three_byte >> lexy::dsl::times<2>(check_bytes)) |
+ (two_byte >> lexy::dsl::times<1>(check_bytes))) >>
+ is_not_ascii_flag.set();
+
+ return is_not_ascii_flag.template create<INCLUDE_ASCII>() +
+ lexy::dsl::while_(utf8_check | ascii_values) +
+ lexy::dsl::must(is_not_ascii_flag.is_set()).template error<not_utf8>;
+ }();
+ };
+ }
+
+ template<typename Input>
+ constexpr bool is_utf8_no_ascii(const Input& input) {
+ return lexy::match<detect_utf8::DetectUtf8<false>>(input);
+ }
+
+ template<typename Input>
+ constexpr bool is_utf8(const Input& input) {
+ return lexy::match<detect_utf8::DetectUtf8<true>>(input);
+ }
+} \ No newline at end of file
diff --git a/src/openvic-dataloader/detail/Errors.hpp b/src/openvic-dataloader/detail/Errors.hpp
new file mode 100644
index 0000000..f8ed21b
--- /dev/null
+++ b/src/openvic-dataloader/detail/Errors.hpp
@@ -0,0 +1,19 @@
+#pragma once
+
+#include "openvic-dataloader/v2script/Parser.hpp"
+
+namespace ovdl::v2script::errors {
+ inline const v2script::Parser::Error make_no_file_error(const char* file_path) {
+ std::string message;
+ if (!file_path) {
+ message = "File path not specified.";
+ } else {
+ message = "File '" + std::string(file_path) + "' was not found.";
+ }
+
+ return v2script::Parser::Error { Parser::Error::Type::Fatal, message, 1 };
+ }
+}
+
+namespace ovdl::ovscript::errors {
+} \ No newline at end of file
diff --git a/src/openvic-dataloader/detail/LexyLitRange.hpp b/src/openvic-dataloader/detail/LexyLitRange.hpp
new file mode 100644
index 0000000..a6761a8
--- /dev/null
+++ b/src/openvic-dataloader/detail/LexyLitRange.hpp
@@ -0,0 +1,16 @@
+#pragma once
+
+#include <lexy/dsl/literal.hpp>
+
+namespace ovdl::detail::lexydsl {
+ template<unsigned char LOW, unsigned char HIGH>
+ consteval auto make_range() {
+ if constexpr (LOW == HIGH) {
+ return lexy::dsl::lit_c<LOW>;
+ } else if constexpr (LOW == (HIGH - 1)) {
+ return lexy::dsl::lit_c<LOW> / lexy::dsl::lit_c<HIGH>;
+ } else {
+ return lexy::dsl::lit_c<LOW> / make_range<LOW + 1, HIGH>();
+ }
+ }
+} \ No newline at end of file
diff --git a/src/openvic-dataloader/detail/NullBuff.hpp b/src/openvic-dataloader/detail/NullBuff.hpp
new file mode 100644
index 0000000..baf9e1b
--- /dev/null
+++ b/src/openvic-dataloader/detail/NullBuff.hpp
@@ -0,0 +1,30 @@
+#pragma once
+
+#include <ostream>
+
+namespace ovdl::detail {
+ template<class cT, class traits = std::char_traits<cT>>
+ class basic_nullbuf : public std::basic_streambuf<cT, traits> {
+ typename traits::int_type overflow(typename traits::int_type c) {
+ return traits::not_eof(c); // indicate success
+ }
+ };
+
+ template<class cT, class traits = std::char_traits<cT>>
+ class basic_onullstream : public std::basic_ostream<cT, traits> {
+ public:
+ basic_onullstream() : std::basic_ios<cT, traits>(&m_sbuf),
+ std::basic_ostream<cT, traits>(&m_sbuf) {
+ std::basic_ios<cT, traits>::init(&m_sbuf);
+ }
+
+ private:
+ basic_nullbuf<cT, traits> m_sbuf;
+ };
+
+ typedef basic_onullstream<char> onullstream;
+ typedef basic_onullstream<wchar_t> wonullstream;
+
+ inline onullstream cnull;
+ inline onullstream wcnull;
+} \ No newline at end of file
diff --git a/src/openvic-dataloader/detail/Warnings.hpp b/src/openvic-dataloader/detail/Warnings.hpp
new file mode 100644
index 0000000..f854fa8
--- /dev/null
+++ b/src/openvic-dataloader/detail/Warnings.hpp
@@ -0,0 +1,21 @@
+#pragma once
+
+#include "openvic-dataloader/v2script/Parser.hpp"
+
+namespace ovdl::v2script::warnings {
+ inline const v2script::Parser::Warning make_utf8_warning(const char* file_path) {
+ constexpr std::string_view message_suffix = "This may cause problems. Prefer Windows-1252 encoding.";
+
+ std::string message;
+ if (!file_path) {
+ message = "Buffer is a UTF-8 encoded string. " + std::string(message_suffix);
+ } else {
+ message = "File '" + std::string(file_path) + "' is a UTF-8 encoded file. " + std::string(message_suffix);
+ }
+
+ return v2script::Parser::Warning { message, 1 };
+ }
+}
+
+namespace ovdl::ovscript::warnings {
+} \ No newline at end of file