diff options
author | Spartan322 <Megacake1234@gmail.com> | 2023-07-28 06:52:00 +0200 |
---|---|---|
committer | Spartan322 <Megacake1234@gmail.com> | 2023-08-17 09:04:56 +0200 |
commit | 90f15b582788a9aab0dfe6c81fc4cbbe1d4d3308 (patch) | |
tree | db58100ed696c992addee1a9113b5415f55615ad /src/openvic-dataloader/detail | |
parent | e941573f47fb867ff75c8a2cf78302b754ffbeee (diff) |
Rework Grammar and Parser
Properly construct headless binary with basic validation and print functionality
Add Error and Warning structs to Parser
Add FileNode pointer getter to Parser
Change all `char8_t*` and `const char8_t` to `const char*` in Parser
Add Parser move operators and Parser destructor
Add BufferHandler PIMPL object to Parser
Add UTF-8 file Warning
Add proper Grammar value retrieval
Simplify AST node resolution for Grammar
Add AbstractSyntaxTree for v2script data parser:
Has compile-time embedded type information accessible at compile-time and runtime
Optionally compiled AST print functionality
Add detail/TypeName.hpp
Add detail/SelfType.hpp
Add detail/DetectUtf8.hpp
Add detail/Errors.hpp
Add detail/Warnings.hpp
Add `OPENVIC_DATALOADER_PRINT_NODES` for headless construction
Fix wrong environment reference for headless construction in SConstruct
Diffstat (limited to 'src/openvic-dataloader/detail')
-rw-r--r-- | src/openvic-dataloader/detail/DetectUtf8.hpp | 52 | ||||
-rw-r--r-- | src/openvic-dataloader/detail/Errors.hpp | 19 | ||||
-rw-r--r-- | src/openvic-dataloader/detail/LexyLitRange.hpp | 16 | ||||
-rw-r--r-- | src/openvic-dataloader/detail/NullBuff.hpp | 30 | ||||
-rw-r--r-- | src/openvic-dataloader/detail/Warnings.hpp | 21 |
5 files changed, 138 insertions, 0 deletions
diff --git a/src/openvic-dataloader/detail/DetectUtf8.hpp b/src/openvic-dataloader/detail/DetectUtf8.hpp new file mode 100644 index 0000000..b2969ce --- /dev/null +++ b/src/openvic-dataloader/detail/DetectUtf8.hpp @@ -0,0 +1,52 @@ +#pragma once + +#include "detail/LexyLitRange.hpp" +#include <lexy/action/match.hpp> +#include <lexy/dsl.hpp> + +namespace ovdl::detail { + namespace detect_utf8 { + + template<bool INCLUDE_ASCII> + struct DetectUtf8 { + struct not_utf8 { + static constexpr auto name = "not utf8"; + }; + + static constexpr auto rule = [] { + constexpr auto is_not_ascii_flag = lexy::dsl::context_flag<DetectUtf8>; + + // & 0b10000000 == 0b00000000 + constexpr auto ascii_values = lexydsl::make_range<0b00000000, 0b01111111>(); + // & 0b11100000 == 0b11000000 + constexpr auto two_byte = lexydsl::make_range<0b11000000, 0b11011111>(); + // & 0b11110000 == 0b11100000 + constexpr auto three_byte = lexydsl::make_range<0b11100000, 0b11101111>(); + // & 0b11111000 == 0b11110000 + constexpr auto four_byte = lexydsl::make_range<0b11110000, 0b11110111>(); + // & 0b11000000 == 0b10000000 + constexpr auto check_bytes = lexydsl::make_range<0b10000000, 0b10111111>(); + + constexpr auto utf8_check = + ((four_byte >> lexy::dsl::times<3>(check_bytes)) | + (three_byte >> lexy::dsl::times<2>(check_bytes)) | + (two_byte >> lexy::dsl::times<1>(check_bytes))) >> + is_not_ascii_flag.set(); + + return is_not_ascii_flag.template create<INCLUDE_ASCII>() + + lexy::dsl::while_(utf8_check | ascii_values) + + lexy::dsl::must(is_not_ascii_flag.is_set()).template error<not_utf8>; + }(); + }; + } + + template<typename Input> + constexpr bool is_utf8_no_ascii(const Input& input) { + return lexy::match<detect_utf8::DetectUtf8<false>>(input); + } + + template<typename Input> + constexpr bool is_utf8(const Input& input) { + return lexy::match<detect_utf8::DetectUtf8<true>>(input); + } +}
\ No newline at end of file diff --git a/src/openvic-dataloader/detail/Errors.hpp b/src/openvic-dataloader/detail/Errors.hpp new file mode 100644 index 0000000..f8ed21b --- /dev/null +++ b/src/openvic-dataloader/detail/Errors.hpp @@ -0,0 +1,19 @@ +#pragma once + +#include "openvic-dataloader/v2script/Parser.hpp" + +namespace ovdl::v2script::errors { + inline const v2script::Parser::Error make_no_file_error(const char* file_path) { + std::string message; + if (!file_path) { + message = "File path not specified."; + } else { + message = "File '" + std::string(file_path) + "' was not found."; + } + + return v2script::Parser::Error { Parser::Error::Type::Fatal, message, 1 }; + } +} + +namespace ovdl::ovscript::errors { +}
\ No newline at end of file diff --git a/src/openvic-dataloader/detail/LexyLitRange.hpp b/src/openvic-dataloader/detail/LexyLitRange.hpp new file mode 100644 index 0000000..a6761a8 --- /dev/null +++ b/src/openvic-dataloader/detail/LexyLitRange.hpp @@ -0,0 +1,16 @@ +#pragma once + +#include <lexy/dsl/literal.hpp> + +namespace ovdl::detail::lexydsl { + template<unsigned char LOW, unsigned char HIGH> + consteval auto make_range() { + if constexpr (LOW == HIGH) { + return lexy::dsl::lit_c<LOW>; + } else if constexpr (LOW == (HIGH - 1)) { + return lexy::dsl::lit_c<LOW> / lexy::dsl::lit_c<HIGH>; + } else { + return lexy::dsl::lit_c<LOW> / make_range<LOW + 1, HIGH>(); + } + } +}
// File: src/openvic-dataloader/detail/NullBuff.hpp
#pragma once

#include <ostream>

namespace ovdl::detail {
	/// Stream buffer that discards every character written to it.
	///
	/// It has no put area, so each character reaches overflow(), which reports
	/// success without storing anything.
	template<class cT, class traits = std::char_traits<cT>>
	class basic_nullbuf : public std::basic_streambuf<cT, traits> {
	protected:
		/// Swallows @p c and returns a non-eof value to indicate success.
		typename traits::int_type overflow(typename traits::int_type c = traits::eof()) override {
			return traits::not_eof(c); // indicate success
		}
	};

	/// Output stream whose writes are silently dropped (backed by basic_nullbuf).
	template<class cT, class traits = std::char_traits<cT>>
	class basic_onullstream : public std::basic_ostream<cT, traits> {
	public:
		// basic_ostream's constructor already calls basic_ios::init(&m_sbuf), so
		// no separate basic_ios initialisation or extra init() call is needed.
		// Only the address of m_sbuf is stored here; the buffer itself is not
		// touched before it has been constructed.
		basic_onullstream() : std::basic_ostream<cT, traits>(&m_sbuf) {}

	private:
		basic_nullbuf<cT, traits> m_sbuf;
	};

	using onullstream = basic_onullstream<char>;
	using wonullstream = basic_onullstream<wchar_t>;

	/// Narrow-character sink stream.
	inline onullstream cnull;
	/// Wide-character sink stream (was previously declared with the narrow type).
	inline wonullstream wcnull;
}
\ No newline at end of file diff --git a/src/openvic-dataloader/detail/Warnings.hpp b/src/openvic-dataloader/detail/Warnings.hpp new file mode 100644 index 0000000..f854fa8 --- /dev/null +++ b/src/openvic-dataloader/detail/Warnings.hpp @@ -0,0 +1,21 @@ +#pragma once + +#include "openvic-dataloader/v2script/Parser.hpp" + +namespace ovdl::v2script::warnings { + inline const v2script::Parser::Warning make_utf8_warning(const char* file_path) { + constexpr std::string_view message_suffix = "This may cause problems. Prefer Windows-1252 encoding."; + + std::string message; + if (!file_path) { + message = "Buffer is a UTF-8 encoded string. " + std::string(message_suffix); + } else { + message = "File '" + std::string(file_path) + "' is a UTF-8 encoded file. " + std::string(message_suffix); + } + + return v2script::Parser::Warning { message, 1 }; + } +} + +namespace ovdl::ovscript::warnings { +}
\ No newline at end of file |