diff options
author | Spartan322 <Megacake1234@gmail.com> | 2023-07-28 06:52:00 +0200 |
---|---|---|
committer | Spartan322 <Megacake1234@gmail.com> | 2023-08-17 09:04:56 +0200 |
commit | 90f15b582788a9aab0dfe6c81fc4cbbe1d4d3308 (patch) | |
tree | db58100ed696c992addee1a9113b5415f55615ad | |
parent | e941573f47fb867ff75c8a2cf78302b754ffbeee (diff) |
Rework Grammar and Parser
Properly construct headless binary with basic validation and print functionality
Add Error and Warning structs to Parser
Add FileNode pointer getter to Parser
Change all `char8_t*` and `const char8_t*` to `const char*` in Parser
Add Parser move constructor, move assignment operator, and destructor
Add BufferHandler PIMPL object to Parser
Add UTF-8 file Warning
Add proper Grammar value retrieval
Simplify AST node resolution for Grammar
Add AbstractSyntaxTree for v2script data parser:
Has type information embedded at compile time, accessible at both compile time and runtime
Has optionally compiled AST print functionality
Add detail/TypeName.hpp
Add detail/SelfType.hpp
Add detail/DetectUtf8.hpp
Add detail/Errors.hpp
Add detail/Warnings.hpp
Add `OPENVIC_DATALOADER_PRINT_NODES` for headless construction
Fix wrong environment reference for headless construction in SConstruct
-rw-r--r-- | SConstruct | 3 | ||||
-rw-r--r-- | include/openvic-dataloader/detail/SelfType.hpp | 24 | ||||
-rw-r--r-- | include/openvic-dataloader/detail/TypeName.hpp | 52 | ||||
-rw-r--r-- | include/openvic-dataloader/v2script/AbstractSyntaxTree.hpp | 174 | ||||
-rw-r--r-- | include/openvic-dataloader/v2script/Parser.hpp | 74 | ||||
-rw-r--r-- | src/headless/main.cpp | 34 | ||||
-rw-r--r-- | src/openvic-dataloader/detail/DetectUtf8.hpp | 52 | ||||
-rw-r--r-- | src/openvic-dataloader/detail/Errors.hpp | 19 | ||||
-rw-r--r-- | src/openvic-dataloader/detail/LexyLitRange.hpp | 16 | ||||
-rw-r--r-- | src/openvic-dataloader/detail/NullBuff.hpp | 30 | ||||
-rw-r--r-- | src/openvic-dataloader/detail/Warnings.hpp | 21 | ||||
-rw-r--r-- | src/openvic-dataloader/v2script/DecisionGrammar.hpp | 14 | ||||
-rw-r--r-- | src/openvic-dataloader/v2script/EventGrammar.hpp | 14 | ||||
-rw-r--r-- | src/openvic-dataloader/v2script/Grammar.cpp | 74 | ||||
-rw-r--r-- | src/openvic-dataloader/v2script/Parser.cpp | 224 | ||||
-rw-r--r-- | src/openvic-dataloader/v2script/SimpleGrammar.hpp | 105 |
16 files changed, 843 insertions, 87 deletions
@@ -262,11 +262,12 @@ if env["build_ovdl_headless"]: headless_env = env.Clone() headless_path = ["src/headless"] headless_env.Append(CPPDEFINES=["OPENVIC_DATALOADER_HEADLESS"]) + headless_env.Append(CPPDEFINES=["OPENVIC_DATALOADER_PRINT_NODES"]) headless_env.Append(CPPPATH=[headless_env.Dir(headless_path)]) headless_env.headless_sources = GlobRecursive("*.cpp", headless_path) if not env["build_ovdl_library"]: headless_env.headless_sources += sources - headless_program = env.Program( + headless_program = headless_env.Program( target="bin/%s" % headless_name, source=headless_env.headless_sources, PROGSUFFIX=".headless" + env["PROGSUFFIX"] diff --git a/include/openvic-dataloader/detail/SelfType.hpp b/include/openvic-dataloader/detail/SelfType.hpp new file mode 100644 index 0000000..5366aef --- /dev/null +++ b/include/openvic-dataloader/detail/SelfType.hpp @@ -0,0 +1,24 @@ +#pragma once + +#include <type_traits> + +namespace ovdl::detail { +#pragma GCC diagnostic push +#pragma clang diagnostic ignored "-Wunknown-warning-option" +#pragma GCC diagnostic ignored "-Wnon-template-friend" + template<typename T> + struct Reader { + friend auto adl_GetSelfType(Reader<T>); + }; + + template<typename T, typename U> + struct Writer { + friend auto adl_GetSelfType(Reader<T>) { return U {}; } + }; +#pragma GCC diagnostic pop + + inline void adl_GetSelfType() {} + + template<typename T> + using Read = std::remove_pointer_t<decltype(adl_GetSelfType(Reader<T> {}))>; +} diff --git a/include/openvic-dataloader/detail/TypeName.hpp b/include/openvic-dataloader/detail/TypeName.hpp new file mode 100644 index 0000000..e9f27d3 --- /dev/null +++ b/include/openvic-dataloader/detail/TypeName.hpp @@ -0,0 +1,52 @@ +#pragma once + +#include <array> +#include <cstddef> +#include <string_view> +#include <utility> + +namespace ovdl::detail { + + template<std::size_t... 
Idxs> + constexpr auto substring_as_array(std::string_view str, std::index_sequence<Idxs...>) { + return std::array { str[Idxs]..., '\n' }; + } + + template<typename T> + constexpr auto type_name_array() { +#if defined(__clang__) + constexpr auto prefix = std::string_view { "[T = " }; + constexpr auto suffix = std::string_view { "]" }; + constexpr auto function = std::string_view { __PRETTY_FUNCTION__ }; +#elif defined(__GNUC__) + constexpr auto prefix = std::string_view { "with T = " }; + constexpr auto suffix = std::string_view { "]" }; + constexpr auto function = std::string_view { __PRETTY_FUNCTION__ }; +#elif defined(_MSC_VER) + constexpr auto prefix = std::string_view { "type_name_array<" }; + constexpr auto suffix = std::string_view { ">(void)" }; + constexpr auto function = std::string_view { __FUNCSIG__ }; +#else +#error Unsupported compiler +#endif + + constexpr auto start = function.find(prefix) + prefix.size(); + constexpr auto end = function.rfind(suffix); + + static_assert(start < end); + + constexpr auto name = function.substr(start, (end - start)); + return substring_as_array(name, std::make_index_sequence<name.size()> {}); + } + + template<typename T> + struct type_name_holder { + static inline constexpr auto value = type_name_array<T>(); + }; + + template<typename T> + constexpr auto type_name() -> std::string_view { + constexpr auto& value = type_name_holder<T>::value; + return std::string_view { value.data(), value.size() }; + } +}
\ No newline at end of file diff --git a/include/openvic-dataloader/v2script/AbstractSyntaxTree.hpp b/include/openvic-dataloader/v2script/AbstractSyntaxTree.hpp new file mode 100644 index 0000000..80485b7 --- /dev/null +++ b/include/openvic-dataloader/v2script/AbstractSyntaxTree.hpp @@ -0,0 +1,174 @@ +#pragma once + +#include <memory> +#include <string> +#include <string_view> +#include <type_traits> +#include <utility> +#include <vector> + +#include <openvic-dataloader/detail/SelfType.hpp> +#include <openvic-dataloader/detail/TypeName.hpp> + +#ifdef OPENVIC_DATALOADER_PRINT_NODES +#include <iostream> + +#define OVDL_PRINT_FUNC_DECL virtual void print(std::ostream& stream) const = 0 +#define OVDL_PRINT_FUNC_DEF(...) \ + void print(std::ostream& stream) const override __VA_ARGS__ +#else +#define OVDL_PRINT_FUNC_DECL +#define OVDL_PRINT_FUNC_DEF(...) +#endif + +// defines get_type_static and get_type for string type naming +#define OVDL_RT_TYPE_DEF \ + static constexpr std::string_view get_type_static() { return ::ovdl::detail::type_name<type>(); } \ + constexpr std::string_view get_type() const override { return ::ovdl::detail::type_name<std::decay_t<decltype(*this)>>(); } + +// defines type for self-class referencing +#define OVDL_TYPE_DEFINE_SELF \ + struct _self_type_tag {}; \ + constexpr auto _self_type_helper()->decltype(::ovdl::detail::Writer<_self_type_tag, decltype(this)> {}); \ + using type = ::ovdl::detail::Read<_self_type_tag>; + +namespace ovdl::v2script::ast { + struct Node { + Node(const Node&) = delete; + Node& operator=(const Node&) = delete; + Node() = default; + Node(Node&&) = default; + Node& operator=(Node&&) = default; + virtual ~Node() = default; + + OVDL_PRINT_FUNC_DECL; + + static constexpr std::string_view get_type_static() { return detail::type_name<Node>(); } + constexpr virtual std::string_view get_type() const = 0; + + template<typename T> + constexpr bool is_type() const { + return get_type().compare(detail::type_name<T>()) == 0; + } + 
}; + + using NodePtr = Node*; + using NodeUPtr = std::unique_ptr<Node>; + + template<class T, class... Args> + NodePtr make_node_ptr(Args&&... args) { + if constexpr (std::is_pointer_v<NodePtr>) { + return new T(std::forward<Args>(args)...); + } else { + return NodePtr(new T(std::forward<Args>(args)...)); + } + } + + template<typename To, typename From> + To& cast_node_ptr(const From& from) { + if constexpr (std::is_pointer_v<NodePtr>) { + return *static_cast<To*>(from); + } else { + return *static_cast<To*>(from.get()); + } + } + + constexpr std::vector<NodeUPtr> make_node_ptr_vector(const std::vector<NodePtr>& ptrs) { + std::vector<NodeUPtr> result; + result.reserve(ptrs.size()); + for (auto&& p : ptrs) { + result.push_back(NodeUPtr(p)); + } + return result; + } + + struct IdentifierNode final : public Node { + std::string _name; + explicit IdentifierNode(std::string name) + : _name(std::move(name)) { + } + + OVDL_TYPE_DEFINE_SELF; + OVDL_RT_TYPE_DEF; + + OVDL_PRINT_FUNC_DEF({ + stream << _name.c_str(); + }) + }; + + struct StringNode final : public Node { + std::string _name; + explicit StringNode(std::string name) + : _name(std::move(name)) { + } + + OVDL_TYPE_DEFINE_SELF; + OVDL_RT_TYPE_DEF; + + OVDL_PRINT_FUNC_DEF({ + stream << '"' << _name.c_str() << '"'; + }) + }; + + struct AssignNode final : public Node { + std::string _name; + NodeUPtr _initializer; + explicit AssignNode(NodePtr name, NodePtr init) + : _initializer(std::move(init)) { + if (name->is_type<IdentifierNode>()) { + _name = cast_node_ptr<IdentifierNode>(name)._name; + } + } + + OVDL_TYPE_DEFINE_SELF; + OVDL_RT_TYPE_DEF; + + OVDL_PRINT_FUNC_DEF({ + stream << _name.c_str() << " = "; + _initializer->print(stream); + }) + }; + + struct ListNode final : public Node { + std::vector<NodeUPtr> _statements; + explicit ListNode(std::vector<NodePtr> statements = std::vector<NodePtr> {}) + : _statements(make_node_ptr_vector(statements)) { + } + + OVDL_TYPE_DEFINE_SELF; + OVDL_RT_TYPE_DEF; + + 
OVDL_PRINT_FUNC_DEF({ + stream << '{'; + for (int i = 0; i < _statements.size(); i++) { + auto& statement = _statements[i]; + statement->print(stream); + if (i + 1 != _statements.size()) + stream << ' '; + } + stream << '}'; + }) + }; + + struct FileNode final : public Node { + std::vector<NodeUPtr> _statements; + FileNode() {} + explicit FileNode(std::vector<NodePtr> statements) + : _statements(make_node_ptr_vector(statements)) { + } + + OVDL_TYPE_DEFINE_SELF; + OVDL_RT_TYPE_DEF; + + OVDL_PRINT_FUNC_DEF({ + for (auto& statement : _statements) { + statement->print(stream); + stream << "\n===========\n"; + } + }) + }; +} + +#undef OVDL_PRINT_FUNC_DECL +#undef OVDL_PRINT_FUNC_DEF +#undef OVDL_TYPE_DEFINE_SELF
\ No newline at end of file diff --git a/include/openvic-dataloader/v2script/Parser.hpp b/include/openvic-dataloader/v2script/Parser.hpp index 53aab90..dbbec73 100644 --- a/include/openvic-dataloader/v2script/Parser.hpp +++ b/include/openvic-dataloader/v2script/Parser.hpp @@ -1,27 +1,79 @@ #pragma once #include <cstddef> -#include <cstdio> +#include <functional> +#include <memory> +#include <optional> #include <ostream> +#include <string> +#include <vector> + +#include <openvic-dataloader/v2script/AbstractSyntaxTree.hpp> namespace ovdl::v2script { + + using FileNode = ast::FileNode; + class Parser { public: - static Parser from_buffer(char8_t* data, std::size_t size); - static Parser from_buffer(char8_t* start, char8_t* end); - static Parser from_file(const char8_t* path); + struct Error { + const enum class Type : unsigned char { + Recoverable, + Fatal + } type; + const std::string message; + const int error_value; + }; + + struct Warning { + const std::string message; + const int warning_value; + }; + + Parser(); + + static Parser from_buffer(const char* data, std::size_t size); + static Parser from_buffer(const char* start, const char* end); + static Parser from_file(const char* path); + + Parser& load_from_buffer(const char* data, std::size_t size); + Parser& load_from_buffer(const char* start, const char* end); + Parser& load_from_file(const char* path); + void set_error_log_to_null(); void set_error_log_to_stderr(); - void set_error_log_path(const char8_t* path); - void set_error_log_to(std::basic_ostream<char8_t> stream); - void set_error_log_to(std::FILE* file); + void set_error_log_to_stdout(); + void set_error_log_to(std::basic_ostream<char>& stream); - bool parse(); + bool simple_parse(); - bool has_error(); - bool has_warning(); + bool has_error() const; + bool has_fatal_error() const; + bool has_warning() const; + + const std::vector<Error>& get_errors() const; + const std::vector<Warning>& get_warnings() const; + + const FileNode* get_file_node() 
const; + + Parser(Parser&&); + Parser& operator=(Parser&&); + + ~Parser(); private: - Parser(); + std::vector<Error> _errors; + std::vector<Warning> _warnings; + + class BufferHandler; + friend class BufferHandler; + std::unique_ptr<BufferHandler> _buffer_handler; + std::unique_ptr<FileNode> _file_node; + std::reference_wrapper<std::ostream> _error_stream; + const char* _file_path; + bool _has_fatal_error = false; + + template<typename... Args> + inline void _run_load_func(std::optional<Error> (BufferHandler::*func)(Args...), Args... args); }; }
\ No newline at end of file diff --git a/src/headless/main.cpp b/src/headless/main.cpp index ffc6dab..afd569f 100644 --- a/src/headless/main.cpp +++ b/src/headless/main.cpp @@ -1,3 +1,35 @@ -int main() { +#include <cstdio> +#include <iostream> +#include <string> +#include <vector> + +#include <openvic-dataloader/v2script/Parser.hpp> + +int main(int argc, char** argv) { + if (argc < 2) { + std::fprintf(stderr, "usage: %s <filename>", argv[0]); + return 1; + } + + auto parser = ovdl::v2script::Parser::from_file(argv[1]); + if (parser.has_error()) { + return 1; + } + + parser.simple_parse(); + if (parser.has_error()) { + return 2; + } + + if (parser.has_warning()) { + for (auto& warning : parser.get_warnings()) { + std::cerr << "Warning: " << warning.message << std::endl; + } + } + +#ifdef OPENVIC_DATALOADER_PRINT_NODES + parser.get_file_node()->print(std::cout); +#endif + return 0; }
\ No newline at end of file diff --git a/src/openvic-dataloader/detail/DetectUtf8.hpp b/src/openvic-dataloader/detail/DetectUtf8.hpp new file mode 100644 index 0000000..b2969ce --- /dev/null +++ b/src/openvic-dataloader/detail/DetectUtf8.hpp @@ -0,0 +1,52 @@ +#pragma once + +#include "detail/LexyLitRange.hpp" +#include <lexy/action/match.hpp> +#include <lexy/dsl.hpp> + +namespace ovdl::detail { + namespace detect_utf8 { + + template<bool INCLUDE_ASCII> + struct DetectUtf8 { + struct not_utf8 { + static constexpr auto name = "not utf8"; + }; + + static constexpr auto rule = [] { + constexpr auto is_not_ascii_flag = lexy::dsl::context_flag<DetectUtf8>; + + // & 0b10000000 == 0b00000000 + constexpr auto ascii_values = lexydsl::make_range<0b00000000, 0b01111111>(); + // & 0b11100000 == 0b11000000 + constexpr auto two_byte = lexydsl::make_range<0b11000000, 0b11011111>(); + // & 0b11110000 == 0b11100000 + constexpr auto three_byte = lexydsl::make_range<0b11100000, 0b11101111>(); + // & 0b11111000 == 0b11110000 + constexpr auto four_byte = lexydsl::make_range<0b11110000, 0b11110111>(); + // & 0b11000000 == 0b10000000 + constexpr auto check_bytes = lexydsl::make_range<0b10000000, 0b10111111>(); + + constexpr auto utf8_check = + ((four_byte >> lexy::dsl::times<3>(check_bytes)) | + (three_byte >> lexy::dsl::times<2>(check_bytes)) | + (two_byte >> lexy::dsl::times<1>(check_bytes))) >> + is_not_ascii_flag.set(); + + return is_not_ascii_flag.template create<INCLUDE_ASCII>() + + lexy::dsl::while_(utf8_check | ascii_values) + + lexy::dsl::must(is_not_ascii_flag.is_set()).template error<not_utf8>; + }(); + }; + } + + template<typename Input> + constexpr bool is_utf8_no_ascii(const Input& input) { + return lexy::match<detect_utf8::DetectUtf8<false>>(input); + } + + template<typename Input> + constexpr bool is_utf8(const Input& input) { + return lexy::match<detect_utf8::DetectUtf8<true>>(input); + } +}
\ No newline at end of file diff --git a/src/openvic-dataloader/detail/Errors.hpp b/src/openvic-dataloader/detail/Errors.hpp new file mode 100644 index 0000000..f8ed21b --- /dev/null +++ b/src/openvic-dataloader/detail/Errors.hpp @@ -0,0 +1,19 @@ +#pragma once + +#include "openvic-dataloader/v2script/Parser.hpp" + +namespace ovdl::v2script::errors { + inline const v2script::Parser::Error make_no_file_error(const char* file_path) { + std::string message; + if (!file_path) { + message = "File path not specified."; + } else { + message = "File '" + std::string(file_path) + "' was not found."; + } + + return v2script::Parser::Error { Parser::Error::Type::Fatal, message, 1 }; + } +} + +namespace ovdl::ovscript::errors { +}
\ No newline at end of file diff --git a/src/openvic-dataloader/detail/LexyLitRange.hpp b/src/openvic-dataloader/detail/LexyLitRange.hpp new file mode 100644 index 0000000..a6761a8 --- /dev/null +++ b/src/openvic-dataloader/detail/LexyLitRange.hpp @@ -0,0 +1,16 @@ +#pragma once + +#include <lexy/dsl/literal.hpp> + +namespace ovdl::detail::lexydsl { + template<unsigned char LOW, unsigned char HIGH> + consteval auto make_range() { + if constexpr (LOW == HIGH) { + return lexy::dsl::lit_c<LOW>; + } else if constexpr (LOW == (HIGH - 1)) { + return lexy::dsl::lit_c<LOW> / lexy::dsl::lit_c<HIGH>; + } else { + return lexy::dsl::lit_c<LOW> / make_range<LOW + 1, HIGH>(); + } + } +}
\ No newline at end of file diff --git a/src/openvic-dataloader/detail/NullBuff.hpp b/src/openvic-dataloader/detail/NullBuff.hpp new file mode 100644 index 0000000..baf9e1b --- /dev/null +++ b/src/openvic-dataloader/detail/NullBuff.hpp @@ -0,0 +1,30 @@ +#pragma once + +#include <ostream> + +namespace ovdl::detail { + template<class cT, class traits = std::char_traits<cT>> + class basic_nullbuf : public std::basic_streambuf<cT, traits> { + typename traits::int_type overflow(typename traits::int_type c) { + return traits::not_eof(c); // indicate success + } + }; + + template<class cT, class traits = std::char_traits<cT>> + class basic_onullstream : public std::basic_ostream<cT, traits> { + public: + basic_onullstream() : std::basic_ios<cT, traits>(&m_sbuf), + std::basic_ostream<cT, traits>(&m_sbuf) { + std::basic_ios<cT, traits>::init(&m_sbuf); + } + + private: + basic_nullbuf<cT, traits> m_sbuf; + }; + + typedef basic_onullstream<char> onullstream; + typedef basic_onullstream<wchar_t> wonullstream; + + inline onullstream cnull; + inline onullstream wcnull; +}
\ No newline at end of file diff --git a/src/openvic-dataloader/detail/Warnings.hpp b/src/openvic-dataloader/detail/Warnings.hpp new file mode 100644 index 0000000..f854fa8 --- /dev/null +++ b/src/openvic-dataloader/detail/Warnings.hpp @@ -0,0 +1,21 @@ +#pragma once + +#include "openvic-dataloader/v2script/Parser.hpp" + +namespace ovdl::v2script::warnings { + inline const v2script::Parser::Warning make_utf8_warning(const char* file_path) { + constexpr std::string_view message_suffix = "This may cause problems. Prefer Windows-1252 encoding."; + + std::string message; + if (!file_path) { + message = "Buffer is a UTF-8 encoded string. " + std::string(message_suffix); + } else { + message = "File '" + std::string(file_path) + "' is a UTF-8 encoded file. " + std::string(message_suffix); + } + + return v2script::Parser::Warning { message, 1 }; + } +} + +namespace ovdl::ovscript::warnings { +}
\ No newline at end of file diff --git a/src/openvic-dataloader/v2script/DecisionGrammar.hpp b/src/openvic-dataloader/v2script/DecisionGrammar.hpp new file mode 100644 index 0000000..ebc9ad2 --- /dev/null +++ b/src/openvic-dataloader/v2script/DecisionGrammar.hpp @@ -0,0 +1,14 @@ +#pragma once + +#include <memory> +#include <string> +#include <vector> + +#include <lexy/callback.hpp> +#include <lexy/dsl.hpp> +#include <openvic-dataloader/v2script/AbstractSyntaxTree.hpp> + +// Decision Grammar Definitions // +namespace ovdl::v2script::grammar { + +}
\ No newline at end of file diff --git a/src/openvic-dataloader/v2script/EventGrammar.hpp b/src/openvic-dataloader/v2script/EventGrammar.hpp new file mode 100644 index 0000000..7ab40d4 --- /dev/null +++ b/src/openvic-dataloader/v2script/EventGrammar.hpp @@ -0,0 +1,14 @@ +#pragma once + +#include <memory> +#include <string> +#include <vector> + +#include <lexy/callback.hpp> +#include <lexy/dsl.hpp> +#include <openvic-dataloader/v2script/AbstractSyntaxTree.hpp> + +// Event Grammar Definitions // +namespace ovdl::v2script::grammar { + +}
\ No newline at end of file diff --git a/src/openvic-dataloader/v2script/Grammar.cpp b/src/openvic-dataloader/v2script/Grammar.cpp deleted file mode 100644 index ec9fac2..0000000 --- a/src/openvic-dataloader/v2script/Grammar.cpp +++ /dev/null @@ -1,74 +0,0 @@ -#include <lexy/dsl.hpp> -#include <openvic-dataloader/v2script/Parser.hpp> - -using namespace ovdl::v2script; - -// Node Definitions // -namespace dsl = lexy::dsl; - -namespace ovdl::v2script::nodes { - struct StatementListBlock; - - static constexpr auto whitespace_specifier = dsl::code_point.range<0x09, 0x0A>() / dsl::lit_cp<0x0D> / dsl::lit_cp<0x20>; - static constexpr auto comment_specifier = LEXY_LIT("#") >> dsl::until(dsl::newline).or_eof(); - - static constexpr auto data_specifier = - dsl::ascii::alpha_digit_underscore / - dsl::code_point.range<0x25, 0x27>() / dsl::lit_cp<0x2B> / dsl::code_point.range<0x2D, 0x2E>() / - dsl::lit_cp<0x3A> / - dsl::lit_cp<0x8A> / dsl::lit_cp<0x8C> / dsl::lit_cp<0x8E> / - dsl::lit_cp<0x92> / dsl::lit_cp<0x9A> / dsl::lit_cp<0x9C> / dsl::code_point.range<0x9E, 0x9F>() / - dsl::code_point.range<0xC0, 0xD6>() / dsl::code_point.range<0xD8, 0xF6>() / dsl::code_point.range<0xF8, 0xFF>(); - - static constexpr auto data_char_class = LEXY_CHAR_CLASS("DataSpecifier", data_specifier); - - struct Identifier { - static constexpr auto rule = dsl::identifier(data_char_class); - }; - - struct StringExpression { - static constexpr auto escaped_symbols = lexy::symbol_table<char> // - .map<'"'>('"') - .map<'\''>('\'') - .map<'\\'>('\\') - .map<'/'>('/') - .map<'b'>('\b') - .map<'f'>('\f') - .map<'n'>('\n') - .map<'r'>('\r') - .map<'t'>('\t'); - static constexpr auto rule = [] { - // Arbitrary code points that aren't control characters. - auto c = -dsl::unicode::control; - - // Escape sequences start with a backlash. - // They either map one of the symbols, - // or a Unicode code point of the form uXXXX. 
- auto escape = dsl::backslash_escape // - .symbol<escaped_symbols>() - .rule(dsl::lit_c<'u'> >> dsl::code_point_id<4>); - return dsl::quoted(c, escape); - }(); - }; - - struct AssignmentStatement { - static constexpr auto rule = dsl::p<Identifier> >> - (dsl::equal_sign >> - (dsl::p<Identifier> | dsl::p<StringExpression> | dsl::recurse_branch<StatementListBlock>) | - dsl::else_ >> dsl::return_); - }; - - struct StatementListBlock { - static constexpr auto rule = - dsl::curly_bracketed.open() >> - dsl::opt(dsl::list(dsl::p<AssignmentStatement>)) + dsl::opt(dsl::semicolon) + - dsl::curly_bracketed.close(); - }; - - struct File { - // Allow arbitrary spaces between individual tokens. - static constexpr auto whitespace = whitespace_specifier | comment_specifier; - - static constexpr auto rule = dsl::terminator(dsl::eof).list(dsl::p<AssignmentStatement>); - }; -} diff --git a/src/openvic-dataloader/v2script/Parser.cpp b/src/openvic-dataloader/v2script/Parser.cpp new file mode 100644 index 0000000..c0b6bd8 --- /dev/null +++ b/src/openvic-dataloader/v2script/Parser.cpp @@ -0,0 +1,224 @@ +#include "openvic-dataloader/v2script/Parser.hpp" + +#include <iostream> +#include <memory> +#include <optional> +#include <string> +#include <string_view> +#include <utility> +#include <vector> + +#include "SimpleGrammar.hpp" +#include "detail/DetectUtf8.hpp" +#include "detail/Errors.hpp" +#include "detail/NullBuff.hpp" +#include "detail/Warnings.hpp" +#include <lexy/action/parse.hpp> +#include <lexy/encoding.hpp> +#include <lexy/input/buffer.hpp> +#include <lexy/input/file.hpp> +#include <lexy/lexeme.hpp> +#include <lexy/visualize.hpp> +#include <lexy_ext/report_error.hpp> +#include <openvic-dataloader/v2script/AbstractSyntaxTree.hpp> + +using namespace ovdl::v2script; + +/// BufferHandler /// + +class Parser::BufferHandler { +public: + bool is_valid() const { + return _buffer.size() != 0; + } + + std::optional<Error> load_buffer(const char* data, std::size_t size) { + _buffer = 
lexy::buffer(data, size); + return std::nullopt; + } + + std::optional<Error> load_buffer(const char* start, const char* end) { + _buffer = lexy::buffer(start, end); + return std::nullopt; + } + + std::optional<Error> load_file(const char* path) { + auto file = lexy::read_file(path); + if (!file) { + return errors::make_no_file_error(path); + } + + _buffer = file.buffer(); + return std::nullopt; + } + + constexpr bool is_exclusive_utf8() const { + return detail::is_utf8_no_ascii(_buffer); + } + + template<typename Node, typename ErrorCallback> + std::optional<std::vector<Error>> parse(const ErrorCallback& callback) { + auto result = lexy::parse<Node>(_buffer, callback); + if (!result) { + std::vector<Error> errors; + return errors; + } + // This is mighty frustrating + _root = std::unique_ptr<ast::Node>(result.value()); + return std::nullopt; + } + + std::unique_ptr<ast::Node>& get_root() { + return _root; + } + +private: + lexy::buffer<lexy::default_encoding> _buffer; + std::unique_ptr<ast::Node> _root; +}; + +/// BufferHandler /// + +Parser::Parser() + : _buffer_handler(std::make_unique<BufferHandler>()), + _error_stream(detail::cnull) { + set_error_log_to_stderr(); +} + +Parser::Parser(Parser&&) = default; +Parser& Parser::operator=(Parser&& value) = default; +Parser::~Parser() = default; + +Parser Parser::from_buffer(const char* data, std::size_t size) { + Parser result; + return std::move(result.load_from_buffer(data, size)); +} + +Parser Parser::from_buffer(const char* start, const char* end) { + Parser result; + return std::move(result.load_from_buffer(start, end)); +} + +Parser Parser::from_file(const char* path) { + Parser result; + return std::move(result.load_from_file(path)); +} + +/// +/// @brief Executes a function on _buffer_handler that is expected to load a buffer +/// +/// Expected Use: +/// @code {.cpp} +/// _run_load_func(&BufferHandler::<load_function>, <arguments>); +/// @endcode +/// +/// @tparam Args +/// @param func +/// @param args +/// 
+template<typename... Args> +inline void Parser::_run_load_func(std::optional<Error> (BufferHandler::*func)(Args...), Args... args) { + _warnings.clear(); + _errors.clear(); + _has_fatal_error = false; + if (auto error = (_buffer_handler.get()->*func)(args...); error) { + _has_fatal_error = error.value().type == Error::Type::Fatal; + _errors.push_back(error.value()); + _error_stream.get() << "Error: " << _errors.back().message << '\n'; + } +} + +Parser& Parser::load_from_buffer(const char* data, std::size_t size) { + _run_load_func(&BufferHandler::load_buffer, data, size); + return *this; +} + +Parser& Parser::load_from_buffer(const char* start, const char* end) { + _run_load_func(&BufferHandler::load_buffer, start, end); + return *this; +} + +Parser& Parser::load_from_file(const char* path) { + _file_path = path; + _run_load_func(&BufferHandler::load_file, path); + return *this; +} + +void Parser::set_error_log_to_null() { + set_error_log_to(detail::cnull); +} + +void Parser::set_error_log_to_stderr() { + set_error_log_to(std::cerr); +} + +void Parser::set_error_log_to_stdout() { + set_error_log_to(std::cout); +} + +void Parser::set_error_log_to(std::basic_ostream<char>& stream) { + _error_stream = stream; +} + +bool Parser::simple_parse() { + if (!_buffer_handler->is_valid()) { + return false; + } + + struct ostream_output_iterator { + std::reference_wrapper<std::ostream> _stream; + + auto operator*() const noexcept { + return *this; + } + auto operator++(int) const noexcept { + return *this; + } + + ostream_output_iterator& operator=(char c) { + _stream.get().put(c); + return *this; + } + }; + + if (_buffer_handler->is_exclusive_utf8()) { + _warnings.push_back(warnings::make_utf8_warning(_file_path)); + } + + auto errors = _buffer_handler->parse<grammar::File>(lexy_ext::report_error.path(_file_path).to(ostream_output_iterator { _error_stream })); + if (errors) { + _errors.reserve(errors->size()); + for (auto& err : errors.value()) { + _has_fatal_error |= 
err.type == Error::Type::Fatal; + _errors.push_back(err); + _error_stream.get() << "Error: " << err.message << '\n'; + } + return false; + } + _file_node.reset(static_cast<ast::FileNode*>(_buffer_handler->get_root().release())); + return true; +} + +bool Parser::has_error() const { + return !_errors.empty(); +} + +bool Parser::has_fatal_error() const { + return _has_fatal_error; +} + +bool Parser::has_warning() const { + return !_warnings.empty(); +} + +const std::vector<Parser::Error>& Parser::get_errors() const { + return _errors; +} + +const std::vector<Parser::Warning>& Parser::get_warnings() const { + return _warnings; +} + +const FileNode* Parser::get_file_node() const { + return _file_node.get(); +}
\ No newline at end of file diff --git a/src/openvic-dataloader/v2script/SimpleGrammar.hpp b/src/openvic-dataloader/v2script/SimpleGrammar.hpp new file mode 100644 index 0000000..48a80ce --- /dev/null +++ b/src/openvic-dataloader/v2script/SimpleGrammar.hpp @@ -0,0 +1,105 @@ +#include <memory> +#include <string> +#include <vector> + +#include "detail/LexyLitRange.hpp" +#include <lexy/callback.hpp> +#include <lexy/dsl.hpp> +#include <openvic-dataloader/v2script/AbstractSyntaxTree.hpp> + +// Grammar Definitions // +namespace ovdl::v2script::grammar { + struct StatementListBlock; + + static constexpr auto whitespace_specifier = lexy::dsl::ascii::blank / lexy::dsl::ascii::newline; + static constexpr auto comment_specifier = LEXY_LIT("#") >> lexy::dsl::until(lexy::dsl::newline).or_eof(); + + static constexpr auto data_specifier = + lexy::dsl::ascii::alpha_digit_underscore / + LEXY_ASCII_ONE_OF("%&'") / lexy::dsl::lit_c<0x2B> / LEXY_ASCII_ONE_OF("-.") / + lexy::dsl::ascii::digit / lexy::dsl::lit_c<0x3A> / + lexy::dsl::lit_c<0x40> / lexy::dsl::ascii::upper / lexy::dsl::lit_c<0x5F> / + lexy::dsl::ascii::lower / lexy::dsl::lit_b<0x8A> / lexy::dsl::lit_b<0x8C> / lexy::dsl::lit_b<0x8E> / + lexy::dsl::lit_b<0x92> / lexy::dsl::lit_b<0x97> / lexy::dsl::lit_b<0x9A> / lexy::dsl::lit_b<0x9C> / lexy::dsl::lit_b<0x9E> / lexy::dsl::lit_b<0x9F> / + lexy::dsl::lit_b<0xC0> / + ovdl::detail::lexydsl::make_range<0xC0, 0xD6>() / ovdl::detail::lexydsl::make_range<0xD8, 0xF6>() / ovdl::detail::lexydsl::make_range<0xF8, 0xFF>(); + + static constexpr auto data_char_class = LEXY_CHAR_CLASS("DataSpecifier", data_specifier); + + struct Identifier { + static constexpr auto rule = lexy::dsl::identifier(data_char_class); + static constexpr auto value = lexy::as_string<std::string> | lexy::new_<ast::IdentifierNode, ast::NodePtr>; + }; + + struct StringExpression { + static constexpr auto escaped_symbols = lexy::symbol_table<char> // + .map<'"'>('"') + .map<'\''>('\'') + .map<'\\'>('\\') + 
.map<'/'>('/') + .map<'b'>('\b') + .map<'f'>('\f') + .map<'n'>('\n') + .map<'r'>('\r') + .map<'t'>('\t'); + static constexpr auto rule = [] { + // Arbitrary code points that aren't control characters. + auto c = ovdl::detail::lexydsl::make_range<0x20, 0xFF>() - lexy::dsl::ascii::control; + + // Escape sequences start with a backlash. + // They either map one of the symbols, + // or a Unicode code point of the form uXXXX. + auto escape = lexy::dsl::backslash_escape // + .symbol<escaped_symbols>(); + return lexy::dsl::quoted(c, escape); + }(); + + static constexpr auto value = lexy::as_string<std::string> >> lexy::new_<ast::StringNode, ast::NodePtr>; + }; + + struct AssignmentStatement { + static constexpr auto rule = + lexy::dsl::p<Identifier> >> + (lexy::dsl::equal_sign >> + (lexy::dsl::p<Identifier> | lexy::dsl::p<StringExpression> | lexy::dsl::recurse_branch<StatementListBlock>) | + lexy::dsl::else_ >> lexy::dsl::return_) | + lexy::dsl::p<StringExpression>; + + static constexpr auto value = lexy::callback<ast::NodePtr>( + [](auto name, lexy::nullopt = {}) { + return LEXY_MOV(name); + }, + [](auto name, auto&& initalizer) { + return make_node_ptr<ast::AssignNode>(LEXY_MOV(name), LEXY_MOV(initalizer)); + }); + }; + + struct StatementListBlock { + static constexpr auto rule = + lexy::dsl::curly_bracketed.open() >> + lexy::dsl::opt(lexy::dsl::list(lexy::dsl::p<AssignmentStatement>)) + lexy::dsl::opt(lexy::dsl::semicolon) + + lexy::dsl::curly_bracketed.close(); + + static constexpr auto value = + lexy::as_list<std::vector<ast::NodePtr>> >> + lexy::callback<ast::NodePtr>( + [](lexy::nullopt = {}, lexy::nullopt = {}) { + return ast::make_node_ptr<ast::ListNode>(); + }, + [](auto&& list, lexy::nullopt = {}) { + return make_node_ptr<ast::ListNode>(LEXY_MOV(list)); + }, + [](auto& list) { + return make_node_ptr<ast::ListNode>(list); + }); + }; + + struct File { + // Allow arbitrary spaces between individual tokens. 
+ static constexpr auto whitespace = whitespace_specifier | comment_specifier; + + static constexpr auto rule = lexy::dsl::terminator(lexy::dsl::eof).list(lexy::dsl::p<AssignmentStatement>); + + static constexpr auto value = lexy::as_list<std::vector<ast::NodePtr>> >> lexy::new_<ast::FileNode, ast::NodePtr>; + }; +} |