aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author Spartan322 <Megacake1234@gmail.com>2023-07-28 06:52:00 +0200
committer Spartan322 <Megacake1234@gmail.com>2023-08-17 09:04:56 +0200
commit90f15b582788a9aab0dfe6c81fc4cbbe1d4d3308 (patch)
treedb58100ed696c992addee1a9113b5415f55615ad
parente941573f47fb867ff75c8a2cf78302b754ffbeee (diff)
Rework Grammar and Parser
Properly construct headless binary with basic validation and print functionality.
Add Error and Warning structs to Parser.
Add FileNode pointer getter to Parser.
Change all `char8_t*` and `const char8_t` to `const char*` in Parser.
Add Parser move operators and Parser destructor.
Add BufferHandler PIMPL object to Parser.
Add UTF-8 file Warning.
Add proper Grammar value retrieval.
Simplify AST node resolution for Grammar.
Add AbstractSyntaxTree for v2script data parser: has compile-time embedded type information accessible at compile-time and runtime, and optionally compiled AST print functionality.
Add detail/TypeName.hpp.
Add detail/SelfType.hpp.
Add detail/DetectUtf8.hpp.
Add detail/Errors.hpp.
Add detail/Warnings.hpp.
Add `OPENVIC_DATALOADER_PRINT_NODES` for headless construction.
Fix wrong environment reference for headless construction in SConstruct.
-rw-r--r--SConstruct3
-rw-r--r--include/openvic-dataloader/detail/SelfType.hpp24
-rw-r--r--include/openvic-dataloader/detail/TypeName.hpp52
-rw-r--r--include/openvic-dataloader/v2script/AbstractSyntaxTree.hpp174
-rw-r--r--include/openvic-dataloader/v2script/Parser.hpp74
-rw-r--r--src/headless/main.cpp34
-rw-r--r--src/openvic-dataloader/detail/DetectUtf8.hpp52
-rw-r--r--src/openvic-dataloader/detail/Errors.hpp19
-rw-r--r--src/openvic-dataloader/detail/LexyLitRange.hpp16
-rw-r--r--src/openvic-dataloader/detail/NullBuff.hpp30
-rw-r--r--src/openvic-dataloader/detail/Warnings.hpp21
-rw-r--r--src/openvic-dataloader/v2script/DecisionGrammar.hpp14
-rw-r--r--src/openvic-dataloader/v2script/EventGrammar.hpp14
-rw-r--r--src/openvic-dataloader/v2script/Grammar.cpp74
-rw-r--r--src/openvic-dataloader/v2script/Parser.cpp224
-rw-r--r--src/openvic-dataloader/v2script/SimpleGrammar.hpp105
16 files changed, 843 insertions, 87 deletions
diff --git a/SConstruct b/SConstruct
index f2a66bc..68158cf 100644
--- a/SConstruct
+++ b/SConstruct
@@ -262,11 +262,12 @@ if env["build_ovdl_headless"]:
headless_env = env.Clone()
headless_path = ["src/headless"]
headless_env.Append(CPPDEFINES=["OPENVIC_DATALOADER_HEADLESS"])
+ headless_env.Append(CPPDEFINES=["OPENVIC_DATALOADER_PRINT_NODES"])
headless_env.Append(CPPPATH=[headless_env.Dir(headless_path)])
headless_env.headless_sources = GlobRecursive("*.cpp", headless_path)
if not env["build_ovdl_library"]:
headless_env.headless_sources += sources
- headless_program = env.Program(
+ headless_program = headless_env.Program(
target="bin/%s" % headless_name,
source=headless_env.headless_sources,
PROGSUFFIX=".headless" + env["PROGSUFFIX"]
diff --git a/include/openvic-dataloader/detail/SelfType.hpp b/include/openvic-dataloader/detail/SelfType.hpp
new file mode 100644
index 0000000..5366aef
--- /dev/null
+++ b/include/openvic-dataloader/detail/SelfType.hpp
@@ -0,0 +1,24 @@
+#pragma once
+
+#include <type_traits>
+
+namespace ovdl::detail {
+ // Compile-time "type slot" helpers built on friend injection.
+ // Reader<T> declares a friend function keyed on the tag T; instantiating
+ // Writer<T, U> defines that same function so that it returns a U. Read<T>
+ // then recovers the stored type from the function's deduced return type.
+ // OVDL_TYPE_DEFINE_SELF (AbstractSyntaxTree.hpp) stores `decltype(this)`
+ // this way to obtain the enclosing class type.
+
+ // The declaration inside Reader is a non-template friend of a class template,
+ // which GCC reports via -Wnon-template-friend; that warning is disabled for
+ // the two structs below and restored by the `pop`.
+#pragma GCC diagnostic push
+#pragma clang diagnostic ignored "-Wunknown-warning-option"
+#pragma GCC diagnostic ignored "-Wnon-template-friend"
+ // Declares (but does not define) adl_GetSelfType for the tag T. The return
+ // type is `auto`, so it is only known once a Writer<T, U> has been instantiated.
+ template<typename T>
+ struct Reader {
+ friend auto adl_GetSelfType(Reader<T>);
+ };
+
+ // Defines adl_GetSelfType for the tag T so that its return type is U.
+ template<typename T, typename U>
+ struct Writer {
+ friend auto adl_GetSelfType(Reader<T>) { return U {}; }
+ };
+#pragma GCC diagnostic pop
+
+ // Namespace-scope overload taking no arguments.
+ // NOTE(review): presumably exists so the name is found by ordinary lookup
+ // before ADL selects the friend overloads - confirm before removing.
+ inline void adl_GetSelfType() {}
+
+ // Type stored for the tag T with one level of pointer removed (the macro
+ // stores `decltype(this)`, i.e. a pointer to the class).
+ template<typename T>
+ using Read = std::remove_pointer_t<decltype(adl_GetSelfType(Reader<T> {}))>;
+}
diff --git a/include/openvic-dataloader/detail/TypeName.hpp b/include/openvic-dataloader/detail/TypeName.hpp
new file mode 100644
index 0000000..e9f27d3
--- /dev/null
+++ b/include/openvic-dataloader/detail/TypeName.hpp
@@ -0,0 +1,52 @@
+#pragma once
+
+#include <array>
+#include <cstddef>
+#include <string_view>
+#include <utility>
+
+namespace ovdl::detail {
+
+ /// Copies the characters `str[Idxs]...` into a `std::array<char, sizeof...(Idxs)>`.
+ ///
+ /// The array holds exactly the selected characters; nothing is appended, so it
+ /// must be read through its size (e.g. as a std::string_view) and never as a
+ /// C string. At least one index is required for the array type to be deduced.
+ template<std::size_t... Idxs>
+ constexpr auto substring_as_array(std::string_view str, std::index_sequence<Idxs...>) {
+     return std::array { str[Idxs]... };
+ }
+
+ /// Extracts the spelling of `T` from the compiler's "pretty function" string
+ /// of this very function and returns it as a character array.
+ ///
+ /// Each supported compiler wraps the type name in a known prefix/suffix; the
+ /// text between them is the name. Unsupported compilers are rejected with
+ /// `#error`.
+ template<typename T>
+ constexpr auto type_name_array() {
+#if defined(__clang__)
+     constexpr auto prefix = std::string_view { "[T = " };
+     constexpr auto suffix = std::string_view { "]" };
+     constexpr auto function = std::string_view { __PRETTY_FUNCTION__ };
+#elif defined(__GNUC__)
+     constexpr auto prefix = std::string_view { "with T = " };
+     constexpr auto suffix = std::string_view { "]" };
+     constexpr auto function = std::string_view { __PRETTY_FUNCTION__ };
+#elif defined(_MSC_VER)
+     constexpr auto prefix = std::string_view { "type_name_array<" };
+     constexpr auto suffix = std::string_view { ">(void)" };
+     constexpr auto function = std::string_view { __FUNCSIG__ };
+#else
+#error Unsupported compiler
+#endif
+
+     constexpr auto prefix_pos = function.find(prefix);
+     constexpr auto end = function.rfind(suffix);
+
+     // Fail loudly if a compiler changes its signature format instead of
+     // silently computing offsets from npos.
+     static_assert(prefix_pos != std::string_view::npos, "type name prefix not found in function signature");
+     static_assert(end != std::string_view::npos, "type name suffix not found in function signature");
+
+     constexpr auto start = prefix_pos + prefix.size();
+
+     static_assert(start < end);
+
+     constexpr auto name = function.substr(start, (end - start));
+     return substring_as_array(name, std::make_index_sequence<name.size()> {});
+ }
+
+ /// Gives the character array of `type_name_array<T>()` static storage so that
+ /// views into it stay valid for the whole program.
+ template<typename T>
+ struct type_name_holder {
+     static inline constexpr auto value = type_name_array<T>();
+ };
+
+ /// Returns the compiler's spelling of `T` (exact text is compiler-specific).
+ /// The view refers to static storage and contains only the name itself.
+ template<typename T>
+ constexpr auto type_name() -> std::string_view {
+     constexpr auto& value = type_name_holder<T>::value;
+     return std::string_view { value.data(), value.size() };
+ }
+} \ No newline at end of file
diff --git a/include/openvic-dataloader/v2script/AbstractSyntaxTree.hpp b/include/openvic-dataloader/v2script/AbstractSyntaxTree.hpp
new file mode 100644
index 0000000..80485b7
--- /dev/null
+++ b/include/openvic-dataloader/v2script/AbstractSyntaxTree.hpp
@@ -0,0 +1,174 @@
+#pragma once
+
+#include <memory>
+#include <string>
+#include <string_view>
+#include <type_traits>
+#include <utility>
+#include <vector>
+
+#include <openvic-dataloader/detail/SelfType.hpp>
+#include <openvic-dataloader/detail/TypeName.hpp>
+
+// Printing support is compiled in only when OPENVIC_DATALOADER_PRINT_NODES is
+// defined. In that case OVDL_PRINT_FUNC_DECL declares the pure virtual
+// `print(std::ostream&)` and OVDL_PRINT_FUNC_DEF(body) defines its override
+// with `body` as the function body. Otherwise both macros expand to nothing,
+// so node classes carry no print member at all.
+#ifdef OPENVIC_DATALOADER_PRINT_NODES
+#include <iostream>
+
+#define OVDL_PRINT_FUNC_DECL virtual void print(std::ostream& stream) const = 0
+#define OVDL_PRINT_FUNC_DEF(...) \
+ void print(std::ostream& stream) const override __VA_ARGS__
+#else
+#define OVDL_PRINT_FUNC_DECL
+#define OVDL_PRINT_FUNC_DEF(...)
+#endif
+
+// defines get_type_static and get_type for string type naming
+// (requires the `type` alias introduced by OVDL_TYPE_DEFINE_SELF, and a base
+// class with a virtual get_type() to override)
+#define OVDL_RT_TYPE_DEF \
+ static constexpr std::string_view get_type_static() { return ::ovdl::detail::type_name<type>(); } \
+ constexpr std::string_view get_type() const override { return ::ovdl::detail::type_name<std::decay_t<decltype(*this)>>(); }
+
+// defines type for self-class referencing
+// (stores `decltype(this)` under a per-class tag via detail::Writer and reads
+// it back, pointer removed, via detail::Read)
+#define OVDL_TYPE_DEFINE_SELF \
+ struct _self_type_tag {}; \
+ constexpr auto _self_type_helper()->decltype(::ovdl::detail::Writer<_self_type_tag, decltype(this)> {}); \
+ using type = ::ovdl::detail::Read<_self_type_tag>;
+
+namespace ovdl::v2script::ast {
+ /// Abstract base of every AST node. Nodes are movable but not copyable and
+ /// expose their concrete type as a string through get_type().
+ struct Node {
+     Node() = default;
+     virtual ~Node() = default;
+
+     // Move-only.
+     Node(Node&&) = default;
+     Node& operator=(Node&&) = default;
+     Node(const Node&) = delete;
+     Node& operator=(const Node&) = delete;
+
+     OVDL_PRINT_FUNC_DECL;
+
+     /// Type name of Node itself.
+     static constexpr std::string_view get_type_static() { return detail::type_name<Node>(); }
+     /// Type name of the most derived node; implemented by OVDL_RT_TYPE_DEF.
+     constexpr virtual std::string_view get_type() const = 0;
+
+     /// True when this node's dynamic type name equals the type name of `T`.
+     template<typename T>
+     constexpr bool is_type() const {
+         return get_type() == detail::type_name<T>();
+     }
+ };
+
+ using NodePtr = Node*;
+ using NodeUPtr = std::unique_ptr<Node>;
+
+ /// Allocates a `T` from `args` with `new` and returns it as a NodePtr.
+ /// The caller (normally the parent node's constructor) becomes responsible
+ /// for the allocation when NodePtr is a raw pointer.
+ template<class T, class... Args>
+ NodePtr make_node_ptr(Args&&... args) {
+     T* const created = new T(std::forward<Args>(args)...);
+     if constexpr (std::is_pointer_v<NodePtr>) {
+         return created;
+     } else {
+         return NodePtr(created);
+     }
+ }
+
+ /// Reinterprets the node behind `from` as a `To` and returns a reference to it.
+ /// This is an unchecked static_cast: the caller must already know the dynamic
+ /// type (e.g. via Node::is_type) and `from` must not be null.
+ template<typename To, typename From>
+ To& cast_node_ptr(const From& from) {
+     if constexpr (!std::is_pointer_v<NodePtr>) {
+         return *static_cast<To*>(from.get());
+     } else {
+         return *static_cast<To*>(from);
+     }
+ }
+
+ /// Wraps every raw pointer of `ptrs` in a NodeUPtr, preserving order.
+ /// Ownership of each pointee passes to the returned vector; `ptrs` itself is
+ /// left untouched and must not be used to delete the nodes afterwards.
+ constexpr std::vector<NodeUPtr> make_node_ptr_vector(const std::vector<NodePtr>& ptrs) {
+     std::vector<NodeUPtr> owned;
+     owned.reserve(ptrs.size());
+     for (const NodePtr raw : ptrs) {
+         owned.push_back(NodeUPtr { raw });
+     }
+     return owned;
+ }
+
+ /// Leaf node holding a bare identifier. Prints as the identifier text.
+ struct IdentifierNode final : public Node {
+     std::string _name;
+
+     explicit IdentifierNode(std::string name) : _name { std::move(name) } {}
+
+     OVDL_TYPE_DEFINE_SELF;
+     OVDL_RT_TYPE_DEF;
+
+     OVDL_PRINT_FUNC_DEF({
+         // Written as a C string, so output stops at an embedded NUL.
+         const char* const text = _name.c_str();
+         stream << text;
+     })
+ };
+
+ /// Leaf node holding the contents of a quoted string. Prints as the text
+ /// wrapped in double quotes (no re-escaping is performed).
+ struct StringNode final : public Node {
+     std::string _name;
+
+     explicit StringNode(std::string name) : _name { std::move(name) } {}
+
+     OVDL_TYPE_DEFINE_SELF;
+     OVDL_RT_TYPE_DEF;
+
+     OVDL_PRINT_FUNC_DEF({
+         // Written as a C string, so output stops at an embedded NUL.
+         const char* const text = _name.c_str();
+         stream << '"';
+         stream << text;
+         stream << '"';
+     })
+ };
+
+ /// `name = initializer` statement.
+ ///
+ /// Both constructor arguments are owning raw pointers (as produced by
+ /// make_node_ptr / lexy::new_): `init` is kept as the initializer, while `name`
+ /// is consumed - its identifier text is moved into `_name` and the node itself
+ /// is destroyed. If `name` is not an IdentifierNode, `_name` stays empty.
+ struct AssignNode final : public Node {
+     std::string _name;
+     NodeUPtr _initializer;
+     explicit AssignNode(NodePtr name, NodePtr init)
+         : _initializer(init) {
+         // Adopt the name node so it is released when this constructor returns;
+         // previously it was never deleted and leaked on every assignment.
+         const NodeUPtr owned_name(name);
+         if (owned_name && owned_name->is_type<IdentifierNode>()) {
+             _name = std::move(cast_node_ptr<IdentifierNode>(name)._name);
+         }
+     }
+
+     OVDL_TYPE_DEFINE_SELF;
+     OVDL_RT_TYPE_DEF;
+
+     OVDL_PRINT_FUNC_DEF({
+         stream << _name.c_str() << " = ";
+         _initializer->print(stream);
+     })
+ };
+
+ /// Brace-delimited list of statements. Takes ownership of every pointer in
+ /// `statements`. Prints as `{a b c}` with single spaces between entries.
+ struct ListNode final : public Node {
+     std::vector<NodeUPtr> _statements;
+     explicit ListNode(std::vector<NodePtr> statements = std::vector<NodePtr> {})
+         : _statements(make_node_ptr_vector(statements)) {
+     }
+
+     OVDL_TYPE_DEFINE_SELF;
+     OVDL_RT_TYPE_DEF;
+
+     OVDL_PRINT_FUNC_DEF({
+         stream << '{';
+         // Range-for with a separator flag avoids comparing a signed index
+         // against the unsigned container size.
+         bool needs_separator = false;
+         for (const NodeUPtr& statement : _statements) {
+             if (needs_separator) {
+                 stream << ' ';
+             }
+             statement->print(stream);
+             needs_separator = true;
+         }
+         stream << '}';
+     })
+ };
+
+ /// Root node of a parsed file: the top-level statements in source order.
+ /// Takes ownership of every pointer in `statements`. When printing, each
+ /// statement is followed by a line of `=` characters.
+ struct FileNode final : public Node {
+     std::vector<NodeUPtr> _statements;
+
+     FileNode() {}
+     explicit FileNode(std::vector<NodePtr> statements)
+         : _statements { make_node_ptr_vector(statements) } {}
+
+     OVDL_TYPE_DEFINE_SELF;
+     OVDL_RT_TYPE_DEF;
+
+     OVDL_PRINT_FUNC_DEF({
+         for (const NodeUPtr& entry : _statements) {
+             entry->print(stream);
+             stream << "\n===========\n";
+         }
+     })
+ };
+}
+
+#undef OVDL_PRINT_FUNC_DECL
+#undef OVDL_PRINT_FUNC_DEF
+#undef OVDL_TYPE_DEFINE_SELF \ No newline at end of file
diff --git a/include/openvic-dataloader/v2script/Parser.hpp b/include/openvic-dataloader/v2script/Parser.hpp
index 53aab90..dbbec73 100644
--- a/include/openvic-dataloader/v2script/Parser.hpp
+++ b/include/openvic-dataloader/v2script/Parser.hpp
@@ -1,27 +1,79 @@
#pragma once
#include <cstddef>
-#include <cstdio>
+#include <functional>
+#include <memory>
+#include <optional>
#include <ostream>
+#include <string>
+#include <vector>
+
+#include <openvic-dataloader/v2script/AbstractSyntaxTree.hpp>
namespace ovdl::v2script {
+
+ using FileNode = ast::FileNode;
+
+ /// Loads v2script text from a buffer or file, parses it into an AST and
+ /// collects the errors and warnings produced along the way.
+ /// Movable, not copyable. Diagnostics are echoed to a configurable stream.
class Parser {
public:
- static Parser from_buffer(char8_t* data, std::size_t size);
- static Parser from_buffer(char8_t* start, char8_t* end);
- static Parser from_file(const char8_t* path);
+ /// A problem found while loading or parsing.
+ struct Error {
+ const enum class Type : unsigned char {
+ Recoverable,
+ Fatal
+ } type;
+ const std::string message;
+ const int error_value;
+ };
+
+ /// A non-fatal diagnostic.
+ struct Warning {
+ const std::string message;
+ const int warning_value;
+ };
+
+ Parser();
+
+ // Convenience factories: construct a Parser and load the given input.
+ static Parser from_buffer(const char* data, std::size_t size);
+ static Parser from_buffer(const char* start, const char* end);
+ static Parser from_file(const char* path);
+
+ // Replace the current input. Previous errors and warnings are discarded.
+ // load_from_file stores `path` without copying it, so the string must
+ // outlive any later call to simple_parse().
+ Parser& load_from_buffer(const char* data, std::size_t size);
+ Parser& load_from_buffer(const char* start, const char* end);
+ Parser& load_from_file(const char* path);
+
+ // Select where diagnostics are written. The stream is held by reference
+ // and must outlive the Parser (or be replaced before it is destroyed).
+ void set_error_log_to_null();
void set_error_log_to_stderr();
- void set_error_log_path(const char8_t* path);
- void set_error_log_to(std::basic_ostream<char8_t> stream);
- void set_error_log_to(std::FILE* file);
+ void set_error_log_to_stdout();
+ void set_error_log_to(std::basic_ostream<char>& stream);
- bool parse();
+ /// Parses the loaded input; returns true when a file node was produced.
+ bool simple_parse();
- bool has_error();
- bool has_warning();
+ bool has_error() const;
+ bool has_fatal_error() const;
+ bool has_warning() const;
+
+ const std::vector<Error>& get_errors() const;
+ const std::vector<Warning>& get_warnings() const;
+
+ /// Root of the last successful parse, or null if there is none.
+ const FileNode* get_file_node() const;
+
+ Parser(Parser&&);
+ Parser& operator=(Parser&&);
+
+ ~Parser();
private:
- Parser();
+ std::vector<Error> _errors;
+ std::vector<Warning> _warnings;
+
+ // Input storage is hidden behind a pointer so this header does not need
+ // the lexy buffer types; BufferHandler is defined in Parser.cpp.
+ class BufferHandler;
+ friend class BufferHandler;
+ std::unique_ptr<BufferHandler> _buffer_handler;
+ std::unique_ptr<FileNode> _file_node;
+ std::reference_wrapper<std::ostream> _error_stream;
+ // Null until load_from_file() is called. Must be initialised because
+ // simple_parse() reads it even when the input came from a buffer.
+ const char* _file_path = nullptr;
+ bool _has_fatal_error = false;
+
+ template<typename... Args>
+ inline void _run_load_func(std::optional<Error> (BufferHandler::*func)(Args...), Args... args);
};
} \ No newline at end of file
diff --git a/src/headless/main.cpp b/src/headless/main.cpp
index ffc6dab..afd569f 100644
--- a/src/headless/main.cpp
+++ b/src/headless/main.cpp
@@ -1,3 +1,35 @@
-int main() {
+#include <cstdio>
+#include <iostream>
+#include <string>
+#include <vector>
+
+#include <openvic-dataloader/v2script/Parser.hpp>
+
+/// Headless entry point: loads the file named by argv[1], parses it and, when
+/// built with OPENVIC_DATALOADER_PRINT_NODES, prints the resulting tree.
+///
+/// Exit codes: 0 on success, 1 for a usage or load error, 2 for a parse failure.
+int main(int argc, char** argv) {
+    if (argc < 2) {
+        std::fprintf(stderr, "usage: %s <filename>", argv[0]);
+        return 1;
+    }
+
+    auto parser = ovdl::v2script::Parser::from_file(argv[1]);
+    if (parser.has_error()) {
+        return 1;
+    }
+
+    // simple_parse() can report failure through its return value without
+    // recording an Error (e.g. an empty input), so both must be checked.
+    if (!parser.simple_parse() || parser.has_error()) {
+        return 2;
+    }
+
+    if (parser.has_warning()) {
+        for (auto& warning : parser.get_warnings()) {
+            std::cerr << "Warning: " << warning.message << std::endl;
+        }
+    }
+
+#ifdef OPENVIC_DATALOADER_PRINT_NODES
+    // Never dereference a missing tree.
+    if (const auto* file_node = parser.get_file_node()) {
+        file_node->print(std::cout);
+    }
+#endif
+
 return 0;
} \ No newline at end of file
diff --git a/src/openvic-dataloader/detail/DetectUtf8.hpp b/src/openvic-dataloader/detail/DetectUtf8.hpp
new file mode 100644
index 0000000..b2969ce
--- /dev/null
+++ b/src/openvic-dataloader/detail/DetectUtf8.hpp
@@ -0,0 +1,52 @@
+#pragma once
+
+#include "detail/LexyLitRange.hpp"
+#include <lexy/action/match.hpp>
+#include <lexy/dsl.hpp>
+
+namespace ovdl::detail {
+ namespace detect_utf8 {
+
+ // lexy production that scans input for UTF-8 shaped byte sequences.
+ // A context flag records whether a multi-byte sequence was seen; its
+ // initial value is INCLUDE_ASCII, so with INCLUDE_ASCII == true the final
+ // flag check passes even when only ASCII bytes were consumed.
+ template<bool INCLUDE_ASCII>
+ struct DetectUtf8 {
+ // Error tag raised when the flag is still unset after scanning.
+ struct not_utf8 {
+ static constexpr auto name = "not utf8";
+ };
+
+ static constexpr auto rule = [] {
+ constexpr auto is_not_ascii_flag = lexy::dsl::context_flag<DetectUtf8>;
+
+ // Byte classes; each comment shows the mask test the range corresponds to.
+ // & 0b10000000 == 0b00000000
+ constexpr auto ascii_values = lexydsl::make_range<0b00000000, 0b01111111>();
+ // & 0b11100000 == 0b11000000
+ constexpr auto two_byte = lexydsl::make_range<0b11000000, 0b11011111>();
+ // & 0b11110000 == 0b11100000
+ constexpr auto three_byte = lexydsl::make_range<0b11100000, 0b11101111>();
+ // & 0b11111000 == 0b11110000
+ constexpr auto four_byte = lexydsl::make_range<0b11110000, 0b11110111>();
+ // & 0b11000000 == 0b10000000
+ constexpr auto check_bytes = lexydsl::make_range<0b10000000, 0b10111111>();
+
+ // A lead byte commits to its branch and must be followed by the matching
+ // number of continuation bytes; a complete sequence sets the flag.
+ constexpr auto utf8_check =
+ ((four_byte >> lexy::dsl::times<3>(check_bytes)) |
+ (three_byte >> lexy::dsl::times<2>(check_bytes)) |
+ (two_byte >> lexy::dsl::times<1>(check_bytes))) >>
+ is_not_ascii_flag.set();
+
+ // Create the flag, consume sequences/ASCII while they match, then
+ // require the flag to be set.
+ // NOTE(review): the rule does not require eof, so input is presumably
+ // only inspected up to the first byte that starts neither alternative
+ // - confirm against lexy::match semantics.
+ return is_not_ascii_flag.template create<INCLUDE_ASCII>() +
+ lexy::dsl::while_(utf8_check | ascii_values) +
+ lexy::dsl::must(is_not_ascii_flag.is_set()).template error<not_utf8>;
+ }();
+ };
+ }
+
+ // Matches `input` against DetectUtf8<false>: the flag starts unset, so at
+ // least one complete multi-byte sequence has to be consumed for a match.
+ template<typename Input>
+ constexpr bool is_utf8_no_ascii(const Input& input) {
+ return lexy::match<detect_utf8::DetectUtf8<false>>(input);
+ }
+
+ // Matches `input` against DetectUtf8<true>: the flag starts set, so
+ // ASCII-only input matches as well.
+ template<typename Input>
+ constexpr bool is_utf8(const Input& input) {
+ return lexy::match<detect_utf8::DetectUtf8<true>>(input);
+ }
+} \ No newline at end of file
diff --git a/src/openvic-dataloader/detail/Errors.hpp b/src/openvic-dataloader/detail/Errors.hpp
new file mode 100644
index 0000000..f8ed21b
--- /dev/null
+++ b/src/openvic-dataloader/detail/Errors.hpp
@@ -0,0 +1,19 @@
+#pragma once
+
+#include "openvic-dataloader/v2script/Parser.hpp"
+
+namespace ovdl::v2script::errors {
+ /// Builds the fatal Error (value 1) reported when a file could not be loaded.
+ /// @param file_path Path that failed to load, or null when none was given.
+ inline const v2script::Parser::Error make_no_file_error(const char* file_path) {
+     const std::string message = file_path
+         ? "File '" + std::string(file_path) + "' was not found."
+         : std::string("File path not specified.");
+
+     return v2script::Parser::Error { Parser::Error::Type::Fatal, message, 1 };
+ }
+}
+
+namespace ovdl::ovscript::errors {
+} \ No newline at end of file
diff --git a/src/openvic-dataloader/detail/LexyLitRange.hpp b/src/openvic-dataloader/detail/LexyLitRange.hpp
new file mode 100644
index 0000000..a6761a8
--- /dev/null
+++ b/src/openvic-dataloader/detail/LexyLitRange.hpp
@@ -0,0 +1,16 @@
+#pragma once
+
+#include <lexy/dsl/literal.hpp>
+
+namespace ovdl::detail::lexydsl {
+ /// Builds a lexy rule matching any single byte in the inclusive range
+ /// [LOW, HIGH] by chaining `lit_c` alternatives, one per value.
+ /// LOW must not be greater than HIGH.
+ template<unsigned char LOW, unsigned char HIGH>
+ consteval auto make_range() {
+     // Without this, an inverted range recurses until LOW + 1 no longer fits
+     // in unsigned char and fails with an unrelated narrowing diagnostic.
+     static_assert(LOW <= HIGH, "make_range requires LOW <= HIGH");
+     if constexpr (LOW == HIGH) {
+         return lexy::dsl::lit_c<LOW>;
+     } else if constexpr (LOW == (HIGH - 1)) {
+         return lexy::dsl::lit_c<LOW> / lexy::dsl::lit_c<HIGH>;
+     } else {
+         return lexy::dsl::lit_c<LOW> / make_range<LOW + 1, HIGH>();
+     }
+ }
+} \ No newline at end of file
diff --git a/src/openvic-dataloader/detail/NullBuff.hpp b/src/openvic-dataloader/detail/NullBuff.hpp
new file mode 100644
index 0000000..baf9e1b
--- /dev/null
+++ b/src/openvic-dataloader/detail/NullBuff.hpp
@@ -0,0 +1,30 @@
+#pragma once
+
+#include <ostream>
+
+namespace ovdl::detail {
+ /// Stream buffer that discards everything written to it.
+ template<class cT, class traits = std::char_traits<cT>>
+ class basic_nullbuf : public std::basic_streambuf<cT, traits> {
+     // There is no put area, so every character arrives here; reporting
+     // success without storing it is what makes the buffer a sink.
+     typename traits::int_type overflow(typename traits::int_type c) override {
+         return traits::not_eof(c); // indicate success
+     }
+ };
+
+ /// Output stream backed by a basic_nullbuf: accepts any output and drops it.
+ template<class cT, class traits = std::char_traits<cT>>
+ class basic_onullstream : public std::basic_ostream<cT, traits> {
+ public:
+     // Only the address of m_sbuf is handed to the bases here; the buffer
+     // itself is constructed afterwards, before any output can happen.
+     basic_onullstream() : std::basic_ios<cT, traits>(&m_sbuf),
+                           std::basic_ostream<cT, traits>(&m_sbuf) {
+         std::basic_ios<cT, traits>::init(&m_sbuf);
+     }
+
+ private:
+     basic_nullbuf<cT, traits> m_sbuf;
+ };
+
+ typedef basic_onullstream<char> onullstream;
+ typedef basic_onullstream<wchar_t> wonullstream;
+
+ /// Shared narrow and wide null streams.
+ inline onullstream cnull;
+ // Was declared as the narrow onullstream; the wide sink must use wchar_t.
+ inline wonullstream wcnull;
+} \ No newline at end of file
diff --git a/src/openvic-dataloader/detail/Warnings.hpp b/src/openvic-dataloader/detail/Warnings.hpp
new file mode 100644
index 0000000..f854fa8
--- /dev/null
+++ b/src/openvic-dataloader/detail/Warnings.hpp
@@ -0,0 +1,21 @@
+#pragma once
+
+#include "openvic-dataloader/v2script/Parser.hpp"
+
+namespace ovdl::v2script::warnings {
+ /// Builds the Warning (value 1) emitted when the input looks UTF-8 encoded.
+ /// @param file_path Path of the offending file, or null when the input was a
+ ///        raw buffer; this selects the wording of the message.
+ inline const v2script::Parser::Warning make_utf8_warning(const char* file_path) {
+     constexpr std::string_view message_suffix = "This may cause problems. Prefer Windows-1252 encoding.";
+
+     std::string message = file_path
+         ? "File '" + std::string(file_path) + "' is a UTF-8 encoded file. "
+         : std::string("Buffer is a UTF-8 encoded string. ");
+     message += message_suffix;
+
+     return v2script::Parser::Warning { message, 1 };
+ }
+}
+
+namespace ovdl::ovscript::warnings {
+} \ No newline at end of file
diff --git a/src/openvic-dataloader/v2script/DecisionGrammar.hpp b/src/openvic-dataloader/v2script/DecisionGrammar.hpp
new file mode 100644
index 0000000..ebc9ad2
--- /dev/null
+++ b/src/openvic-dataloader/v2script/DecisionGrammar.hpp
@@ -0,0 +1,14 @@
+#pragma once
+
+#include <memory>
+#include <string>
+#include <vector>
+
+#include <lexy/callback.hpp>
+#include <lexy/dsl.hpp>
+#include <openvic-dataloader/v2script/AbstractSyntaxTree.hpp>
+
+// Decision Grammar Definitions //
+namespace ovdl::v2script::grammar {
+
+} \ No newline at end of file
diff --git a/src/openvic-dataloader/v2script/EventGrammar.hpp b/src/openvic-dataloader/v2script/EventGrammar.hpp
new file mode 100644
index 0000000..7ab40d4
--- /dev/null
+++ b/src/openvic-dataloader/v2script/EventGrammar.hpp
@@ -0,0 +1,14 @@
+#pragma once
+
+#include <memory>
+#include <string>
+#include <vector>
+
+#include <lexy/callback.hpp>
+#include <lexy/dsl.hpp>
+#include <openvic-dataloader/v2script/AbstractSyntaxTree.hpp>
+
+// Event Grammar Definitions //
+namespace ovdl::v2script::grammar {
+
+} \ No newline at end of file
diff --git a/src/openvic-dataloader/v2script/Grammar.cpp b/src/openvic-dataloader/v2script/Grammar.cpp
deleted file mode 100644
index ec9fac2..0000000
--- a/src/openvic-dataloader/v2script/Grammar.cpp
+++ /dev/null
@@ -1,74 +0,0 @@
-#include <lexy/dsl.hpp>
-#include <openvic-dataloader/v2script/Parser.hpp>
-
-using namespace ovdl::v2script;
-
-// Node Definitions //
-namespace dsl = lexy::dsl;
-
-namespace ovdl::v2script::nodes {
- struct StatementListBlock;
-
- static constexpr auto whitespace_specifier = dsl::code_point.range<0x09, 0x0A>() / dsl::lit_cp<0x0D> / dsl::lit_cp<0x20>;
- static constexpr auto comment_specifier = LEXY_LIT("#") >> dsl::until(dsl::newline).or_eof();
-
- static constexpr auto data_specifier =
- dsl::ascii::alpha_digit_underscore /
- dsl::code_point.range<0x25, 0x27>() / dsl::lit_cp<0x2B> / dsl::code_point.range<0x2D, 0x2E>() /
- dsl::lit_cp<0x3A> /
- dsl::lit_cp<0x8A> / dsl::lit_cp<0x8C> / dsl::lit_cp<0x8E> /
- dsl::lit_cp<0x92> / dsl::lit_cp<0x9A> / dsl::lit_cp<0x9C> / dsl::code_point.range<0x9E, 0x9F>() /
- dsl::code_point.range<0xC0, 0xD6>() / dsl::code_point.range<0xD8, 0xF6>() / dsl::code_point.range<0xF8, 0xFF>();
-
- static constexpr auto data_char_class = LEXY_CHAR_CLASS("DataSpecifier", data_specifier);
-
- struct Identifier {
- static constexpr auto rule = dsl::identifier(data_char_class);
- };
-
- struct StringExpression {
- static constexpr auto escaped_symbols = lexy::symbol_table<char> //
- .map<'"'>('"')
- .map<'\''>('\'')
- .map<'\\'>('\\')
- .map<'/'>('/')
- .map<'b'>('\b')
- .map<'f'>('\f')
- .map<'n'>('\n')
- .map<'r'>('\r')
- .map<'t'>('\t');
- static constexpr auto rule = [] {
- // Arbitrary code points that aren't control characters.
- auto c = -dsl::unicode::control;
-
- // Escape sequences start with a backlash.
- // They either map one of the symbols,
- // or a Unicode code point of the form uXXXX.
- auto escape = dsl::backslash_escape //
- .symbol<escaped_symbols>()
- .rule(dsl::lit_c<'u'> >> dsl::code_point_id<4>);
- return dsl::quoted(c, escape);
- }();
- };
-
- struct AssignmentStatement {
- static constexpr auto rule = dsl::p<Identifier> >>
- (dsl::equal_sign >>
- (dsl::p<Identifier> | dsl::p<StringExpression> | dsl::recurse_branch<StatementListBlock>) |
- dsl::else_ >> dsl::return_);
- };
-
- struct StatementListBlock {
- static constexpr auto rule =
- dsl::curly_bracketed.open() >>
- dsl::opt(dsl::list(dsl::p<AssignmentStatement>)) + dsl::opt(dsl::semicolon) +
- dsl::curly_bracketed.close();
- };
-
- struct File {
- // Allow arbitrary spaces between individual tokens.
- static constexpr auto whitespace = whitespace_specifier | comment_specifier;
-
- static constexpr auto rule = dsl::terminator(dsl::eof).list(dsl::p<AssignmentStatement>);
- };
-}
diff --git a/src/openvic-dataloader/v2script/Parser.cpp b/src/openvic-dataloader/v2script/Parser.cpp
new file mode 100644
index 0000000..c0b6bd8
--- /dev/null
+++ b/src/openvic-dataloader/v2script/Parser.cpp
@@ -0,0 +1,224 @@
+#include "openvic-dataloader/v2script/Parser.hpp"
+
+#include <iostream>
+#include <memory>
+#include <optional>
+#include <string>
+#include <string_view>
+#include <utility>
+#include <vector>
+
+#include "SimpleGrammar.hpp"
+#include "detail/DetectUtf8.hpp"
+#include "detail/Errors.hpp"
+#include "detail/NullBuff.hpp"
+#include "detail/Warnings.hpp"
+#include <lexy/action/parse.hpp>
+#include <lexy/encoding.hpp>
+#include <lexy/input/buffer.hpp>
+#include <lexy/input/file.hpp>
+#include <lexy/lexeme.hpp>
+#include <lexy/visualize.hpp>
+#include <lexy_ext/report_error.hpp>
+#include <openvic-dataloader/v2script/AbstractSyntaxTree.hpp>
+
+using namespace ovdl::v2script;
+
+/// BufferHandler ///
+
+/// Owns the raw input buffer and the AST root produced from it, keeping the
+/// lexy types out of the public Parser header.
+class Parser::BufferHandler {
+public:
+    /// True when a non-empty buffer has been loaded.
+    bool is_valid() const {
+        return _buffer.size() != 0;
+    }
+
+    /// Copies `size` bytes starting at `data` into the internal buffer.
+    /// Never reports an error.
+    std::optional<Error> load_buffer(const char* data, std::size_t size) {
+        _buffer = lexy::buffer(data, size);
+        return std::nullopt;
+    }
+
+    /// Copies the range [start, end) into the internal buffer.
+    /// Never reports an error.
+    std::optional<Error> load_buffer(const char* start, const char* end) {
+        _buffer = lexy::buffer(start, end);
+        return std::nullopt;
+    }
+
+    /// Reads the file at `path` into the internal buffer.
+    /// @return A fatal "no file" error if the file could not be read.
+    std::optional<Error> load_file(const char* path) {
+        auto file = lexy::read_file(path);
+        if (!file) {
+            return errors::make_no_file_error(path);
+        }
+
+        // Take the buffer out of the result instead of copying the whole file.
+        _buffer = std::move(file).buffer();
+        return std::nullopt;
+    }
+
+    /// True when the buffer contains at least one multi-byte UTF-8 sequence
+    /// according to detail::is_utf8_no_ascii.
+    constexpr bool is_exclusive_utf8() const {
+        return detail::is_utf8_no_ascii(_buffer);
+    }
+
+    /// Parses the buffer with the grammar production `Node`, reporting through
+    /// `callback`. On success the produced tree becomes the root and nullopt is
+    /// returned. On failure an engaged but EMPTY vector is returned: details
+    /// are only delivered through `callback`, not collected here.
+    template<typename Node, typename ErrorCallback>
+    std::optional<std::vector<Error>> parse(const ErrorCallback& callback) {
+        auto result = lexy::parse<Node>(_buffer, callback);
+        if (!result) {
+            std::vector<Error> errors;
+            return errors;
+        }
+        // This is mighty frustrating
+        _root = std::unique_ptr<ast::Node>(result.value());
+        return std::nullopt;
+    }
+
+    /// Root of the last successful parse (null before one has happened).
+    std::unique_ptr<ast::Node>& get_root() {
+        return _root;
+    }
+
+private:
+    lexy::buffer<lexy::default_encoding> _buffer;
+    std::unique_ptr<ast::Node> _root;
+};
+
+/// BufferHandler ///
+
+/// Creates a parser with no input loaded, no associated file path and
+/// diagnostics directed to stderr.
+Parser::Parser()
+    : _buffer_handler(std::make_unique<BufferHandler>()),
+      // reference_wrapper cannot be default-constructed; bind to the null
+      // stream first, then switch to stderr below.
+      _error_stream(detail::cnull),
+      // simple_parse() reads the path even for buffer input, so it must
+      // never be left indeterminate.
+      _file_path(nullptr) {
+    set_error_log_to_stderr();
+}
+
+// Defaulted here rather than in the header: the header only forward-declares
+// BufferHandler, and destroying or moving std::unique_ptr<BufferHandler>
+// needs the complete type, which is defined earlier in this file.
+Parser::Parser(Parser&&) = default;
+Parser& Parser::operator=(Parser&& value) = default;
+Parser::~Parser() = default;
+
+/// Creates a parser and loads `size` bytes starting at `data` into it.
+Parser Parser::from_buffer(const char* data, std::size_t size) {
+    Parser parser;
+    parser.load_from_buffer(data, size);
+    return parser;
+}
+
+/// Creates a parser and loads the byte range [start, end) into it.
+Parser Parser::from_buffer(const char* start, const char* end) {
+    Parser parser;
+    parser.load_from_buffer(start, end);
+    return parser;
+}
+
+/// Creates a parser and loads the file at `path` into it. A failed load is
+/// reported through has_error() on the returned parser.
+Parser Parser::from_file(const char* path) {
+    Parser parser;
+    parser.load_from_file(path);
+    return parser;
+}
+
+///
+/// @brief Runs one of BufferHandler's load functions and records its outcome.
+///
+/// Previously collected warnings and errors are discarded first. If the load
+/// function reports an error, it is stored, the fatal flag is updated and the
+/// message is echoed to the error stream.
+///
+/// Expected Use:
+/// @code {.cpp}
+/// _run_load_func(&BufferHandler::<load_function>, <arguments>);
+/// @endcode
+///
+/// @tparam Args Parameter types of the load function
+/// @param func Member function of BufferHandler to invoke
+/// @param args Arguments passed on to func
+///
+template<typename... Args>
+inline void Parser::_run_load_func(std::optional<Error> (BufferHandler::*func)(Args...), Args... args) {
+    _warnings.clear();
+    _errors.clear();
+    _has_fatal_error = false;
+
+    const std::optional<Error> load_error = ((*_buffer_handler).*func)(args...);
+    if (!load_error.has_value()) {
+        return;
+    }
+
+    _has_fatal_error = load_error->type == Error::Type::Fatal;
+    _errors.push_back(*load_error);
+    _error_stream.get() << "Error: " << _errors.back().message << '\n';
+}
+
+/// Replaces the current input with `size` bytes starting at `data`.
+/// Clears earlier diagnostics and forgets any previously loaded file path.
+/// @return *this, to allow chaining.
+Parser& Parser::load_from_buffer(const char* data, std::size_t size) {
+    // A raw buffer has no backing file; without this reset simple_parse()
+    // would report against a stale (or never initialised) path.
+    _file_path = nullptr;
+    _run_load_func(&BufferHandler::load_buffer, data, size);
+    return *this;
+}
+
+/// Replaces the current input with the byte range [start, end).
+/// Clears earlier diagnostics and forgets any previously loaded file path.
+/// @return *this, to allow chaining.
+Parser& Parser::load_from_buffer(const char* start, const char* end) {
+    // A raw buffer has no backing file; without this reset simple_parse()
+    // would report against a stale (or never initialised) path.
+    _file_path = nullptr;
+    _run_load_func(&BufferHandler::load_buffer, start, end);
+    return *this;
+}
+
+/// Replaces the current input with the contents of the file at `path`.
+/// Earlier diagnostics are cleared; a failed read is recorded as an error.
+/// @return *this, to allow chaining.
+Parser& Parser::load_from_file(const char* path) {
+ // Only the pointer is stored, not a copy of the string: `path` has to stay
+ // valid for as long as this parser may use it in diagnostics.
+ _file_path = path;
+ _run_load_func(&BufferHandler::load_file, path);
+ return *this;
+}
+
+/// Discards all diagnostics from now on.
+void Parser::set_error_log_to_null() {
+    std::basic_ostream<char>& sink = detail::cnull;
+    set_error_log_to(sink);
+}
+
+/// Writes diagnostics to std::cerr (the default).
+void Parser::set_error_log_to_stderr() {
+    std::basic_ostream<char>& sink = std::cerr;
+    set_error_log_to(sink);
+}
+
+/// Writes diagnostics to std::cout.
+void Parser::set_error_log_to_stdout() {
+    std::basic_ostream<char>& sink = std::cout;
+    set_error_log_to(sink);
+}
+
+/// Writes diagnostics to `stream`. Only a reference is kept, so the stream
+/// must outlive this parser or be replaced before it is destroyed.
+void Parser::set_error_log_to(std::basic_ostream<char>& stream) {
+    _error_stream = std::ref(stream);
+}
+
+/// Parses the loaded input with grammar::File.
+///
+/// @return true if a tree was produced and stored as the file node; false if
+///         no (or an empty) buffer is loaded or the parse failed.
+///
+/// Warnings and errors are appended to the existing lists; they are only
+/// cleared by the load functions.
+bool Parser::simple_parse() {
+ if (!_buffer_handler->is_valid()) {
+ return false;
+ }
+
+ // Minimal output iterator over the error stream, as required by
+ // lexy_ext::report_error.to(): dereference and post-increment hand back a
+ // copy of the iterator, and assigning a char writes it to the stream.
+ struct ostream_output_iterator {
+ std::reference_wrapper<std::ostream> _stream;
+
+ auto operator*() const noexcept {
+ return *this;
+ }
+ auto operator++(int) const noexcept {
+ return *this;
+ }
+
+ ostream_output_iterator& operator=(char c) {
+ _stream.get().put(c);
+ return *this;
+ }
+ };
+
+ // UTF-8 input is still parsed, but flagged with a warning.
+ if (_buffer_handler->is_exclusive_utf8()) {
+ _warnings.push_back(warnings::make_utf8_warning(_file_path));
+ }
+
+ // lexy formats its own diagnostics and writes them straight to the stream.
+ auto errors = _buffer_handler->parse<grammar::File>(lexy_ext::report_error.path(_file_path).to(ostream_output_iterator { _error_stream }));
+ if (errors) {
+ // NOTE(review): BufferHandler::parse currently returns an empty vector on
+ // failure, so this loop records nothing and has_error() stays false after
+ // a failed parse; callers have to rely on the return value.
+ _errors.reserve(errors->size());
+ for (auto& err : errors.value()) {
+ _has_fatal_error |= err.type == Error::Type::Fatal;
+ _errors.push_back(err);
+ _error_stream.get() << "Error: " << err.message << '\n';
+ }
+ return false;
+ }
+ // grammar::File builds an ast::FileNode, so the root is adopted as one.
+ _file_node.reset(static_cast<ast::FileNode*>(_buffer_handler->get_root().release()));
+ return true;
+}
+
+/// True when at least one error has been recorded since the last load.
+bool Parser::has_error() const {
+    return _errors.size() > 0;
+}
+
+/// True when any recorded error was of type Fatal.
+bool Parser::has_fatal_error() const {
+    return _has_fatal_error;
+}
+
+/// True when at least one warning has been recorded since the last load.
+bool Parser::has_warning() const {
+    return _warnings.size() > 0;
+}
+
+/// Errors recorded since the last load, in order of occurrence.
+const std::vector<Parser::Error>& Parser::get_errors() const {
+    return _errors;
+}
+
+/// Warnings recorded since the last load, in order of occurrence.
+const std::vector<Parser::Warning>& Parser::get_warnings() const {
+    return _warnings;
+}
+
+/// Root of the last successful simple_parse(), or null if there is none.
+/// The node remains owned by the parser.
+const FileNode* Parser::get_file_node() const {
+    const FileNode* const root = _file_node.get();
+    return root;
+} \ No newline at end of file
diff --git a/src/openvic-dataloader/v2script/SimpleGrammar.hpp b/src/openvic-dataloader/v2script/SimpleGrammar.hpp
new file mode 100644
index 0000000..48a80ce
--- /dev/null
+++ b/src/openvic-dataloader/v2script/SimpleGrammar.hpp
@@ -0,0 +1,105 @@
+// Guard against double inclusion: this header defines namespace-scope
+// objects and grammar structs, which must not be seen twice in one
+// translation unit. (The sibling grammar headers already do this.)
+#pragma once
+
+#include <memory>
+#include <string>
+#include <vector>
+
+#include "detail/LexyLitRange.hpp"
+#include <lexy/callback.hpp>
+#include <lexy/dsl.hpp>
+#include <openvic-dataloader/v2script/AbstractSyntaxTree.hpp>
+
+// Grammar Definitions //
+namespace ovdl::v2script::grammar {
+ struct StatementListBlock;
+
+ // Whitespace skipped between tokens: ASCII blanks and newlines.
+ static constexpr auto whitespace_specifier = lexy::dsl::ascii::blank / lexy::dsl::ascii::newline;
+ // A comment starts with '#' and runs to the end of the line or of the input.
+ static constexpr auto comment_specifier = LEXY_LIT("#") >> lexy::dsl::until(lexy::dsl::newline).or_eof();
+
+ // Characters allowed in an identifier: ASCII letters, digits and underscore,
+ // the punctuation % & ' + - . : @, and a set of individual high bytes plus
+ // the ranges 0xC0-0xD6, 0xD8-0xF6 and 0xF8-0xFF.
+ // NOTE(review): the high bytes presumably are Windows-1252 letters (the
+ // encoding the UTF-8 warning recommends) - confirm.
+ // Several alternatives repeat characters already covered (digit, upper,
+ // lower, 0x5F, 0xC0); this is redundant but does not change what matches.
+ static constexpr auto data_specifier =
+ lexy::dsl::ascii::alpha_digit_underscore /
+ LEXY_ASCII_ONE_OF("%&'") / lexy::dsl::lit_c<0x2B> / LEXY_ASCII_ONE_OF("-.") /
+ lexy::dsl::ascii::digit / lexy::dsl::lit_c<0x3A> /
+ lexy::dsl::lit_c<0x40> / lexy::dsl::ascii::upper / lexy::dsl::lit_c<0x5F> /
+ lexy::dsl::ascii::lower / lexy::dsl::lit_b<0x8A> / lexy::dsl::lit_b<0x8C> / lexy::dsl::lit_b<0x8E> /
+ lexy::dsl::lit_b<0x92> / lexy::dsl::lit_b<0x97> / lexy::dsl::lit_b<0x9A> / lexy::dsl::lit_b<0x9C> / lexy::dsl::lit_b<0x9E> / lexy::dsl::lit_b<0x9F> /
+ lexy::dsl::lit_b<0xC0> /
+ ovdl::detail::lexydsl::make_range<0xC0, 0xD6>() / ovdl::detail::lexydsl::make_range<0xD8, 0xF6>() / ovdl::detail::lexydsl::make_range<0xF8, 0xFF>();
+
+ // Named character class wrapping data_specifier, used by Identifier.
+ static constexpr auto data_char_class = LEXY_CHAR_CLASS("DataSpecifier", data_specifier);
+
+ // An unquoted word made of data_char_class characters.
+ // Value: the matched text is turned into a std::string and used to allocate
+ // an ast::IdentifierNode, returned as a raw ast::NodePtr.
+ struct Identifier {
+ static constexpr auto rule = lexy::dsl::identifier(data_char_class);
+ static constexpr auto value = lexy::as_string<std::string> | lexy::new_<ast::IdentifierNode, ast::NodePtr>;
+ };
+
+ // A double-quoted string with backslash escapes.
+ // Value: the unescaped contents are collected into a std::string and used to
+ // allocate an ast::StringNode, returned as a raw ast::NodePtr.
+ struct StringExpression {
+ // Character following the backslash -> character it stands for.
+ static constexpr auto escaped_symbols = lexy::symbol_table<char> //
+ .map<'"'>('"')
+ .map<'\''>('\'')
+ .map<'\\'>('\\')
+ .map<'/'>('/')
+ .map<'b'>('\b')
+ .map<'f'>('\f')
+ .map<'n'>('\n')
+ .map<'r'>('\r')
+ .map<'t'>('\t');
+ static constexpr auto rule = [] {
+ // Any single byte from 0x20 to 0xFF, minus ASCII control characters.
+ auto c = ovdl::detail::lexydsl::make_range<0x20, 0xFF>() - lexy::dsl::ascii::control;
+
+ // Escape sequences start with a backslash and map exactly one of the
+ // symbols in escaped_symbols; no other escape forms are accepted.
+ auto escape = lexy::dsl::backslash_escape //
+ .symbol<escaped_symbols>();
+ return lexy::dsl::quoted(c, escape);
+ }();
+
+ static constexpr auto value = lexy::as_string<std::string> >> lexy::new_<ast::StringNode, ast::NodePtr>;
+ };
+
+ // One statement: either `identifier = value`, a lone identifier, or a lone
+ // quoted string. `value` may be an identifier, a string or a `{ ... }` block.
+ //
+ // Value: a lone identifier/string is passed through unchanged; an assignment
+ // becomes an ast::AssignNode built from the name node and the value node.
+ struct AssignmentStatement {
+     static constexpr auto rule =
+         lexy::dsl::p<Identifier> >>
+             (lexy::dsl::equal_sign >>
+                     (lexy::dsl::p<Identifier> | lexy::dsl::p<StringExpression> | lexy::dsl::recurse_branch<StatementListBlock>) |
+                 lexy::dsl::else_ >> lexy::dsl::return_) |
+         lexy::dsl::p<StringExpression>;
+
+     static constexpr auto value = lexy::callback<ast::NodePtr>(
+         [](auto name, lexy::nullopt = {}) {
+             return LEXY_MOV(name);
+         },
+         // make_node_ptr is qualified so the call does not depend on
+         // argument-dependent lookup of a template-id.
+         [](auto name, auto&& initializer) {
+             return ast::make_node_ptr<ast::AssignNode>(LEXY_MOV(name), LEXY_MOV(initializer));
+         });
+ };
+
+ // A `{ ... }` block: an optional list of statements, an optional semicolon,
+ // then the closing brace.
+ //
+ // Value: the statements are gathered into a std::vector<ast::NodePtr> and
+ // wrapped in an ast::ListNode; an empty block yields an empty ListNode.
+ struct StatementListBlock {
+     static constexpr auto rule =
+         lexy::dsl::curly_bracketed.open() >>
+         lexy::dsl::opt(lexy::dsl::list(lexy::dsl::p<AssignmentStatement>)) + lexy::dsl::opt(lexy::dsl::semicolon) +
+             lexy::dsl::curly_bracketed.close();
+
+     // All three overloads call ast::make_node_ptr with the same qualification;
+     // previously only the first one was qualified.
+     static constexpr auto value =
+         lexy::as_list<std::vector<ast::NodePtr>> >>
+         lexy::callback<ast::NodePtr>(
+             [](lexy::nullopt = {}, lexy::nullopt = {}) {
+                 return ast::make_node_ptr<ast::ListNode>();
+             },
+             [](auto&& list, lexy::nullopt = {}) {
+                 return ast::make_node_ptr<ast::ListNode>(LEXY_MOV(list));
+             },
+             [](auto& list) {
+                 return ast::make_node_ptr<ast::ListNode>(list);
+             });
+ };
+
+ // Top-level production: a list of statements that runs until end of input.
+ // Value: the statements are gathered into a std::vector<ast::NodePtr> and
+ // used to allocate the ast::FileNode returned (as ast::NodePtr) to the parser.
+ struct File {
+ // Allow arbitrary spaces between individual tokens.
+ // (Comments are skipped the same way.)
+ static constexpr auto whitespace = whitespace_specifier | comment_specifier;
+
+ static constexpr auto rule = lexy::dsl::terminator(lexy::dsl::eof).list(lexy::dsl::p<AssignmentStatement>);
+
+ static constexpr auto value = lexy::as_list<std::vector<ast::NodePtr>> >> lexy::new_<ast::FileNode, ast::NodePtr>;
+ };
+}