aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--SConstruct3
-rw-r--r--include/openvic-dataloader/detail/SelfType.hpp24
-rw-r--r--include/openvic-dataloader/detail/TypeName.hpp52
-rw-r--r--include/openvic-dataloader/v2script/AbstractSyntaxTree.hpp174
-rw-r--r--include/openvic-dataloader/v2script/Parser.hpp74
-rw-r--r--src/headless/main.cpp34
-rw-r--r--src/openvic-dataloader/detail/DetectUtf8.hpp52
-rw-r--r--src/openvic-dataloader/detail/Errors.hpp19
-rw-r--r--src/openvic-dataloader/detail/LexyLitRange.hpp16
-rw-r--r--src/openvic-dataloader/detail/NullBuff.hpp30
-rw-r--r--src/openvic-dataloader/detail/Warnings.hpp21
-rw-r--r--src/openvic-dataloader/v2script/DecisionGrammar.hpp14
-rw-r--r--src/openvic-dataloader/v2script/EventGrammar.hpp14
-rw-r--r--src/openvic-dataloader/v2script/Grammar.cpp74
-rw-r--r--src/openvic-dataloader/v2script/Parser.cpp224
-rw-r--r--src/openvic-dataloader/v2script/SimpleGrammar.hpp105
16 files changed, 843 insertions, 87 deletions
diff --git a/SConstruct b/SConstruct
index f2a66bc..68158cf 100644
--- a/SConstruct
+++ b/SConstruct
@@ -262,11 +262,12 @@ if env["build_ovdl_headless"]:
headless_env = env.Clone()
headless_path = ["src/headless"]
headless_env.Append(CPPDEFINES=["OPENVIC_DATALOADER_HEADLESS"])
+ headless_env.Append(CPPDEFINES=["OPENVIC_DATALOADER_PRINT_NODES"])
headless_env.Append(CPPPATH=[headless_env.Dir(headless_path)])
headless_env.headless_sources = GlobRecursive("*.cpp", headless_path)
if not env["build_ovdl_library"]:
headless_env.headless_sources += sources
- headless_program = env.Program(
+ headless_program = headless_env.Program(
target="bin/%s" % headless_name,
source=headless_env.headless_sources,
PROGSUFFIX=".headless" + env["PROGSUFFIX"]
diff --git a/include/openvic-dataloader/detail/SelfType.hpp b/include/openvic-dataloader/detail/SelfType.hpp
new file mode 100644
index 0000000..5366aef
--- /dev/null
+++ b/include/openvic-dataloader/detail/SelfType.hpp
@@ -0,0 +1,24 @@
+#pragma once
+
+#include <type_traits>
+
+// Helpers that let a class obtain its own type without spelling its name.
+// A tag type T is associated with a type U through a friend function that is
+// declared by Reader<T> and defined when Writer<T, U> is instantiated.
+namespace ovdl::detail {
+#pragma GCC diagnostic push
+#pragma clang diagnostic ignored "-Wunknown-warning-option"
+#pragma GCC diagnostic ignored "-Wnon-template-friend"
+ // Declares (but does not define) the friend adl_GetSelfType(Reader<T>) with
+ // a deduced return type.
+ template<typename T>
+ struct Reader {
+ friend auto adl_GetSelfType(Reader<T>);
+ };
+
+ // Instantiating Writer<T, U> defines adl_GetSelfType(Reader<T>) so that its
+ // return type is U.
+ template<typename T, typename U>
+ struct Writer {
+ friend auto adl_GetSelfType(Reader<T>) { return U {}; }
+ };
+#pragma GCC diagnostic pop
+
+ // Namespace-scope declaration of the name used by the unqualified call in Read.
+ inline void adl_GetSelfType() {}
+
+ // Yields the type registered for tag T with one pointer level removed
+ // (OVDL_TYPE_DEFINE_SELF registers decltype(this), i.e. a pointer type).
+ template<typename T>
+ using Read = std::remove_pointer_t<decltype(adl_GetSelfType(Reader<T> {}))>;
+}
diff --git a/include/openvic-dataloader/detail/TypeName.hpp b/include/openvic-dataloader/detail/TypeName.hpp
new file mode 100644
index 0000000..e9f27d3
--- /dev/null
+++ b/include/openvic-dataloader/detail/TypeName.hpp
@@ -0,0 +1,52 @@
+#pragma once
+
+#include <array>
+#include <cstddef>
+#include <string_view>
+#include <utility>
+
+namespace ovdl::detail {
+
+    /// Copies the characters `str[Idxs]...` into a `std::array<char, sizeof...(Idxs)>`.
+    ///
+    /// The array holds exactly the selected characters: no terminator or other
+    /// extra character is appended, so `data()`/`size()` describe the substring
+    /// itself. An empty index sequence yields an empty array.
+    ///
+    /// @param str Source view; every index in `Idxs` must be smaller than `str.size()`.
+    /// @return The selected characters, in index order.
+    template<std::size_t... Idxs>
+    constexpr auto substring_as_array(std::string_view str, std::index_sequence<Idxs...>) {
+        return std::array<char, sizeof...(Idxs)> { str[Idxs]... };
+    }
+
+ // Extracts the spelling of T from the compiler's pretty-printed signature of
+ // this function and returns it as a character array built by
+ // substring_as_array. Compilation fails on compilers other than clang, GCC
+ // and MSVC.
+ template<typename T>
+ constexpr auto type_name_array() {
+ // prefix/suffix are the text surrounding the type name in each compiler's
+ // signature string.
+#if defined(__clang__)
+ constexpr auto prefix = std::string_view { "[T = " };
+ constexpr auto suffix = std::string_view { "]" };
+ constexpr auto function = std::string_view { __PRETTY_FUNCTION__ };
+#elif defined(__GNUC__)
+ constexpr auto prefix = std::string_view { "with T = " };
+ constexpr auto suffix = std::string_view { "]" };
+ constexpr auto function = std::string_view { __PRETTY_FUNCTION__ };
+#elif defined(_MSC_VER)
+ constexpr auto prefix = std::string_view { "type_name_array<" };
+ constexpr auto suffix = std::string_view { ">(void)" };
+ constexpr auto function = std::string_view { __FUNCSIG__ };
+#else
+#error Unsupported compiler
+#endif
+
+ // The name starts right after the first prefix match and ends at the last
+ // suffix match.
+ constexpr auto start = function.find(prefix) + prefix.size();
+ constexpr auto end = function.rfind(suffix);
+
+ // Rejects an empty or inverted range at compile time.
+ static_assert(start < end);
+
+ constexpr auto name = function.substr(start, (end - start));
+ return substring_as_array(name, std::make_index_sequence<name.size()> {});
+ }
+
+ // Holds the character array produced by type_name_array<T>() as a static
+ // member, so that views into it refer to storage with static duration.
+ template<typename T>
+ struct type_name_holder {
+ static inline constexpr auto value = type_name_array<T>();
+ };
+
+    /// Returns a view over the characters stored in `type_name_holder<T>::value`,
+    /// i.e. the compiler's spelling of `T` as produced by `type_name_array<T>()`.
+    template<typename T>
+    constexpr auto type_name() -> std::string_view {
+        constexpr const auto& chars = type_name_holder<T>::value;
+        return std::string_view(chars.data(), chars.size());
+    }
+} \ No newline at end of file
diff --git a/include/openvic-dataloader/v2script/AbstractSyntaxTree.hpp b/include/openvic-dataloader/v2script/AbstractSyntaxTree.hpp
new file mode 100644
index 0000000..80485b7
--- /dev/null
+++ b/include/openvic-dataloader/v2script/AbstractSyntaxTree.hpp
@@ -0,0 +1,174 @@
+#pragma once
+
+#include <memory>
+#include <string>
+#include <string_view>
+#include <type_traits>
+#include <utility>
+#include <vector>
+
+#include <openvic-dataloader/detail/SelfType.hpp>
+#include <openvic-dataloader/detail/TypeName.hpp>
+
+// Node printing support is only compiled in when OPENVIC_DATALOADER_PRINT_NODES
+// is defined; otherwise both macros below expand to nothing.
+// NOTE(review): the macro adds a virtual function to Node, so presumably every
+// translation unit that includes this header must agree on the define — confirm
+// against the build configuration.
+#ifdef OPENVIC_DATALOADER_PRINT_NODES
+#include <iostream>
+
+// declares the pure virtual print function on the base node
+#define OVDL_PRINT_FUNC_DECL virtual void print(std::ostream& stream) const = 0
+// defines the print override; the macro arguments form the function body
+#define OVDL_PRINT_FUNC_DEF(...) \
+ void print(std::ostream& stream) const override __VA_ARGS__
+#else
+#define OVDL_PRINT_FUNC_DECL
+#define OVDL_PRINT_FUNC_DEF(...)
+#endif
+
+// defines get_type_static and get_type for string type naming
+// (requires the alias `type`, which OVDL_TYPE_DEFINE_SELF provides)
+#define OVDL_RT_TYPE_DEF \
+ static constexpr std::string_view get_type_static() { return ::ovdl::detail::type_name<type>(); } \
+ constexpr std::string_view get_type() const override { return ::ovdl::detail::type_name<std::decay_t<decltype(*this)>>(); }
+
+// defines type for self-class referencing
+// (registers decltype(this) for a class-local tag through detail::Writer and
+// reads it back through detail::Read as the alias `type`)
+#define OVDL_TYPE_DEFINE_SELF \
+ struct _self_type_tag {}; \
+ constexpr auto _self_type_helper()->decltype(::ovdl::detail::Writer<_self_type_tag, decltype(this)> {}); \
+ using type = ::ovdl::detail::Read<_self_type_tag>;
+
+namespace ovdl::v2script::ast {
+    /// Abstract base class of all v2script AST nodes.
+    ///
+    /// Nodes are movable but not copyable. Each concrete node reports the
+    /// spelling of its own type through get_type(), which is_type() compares
+    /// against.
+    struct Node {
+        Node() = default;
+        virtual ~Node() = default;
+
+        // Movable ...
+        Node(Node&&) = default;
+        Node& operator=(Node&&) = default;
+        // ... but never copied.
+        Node(const Node&) = delete;
+        Node& operator=(const Node&) = delete;
+
+        OVDL_PRINT_FUNC_DECL;
+
+        /// Spelling of the base type itself.
+        static constexpr std::string_view get_type_static() { return detail::type_name<Node>(); }
+        /// Spelling of the dynamic type, supplied by each concrete node.
+        constexpr virtual std::string_view get_type() const = 0;
+
+        /// True when the dynamic type's spelling equals the spelling of T.
+        template<typename T>
+        constexpr bool is_type() const {
+            return get_type() == detail::type_name<T>();
+        }
+    };
+
+ using NodePtr = Node*;
+ using NodeUPtr = std::unique_ptr<Node>;
+
+    /// Allocates a T with `new`, forwarding `args` to its constructor, and
+    /// returns it as a NodePtr. When NodePtr is a raw pointer the caller
+    /// receives ownership of the allocation.
+    template<class T, class... Args>
+    NodePtr make_node_ptr(Args&&... args) {
+        T* const node = new T(std::forward<Args>(args)...);
+        if constexpr (std::is_pointer_v<NodePtr>) {
+            return node;
+        } else {
+            return NodePtr(node);
+        }
+    }
+
+    /// Returns `from` viewed as a `To&` using an unchecked static_cast.
+    /// `from` is either a raw pointer or a smart pointer exposing get(),
+    /// depending on what NodePtr is. The caller must know the dynamic type.
+    template<typename To, typename From>
+    To& cast_node_ptr(const From& from) {
+        To* target = nullptr;
+        if constexpr (std::is_pointer_v<NodePtr>) {
+            target = static_cast<To*>(from);
+        } else {
+            target = static_cast<To*>(from.get());
+        }
+        return *target;
+    }
+
+    /// Wraps every raw node pointer in `ptrs` into an owning NodeUPtr.
+    ///
+    /// Ownership of each pointee passes to the returned vector; the caller must
+    /// not delete the pointers afterwards. Element order is preserved.
+    ///
+    /// Declared `inline` rather than `constexpr`: the function constructs
+    /// std::unique_ptr objects, so it cannot be evaluated in a constant
+    /// expression under C++20.
+    ///
+    /// @param ptrs Raw node pointers to adopt.
+    /// @return Owning pointers, one per input pointer.
+    inline std::vector<NodeUPtr> make_node_ptr_vector(const std::vector<NodePtr>& ptrs) {
+        std::vector<NodeUPtr> result;
+        result.reserve(ptrs.size());
+        for (NodePtr p : ptrs) {
+            result.emplace_back(p);
+        }
+        return result;
+    }
+
+ // Leaf node holding the text of an identifier.
+ struct IdentifierNode final : public Node {
+ std::string _name;
+ explicit IdentifierNode(std::string name)
+ : _name(std::move(name)) {
+ }
+
+ OVDL_TYPE_DEFINE_SELF;
+ OVDL_RT_TYPE_DEF;
+
+ // Writes the name as a C string (output stops at an embedded NUL, if any).
+ OVDL_PRINT_FUNC_DEF({
+ stream << _name.c_str();
+ })
+ };
+
+ // Leaf node holding the contents of a quoted string.
+ struct StringNode final : public Node {
+ std::string _name;
+ explicit StringNode(std::string name)
+ : _name(std::move(name)) {
+ }
+
+ OVDL_TYPE_DEFINE_SELF;
+ OVDL_RT_TYPE_DEF;
+
+ // Writes the contents surrounded by double quotes; no escaping is applied.
+ OVDL_PRINT_FUNC_DEF({
+ stream << '"' << _name.c_str() << '"';
+ })
+ };
+
+    /// Node for `name = initializer`.
+    ///
+    /// The constructor takes ownership of both pointers. The text of `name` is
+    /// moved into `_name` when `name` is an IdentifierNode (otherwise `_name`
+    /// stays empty), and the `name` node itself is destroyed before the
+    /// constructor returns. `init` is kept alive as `_initializer`.
+    struct AssignNode final : public Node {
+        std::string _name;
+        NodeUPtr _initializer;
+        explicit AssignNode(NodePtr name, NodePtr init)
+            : _initializer(init) {
+            // Adopt the name node so it is freed once its text has been taken;
+            // previously the node was leaked.
+            const NodeUPtr owned_name(name);
+            if (owned_name && owned_name->is_type<IdentifierNode>()) {
+                _name = std::move(cast_node_ptr<IdentifierNode>(name)._name);
+            }
+        }
+
+        OVDL_TYPE_DEFINE_SELF;
+        OVDL_RT_TYPE_DEF;
+
+        // Writes "<name> = " followed by the initializer's own output.
+        OVDL_PRINT_FUNC_DEF({
+            stream << _name.c_str() << " = ";
+            _initializer->print(stream);
+        })
+    };
+
+    /// Node for a brace-delimited list of statements.
+    ///
+    /// The constructor takes ownership of every pointer in `statements`.
+    struct ListNode final : public Node {
+        std::vector<NodeUPtr> _statements;
+        explicit ListNode(std::vector<NodePtr> statements = std::vector<NodePtr> {})
+            : _statements(make_node_ptr_vector(statements)) {
+        }
+
+        OVDL_TYPE_DEFINE_SELF;
+        OVDL_RT_TYPE_DEF;
+
+        // Writes '{', the statements separated by single spaces, then '}'.
+        // Iterating by range avoids comparing a signed index with size().
+        OVDL_PRINT_FUNC_DEF({
+            stream << '{';
+            bool first = true;
+            for (const auto& statement : _statements) {
+                if (!first) {
+                    stream << ' ';
+                }
+                first = false;
+                statement->print(stream);
+            }
+            stream << '}';
+        })
+    };
+
+ // Root node holding the top-level statements of a parsed file.
+ // The vector constructor takes ownership of every pointer it is given.
+ struct FileNode final : public Node {
+ std::vector<NodeUPtr> _statements;
+ FileNode() {}
+ explicit FileNode(std::vector<NodePtr> statements)
+ : _statements(make_node_ptr_vector(statements)) {
+ }
+
+ OVDL_TYPE_DEFINE_SELF;
+ OVDL_RT_TYPE_DEF;
+
+ // Writes each statement followed by a separator line of '=' characters.
+ OVDL_PRINT_FUNC_DEF({
+ for (auto& statement : _statements) {
+ statement->print(stream);
+ stream << "\n===========\n";
+ }
+ })
+ };
+}
+
+#undef OVDL_PRINT_FUNC_DECL
+#undef OVDL_PRINT_FUNC_DEF
+#undef OVDL_TYPE_DEFINE_SELF \ No newline at end of file
diff --git a/include/openvic-dataloader/v2script/Parser.hpp b/include/openvic-dataloader/v2script/Parser.hpp
index 53aab90..dbbec73 100644
--- a/include/openvic-dataloader/v2script/Parser.hpp
+++ b/include/openvic-dataloader/v2script/Parser.hpp
@@ -1,27 +1,79 @@
#pragma once
#include <cstddef>
-#include <cstdio>
+#include <functional>
+#include <memory>
+#include <optional>
#include <ostream>
+#include <string>
+#include <vector>
+
+#include <openvic-dataloader/v2script/AbstractSyntaxTree.hpp>
namespace ovdl::v2script {
+
+ using FileNode = ast::FileNode;
+
+ // Loads v2script text from a buffer or file, parses it and exposes the
+ // resulting syntax tree together with the collected errors and warnings.
+ // The class is move-only (it declares move operations and owns unique_ptrs).
class Parser {
public:
- static Parser from_buffer(char8_t* data, std::size_t size);
- static Parser from_buffer(char8_t* start, char8_t* end);
- static Parser from_file(const char8_t* path);
+ // A problem reported while loading or parsing.
+ struct Error {
+ const enum class Type : unsigned char {
+ Recoverable,
+ Fatal
+ } type;
+ const std::string message;
+ const int error_value;
+ };
+
+ // A non-fatal notice reported while parsing.
+ struct Warning {
+ const std::string message;
+ const int warning_value;
+ };
+
+ Parser();
+
+ // Factory functions: create a parser and immediately load the given input.
+ static Parser from_buffer(const char* data, std::size_t size);
+ static Parser from_buffer(const char* start, const char* end);
+ static Parser from_file(const char* path);
+
+ // Load new input into this parser; each returns *this.
+ Parser& load_from_buffer(const char* data, std::size_t size);
+ Parser& load_from_buffer(const char* start, const char* end);
+ Parser& load_from_file(const char* path);
+ // Select where error text is written.
+ void set_error_log_to_null();
void set_error_log_to_stderr();
- void set_error_log_path(const char8_t* path);
- void set_error_log_to(std::basic_ostream<char8_t> stream);
- void set_error_log_to(std::FILE* file);
+ void set_error_log_to_stdout();
+ // The stream is stored by reference and must outlive its use by the parser.
+ void set_error_log_to(std::basic_ostream<char>& stream);
- bool parse();
+ // Parses the loaded input; returns true on success.
+ bool simple_parse();
- bool has_error();
- bool has_warning();
+ bool has_error() const;
+ bool has_fatal_error() const;
+ bool has_warning() const;
+
+ const std::vector<Error>& get_errors() const;
+ const std::vector<Warning>& get_warnings() const;
+
+ // Root of the parsed tree; null until a parse has succeeded.
+ const FileNode* get_file_node() const;
+
+ Parser(Parser&&);
+ Parser& operator=(Parser&&);
+
+ ~Parser();
private:
- Parser();
+ std::vector<Error> _errors;
+ std::vector<Warning> _warnings;
+
+ // Implementation detail defined in Parser.cpp.
+ class BufferHandler;
+ friend class BufferHandler;
+ std::unique_ptr<BufferHandler> _buffer_handler;
+ std::unique_ptr<FileNode> _file_node;
+ std::reference_wrapper<std::ostream> _error_stream;
+ // Path given to load_from_file; the pointer is stored, not copied.
+ const char* _file_path;
+ bool _has_fatal_error = false;
+
+ // Runs one of BufferHandler's load functions and records any error it returns.
+ template<typename... Args>
+ inline void _run_load_func(std::optional<Error> (BufferHandler::*func)(Args...), Args... args);
};
} \ No newline at end of file
diff --git a/src/headless/main.cpp b/src/headless/main.cpp
index ffc6dab..afd569f 100644
--- a/src/headless/main.cpp
+++ b/src/headless/main.cpp
@@ -1,3 +1,35 @@
-int main() {
+#include <cstdio>
+#include <iostream>
+#include <string>
+#include <vector>
+
+#include <openvic-dataloader/v2script/Parser.hpp>
+
+// Headless entry point: loads the file named by the first argument, parses it
+// and, when node printing is compiled in, dumps the resulting tree to stdout.
+//
+// Exit codes: 0 on success, 1 when no file was given or it could not be
+// loaded, 2 when parsing failed.
+int main(int argc, char** argv) {
+    if (argc < 2) {
+        std::fprintf(stderr, "usage: %s <filename>\n", argv[0]);
+        return 1;
+    }
+
+    auto parser = ovdl::v2script::Parser::from_file(argv[1]);
+    if (parser.has_error()) {
+        return 1;
+    }
+
+    // A failed parse does not necessarily record an Error entry, so the
+    // return value has to be checked as well as has_error().
+    if (!parser.simple_parse() || parser.has_error()) {
+        return 2;
+    }
+
+    for (const auto& warning : parser.get_warnings()) {
+        std::cerr << "Warning: " << warning.message << '\n';
+    }
+
+#ifdef OPENVIC_DATALOADER_PRINT_NODES
+    // Guard against a missing tree instead of dereferencing a null pointer.
+    if (const auto* file_node = parser.get_file_node()) {
+        file_node->print(std::cout);
+    }
+#endif
+
return 0;
} \ No newline at end of file
diff --git a/src/openvic-dataloader/detail/DetectUtf8.hpp b/src/openvic-dataloader/detail/DetectUtf8.hpp
new file mode 100644
index 0000000..b2969ce
--- /dev/null
+++ b/src/openvic-dataloader/detail/DetectUtf8.hpp
@@ -0,0 +1,52 @@
+#pragma once
+
+#include "detail/LexyLitRange.hpp"
+#include <lexy/action/match.hpp>
+#include <lexy/dsl.hpp>
+
+namespace ovdl::detail {
+ namespace detect_utf8 {
+
+ // lexy production that matches input consisting of ASCII bytes and
+ // well-formed multi-byte UTF-8 sequences. A context flag records whether a
+ // multi-byte sequence was seen; its initial value is INCLUDE_ASCII, so with
+ // INCLUDE_ASCII == true pure ASCII input also satisfies the final check.
+ template<bool INCLUDE_ASCII>
+ struct DetectUtf8 {
+ // Error raised when the flag is still unset after scanning.
+ struct not_utf8 {
+ static constexpr auto name = "not utf8";
+ };
+
+ static constexpr auto rule = [] {
+ constexpr auto is_not_ascii_flag = lexy::dsl::context_flag<DetectUtf8>;
+
+ // & 0b10000000 == 0b00000000
+ constexpr auto ascii_values = lexydsl::make_range<0b00000000, 0b01111111>();
+ // & 0b11100000 == 0b11000000
+ constexpr auto two_byte = lexydsl::make_range<0b11000000, 0b11011111>();
+ // & 0b11110000 == 0b11100000
+ constexpr auto three_byte = lexydsl::make_range<0b11100000, 0b11101111>();
+ // & 0b11111000 == 0b11110000
+ constexpr auto four_byte = lexydsl::make_range<0b11110000, 0b11110111>();
+ // & 0b11000000 == 0b10000000
+ constexpr auto check_bytes = lexydsl::make_range<0b10000000, 0b10111111>();
+
+ // A lead byte followed by the matching number of continuation bytes;
+ // a successful match sets the flag.
+ constexpr auto utf8_check =
+ ((four_byte >> lexy::dsl::times<3>(check_bytes)) |
+ (three_byte >> lexy::dsl::times<2>(check_bytes)) |
+ (two_byte >> lexy::dsl::times<1>(check_bytes))) >>
+ is_not_ascii_flag.set();
+
+ // NOTE(review): the rule does not require end of input after the loop,
+ // so scanning presumably stops at the first byte that starts neither
+ // alternative — confirm whether trailing bytes should be rejected.
+ return is_not_ascii_flag.template create<INCLUDE_ASCII>() +
+ lexy::dsl::while_(utf8_check | ascii_values) +
+ lexy::dsl::must(is_not_ascii_flag.is_set()).template error<not_utf8>;
+ }();
+ };
+ }
+
+    /// Matches `input` against DetectUtf8<false>: succeeds only when at least
+    /// one multi-byte UTF-8 sequence was recognised.
+    template<typename Input>
+    constexpr bool is_utf8_no_ascii(const Input& input) {
+        using Detector = detect_utf8::DetectUtf8<false>;
+        return lexy::match<Detector>(input);
+    }
+
+    /// Matches `input` against DetectUtf8<true>: pure ASCII input is accepted
+    /// as well as input containing multi-byte UTF-8 sequences.
+    template<typename Input>
+    constexpr bool is_utf8(const Input& input) {
+        using Detector = detect_utf8::DetectUtf8<true>;
+        return lexy::match<Detector>(input);
+    }
+} \ No newline at end of file
diff --git a/src/openvic-dataloader/detail/Errors.hpp b/src/openvic-dataloader/detail/Errors.hpp
new file mode 100644
index 0000000..f8ed21b
--- /dev/null
+++ b/src/openvic-dataloader/detail/Errors.hpp
@@ -0,0 +1,19 @@
+#pragma once
+
+#include "openvic-dataloader/v2script/Parser.hpp"
+
+namespace ovdl::v2script::errors {
+ inline const v2script::Parser::Error make_no_file_error(const char* file_path) {
+ std::string message;
+ if (!file_path) {
+ message = "File path not specified.";
+ } else {
+ message = "File '" + std::string(file_path) + "' was not found.";
+ }
+
+ return v2script::Parser::Error { Parser::Error::Type::Fatal, message, 1 };
+ }
+}
+
+namespace ovdl::ovscript::errors {
+} \ No newline at end of file
diff --git a/src/openvic-dataloader/detail/LexyLitRange.hpp b/src/openvic-dataloader/detail/LexyLitRange.hpp
new file mode 100644
index 0000000..a6761a8
--- /dev/null
+++ b/src/openvic-dataloader/detail/LexyLitRange.hpp
@@ -0,0 +1,16 @@
+#pragma once
+
+#include <lexy/dsl/literal.hpp>
+
+namespace ovdl::detail::lexydsl {
+    /// Builds a lexy rule matching any single character in the inclusive range
+    /// [LOW, HIGH] by combining one `lit_c` per value with `/`.
+    ///
+    /// @tparam LOW  First value of the range.
+    /// @tparam HIGH Last value of the range; must not be smaller than LOW.
+    template<unsigned char LOW, unsigned char HIGH>
+    consteval auto make_range() {
+        // Reversed bounds would otherwise recurse until LOW + 1 no longer fits
+        // in unsigned char and fail with an unhelpful diagnostic.
+        static_assert(LOW <= HIGH, "make_range requires LOW <= HIGH");
+
+        if constexpr (LOW == HIGH) {
+            return lexy::dsl::lit_c<LOW>;
+        } else if constexpr (LOW == (HIGH - 1)) {
+            return lexy::dsl::lit_c<LOW> / lexy::dsl::lit_c<HIGH>;
+        } else {
+            return lexy::dsl::lit_c<LOW> / make_range<LOW + 1, HIGH>();
+        }
+    }
+} \ No newline at end of file
diff --git a/src/openvic-dataloader/detail/NullBuff.hpp b/src/openvic-dataloader/detail/NullBuff.hpp
new file mode 100644
index 0000000..baf9e1b
--- /dev/null
+++ b/src/openvic-dataloader/detail/NullBuff.hpp
@@ -0,0 +1,30 @@
+#pragma once
+
+#include <ostream>
+
+namespace ovdl::detail {
+    /// Stream buffer that discards everything written to it.
+    ///
+    /// No put area is set up, so every write reaches overflow(), which reports
+    /// success without storing the character.
+    template<class cT, class traits = std::char_traits<cT>>
+    class basic_nullbuf : public std::basic_streambuf<cT, traits> {
+        // `override` makes the compiler verify that this really replaces
+        // basic_streambuf::overflow.
+        typename traits::int_type overflow(typename traits::int_type c) override {
+            return traits::not_eof(c); // indicate success
+        }
+    };
+
+ // Output stream whose buffer is a basic_nullbuf, so all output is discarded.
+ template<class cT, class traits = std::char_traits<cT>>
+ class basic_onullstream : public std::basic_ostream<cT, traits> {
+ public:
+ // The base classes receive the address of m_sbuf before that member has
+ // been constructed (bases are initialised before members); init() is
+ // called again in the body with the same buffer.
+ basic_onullstream() : std::basic_ios<cT, traits>(&m_sbuf),
+ std::basic_ostream<cT, traits>(&m_sbuf) {
+ std::basic_ios<cT, traits>::init(&m_sbuf);
+ }
+
+ private:
+ basic_nullbuf<cT, traits> m_sbuf;
+ };
+
+    /// Narrow and wide discarding stream types.
+    using onullstream = basic_onullstream<char>;
+    using wonullstream = basic_onullstream<wchar_t>;
+
+    /// Shared discarding streams. `wcnull` is the wide-character stream; it was
+    /// previously declared with the narrow type by mistake.
+    inline onullstream cnull;
+    inline wonullstream wcnull;
+} \ No newline at end of file
diff --git a/src/openvic-dataloader/detail/Warnings.hpp b/src/openvic-dataloader/detail/Warnings.hpp
new file mode 100644
index 0000000..f854fa8
--- /dev/null
+++ b/src/openvic-dataloader/detail/Warnings.hpp
@@ -0,0 +1,21 @@
+#pragma once
+
+#include "openvic-dataloader/v2script/Parser.hpp"
+
+namespace ovdl::v2script::warnings {
+    /// Builds the warning (warning value 1) reported when the input is detected
+    /// as UTF-8.
+    ///
+    /// @param file_path Path of the offending file, or null when the input was
+    ///                  an in-memory buffer.
+    /// @return A Warning whose message names the file (or the buffer) and
+    ///         recommends Windows-1252 encoding.
+    inline const v2script::Parser::Warning make_utf8_warning(const char* file_path) {
+        std::string message = file_path
+            ? "File '" + std::string(file_path) + "' is a UTF-8 encoded file. "
+            : std::string("Buffer is a UTF-8 encoded string. ");
+        message += "This may cause problems. Prefer Windows-1252 encoding.";
+
+        return v2script::Parser::Warning { message, 1 };
+    }
+}
+
+namespace ovdl::ovscript::warnings {
+} \ No newline at end of file
diff --git a/src/openvic-dataloader/v2script/DecisionGrammar.hpp b/src/openvic-dataloader/v2script/DecisionGrammar.hpp
new file mode 100644
index 0000000..ebc9ad2
--- /dev/null
+++ b/src/openvic-dataloader/v2script/DecisionGrammar.hpp
@@ -0,0 +1,14 @@
+#pragma once
+
+#include <memory>
+#include <string>
+#include <vector>
+
+#include <lexy/callback.hpp>
+#include <lexy/dsl.hpp>
+#include <openvic-dataloader/v2script/AbstractSyntaxTree.hpp>
+
+// Decision Grammar Definitions //
+namespace ovdl::v2script::grammar {
+
+} \ No newline at end of file
diff --git a/src/openvic-dataloader/v2script/EventGrammar.hpp b/src/openvic-dataloader/v2script/EventGrammar.hpp
new file mode 100644
index 0000000..7ab40d4
--- /dev/null
+++ b/src/openvic-dataloader/v2script/EventGrammar.hpp
@@ -0,0 +1,14 @@
+#pragma once
+
+#include <memory>
+#include <string>
+#include <vector>
+
+#include <lexy/callback.hpp>
+#include <lexy/dsl.hpp>
+#include <openvic-dataloader/v2script/AbstractSyntaxTree.hpp>
+
+// Event Grammar Definitions //
+namespace ovdl::v2script::grammar {
+
+} \ No newline at end of file
diff --git a/src/openvic-dataloader/v2script/Grammar.cpp b/src/openvic-dataloader/v2script/Grammar.cpp
deleted file mode 100644
index ec9fac2..0000000
--- a/src/openvic-dataloader/v2script/Grammar.cpp
+++ /dev/null
@@ -1,74 +0,0 @@
-#include <lexy/dsl.hpp>
-#include <openvic-dataloader/v2script/Parser.hpp>
-
-using namespace ovdl::v2script;
-
-// Node Definitions //
-namespace dsl = lexy::dsl;
-
-namespace ovdl::v2script::nodes {
- struct StatementListBlock;
-
- static constexpr auto whitespace_specifier = dsl::code_point.range<0x09, 0x0A>() / dsl::lit_cp<0x0D> / dsl::lit_cp<0x20>;
- static constexpr auto comment_specifier = LEXY_LIT("#") >> dsl::until(dsl::newline).or_eof();
-
- static constexpr auto data_specifier =
- dsl::ascii::alpha_digit_underscore /
- dsl::code_point.range<0x25, 0x27>() / dsl::lit_cp<0x2B> / dsl::code_point.range<0x2D, 0x2E>() /
- dsl::lit_cp<0x3A> /
- dsl::lit_cp<0x8A> / dsl::lit_cp<0x8C> / dsl::lit_cp<0x8E> /
- dsl::lit_cp<0x92> / dsl::lit_cp<0x9A> / dsl::lit_cp<0x9C> / dsl::code_point.range<0x9E, 0x9F>() /
- dsl::code_point.range<0xC0, 0xD6>() / dsl::code_point.range<0xD8, 0xF6>() / dsl::code_point.range<0xF8, 0xFF>();
-
- static constexpr auto data_char_class = LEXY_CHAR_CLASS("DataSpecifier", data_specifier);
-
- struct Identifier {
- static constexpr auto rule = dsl::identifier(data_char_class);
- };
-
- struct StringExpression {
- static constexpr auto escaped_symbols = lexy::symbol_table<char> //
- .map<'"'>('"')
- .map<'\''>('\'')
- .map<'\\'>('\\')
- .map<'/'>('/')
- .map<'b'>('\b')
- .map<'f'>('\f')
- .map<'n'>('\n')
- .map<'r'>('\r')
- .map<'t'>('\t');
- static constexpr auto rule = [] {
- // Arbitrary code points that aren't control characters.
- auto c = -dsl::unicode::control;
-
- // Escape sequences start with a backlash.
- // They either map one of the symbols,
- // or a Unicode code point of the form uXXXX.
- auto escape = dsl::backslash_escape //
- .symbol<escaped_symbols>()
- .rule(dsl::lit_c<'u'> >> dsl::code_point_id<4>);
- return dsl::quoted(c, escape);
- }();
- };
-
- struct AssignmentStatement {
- static constexpr auto rule = dsl::p<Identifier> >>
- (dsl::equal_sign >>
- (dsl::p<Identifier> | dsl::p<StringExpression> | dsl::recurse_branch<StatementListBlock>) |
- dsl::else_ >> dsl::return_);
- };
-
- struct StatementListBlock {
- static constexpr auto rule =
- dsl::curly_bracketed.open() >>
- dsl::opt(dsl::list(dsl::p<AssignmentStatement>)) + dsl::opt(dsl::semicolon) +
- dsl::curly_bracketed.close();
- };
-
- struct File {
- // Allow arbitrary spaces between individual tokens.
- static constexpr auto whitespace = whitespace_specifier | comment_specifier;
-
- static constexpr auto rule = dsl::terminator(dsl::eof).list(dsl::p<AssignmentStatement>);
- };
-}
diff --git a/src/openvic-dataloader/v2script/Parser.cpp b/src/openvic-dataloader/v2script/Parser.cpp
new file mode 100644
index 0000000..c0b6bd8
--- /dev/null
+++ b/src/openvic-dataloader/v2script/Parser.cpp
@@ -0,0 +1,224 @@
+#include "openvic-dataloader/v2script/Parser.hpp"
+
+#include <iostream>
+#include <memory>
+#include <optional>
+#include <string>
+#include <string_view>
+#include <utility>
+#include <vector>
+
+#include "SimpleGrammar.hpp"
+#include "detail/DetectUtf8.hpp"
+#include "detail/Errors.hpp"
+#include "detail/NullBuff.hpp"
+#include "detail/Warnings.hpp"
+#include <lexy/action/parse.hpp>
+#include <lexy/encoding.hpp>
+#include <lexy/input/buffer.hpp>
+#include <lexy/input/file.hpp>
+#include <lexy/lexeme.hpp>
+#include <lexy/visualize.hpp>
+#include <lexy_ext/report_error.hpp>
+#include <openvic-dataloader/v2script/AbstractSyntaxTree.hpp>
+
+using namespace ovdl::v2script;
+
+/// BufferHandler ///
+
+// Owns the input buffer and the root node produced from it; keeps lexy types
+// out of the public Parser header.
+class Parser::BufferHandler {
+public:
+ // True when a non-empty buffer is loaded (an empty input counts as invalid).
+ bool is_valid() const {
+ return _buffer.size() != 0;
+ }
+
+ // Replaces the buffer with one built from `size` bytes at `data`.
+ // Never reports an error.
+ std::optional<Error> load_buffer(const char* data, std::size_t size) {
+ _buffer = lexy::buffer(data, size);
+ return std::nullopt;
+ }
+
+ // Replaces the buffer with one built from the range [start, end).
+ // Never reports an error.
+ std::optional<Error> load_buffer(const char* start, const char* end) {
+ _buffer = lexy::buffer(start, end);
+ return std::nullopt;
+ }
+
+ // Reads the file at `path` into the buffer. Any read failure is reported
+ // with the "no file" error; the previous buffer is left untouched then.
+ std::optional<Error> load_file(const char* path) {
+ auto file = lexy::read_file(path);
+ if (!file) {
+ return errors::make_no_file_error(path);
+ }
+
+ _buffer = file.buffer();
+ return std::nullopt;
+ }
+
+ // True when the buffer matches the UTF-8 detector that requires at least
+ // one multi-byte sequence.
+ constexpr bool is_exclusive_utf8() const {
+ return detail::is_utf8_no_ascii(_buffer);
+ }
+
+ // Parses the buffer with production `Node`, passing `callback` to lexy as
+ // the error callback. On success the result is stored as the root and
+ // nullopt is returned. On failure an EMPTY error vector is returned: the
+ // diagnostics only go through `callback`, no Error objects are created.
+ template<typename Node, typename ErrorCallback>
+ std::optional<std::vector<Error>> parse(const ErrorCallback& callback) {
+ auto result = lexy::parse<Node>(_buffer, callback);
+ if (!result) {
+ std::vector<Error> errors;
+ return errors;
+ }
+ // This is mighty frustrating
+ // (the production yields a raw pointer that is adopted here)
+ _root = std::unique_ptr<ast::Node>(result.value());
+ return std::nullopt;
+ }
+
+ // Gives access to the owning root pointer so the caller can release it.
+ std::unique_ptr<ast::Node>& get_root() {
+ return _root;
+ }
+
+private:
+ lexy::buffer<lexy::default_encoding> _buffer;
+ std::unique_ptr<ast::Node> _root;
+};
+
+/// BufferHandler ///
+
+/// Creates a parser with no input loaded and errors logged to stderr.
+///
+/// `_file_path` is explicitly initialised to null: simple_parse() reads it even
+/// when the input came from a buffer, so it must never be indeterminate.
+/// `_error_stream` has no default constructor, hence the temporary binding to
+/// the null stream before the real target is selected.
+Parser::Parser()
+    : _buffer_handler(std::make_unique<BufferHandler>()),
+      _error_stream(detail::cnull),
+      _file_path(nullptr) {
+    set_error_log_to_stderr();
+}
+
+// Defaulted here rather than in the header because BufferHandler is only
+// forward-declared there and is defined above in this file.
+Parser::Parser(Parser&&) = default;
+Parser& Parser::operator=(Parser&& value) = default;
+Parser::~Parser() = default;
+
+/// Creates a parser and loads `size` bytes starting at `data` into it.
+/// Load errors are recorded on the returned parser.
+Parser Parser::from_buffer(const char* data, std::size_t size) {
+    Parser parser;
+    parser.load_from_buffer(data, size);
+    return parser;
+}
+
+/// Creates a parser and loads the byte range [start, end) into it.
+/// Load errors are recorded on the returned parser.
+Parser Parser::from_buffer(const char* start, const char* end) {
+    Parser parser;
+    parser.load_from_buffer(start, end);
+    return parser;
+}
+
+/// Creates a parser and loads the file at `path` into it.
+/// Load errors are recorded on the returned parser.
+Parser Parser::from_file(const char* path) {
+    Parser parser;
+    parser.load_from_file(path);
+    return parser;
+}
+
+///
+/// @brief Runs a BufferHandler load function and records its outcome
+///
+/// Clears the warnings, errors and fatal flag left over from earlier work,
+/// then calls @p func on _buffer_handler. If the call returns an error, it is
+/// stored, the fatal flag is updated from its type, and its message is
+/// written to the error stream.
+///
+/// Expected Use:
+/// @code {.cpp}
+/// _run_load_func(&BufferHandler::<load_function>, <arguments>);
+/// @endcode
+///
+/// @tparam Args Parameter types of the load function
+/// @param func Pointer to the BufferHandler member function to call
+/// @param args Arguments passed on to @p func
+///
+template<typename... Args>
+inline void Parser::_run_load_func(std::optional<Error> (BufferHandler::*func)(Args...), Args... args) {
+    _warnings.clear();
+    _errors.clear();
+    _has_fatal_error = false;
+
+    const std::optional<Error> load_error = ((*_buffer_handler).*func)(args...);
+    if (!load_error) {
+        return;
+    }
+
+    _has_fatal_error = load_error->type == Error::Type::Fatal;
+    _errors.push_back(*load_error);
+    _error_stream.get() << "Error: " << _errors.back().message << '\n';
+}
+
+/// Loads `size` bytes starting at `data`, replacing any previous input.
+///
+/// The remembered file path is reset so that later diagnostics describe a
+/// buffer rather than a file loaded earlier (or an indeterminate pointer).
+/// @return *this; load errors are available through has_error()/get_errors().
+Parser& Parser::load_from_buffer(const char* data, std::size_t size) {
+    _file_path = nullptr;
+    _run_load_func(&BufferHandler::load_buffer, data, size);
+    return *this;
+}
+
+/// Loads the byte range [start, end), replacing any previous input.
+///
+/// The remembered file path is reset, as for the (data, size) overload.
+/// @return *this; load errors are available through has_error()/get_errors().
+Parser& Parser::load_from_buffer(const char* start, const char* end) {
+    _file_path = nullptr;
+    _run_load_func(&BufferHandler::load_buffer, start, end);
+    return *this;
+}
+
+/// Loads the file at `path`, replacing any previous input.
+///
+/// The pointer itself is stored (the string is not copied) and is read again
+/// by simple_parse(), so the caller must keep the string alive until then.
+/// @return *this; load errors are available through has_error()/get_errors().
+Parser& Parser::load_from_file(const char* path) {
+    _file_path = path;
+    _run_load_func(&BufferHandler::load_file, path);
+    return *this;
+}
+
+void Parser::set_error_log_to_null() {
+ set_error_log_to(detail::cnull);
+}
+
+void Parser::set_error_log_to_stderr() {
+ set_error_log_to(std::cerr);
+}
+
+void Parser::set_error_log_to_stdout() {
+ set_error_log_to(std::cout);
+}
+
+void Parser::set_error_log_to(std::basic_ostream<char>& stream) {
+ _error_stream = stream;
+}
+
+// Parses the loaded buffer with grammar::File.
+// Returns false when no non-empty buffer is loaded or when parsing fails;
+// returns true and stores the resulting FileNode on success.
+bool Parser::simple_parse() {
+ if (!_buffer_handler->is_valid()) {
+ return false;
+ }
+
+ // Minimal output iterator handed to lexy's error reporter: dereference and
+ // post-increment return a copy of the iterator, and assigning a char writes
+ // it to the wrapped stream.
+ struct ostream_output_iterator {
+ std::reference_wrapper<std::ostream> _stream;
+
+ auto operator*() const noexcept {
+ return *this;
+ }
+ auto operator++(int) const noexcept {
+ return *this;
+ }
+
+ ostream_output_iterator& operator=(char c) {
+ _stream.get().put(c);
+ return *this;
+ }
+ };
+
+ // UTF-8 input only produces a warning; parsing continues.
+ if (_buffer_handler->is_exclusive_utf8()) {
+ _warnings.push_back(warnings::make_utf8_warning(_file_path));
+ }
+
+ // lexy writes its own diagnostics to the error stream through the iterator.
+ auto errors = _buffer_handler->parse<grammar::File>(lexy_ext::report_error.path(_file_path).to(ostream_output_iterator { _error_stream }));
+ if (errors) {
+ // The returned vector may be empty, in which case has_error() stays
+ // unchanged even though the parse failed; callers should check the
+ // return value of this function.
+ _errors.reserve(errors->size());
+ for (auto& err : errors.value()) {
+ _has_fatal_error |= err.type == Error::Type::Fatal;
+ _errors.push_back(err);
+ _error_stream.get() << "Error: " << err.message << '\n';
+ }
+ return false;
+ }
+ // Take the root from the handler; the cast is unchecked and relies on
+ // grammar::File producing a FileNode.
+ _file_node.reset(static_cast<ast::FileNode*>(_buffer_handler->get_root().release()));
+ return true;
+}
+
+bool Parser::has_error() const {
+ return !_errors.empty();
+}
+
+bool Parser::has_fatal_error() const {
+ return _has_fatal_error;
+}
+
+bool Parser::has_warning() const {
+ return !_warnings.empty();
+}
+
+const std::vector<Parser::Error>& Parser::get_errors() const {
+ return _errors;
+}
+
+const std::vector<Parser::Warning>& Parser::get_warnings() const {
+ return _warnings;
+}
+
+const FileNode* Parser::get_file_node() const {
+ return _file_node.get();
+} \ No newline at end of file
diff --git a/src/openvic-dataloader/v2script/SimpleGrammar.hpp b/src/openvic-dataloader/v2script/SimpleGrammar.hpp
new file mode 100644
index 0000000..48a80ce
--- /dev/null
+++ b/src/openvic-dataloader/v2script/SimpleGrammar.hpp
@@ -0,0 +1,105 @@
+#include <memory>
+#include <string>
+#include <vector>
+
+#include "detail/LexyLitRange.hpp"
+#include <lexy/callback.hpp>
+#include <lexy/dsl.hpp>
+#include <openvic-dataloader/v2script/AbstractSyntaxTree.hpp>
+
+// Grammar Definitions //
+namespace ovdl::v2script::grammar {
+ struct StatementListBlock;
+
+ // Characters skipped between tokens: ASCII blanks and newlines.
+ static constexpr auto whitespace_specifier = lexy::dsl::ascii::blank / lexy::dsl::ascii::newline;
+ // A comment runs from '#' to the end of the line or the end of input.
+ static constexpr auto comment_specifier = LEXY_LIT("#") >> lexy::dsl::until(lexy::dsl::newline).or_eof();
+
+ // Set of single characters allowed in an identifier: ASCII letters, digits
+ // and underscore, a few ASCII punctuation characters, and selected byte
+ // values above 0x7F.
+ // NOTE(review): several alternatives look redundant (digit/upper/lower and
+ // 0x5F next to alpha_digit_underscore, 0xC0 next to the 0xC0-0xD6 range) —
+ // presumably harmless, confirm before simplifying.
+ static constexpr auto data_specifier =
+ lexy::dsl::ascii::alpha_digit_underscore /
+ LEXY_ASCII_ONE_OF("%&'") / lexy::dsl::lit_c<0x2B> / LEXY_ASCII_ONE_OF("-.") /
+ lexy::dsl::ascii::digit / lexy::dsl::lit_c<0x3A> /
+ lexy::dsl::lit_c<0x40> / lexy::dsl::ascii::upper / lexy::dsl::lit_c<0x5F> /
+ lexy::dsl::ascii::lower / lexy::dsl::lit_b<0x8A> / lexy::dsl::lit_b<0x8C> / lexy::dsl::lit_b<0x8E> /
+ lexy::dsl::lit_b<0x92> / lexy::dsl::lit_b<0x97> / lexy::dsl::lit_b<0x9A> / lexy::dsl::lit_b<0x9C> / lexy::dsl::lit_b<0x9E> / lexy::dsl::lit_b<0x9F> /
+ lexy::dsl::lit_b<0xC0> /
+ ovdl::detail::lexydsl::make_range<0xC0, 0xD6>() / ovdl::detail::lexydsl::make_range<0xD8, 0xF6>() / ovdl::detail::lexydsl::make_range<0xF8, 0xFF>();
+
+ // Named character class wrapping data_specifier.
+ static constexpr auto data_char_class = LEXY_CHAR_CLASS("DataSpecifier", data_specifier);
+
+ // An identifier made of data_char_class characters. Its text is collected
+ // into a std::string and used to allocate an ast::IdentifierNode, returned
+ // as a raw ast::NodePtr.
+ struct Identifier {
+ static constexpr auto rule = lexy::dsl::identifier(data_char_class);
+ static constexpr auto value = lexy::as_string<std::string> | lexy::new_<ast::IdentifierNode, ast::NodePtr>;
+ };
+
+ // A double-quoted string with backslash escapes. Its contents are collected
+ // into a std::string and used to allocate an ast::StringNode, returned as a
+ // raw ast::NodePtr.
+ struct StringExpression {
+ // Characters that may follow a backslash and what each one stands for.
+ static constexpr auto escaped_symbols = lexy::symbol_table<char> //
+ .map<'"'>('"')
+ .map<'\''>('\'')
+ .map<'\\'>('\\')
+ .map<'/'>('/')
+ .map<'b'>('\b')
+ .map<'f'>('\f')
+ .map<'n'>('\n')
+ .map<'r'>('\r')
+ .map<'t'>('\t');
+ static constexpr auto rule = [] {
+ // Characters 0x20 through 0xFF, excluding ASCII control characters.
+ auto c = ovdl::detail::lexydsl::make_range<0x20, 0xFF>() - lexy::dsl::ascii::control;
+
+ // Escape sequences start with a backslash
+ // and map one of the symbols in escaped_symbols.
+ // (No \uXXXX code point escape is defined here.)
+ auto escape = lexy::dsl::backslash_escape //
+ .symbol<escaped_symbols>();
+ return lexy::dsl::quoted(c, escape);
+ }();
+
+ static constexpr auto value = lexy::as_string<std::string> >> lexy::new_<ast::StringNode, ast::NodePtr>;
+ };
+
+    /// One statement: either `identifier = value`, where value is an
+    /// identifier, a string or a nested statement list, or a lone identifier,
+    /// or a lone string.
+    ///
+    /// A lone item is passed through unchanged; an assignment produces an
+    /// ast::AssignNode built from the name node and the value node.
+    struct AssignmentStatement {
+        static constexpr auto rule =
+            lexy::dsl::p<Identifier> >>
+                (lexy::dsl::equal_sign >>
+                        (lexy::dsl::p<Identifier> | lexy::dsl::p<StringExpression> | lexy::dsl::recurse_branch<StatementListBlock>) |
+                    lexy::dsl::else_ >> lexy::dsl::return_) |
+            lexy::dsl::p<StringExpression>;
+
+        static constexpr auto value = lexy::callback<ast::NodePtr>(
+            // Lone identifier or string: hand the node back as-is.
+            [](auto node, lexy::nullopt = {}) {
+                return LEXY_MOV(node);
+            },
+            // Assignment: wrap name and value in an AssignNode.
+            [](auto name, auto&& initializer) {
+                return ast::make_node_ptr<ast::AssignNode>(LEXY_MOV(name), LEXY_MOV(initializer));
+            });
+    };
+
+ // A brace-delimited, possibly empty list of statements, optionally followed
+ // by a semicolon before the closing brace. Produces an ast::ListNode.
+ struct StatementListBlock {
+ static constexpr auto rule =
+ lexy::dsl::curly_bracketed.open() >>
+ lexy::dsl::opt(lexy::dsl::list(lexy::dsl::p<AssignmentStatement>)) + lexy::dsl::opt(lexy::dsl::semicolon) +
+ lexy::dsl::curly_bracketed.close();
+
+ // The statements are first collected into a vector of raw node pointers;
+ // the callback overloads then cover the empty list, a list followed by the
+ // absent optional, and a list on its own.
+ static constexpr auto value =
+ lexy::as_list<std::vector<ast::NodePtr>> >>
+ lexy::callback<ast::NodePtr>(
+ [](lexy::nullopt = {}, lexy::nullopt = {}) {
+ return ast::make_node_ptr<ast::ListNode>();
+ },
+ [](auto&& list, lexy::nullopt = {}) {
+ return make_node_ptr<ast::ListNode>(LEXY_MOV(list));
+ },
+ [](auto& list) {
+ return make_node_ptr<ast::ListNode>(list);
+ });
+ };
+
+ // Entry production: a list of statements running to the end of input.
+ // The statements are collected into a vector of raw node pointers that is
+ // used to allocate the ast::FileNode.
+ struct File {
+ // Allow arbitrary spaces between individual tokens.
+ // (Comments are skipped the same way.)
+ static constexpr auto whitespace = whitespace_specifier | comment_specifier;
+
+ static constexpr auto rule = lexy::dsl::terminator(lexy::dsl::eof).list(lexy::dsl::p<AssignmentStatement>);
+
+ static constexpr auto value = lexy::as_list<std::vector<ast::NodePtr>> >> lexy::new_<ast::FileNode, ast::NodePtr>;
+ };
+}