aboutsummaryrefslogtreecommitdiff
path: root/src/openvic-dataloader
diff options
context:
space:
mode:
author George L. Albany <Megacake1234@gmail.com> 2024-07-22 19:30:41 +0200
committer GitHub <noreply@github.com> 2024-07-22 19:30:41 +0200
commit 847280022ec8afb35d7d8639afd639c5ec42e3c7 (patch)
tree e43aae271fe5aa75f1b0c918c1428e7f766a14f8 /src/openvic-dataloader
parent ba8addc72595607206da654bc79c994121b7a3ae (diff)
parent a07c64148eb60b886f92caa46a9c687240ec420c (diff)
Merge pull request #52 from OpenVicProject/add/backslash-identifier
Add backslash identifier support to v2script
Diffstat (limited to 'src/openvic-dataloader')
-rw-r--r-- src/openvic-dataloader/AbstractSyntaxTree.hpp 5
-rw-r--r-- src/openvic-dataloader/DiagnosticLogger.hpp 26
-rw-r--r-- src/openvic-dataloader/csv/CsvGrammar.hpp 28
-rw-r--r-- src/openvic-dataloader/csv/Parser.cpp 12
-rw-r--r-- src/openvic-dataloader/v2script/Parser.cpp 12
-rw-r--r-- src/openvic-dataloader/v2script/SimpleGrammar.hpp 34
6 files changed, 61 insertions, 56 deletions
diff --git a/src/openvic-dataloader/AbstractSyntaxTree.hpp b/src/openvic-dataloader/AbstractSyntaxTree.hpp
index ade1c82..f9f5796 100644
--- a/src/openvic-dataloader/AbstractSyntaxTree.hpp
+++ b/src/openvic-dataloader/AbstractSyntaxTree.hpp
@@ -3,6 +3,7 @@
#include <concepts>
#include <cstdio>
#include <string_view>
+#include <type_traits>
#include <utility>
#include <openvic-dataloader/NodeLocation.hpp>
@@ -51,12 +52,12 @@ namespace ovdl {
using node_type = typename file_type::node_type;
explicit BasicAbstractSyntaxTree(file_type&& file)
- : AbstractSyntaxTree(file.size()),
+ : AbstractSyntaxTree(file.size() * file.visit_buffer([](auto&& buffer) -> size_t { return sizeof(typename std::decay_t<decltype(buffer)>::char_type); })),
_file { std::move(file) } {}
template<typename Encoding, typename MemoryResource = void>
explicit BasicAbstractSyntaxTree(lexy::buffer<Encoding, MemoryResource>&& buffer)
- : AbstractSyntaxTree(buffer.size()),
+ : AbstractSyntaxTree(buffer.size() * sizeof(Encoding::char_type)),
_file { std::move(buffer) } {}
void set_location(const node_type* n, NodeLocation loc) {
diff --git a/src/openvic-dataloader/DiagnosticLogger.hpp b/src/openvic-dataloader/DiagnosticLogger.hpp
index 9810e1e..8c491ca 100644
--- a/src/openvic-dataloader/DiagnosticLogger.hpp
+++ b/src/openvic-dataloader/DiagnosticLogger.hpp
@@ -2,6 +2,7 @@
#include <concepts> // IWYU pragma: keep
#include <cstdio>
+#include <iostream>
#include <ostream>
#include <string>
#include <type_traits>
@@ -37,12 +38,7 @@ namespace ovdl {
template<typename ParseState>
struct BasicDiagnosticLogger;
- struct DiagnosticLogger {
- struct SymbolId;
- using index_type = std::uint32_t;
- using symbol_type = dryad::symbol<SymbolId, index_type>;
- using symbol_interner_type = dryad::symbol_interner<SymbolId, char, index_type>;
-
+ struct DiagnosticLogger : error::ErrorSymbolInterner {
using AnnotationKind = lexy_ext::annotation_kind;
using DiagnosticKind = lexy_ext::diagnostic_kind;
@@ -116,23 +112,23 @@ namespace ovdl {
}
result = writer.error();
} else {
- auto production = _logger.intern_cstr(production_name);
+ auto production = production_name;
if constexpr (std::is_same_v<Tag, lexy::expected_literal>) {
auto string = lexy::_detail::make_literal_lexeme<typename Reader::encoding>(error.string(), error.length());
NodeLocation loc = NodeLocation::make_from(context.position(), error.position() - 1);
- auto message = _logger.intern_cstr(fmt::format("expected '{}'", string.data()));
+ auto message = _logger.intern(fmt::format("expected '{}'", string.data()));
result = _logger.template create<error::ExpectedLiteral>(loc, message, production);
} else if constexpr (std::is_same_v<Tag, lexy::expected_keyword>) {
auto string = lexy::_detail::make_literal_lexeme<typename Reader::encoding>(error.string(), error.length());
NodeLocation loc = NodeLocation::make_from(context.position(), error.position() - 1);
- auto message = _logger.intern_cstr(fmt::format("expected keyword '{}'", string.data()));
+ auto message = _logger.intern(fmt::format("expected keyword '{}'", string.data()));
result = _logger.template create<error::ExpectedKeyword>(loc, message, production);
} else if constexpr (std::is_same_v<Tag, lexy::expected_char_class>) {
- auto message = _logger.intern_cstr(fmt::format("expected {}", error.name()));
+ auto message = _logger.intern(fmt::format("expected {}", error.name()));
result = _logger.template create<error::ExpectedCharClass>(error.position(), message, production);
} else {
NodeLocation loc = NodeLocation::make_from(error.begin(), error.end());
- auto message = _logger.intern_cstr(error.message());
+ auto message = _logger.intern(error.message());
result = _logger.template create<error::GenericParseError>(loc, message, production);
}
}
@@ -361,7 +357,7 @@ namespace ovdl {
});
error::Annotation* annotation;
- auto message = _logger.intern_cstr(output);
+ auto message = _logger.intern(output);
switch (kind) {
case AnnotationKind::primary:
annotation = _logger.create<error::PrimaryAnnotation>(loc, message);
@@ -404,7 +400,7 @@ namespace ovdl {
});
impl.write_path(iter, file().path());
- auto message = intern_cstr(output);
+ auto message = intern(output);
error->_set_message(message);
if (!error->is_linked_in_tree())
insert(error);
@@ -422,8 +418,8 @@ namespace ovdl {
});
impl.write_path(iter, file().path());
- auto production = intern_cstr(production_name);
- auto message = intern_cstr(output);
+ auto production = production_name;
+ auto message = intern(output);
auto* error = [&] {
if constexpr (std::is_same_v<Tag, lexy::expected_literal>) {
return create<error::ExpectedLiteral>(loc, message, production);
diff --git a/src/openvic-dataloader/csv/CsvGrammar.hpp b/src/openvic-dataloader/csv/CsvGrammar.hpp
index 19aee54..91226c7 100644
--- a/src/openvic-dataloader/csv/CsvGrammar.hpp
+++ b/src/openvic-dataloader/csv/CsvGrammar.hpp
@@ -11,10 +11,7 @@
#include <lexy/_detail/config.hpp>
#include <lexy/callback.hpp>
-#include <lexy/callback/string.hpp>
#include <lexy/dsl.hpp>
-#include <lexy/dsl/ascii.hpp>
-#include <lexy/dsl/option.hpp>
#include <lexy/encoding.hpp>
#include "detail/Convert.hpp"
@@ -73,6 +70,9 @@ namespace ovdl::csv::grammar {
constexpr auto escaped_quote = lexy::symbol_table<char> //
.map<'"'>('"');
+ constexpr auto escaped_newline = lexy::symbol_table<char> //
+ .map<'n'>('\n');
+
template<ParseOptions Options>
struct CsvGrammar {
struct StringValue : lexy::scan_production<std::string>,
@@ -118,6 +118,11 @@ namespace ovdl::csv::grammar {
template<auto character>
static constexpr auto _escape_check = character - (lexy::dsl::lit_b<Options.SepChar> / lexy::dsl::ascii::newline);
+ struct Backslash {
+ static constexpr auto rule = LEXY_LIT("\\n");
+ static constexpr auto value = lexy::constant('\n');
+ };
+
template<typename Context, typename Reader>
static constexpr scan_result scan(lexy::rule_scanner<Context, Reader>& scanner, detail::IsFileParseState auto& state) {
using encoding = typename Reader::encoding;
@@ -134,13 +139,16 @@ namespace ovdl::csv::grammar {
if constexpr (Options.SupportStrings) {
return lexy::dsl::identifier(character - (lexy::dsl::lit_b<Options.SepChar> / lexy::dsl::ascii::newline));
} else {
- auto escape_check_char = _escape_check<character>;
- auto id_check_char = escape_check_char - lexy::dsl::lit_b<'\\'>;
- auto id_segment = lexy::dsl::identifier(id_check_char);
- auto escape_segement = lexy::dsl::token(escape_check_char);
- auto escape_sym = lexy::dsl::symbol<escaped_symbols>(escape_segement);
- auto escape_rule = lexy::dsl::lit_b<'\\'> >> escape_sym;
- return lexy::dsl::list(id_segment | escape_rule);
+ constexpr auto backslash = lexy::dsl::lit_b<'\\'>;
+
+ constexpr auto escape_check_char = _escape_check<character>;
+ constexpr auto escape_rule = lexy::dsl::p<Backslash>;
+
+ return lexy::dsl::list(
+ lexy::dsl::identifier(escape_check_char - backslash) |
+ escape_rule |
+ lexy::dsl::capture(escape_check_char) //
+ );
}
}();
diff --git a/src/openvic-dataloader/csv/Parser.cpp b/src/openvic-dataloader/csv/Parser.cpp
index 8fe8b17..bbd1be4 100644
--- a/src/openvic-dataloader/csv/Parser.cpp
+++ b/src/openvic-dataloader/csv/Parser.cpp
@@ -195,6 +195,10 @@ typename Parser::error_range Parser::get_errors() const {
return _parse_handler->get_errors();
}
+std::string_view Parser::error(const ovdl::error::Error* error) const {
+ return error->message(_parse_handler->parse_state().logger().symbol_interner());
+}
+
const FilePosition Parser::get_error_position(const error::Error* error) const {
if (!error || !error->is_linked_in_tree()) {
return {};
@@ -231,20 +235,20 @@ void Parser::print_errors_to(std::basic_ostream<char>& stream) const {
dryad::visit_tree(
error,
[&](const error::BufferError* buffer_error) {
- stream << "buffer error: " << buffer_error->message() << '\n';
+ stream << "buffer error: " << this->error(buffer_error) << '\n';
},
[&](dryad::child_visitor<error::ErrorKind> visitor, const error::AnnotatedError* annotated_error) {
- stream << annotated_error->message() << '\n';
+ stream << this->error(annotated_error) << '\n';
auto annotations = annotated_error->annotations();
for (auto annotation : annotations) {
visitor(annotation);
}
},
[&](const error::PrimaryAnnotation* primary) {
- stream << primary->message() << '\n';
+ stream << this->error(primary) << '\n';
},
[&](const error::SecondaryAnnotation* secondary) {
- stream << secondary->message() << '\n';
+ stream << this->error(secondary) << '\n';
});
}
}
\ No newline at end of file
diff --git a/src/openvic-dataloader/v2script/Parser.cpp b/src/openvic-dataloader/v2script/Parser.cpp
index 2375a1a..e5234d6 100644
--- a/src/openvic-dataloader/v2script/Parser.cpp
+++ b/src/openvic-dataloader/v2script/Parser.cpp
@@ -320,6 +320,10 @@ Parser::error_range Parser::get_errors() const {
return _parse_handler->get_errors();
}
+std::string_view Parser::error(const ovdl::error::Error* error) const {
+ return error->message(_parse_handler->parse_state().logger().symbol_interner());
+}
+
const FilePosition Parser::get_error_position(const error::Error* error) const {
if (!error || !error->is_linked_in_tree()) {
return {};
@@ -352,20 +356,20 @@ void Parser::print_errors_to(std::basic_ostream<char>& stream) const {
dryad::visit_tree(
error,
[&](const error::BufferError* buffer_error) {
- stream << "buffer error: " << buffer_error->message() << '\n';
+ stream << "buffer error: " << this->error(buffer_error) << '\n';
},
[&](dryad::child_visitor<error::ErrorKind> visitor, const error::AnnotatedError* annotated_error) {
- stream << annotated_error->message() << '\n';
+ stream << this->error(annotated_error) << '\n';
auto annotations = annotated_error->annotations();
for (auto annotation : annotations) {
visitor(annotation);
}
},
[&](const error::PrimaryAnnotation* primary) {
- stream << primary->message() << '\n';
+ stream << this->error(primary) << '\n';
},
[&](const error::SecondaryAnnotation* secondary) {
- stream << secondary->message() << '\n';
+ stream << this->error(secondary) << '\n';
});
}
}
\ No newline at end of file
diff --git a/src/openvic-dataloader/v2script/SimpleGrammar.hpp b/src/openvic-dataloader/v2script/SimpleGrammar.hpp
index c47b243..5474c79 100644
--- a/src/openvic-dataloader/v2script/SimpleGrammar.hpp
+++ b/src/openvic-dataloader/v2script/SimpleGrammar.hpp
@@ -5,15 +5,6 @@
#include <lexy/callback.hpp>
#include <lexy/dsl.hpp>
-#include <lexy/dsl/any.hpp>
-#include <lexy/dsl/identifier.hpp>
-#include <lexy/dsl/option.hpp>
-#include <lexy/dsl/peek.hpp>
-#include <lexy/dsl/punctuator.hpp>
-#include <lexy/dsl/recover.hpp>
-#include <lexy/dsl/scan.hpp>
-#include <lexy/dsl/symbol.hpp>
-#include <lexy/dsl/unicode.hpp>
#include <lexy/encoding.hpp>
#include <lexy/input/base.hpp>
#include <lexy/input/buffer.hpp>
@@ -63,7 +54,7 @@ namespace ovdl::v2script::grammar {
/* REQUIREMENTS: DAT-631 */
static constexpr auto comment_specifier = LEXY_LIT("#") >> lexy::dsl::until(lexy::dsl::newline).or_eof();
- static constexpr auto ascii = lexy::dsl::ascii::alpha_digit_underscore / LEXY_ASCII_ONE_OF("+:@%&'-.");
+ static constexpr auto ascii = lexy::dsl::ascii::alpha_digit_underscore / LEXY_ASCII_ONE_OF("+:@%&'-.\\");
/* REQUIREMENTS:
* DAT-632
@@ -91,7 +82,7 @@ namespace ovdl::v2script::grammar {
static constexpr auto data_char_class = LEXY_CHAR_CLASS("DataSpecifier", data_specifier);
- static constexpr auto utf_data_specifier = lexy::dsl::unicode::xid_continue / LEXY_ASCII_ONE_OF("+:@%&'-.");
+ static constexpr auto utf_data_specifier = lexy::dsl::unicode::xid_continue / LEXY_ASCII_ONE_OF("+:@%&'-.\\");
static constexpr auto utf_char_class = LEXY_CHAR_CLASS("DataSpecifier", utf_data_specifier);
@@ -196,9 +187,9 @@ namespace ovdl::v2script::grammar {
if constexpr (Options.NoStringEscape) {
auto c = [] {
if constexpr (std::same_as<encoding, lexy::default_encoding> || std::same_as<encoding, lexy::byte_encoding>) {
- return dsl::lit_b_range<0x20, 0xFF> / lexy::dsl::lit_b<0x07> / lexy::dsl::lit_b<0x09> / lexy::dsl::lit_b<0x0A> / lexy::dsl::lit_b<0x0D>;
+ return dsl::lit_b_range<0x01, 0xFF>;
} else {
- return -lexy::dsl::unicode::control;
+ return lexy::dsl::unicode::character;
}
}();
return lexy::dsl::quoted(c);
@@ -287,7 +278,7 @@ namespace ovdl::v2script::grammar {
}();
static constexpr auto value = dsl::callback<ast::Statement*>(
- [](detail::IsParseState auto& state, const char* pos, ast::IdentifierValue* name, ast::Value* initializer) {
+ [](detail::IsParseState auto& state, const char* pos, ast::IdentifierValue* name, ast::Value* initializer) -> ast::AssignStatement* {
return state.ast().template create<ast::AssignStatement>(pos, name, initializer);
},
[](detail::IsParseState auto& state, bool&, const char* pos, ast::IdentifierValue* name, ast::Value* initializer) {
@@ -306,11 +297,15 @@ namespace ovdl::v2script::grammar {
return state.ast().template create<ast::ValueStatement>(pos, left);
},
[](detail::IsParseState auto& state, ast::Value* left) -> ast::ValueStatement* {
- if (left == nullptr) return nullptr;
+ if (left == nullptr) { // May no longer be necessary
+ return nullptr;
+ }
return state.ast().template create<ast::ValueStatement>(state.ast().location_of(left), left);
},
[](detail::IsParseState auto& state, bool&, ast::Value* left) -> ast::ValueStatement* {
- if (left == nullptr) return nullptr;
+ if (left == nullptr) { // May no longer be necessary
+ return nullptr;
+ }
return state.ast().template create<ast::ValueStatement>(state.ast().location_of(left), left);
});
};
@@ -322,12 +317,12 @@ namespace ovdl::v2script::grammar {
auto assign_statement = lexy::dsl::recurse_branch<AssignmentStatement>;
- auto assign_try = lexy::dsl::try_(assign_statement);
+ auto assign_try = lexy::dsl::try_(assign_statement, lexy::dsl::nullopt);
auto assign_opt = lexy::dsl::opt(lexy::dsl::list(assign_try));
auto curly_bracket = dsl::curly_bracketed(assign_opt + lexy::dsl::opt(lexy::dsl::semicolon));
- return lexy::dsl::try_(curly_bracket, lexy::dsl::find(right_brace));
+ return curly_bracket;
}();
static constexpr auto value =
@@ -346,9 +341,6 @@ namespace ovdl::v2script::grammar {
} else {
return state.ast().template create<ast::ListValue>(ovdl::NodeLocation::make_from(begin, end), LEXY_MOV(list));
}
- },
- [](detail::IsParseState auto& state, lexy::nullopt fail = {}) {
- return fail;
});
};
};