aboutsummaryrefslogtreecommitdiff
path: root/src
diff options
context:
space:
mode:
author Spartan322 <Megacake1234@gmail.com> 2024-07-02 09:51:59 +0200
committer Spartan322 <Megacake1234@gmail.com> 2024-07-05 19:17:25 +0200
commit 3eb78b27505b602c1ccfa952c4cc00f942ccb2b9 (patch)
tree 457b7fbda6d4470465c05d59b0ca51ed30628640 /src
parent deed8ec0ae23651529a58125012c1b4aab015d02 (diff)
Fix string interning pointer invalidity for AST (branch: simplify-string-interning)
Fix erroneous string intern compare
Add symbol interning tests
Optimize non-conversion identifier cases
Add `File::size()`
Add InstanceOf concept
Remove `FlatValue::value(const symbol_interner_type&)`
Add `AbstractSyntaxTree::intern/intern_cst(lexy::lexeme)` overload
Add `DiagnosticLogger::intern/intern_cstr(lexy::lexeme)` overload
Use pinned_vector to maintain string interning pointer validity for buffers
Add vmcontainer submodule for pinned_vector
pinned_vector reserves virtual memory at runtime using OS APIs to maintain pointer validity
Remove Exception and RTTI requirement from range-v3 submodule
Diffstat (limited to 'src')
-rw-r--r-- src/openvic-dataloader/AbstractSyntaxTree.cpp | 2
-rw-r--r-- src/openvic-dataloader/AbstractSyntaxTree.hpp | 23
-rw-r--r-- src/openvic-dataloader/DiagnosticLogger.hpp | 17
-rw-r--r-- src/openvic-dataloader/File.cpp | 4
-rw-r--r-- src/openvic-dataloader/File.hpp | 5
-rw-r--r-- src/openvic-dataloader/detail/InternalConcepts.hpp | 14
-rw-r--r-- src/openvic-dataloader/v2script/AbstractSyntaxTree.cpp | 6
-rw-r--r-- src/openvic-dataloader/v2script/EventGrammar.hpp | 12
-rw-r--r-- src/openvic-dataloader/v2script/Parser.cpp | 2
-rw-r--r-- src/openvic-dataloader/v2script/SimpleGrammar.hpp | 17
10 files changed, 80 insertions, 22 deletions
diff --git a/src/openvic-dataloader/AbstractSyntaxTree.cpp b/src/openvic-dataloader/AbstractSyntaxTree.cpp
index d6f58f7..e6885a5 100644
--- a/src/openvic-dataloader/AbstractSyntaxTree.cpp
+++ b/src/openvic-dataloader/AbstractSyntaxTree.cpp
@@ -11,7 +11,7 @@ AbstractSyntaxTree::symbol_type AbstractSyntaxTree::intern(std::string_view str)
}
const char* AbstractSyntaxTree::intern_cstr(const char* str, std::size_t length) {
- return intern(str, length).c_str(_symbol_interner);
+ return intern(str, length).c_str();
}
const char* AbstractSyntaxTree::intern_cstr(std::string_view str) {
diff --git a/src/openvic-dataloader/AbstractSyntaxTree.hpp b/src/openvic-dataloader/AbstractSyntaxTree.hpp
index a5b8886..ade1c82 100644
--- a/src/openvic-dataloader/AbstractSyntaxTree.hpp
+++ b/src/openvic-dataloader/AbstractSyntaxTree.hpp
@@ -9,6 +9,8 @@
#include <openvic-dataloader/detail/SymbolIntern.hpp>
#include <openvic-dataloader/detail/Utility.hpp>
+#include <lexy/lexeme.hpp>
+
#include <dryad/node.hpp>
#include <dryad/node_map.hpp>
#include <dryad/symbol.hpp>
@@ -16,10 +18,12 @@
#include <fmt/core.h>
-#include "detail/InternalConcepts.hpp"
+#include <detail/InternalConcepts.hpp>
namespace ovdl {
struct AbstractSyntaxTree : SymbolIntern {
+ explicit AbstractSyntaxTree(std::size_t max_elements) : _symbol_interner(max_elements) {}
+
symbol_type intern(const char* str, std::size_t length);
symbol_type intern(std::string_view str);
const char* intern_cstr(const char* str, std::size_t length);
@@ -27,6 +31,15 @@ namespace ovdl {
symbol_interner_type& symbol_interner();
const symbol_interner_type& symbol_interner() const;
+ template<typename Reader>
+ symbol_type intern(lexy::lexeme<Reader> lexeme) {
+ return intern(lexeme.begin(), lexeme.size());
+ }
+ template<typename Reader>
+ const char* intern_cstr(lexy::lexeme<Reader> lexeme) {
+ return intern_cstr(lexeme.begin(), lexeme.size());
+ }
+
protected:
symbol_interner_type _symbol_interner;
};
@@ -37,10 +50,14 @@ namespace ovdl {
using root_node_type = RootNodeT;
using node_type = typename file_type::node_type;
- explicit BasicAbstractSyntaxTree(file_type&& file) : _file { std::move(file) } {}
+ explicit BasicAbstractSyntaxTree(file_type&& file)
+ : AbstractSyntaxTree(file.size()),
+ _file { std::move(file) } {}
template<typename Encoding, typename MemoryResource = void>
- explicit BasicAbstractSyntaxTree(lexy::buffer<Encoding, MemoryResource>&& buffer) : _file { std::move(buffer) } {}
+ explicit BasicAbstractSyntaxTree(lexy::buffer<Encoding, MemoryResource>&& buffer)
+ : AbstractSyntaxTree(buffer.size()),
+ _file { std::move(buffer) } {}
void set_location(const node_type* n, NodeLocation loc) {
_file.set_location(n, loc);
diff --git a/src/openvic-dataloader/DiagnosticLogger.hpp b/src/openvic-dataloader/DiagnosticLogger.hpp
index 2a655a9..9810e1e 100644
--- a/src/openvic-dataloader/DiagnosticLogger.hpp
+++ b/src/openvic-dataloader/DiagnosticLogger.hpp
@@ -19,6 +19,7 @@
#include <lexy/input/base.hpp>
#include <lexy/input/buffer.hpp>
#include <lexy/input_location.hpp>
+#include <lexy/lexeme.hpp>
#include <lexy/visualize.hpp>
#include <dryad/_detail/config.hpp>
@@ -36,7 +37,12 @@ namespace ovdl {
template<typename ParseState>
struct BasicDiagnosticLogger;
- struct DiagnosticLogger : SymbolIntern {
+ struct DiagnosticLogger {
+ struct SymbolId;
+ using index_type = std::uint32_t;
+ using symbol_type = dryad::symbol<SymbolId, index_type>;
+ using symbol_interner_type = dryad::symbol_interner<SymbolId, char, index_type>;
+
using AnnotationKind = lexy_ext::annotation_kind;
using DiagnosticKind = lexy_ext::diagnostic_kind;
@@ -200,6 +206,15 @@ namespace ovdl {
const symbol_interner_type& symbol_interner() const {
return _symbol_interner;
}
+
+ template<typename Reader>
+ symbol_type intern(lexy::lexeme<Reader> lexeme) {
+ return intern(lexeme.data(), lexeme.size());
+ }
+ template<typename Reader>
+ const char* intern_cstr(lexy::lexeme<Reader> lexeme) {
+ return intern_cstr(lexeme.data(), lexeme.size());
+ }
};
template<typename ParseState>
diff --git a/src/openvic-dataloader/File.cpp b/src/openvic-dataloader/File.cpp
index e4d3773..9e721a8 100644
--- a/src/openvic-dataloader/File.cpp
+++ b/src/openvic-dataloader/File.cpp
@@ -16,4 +16,8 @@ const char* File::path() const noexcept {
bool File::is_valid() const noexcept {
return _buffer.index() != 0 && !_buffer.valueless_by_exception() && visit_buffer([](auto&& buffer) { return buffer.data() != nullptr; });
+}
+
+std::size_t File::size() const noexcept {
+ return _buffer.index() != 0 && !_buffer.valueless_by_exception() ? _buffer_size : 0;
}
\ No newline at end of file
diff --git a/src/openvic-dataloader/File.hpp b/src/openvic-dataloader/File.hpp
index 90fcb11..ec25640 100644
--- a/src/openvic-dataloader/File.hpp
+++ b/src/openvic-dataloader/File.hpp
@@ -29,6 +29,8 @@ namespace ovdl {
bool is_valid() const noexcept;
+ std::size_t size() const noexcept;
+
template<typename Encoding, typename MemoryResource = void>
constexpr bool is_buffer() const {
return buffer_ids::type_id<lexy::buffer<Encoding, MemoryResource>>() + 1 == _buffer.index();
@@ -104,6 +106,7 @@ namespace ovdl {
protected:
const char* _path;
+ std::size_t _buffer_size = 0;
detail::type_prepend_t<buffer_ids::variant_type, std::monostate> _buffer;
};
@@ -114,12 +117,14 @@ namespace ovdl {
template<typename Encoding, typename MemoryResource = void>
explicit BasicFile(const char* path, lexy::buffer<Encoding, MemoryResource>&& buffer)
: File(path) {
+ _buffer_size = buffer.size();
_buffer = static_cast<std::remove_reference_t<decltype(buffer)>&&>(buffer);
}
template<typename Encoding, typename MemoryResource = void>
explicit BasicFile(lexy::buffer<Encoding, MemoryResource>&& buffer)
: File("") {
+ _buffer_size = buffer.size();
_buffer = static_cast<std::remove_reference_t<decltype(buffer)>&&>(buffer);
}
diff --git a/src/openvic-dataloader/detail/InternalConcepts.hpp b/src/openvic-dataloader/detail/InternalConcepts.hpp
index 0c7913d..06c03a1 100644
--- a/src/openvic-dataloader/detail/InternalConcepts.hpp
+++ b/src/openvic-dataloader/detail/InternalConcepts.hpp
@@ -6,10 +6,13 @@
#include <openvic-dataloader/NodeLocation.hpp>
#include <openvic-dataloader/detail/Encoding.hpp>
#include <openvic-dataloader/detail/SymbolIntern.hpp>
+#include <openvic-dataloader/detail/Utility.hpp>
#include <lexy/encoding.hpp>
#include <lexy/input/buffer.hpp>
+#include <dryad/symbol.hpp>
+
#include <fmt/core.h>
#include <lexy_ext/report_error.hpp>
@@ -17,8 +20,9 @@
namespace ovdl::detail {
template<typename T>
concept IsFile =
- requires(T t, const typename T::node_type* node, NodeLocation location) {
+ requires(T t, const T ct, const typename T::node_type* node, NodeLocation location) {
typename T::node_type;
+ { ct.size() } -> std::same_as<size_t>;
{ t.set_location(node, location) } -> std::same_as<void>;
{ t.location_of(node) } -> std::same_as<NodeLocation>;
};
@@ -58,12 +62,12 @@ namespace ovdl::detail {
{ ct.errored() } -> std::same_as<bool>;
{ ct.warned() } -> std::same_as<bool>;
{ ct.get_errors() } -> std::same_as<typename T::error_range>;
- { t.intern(str, length) } -> std::same_as<ovdl::SymbolIntern::symbol_type>;
- { t.intern(sv) } -> std::same_as<ovdl::SymbolIntern::symbol_type>;
+ { t.intern(str, length) } -> detail::InstanceOf<dryad::symbol>;
+ { t.intern(sv) } -> detail::InstanceOf<dryad::symbol>;
{ t.intern_cstr(str, length) } -> std::same_as<const char*>;
{ t.intern_cstr(sv) } -> std::same_as<const char*>;
- { t.symbol_interner() } -> std::same_as<SymbolIntern::symbol_interner_type&>;
- { ct.symbol_interner() } -> std::same_as<const SymbolIntern::symbol_interner_type&>;
+ { t.symbol_interner() } -> detail::InstanceOf<dryad::symbol_interner>;
+ { ct.symbol_interner() } -> detail::InstanceOf<dryad::symbol_interner>;
{ t.error(std::declval<typename T::template format_str<>>()) } -> std::same_as<typename T::Writer>;
{ t.warning(std::declval<typename T::template format_str<>>()) } -> std::same_as<typename T::Writer>;
{ t.note(std::declval<typename T::template format_str<>>()) } -> std::same_as<typename T::Writer>;
diff --git a/src/openvic-dataloader/v2script/AbstractSyntaxTree.cpp b/src/openvic-dataloader/v2script/AbstractSyntaxTree.cpp
index 71985f3..72ff627 100644
--- a/src/openvic-dataloader/v2script/AbstractSyntaxTree.cpp
+++ b/src/openvic-dataloader/v2script/AbstractSyntaxTree.cpp
@@ -75,7 +75,7 @@ std::string FileAbstractSyntaxTree::make_list_visualizer() const {
dryad::visit_node(
node,
[&](const FlatValue* value) {
- result.append(value->value(this->_symbol_interner));
+ result.append(value->value().c_str());
},
[&](const ListValue* value) {
},
@@ -107,10 +107,10 @@ std::string FileAbstractSyntaxTree::make_native_visualizer() const {
dryad::visit_tree(
this->_tree,
[&](const IdentifierValue* value) {
- result.append(value->value(this->_symbol_interner));
+ result.append(value->value().c_str());
},
[&](const StringValue* value) {
- result.append(1, '"').append(value->value(this->_symbol_interner)).append(1, '"');
+ result.append(1, '"').append(value->value().c_str()).append(1, '"');
},
[&](dryad::child_visitor<NodeKind> visitor, const ValueStatement* statement) {
visitor(statement->value());
diff --git a/src/openvic-dataloader/v2script/EventGrammar.hpp b/src/openvic-dataloader/v2script/EventGrammar.hpp
index 130a233..20168f8 100644
--- a/src/openvic-dataloader/v2script/EventGrammar.hpp
+++ b/src/openvic-dataloader/v2script/EventGrammar.hpp
@@ -30,7 +30,7 @@ namespace ovdl::v2script::grammar {
static constexpr auto value = dsl::callback<ast::IdentifierValue*>(
[](detail::IsParseState auto& state, ast::IdentifierValue* value) {
bool is_number = true;
- for (auto* current = value->value(state.ast().symbol_interner()); *current; current++) {
+ for (auto* current = value->value().c_str(); *current; current++) {
is_number = is_number && std::isdigit(*current);
if (!is_number) break;
}
@@ -95,16 +95,16 @@ namespace ovdl::v2script::grammar {
static constexpr auto value =
dsl::callback<ast::EventStatement*>(
[](detail::IsParseState auto& state, NodeLocation loc, ast::IdentifierValue* name, ast::ListValue* list) {
- static auto country_decl = state.ast().intern_cstr("country_event");
- static auto province_decl = state.ast().intern_cstr("province_event");
+ auto country_decl = state.ast().intern("country_event"); // interned so the declarator can be compared as a symbol below
+ auto province_decl = state.ast().intern("province_event");
- if (name->value(state.ast().symbol_interner()) != country_decl || name->value(state.ast().symbol_interner()) != province_decl) {
- state.logger().warning("event declarator \"{}\" is not {} or {}", name->value(state.ast().symbol_interner()), country_decl, province_decl) //
+ if (name->value() != country_decl && name->value() != province_decl) { // warn only when the declarator matches neither known kind; `||` was always true
+ state.logger().warning("event declarator \"{}\" is not {} or {}", name->value().c_str(), country_decl.c_str(), province_decl.c_str()) //
.primary(loc, "here")
.finish();
}
- return state.ast().template create<ast::EventStatement>(loc, name->value(state.ast().symbol_interner()) == province_decl, list);
+ return state.ast().template create<ast::EventStatement>(loc, name->value() == province_decl, list);
});
};
diff --git a/src/openvic-dataloader/v2script/Parser.cpp b/src/openvic-dataloader/v2script/Parser.cpp
index a0003b1..9c6a0c5 100644
--- a/src/openvic-dataloader/v2script/Parser.cpp
+++ b/src/openvic-dataloader/v2script/Parser.cpp
@@ -266,7 +266,7 @@ const FileTree* Parser::get_file_node() const {
}
std::string_view Parser::value(const ovdl::v2script::ast::FlatValue* node) const {
- return node->value(_parse_handler->parse_state().ast().symbol_interner());
+ return node->value().view();
}
std::string Parser::make_native_string() const {
diff --git a/src/openvic-dataloader/v2script/SimpleGrammar.hpp b/src/openvic-dataloader/v2script/SimpleGrammar.hpp
index d42ce07..c47b243 100644
--- a/src/openvic-dataloader/v2script/SimpleGrammar.hpp
+++ b/src/openvic-dataloader/v2script/SimpleGrammar.hpp
@@ -126,6 +126,15 @@ namespace ovdl::v2script::grammar {
do {
if constexpr (std::same_as<encoding, lexy::default_encoding> || std::same_as<encoding, lexy::byte_encoding>) {
if (lexy::scan_result<lexy::lexeme<Reader>> ascii_result; scanner.branch(ascii_result, lexy::dsl::identifier(ascii))) {
+ if (!scanner.peek(data_char_class)) {
+ if (ascii_result.value().size() == 0) {
+ return lexy::scan_failed;
+ }
+
+ auto value = state.ast().intern(ascii_result.value());
+ return state.ast().template create<ast::IdentifierValue>(ovdl::NodeLocation::make_from(content_begin, scanner.position()), value);
+ }
+
value_result.append(ascii_result.value().begin(), ascii_result.value().end());
continue;
}
@@ -150,8 +159,12 @@ namespace ovdl::v2script::grammar {
} else {
auto lexeme_result = scanner.template parse<lexy::lexeme<Reader>>(lexy::dsl::identifier(utf_char_class));
if (lexeme_result) {
- value_result.append(lexeme_result.value().begin(), lexeme_result.value().size());
- break;
+ if (lexeme_result.value().size() == 0) {
+ return lexy::scan_failed;
+ }
+
+ auto value = state.ast().intern(lexeme_result.value());
+ return state.ast().template create<ast::IdentifierValue>(ovdl::NodeLocation::make_from(content_begin, scanner.position()), value);
}
}
} while (scanner);