From 3eb78b27505b602c1ccfa952c4cc00f942ccb2b9 Mon Sep 17 00:00:00 2001 From: Spartan322 Date: Tue, 2 Jul 2024 03:51:59 -0400 Subject: Fix string interning pointer invalidity for AST Fix erroneous string intern compare Add symbol interning tests Optimize non-conversion identifier cases Add `File::size()` Add InstanceOf concept Remove `FlatValue::value(const symbol_interner_type&)` Add `AbstractSyntaxTree::intern/intern_cstr(lexy::lexeme)` overload Add `DiagnosticLogger::intern/intern_cstr(lexy::lexeme)` overload Use pinned_vector to maintain string interning pointer validity for buffers Add vmcontainer submodule for pinned_vector pinned_vector reserves virtual memory at runtime using OS APIs to maintain pointer validity Remove Exception and RTTI requirement from range-v3 submodule --- .gitmodules | 5 +- deps/SCsub | 39 +++- deps/dryad | 2 +- deps/range-v3 | 2 +- deps/vmcontainer | 1 + include/openvic-dataloader/detail/SymbolIntern.hpp | 256 ++++++++++++++++++++- include/openvic-dataloader/detail/Utility.hpp | 3 + .../openvic-dataloader/detail/pinned_vector.hpp | 15 ++ .../v2script/AbstractSyntaxTree.hpp | 4 - src/openvic-dataloader/AbstractSyntaxTree.cpp | 2 +- src/openvic-dataloader/AbstractSyntaxTree.hpp | 23 +- src/openvic-dataloader/DiagnosticLogger.hpp | 17 +- src/openvic-dataloader/File.cpp | 4 + src/openvic-dataloader/File.hpp | 5 + src/openvic-dataloader/detail/InternalConcepts.hpp | 14 +- .../v2script/AbstractSyntaxTree.cpp | 6 +- src/openvic-dataloader/v2script/EventGrammar.hpp | 12 +- src/openvic-dataloader/v2script/Parser.cpp | 2 +- src/openvic-dataloader/v2script/SimpleGrammar.hpp | 17 +- tests/src/detail/SymbolIntern.cpp | 138 +++++++++++ tests/src/v2script/AbstractSyntaxTree.cpp | 7 +- 21 files changed, 535 insertions(+), 39 deletions(-) create mode 160000 deps/vmcontainer create mode 100644 include/openvic-dataloader/detail/pinned_vector.hpp create mode 100644 tests/src/detail/SymbolIntern.cpp diff --git a/.gitmodules b/.gitmodules index 
d4c2c55..c40746f 100644 --- a/.gitmodules +++ b/.gitmodules @@ -14,9 +14,12 @@ ignore = dirty [submodule "deps/range-v3"] path = deps/range-v3 - url = https://github.com/ericniebler/range-v3 + url = https://github.com/spartan322/range-v3 ignore = dirty [submodule "tests/deps/snitch"] path = tests/deps/snitch url = https://github.com/snitch-org/snitch ignore = dirty +[submodule "deps/vmcontainer"] + path = deps/vmcontainer + url = https://github.com/spartan322/vmcontainer diff --git a/deps/SCsub b/deps/SCsub index 5f714df..441ec93 100644 --- a/deps/SCsub +++ b/deps/SCsub @@ -56,7 +56,7 @@ def build_dryad(env): env.Append(CXXFLAGS=["/external:I", include_dir, "/external:W0"]) else: env.Append(CXXFLAGS=["-isystem", include_dir]) - + env.exposed_includes += env.dryad["INCPATH"] def build_fmt(env): @@ -117,7 +117,7 @@ def build_fmt(env): env.Append(CXXFLAGS=[""]) env.Append(LIBPATH=[fmt_env.Dir(source_path)]) env.Prepend(LIBS=[library_name]) - + env.exposed_includes += env.fmt["INCPATH"] def build_range_v3(env): @@ -135,10 +135,41 @@ def build_range_v3(env): env.Append(CXXFLAGS=["/external:I", include_dir, "/external:W0"]) else: env.Append(CXXFLAGS=["-isystem", include_dir]) - + env.exposed_includes += env.range_v3["INCPATH"] +def build_vmcontainer(env): + vmcontainer_env = env.Clone() + + include_path = "vmcontainer/lib/include" + source_path = "vmcontainer/lib/src" + paths = [include_path, source_path] + vmcontainer_env.Append(CPPPATH=[[vmcontainer_env.Dir(p) for p in paths]]) + sources = env.GlobRecursive("*.cpp", paths) + env.vmcontainer_sources = sources + + library_name = "libvmcontainer" + env["LIBSUFFIX"] + library = vmcontainer_env.StaticLibrary(target=os.path.join(source_path, library_name), source=sources) + Default(library) + + include_dir = vmcontainer_env.Dir(include_path) + + env.vmcontainer = {} + env.vmcontainer["INCPATH"] = [include_dir] + + env.Append(CPPPATH=env.vmcontainer["INCPATH"]) + if env.get("is_msvc", False): + 
env.Append(CXXFLAGS=["/external:I", include_dir, "/external:W0"]) + else: + env.Append(CXXFLAGS=["-isystem", include_dir]) + env.Append(CXXFLAGS=[""]) + env.Append(LIBPATH=[vmcontainer_env.Dir(source_path)]) + env.Prepend(LIBS=[library_name]) + + env.exposed_includes += env.vmcontainer["INCPATH"] + build_dryad(env) build_fmt(env) build_lexy(env) -build_range_v3(env) \ No newline at end of file +build_range_v3(env) +build_vmcontainer(env) \ No newline at end of file diff --git a/deps/dryad b/deps/dryad index 3aa3d76..697b8f8 160000 --- a/deps/dryad +++ b/deps/dryad @@ -1 +1 @@ -Subproject commit 3aa3d7606cb007436bb3433ddf83b8bdcf1ecc4b +Subproject commit 697b8f817e0c6c2a2f398c3e9217533b1507dc21 diff --git a/deps/range-v3 b/deps/range-v3 index 97452bb..334bf57 160000 --- a/deps/range-v3 +++ b/deps/range-v3 @@ -1 +1 @@ -Subproject commit 97452bb3eb74a73fc86504421a6a27c92bce6b99 +Subproject commit 334bf5772462dbd2e0e2ce142e22c4e5a8970a96 diff --git a/deps/vmcontainer b/deps/vmcontainer new file mode 160000 index 0000000..f7851cd --- /dev/null +++ b/deps/vmcontainer @@ -0,0 +1 @@ +Subproject commit f7851cd758af9d65189f55f2a3cba0021e369083 diff --git a/include/openvic-dataloader/detail/SymbolIntern.hpp b/include/openvic-dataloader/detail/SymbolIntern.hpp index 8755887..d72a0ba 100644 --- a/include/openvic-dataloader/detail/SymbolIntern.hpp +++ b/include/openvic-dataloader/detail/SymbolIntern.hpp @@ -1,14 +1,266 @@ #pragma once #include +#include +#include + +#include #include namespace ovdl { + // Contains all unique symbols, null-terminated, in memory one after the other. 
+ template + struct symbol_buffer { + static constexpr auto min_buffer_size = 16 * 1024; + + constexpr symbol_buffer() : _data_buffer(ovdl::detail::max_elements(min_buffer_size + 1)) {} + explicit symbol_buffer(std::size_t max_elements) + : _data_buffer(ovdl::detail::max_elements(std::max(max_elements, min_buffer_size + 1))) { + _data_buffer.reserve(min_buffer_size); + } + + void free() { + } + + bool reserve(std::size_t new_capacity) { + if (new_capacity <= _data_buffer.capacity()) + return true; + + if (new_capacity >= _data_buffer.max_size()) { + _data_buffer.reserve(_data_buffer.max_size()); + return false; + } + + _data_buffer.reserve(new_capacity * sizeof(CharT)); + + return true; + } + + bool reserve_new_string(std::size_t new_string_length) { + // +1 for null-terminator. + auto new_size = _data_buffer.size() + new_string_length + 1; + if (new_size <= _data_buffer.capacity()) + return true; + + auto new_capacity = new_size * 2; + if (new_capacity < min_buffer_size) + new_capacity = min_buffer_size; + + if (!reserve(new_capacity)) { + return _data_buffer.capacity() >= new_size; + } + + return true; + } + + const CharT* insert(const CharT* str, std::size_t length) { + DRYAD_PRECONDITION(_data_buffer.capacity() - _data_buffer.size() >= length + 1); + + auto index = _data_buffer.cend(); + + _data_buffer.insert(_data_buffer.cend(), str, str + (length * sizeof(CharT))); + _data_buffer.push_back(CharT(0)); + + return index; + } + + const CharT* c_str(std::size_t index) const { + DRYAD_PRECONDITION(index < _data_buffer.size()); + return _data_buffer.data() + index; + } + + std::size_t size() const { + return _data_buffer.size(); + } + + std::size_t capacity() const { + return _data_buffer.capacity(); + } + + std::size_t max_size() const { + return _data_buffer.max_size(); + } + + private: + detail::pinned_vector _data_buffer; + }; + + template + struct symbol_index_hash_traits { + const symbol_buffer* buffer; + + using value_type = IndexType; + + struct string_view 
{ + const CharT* ptr; + std::size_t length; + }; + + static constexpr bool is_unoccupied(IndexType index) { + return index == IndexType(-1); + } + static void fill_unoccupied(IndexType* data, std::size_t size) { + // It has all bits set to 1, so we can do it per-byte. + std::memset(data, static_cast(-1), size * sizeof(IndexType)); + } + + static constexpr bool is_equal(IndexType entry, IndexType value) { + return entry == value; + } + bool is_equal(IndexType entry, string_view str) const { + auto existing_str = buffer->c_str(entry); + return std::strncmp(existing_str, str.ptr, str.length) == 0 && *(existing_str + str.length) == CharT(0); + } + + std::size_t hash(IndexType entry) const { + auto str = buffer->c_str(entry); + return dryad::default_hash_algorithm().hash_c_str(str).finish(); + } + static constexpr std::size_t hash(string_view str) { + return dryad::default_hash_algorithm() + .hash_bytes(reinterpret_cast(str.ptr), str.length * sizeof(CharT)) + .finish(); + } + }; + + template + class symbol; + + template + class symbol_interner { + static_assert(std::is_trivial_v); + static_assert(std::is_unsigned_v); + + using resource_ptr = dryad::_detail::memory_resource_ptr; + using traits = symbol_index_hash_traits; + + public: + using symbol = ovdl::symbol; + + //=== construction ===// + constexpr symbol_interner() : _resource(dryad::_detail::get_memory_resource()) {} + constexpr explicit symbol_interner(std::size_t max_elements) + : _buffer(max_elements), + _resource(dryad::_detail::get_memory_resource()) {} + constexpr explicit symbol_interner(std::size_t max_elements, MemoryResource* resource) + : _buffer(max_elements), + _resource(resource) {} + + ~symbol_interner() noexcept { + _buffer.free(); + _map.free(_resource); + } + + symbol_interner(symbol_interner&& other) noexcept + : _buffer(other._buffer), _map(other._map), _resource(other._resource) { + other._buffer = {}; + other._map = {}; + } + + symbol_interner& operator=(symbol_interner&& other) noexcept { + 
dryad::_detail::swap(_buffer, other._buffer); + dryad::_detail::swap(_map, other._map); + dryad::_detail::swap(_resource, other._resource); + return *this; + } + + //=== interning ===// + bool reserve(std::size_t number_of_symbols, std::size_t average_symbol_length) { + auto success = _buffer.reserve(number_of_symbols * average_symbol_length); + _map.rehash(_resource, _map.to_table_capacity(number_of_symbols), traits { &_buffer }); + return success; + } + + symbol intern(const CharT* str, std::size_t length) { + if (_map.should_rehash()) + _map.rehash(_resource, traits { &_buffer }); + + auto entry = _map.lookup_entry(typename traits::string_view { str, length }, traits { &_buffer }); + if (entry) + // Already interned, return index. + return symbol(_buffer.c_str(entry.get())); + + // Copy string data to buffer, as we don't have it yet. + if (!_buffer.reserve_new_string(length)) // Ran out of virtual memory space + return symbol(); + + auto begin = _buffer.insert(str, length); + auto idx = std::distance(_buffer.c_str(0), begin); + DRYAD_PRECONDITION(idx == IndexType(idx)); // Overflow of index type. + + // Store index in map. + entry.create(IndexType(idx)); + + // Return new symbol. 
+ return symbol(begin); + } + template + symbol intern(const CharT (&literal)[N]) { + DRYAD_PRECONDITION(literal[N - 1] == CharT(0)); + return intern(literal, N - 1); + } + + private: + symbol_buffer _buffer; + dryad::_detail::hash_table _map; + DRYAD_EMPTY_MEMBER resource_ptr _resource; + + friend symbol; + }; + + template + struct symbol { + using char_type = CharT; + + constexpr symbol() = default; + constexpr explicit symbol(const CharT* begin) : _begin(begin) {} + + constexpr explicit operator bool() const { + return _begin != nullptr; + } + + constexpr const CharT* c_str() const { + return _begin; + } + + constexpr const std::basic_string_view view() const { + return _begin; + } + + //=== comparison ===// + friend constexpr bool operator==(symbol lhs, symbol rhs) { + return lhs._begin == rhs._begin; + } + friend constexpr bool operator!=(symbol lhs, symbol rhs) { + return lhs._begin != rhs._begin; + } + + friend constexpr bool operator<(symbol lhs, symbol rhs) { + return lhs._begin < rhs._begin; + } + friend constexpr bool operator<=(symbol lhs, symbol rhs) { + return lhs._begin <= rhs._begin; + } + friend constexpr bool operator>(symbol lhs, symbol rhs) { + return lhs._begin > rhs._begin; + } + friend constexpr bool operator>=(symbol lhs, symbol rhs) { + return lhs._begin >= rhs._begin; + } + + private: + const CharT* _begin = nullptr; + + template + friend class symbol_interner; + }; + struct SymbolIntern { struct SymbolId; using index_type = std::uint32_t; - using symbol_type = dryad::symbol; - using symbol_interner_type = dryad::symbol_interner; + using symbol_type = symbol; + using symbol_interner_type = symbol_interner; }; } \ No newline at end of file diff --git a/include/openvic-dataloader/detail/Utility.hpp b/include/openvic-dataloader/detail/Utility.hpp index 8d9e159..3da21bd 100644 --- a/include/openvic-dataloader/detail/Utility.hpp +++ b/include/openvic-dataloader/detail/Utility.hpp @@ -86,4 +86,7 @@ namespace ovdl::detail { template typename 
Template> static constexpr auto is_instance_of_v = is_instance_of::value; + + template typename Template> + concept InstanceOf = is_instance_of_v>, Template>; } \ No newline at end of file diff --git a/include/openvic-dataloader/detail/pinned_vector.hpp b/include/openvic-dataloader/detail/pinned_vector.hpp new file mode 100644 index 0000000..42a7760 --- /dev/null +++ b/include/openvic-dataloader/detail/pinned_vector.hpp @@ -0,0 +1,15 @@ +#pragma once + +#include +#include + +namespace ovdl::detail { + static constexpr auto max_elements = mknejp::vmcontainer::max_elements; + static constexpr auto max_bytes = mknejp::vmcontainer::max_bytes; + static constexpr auto max_pages = mknejp::vmcontainer::max_pages; + + using pinned_vector_traits = mknejp::vmcontainer::pinned_vector_traits; + + template + using pinned_vector = mknejp::vmcontainer::pinned_vector; +} \ No newline at end of file diff --git a/include/openvic-dataloader/v2script/AbstractSyntaxTree.hpp b/include/openvic-dataloader/v2script/AbstractSyntaxTree.hpp index a582187..27ceb3d 100644 --- a/include/openvic-dataloader/v2script/AbstractSyntaxTree.hpp +++ b/include/openvic-dataloader/v2script/AbstractSyntaxTree.hpp @@ -84,10 +84,6 @@ namespace ovdl::v2script::ast { return _value; } - const char* value(const SymbolIntern::symbol_interner_type& symbols) const { - return _value.c_str(symbols); - } - protected: explicit FlatValue(dryad::node_ctor ctor, NodeKind kind, SymbolIntern::symbol_type value) : node_base(ctor, kind), diff --git a/src/openvic-dataloader/AbstractSyntaxTree.cpp b/src/openvic-dataloader/AbstractSyntaxTree.cpp index d6f58f7..e6885a5 100644 --- a/src/openvic-dataloader/AbstractSyntaxTree.cpp +++ b/src/openvic-dataloader/AbstractSyntaxTree.cpp @@ -11,7 +11,7 @@ AbstractSyntaxTree::symbol_type AbstractSyntaxTree::intern(std::string_view str) } const char* AbstractSyntaxTree::intern_cstr(const char* str, std::size_t length) { - return intern(str, length).c_str(_symbol_interner); + return intern(str, 
length).c_str(); } const char* AbstractSyntaxTree::intern_cstr(std::string_view str) { diff --git a/src/openvic-dataloader/AbstractSyntaxTree.hpp b/src/openvic-dataloader/AbstractSyntaxTree.hpp index a5b8886..ade1c82 100644 --- a/src/openvic-dataloader/AbstractSyntaxTree.hpp +++ b/src/openvic-dataloader/AbstractSyntaxTree.hpp @@ -9,6 +9,8 @@ #include #include +#include + #include #include #include @@ -16,10 +18,12 @@ #include -#include "detail/InternalConcepts.hpp" +#include namespace ovdl { struct AbstractSyntaxTree : SymbolIntern { + explicit AbstractSyntaxTree(std::size_t max_elements) : _symbol_interner(max_elements) {} + symbol_type intern(const char* str, std::size_t length); symbol_type intern(std::string_view str); const char* intern_cstr(const char* str, std::size_t length); @@ -27,6 +31,15 @@ namespace ovdl { symbol_interner_type& symbol_interner(); const symbol_interner_type& symbol_interner() const; + template + symbol_type intern(lexy::lexeme lexeme) { + return intern(lexeme.begin(), lexeme.size()); + } + template + const char* intern_cstr(lexy::lexeme lexeme) { + return intern_cstr(lexeme.begin(), lexeme.size()); + } + protected: symbol_interner_type _symbol_interner; }; @@ -37,10 +50,14 @@ namespace ovdl { using root_node_type = RootNodeT; using node_type = typename file_type::node_type; - explicit BasicAbstractSyntaxTree(file_type&& file) : _file { std::move(file) } {} + explicit BasicAbstractSyntaxTree(file_type&& file) + : AbstractSyntaxTree(file.size()), + _file { std::move(file) } {} template - explicit BasicAbstractSyntaxTree(lexy::buffer&& buffer) : _file { std::move(buffer) } {} + explicit BasicAbstractSyntaxTree(lexy::buffer&& buffer) + : AbstractSyntaxTree(buffer.size()), + _file { std::move(buffer) } {} void set_location(const node_type* n, NodeLocation loc) { _file.set_location(n, loc); diff --git a/src/openvic-dataloader/DiagnosticLogger.hpp b/src/openvic-dataloader/DiagnosticLogger.hpp index 2a655a9..9810e1e 100644 --- 
a/src/openvic-dataloader/DiagnosticLogger.hpp +++ b/src/openvic-dataloader/DiagnosticLogger.hpp @@ -19,6 +19,7 @@ #include #include #include +#include #include #include @@ -36,7 +37,12 @@ namespace ovdl { template struct BasicDiagnosticLogger; - struct DiagnosticLogger : SymbolIntern { + struct DiagnosticLogger { + struct SymbolId; + using index_type = std::uint32_t; + using symbol_type = dryad::symbol; + using symbol_interner_type = dryad::symbol_interner; + using AnnotationKind = lexy_ext::annotation_kind; using DiagnosticKind = lexy_ext::diagnostic_kind; @@ -200,6 +206,15 @@ namespace ovdl { const symbol_interner_type& symbol_interner() const { return _symbol_interner; } + + template + symbol_type intern(lexy::lexeme lexeme) { + return intern(lexeme.data(), lexeme.size()); + } + template + const char* intern_cstr(lexy::lexeme lexeme) { + return intern_cstr(lexeme.data(), lexeme.size()); + } }; template diff --git a/src/openvic-dataloader/File.cpp b/src/openvic-dataloader/File.cpp index e4d3773..9e721a8 100644 --- a/src/openvic-dataloader/File.cpp +++ b/src/openvic-dataloader/File.cpp @@ -16,4 +16,8 @@ const char* File::path() const noexcept { bool File::is_valid() const noexcept { return _buffer.index() != 0 && !_buffer.valueless_by_exception() && visit_buffer([](auto&& buffer) { return buffer.data() != nullptr; }); +} + +std::size_t File::size() const noexcept { + return _buffer.index() != 0 && !_buffer.valueless_by_exception() ? 
_buffer_size : 0; } \ No newline at end of file diff --git a/src/openvic-dataloader/File.hpp b/src/openvic-dataloader/File.hpp index 90fcb11..ec25640 100644 --- a/src/openvic-dataloader/File.hpp +++ b/src/openvic-dataloader/File.hpp @@ -29,6 +29,8 @@ namespace ovdl { bool is_valid() const noexcept; + std::size_t size() const noexcept; + template constexpr bool is_buffer() const { return buffer_ids::type_id>() + 1 == _buffer.index(); @@ -104,6 +106,7 @@ namespace ovdl { protected: const char* _path; + std::size_t _buffer_size = 0; detail::type_prepend_t _buffer; }; @@ -114,12 +117,14 @@ namespace ovdl { template explicit BasicFile(const char* path, lexy::buffer&& buffer) : File(path) { + _buffer_size = buffer.size(); _buffer = static_cast&&>(buffer); } template explicit BasicFile(lexy::buffer&& buffer) : File("") { + _buffer_size = buffer.size(); _buffer = static_cast&&>(buffer); } diff --git a/src/openvic-dataloader/detail/InternalConcepts.hpp b/src/openvic-dataloader/detail/InternalConcepts.hpp index 0c7913d..06c03a1 100644 --- a/src/openvic-dataloader/detail/InternalConcepts.hpp +++ b/src/openvic-dataloader/detail/InternalConcepts.hpp @@ -6,10 +6,13 @@ #include #include #include +#include #include #include +#include + #include #include @@ -17,8 +20,9 @@ namespace ovdl::detail { template concept IsFile = - requires(T t, const typename T::node_type* node, NodeLocation location) { + requires(T t, const T ct, const typename T::node_type* node, NodeLocation location) { typename T::node_type; + { ct.size() } -> std::same_as; { t.set_location(node, location) } -> std::same_as; { t.location_of(node) } -> std::same_as; }; @@ -58,12 +62,12 @@ namespace ovdl::detail { { ct.errored() } -> std::same_as; { ct.warned() } -> std::same_as; { ct.get_errors() } -> std::same_as; - { t.intern(str, length) } -> std::same_as; - { t.intern(sv) } -> std::same_as; + { t.intern(str, length) } -> detail::InstanceOf; + { t.intern(sv) } -> detail::InstanceOf; { t.intern_cstr(str, length) } -> 
std::same_as; { t.intern_cstr(sv) } -> std::same_as; - { t.symbol_interner() } -> std::same_as; - { ct.symbol_interner() } -> std::same_as; + { t.symbol_interner() } -> detail::InstanceOf; + { ct.symbol_interner() } -> detail::InstanceOf; { t.error(std::declval>()) } -> std::same_as; { t.warning(std::declval>()) } -> std::same_as; { t.note(std::declval>()) } -> std::same_as; diff --git a/src/openvic-dataloader/v2script/AbstractSyntaxTree.cpp b/src/openvic-dataloader/v2script/AbstractSyntaxTree.cpp index 71985f3..72ff627 100644 --- a/src/openvic-dataloader/v2script/AbstractSyntaxTree.cpp +++ b/src/openvic-dataloader/v2script/AbstractSyntaxTree.cpp @@ -75,7 +75,7 @@ std::string FileAbstractSyntaxTree::make_list_visualizer() const { dryad::visit_node( node, [&](const FlatValue* value) { - result.append(value->value(this->_symbol_interner)); + result.append(value->value().c_str()); }, [&](const ListValue* value) { }, @@ -107,10 +107,10 @@ std::string FileAbstractSyntaxTree::make_native_visualizer() const { dryad::visit_tree( this->_tree, [&](const IdentifierValue* value) { - result.append(value->value(this->_symbol_interner)); + result.append(value->value().c_str()); }, [&](const StringValue* value) { - result.append(1, '"').append(value->value(this->_symbol_interner)).append(1, '"'); + result.append(1, '"').append(value->value().c_str()).append(1, '"'); }, [&](dryad::child_visitor visitor, const ValueStatement* statement) { visitor(statement->value()); diff --git a/src/openvic-dataloader/v2script/EventGrammar.hpp b/src/openvic-dataloader/v2script/EventGrammar.hpp index 130a233..20168f8 100644 --- a/src/openvic-dataloader/v2script/EventGrammar.hpp +++ b/src/openvic-dataloader/v2script/EventGrammar.hpp @@ -30,7 +30,7 @@ namespace ovdl::v2script::grammar { static constexpr auto value = dsl::callback( [](detail::IsParseState auto& state, ast::IdentifierValue* value) { bool is_number = true; - for (auto* current = value->value(state.ast().symbol_interner()); *current; 
current++) { + for (auto* current = value->value().c_str(); *current; current++) { is_number = is_number && std::isdigit(*current); if (!is_number) break; } @@ -95,16 +95,16 @@ namespace ovdl::v2script::grammar { static constexpr auto value = dsl::callback( [](detail::IsParseState auto& state, NodeLocation loc, ast::IdentifierValue* name, ast::ListValue* list) { - static auto country_decl = state.ast().intern_cstr("country_event"); - static auto province_decl = state.ast().intern_cstr("province_event"); + auto country_decl = state.ast().intern("country_event"); + auto province_decl = state.ast().intern("province_event"); - if (name->value(state.ast().symbol_interner()) != country_decl || name->value(state.ast().symbol_interner()) != province_decl) { - state.logger().warning("event declarator \"{}\" is not {} or {}", name->value(state.ast().symbol_interner()), country_decl, province_decl) // + if (name->value() != country_decl || name->value() != province_decl) { + state.logger().warning("event declarator \"{}\" is not {} or {}", name->value().c_str(), country_decl.c_str(), province_decl.c_str()) // .primary(loc, "here") .finish(); } - return state.ast().template create(loc, name->value(state.ast().symbol_interner()) == province_decl, list); + return state.ast().template create(loc, name->value() == province_decl, list); }); }; diff --git a/src/openvic-dataloader/v2script/Parser.cpp b/src/openvic-dataloader/v2script/Parser.cpp index a0003b1..9c6a0c5 100644 --- a/src/openvic-dataloader/v2script/Parser.cpp +++ b/src/openvic-dataloader/v2script/Parser.cpp @@ -266,7 +266,7 @@ const FileTree* Parser::get_file_node() const { } std::string_view Parser::value(const ovdl::v2script::ast::FlatValue* node) const { - return node->value(_parse_handler->parse_state().ast().symbol_interner()); + return node->value().view(); } std::string Parser::make_native_string() const { diff --git a/src/openvic-dataloader/v2script/SimpleGrammar.hpp 
b/src/openvic-dataloader/v2script/SimpleGrammar.hpp index d42ce07..c47b243 100644 --- a/src/openvic-dataloader/v2script/SimpleGrammar.hpp +++ b/src/openvic-dataloader/v2script/SimpleGrammar.hpp @@ -126,6 +126,15 @@ namespace ovdl::v2script::grammar { do { if constexpr (std::same_as || std::same_as) { if (lexy::scan_result> ascii_result; scanner.branch(ascii_result, lexy::dsl::identifier(ascii))) { + if (!scanner.peek(data_char_class)) { + if (ascii_result.value().size() == 0) { + return lexy::scan_failed; + } + + auto value = state.ast().intern(ascii_result.value()); + return state.ast().template create(ovdl::NodeLocation::make_from(content_begin, scanner.position()), value); + } + value_result.append(ascii_result.value().begin(), ascii_result.value().end()); continue; } @@ -150,8 +159,12 @@ namespace ovdl::v2script::grammar { } else { auto lexeme_result = scanner.template parse>(lexy::dsl::identifier(utf_char_class)); if (lexeme_result) { - value_result.append(lexeme_result.value().begin(), lexeme_result.value().size()); - break; + if (lexeme_result.value().size() == 0) { + return lexy::scan_failed; + } + + auto value = state.ast().intern(lexeme_result.value()); + return state.ast().template create(ovdl::NodeLocation::make_from(content_begin, scanner.position()), value); } } } while (scanner); diff --git a/tests/src/detail/SymbolIntern.cpp b/tests/src/detail/SymbolIntern.cpp new file mode 100644 index 0000000..aab6584 --- /dev/null +++ b/tests/src/detail/SymbolIntern.cpp @@ -0,0 +1,138 @@ +#include +#include + +#include +#include + +#include "Helper.hpp" +#include + +using namespace std::string_view_literals; + +using symbol_buffer = ovdl::symbol_buffer; +using symbol_interner = ovdl::symbol_interner; +using symbol = ovdl::symbol; + +namespace snitch { + template + bool append(snitch::small_string_span ss, const ovdl::symbol& s) { + return append(ss, "{", static_cast(s.c_str()), ",\"", s.view(), "\"}"); + } +} + +TEST_CASE("symbol_buffer", "[symbol-buffer]") { + 
static constexpr std::string_view buffer_in = "input value"; + static constexpr std::array fake_insert {}; + + symbol_buffer buffer; + + { + CAPTURE(buffer_in.size()); + CHECK(buffer.reserve(buffer_in.size())); + } + + std::string_view buffer_val = buffer.insert(buffer_in.data(), buffer_in.size()); + CHECK(buffer_val == buffer_in); + CHECK(std::distance(buffer.c_str(0), buffer_val.data() + buffer_val.size()) == buffer_in.size()); + + // Minimum buffer size is 1024 * 16 + // The default buffer constructor is expected to treat this as the max size as well + { + CAPTURE(buffer.size()); + CAPTURE(fake_insert.size()); + CHECK_IF(buffer.reserve(buffer.size() + fake_insert.size())) { + buffer.insert(fake_insert.data(), fake_insert.size() - 1); + } + } + // Pinned vector buffer operates based on system page sizes + // May have more capacity then specified + // Ensure we attempt to reserve beyond vector's max size + { + CAPTURE(buffer.size()); + CAPTURE(ovdl::detail::pinned_vector::page_size()); + CHECK_FALSE(buffer.reserve(buffer.size() + ovdl::detail::pinned_vector::page_size())); + } +} + +TEST_CASE("symbol_buffer, max size", "[symbol-buffer-max-size]") { + static constexpr std::string_view buffer_in = "input value"; + static constexpr std::array fake_insert {}; + + symbol_buffer buffer(symbol_buffer::min_buffer_size * 2 + 1); + + { + CAPTURE(buffer_in.size()); + CHECK(buffer.reserve(buffer_in.size())); + } + + std::string_view buffer_val = buffer.insert(buffer_in.data(), buffer_in.size()); + CHECK(buffer_val == buffer_in); + CHECK(std::distance(buffer.c_str(0), buffer_val.data() + buffer_val.size()) == buffer_in.size()); + + { + CAPTURE(buffer.size()); + CAPTURE(fake_insert.size()); + CHECK_IF(buffer.reserve(buffer.size() + fake_insert.size())) { + buffer.insert(fake_insert.data(), fake_insert.size() - 1); + } + } + // Pinned vector buffer operates based on system page sizes + // May have more capacity then specified + // Ensure we attempt to reserve beyond vector's max 
size + { + CAPTURE(buffer.size()); + CAPTURE(ovdl::detail::pinned_vector::page_size()); + CHECK_FALSE(buffer.reserve(buffer.size() + ovdl::detail::pinned_vector::page_size())); + } +} + +TEST_CASE("symbol_interner", "[symbol-intern]") { + symbol_interner interner(symbol_buffer::min_buffer_size * 2); + + auto test = interner.intern("test"); + auto test2 = interner.intern("test"); + + CHECK(test.view() == "test"sv); + CHECK(test2.view() == "test"sv); + + CHECK(test == test2); + + auto test3 = interner.intern("test3"); + + CHECK(test.view() == "test"sv); + CHECK(test2.view() == "test"sv); + CHECK(test3.view() == "test3"sv); + + CHECK(test == test2); + CHECK_FALSE(test == test3); + CHECK_FALSE(test2 == test3); + + CHECK_IF(interner.reserve(1024, 16 + 1)) { + auto test4 = interner.intern("test3"); + + CHECK(test.view() == "test"sv); + CHECK(test2.view() == "test"sv); + CHECK(test3.view() == "test3"sv); + CHECK(test4.view() == "test3"sv); + + CHECK(test3 == test4); + CHECK_FALSE(test == test3); + CHECK_FALSE(test2 == test3); + + auto test5 = interner.intern("test5"); + + CHECK(test.view() == "test"sv); + CHECK(test2.view() == "test"sv); + CHECK(test3.view() == "test3"sv); + CHECK(test4.view() == "test3"sv); + CHECK(test5.view() == "test5"sv); + + CHECK(test3 == test4); + CHECK_FALSE(test == test3); + CHECK_FALSE(test2 == test3); + CHECK_FALSE(test5 == test); + CHECK_FALSE(test5 == test2); + CHECK_FALSE(test5 == test3); + CHECK_FALSE(test5 == test4); + } +} \ No newline at end of file diff --git a/tests/src/v2script/AbstractSyntaxTree.cpp b/tests/src/v2script/AbstractSyntaxTree.cpp index c06da08..ad9382f 100644 --- a/tests/src/v2script/AbstractSyntaxTree.cpp +++ b/tests/src/v2script/AbstractSyntaxTree.cpp @@ -1,5 +1,4 @@ #include -#include #include #include @@ -69,13 +68,13 @@ TEST_CASE("V2Script Nodes", "[v2script-nodes]") { auto* id = ast.create_with_intern("id"); CHECK_IF(id) { CHECK(id->kind() == NodeKind::IdentifierValue); - CHECK(id->value(ast.symbol_interner) == 
"id"sv); + CHECK(id->value().view() == "id"sv); } auto* str = ast.create_with_intern("str"); CHECK_IF(str) { CHECK(str->kind() == NodeKind::StringValue); - CHECK(str->value(ast.symbol_interner) == "str"sv); + CHECK(str->value().view() == "str"sv); } auto* list = ast.create(); @@ -162,7 +161,7 @@ TEST_CASE("V2Script Nodes Location", "[v2script-nodes-location]") { auto* id = ast.create_with_loc_and_intern(NodeLocation::make_from(&fake_buffer[0], &fake_buffer[1]), "id"); CHECK_IF(id) { - CHECK(id->value(ast.symbol_interner) == "id"sv); + CHECK(id->value().view() == "id"sv); auto location = ast.location_of(id); CHECK_FALSE(location.is_synthesized()); -- cgit v1.2.3-56-ga3b1