diff options
21 files changed, 532 insertions, 37 deletions
diff --git a/.gitmodules b/.gitmodules index d4c2c55..c40746f 100644 --- a/.gitmodules +++ b/.gitmodules @@ -14,9 +14,12 @@ ignore = dirty [submodule "deps/range-v3"] path = deps/range-v3 - url = https://github.com/ericniebler/range-v3 + url = https://github.com/spartan322/range-v3 ignore = dirty [submodule "tests/deps/snitch"] path = tests/deps/snitch url = https://github.com/snitch-org/snitch ignore = dirty +[submodule "deps/vmcontainer"] + path = deps/vmcontainer + url = https://github.com/spartan322/vmcontainer @@ -56,7 +56,7 @@ def build_dryad(env): env.Append(CXXFLAGS=["/external:I", include_dir, "/external:W0"]) else: env.Append(CXXFLAGS=["-isystem", include_dir]) - + env.exposed_includes += env.dryad["INCPATH"] def build_fmt(env): @@ -117,7 +117,7 @@ def build_fmt(env): env.Append(CXXFLAGS=[""]) env.Append(LIBPATH=[fmt_env.Dir(source_path)]) env.Prepend(LIBS=[library_name]) - + env.exposed_includes += env.fmt["INCPATH"] def build_range_v3(env): @@ -135,10 +135,41 @@ def build_range_v3(env): env.Append(CXXFLAGS=["/external:I", include_dir, "/external:W0"]) else: env.Append(CXXFLAGS=["-isystem", include_dir]) - + env.exposed_includes += env.range_v3["INCPATH"] +def build_vmcontainer(env): + vmcontainer_env = env.Clone() + + include_path = "vmcontainer/lib/include" + source_path = "vmcontainer/lib/src" + paths = [include_path, source_path] + vmcontainer_env.Append(CPPPATH=[[vmcontainer_env.Dir(p) for p in paths]]) + sources = env.GlobRecursive("*.cpp", paths) + env.vmcontainer_sources = sources + + library_name = "libvmcontainer" + env["LIBSUFFIX"] + library = vmcontainer_env.StaticLibrary(target=os.path.join(source_path, library_name), source=sources) + Default(library) + + include_dir = vmcontainer_env.Dir(include_path) + + env.vmcontainer = {} + env.vmcontainer["INCPATH"] = [include_dir] + + env.Append(CPPPATH=env.vmcontainer["INCPATH"]) + if env.get("is_msvc", False): + env.Append(CXXFLAGS=["/external:I", include_dir, "/external:W0"]) + else: + env.Append(CXXFLAGS=["-isystem", include_dir]) + env.Append(CXXFLAGS=[""]) + env.Append(LIBPATH=[vmcontainer_env.Dir(source_path)]) + env.Prepend(LIBS=[library_name]) + + env.exposed_includes += env.vmcontainer["INCPATH"] + build_dryad(env) build_fmt(env) build_lexy(env) -build_range_v3(env)
\ No newline at end of file +build_range_v3(env) +build_vmcontainer(env)
\ No newline at end of file diff --git a/deps/dryad b/deps/dryad -Subproject 3aa3d7606cb007436bb3433ddf83b8bdcf1ecc4 +Subproject 697b8f817e0c6c2a2f398c3e9217533b1507dc2 diff --git a/deps/range-v3 b/deps/range-v3 -Subproject 97452bb3eb74a73fc86504421a6a27c92bce6b9 +Subproject 334bf5772462dbd2e0e2ce142e22c4e5a8970a9 diff --git a/deps/vmcontainer b/deps/vmcontainer new file mode 160000 +Subproject f7851cd758af9d65189f55f2a3cba0021e36908 diff --git a/include/openvic-dataloader/detail/SymbolIntern.hpp b/include/openvic-dataloader/detail/SymbolIntern.hpp index 8755887..d72a0ba 100644 --- a/include/openvic-dataloader/detail/SymbolIntern.hpp +++ b/include/openvic-dataloader/detail/SymbolIntern.hpp @@ -1,14 +1,266 @@ #pragma once #include <cstdint> +#include <iterator> +#include <string_view> + +#include <openvic-dataloader/detail/pinned_vector.hpp> #include <dryad/symbol.hpp> namespace ovdl { + // Contains all unique symbols, null-terminated, in memory one after the other. + template<typename CharT> + struct symbol_buffer { + static constexpr auto min_buffer_size = 16 * 1024; + + constexpr symbol_buffer() : _data_buffer(ovdl::detail::max_elements(min_buffer_size + 1)) {} + explicit symbol_buffer(std::size_t max_elements) + : _data_buffer(ovdl::detail::max_elements(std::max<std::size_t>(max_elements, min_buffer_size + 1))) { + _data_buffer.reserve(min_buffer_size); + } + + void free() { + } + + bool reserve(std::size_t new_capacity) { + if (new_capacity <= _data_buffer.capacity()) + return true; + + if (new_capacity >= _data_buffer.max_size()) { + _data_buffer.reserve(_data_buffer.max_size()); + return false; + } + + _data_buffer.reserve(new_capacity * sizeof(CharT)); + + return true; + } + + bool reserve_new_string(std::size_t new_string_length) { + // +1 for null-terminator. + auto new_size = _data_buffer.size() + new_string_length + 1; + if (new_size <= _data_buffer.capacity()) + return true; + + auto new_capacity = new_size * 2; + if (new_capacity < min_buffer_size) + new_capacity = min_buffer_size; + + if (!reserve(new_capacity)) { + return _data_buffer.capacity() >= new_size; + } + + return true; + } + + const CharT* insert(const CharT* str, std::size_t length) { + DRYAD_PRECONDITION(_data_buffer.capacity() - _data_buffer.size() >= length + 1); + + auto index = _data_buffer.cend(); + + _data_buffer.insert(_data_buffer.cend(), str, str + (length * sizeof(CharT))); + _data_buffer.push_back(CharT(0)); + + return index; + } + + const CharT* c_str(std::size_t index) const { + DRYAD_PRECONDITION(index < _data_buffer.size()); + return _data_buffer.data() + index; + } + + std::size_t size() const { + return _data_buffer.size(); + } + + std::size_t capacity() const { + return _data_buffer.capacity(); + } + + std::size_t max_size() const { + return _data_buffer.max_size(); + } + + private: + detail::pinned_vector<CharT> _data_buffer; + }; + + template<typename IndexType, typename CharT> + struct symbol_index_hash_traits { + const symbol_buffer<CharT>* buffer; + + using value_type = IndexType; + + struct string_view { + const CharT* ptr; + std::size_t length; + }; + + static constexpr bool is_unoccupied(IndexType index) { + return index == IndexType(-1); + } + static void fill_unoccupied(IndexType* data, std::size_t size) { + // It has all bits set to 1, so we can do it per-byte. + std::memset(data, static_cast<unsigned char>(-1), size * sizeof(IndexType)); + } + + static constexpr bool is_equal(IndexType entry, IndexType value) { + return entry == value; + } + bool is_equal(IndexType entry, string_view str) const { + auto existing_str = buffer->c_str(entry); + return std::strncmp(existing_str, str.ptr, str.length) == 0 && *(existing_str + str.length) == CharT(0); + } + + std::size_t hash(IndexType entry) const { + auto str = buffer->c_str(entry); + return dryad::default_hash_algorithm().hash_c_str(str).finish(); + } + static constexpr std::size_t hash(string_view str) { + return dryad::default_hash_algorithm() + .hash_bytes(reinterpret_cast<const unsigned char*>(str.ptr), str.length * sizeof(CharT)) + .finish(); + } + }; + + template<typename CharT = char> + class symbol; + + template<typename Id, typename CharT = char, typename IndexType = std::size_t, + typename MemoryResource = void> + class symbol_interner { + static_assert(std::is_trivial_v<CharT>); + static_assert(std::is_unsigned_v<IndexType>); + + using resource_ptr = dryad::_detail::memory_resource_ptr<MemoryResource>; + using traits = symbol_index_hash_traits<IndexType, CharT>; + + public: + using symbol = ovdl::symbol<CharT>; + + //=== construction ===// + constexpr symbol_interner() : _resource(dryad::_detail::get_memory_resource<MemoryResource>()) {} + constexpr explicit symbol_interner(std::size_t max_elements) + : _buffer(max_elements), + _resource(dryad::_detail::get_memory_resource<MemoryResource>()) {} + constexpr explicit symbol_interner(std::size_t max_elements, MemoryResource* resource) + : _buffer(max_elements), + _resource(resource) {} + + ~symbol_interner() noexcept { + _buffer.free(); + _map.free(_resource); + } + + symbol_interner(symbol_interner&& other) noexcept + : _buffer(other._buffer), _map(other._map), _resource(other._resource) { + other._buffer = {}; + other._map = {}; + } + + symbol_interner& operator=(symbol_interner&& other) noexcept { + dryad::_detail::swap(_buffer, other._buffer); + dryad::_detail::swap(_map, other._map); + dryad::_detail::swap(_resource, other._resource); + return *this; + } + + //=== interning ===// + bool reserve(std::size_t number_of_symbols, std::size_t average_symbol_length) { + auto success = _buffer.reserve(number_of_symbols * average_symbol_length); + _map.rehash(_resource, _map.to_table_capacity(number_of_symbols), traits { &_buffer }); + return success; + } + + symbol intern(const CharT* str, std::size_t length) { + if (_map.should_rehash()) + _map.rehash(_resource, traits { &_buffer }); + + auto entry = _map.lookup_entry(typename traits::string_view { str, length }, traits { &_buffer }); + if (entry) + // Already interned, return index. + return symbol(_buffer.c_str(entry.get())); + + // Copy string data to buffer, as we don't have it yet. + if (!_buffer.reserve_new_string(length)) // Ran out of virtual memory space + return symbol(); + + auto begin = _buffer.insert(str, length); + auto idx = std::distance(_buffer.c_str(0), begin); + DRYAD_PRECONDITION(idx == IndexType(idx)); // Overflow of index type. + + // Store index in map. + entry.create(IndexType(idx)); + + // Return new symbol. + return symbol(begin); + } + template<std::size_t N> + symbol intern(const CharT (&literal)[N]) { + DRYAD_PRECONDITION(literal[N - 1] == CharT(0)); + return intern(literal, N - 1); + } + + private: + symbol_buffer<CharT> _buffer; + dryad::_detail::hash_table<traits, 1024> _map; + DRYAD_EMPTY_MEMBER resource_ptr _resource; + + friend symbol; + }; + + template<typename CharT> + struct symbol { + using char_type = CharT; + + constexpr symbol() = default; + constexpr explicit symbol(const CharT* begin) : _begin(begin) {} + + constexpr explicit operator bool() const { + return _begin != nullptr; + } + + constexpr const CharT* c_str() const { + return _begin; + } + + constexpr const std::basic_string_view<CharT> view() const { + return _begin; + } + + //=== comparison ===// + friend constexpr bool operator==(symbol lhs, symbol rhs) { + return lhs._begin == rhs._begin; + } + friend constexpr bool operator!=(symbol lhs, symbol rhs) { + return lhs._begin != rhs._begin; + } + + friend constexpr bool operator<(symbol lhs, symbol rhs) { + return lhs._begin < rhs._begin; + } + friend constexpr bool operator<=(symbol lhs, symbol rhs) { + return lhs._begin <= rhs._begin; + } + friend constexpr bool operator>(symbol lhs, symbol rhs) { + return lhs._begin > rhs._begin; + } + friend constexpr bool operator>=(symbol lhs, symbol rhs) { + return lhs._begin >= rhs._begin; + } + + private: + const CharT* _begin = nullptr; + + template<typename, typename, typename, typename> + friend class symbol_interner; + }; + struct SymbolIntern { struct SymbolId; using index_type = std::uint32_t; - using symbol_type = dryad::symbol<SymbolId, index_type>; - using symbol_interner_type = dryad::symbol_interner<SymbolId, char, index_type>; + using symbol_type = symbol<char>; + using symbol_interner_type = symbol_interner<SymbolId, symbol_type::char_type, index_type>; }; }
\ No newline at end of file diff --git a/include/openvic-dataloader/detail/Utility.hpp b/include/openvic-dataloader/detail/Utility.hpp index 8d9e159..3da21bd 100644 --- a/include/openvic-dataloader/detail/Utility.hpp +++ b/include/openvic-dataloader/detail/Utility.hpp @@ -86,4 +86,7 @@ namespace ovdl::detail { template<typename Type, template<typename...> typename Template> static constexpr auto is_instance_of_v = is_instance_of<Type, Template>::value; + + template<typename T, template<typename...> typename Template> + concept InstanceOf = is_instance_of_v<std::remove_cv_t<std::remove_reference_t<T>>, Template>; }
\ No newline at end of file diff --git a/include/openvic-dataloader/detail/pinned_vector.hpp b/include/openvic-dataloader/detail/pinned_vector.hpp new file mode 100644 index 0000000..42a7760 --- /dev/null +++ b/include/openvic-dataloader/detail/pinned_vector.hpp @@ -0,0 +1,15 @@ +#pragma once + +#include <vmcontainer/detail.hpp> +#include <vmcontainer/pinned_vector.hpp> + +namespace ovdl::detail { + static constexpr auto max_elements = mknejp::vmcontainer::max_elements; + static constexpr auto max_bytes = mknejp::vmcontainer::max_bytes; + static constexpr auto max_pages = mknejp::vmcontainer::max_pages; + + using pinned_vector_traits = mknejp::vmcontainer::pinned_vector_traits; + + template<typename T, typename Traits = pinned_vector_traits> + using pinned_vector = mknejp::vmcontainer::pinned_vector<T, Traits>; +}
\ No newline at end of file diff --git a/include/openvic-dataloader/v2script/AbstractSyntaxTree.hpp b/include/openvic-dataloader/v2script/AbstractSyntaxTree.hpp index a582187..27ceb3d 100644 --- a/include/openvic-dataloader/v2script/AbstractSyntaxTree.hpp +++ b/include/openvic-dataloader/v2script/AbstractSyntaxTree.hpp @@ -84,10 +84,6 @@ namespace ovdl::v2script::ast { return _value; } - const char* value(const SymbolIntern::symbol_interner_type& symbols) const { - return _value.c_str(symbols); - } - protected: explicit FlatValue(dryad::node_ctor ctor, NodeKind kind, SymbolIntern::symbol_type value) : node_base(ctor, kind), diff --git a/src/openvic-dataloader/AbstractSyntaxTree.cpp b/src/openvic-dataloader/AbstractSyntaxTree.cpp index d6f58f7..e6885a5 100644 --- a/src/openvic-dataloader/AbstractSyntaxTree.cpp +++ b/src/openvic-dataloader/AbstractSyntaxTree.cpp @@ -11,7 +11,7 @@ AbstractSyntaxTree::symbol_type AbstractSyntaxTree::intern(std::string_view str) } const char* AbstractSyntaxTree::intern_cstr(const char* str, std::size_t length) { - return intern(str, length).c_str(_symbol_interner); + return intern(str, length).c_str(); } const char* AbstractSyntaxTree::intern_cstr(std::string_view str) { diff --git a/src/openvic-dataloader/AbstractSyntaxTree.hpp b/src/openvic-dataloader/AbstractSyntaxTree.hpp index a5b8886..ade1c82 100644 --- a/src/openvic-dataloader/AbstractSyntaxTree.hpp +++ b/src/openvic-dataloader/AbstractSyntaxTree.hpp @@ -9,6 +9,8 @@ #include <openvic-dataloader/detail/SymbolIntern.hpp> #include <openvic-dataloader/detail/Utility.hpp> +#include <lexy/lexeme.hpp> + #include <dryad/node.hpp> #include <dryad/node_map.hpp> #include <dryad/symbol.hpp> @@ -16,10 +18,12 @@ #include <fmt/core.h> -#include "detail/InternalConcepts.hpp" +#include <detail/InternalConcepts.hpp> namespace ovdl { struct AbstractSyntaxTree : SymbolIntern { + explicit AbstractSyntaxTree(std::size_t max_elements) : _symbol_interner(max_elements) {} + symbol_type intern(const char* str, std::size_t length); symbol_type intern(std::string_view str); const char* intern_cstr(const char* str, std::size_t length); @@ -27,6 +31,15 @@ namespace ovdl { symbol_interner_type& symbol_interner(); const symbol_interner_type& symbol_interner() const; + template<typename Reader> + symbol_type intern(lexy::lexeme<Reader> lexeme) { + return intern(lexeme.begin(), lexeme.size()); + } + template<typename Reader> + const char* intern_cstr(lexy::lexeme<Reader> lexeme) { + return intern_cstr(lexeme.begin(), lexeme.size()); + } + protected: symbol_interner_type _symbol_interner; }; @@ -37,10 +50,14 @@ namespace ovdl { using root_node_type = RootNodeT; using node_type = typename file_type::node_type; - explicit BasicAbstractSyntaxTree(file_type&& file) : _file { std::move(file) } {} + explicit BasicAbstractSyntaxTree(file_type&& file) + : AbstractSyntaxTree(file.size()), + _file { std::move(file) } {} template<typename Encoding, typename MemoryResource = void> - explicit BasicAbstractSyntaxTree(lexy::buffer<Encoding, MemoryResource>&& buffer) : _file { std::move(buffer) } {} + explicit BasicAbstractSyntaxTree(lexy::buffer<Encoding, MemoryResource>&& buffer) + : AbstractSyntaxTree(buffer.size()), + _file { std::move(buffer) } {} void set_location(const node_type* n, NodeLocation loc) { _file.set_location(n, loc); diff --git a/src/openvic-dataloader/DiagnosticLogger.hpp b/src/openvic-dataloader/DiagnosticLogger.hpp index 2a655a9..9810e1e 100644 --- a/src/openvic-dataloader/DiagnosticLogger.hpp +++ b/src/openvic-dataloader/DiagnosticLogger.hpp @@ -19,6 +19,7 @@ #include <lexy/input/base.hpp> #include <lexy/input/buffer.hpp> #include <lexy/input_location.hpp> +#include <lexy/lexeme.hpp> #include <lexy/visualize.hpp> #include <dryad/_detail/config.hpp> @@ -36,7 +37,12 @@ namespace ovdl { template<typename ParseState> struct BasicDiagnosticLogger; - struct DiagnosticLogger : SymbolIntern { + struct DiagnosticLogger { + struct SymbolId; + using index_type = std::uint32_t; + using symbol_type = dryad::symbol<SymbolId, index_type>; + using symbol_interner_type = dryad::symbol_interner<SymbolId, char, index_type>; + using AnnotationKind = lexy_ext::annotation_kind; using DiagnosticKind = lexy_ext::diagnostic_kind; @@ -200,6 +206,15 @@ namespace ovdl { const symbol_interner_type& symbol_interner() const { return _symbol_interner; } + + template<typename Reader> + symbol_type intern(lexy::lexeme<Reader> lexeme) { + return intern(lexeme.data(), lexeme.size()); + } + template<typename Reader> + const char* intern_cstr(lexy::lexeme<Reader> lexeme) { + return intern_cstr(lexeme.data(), lexeme.size()); + } }; template<typename ParseState> diff --git a/src/openvic-dataloader/File.cpp b/src/openvic-dataloader/File.cpp index e4d3773..9e721a8 100644 --- a/src/openvic-dataloader/File.cpp +++ b/src/openvic-dataloader/File.cpp @@ -16,4 +16,8 @@ const char* File::path() const noexcept { bool File::is_valid() const noexcept { return _buffer.index() != 0 && !_buffer.valueless_by_exception() && visit_buffer([](auto&& buffer) { return buffer.data() != nullptr; }); +} + +std::size_t File::size() const noexcept { + return _buffer.index() != 0 && !_buffer.valueless_by_exception() ? _buffer_size : 0; }
\ No newline at end of file diff --git a/src/openvic-dataloader/File.hpp b/src/openvic-dataloader/File.hpp index 90fcb11..ec25640 100644 --- a/src/openvic-dataloader/File.hpp +++ b/src/openvic-dataloader/File.hpp @@ -29,6 +29,8 @@ namespace ovdl { bool is_valid() const noexcept; + std::size_t size() const noexcept; + template<typename Encoding, typename MemoryResource = void> constexpr bool is_buffer() const { return buffer_ids::type_id<lexy::buffer<Encoding, MemoryResource>>() + 1 == _buffer.index(); @@ -104,6 +106,7 @@ namespace ovdl { protected: const char* _path; + std::size_t _buffer_size = 0; detail::type_prepend_t<buffer_ids::variant_type, std::monostate> _buffer; }; @@ -114,12 +117,14 @@ namespace ovdl { template<typename Encoding, typename MemoryResource = void> explicit BasicFile(const char* path, lexy::buffer<Encoding, MemoryResource>&& buffer) : File(path) { + _buffer_size = buffer.size(); _buffer = static_cast<std::remove_reference_t<decltype(buffer)>&&>(buffer); } template<typename Encoding, typename MemoryResource = void> explicit BasicFile(lexy::buffer<Encoding, MemoryResource>&& buffer) : File("") { + _buffer_size = buffer.size(); _buffer = static_cast<std::remove_reference_t<decltype(buffer)>&&>(buffer); } diff --git a/src/openvic-dataloader/detail/InternalConcepts.hpp b/src/openvic-dataloader/detail/InternalConcepts.hpp index 0c7913d..06c03a1 100644 --- a/src/openvic-dataloader/detail/InternalConcepts.hpp +++ b/src/openvic-dataloader/detail/InternalConcepts.hpp @@ -6,10 +6,13 @@ #include <openvic-dataloader/NodeLocation.hpp> #include <openvic-dataloader/detail/Encoding.hpp> #include <openvic-dataloader/detail/SymbolIntern.hpp> +#include <openvic-dataloader/detail/Utility.hpp> #include <lexy/encoding.hpp> #include <lexy/input/buffer.hpp> +#include <dryad/symbol.hpp> + #include <fmt/core.h> #include <lexy_ext/report_error.hpp> @@ -17,8 +20,9 @@ namespace ovdl::detail { template<typename T> concept IsFile = - requires(T t, const typename T::node_type* node, NodeLocation location) { + requires(T t, const T ct, const typename T::node_type* node, NodeLocation location) { typename T::node_type; + { ct.size() } -> std::same_as<size_t>; { t.set_location(node, location) } -> std::same_as<void>; { t.location_of(node) } -> std::same_as<NodeLocation>; }; @@ -58,12 +62,12 @@ namespace ovdl::detail { { ct.errored() } -> std::same_as<bool>; { ct.warned() } -> std::same_as<bool>; { ct.get_errors() } -> std::same_as<typename T::error_range>; - { t.intern(str, length) } -> std::same_as<ovdl::SymbolIntern::symbol_type>; - { t.intern(sv) } -> std::same_as<ovdl::SymbolIntern::symbol_type>; + { t.intern(str, length) } -> detail::InstanceOf<dryad::symbol>; + { t.intern(sv) } -> detail::InstanceOf<dryad::symbol>; { t.intern_cstr(str, length) } -> std::same_as<const char*>; { t.intern_cstr(sv) } -> std::same_as<const char*>; - { t.symbol_interner() } -> std::same_as<SymbolIntern::symbol_interner_type&>; - { ct.symbol_interner() } -> std::same_as<const SymbolIntern::symbol_interner_type&>; + { t.symbol_interner() } -> detail::InstanceOf<dryad::symbol_interner>; + { ct.symbol_interner() } -> detail::InstanceOf<dryad::symbol_interner>; { t.error(std::declval<typename T::template format_str<>>()) } -> std::same_as<typename T::Writer>; { t.warning(std::declval<typename T::template format_str<>>()) } -> std::same_as<typename T::Writer>; { t.note(std::declval<typename T::template format_str<>>()) } -> std::same_as<typename T::Writer>; diff --git a/src/openvic-dataloader/v2script/AbstractSyntaxTree.cpp b/src/openvic-dataloader/v2script/AbstractSyntaxTree.cpp index 71985f3..72ff627 100644 --- a/src/openvic-dataloader/v2script/AbstractSyntaxTree.cpp +++ b/src/openvic-dataloader/v2script/AbstractSyntaxTree.cpp @@ -75,7 +75,7 @@ std::string FileAbstractSyntaxTree::make_list_visualizer() const { dryad::visit_node( node, [&](const FlatValue* value) { - result.append(value->value(this->_symbol_interner)); + result.append(value->value().c_str()); }, [&](const ListValue* value) { }, @@ -107,10 +107,10 @@ std::string FileAbstractSyntaxTree::make_native_visualizer() const { dryad::visit_tree( this->_tree, [&](const IdentifierValue* value) { - result.append(value->value(this->_symbol_interner)); + result.append(value->value().c_str()); }, [&](const StringValue* value) { - result.append(1, '"').append(value->value(this->_symbol_interner)).append(1, '"'); + result.append(1, '"').append(value->value().c_str()).append(1, '"'); }, [&](dryad::child_visitor<NodeKind> visitor, const ValueStatement* statement) { visitor(statement->value()); diff --git a/src/openvic-dataloader/v2script/EventGrammar.hpp b/src/openvic-dataloader/v2script/EventGrammar.hpp index 130a233..20168f8 100644 --- a/src/openvic-dataloader/v2script/EventGrammar.hpp +++ b/src/openvic-dataloader/v2script/EventGrammar.hpp @@ -30,7 +30,7 @@ namespace ovdl::v2script::grammar { static constexpr auto value = dsl::callback<ast::IdentifierValue*>( [](detail::IsParseState auto& state, ast::IdentifierValue* value) { bool is_number = true; - for (auto* current = value->value(state.ast().symbol_interner()); *current; current++) { + for (auto* current = value->value().c_str(); *current; current++) { is_number = is_number && std::isdigit(*current); if (!is_number) break; } @@ -95,16 +95,16 @@ namespace ovdl::v2script::grammar { static constexpr auto value = dsl::callback<ast::EventStatement*>( [](detail::IsParseState auto& state, NodeLocation loc, ast::IdentifierValue* name, ast::ListValue* list) { - static auto country_decl = state.ast().intern_cstr("country_event"); - static auto province_decl = state.ast().intern_cstr("province_event"); + auto country_decl = state.ast().intern("country_event"); + auto province_decl = state.ast().intern("province_event"); - if (name->value(state.ast().symbol_interner()) != country_decl || name->value(state.ast().symbol_interner()) != province_decl) { - state.logger().warning("event declarator \"{}\" is not {} or {}", name->value(state.ast().symbol_interner()), country_decl, province_decl) // + if (name->value() != country_decl || name->value() != province_decl) { + state.logger().warning("event declarator \"{}\" is not {} or {}", name->value().c_str(), country_decl.c_str(), province_decl.c_str()) // .primary(loc, "here") .finish(); } - return state.ast().template create<ast::EventStatement>(loc, name->value(state.ast().symbol_interner()) == province_decl, list); + return state.ast().template create<ast::EventStatement>(loc, name->value() == province_decl, list); }); }; diff --git a/src/openvic-dataloader/v2script/Parser.cpp b/src/openvic-dataloader/v2script/Parser.cpp index a0003b1..9c6a0c5 100644 --- a/src/openvic-dataloader/v2script/Parser.cpp +++ b/src/openvic-dataloader/v2script/Parser.cpp @@ -266,7 +266,7 @@ const FileTree* Parser::get_file_node() const { } std::string_view Parser::value(const ovdl::v2script::ast::FlatValue* node) const { - return node->value(_parse_handler->parse_state().ast().symbol_interner()); + return node->value().view(); } std::string Parser::make_native_string() const { diff --git a/src/openvic-dataloader/v2script/SimpleGrammar.hpp b/src/openvic-dataloader/v2script/SimpleGrammar.hpp index d42ce07..c47b243 100644 --- a/src/openvic-dataloader/v2script/SimpleGrammar.hpp +++ b/src/openvic-dataloader/v2script/SimpleGrammar.hpp @@ -126,6 +126,15 @@ namespace ovdl::v2script::grammar { do { if constexpr (std::same_as<encoding, lexy::default_encoding> || std::same_as<encoding, lexy::byte_encoding>) { if (lexy::scan_result<lexy::lexeme<Reader>> ascii_result; scanner.branch(ascii_result, lexy::dsl::identifier(ascii))) { + if (!scanner.peek(data_char_class)) { + if (ascii_result.value().size() == 0) { + return lexy::scan_failed; + } + + auto value = state.ast().intern(ascii_result.value()); + return state.ast().template create<ast::IdentifierValue>(ovdl::NodeLocation::make_from(content_begin, scanner.position()), value); + } + value_result.append(ascii_result.value().begin(), ascii_result.value().end()); continue; } @@ -150,8 +159,12 @@ namespace ovdl::v2script::grammar { } else { auto lexeme_result = scanner.template parse<lexy::lexeme<Reader>>(lexy::dsl::identifier(utf_char_class)); if (lexeme_result) { - value_result.append(lexeme_result.value().begin(), lexeme_result.value().size()); - break; + if (lexeme_result.value().size() == 0) { + return lexy::scan_failed; + } + + auto value = state.ast().intern(lexeme_result.value()); + return state.ast().template create<ast::IdentifierValue>(ovdl::NodeLocation::make_from(content_begin, scanner.position()), value); } } } while (scanner); diff --git a/tests/src/detail/SymbolIntern.cpp b/tests/src/detail/SymbolIntern.cpp new file mode 100644 index 0000000..aab6584 --- /dev/null +++ b/tests/src/detail/SymbolIntern.cpp @@ -0,0 +1,138 @@ +#include <iterator> +#include <string_view> + +#include <openvic-dataloader/detail/SymbolIntern.hpp> +#include <openvic-dataloader/detail/pinned_vector.hpp> + +#include "Helper.hpp" +#include <snitch/snitch.hpp> + +using namespace std::string_view_literals; + +using symbol_buffer = ovdl::symbol_buffer<char>; +using symbol_interner = ovdl::symbol_interner<struct Id, char, std::size_t, void>; +using symbol = ovdl::symbol<char>; + +namespace snitch { + template<typename CharT> + bool append(snitch::small_string_span ss, const ovdl::symbol<CharT>& s) { + return append(ss, "{", static_cast<const void*>(s.c_str()), ",\"", s.view(), "\"}"); + } +} + +TEST_CASE("symbol_buffer", "[symbol-buffer]") { + static constexpr std::string_view buffer_in = "input value"; + static constexpr std::array<char, symbol_buffer::min_buffer_size - buffer_in.size()> fake_insert {}; + + symbol_buffer buffer; + + { + CAPTURE(buffer_in.size()); + CHECK(buffer.reserve(buffer_in.size())); + } + + std::string_view buffer_val = buffer.insert(buffer_in.data(), buffer_in.size()); + CHECK(buffer_val == buffer_in); + CHECK(std::distance(buffer.c_str(0), buffer_val.data() + buffer_val.size()) == buffer_in.size()); + + // Minimum buffer size is 1024 * 16 + // The default buffer constructor is expected to treat this as the max size as well + { + CAPTURE(buffer.size()); + CAPTURE(fake_insert.size()); + CHECK_IF(buffer.reserve(buffer.size() + fake_insert.size())) { + buffer.insert(fake_insert.data(), fake_insert.size() - 1); + } + } + // Pinned vector buffer operates based on system page sizes + // May have more capacity then specified + // Ensure we attempt to reserve beyond vector's max size + { + CAPTURE(buffer.size()); + CAPTURE(ovdl::detail::pinned_vector<char>::page_size()); + CHECK_FALSE(buffer.reserve(buffer.size() + ovdl::detail::pinned_vector<char>::page_size())); + } +} + +TEST_CASE("symbol_buffer, max size", "[symbol-buffer-max-size]") { + static constexpr std::string_view buffer_in = "input value"; + static constexpr std::array<char, symbol_buffer::min_buffer_size * 2 - buffer_in.size()> fake_insert {}; + + symbol_buffer buffer(symbol_buffer::min_buffer_size * 2 + 1); + + { + CAPTURE(buffer_in.size()); + CHECK(buffer.reserve(buffer_in.size())); + } + + std::string_view buffer_val = buffer.insert(buffer_in.data(), buffer_in.size()); + CHECK(buffer_val == buffer_in); + CHECK(std::distance(buffer.c_str(0), buffer_val.data() + buffer_val.size()) == buffer_in.size()); + + { + CAPTURE(buffer.size()); + CAPTURE(fake_insert.size()); + CHECK_IF(buffer.reserve(buffer.size() + fake_insert.size())) { + buffer.insert(fake_insert.data(), fake_insert.size() - 1); + } + } + // Pinned vector buffer operates based on system page sizes + // May have more capacity then specified + // Ensure we attempt to reserve beyond vector's max size + { + CAPTURE(buffer.size()); + CAPTURE(ovdl::detail::pinned_vector<char>::page_size()); + CHECK_FALSE(buffer.reserve(buffer.size() + ovdl::detail::pinned_vector<char>::page_size())); + } +} + +TEST_CASE("symbol_interner", "[symbol-intern]") { + symbol_interner interner(symbol_buffer::min_buffer_size * 2); + + auto test = interner.intern("test"); + auto test2 = interner.intern("test"); + + CHECK(test.view() == "test"sv); + CHECK(test2.view() == "test"sv); + + CHECK(test == test2); + + auto test3 = interner.intern("test3"); + + CHECK(test.view() == "test"sv); + CHECK(test2.view() == "test"sv); + CHECK(test3.view() == "test3"sv); + + CHECK(test == test2); + CHECK_FALSE(test == test3); + CHECK_FALSE(test2 == test3); + + CHECK_IF(interner.reserve(1024, 16 + 1)) { + auto test4 = interner.intern("test3"); + + CHECK(test.view() == "test"sv); + CHECK(test2.view() == "test"sv); + CHECK(test3.view() == "test3"sv); + CHECK(test4.view() == "test3"sv); + + CHECK(test3 == test4); + CHECK_FALSE(test == test3); + CHECK_FALSE(test2 == test3); + + auto test5 = interner.intern("test5"); + + CHECK(test.view() == "test"sv); + CHECK(test2.view() == "test"sv); + CHECK(test3.view() == "test3"sv); + CHECK(test4.view() == "test3"sv); + CHECK(test5.view() == "test5"sv); + + CHECK(test3 == test4); + CHECK_FALSE(test == test3); + CHECK_FALSE(test2 == test3); + CHECK_FALSE(test5 == test); + CHECK_FALSE(test5 == test2); + CHECK_FALSE(test5 == test3); + CHECK_FALSE(test5 == test4); + } +}
\ No newline at end of file diff --git a/tests/src/v2script/AbstractSyntaxTree.cpp b/tests/src/v2script/AbstractSyntaxTree.cpp index c06da08..ad9382f 100644 --- a/tests/src/v2script/AbstractSyntaxTree.cpp +++ b/tests/src/v2script/AbstractSyntaxTree.cpp @@ -1,5 +1,4 @@ #include <string_view> -#include <type_traits> #include <openvic-dataloader/NodeLocation.hpp> #include <openvic-dataloader/detail/SymbolIntern.hpp> @@ -69,13 +68,13 @@ TEST_CASE("V2Script Nodes", "[v2script-nodes]") { auto* id = ast.create_with_intern<IdentifierValue>("id"); CHECK_IF(id) { CHECK(id->kind() == NodeKind::IdentifierValue); - CHECK(id->value(ast.symbol_interner) == "id"sv); + CHECK(id->value().view() == "id"sv); } auto* str = ast.create_with_intern<StringValue>("str"); CHECK_IF(str) { CHECK(str->kind() == NodeKind::StringValue); - CHECK(str->value(ast.symbol_interner) == "str"sv); + CHECK(str->value().view() == "str"sv); } auto* list = ast.create<ListValue>(); @@ -162,7 +161,7 @@ TEST_CASE("V2Script Nodes Location", "[v2script-nodes-location]") { auto* id = ast.create_with_loc_and_intern<IdentifierValue>(NodeLocation::make_from(&fake_buffer[0], &fake_buffer[1]), "id"); CHECK_IF(id) { - CHECK(id->value(ast.symbol_interner) == "id"sv); + CHECK(id->value().view() == "id"sv); auto location = ast.location_of(id); CHECK_FALSE(location.is_synthesized()); |