aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author Spartan322 <Megacake1234@gmail.com>2024-07-02 09:51:59 +0200
committer Spartan322 <Megacake1234@gmail.com>2024-07-05 19:17:25 +0200
commit3eb78b27505b602c1ccfa952c4cc00f942ccb2b9 (patch)
tree457b7fbda6d4470465c05d59b0ca51ed30628640
parentdeed8ec0ae23651529a58125012c1b4aab015d02 (diff)
Fix string interning pointer invalidity for AST (branch: simplify-string-interning)
Fix erroneous string intern compare.
Add symbol interning tests.
Optimize non-conversion identifier cases.
Add `File::size()`.
Add InstanceOf concept.
Remove `FlatValue::value(const symbol_interner_type&)`.
Add `AbstractSyntaxTree::intern/intern_cstr(lexy::lexeme)` overload.
Add `DiagnosticLogger::intern/intern_cstr(lexy::lexeme)` overload.
Use pinned_vector to maintain string interning pointer validity for buffers.
Add vmcontainer submodule for pinned_vector; pinned_vector reserves virtual memory at runtime using OS APIs to maintain pointer validity.
Remove Exception and RTTI requirement from range-v3 submodule.
-rw-r--r--.gitmodules5
-rw-r--r--deps/SCsub39
m---------deps/dryad0
m---------deps/range-v30
m---------deps/vmcontainer0
-rw-r--r--include/openvic-dataloader/detail/SymbolIntern.hpp256
-rw-r--r--include/openvic-dataloader/detail/Utility.hpp3
-rw-r--r--include/openvic-dataloader/detail/pinned_vector.hpp15
-rw-r--r--include/openvic-dataloader/v2script/AbstractSyntaxTree.hpp4
-rw-r--r--src/openvic-dataloader/AbstractSyntaxTree.cpp2
-rw-r--r--src/openvic-dataloader/AbstractSyntaxTree.hpp23
-rw-r--r--src/openvic-dataloader/DiagnosticLogger.hpp17
-rw-r--r--src/openvic-dataloader/File.cpp4
-rw-r--r--src/openvic-dataloader/File.hpp5
-rw-r--r--src/openvic-dataloader/detail/InternalConcepts.hpp14
-rw-r--r--src/openvic-dataloader/v2script/AbstractSyntaxTree.cpp6
-rw-r--r--src/openvic-dataloader/v2script/EventGrammar.hpp12
-rw-r--r--src/openvic-dataloader/v2script/Parser.cpp2
-rw-r--r--src/openvic-dataloader/v2script/SimpleGrammar.hpp17
-rw-r--r--tests/src/detail/SymbolIntern.cpp138
-rw-r--r--tests/src/v2script/AbstractSyntaxTree.cpp7
21 files changed, 532 insertions, 37 deletions
diff --git a/.gitmodules b/.gitmodules
index d4c2c55..c40746f 100644
--- a/.gitmodules
+++ b/.gitmodules
@@ -14,9 +14,12 @@
ignore = dirty
[submodule "deps/range-v3"]
path = deps/range-v3
- url = https://github.com/ericniebler/range-v3
+ url = https://github.com/spartan322/range-v3
ignore = dirty
[submodule "tests/deps/snitch"]
path = tests/deps/snitch
url = https://github.com/snitch-org/snitch
ignore = dirty
+[submodule "deps/vmcontainer"]
+ path = deps/vmcontainer
+ url = https://github.com/spartan322/vmcontainer
diff --git a/deps/SCsub b/deps/SCsub
index 5f714df..441ec93 100644
--- a/deps/SCsub
+++ b/deps/SCsub
@@ -56,7 +56,7 @@ def build_dryad(env):
env.Append(CXXFLAGS=["/external:I", include_dir, "/external:W0"])
else:
env.Append(CXXFLAGS=["-isystem", include_dir])
-
+
env.exposed_includes += env.dryad["INCPATH"]
def build_fmt(env):
@@ -117,7 +117,7 @@ def build_fmt(env):
env.Append(CXXFLAGS=[""])
env.Append(LIBPATH=[fmt_env.Dir(source_path)])
env.Prepend(LIBS=[library_name])
-
+
env.exposed_includes += env.fmt["INCPATH"]
def build_range_v3(env):
@@ -135,10 +135,41 @@ def build_range_v3(env):
env.Append(CXXFLAGS=["/external:I", include_dir, "/external:W0"])
else:
env.Append(CXXFLAGS=["-isystem", include_dir])
-
+
env.exposed_includes += env.range_v3["INCPATH"]
+def build_vmcontainer(env):  # Builds vmcontainer as a static library and registers its include dir, LIBPATH and LIBS on env.
+ vmcontainer_env = env.Clone()  # Separate environment used only to compile the library itself.
+
+ include_path = "vmcontainer/lib/include"
+ source_path = "vmcontainer/lib/src"
+ paths = [include_path, source_path]
+ vmcontainer_env.Append(CPPPATH=[[vmcontainer_env.Dir(p) for p in paths]])
+ sources = env.GlobRecursive("*.cpp", paths)  # Every .cpp found under the include and source dirs.
+ env.vmcontainer_sources = sources
+
+ library_name = "libvmcontainer" + env["LIBSUFFIX"]
+ library = vmcontainer_env.StaticLibrary(target=os.path.join(source_path, library_name), source=sources)
+ Default(library)
+
+ include_dir = vmcontainer_env.Dir(include_path)
+
+ env.vmcontainer = {}
+ env.vmcontainer["INCPATH"] = [include_dir]
+
+ env.Append(CPPPATH=env.vmcontainer["INCPATH"])
+ if env.get("is_msvc", False):
+ env.Append(CXXFLAGS=["/external:I", include_dir, "/external:W0"])  # MSVC: external include, warning level 0.
+ else:
+ env.Append(CXXFLAGS=["-isystem", include_dir])  # Other compilers: pass the include dir via -isystem.
+ env.Append(CXXFLAGS=[""])  # NOTE(review): appends an empty flag, same as build_fmt does - confirm it is intentional.
+ env.Append(LIBPATH=[vmcontainer_env.Dir(source_path)])
+ env.Prepend(LIBS=[library_name])
+
+ env.exposed_includes += env.vmcontainer["INCPATH"]
+
build_dryad(env)
build_fmt(env)
build_lexy(env)
-build_range_v3(env) \ No newline at end of file
+build_range_v3(env)
+build_vmcontainer(env) \ No newline at end of file
diff --git a/deps/dryad b/deps/dryad
-Subproject 3aa3d7606cb007436bb3433ddf83b8bdcf1ecc4
+Subproject 697b8f817e0c6c2a2f398c3e9217533b1507dc2
diff --git a/deps/range-v3 b/deps/range-v3
-Subproject 97452bb3eb74a73fc86504421a6a27c92bce6b9
+Subproject 334bf5772462dbd2e0e2ce142e22c4e5a8970a9
diff --git a/deps/vmcontainer b/deps/vmcontainer
new file mode 160000
+Subproject f7851cd758af9d65189f55f2a3cba0021e36908
diff --git a/include/openvic-dataloader/detail/SymbolIntern.hpp b/include/openvic-dataloader/detail/SymbolIntern.hpp
index 8755887..d72a0ba 100644
--- a/include/openvic-dataloader/detail/SymbolIntern.hpp
+++ b/include/openvic-dataloader/detail/SymbolIntern.hpp
@@ -1,14 +1,266 @@
#pragma once
#include <cstdint>
+#include <iterator>
+#include <string_view>
+
+#include <openvic-dataloader/detail/pinned_vector.hpp>
#include <dryad/symbol.hpp>
namespace ovdl {
+	/// Append-only character storage for interned symbols.
+	///
+	/// Contains all unique symbols, null-terminated, in memory one after the other.
+	/// The storage is a pinned_vector so that pointers returned by insert() remain usable
+	/// while the buffer grows. All sizes and capacities are counted in CharT elements, not bytes.
+	template<typename CharT>
+	struct symbol_buffer {
+		/// Smallest growth step; also the lower bound used for the element limit.
+		static constexpr auto min_buffer_size = 16 * 1024;
+
+		/// Creates a buffer whose element limit is requested as min_buffer_size + 1.
+		constexpr symbol_buffer() : _data_buffer(ovdl::detail::max_elements(min_buffer_size + 1)) {}
+
+		/// Creates a buffer whose element limit is the larger of `max_elements` and
+		/// min_buffer_size + 1, and reserves min_buffer_size elements immediately.
+		explicit symbol_buffer(std::size_t max_elements)
+			: _data_buffer(ovdl::detail::max_elements(std::max<std::size_t>(max_elements, min_buffer_size + 1))) {
+			_data_buffer.reserve(min_buffer_size);
+		}
+
+		/// Kept for interface compatibility; the pinned_vector releases its own storage.
+		void free() {
+		}
+
+		/// Grows the capacity to at least `new_capacity` elements.
+		///
+		/// @return true when the request was satisfied; false when `new_capacity` reaches or
+		///         exceeds max_size(), in which case the buffer is still grown to max_size().
+		bool reserve(std::size_t new_capacity) {
+			if (new_capacity <= _data_buffer.capacity())
+				return true;
+
+			if (new_capacity >= _data_buffer.max_size()) {
+				_data_buffer.reserve(_data_buffer.max_size());
+				return false;
+			}
+
+			// capacity()/max_size() are element counts, so the request must be one as well;
+			// scaling by sizeof(CharT) would over-reserve for any CharT wider than one byte.
+			_data_buffer.reserve(new_capacity);
+
+			return true;
+		}
+
+		/// Makes room for one more string of `new_string_length` characters plus its terminator.
+		///
+		/// @return true when the string will fit, false when the buffer cannot grow far enough.
+		bool reserve_new_string(std::size_t new_string_length) {
+			// +1 for null-terminator.
+			auto new_size = _data_buffer.size() + new_string_length + 1;
+			if (new_size <= _data_buffer.capacity())
+				return true;
+
+			// Grow geometrically, but never by less than min_buffer_size.
+			auto new_capacity = new_size * 2;
+			if (new_capacity < min_buffer_size)
+				new_capacity = min_buffer_size;
+
+			if (!reserve(new_capacity)) {
+				// The doubled request was refused, yet reserve() still grew to max_size(),
+				// which may be enough for the string itself.
+				return _data_buffer.capacity() >= new_size;
+			}
+
+			return true;
+		}
+
+		/// Appends a copy of `str[0, length)` followed by a null terminator.
+		///
+		/// Precondition: capacity() - size() >= length + 1 (see reserve_new_string()).
+		/// @return pointer to the first character of the stored copy.
+		const CharT* insert(const CharT* str, std::size_t length) {
+			DRYAD_PRECONDITION(_data_buffer.capacity() - _data_buffer.size() >= length + 1);
+
+			// Remember the offset rather than an iterator so the result does not depend on
+			// iterator validity across the insertion.
+			const std::size_t offset = _data_buffer.size();
+
+			// Pointer arithmetic already advances in CharT units; `length` is an element count.
+			_data_buffer.insert(_data_buffer.cend(), str, str + length);
+			_data_buffer.push_back(CharT(0));
+
+			return _data_buffer.data() + offset;
+		}
+
+		/// Returns the null-terminated string starting at element offset `index`.
+		/// Precondition: index < size().
+		const CharT* c_str(std::size_t index) const {
+			DRYAD_PRECONDITION(index < _data_buffer.size());
+			return _data_buffer.data() + index;
+		}
+
+		/// Number of elements currently stored, terminators included.
+		std::size_t size() const {
+			return _data_buffer.size();
+		}
+
+		/// Number of elements that fit without another reserve.
+		std::size_t capacity() const {
+			return _data_buffer.capacity();
+		}
+
+		/// Upper bound on the number of elements the buffer can ever hold.
+		std::size_t max_size() const {
+			return _data_buffer.max_size();
+		}
+
+	private:
+		detail::pinned_vector<CharT> _data_buffer;
+	};
+
+	/// Hash-table traits for the interner's map.
+	///
+	/// Entries are element offsets into a symbol_buffer; lookups are performed with a
+	/// (pointer, length) pair so a candidate string does not need to be null-terminated.
+	template<typename IndexType, typename CharT>
+	struct symbol_index_hash_traits {
+		/// Buffer the stored offsets refer to.
+		const symbol_buffer<CharT>* buffer;
+
+		using value_type = IndexType;
+
+		/// Lookup key: `length` characters starting at `ptr`.
+		struct string_view {
+			const CharT* ptr;
+			std::size_t length;
+		};
+
+		/// A slot is free when it holds the all-ones index.
+		static constexpr bool is_unoccupied(IndexType index) {
+			return index == IndexType(-1);
+		}
+		/// Marks `size` slots starting at `data` as free.
+		static void fill_unoccupied(IndexType* data, std::size_t size) {
+			// It has all bits set to 1, so we can do it per-byte.
+			std::memset(data, static_cast<unsigned char>(-1), size * sizeof(IndexType));
+		}
+
+		/// Two stored entries are equal when their offsets are equal.
+		static constexpr bool is_equal(IndexType entry, IndexType value) {
+			return entry == value;
+		}
+		/// Compares the stored string at `entry` with the candidate `str`.
+		bool is_equal(IndexType entry, string_view str) const {
+			// The stored string's length comes from its null terminator, so the match is
+			// exact-length. Going through basic_string_view keeps this valid for every CharT;
+			// std::strncmp only accepts `const char*`.
+			return std::basic_string_view<CharT>(buffer->c_str(entry)) == std::basic_string_view<CharT>(str.ptr, str.length);
+		}
+
+		/// Hash of the stored, null-terminated string at `entry`.
+		std::size_t hash(IndexType entry) const {
+			auto str = buffer->c_str(entry);
+			return dryad::default_hash_algorithm().hash_c_str(str).finish();
+		}
+		/// Hash of a candidate string, computed over its raw bytes.
+		static constexpr std::size_t hash(string_view str) {
+			return dryad::default_hash_algorithm()
+				.hash_bytes(reinterpret_cast<const unsigned char*>(str.ptr), str.length * sizeof(CharT))
+				.finish();
+		}
+	};
+
+ template<typename CharT = char>
+ class symbol;
+
+	/// Interns strings into a symbol_buffer and hands out pointer-based symbols.
+	///
+	/// Interning the same text twice yields symbols that compare equal. The map stores element
+	/// offsets into the buffer; the symbols themselves hold a pointer to the stored characters.
+	template<typename Id, typename CharT = char, typename IndexType = std::size_t,
+		typename MemoryResource = void>
+	class symbol_interner {
+		static_assert(std::is_trivial_v<CharT>);
+		static_assert(std::is_unsigned_v<IndexType>);
+
+		using resource_ptr = dryad::_detail::memory_resource_ptr<MemoryResource>;
+		using traits = symbol_index_hash_traits<IndexType, CharT>;
+
+	public:
+		using symbol = ovdl::symbol<CharT>;
+
+		//=== construction ===//
+		/// Uses the default buffer limit and the default memory resource.
+		constexpr symbol_interner() : _resource(dryad::_detail::get_memory_resource<MemoryResource>()) {}
+		/// @param max_elements element limit forwarded to the symbol buffer.
+		constexpr explicit symbol_interner(std::size_t max_elements)
+			: _buffer(max_elements),
+			  _resource(dryad::_detail::get_memory_resource<MemoryResource>()) {}
+		/// @param max_elements element limit forwarded to the symbol buffer.
+		/// @param resource memory resource used for the hash table.
+		constexpr explicit symbol_interner(std::size_t max_elements, MemoryResource* resource)
+			: _buffer(max_elements),
+			  _resource(resource) {}
+
+		~symbol_interner() noexcept {
+			_buffer.free();
+			_map.free(_resource);
+		}
+
+		/// Takes over `other`'s buffer and hash table, leaving `other` with empty ones.
+		symbol_interner(symbol_interner&& other) noexcept
+			: _resource(other._resource) {
+			// The buffer has to be transferred, not copy-constructed: a copy would place the
+			// characters at a new address and the source's storage would then be dropped,
+			// leaving every symbol handed out so far pointing at released memory.
+			dryad::_detail::swap(_buffer, other._buffer);
+			// Swapping also leaves `other` with an empty table, so its destructor does not
+			// free the table this object now owns.
+			dryad::_detail::swap(_map, other._map);
+		}
+
+		/// Exchanges buffer, table and resource with `other`.
+		symbol_interner& operator=(symbol_interner&& other) noexcept {
+			dryad::_detail::swap(_buffer, other._buffer);
+			dryad::_detail::swap(_map, other._map);
+			dryad::_detail::swap(_resource, other._resource);
+			return *this;
+		}
+
+		//=== interning ===//
+		/// Pre-sizes the buffer and the hash table.
+		///
+		/// @return the result of the buffer reservation; the table is rehashed either way.
+		bool reserve(std::size_t number_of_symbols, std::size_t average_symbol_length) {
+			auto success = _buffer.reserve(number_of_symbols * average_symbol_length);
+			_map.rehash(_resource, _map.to_table_capacity(number_of_symbols), traits { &_buffer });
+			return success;
+		}
+
+		/// Interns `length` characters starting at `str`.
+		///
+		/// @return the symbol for the text, or a null symbol (`symbol()`) when the buffer
+		///         cannot grow enough to store a new string.
+		symbol intern(const CharT* str, std::size_t length) {
+			if (_map.should_rehash())
+				_map.rehash(_resource, traits { &_buffer });
+
+			auto entry = _map.lookup_entry(typename traits::string_view { str, length }, traits { &_buffer });
+			if (entry)
+				// Already interned, return a symbol pointing at the stored copy.
+				return symbol(_buffer.c_str(entry.get()));
+
+			// Copy string data to buffer, as we don't have it yet.
+			if (!_buffer.reserve_new_string(length)) // Ran out of virtual memory space
+				return symbol();
+
+			auto begin = _buffer.insert(str, length);
+			auto idx = std::distance(_buffer.c_str(0), begin);
+			DRYAD_PRECONDITION(idx == IndexType(idx)); // Overflow of index type.
+
+			// Store index in map.
+			entry.create(IndexType(idx));
+
+			// Return new symbol.
+			return symbol(begin);
+		}
+		/// Interns a string literal; the trailing null of the array is not part of the text.
+		template<std::size_t N>
+		symbol intern(const CharT (&literal)[N]) {
+			DRYAD_PRECONDITION(literal[N - 1] == CharT(0));
+			return intern(literal, N - 1);
+		}
+
+	private:
+		symbol_buffer<CharT> _buffer;
+		dryad::_detail::hash_table<traits, 1024> _map;
+		DRYAD_EMPTY_MEMBER resource_ptr _resource;
+
+		friend symbol;
+	};
+
+	/// Handle to an interned string: a pointer to its null-terminated characters.
+	///
+	/// A default-constructed symbol is null. Comparisons compare the pointers, not the text.
+	template<typename CharT>
+	struct symbol {
+		using char_type = CharT;
+
+		constexpr symbol() = default;
+		constexpr explicit symbol(const CharT* begin) : _begin(begin) {}
+
+		/// True when the symbol refers to a string.
+		constexpr explicit operator bool() const {
+			return _begin != nullptr;
+		}
+
+		/// Pointer to the null-terminated characters, or nullptr for a null symbol.
+		constexpr const CharT* c_str() const {
+			return _begin;
+		}
+
+		/// View over the characters; empty for a null symbol.
+		constexpr const std::basic_string_view<CharT> view() const {
+			// Constructing a basic_string_view from a null pointer is undefined behaviour,
+			// and null symbols do occur (the interner returns one when it runs out of space).
+			return _begin != nullptr ? std::basic_string_view<CharT>(_begin) : std::basic_string_view<CharT>();
+		}
+
+		//=== comparison ===//
+		friend constexpr bool operator==(symbol lhs, symbol rhs) {
+			return lhs._begin == rhs._begin;
+		}
+		friend constexpr bool operator!=(symbol lhs, symbol rhs) {
+			return lhs._begin != rhs._begin;
+		}
+
+		friend constexpr bool operator<(symbol lhs, symbol rhs) {
+			return lhs._begin < rhs._begin;
+		}
+		friend constexpr bool operator<=(symbol lhs, symbol rhs) {
+			return lhs._begin <= rhs._begin;
+		}
+		friend constexpr bool operator>(symbol lhs, symbol rhs) {
+			return lhs._begin > rhs._begin;
+		}
+		friend constexpr bool operator>=(symbol lhs, symbol rhs) {
+			return lhs._begin >= rhs._begin;
+		}
+
+	private:
+		const CharT* _begin = nullptr;
+
+		template<typename, typename, typename, typename>
+		friend class symbol_interner;
+	};
+
struct SymbolIntern { // Bundle of the symbol/interner type aliases used by the loader.
struct SymbolId; // Tag type passed as the interner's Id parameter.
using index_type = std::uint32_t; // Index type passed to the interner.
- using symbol_type = dryad::symbol<SymbolId, index_type>;
- using symbol_interner_type = dryad::symbol_interner<SymbolId, char, index_type>;
+ using symbol_type = symbol<char>; // Pointer-based ovdl::symbol instead of dryad's index-based symbol.
+ using symbol_interner_type = symbol_interner<SymbolId, symbol_type::char_type, index_type>; // ovdl::symbol_interner over the same char type.
};
} \ No newline at end of file
diff --git a/include/openvic-dataloader/detail/Utility.hpp b/include/openvic-dataloader/detail/Utility.hpp
index 8d9e159..3da21bd 100644
--- a/include/openvic-dataloader/detail/Utility.hpp
+++ b/include/openvic-dataloader/detail/Utility.hpp
@@ -86,4 +86,7 @@ namespace ovdl::detail {
template<typename Type, template<typename...> typename Template>
static constexpr auto is_instance_of_v = is_instance_of<Type, Template>::value;
+
+	/// Satisfied when T, ignoring references and cv-qualifiers, is an instantiation of Template.
+	template<typename T, template<typename...> typename Template>
+	concept InstanceOf = is_instance_of_v<std::remove_cvref_t<T>, Template>;
} \ No newline at end of file
diff --git a/include/openvic-dataloader/detail/pinned_vector.hpp b/include/openvic-dataloader/detail/pinned_vector.hpp
new file mode 100644
index 0000000..42a7760
--- /dev/null
+++ b/include/openvic-dataloader/detail/pinned_vector.hpp
@@ -0,0 +1,15 @@
+#pragma once
+
+#include <vmcontainer/detail.hpp>
+#include <vmcontainer/pinned_vector.hpp>
+
+namespace ovdl::detail {
+	// Re-exports of the vmcontainer size helpers and pinned_vector under ovdl::detail.
+	// `inline constexpr` gives a single definition shared by every translation unit that
+	// includes this header, instead of one internal-linkage copy per translation unit.
+	inline constexpr auto max_elements = mknejp::vmcontainer::max_elements;
+	inline constexpr auto max_bytes = mknejp::vmcontainer::max_bytes;
+	inline constexpr auto max_pages = mknejp::vmcontainer::max_pages;
+
+	using pinned_vector_traits = mknejp::vmcontainer::pinned_vector_traits;
+
+	template<typename T, typename Traits = pinned_vector_traits>
+	using pinned_vector = mknejp::vmcontainer::pinned_vector<T, Traits>;
+} \ No newline at end of file
diff --git a/include/openvic-dataloader/v2script/AbstractSyntaxTree.hpp b/include/openvic-dataloader/v2script/AbstractSyntaxTree.hpp
index a582187..27ceb3d 100644
--- a/include/openvic-dataloader/v2script/AbstractSyntaxTree.hpp
+++ b/include/openvic-dataloader/v2script/AbstractSyntaxTree.hpp
@@ -84,10 +84,6 @@ namespace ovdl::v2script::ast {
return _value;
}
- const char* value(const SymbolIntern::symbol_interner_type& symbols) const {
- return _value.c_str(symbols);
- }
-
protected:
explicit FlatValue(dryad::node_ctor ctor, NodeKind kind, SymbolIntern::symbol_type value)
: node_base(ctor, kind),
diff --git a/src/openvic-dataloader/AbstractSyntaxTree.cpp b/src/openvic-dataloader/AbstractSyntaxTree.cpp
index d6f58f7..e6885a5 100644
--- a/src/openvic-dataloader/AbstractSyntaxTree.cpp
+++ b/src/openvic-dataloader/AbstractSyntaxTree.cpp
@@ -11,7 +11,7 @@ AbstractSyntaxTree::symbol_type AbstractSyntaxTree::intern(std::string_view str)
}
const char* AbstractSyntaxTree::intern_cstr(const char* str, std::size_t length) {
- return intern(str, length).c_str(_symbol_interner);
+ return intern(str, length).c_str();
}
const char* AbstractSyntaxTree::intern_cstr(std::string_view str) {
diff --git a/src/openvic-dataloader/AbstractSyntaxTree.hpp b/src/openvic-dataloader/AbstractSyntaxTree.hpp
index a5b8886..ade1c82 100644
--- a/src/openvic-dataloader/AbstractSyntaxTree.hpp
+++ b/src/openvic-dataloader/AbstractSyntaxTree.hpp
@@ -9,6 +9,8 @@
#include <openvic-dataloader/detail/SymbolIntern.hpp>
#include <openvic-dataloader/detail/Utility.hpp>
+#include <lexy/lexeme.hpp>
+
#include <dryad/node.hpp>
#include <dryad/node_map.hpp>
#include <dryad/symbol.hpp>
@@ -16,10 +18,12 @@
#include <fmt/core.h>
-#include "detail/InternalConcepts.hpp"
+#include <detail/InternalConcepts.hpp>
namespace ovdl {
struct AbstractSyntaxTree : SymbolIntern {
+	/// @param max_elements element limit handed to the symbol interner's buffer.
+	explicit AbstractSyntaxTree(std::size_t max_elements)
+		: _symbol_interner(max_elements) {
+	}
+
symbol_type intern(const char* str, std::size_t length);
symbol_type intern(std::string_view str);
const char* intern_cstr(const char* str, std::size_t length);
@@ -27,6 +31,15 @@ namespace ovdl {
symbol_interner_type& symbol_interner();
const symbol_interner_type& symbol_interner() const;
+	/// Interns the characters covered by `lexeme`.
+	template<typename Reader>
+	symbol_type intern(lexy::lexeme<Reader> lexeme) {
+		const auto first = lexeme.begin();
+		const auto count = lexeme.size();
+		return intern(first, count);
+	}
+	/// Interns the characters covered by `lexeme` and returns the stored C string.
+	template<typename Reader>
+	const char* intern_cstr(lexy::lexeme<Reader> lexeme) {
+		const auto first = lexeme.begin();
+		const auto count = lexeme.size();
+		return intern_cstr(first, count);
+	}
+
protected:
symbol_interner_type _symbol_interner;
};
@@ -37,10 +50,14 @@ namespace ovdl {
using root_node_type = RootNodeT;
using node_type = typename file_type::node_type;
- explicit BasicAbstractSyntaxTree(file_type&& file) : _file { std::move(file) } {}
+ explicit BasicAbstractSyntaxTree(file_type&& file)
+ : AbstractSyntaxTree(file.size()),
+ _file { std::move(file) } {}
template<typename Encoding, typename MemoryResource = void>
- explicit BasicAbstractSyntaxTree(lexy::buffer<Encoding, MemoryResource>&& buffer) : _file { std::move(buffer) } {}
+ explicit BasicAbstractSyntaxTree(lexy::buffer<Encoding, MemoryResource>&& buffer)
+ : AbstractSyntaxTree(buffer.size()),
+ _file { std::move(buffer) } {}
void set_location(const node_type* n, NodeLocation loc) {
_file.set_location(n, loc);
diff --git a/src/openvic-dataloader/DiagnosticLogger.hpp b/src/openvic-dataloader/DiagnosticLogger.hpp
index 2a655a9..9810e1e 100644
--- a/src/openvic-dataloader/DiagnosticLogger.hpp
+++ b/src/openvic-dataloader/DiagnosticLogger.hpp
@@ -19,6 +19,7 @@
#include <lexy/input/base.hpp>
#include <lexy/input/buffer.hpp>
#include <lexy/input_location.hpp>
+#include <lexy/lexeme.hpp>
#include <lexy/visualize.hpp>
#include <dryad/_detail/config.hpp>
@@ -36,7 +37,12 @@ namespace ovdl {
template<typename ParseState>
struct BasicDiagnosticLogger;
- struct DiagnosticLogger : SymbolIntern {
+ struct DiagnosticLogger {
+ struct SymbolId;
+ using index_type = std::uint32_t;
+ using symbol_type = dryad::symbol<SymbolId, index_type>;
+ using symbol_interner_type = dryad::symbol_interner<SymbolId, char, index_type>;
+
using AnnotationKind = lexy_ext::annotation_kind;
using DiagnosticKind = lexy_ext::diagnostic_kind;
@@ -200,6 +206,15 @@ namespace ovdl {
const symbol_interner_type& symbol_interner() const {
return _symbol_interner;
}
+
+	/// Interns the characters covered by `lexeme`.
+	template<typename Reader>
+	symbol_type intern(lexy::lexeme<Reader> lexeme) {
+		const auto text = lexeme.data();
+		const auto count = lexeme.size();
+		return intern(text, count);
+	}
+	/// Interns the characters covered by `lexeme` and returns the stored C string.
+	template<typename Reader>
+	const char* intern_cstr(lexy::lexeme<Reader> lexeme) {
+		const auto text = lexeme.data();
+		const auto count = lexeme.size();
+		return intern_cstr(text, count);
+	}
};
template<typename ParseState>
diff --git a/src/openvic-dataloader/File.cpp b/src/openvic-dataloader/File.cpp
index e4d3773..9e721a8 100644
--- a/src/openvic-dataloader/File.cpp
+++ b/src/openvic-dataloader/File.cpp
@@ -16,4 +16,8 @@ const char* File::path() const noexcept {
bool File::is_valid() const noexcept {
return _buffer.index() != 0 && !_buffer.valueless_by_exception() && visit_buffer([](auto&& buffer) { return buffer.data() != nullptr; });
+}
+
+/// Size recorded for the held buffer, or 0 when the variant holds no buffer.
+std::size_t File::size() const noexcept {
+	// Index 0 is the "no buffer" alternative; a valueless variant holds nothing either.
+	if (_buffer.index() == 0 || _buffer.valueless_by_exception()) {
+		return 0;
+	}
+	return _buffer_size;
+} \ No newline at end of file
diff --git a/src/openvic-dataloader/File.hpp b/src/openvic-dataloader/File.hpp
index 90fcb11..ec25640 100644
--- a/src/openvic-dataloader/File.hpp
+++ b/src/openvic-dataloader/File.hpp
@@ -29,6 +29,8 @@ namespace ovdl {
bool is_valid() const noexcept;
+ std::size_t size() const noexcept;
+
template<typename Encoding, typename MemoryResource = void>
constexpr bool is_buffer() const {
return buffer_ids::type_id<lexy::buffer<Encoding, MemoryResource>>() + 1 == _buffer.index();
@@ -104,6 +106,7 @@ namespace ovdl {
protected:
const char* _path;
+ std::size_t _buffer_size = 0;
detail::type_prepend_t<buffer_ids::variant_type, std::monostate> _buffer;
};
@@ -114,12 +117,14 @@ namespace ovdl {
template<typename Encoding, typename MemoryResource = void>
explicit BasicFile(const char* path, lexy::buffer<Encoding, MemoryResource>&& buffer)
: File(path) {
+ _buffer_size = buffer.size();
_buffer = static_cast<std::remove_reference_t<decltype(buffer)>&&>(buffer);
}
template<typename Encoding, typename MemoryResource = void>
explicit BasicFile(lexy::buffer<Encoding, MemoryResource>&& buffer)
: File("") {
+ _buffer_size = buffer.size();
_buffer = static_cast<std::remove_reference_t<decltype(buffer)>&&>(buffer);
}
diff --git a/src/openvic-dataloader/detail/InternalConcepts.hpp b/src/openvic-dataloader/detail/InternalConcepts.hpp
index 0c7913d..06c03a1 100644
--- a/src/openvic-dataloader/detail/InternalConcepts.hpp
+++ b/src/openvic-dataloader/detail/InternalConcepts.hpp
@@ -6,10 +6,13 @@
#include <openvic-dataloader/NodeLocation.hpp>
#include <openvic-dataloader/detail/Encoding.hpp>
#include <openvic-dataloader/detail/SymbolIntern.hpp>
+#include <openvic-dataloader/detail/Utility.hpp>
#include <lexy/encoding.hpp>
#include <lexy/input/buffer.hpp>
+#include <dryad/symbol.hpp>
+
#include <fmt/core.h>
#include <lexy_ext/report_error.hpp>
@@ -17,8 +20,9 @@
namespace ovdl::detail {
template<typename T>
concept IsFile =
- requires(T t, const typename T::node_type* node, NodeLocation location) {
+ requires(T t, const T ct, const typename T::node_type* node, NodeLocation location) {
typename T::node_type;
+ { ct.size() } -> std::same_as<size_t>;
{ t.set_location(node, location) } -> std::same_as<void>;
{ t.location_of(node) } -> std::same_as<NodeLocation>;
};
@@ -58,12 +62,12 @@ namespace ovdl::detail {
{ ct.errored() } -> std::same_as<bool>;
{ ct.warned() } -> std::same_as<bool>;
{ ct.get_errors() } -> std::same_as<typename T::error_range>;
- { t.intern(str, length) } -> std::same_as<ovdl::SymbolIntern::symbol_type>;
- { t.intern(sv) } -> std::same_as<ovdl::SymbolIntern::symbol_type>;
+ { t.intern(str, length) } -> detail::InstanceOf<dryad::symbol>;
+ { t.intern(sv) } -> detail::InstanceOf<dryad::symbol>;
{ t.intern_cstr(str, length) } -> std::same_as<const char*>;
{ t.intern_cstr(sv) } -> std::same_as<const char*>;
- { t.symbol_interner() } -> std::same_as<SymbolIntern::symbol_interner_type&>;
- { ct.symbol_interner() } -> std::same_as<const SymbolIntern::symbol_interner_type&>;
+ { t.symbol_interner() } -> detail::InstanceOf<dryad::symbol_interner>;
+ { ct.symbol_interner() } -> detail::InstanceOf<dryad::symbol_interner>;
{ t.error(std::declval<typename T::template format_str<>>()) } -> std::same_as<typename T::Writer>;
{ t.warning(std::declval<typename T::template format_str<>>()) } -> std::same_as<typename T::Writer>;
{ t.note(std::declval<typename T::template format_str<>>()) } -> std::same_as<typename T::Writer>;
diff --git a/src/openvic-dataloader/v2script/AbstractSyntaxTree.cpp b/src/openvic-dataloader/v2script/AbstractSyntaxTree.cpp
index 71985f3..72ff627 100644
--- a/src/openvic-dataloader/v2script/AbstractSyntaxTree.cpp
+++ b/src/openvic-dataloader/v2script/AbstractSyntaxTree.cpp
@@ -75,7 +75,7 @@ std::string FileAbstractSyntaxTree::make_list_visualizer() const {
dryad::visit_node(
node,
[&](const FlatValue* value) {
- result.append(value->value(this->_symbol_interner));
+ result.append(value->value().c_str());
},
[&](const ListValue* value) {
},
@@ -107,10 +107,10 @@ std::string FileAbstractSyntaxTree::make_native_visualizer() const {
dryad::visit_tree(
this->_tree,
[&](const IdentifierValue* value) {
- result.append(value->value(this->_symbol_interner));
+ result.append(value->value().c_str());
},
[&](const StringValue* value) {
- result.append(1, '"').append(value->value(this->_symbol_interner)).append(1, '"');
+ result.append(1, '"').append(value->value().c_str()).append(1, '"');
},
[&](dryad::child_visitor<NodeKind> visitor, const ValueStatement* statement) {
visitor(statement->value());
diff --git a/src/openvic-dataloader/v2script/EventGrammar.hpp b/src/openvic-dataloader/v2script/EventGrammar.hpp
index 130a233..20168f8 100644
--- a/src/openvic-dataloader/v2script/EventGrammar.hpp
+++ b/src/openvic-dataloader/v2script/EventGrammar.hpp
@@ -30,7 +30,7 @@ namespace ovdl::v2script::grammar {
static constexpr auto value = dsl::callback<ast::IdentifierValue*>(
[](detail::IsParseState auto& state, ast::IdentifierValue* value) {
bool is_number = true;
- for (auto* current = value->value(state.ast().symbol_interner()); *current; current++) {
+ for (auto* current = value->value().c_str(); *current; current++) {
is_number = is_number && std::isdigit(*current);
if (!is_number) break;
}
@@ -95,16 +95,16 @@ namespace ovdl::v2script::grammar {
static constexpr auto value =
dsl::callback<ast::EventStatement*>(
[](detail::IsParseState auto& state, NodeLocation loc, ast::IdentifierValue* name, ast::ListValue* list) {
- static auto country_decl = state.ast().intern_cstr("country_event");
- static auto province_decl = state.ast().intern_cstr("province_event");
+ // Interned symbols compare by pointer, so these can be compared directly with the declarator name.
+ auto country_decl = state.ast().intern("country_event");
+ auto province_decl = state.ast().intern("province_event");
- if (name->value(state.ast().symbol_interner()) != country_decl || name->value(state.ast().symbol_interner()) != province_decl) {
- state.logger().warning("event declarator \"{}\" is not {} or {}", name->value(state.ast().symbol_interner()), country_decl, province_decl) //
+ // Warn only when the declarator is neither keyword; with `||` the test was true for every name.
+ if (name->value() != country_decl && name->value() != province_decl) {
+ state.logger().warning("event declarator \"{}\" is not {} or {}", name->value().c_str(), country_decl.c_str(), province_decl.c_str()) //
.primary(loc, "here")
.finish();
}
- return state.ast().template create<ast::EventStatement>(loc, name->value(state.ast().symbol_interner()) == province_decl, list);
+ return state.ast().template create<ast::EventStatement>(loc, name->value() == province_decl, list);
});
};
diff --git a/src/openvic-dataloader/v2script/Parser.cpp b/src/openvic-dataloader/v2script/Parser.cpp
index a0003b1..9c6a0c5 100644
--- a/src/openvic-dataloader/v2script/Parser.cpp
+++ b/src/openvic-dataloader/v2script/Parser.cpp
@@ -266,7 +266,7 @@ const FileTree* Parser::get_file_node() const {
}
std::string_view Parser::value(const ovdl::v2script::ast::FlatValue* node) const {
- return node->value(_parse_handler->parse_state().ast().symbol_interner());
+ return node->value().view();
}
std::string Parser::make_native_string() const {
diff --git a/src/openvic-dataloader/v2script/SimpleGrammar.hpp b/src/openvic-dataloader/v2script/SimpleGrammar.hpp
index d42ce07..c47b243 100644
--- a/src/openvic-dataloader/v2script/SimpleGrammar.hpp
+++ b/src/openvic-dataloader/v2script/SimpleGrammar.hpp
@@ -126,6 +126,15 @@ namespace ovdl::v2script::grammar {
do {
if constexpr (std::same_as<encoding, lexy::default_encoding> || std::same_as<encoding, lexy::byte_encoding>) {
if (lexy::scan_result<lexy::lexeme<Reader>> ascii_result; scanner.branch(ascii_result, lexy::dsl::identifier(ascii))) {
+ if (!scanner.peek(data_char_class)) {
+ if (ascii_result.value().size() == 0) {
+ return lexy::scan_failed;
+ }
+
+ auto value = state.ast().intern(ascii_result.value());
+ return state.ast().template create<ast::IdentifierValue>(ovdl::NodeLocation::make_from(content_begin, scanner.position()), value);
+ }
+
value_result.append(ascii_result.value().begin(), ascii_result.value().end());
continue;
}
@@ -150,8 +159,12 @@ namespace ovdl::v2script::grammar {
} else {
auto lexeme_result = scanner.template parse<lexy::lexeme<Reader>>(lexy::dsl::identifier(utf_char_class));
if (lexeme_result) {
- value_result.append(lexeme_result.value().begin(), lexeme_result.value().size());
- break;
+ if (lexeme_result.value().size() == 0) {
+ return lexy::scan_failed;
+ }
+
+ auto value = state.ast().intern(lexeme_result.value());
+ return state.ast().template create<ast::IdentifierValue>(ovdl::NodeLocation::make_from(content_begin, scanner.position()), value);
}
}
} while (scanner);
diff --git a/tests/src/detail/SymbolIntern.cpp b/tests/src/detail/SymbolIntern.cpp
new file mode 100644
index 0000000..aab6584
--- /dev/null
+++ b/tests/src/detail/SymbolIntern.cpp
@@ -0,0 +1,138 @@
+#include <iterator>
+#include <string_view>
+
+#include <openvic-dataloader/detail/SymbolIntern.hpp>
+#include <openvic-dataloader/detail/pinned_vector.hpp>
+
+#include "Helper.hpp"
+#include <snitch/snitch.hpp>
+
+using namespace std::string_view_literals;
+
+using symbol_buffer = ovdl::symbol_buffer<char>;
+using symbol_interner = ovdl::symbol_interner<struct Id, char, std::size_t, void>;
+using symbol = ovdl::symbol<char>;
+
+namespace snitch {
+	/// Lets snitch print a symbol as `{<pointer>,"<text>"}` in captures and failed checks.
+	template<typename CharT>
+	bool append(snitch::small_string_span ss, const ovdl::symbol<CharT>& s) {
+		const void* const address = static_cast<const void*>(s.c_str());
+		const auto text = s.view();
+		return append(ss, "{", address, ",\"", text, "\"}");
+	}
+}
+
+TEST_CASE("symbol_buffer", "[symbol-buffer]") { // Default-constructed buffer: insert, fill towards the limit, then fail to reserve past it.
+ static constexpr std::string_view buffer_in = "input value";
+ static constexpr std::array<char, symbol_buffer::min_buffer_size - buffer_in.size()> fake_insert {}; // Zero-filled filler sized so both inserts total min_buffer_size characters.
+
+ symbol_buffer buffer;
+
+ {
+ CAPTURE(buffer_in.size());
+ CHECK(buffer.reserve(buffer_in.size()));
+ }
+
+ std::string_view buffer_val = buffer.insert(buffer_in.data(), buffer_in.size()); // insert() returns a pointer to the stored, null-terminated copy.
+ CHECK(buffer_val == buffer_in);
+ CHECK(std::distance(buffer.c_str(0), buffer_val.data() + buffer_val.size()) == buffer_in.size()); // The first string starts at offset 0.
+
+ // Minimum buffer size is 1024 * 16
+ // The default buffer constructor is expected to treat this as the max size as well
+ {
+ CAPTURE(buffer.size());
+ CAPTURE(fake_insert.size());
+ CHECK_IF(buffer.reserve(buffer.size() + fake_insert.size())) {
+ buffer.insert(fake_insert.data(), fake_insert.size() - 1); // One less than the array size leaves room for the terminator insert() appends.
+ }
+ }
+ // Pinned vector buffer operates based on system page sizes
+ // May have more capacity than specified
+ // Ensure we attempt to reserve beyond vector's max size
+ {
+ CAPTURE(buffer.size());
+ CAPTURE(ovdl::detail::pinned_vector<char>::page_size());
+ CHECK_FALSE(buffer.reserve(buffer.size() + ovdl::detail::pinned_vector<char>::page_size()));
+ }
+}
+
+TEST_CASE("symbol_buffer, max size", "[symbol-buffer-max-size]") { // Same scenario with an explicit element limit of twice the minimum size plus one.
+ static constexpr std::string_view buffer_in = "input value";
+ static constexpr std::array<char, symbol_buffer::min_buffer_size * 2 - buffer_in.size()> fake_insert {}; // Zero-filled filler sized so both inserts total twice min_buffer_size characters.
+
+ symbol_buffer buffer(symbol_buffer::min_buffer_size * 2 + 1);
+
+ {
+ CAPTURE(buffer_in.size());
+ CHECK(buffer.reserve(buffer_in.size()));
+ }
+
+ std::string_view buffer_val = buffer.insert(buffer_in.data(), buffer_in.size()); // insert() returns a pointer to the stored, null-terminated copy.
+ CHECK(buffer_val == buffer_in);
+ CHECK(std::distance(buffer.c_str(0), buffer_val.data() + buffer_val.size()) == buffer_in.size()); // The first string starts at offset 0.
+
+ {
+ CAPTURE(buffer.size());
+ CAPTURE(fake_insert.size());
+ CHECK_IF(buffer.reserve(buffer.size() + fake_insert.size())) {
+ buffer.insert(fake_insert.data(), fake_insert.size() - 1); // One less than the array size leaves room for the terminator insert() appends.
+ }
+ }
+ // Pinned vector buffer operates based on system page sizes
+ // May have more capacity than specified
+ // Ensure we attempt to reserve beyond vector's max size
+ {
+ CAPTURE(buffer.size());
+ CAPTURE(ovdl::detail::pinned_vector<char>::page_size());
+ CHECK_FALSE(buffer.reserve(buffer.size() + ovdl::detail::pinned_vector<char>::page_size()));
+ }
+}
+
+TEST_CASE("symbol_interner", "[symbol-intern]") { // Equal text must give equal symbols, and earlier symbols must stay readable after reserve() and further interning.
+ symbol_interner interner(symbol_buffer::min_buffer_size * 2);
+
+ auto test = interner.intern("test");
+ auto test2 = interner.intern("test"); // Same text again: expected to compare equal to the first symbol.
+
+ CHECK(test.view() == "test"sv);
+ CHECK(test2.view() == "test"sv);
+
+ CHECK(test == test2);
+
+ auto test3 = interner.intern("test3");
+
+ CHECK(test.view() == "test"sv);
+ CHECK(test2.view() == "test"sv);
+ CHECK(test3.view() == "test3"sv);
+
+ CHECK(test == test2);
+ CHECK_FALSE(test == test3);
+ CHECK_FALSE(test2 == test3);
+
+ CHECK_IF(interner.reserve(1024, 16 + 1)) { // Reserve for 1024 symbols averaging 17 characters, then re-check the symbols created before it.
+ auto test4 = interner.intern("test3");
+
+ CHECK(test.view() == "test"sv);
+ CHECK(test2.view() == "test"sv);
+ CHECK(test3.view() == "test3"sv);
+ CHECK(test4.view() == "test3"sv);
+
+ CHECK(test3 == test4);
+ CHECK_FALSE(test == test3);
+ CHECK_FALSE(test2 == test3);
+
+ auto test5 = interner.intern("test5");
+
+ CHECK(test.view() == "test"sv);
+ CHECK(test2.view() == "test"sv);
+ CHECK(test3.view() == "test3"sv);
+ CHECK(test4.view() == "test3"sv);
+ CHECK(test5.view() == "test5"sv);
+
+ CHECK(test3 == test4);
+ CHECK_FALSE(test == test3);
+ CHECK_FALSE(test2 == test3);
+ CHECK_FALSE(test5 == test);
+ CHECK_FALSE(test5 == test2);
+ CHECK_FALSE(test5 == test3);
+ CHECK_FALSE(test5 == test4);
+ }
+} \ No newline at end of file
diff --git a/tests/src/v2script/AbstractSyntaxTree.cpp b/tests/src/v2script/AbstractSyntaxTree.cpp
index c06da08..ad9382f 100644
--- a/tests/src/v2script/AbstractSyntaxTree.cpp
+++ b/tests/src/v2script/AbstractSyntaxTree.cpp
@@ -1,5 +1,4 @@
#include <string_view>
-#include <type_traits>
#include <openvic-dataloader/NodeLocation.hpp>
#include <openvic-dataloader/detail/SymbolIntern.hpp>
@@ -69,13 +68,13 @@ TEST_CASE("V2Script Nodes", "[v2script-nodes]") {
auto* id = ast.create_with_intern<IdentifierValue>("id");
CHECK_IF(id) {
CHECK(id->kind() == NodeKind::IdentifierValue);
- CHECK(id->value(ast.symbol_interner) == "id"sv);
+ CHECK(id->value().view() == "id"sv);
}
auto* str = ast.create_with_intern<StringValue>("str");
CHECK_IF(str) {
CHECK(str->kind() == NodeKind::StringValue);
- CHECK(str->value(ast.symbol_interner) == "str"sv);
+ CHECK(str->value().view() == "str"sv);
}
auto* list = ast.create<ListValue>();
@@ -162,7 +161,7 @@ TEST_CASE("V2Script Nodes Location", "[v2script-nodes-location]") {
auto* id = ast.create_with_loc_and_intern<IdentifierValue>(NodeLocation::make_from(&fake_buffer[0], &fake_buffer[1]), "id");
CHECK_IF(id) {
- CHECK(id->value(ast.symbol_interner) == "id"sv);
+ CHECK(id->value().view() == "id"sv);
auto location = ast.location_of(id);
CHECK_FALSE(location.is_synthesized());