diff options
author | Spartan322 <Megacake1234@gmail.com> | 2024-05-09 16:06:02 +0200 |
---|---|---|
committer | Spartan322 <Megacake1234@gmail.com> | 2024-06-18 01:31:12 +0200 |
commit | b0c3ba3f91926b0c95625bdbf4aab69269130b13 (patch) | |
tree | f15ebc47d6bf370031af28e4bb4814ae30ef46e1 /include | |
parent | 7b521d6023113372cf6b02e562828273c4040f0e (diff) |
Add runtime encoding detection and conversion (fix/char-detection)
Win-1251/1252 detection is a reduced C++ version of https://github.com/hsivonen/chardetng
Add manually-specified encoding fallback
Add default system encoding fallback
Add error recovery to v2script
Add unknown encoding detection warning
Remove csv::Parser templating
Fix lua files dropping data
Update lexy to foonathan/lexy@1e5d99fa3826b1c3c8628d3a11117fb4fb4cc0d0
Remove exclusive reliance on lexy::default_encoding for v2script
Move internal concepts to src/openvic-detail/InternalConcepts.hpp
Move contents of DetectUtf8.hpp to src/detail/Detect.hpp
Move openvic-dataloader/AbstractSyntaxTree.hpp to src
Move DiagnosticLogger.hpp to src
Move File.hpp to src
Move openvic-dataloader/detail/utility files to openvic-dataloader/detail
Add ovdl::utility::type_concat
Add ovdl::utility::type_prepend
Add ovdl::utility::is_instance_of
Overhaul parse error messages
Diffstat (limited to 'include')
26 files changed, 248 insertions, 1094 deletions
diff --git a/include/openvic-dataloader/AbstractSyntaxTree.hpp b/include/openvic-dataloader/AbstractSyntaxTree.hpp deleted file mode 100644 index c6453e3..0000000 --- a/include/openvic-dataloader/AbstractSyntaxTree.hpp +++ /dev/null @@ -1,107 +0,0 @@ -#pragma once - -#include <concepts> -#include <cstdio> -#include <string_view> -#include <utility> - -#include <openvic-dataloader/File.hpp> -#include <openvic-dataloader/NodeLocation.hpp> -#include <openvic-dataloader/detail/SymbolIntern.hpp> -#include <openvic-dataloader/detail/utility/Utility.hpp> - -#include <dryad/node.hpp> -#include <dryad/node_map.hpp> -#include <dryad/symbol.hpp> -#include <dryad/tree.hpp> - -#include <fmt/core.h> - -namespace ovdl { - struct AbstractSyntaxTree : SymbolIntern { - symbol_type intern(const char* str, std::size_t length); - symbol_type intern(std::string_view str); - const char* intern_cstr(const char* str, std::size_t length); - const char* intern_cstr(std::string_view str); - symbol_interner_type& symbol_interner(); - const symbol_interner_type& symbol_interner() const; - - protected: - symbol_interner_type _symbol_interner; - }; - - template<typename T> - concept IsAst = - std::derived_from<T, AbstractSyntaxTree> && - requires( - T t, - const T ct, - const typename T::node_type* node, - NodeLocation loc // - ) { - requires IsFile<typename T::file_type>; - typename T::root_node_type; - typename T::node_type; - requires std::derived_from<typename T::root_node_type, typename T::node_type>; - { t.set_location(node, loc) } -> std::same_as<void>; - { t.location_of(node) } -> std::same_as<NodeLocation>; - { t.root() } -> std::same_as<typename T::root_node_type*>; - { ct.root() } -> std::same_as<const typename T::root_node_type*>; - { t.file() } -> std::same_as<typename T::file_type&>; - { ct.file() } -> std::same_as<const typename T::file_type&>; - }; - - template<IsFile FileT, std::derived_from<typename FileT::node_type> RootNodeT> - struct BasicAbstractSyntaxTree : 
AbstractSyntaxTree { - using file_type = FileT; - using root_node_type = RootNodeT; - using node_type = typename file_type::node_type; - - explicit BasicAbstractSyntaxTree(file_type&& file) : _file(std::move(file)) {} - explicit BasicAbstractSyntaxTree(lexy::buffer<typename file_type::encoding_type, void>&& buffer) : _file(std::move(buffer)) {} - - void set_location(const node_type* n, NodeLocation loc) { - _file.set_location(n, loc); - } - - NodeLocation location_of(const node_type* n) const { - return _file.location_of(n); - } - - root_node_type* root() { - return _tree.root(); - } - - const root_node_type* root() const { - return _tree.root(); - } - - file_type& file() { - return _file; - } - - const file_type& file() const { - return _file; - } - - template<typename T, typename... Args> - T* create(NodeLocation loc, Args&&... args) { - auto node = _tree.template create<T>(DRYAD_FWD(args)...); - set_location(node, loc); - return node; - } - - template<typename T, typename... Args> - T* create(const char* begin, const char* end, Args&&... args) { - return create<T>(NodeLocation::make_from(begin, end), DRYAD_FWD(args)...); - } - - void set_root(root_node_type* node) { - _tree.set_root(node); - } - - protected: - dryad::tree<root_node_type> _tree; - file_type _file; - }; -}
\ No newline at end of file diff --git a/include/openvic-dataloader/DiagnosticLogger.hpp b/include/openvic-dataloader/DiagnosticLogger.hpp deleted file mode 100644 index bd8f9cc..0000000 --- a/include/openvic-dataloader/DiagnosticLogger.hpp +++ /dev/null @@ -1,395 +0,0 @@ -#pragma once - -#include <concepts> -#include <cstdarg> -#include <cstdio> -#include <ostream> -#include <string> -#include <utility> - -#include <openvic-dataloader/AbstractSyntaxTree.hpp> -#include <openvic-dataloader/Error.hpp> -#include <openvic-dataloader/File.hpp> -#include <openvic-dataloader/NodeLocation.hpp> -#include <openvic-dataloader/detail/CallbackOStream.hpp> -#include <openvic-dataloader/detail/LexyReportError.hpp> -#include <openvic-dataloader/detail/OStreamOutputIterator.hpp> -#include <openvic-dataloader/detail/SymbolIntern.hpp> -#include <openvic-dataloader/detail/utility/ErrorRange.hpp> -#include <openvic-dataloader/detail/utility/Utility.hpp> - -#include <lexy/error.hpp> -#include <lexy/input/base.hpp> -#include <lexy/input/buffer.hpp> -#include <lexy/visualize.hpp> - -#include <dryad/_detail/config.hpp> -#include <dryad/abstract_node.hpp> -#include <dryad/arena.hpp> -#include <dryad/node.hpp> -#include <dryad/tree.hpp> - -#include <fmt/core.h> - -#include <lexy_ext/report_error.hpp> - -namespace ovdl { - struct DiagnosticLogger : SymbolIntern { - using AnnotationKind = lexy_ext::annotation_kind; - using DiagnosticKind = lexy_ext::diagnostic_kind; - - using error_range = detail::error_range<error::Root>; - - explicit operator bool() const; - bool errored() const; - bool warned() const; - - NodeLocation location_of(const error::Error* error) const; - - template<std::derived_from<DiagnosticLogger> Logger> - struct ErrorCallback { - ErrorCallback(Logger& logger) : _logger(&logger) {} - - struct sink_t { - using return_type = std::size_t; - - template<typename Input, typename Tag> - void operator()(lexy::error_context<Input> const& context, lexy::error_for<Input, Tag> const& 
error) { - using Reader = lexy::input_reader<Input>; - error::Error* result; - - std::string production_name = context.production(); - auto left_strip = production_name.find_first_of('<'); - if (left_strip != std::string::npos) { - auto right_strip = production_name.find_first_of('>', left_strip); - if (right_strip != std::string::npos) { - production_name.erase(left_strip, right_strip - left_strip + 1); - } - } - - auto production = _logger.intern_cstr(production_name); - if constexpr (std::is_same_v<Tag, lexy::expected_literal>) { - auto string = lexy::_detail::make_literal_lexeme<typename Reader::encoding>(error.string(), error.length()); - NodeLocation loc = NodeLocation::make_from(context.position(), error.position() - 1); - auto message = _logger.intern_cstr(fmt::format("expected '{}'", string.data())); - result = _logger.template create<error::ExpectedLiteral>(loc, message, production); - } else if constexpr (std::is_same_v<Tag, lexy::expected_keyword>) { - auto string = lexy::_detail::make_literal_lexeme<typename Reader::encoding>(error.string(), error.length()); - NodeLocation loc = NodeLocation::make_from(context.position(), error.position() - 1); - auto message = _logger.intern_cstr(fmt::format("expected keyword '{}'", string.data())); - result = _logger.template create<error::ExpectedKeyword>(loc, message, production); - } else if constexpr (std::is_same_v<Tag, lexy::expected_char_class>) { - auto message = _logger.intern_cstr(fmt::format("expected {}", error.name())); - result = _logger.template create<error::ExpectedCharClass>(error.position(), message, production); - } else { - NodeLocation loc = NodeLocation::make_from(error.begin(), error.end()); - auto message = _logger.intern_cstr(error.message()); - result = _logger.template create<error::GenericParseError>(loc, message, production); - } - - _logger.insert(result); - - _count++; - } - - std::size_t finish() && { - return _count; - } - - Logger& _logger; - std::size_t _count; - }; - - constexpr 
auto sink() const { - return sink_t { *_logger, 0 }; - } - - mutable Logger* _logger; - }; - - template<typename T, typename... Args> - T* create(NodeLocation loc, Args&&... args) { - using node_creator = dryad::node_creator<decltype(DRYAD_DECLVAL(T).kind()), void>; - T* result = _tree.create<T>(DRYAD_FWD(args)...); - _map.insert(result, loc); - return result; - } - - template<typename T> - T* create() { - using node_creator = dryad::node_creator<decltype(DRYAD_DECLVAL(T).kind()), void>; - T* result = _tree.create<T>(); - return result; - } - - protected: - bool _errored = false; - bool _warned = false; - dryad::node_map<const error::Error, NodeLocation> _map; - dryad::tree<error::Root> _tree; - - symbol_interner_type _symbol_interner; - - void insert(error::Error* root) { - _tree.root()->insert_back(root); - } - - public: - symbol_type intern(const char* str, std::size_t length) { - return _symbol_interner.intern(str, length); - } - symbol_type intern(std::string_view str) { - return intern(str.data(), str.size()); - } - const char* intern_cstr(const char* str, std::size_t length) { - return intern(str, length).c_str(_symbol_interner); - } - const char* intern_cstr(std::string_view str) { - return intern_cstr(str.data(), str.size()); - } - symbol_interner_type& symbol_interner() { - return _symbol_interner; - } - const symbol_interner_type& symbol_interner() const { - return _symbol_interner; - } - }; - - template<IsFile FileT> - struct BasicDiagnosticLogger : DiagnosticLogger { - using file_type = FileT; - - template<typename... Args> - using format_str = fmt::basic_format_string<char, fmt::type_identity_t<Args>...>; - - explicit BasicDiagnosticLogger(const file_type& file) - : _file(&file) { - _tree.set_root(_tree.create<error::Root>()); - } - - struct Writer; - - template<typename... Args> - Writer error(format_str<Args...> fmt, Args&&... args) { - return log(DiagnosticKind::error, fmt, std::forward<Args>(args)...); - } - - template<typename... 
Args> - Writer warning(format_str<Args...> fmt, Args&&... args) { - return log(DiagnosticKind::warning, fmt, std::forward<Args>(args)...); - } - - template<typename... Args> - Writer note(format_str<Args...> fmt, Args&&... args) { - return log(DiagnosticKind::note, fmt, std::forward<Args>(args)...); - } - - template<typename... Args> - Writer info(format_str<Args...> fmt, Args&&... args) { - return log(DiagnosticKind::info, fmt, std::forward<Args>(args)...); - } - - template<typename... Args> - Writer debug(format_str<Args...> fmt, Args&&... args) { - return log(DiagnosticKind::debug, fmt, std::forward<Args>(args)...); - } - - template<typename... Args> - Writer fixit(format_str<Args...> fmt, Args&&... args) { - return log(DiagnosticKind::fixit, fmt, std::forward<Args>(args)...); - } - - template<typename... Args> - Writer help(format_str<Args...> fmt, Args&&... args) { - return log(DiagnosticKind::help, fmt, std::forward<Args>(args)...); - } - - Writer error(std::string_view sv) { - return log(DiagnosticKind::error, fmt::runtime(sv)); - } - - Writer warning(std::string_view sv) { - return log(DiagnosticKind::warning, fmt::runtime(sv)); - } - - Writer note(std::string_view sv) { - return log(DiagnosticKind::note, fmt::runtime(sv)); - } - - Writer info(std::string_view sv) { - return log(DiagnosticKind::info, fmt::runtime(sv)); - } - - Writer debug(std::string_view sv) { - return log(DiagnosticKind::debug, fmt::runtime(sv)); - } - - Writer fixit(std::string_view sv) { - return log(DiagnosticKind::fixit, fmt::runtime(sv)); - } - - Writer help(std::string_view sv) { - return log(DiagnosticKind::help, fmt::runtime(sv)); - } - - auto error_callback() { - return ErrorCallback(*this); - } - - template<typename CharT> - static void _write_to_buffer(const CharT* s, std::streamsize n, void* output_str) { - auto* output = reinterpret_cast<std::basic_string<CharT>*>(output_str); - output->append(s, n); - } - - template<typename CharT> - auto 
make_callback_stream(std::basic_string<CharT>& output) { - return detail::make_callback_stream<CharT>(&_write_to_buffer<CharT>, reinterpret_cast<void*>(&output)); - } - - template<typename CharT> - detail::OStreamOutputIterator make_ostream_iterator(std::basic_ostream<CharT>& stream) { - return detail::OStreamOutputIterator { stream }; - } - - struct Writer { - template<typename... Args> - [[nodiscard]] Writer& primary(NodeLocation loc, format_str<Args...> fmt, Args&&... args) { - return annotation(AnnotationKind::primary, loc, fmt, std::forward<Args>(args)...); - } - - template<typename... Args> - [[nodiscard]] Writer& secondary(NodeLocation loc, format_str<Args...> fmt, Args&&... args) { - return annotation(AnnotationKind::secondary, loc, fmt, std::forward<Args>(args)...); - } - - [[nodiscard]] Writer& primary(NodeLocation loc, std::string_view sv) { - return annotation(AnnotationKind::primary, loc, fmt::runtime(sv)); - } - - [[nodiscard]] Writer& secondary(NodeLocation loc, std::string_view sv) { - return annotation(AnnotationKind::secondary, loc, fmt::runtime(sv)); - } - - void finish() {} - - template<typename... Args> - [[nodiscard]] Writer& annotation(AnnotationKind kind, NodeLocation loc, format_str<Args...> fmt, Args&&... 
args) { - auto begin_loc = lexy::get_input_location(_file->buffer(), loc.begin()); - - std::basic_string<typename decltype(fmt.get())::value_type> output; - auto stream = _logger.make_callback_stream(output); - auto iter = _logger.make_ostream_iterator(stream); - - _impl.write_empty_annotation(iter); - _impl.write_annotation(iter, kind, begin_loc, loc.end(), - [&](auto out, lexy::visualization_options) { - return lexy::_detail::write_str(out, fmt::format(fmt, std::forward<Args>(args)...).c_str()); - }); - - error::Annotation* annotation; - auto message = _logger.intern_cstr(output); - switch (kind) { - case AnnotationKind::primary: - annotation = _logger.create<error::PrimaryAnnotation>(loc, message); - break; - case AnnotationKind::secondary: - annotation = _logger.create<error::SecondaryAnnotation>(loc, message); - break; - default: detail::unreachable(); - } - _semantic->push_back(annotation); - return *this; - } - - private: - Writer(BasicDiagnosticLogger& logger, const file_type* file, error::Semantic* semantic) - : _file(file), - _impl(file->buffer(), { lexy::visualize_fancy }), - _logger(logger), - _semantic(semantic) {} - - const file_type* _file; - lexy_ext::diagnostic_writer<lexy::buffer<typename file_type::encoding_type>> _impl; - BasicDiagnosticLogger& _logger; - error::Semantic* _semantic; - - friend BasicDiagnosticLogger; - }; - - using diagnostic_writer = lexy_ext::diagnostic_writer<lexy::buffer<typename file_type::encoding_type>>; - - template<std::derived_from<error::Error> T, typename... Args> - void log_with_impl(diagnostic_writer& impl, T* error, DiagnosticKind kind, format_str<Args...> fmt, Args&&... 
args) { - std::basic_string<typename decltype(fmt.get())::value_type> output; - auto stream = make_callback_stream(output); - auto iter = make_ostream_iterator(stream); - - impl.write_message(iter, kind, - [&](auto out, lexy::visualization_options) { - return lexy::_detail::write_str(out, fmt::format(fmt, std::forward<Args>(args)...).c_str()); - }); - impl.write_path(iter, _file->path()); - - auto message = intern_cstr(output); - error->_set_message(message); - insert(error); - } - - template<std::derived_from<error::Error> T, typename... Args> - void log_with_error(T* error, DiagnosticKind kind, format_str<Args...> fmt, Args&&... args) { - auto impl = diagnostic_writer { _file->buffer() }; - log_with_impl(impl, error, kind, fmt, std::forward<Args>(args)...); - } - - template<std::derived_from<error::Error> T, typename... Args> - void create_log(DiagnosticKind kind, format_str<Args...> fmt, Args&&... args) { - log_with_error(create<T>(), kind, fmt, std::forward<Args>(args)...); - } - - template<typename... Args> - Writer log(DiagnosticKind kind, format_str<Args...> fmt, Args&&... 
args) { - error::Semantic* semantic; - - switch (kind) { - case DiagnosticKind::error: - semantic = create<error::SemanticError>(); - break; - case DiagnosticKind::warning: - semantic = create<error::SemanticWarning>(); - break; - case DiagnosticKind::info: - semantic = create<error::SemanticInfo>(); - break; - case DiagnosticKind::debug: - semantic = create<error::SemanticDebug>(); - break; - case DiagnosticKind::fixit: - semantic = create<error::SemanticFixit>(); - break; - case DiagnosticKind::help: - semantic = create<error::SemanticHelp>(); - break; - default: detail::unreachable(); - } - - Writer result(*this, _file, semantic); - - log_with_impl(result._impl, semantic, kind, fmt, std::forward<Args>(args)...); - - if (kind == DiagnosticKind::error) - _errored = true; - if (kind == DiagnosticKind::warning) - _warned = true; - - return result; - } - - error_range get_errors() const { - return _tree.root()->errors(); - } - - private: - const file_type* _file; - }; -}
\ No newline at end of file diff --git a/include/openvic-dataloader/Error.hpp b/include/openvic-dataloader/Error.hpp index 726079c..a2e13fe 100644 --- a/include/openvic-dataloader/Error.hpp +++ b/include/openvic-dataloader/Error.hpp @@ -3,14 +3,13 @@ #include <cstdint> #include <string_view> -#include <openvic-dataloader/File.hpp> -#include <openvic-dataloader/detail/utility/Utility.hpp> +#include <openvic-dataloader/detail/Utility.hpp> #include <dryad/abstract_node.hpp> #include <dryad/node.hpp> namespace ovdl { - template<IsFile> + template<typename> struct BasicDiagnosticLogger; } @@ -40,6 +39,10 @@ namespace ovdl::error { FirstSemantic = SemanticError, LastSemantic = SemanticHelp, + // Annotated Error // + FirstAnnotatedError = FirstParseError, + LastAnnotatedError = LastSemantic, + PrimaryAnnotation, SecondaryAnnotation, @@ -59,15 +62,15 @@ namespace ovdl::error { } struct Error : dryad::abstract_node_all<ErrorKind> { - std::string_view message() const { return _message; } + const char* message() const { return _message; } protected: DRYAD_ABSTRACT_NODE_CTOR(Error); void _set_message(const char* message) { _message = message; } - const char* _message; + const char* _message = ""; - template<IsFile> + template<typename> friend struct ovdl::BasicDiagnosticLogger; }; @@ -98,7 +101,30 @@ namespace ovdl::error { explicit BufferError(dryad::node_ctor ctor) : node_base(ctor) {} }; - struct ParseError : dryad::abstract_node_range<Error, ErrorKind::FirstParseError, ErrorKind::LastParseError> { + struct Annotation : dryad::abstract_node_range<Error, ErrorKind::FirstAnnotation, ErrorKind::LastAnnotation> { + protected: + explicit Annotation(dryad::node_ctor ctor, ErrorKind kind, const char* message) : node_base(ctor, kind) { + _set_message(message); + } + }; + + struct AnnotatedError : dryad::abstract_node_range<dryad::container_node<Error>, ErrorKind::FirstAnnotatedError, ErrorKind::LastAnnotatedError> { + DRYAD_CHILD_NODE_RANGE_GETTER(Annotation, annotations, nullptr, 
this->node_after(_last_annotation)); + + void push_back(Annotation* annotation); + void push_back(AnnotationList p_annotations); + + protected: + explicit AnnotatedError(dryad::node_ctor ctor, ErrorKind kind) : node_base(ctor, kind) { + insert_child_list_after(nullptr, AnnotationList {}); + _last_annotation = nullptr; + } + + private: + Annotation* _last_annotation; + }; + + struct ParseError : dryad::abstract_node_range<AnnotatedError, ErrorKind::FirstParseError, ErrorKind::LastParseError> { std::string_view production_name() const { return _production_name; } protected: @@ -116,8 +142,10 @@ namespace ovdl::error { template<ErrorKind NodeKind> struct _ParseError_t : dryad::basic_node<NodeKind, ParseError> { + using base_node = dryad::basic_node<NodeKind, ParseError>; + explicit _ParseError_t(dryad::node_ctor ctor, const char* message, const char* production_name) - : dryad::basic_node<NodeKind, ParseError>(ctor, message, production_name) {} + : base_node(ctor, message, production_name) {} }; using ExpectedLiteral = _ParseError_t<ErrorKind::ExpectedLiteral>; @@ -125,30 +153,21 @@ namespace ovdl::error { using ExpectedCharClass = _ParseError_t<ErrorKind::ExpectedCharClass>; using GenericParseError = _ParseError_t<ErrorKind::GenericParseError>; - struct Semantic : dryad::abstract_node_range<dryad::container_node<Error>, ErrorKind::FirstSemantic, ErrorKind::LastSemantic> { - DRYAD_CHILD_NODE_RANGE_GETTER(Annotation, annotations, nullptr, this->node_after(_last_annotation)); - - void push_back(Annotation* annotation); - void push_back(AnnotationList p_annotations); - + struct Semantic : dryad::abstract_node_range<AnnotatedError, ErrorKind::FirstSemantic, ErrorKind::LastSemantic> { protected: explicit Semantic(dryad::node_ctor ctor, ErrorKind kind) : node_base(ctor, kind) {}; explicit Semantic(dryad::node_ctor ctor, ErrorKind kind, const char* message) : node_base(ctor, kind) { - insert_child_list_after(nullptr, AnnotationList {}); _set_message(message); }; explicit 
Semantic(dryad::node_ctor ctor, ErrorKind kind, const char* message, AnnotationList annotations) : node_base(ctor, kind) { - insert_child_list_after(nullptr, annotations); + push_back(annotations); _set_message(message); }; - - private: - Error* _last_annotation; }; template<ErrorKind NodeKind> @@ -172,13 +191,6 @@ namespace ovdl::error { using SemanticFixit = _SemanticError_t<ErrorKind::SemanticFixit>; using SemanticHelp = _SemanticError_t<ErrorKind::SemanticHelp>; - struct Annotation : dryad::abstract_node_range<Error, ErrorKind::FirstAnnotation, ErrorKind::LastAnnotation> { - protected: - explicit Annotation(dryad::node_ctor ctor, ErrorKind kind, const char* message) : node_base(ctor, kind) { - _set_message(message); - } - }; - template<ErrorKind NodeKind> struct _Annotation_t : dryad::basic_node<NodeKind, Annotation> { explicit _Annotation_t(dryad::node_ctor ctor, const char* message) @@ -188,12 +200,13 @@ namespace ovdl::error { using PrimaryAnnotation = _Annotation_t<ErrorKind::PrimaryAnnotation>; using SecondaryAnnotation = _Annotation_t<ErrorKind::SecondaryAnnotation>; - inline void Semantic::push_back(Annotation* annotation) { - insert_child_after(annotations().end().deref(), annotation); + inline void AnnotatedError::push_back(Annotation* annotation) { + insert_child_after(_last_annotation, annotation); _last_annotation = annotation; } - inline void Semantic::push_back(AnnotationList p_annotations) { + inline void AnnotatedError::push_back(AnnotationList p_annotations) { + if (p_annotations.empty()) return; insert_child_list_after(annotations().end().deref(), p_annotations); _last_annotation = *p_annotations.end(); } diff --git a/include/openvic-dataloader/File.hpp b/include/openvic-dataloader/File.hpp deleted file mode 100644 index caa4a0a..0000000 --- a/include/openvic-dataloader/File.hpp +++ /dev/null @@ -1,69 +0,0 @@ -#pragma once - -#include <concepts> - -#include <openvic-dataloader/NodeLocation.hpp> -#include 
<openvic-dataloader/detail/LexyFwdDeclaration.hpp> - -#include <dryad/node_map.hpp> - -namespace ovdl { - template<typename T> - concept IsEncoding = requires(T t) { - typename T::char_type; - typename T::int_type; - { T::template is_secondary_char_type<typename T::char_type>() } -> std::same_as<bool>; - { T::eof() } -> std::same_as<typename T::int_type>; - { T::to_int_type(typename T::char_type {}) } -> std::same_as<typename T::int_type>; - }; - - struct File { - explicit File(const char* path); - - const char* path() const noexcept; - - protected: - const char* _path; - }; - - template<typename T> - concept IsFile = - std::derived_from<T, File> && IsEncoding<typename T::encoding_type> && - requires(T t, const typename T::node_type* node, NodeLocation location) { - { t.buffer() } -> std::same_as<const lexy::buffer<typename T::encoding_type, void>&>; - { t.set_location(node, location) } -> std::same_as<void>; - { t.location_of(node) } -> std::same_as<NodeLocation>; - }; - - template<typename EncodingT, typename NodeT> - struct BasicFile : File { - using encoding_type = EncodingT; - using node_type = NodeT; - - explicit BasicFile(const char* path, lexy::buffer<encoding_type, void>&& buffer) - : File(path), - _buffer(static_cast<std ::remove_reference_t<decltype(buffer)>&&>(buffer)) {} - - explicit BasicFile(lexy::buffer<encoding_type, void>&& buffer) - : File(""), - _buffer(static_cast<std ::remove_reference_t<decltype(buffer)>&&>(buffer)) {} - - const lexy::buffer<encoding_type, void>& buffer() const { - return _buffer; - } - - void set_location(const node_type* n, NodeLocation loc) { - _map.insert(n, loc); - } - - NodeLocation location_of(const node_type* n) const { - auto result = _map.lookup(n); - DRYAD_ASSERT(result != nullptr, "every Node should have a NodeLocation"); - return *result; - } - - protected: - lexy::buffer<encoding_type, void> _buffer; - dryad::node_map<const node_type, NodeLocation> _map; - }; -}
\ No newline at end of file diff --git a/include/openvic-dataloader/NodeLocation.hpp b/include/openvic-dataloader/NodeLocation.hpp index 117560b..ced79e6 100644 --- a/include/openvic-dataloader/NodeLocation.hpp +++ b/include/openvic-dataloader/NodeLocation.hpp @@ -3,28 +3,68 @@ #include <cstdint> namespace ovdl { - struct NodeLocation { - const char* _begin = nullptr; - const char* _end = nullptr; + template<typename CharT> + struct BasicNodeLocation { + using char_type = CharT; - NodeLocation(); - NodeLocation(const char* pos); - NodeLocation(const char* begin, const char* end); + const char_type* _begin = nullptr; + const char_type* _end = nullptr; - NodeLocation(const NodeLocation&) noexcept; - NodeLocation& operator=(const NodeLocation&); + BasicNodeLocation() = default; + BasicNodeLocation(const char_type* pos) : _begin(pos), + _end(pos) {} + BasicNodeLocation(const char_type* begin, const char_type* end) : _begin(begin), + _end(end) {} - NodeLocation(NodeLocation&&); - NodeLocation& operator=(NodeLocation&&); + BasicNodeLocation(const BasicNodeLocation&) noexcept = default; + BasicNodeLocation& operator=(const BasicNodeLocation&) = default; - const char* begin() const; - const char* end() const; + BasicNodeLocation(BasicNodeLocation&&) = default; + BasicNodeLocation& operator=(BasicNodeLocation&&) = default; - bool is_synthesized() const; + template<typename OtherCharT> + void set_from(const BasicNodeLocation<OtherCharT>& other) { + if constexpr (sizeof(CharT) <= sizeof(OtherCharT)) { + _begin = reinterpret_cast<const CharT*>(other.begin()); + if (other.begin() == other.end()) + _end = _begin; + else + _end = reinterpret_cast<const CharT*>(other.end()) + (sizeof(OtherCharT) - sizeof(CharT)); + } else { + _begin = reinterpret_cast<const CharT*>(other.begin()); + if (other.end() - other.begin() <= 0) { + _end = reinterpret_cast<const CharT*>(other.begin()); + } else { + _end = reinterpret_cast<const CharT*>(other.end() - (sizeof(CharT) - sizeof(OtherCharT))); + 
} + } + } - static NodeLocation make_from(const char* begin, const char* end); + template<typename OtherCharT> + BasicNodeLocation(const BasicNodeLocation<OtherCharT>& other) { + set_from(other); + } + + template<typename OtherCharT> + BasicNodeLocation& operator=(const BasicNodeLocation<OtherCharT>& other) { + set_from(other); + return *this; + } + + const char_type* begin() const { return _begin; } + const char_type* end() const { return _end; } + + bool is_synthesized() const { return _begin == nullptr && _end == nullptr; } + + static BasicNodeLocation make_from(const char_type* begin, const char_type* end) { + end++; + if (begin >= end) return BasicNodeLocation(begin); + return BasicNodeLocation(begin, end); + } }; + using NodeLocation = BasicNodeLocation<char>; + struct FilePosition { std::uint32_t start_line = std::uint32_t(-1), end_line = std::uint32_t(-1), start_column = std::uint32_t(-1), end_column = std::uint32_t(-1); diff --git a/include/openvic-dataloader/ParseData.hpp b/include/openvic-dataloader/ParseData.hpp deleted file mode 100644 index 8bec7d2..0000000 --- a/include/openvic-dataloader/ParseData.hpp +++ /dev/null @@ -1,11 +0,0 @@ -#pragma once - -#include <string> - -namespace ovdl { - struct ParseData { - const std::string production_name; - const unsigned int context_start_line; - const unsigned int context_start_column; - }; -}
\ No newline at end of file diff --git a/include/openvic-dataloader/ParseError.hpp b/include/openvic-dataloader/ParseError.hpp deleted file mode 100644 index 9e4541e..0000000 --- a/include/openvic-dataloader/ParseError.hpp +++ /dev/null @@ -1,20 +0,0 @@ -#pragma once - -#include <string> - -#include <openvic-dataloader/ParseData.hpp> - -namespace ovdl { - struct ParseError { - const enum class Type : unsigned char { - Recoverable, - Fatal - } type; - const std::string message; - const int error_value; - const ParseData parse_data; - const unsigned int start_line; - const unsigned int start_column; - }; - -}
\ No newline at end of file diff --git a/include/openvic-dataloader/ParseState.hpp b/include/openvic-dataloader/ParseState.hpp deleted file mode 100644 index 5655606..0000000 --- a/include/openvic-dataloader/ParseState.hpp +++ /dev/null @@ -1,120 +0,0 @@ -#pragma once - -#include <concepts> - -#include <openvic-dataloader/AbstractSyntaxTree.hpp> -#include <openvic-dataloader/DiagnosticLogger.hpp> - -#include <dryad/tree.hpp> - -namespace ovdl { - template<typename T> - concept IsParseState = requires( - T t, - const T ct, - typename T::ast_type::file_type&& file, - lexy::buffer<typename T::ast_type::file_type::encoding_type>&& buffer, - const char* path // - ) { - requires IsAst<typename T::ast_type>; - requires std::derived_from<typename T::diagnostic_logger_type, DiagnosticLogger>; - { T { std::move(file) } } -> std::same_as<T>; - { T { std::move(buffer) } } -> std::same_as<T>; - { T { path, std::move(buffer) } } -> std::same_as<T>; - { t.ast() } -> std::same_as<typename T::ast_type&>; - { ct.ast() } -> std::same_as<const typename T::ast_type&>; - { t.logger() } -> std::same_as<typename T::diagnostic_logger_type&>; - { ct.logger() } -> std::same_as<const typename T::diagnostic_logger_type&>; - }; - - template<IsAst AstT> - struct ParseState { - using ast_type = AstT; - using diagnostic_logger_type = BasicDiagnosticLogger<typename ast_type::file_type>; - - ParseState(typename ast_type::file_type&& file) - : _ast { std::move(file) }, - _logger { _ast.file() } {} - - ParseState(lexy::buffer<typename ast_type::file_type::encoding_type>&& buffer) - : ParseState(typename ast_type::file_type { std::move(buffer) }) {} - - ParseState(const char* path, lexy::buffer<typename ast_type::file_type::encoding_type>&& buffer) - : ParseState(typename ast_type::file_type { path, std::move(buffer) }) {} - - ast_type& ast() { - return _ast; - } - - const ast_type& ast() const { - return _ast; - } - - diagnostic_logger_type& logger() { - return _logger; - } - - const 
diagnostic_logger_type& logger() const { - return _logger; - } - - private: - ast_type _ast; - diagnostic_logger_type _logger; - }; - - template<typename T> - concept IsFileParseState = requires( - T t, - const T ct, - typename T::file_type&& file, - lexy::buffer<typename T::file_type::encoding_type>&& buffer, - const char* path // - ) { - requires IsFile<typename T::file_type>; - requires std::derived_from<typename T::diagnostic_logger_type, DiagnosticLogger>; - { T { std::move(file) } } -> std::same_as<T>; - { T { std::move(buffer) } } -> std::same_as<T>; - { T { path, std::move(buffer) } } -> std::same_as<T>; - { t.file() } -> std::same_as<typename T::file_type&>; - { ct.file() } -> std::same_as<const typename T::file_type&>; - { t.logger() } -> std::same_as<typename T::diagnostic_logger_type&>; - { ct.logger() } -> std::same_as<const typename T::diagnostic_logger_type&>; - }; - - template<IsFile FileT> - struct FileParseState { - using file_type = FileT; - using diagnostic_logger_type = BasicDiagnosticLogger<file_type>; - - FileParseState(file_type&& file) - : _file { std::move(file) }, - _logger { file } {} - - FileParseState(lexy::buffer<typename file_type::encoding_type>&& buffer) - : FileParseState(file_type { std::move(buffer) }) {} - - FileParseState(const char* path, lexy::buffer<typename file_type::encoding_type>&& buffer) - : FileParseState(file_type { path, std::move(buffer) }) {} - - file_type& file() { - return _file; - } - - const file_type& file() const { - return _file; - } - - diagnostic_logger_type& logger() { - return _logger; - } - - const diagnostic_logger_type& logger() const { - return _logger; - } - - private: - file_type _file; - diagnostic_logger_type _logger; - }; -}
\ No newline at end of file diff --git a/include/openvic-dataloader/ParseWarning.hpp b/include/openvic-dataloader/ParseWarning.hpp deleted file mode 100644 index 307599f..0000000 --- a/include/openvic-dataloader/ParseWarning.hpp +++ /dev/null @@ -1,10 +0,0 @@ -#pragma once - -#include <string> - -namespace ovdl { - struct ParseWarning { - const std::string message; - const int warning_value; - }; -}
\ No newline at end of file diff --git a/include/openvic-dataloader/Parser.hpp b/include/openvic-dataloader/Parser.hpp index b885f3d..ba390e7 100644 --- a/include/openvic-dataloader/Parser.hpp +++ b/include/openvic-dataloader/Parser.hpp @@ -3,9 +3,6 @@ #include <string> #include <string_view> -#include <openvic-dataloader/ParseError.hpp> -#include <openvic-dataloader/ParseWarning.hpp> - namespace ovdl::detail { struct BasicParser { BasicParser(); diff --git a/include/openvic-dataloader/csv/LineObject.hpp b/include/openvic-dataloader/csv/LineObject.hpp index ca632cd..c839be2 100644 --- a/include/openvic-dataloader/csv/LineObject.hpp +++ b/include/openvic-dataloader/csv/LineObject.hpp @@ -13,7 +13,7 @@ #include <utility> #include <vector> -#include <openvic-dataloader/detail/utility/Constexpr.hpp> +#include <openvic-dataloader/detail/Constexpr.hpp> namespace ovdl::csv { /// LineObject should be able to recognize the differences between: diff --git a/include/openvic-dataloader/csv/Parser.hpp b/include/openvic-dataloader/csv/Parser.hpp index 06e7251..35421c8 100644 --- a/include/openvic-dataloader/csv/Parser.hpp +++ b/include/openvic-dataloader/csv/Parser.hpp @@ -1,41 +1,38 @@ #pragma once #include <filesystem> +#include <optional> #include <openvic-dataloader/Error.hpp> +#include <openvic-dataloader/NodeLocation.hpp> #include <openvic-dataloader/Parser.hpp> #include <openvic-dataloader/csv/LineObject.hpp> -#include <openvic-dataloader/detail/utility/Concepts.hpp> -#include <openvic-dataloader/detail/utility/ErrorRange.hpp> +#include <openvic-dataloader/detail/Concepts.hpp> +#include <openvic-dataloader/detail/Encoding.hpp> +#include <openvic-dataloader/detail/ErrorRange.hpp> #include <dryad/node.hpp> namespace ovdl::csv { - enum class EncodingType { - Windows1252, - Utf8 - }; - - template<EncodingType Encoding = EncodingType::Windows1252> class Parser final : public detail::BasicParser { public: Parser(); Parser(std::basic_ostream<char>& error_stream); - static Parser 
from_buffer(const char* data, std::size_t size); - static Parser from_buffer(const char* start, const char* end); - static Parser from_string(const std::string_view string); - static Parser from_file(const char* path); - static Parser from_file(const std::filesystem::path& path); + static Parser from_buffer(const char* data, std::size_t size, std::optional<detail::Encoding> encoding_fallback = std::nullopt); + static Parser from_buffer(const char* start, const char* end, std::optional<detail::Encoding> encoding_fallback = std::nullopt); + static Parser from_string(const std::string_view string, std::optional<detail::Encoding> encoding_fallback = std::nullopt); + static Parser from_file(const char* path, std::optional<detail::Encoding> encoding_fallback = std::nullopt); + static Parser from_file(const std::filesystem::path& path, std::optional<detail::Encoding> encoding_fallback = std::nullopt); - constexpr Parser& load_from_buffer(const char* data, std::size_t size); - constexpr Parser& load_from_buffer(const char* start, const char* end); - constexpr Parser& load_from_string(const std::string_view string); - Parser& load_from_file(const char* path); - Parser& load_from_file(const std::filesystem::path& path); + constexpr Parser& load_from_buffer(const char* data, std::size_t size, std::optional<detail::Encoding> encoding_fallback = std::nullopt); + constexpr Parser& load_from_buffer(const char* start, const char* end, std::optional<detail::Encoding> encoding_fallback = std::nullopt); + constexpr Parser& load_from_string(const std::string_view string, std::optional<detail::Encoding> encoding_fallback = std::nullopt); + Parser& load_from_file(const char* path, std::optional<detail::Encoding> encoding_fallback = std::nullopt); + Parser& load_from_file(const std::filesystem::path& path, std::optional<detail::Encoding> encoding_fallback = std::nullopt); - constexpr Parser& load_from_file(const detail::HasCstr auto& path) { - return load_from_file(path.c_str()); + 
constexpr Parser& load_from_file(const detail::HasCstr auto& path, std::optional<detail::Encoding> encoding_fallback = std::nullopt) { + return load_from_file(path.c_str(), encoding_fallback); } bool parse_csv(bool handle_strings = false); @@ -57,12 +54,8 @@ namespace ovdl::csv { private: class ParseHandler; std::unique_ptr<ParseHandler> _parse_handler; - std::vector<csv::LineObject> _lines; template<typename... Args> constexpr void _run_load_func(detail::LoadCallback<ParseHandler, Args...> auto func, Args... args); }; - - using Windows1252Parser = Parser<EncodingType::Windows1252>; - using Utf8Parser = Parser<EncodingType::Utf8>; }
\ No newline at end of file diff --git a/include/openvic-dataloader/detail/utility/Concepts.hpp b/include/openvic-dataloader/detail/Concepts.hpp index 0ba91cc..79e04a9 100644 --- a/include/openvic-dataloader/detail/utility/Concepts.hpp +++ b/include/openvic-dataloader/detail/Concepts.hpp @@ -6,7 +6,6 @@ #include <utility> namespace ovdl { - struct NodeLocation; struct File; namespace detail { enum class buffer_error : std::uint8_t; @@ -15,7 +14,7 @@ namespace ovdl { namespace ovdl::detail { template<typename T, typename... Ts> - concept any_of = (std::same_as<T, Ts> || ...); + concept any_of = std::disjunction_v<std::is_same<T, Ts>...>; template<typename T> concept HasCstr = @@ -42,4 +41,9 @@ namespace ovdl::detail { { T::eof() } -> std::same_as<typename T::int_type>; { T::to_int_type(typename T::char_type {}) } -> std::same_as<typename T::int_type>; }; + + template<typename T, typename R, typename... Args> + concept Invocable_R = std::invocable<T, Args...> && requires(Args&&... args) { + { invoke(forward<Args>(args)...) } -> std::convertible_to<R>; + }; }
\ No newline at end of file diff --git a/include/openvic-dataloader/detail/utility/Constexpr.hpp b/include/openvic-dataloader/detail/Constexpr.hpp index 49479c5..49479c5 100644 --- a/include/openvic-dataloader/detail/utility/Constexpr.hpp +++ b/include/openvic-dataloader/detail/Constexpr.hpp diff --git a/include/openvic-dataloader/detail/Encoding.hpp b/include/openvic-dataloader/detail/Encoding.hpp new file mode 100644 index 0000000..12a0524 --- /dev/null +++ b/include/openvic-dataloader/detail/Encoding.hpp @@ -0,0 +1,13 @@ +#pragma once + +#include <cstdint> + +namespace ovdl::detail { + enum class Encoding : std::int8_t { + Unknown, + Ascii, + Utf8, + Windows1251, + Windows1252 + }; +}
\ No newline at end of file diff --git a/include/openvic-dataloader/detail/utility/ErrorRange.hpp b/include/openvic-dataloader/detail/ErrorRange.hpp index 7d5ca13..7d5ca13 100644 --- a/include/openvic-dataloader/detail/utility/ErrorRange.hpp +++ b/include/openvic-dataloader/detail/ErrorRange.hpp diff --git a/include/openvic-dataloader/detail/LexyFwdDeclaration.hpp b/include/openvic-dataloader/detail/LexyFwdDeclaration.hpp deleted file mode 100644 index 554c88d..0000000 --- a/include/openvic-dataloader/detail/LexyFwdDeclaration.hpp +++ /dev/null @@ -1,8 +0,0 @@ -#pragma once - -namespace lexy { - struct default_encoding; - - template<typename Encoding, typename MemoryResource> - struct buffer; -}
\ No newline at end of file diff --git a/include/openvic-dataloader/detail/LexyReportError.hpp b/include/openvic-dataloader/detail/LexyReportError.hpp deleted file mode 100644 index 3c32bd1..0000000 --- a/include/openvic-dataloader/detail/LexyReportError.hpp +++ /dev/null @@ -1,107 +0,0 @@ -#pragma once - -#include <cstddef> -#include <sstream> -#include <utility> -#include <vector> - -#include <openvic-dataloader/ParseData.hpp> -#include <openvic-dataloader/ParseError.hpp> - -#include <lexy/input_location.hpp> -#include <lexy/visualize.hpp> - -#include "openvic-dataloader/detail/utility/Concepts.hpp" - -#include <lexy_ext/report_error.hpp> - -namespace ovdl::detail { - template<typename OutputIterator> - struct _ReportError { - OutputIterator _iter; - lexy::visualization_options _opts; - const char* _path; - - struct _sink { - OutputIterator _iter; - lexy::visualization_options _opts; - const char* _path; - std::size_t _count; - std::vector<ParseError> _errors; - - using return_type = std::vector<ParseError>; - - template<typename Input, typename Reader, typename Tag> - void operator()(const lexy::error_context<Input>& context, const lexy::error<Reader, Tag>& error) { - _iter = lexy_ext::_detail::write_error(_iter, context, error, _opts, _path); - ++_count; - - // Convert the context location and error location into line/column information. - auto context_location = lexy::get_input_location(context.input(), context.position()); - auto location = lexy::get_input_location(context.input(), error.position(), context_location.anchor()); - - std::basic_stringstream<typename Reader::encoding::char_type> message; - - // Write the main annotation. 
- if constexpr (std::is_same_v<Tag, lexy::expected_literal>) { - auto string = lexy::_detail::make_literal_lexeme<typename Reader::encoding>(error.string(), error.length()); - - message << "expected '" << string.data() << '\''; - } else if constexpr (std::is_same_v<Tag, lexy::expected_keyword>) { - auto string = lexy::_detail::make_literal_lexeme<typename Reader::encoding>(error.string(), error.length()); - - message << "expected keyword '" << string.data() << '\''; - } else if constexpr (std::is_same_v<Tag, lexy::expected_char_class>) { - message << "expected " << error.name(); - } else { - message << error.message(); - } - - _errors.push_back( - ParseError { - ParseError::Type::Fatal, // TODO: distinguish recoverable errors from fatal errors - std::move(message.str()), - 0, // TODO: implement proper error codes - ParseData { - context.production(), - context_location.line_nr(), - context_location.column_nr(), - }, - location.line_nr(), - location.column_nr(), - }); - } - - return_type finish() && { - if (_count != 0) - *_iter++ = '\n'; - return _errors; - } - }; - constexpr auto sink() const { - return _sink { _iter, _opts, _path, 0 }; - } - - /// Specifies a path that will be printed alongside the diagnostic. - constexpr _ReportError path(const char* path) const { - return { _iter, _opts, path }; - } - - constexpr _ReportError path(const detail::HasCstr auto& path_object) const { - return path(path_object.c_str()); - } - - /// Specifies an output iterator where the errors are written to. - template<typename OI> - constexpr _ReportError<OI> to(OI out) const { - return { out, _opts, _path }; - } - - /// Overrides visualization options. - constexpr _ReportError opts(lexy::visualization_options opts) const { - return { _iter, opts, _path }; - } - }; - - constexpr auto ReporError = _ReportError<lexy::stderr_output_iterator> {}; -}
\ No newline at end of file diff --git a/include/openvic-dataloader/detail/OStreamOutputIterator.hpp b/include/openvic-dataloader/detail/OStreamOutputIterator.hpp index 8f120c7..81f6c89 100644 --- a/include/openvic-dataloader/detail/OStreamOutputIterator.hpp +++ b/include/openvic-dataloader/detail/OStreamOutputIterator.hpp @@ -1,6 +1,5 @@ #pragma once -#include <memory> #include <ostream> namespace ovdl::detail { diff --git a/include/openvic-dataloader/detail/Utility.hpp b/include/openvic-dataloader/detail/Utility.hpp new file mode 100644 index 0000000..8d9e159 --- /dev/null +++ b/include/openvic-dataloader/detail/Utility.hpp @@ -0,0 +1,89 @@ +#pragma once + +#include <cstdint> +#include <tuple> +#include <type_traits> +#include <variant> + +#include <openvic-dataloader/detail/Concepts.hpp> + +namespace ovdl::detail { + [[noreturn]] inline void unreachable() { + // Uses compiler specific extensions if possible. + // Even if no extension is used, undefined behavior is still raised by + // an empty function body and the noreturn attribute. +#ifdef __GNUC__ // GCC, Clang, ICC + __builtin_unreachable(); +#elif defined(_MSC_VER) // MSVC + __assume(false); +#endif + } + + template<typename EnumT> + requires std::is_enum_v<EnumT> + constexpr std::underlying_type_t<EnumT> to_underlying(EnumT e) { + return static_cast<std::underlying_type_t<EnumT>>(e); + } + + template<typename EnumT> + requires std::is_enum_v<EnumT> + constexpr EnumT from_underlying(std::underlying_type_t<EnumT> ut) { + return static_cast<EnumT>(ut); + } + + template<typename Type, typename... 
Types> + struct TypeRegister { + using tuple_type = std::tuple<Type, Types...>; + using variant_type = std::variant<Type, Types...>; + + template<typename QueriedType> + struct _id_getter { + static constexpr std::uint32_t type_id() { + static_assert(any_of<QueriedType, Type, Types...>, "Cannot query an non-registered type"); + + if constexpr (std::is_same_v<Type, QueriedType>) return 0; + else return 1 + TypeRegister<Types...>::template _id_getter<QueriedType>::type_id(); + }; + }; + + template<typename QueriedType> + static constexpr std::uint32_t type_id() { + + return _id_getter<QueriedType>::type_id(); + } + + template<std::uint32_t Id> + using type_by_id = std::tuple_element_t<Id, tuple_type>; + }; + + template<typename...> + struct type_concat; + + template<typename... Ts, template<typename...> typename TT, typename... TTs> + struct type_concat<TT<TTs...>, Ts...> { + using type = TT<TTs..., Ts...>; + }; + + template<typename... Ts> + using type_concat_t = type_concat<Ts...>::type; + + template<typename...> + struct type_prepend; + + template<typename... Ts, template<typename...> typename TT, typename... TTs> + struct type_prepend<TT<TTs...>, Ts...> { + using type = TT<Ts..., TTs...>; + }; + + template<typename... Ts> + using type_prepend_t = type_prepend<Ts...>::type; + + template<typename Type, template<typename...> typename Template> + struct is_instance_of : std::false_type {}; + + template<typename... Ts, template<typename...> typename Template> + struct is_instance_of<Template<Ts...>, Template> : std::true_type {}; + + template<typename Type, template<typename...> typename Template> + static constexpr auto is_instance_of_v = is_instance_of<Type, Template>::value; +}
\ No newline at end of file diff --git a/include/openvic-dataloader/detail/utility/PointerHash.hpp b/include/openvic-dataloader/detail/utility/PointerHash.hpp deleted file mode 100644 index c0d28bc..0000000 --- a/include/openvic-dataloader/detail/utility/PointerHash.hpp +++ /dev/null @@ -1,23 +0,0 @@ -#pragma once - -#include <cstdint> - -namespace ovdl::detail { - /* hash any pointer */ - template<typename T> - struct PointerHash { - using type = T; - using ptr_type = T*; - using const_type = const T; - using const_ptr_type = const T*; - using const_ptr_const_type = const const_ptr_type; - constexpr std::size_t operator()(const_ptr_const_type pointer) const { - auto addr = reinterpret_cast<uintptr_t>(pointer); -#if SIZE_MAX < UINTPTR_MAX - /* size_t is not large enough to hold the pointer’s memory address */ - addr %= SIZE_MAX; /* truncate the address so it is small enough to fit in a size_t */ -#endif - return addr; - } - }; -}
\ No newline at end of file diff --git a/include/openvic-dataloader/detail/utility/SelfType.hpp b/include/openvic-dataloader/detail/utility/SelfType.hpp deleted file mode 100644 index 5209700..0000000 --- a/include/openvic-dataloader/detail/utility/SelfType.hpp +++ /dev/null @@ -1,28 +0,0 @@ -#pragma once - -#include <type_traits> - -namespace ovdl::detail { -#if !defined(_MSC_VER) -#pragma GCC diagnostic push -#pragma clang diagnostic ignored "-Wunknown-warning-option" -#pragma GCC diagnostic ignored "-Wnon-template-friend" -#endif - template<typename T> - struct Reader { - friend auto adl_GetSelfType(Reader<T>); - }; - - template<typename T, typename U> - struct Writer { - friend auto adl_GetSelfType(Reader<T>) { return U {}; } - }; -#if !defined(_MSC_VER) -#pragma GCC diagnostic pop -#endif - - inline void adl_GetSelfType() {} - - template<typename T> - using Read = std::remove_pointer_t<decltype(adl_GetSelfType(Reader<T> {}))>; -} diff --git a/include/openvic-dataloader/detail/utility/TypeName.hpp b/include/openvic-dataloader/detail/utility/TypeName.hpp deleted file mode 100644 index 1a34a0f..0000000 --- a/include/openvic-dataloader/detail/utility/TypeName.hpp +++ /dev/null @@ -1,52 +0,0 @@ -#pragma once - -#include <array> -#include <cstddef> -#include <string_view> -#include <utility> - -namespace ovdl::detail { - - template<std::size_t... Idxs> - constexpr auto substring_as_array(std::string_view str, std::index_sequence<Idxs...>) { - return std::array { str[Idxs]... 
}; - } - - template<typename T> - constexpr auto type_name_array() { -#if defined(__clang__) - constexpr auto prefix = std::string_view { "[T = " }; - constexpr auto suffix = std::string_view { "]" }; - constexpr auto function = std::string_view { __PRETTY_FUNCTION__ }; -#elif defined(__GNUC__) - constexpr auto prefix = std::string_view { "with T = " }; - constexpr auto suffix = std::string_view { "]" }; - constexpr auto function = std::string_view { __PRETTY_FUNCTION__ }; -#elif defined(_MSC_VER) - constexpr auto prefix = std::string_view { "type_name_array<" }; - constexpr auto suffix = std::string_view { ">(void)" }; - constexpr auto function = std::string_view { __FUNCSIG__ }; -#else -#error Unsupported compiler -#endif - - constexpr auto start = function.find(prefix) + prefix.size(); - constexpr auto end = function.rfind(suffix); - - static_assert(start < end); - - constexpr auto name = function.substr(start, (end - start)); - return substring_as_array(name, std::make_index_sequence<name.size()> {}); - } - - template<typename T> - struct type_name_holder { - static inline constexpr auto value = type_name_array<T>(); - }; - - template<typename T> - constexpr auto type_name() -> std::string_view { - constexpr auto& value = type_name_holder<T>::value; - return std::string_view { value.data(), value.size() }; - } -}
\ No newline at end of file diff --git a/include/openvic-dataloader/detail/utility/Utility.hpp b/include/openvic-dataloader/detail/utility/Utility.hpp deleted file mode 100644 index 138a029..0000000 --- a/include/openvic-dataloader/detail/utility/Utility.hpp +++ /dev/null @@ -1,38 +0,0 @@ -#pragma once - -#include <string_view> -#include <type_traits> - -#include "openvic-dataloader/detail/utility/TypeName.hpp" - -namespace ovdl::detail { - [[noreturn]] inline void unreachable() { - // Uses compiler specific extensions if possible. - // Even if no extension is used, undefined behavior is still raised by - // an empty function body and the noreturn attribute. -#ifdef __GNUC__ // GCC, Clang, ICC - __builtin_unreachable(); -#elif defined(_MSC_VER) // MSVC - __assume(false); -#endif - } - - template<typename Kind> - constexpr std::string_view get_kind_name() { - constexpr auto name = type_name<Kind>(); - - return name; - } - - template<typename EnumT> - requires std::is_enum_v<EnumT> - constexpr std::underlying_type_t<EnumT> to_underlying(EnumT e) { - return static_cast<std::underlying_type_t<EnumT>>(e); - } - - template<typename EnumT> - requires std::is_enum_v<EnumT> - constexpr EnumT from_underlying(std::underlying_type_t<EnumT> ut) { - return static_cast<EnumT>(ut); - } -}
\ No newline at end of file diff --git a/include/openvic-dataloader/v2script/AbstractSyntaxTree.hpp b/include/openvic-dataloader/v2script/AbstractSyntaxTree.hpp index 27dbfcb..29e7866 100644 --- a/include/openvic-dataloader/v2script/AbstractSyntaxTree.hpp +++ b/include/openvic-dataloader/v2script/AbstractSyntaxTree.hpp @@ -1,12 +1,10 @@ #pragma once -#include <cstdio> #include <string_view> -#include <openvic-dataloader/AbstractSyntaxTree.hpp> -#include <openvic-dataloader/File.hpp> #include <openvic-dataloader/NodeLocation.hpp> -#include <openvic-dataloader/detail/LexyFwdDeclaration.hpp> +#include <openvic-dataloader/detail/SymbolIntern.hpp> +#include <openvic-dataloader/detail/Utility.hpp> #include <dryad/_detail/assert.hpp> #include <dryad/_detail/config.hpp> @@ -82,37 +80,34 @@ namespace ovdl::v2script::ast { }; struct FlatValue : dryad::abstract_node_range<Value, NodeKind::FirstFlatValue, NodeKind::LastFlatValue> { - AbstractSyntaxTree::symbol_type value() const { + SymbolIntern::symbol_type value() const { return _value; } - const char* value(const AbstractSyntaxTree::symbol_interner_type& symbols) const { + const char* value(const SymbolIntern::symbol_interner_type& symbols) const { return _value.c_str(symbols); } protected: - explicit FlatValue(dryad::node_ctor ctor, NodeKind kind, AbstractSyntaxTree::symbol_type value) + explicit FlatValue(dryad::node_ctor ctor, NodeKind kind, SymbolIntern::symbol_type value) : node_base(ctor, kind), _value(value) {} protected: - AbstractSyntaxTree::symbol_type _value; + SymbolIntern::symbol_type _value; }; struct IdentifierValue : dryad::basic_node<NodeKind::IdentifierValue, FlatValue> { - explicit IdentifierValue(dryad::node_ctor ctor, AbstractSyntaxTree::symbol_type value) : node_base(ctor, value) {} + explicit IdentifierValue(dryad::node_ctor ctor, SymbolIntern::symbol_type value) : node_base(ctor, value) {} }; struct StringValue : dryad::basic_node<NodeKind::StringValue, FlatValue> { - explicit 
StringValue(dryad::node_ctor ctor, AbstractSyntaxTree::symbol_type value) : node_base(ctor, value) {} + explicit StringValue(dryad::node_ctor ctor, SymbolIntern::symbol_type value) : node_base(ctor, value) {} }; struct ListValue : dryad::basic_node<NodeKind::ListValue, dryad::container_node<Value>> { explicit ListValue(dryad::node_ctor ctor, StatementList statements); - explicit ListValue(dryad::node_ctor ctor, AssignStatementList statements) - : node_base(ctor) { - insert_child_list_after(nullptr, statements); - } + explicit ListValue(dryad::node_ctor ctor, AssignStatementList statements); explicit ListValue(dryad::node_ctor ctor) : ListValue(ctor, StatementList {}) { } @@ -171,10 +166,7 @@ namespace ovdl::v2script::ast { struct FileTree : dryad::basic_node<NodeKind::FileTree, dryad::container_node<Node>> { explicit FileTree(dryad::node_ctor ctor, StatementList statements); - explicit FileTree(dryad::node_ctor ctor, AssignStatementList statements) : node_base(ctor) { - insert_child_list_after(nullptr, statements); - } - + explicit FileTree(dryad::node_ctor ctor, AssignStatementList statements); explicit FileTree(dryad::node_ctor ctor) : FileTree(ctor, StatementList {}) { } diff --git a/include/openvic-dataloader/v2script/Parser.hpp b/include/openvic-dataloader/v2script/Parser.hpp index f9f0ce8..1f6b158 100644 --- a/include/openvic-dataloader/v2script/Parser.hpp +++ b/include/openvic-dataloader/v2script/Parser.hpp @@ -3,6 +3,7 @@ #include <cstddef> #include <filesystem> #include <memory> +#include <optional> #include <ostream> #include <string> #include <string_view> @@ -10,8 +11,9 @@ #include <openvic-dataloader/Error.hpp> #include <openvic-dataloader/NodeLocation.hpp> #include <openvic-dataloader/Parser.hpp> -#include <openvic-dataloader/detail/utility/Concepts.hpp> -#include <openvic-dataloader/detail/utility/ErrorRange.hpp> +#include <openvic-dataloader/detail/Concepts.hpp> +#include <openvic-dataloader/detail/Encoding.hpp> +#include 
<openvic-dataloader/detail/ErrorRange.hpp> #include <openvic-dataloader/v2script/AbstractSyntaxTree.hpp> #include <dryad/node.hpp> @@ -25,20 +27,20 @@ namespace ovdl::v2script { Parser(); Parser(std::basic_ostream<char>& error_stream); - static Parser from_buffer(const char* data, std::size_t size); - static Parser from_buffer(const char* start, const char* end); - static Parser from_string(const std::string_view string); - static Parser from_file(const char* path); - static Parser from_file(const std::filesystem::path& path); + static Parser from_buffer(const char* data, std::size_t size, std::optional<detail::Encoding> encoding_fallback = std::nullopt); + static Parser from_buffer(const char* start, const char* end, std::optional<detail::Encoding> encoding_fallback = std::nullopt); + static Parser from_string(const std::string_view string, std::optional<detail::Encoding> encoding_fallback = std::nullopt); + static Parser from_file(const char* path, std::optional<detail::Encoding> encoding_fallback = std::nullopt); + static Parser from_file(const std::filesystem::path& path, std::optional<detail::Encoding> encoding_fallback = std::nullopt); - constexpr Parser& load_from_buffer(const char* data, std::size_t size); - constexpr Parser& load_from_buffer(const char* start, const char* end); - constexpr Parser& load_from_string(const std::string_view string); - Parser& load_from_file(const char* path); - Parser& load_from_file(const std::filesystem::path& path); + constexpr Parser& load_from_buffer(const char* data, std::size_t size, std::optional<detail::Encoding> encoding_fallback = std::nullopt); + constexpr Parser& load_from_buffer(const char* start, const char* end, std::optional<detail::Encoding> encoding_fallback = std::nullopt); + constexpr Parser& load_from_string(const std::string_view string, std::optional<detail::Encoding> encoding_fallback = std::nullopt); + Parser& load_from_file(const char* path, std::optional<detail::Encoding> encoding_fallback = 
std::nullopt); + Parser& load_from_file(const std::filesystem::path& path, std::optional<detail::Encoding> encoding_fallback = std::nullopt); - constexpr Parser& load_from_file(const detail::HasCstr auto& path) { - return load_from_file(path.c_str()); + constexpr Parser& load_from_file(const detail::HasCstr auto& path, std::optional<detail::Encoding> encoding_fallback = std::nullopt) { + return load_from_file(path.c_str(), encoding_fallback); } bool simple_parse(); |