From b0c3ba3f91926b0c95625bdbf4aab69269130b13 Mon Sep 17 00:00:00 2001 From: Spartan322 Date: Thu, 9 May 2024 10:06:02 -0400 Subject: Add runtime encoding detection and conversion Win-1251/1252 detection is a reduced C++ version of https://github.com/hsivonen/chardetng Add manually-specified encoding fallback Add default system encoding fallback Add error recovery to v2script Add unknown encoding detection warning Remove csv::Parser templating Fix lua files dropping data Update lexy to foonathan/lexy@1e5d99fa3826b1c3c8628d3a11117fb4fb4cc0d0 Remove exclusive reliance on lexy::default_encoding for v2script Move internal concepts to src/openvic-detail/InternalConcepts.hpp Move contents of DetectUtf8.hpp to src/detail/Detect.hpp Move openvic-dataloader/AbstractSyntaxTree.hpp to src Move DiagnosticLogger.hpp to src Move File.hpp to src Move openvic-dataloader/detail/utlity files to openvic-dataloader/detail Add ovdl::utility::type_concat Add ovdl::utility::type_prepend Add ovdl::utility::is_instance_of Overhaul parse error messages --- include/openvic-dataloader/detail/Concepts.hpp | 49 ++++++++++ include/openvic-dataloader/detail/Constexpr.hpp | 15 +++ include/openvic-dataloader/detail/Encoding.hpp | 13 +++ include/openvic-dataloader/detail/ErrorRange.hpp | 10 ++ .../detail/LexyFwdDeclaration.hpp | 8 -- .../openvic-dataloader/detail/LexyReportError.hpp | 107 --------------------- .../detail/OStreamOutputIterator.hpp | 1 - include/openvic-dataloader/detail/Utility.hpp | 89 +++++++++++++++++ .../openvic-dataloader/detail/utility/Concepts.hpp | 45 --------- .../detail/utility/Constexpr.hpp | 15 --- .../detail/utility/ErrorRange.hpp | 10 -- .../detail/utility/PointerHash.hpp | 23 ----- .../openvic-dataloader/detail/utility/SelfType.hpp | 28 ------ .../openvic-dataloader/detail/utility/TypeName.hpp | 52 ---------- .../openvic-dataloader/detail/utility/Utility.hpp | 38 -------- 15 files changed, 176 insertions(+), 327 deletions(-) create mode 100644 include/openvic-dataloader/detail/Concepts.hpp create mode 100644 include/openvic-dataloader/detail/Constexpr.hpp create mode 100644 include/openvic-dataloader/detail/Encoding.hpp create mode 100644 include/openvic-dataloader/detail/ErrorRange.hpp delete mode 100644 include/openvic-dataloader/detail/LexyFwdDeclaration.hpp delete mode 100644 include/openvic-dataloader/detail/LexyReportError.hpp create mode 100644 include/openvic-dataloader/detail/Utility.hpp delete mode 100644 include/openvic-dataloader/detail/utility/Concepts.hpp delete mode 100644 include/openvic-dataloader/detail/utility/Constexpr.hpp delete mode 100644 include/openvic-dataloader/detail/utility/ErrorRange.hpp delete mode 100644 include/openvic-dataloader/detail/utility/PointerHash.hpp delete mode 100644 include/openvic-dataloader/detail/utility/SelfType.hpp delete mode 100644 include/openvic-dataloader/detail/utility/TypeName.hpp delete mode 100644 include/openvic-dataloader/detail/utility/Utility.hpp (limited to 'include/openvic-dataloader/detail') diff --git a/include/openvic-dataloader/detail/Concepts.hpp b/include/openvic-dataloader/detail/Concepts.hpp new file mode 100644 index 0000000..79e04a9 --- /dev/null +++ b/include/openvic-dataloader/detail/Concepts.hpp @@ -0,0 +1,49 @@ +#pragma once + +#include +#include +#include +#include + +namespace ovdl { + struct File; + namespace detail { + enum class buffer_error : std::uint8_t; + } +} + +namespace ovdl::detail { + template + concept any_of = std::disjunction_v...>; + + template + concept HasCstr = + requires(T t) { + { t.c_str() } -> std::same_as; + }; + + template + concept HasPath = requires(T& t) { + { t.path() } -> std::same_as; + }; + + template + concept LoadCallback = + requires(T&& t, Self&& self, Args&&... args) { + { std::invoke(std::forward(t), std::forward(self), std::forward(args)...) } -> std::same_as; + }; + + template + concept IsEncoding = requires(T t) { + typename T::char_type; + typename T::int_type; + { T::template is_secondary_char_type() } -> std::same_as; + { T::eof() } -> std::same_as; + { T::to_int_type(typename T::char_type {}) } -> std::same_as; + }; + + template + concept Invocable_R = std::invocable && requires(Args&&... args) { + { invoke(forward(args)...) } -> std::convertible_to; + }; +} \ No newline at end of file diff --git a/include/openvic-dataloader/detail/Constexpr.hpp b/include/openvic-dataloader/detail/Constexpr.hpp new file mode 100644 index 0000000..49479c5 --- /dev/null +++ b/include/openvic-dataloader/detail/Constexpr.hpp @@ -0,0 +1,15 @@ +#pragma once + +// THANK YOU APPLE FOR YOUR UTTER DISREGARD FOR C++20 + +#if __cpp_lib_optional >= 202106L +#define OVDL_OPTIONAL_CONSTEXPR constexpr +#else +#define OVDL_OPTIONAL_CONSTEXPR inline +#endif + +#if __cpp_lib_constexpr_vector >= 201907L +#define OVDL_VECTOR_CONSTEXPR constexpr +#else +#define OVDL_VECTOR_CONSTEXPR inline +#endif \ No newline at end of file diff --git a/include/openvic-dataloader/detail/Encoding.hpp b/include/openvic-dataloader/detail/Encoding.hpp new file mode 100644 index 0000000..12a0524 --- /dev/null +++ b/include/openvic-dataloader/detail/Encoding.hpp @@ -0,0 +1,13 @@ +#pragma once + +#include + +namespace ovdl::detail { + enum class Encoding : std::int8_t { + Unknown, + Ascii, + Utf8, + Windows1251, + Windows1252 + }; +} \ No newline at end of file diff --git a/include/openvic-dataloader/detail/ErrorRange.hpp b/include/openvic-dataloader/detail/ErrorRange.hpp new file mode 100644 index 0000000..7d5ca13 --- /dev/null +++ b/include/openvic-dataloader/detail/ErrorRange.hpp @@ -0,0 +1,10 @@ +#pragma once + +#include + +#include + +namespace ovdl::detail { + template + using error_range = decltype(std::declval()->errors()); +} \ No newline at end of file diff --git a/include/openvic-dataloader/detail/LexyFwdDeclaration.hpp b/include/openvic-dataloader/detail/LexyFwdDeclaration.hpp deleted file mode 100644 index 554c88d..0000000 --- a/include/openvic-dataloader/detail/LexyFwdDeclaration.hpp +++ /dev/null @@ -1,8 +0,0 @@ -#pragma once - -namespace lexy { - struct default_encoding; - - template - struct buffer; -} \ No newline at end of file diff --git a/include/openvic-dataloader/detail/LexyReportError.hpp b/include/openvic-dataloader/detail/LexyReportError.hpp deleted file mode 100644 index 3c32bd1..0000000 --- a/include/openvic-dataloader/detail/LexyReportError.hpp +++ /dev/null @@ -1,107 +0,0 @@ -#pragma once - -#include -#include -#include -#include - -#include -#include - -#include -#include - -#include "openvic-dataloader/detail/utility/Concepts.hpp" - -#include - -namespace ovdl::detail { - template - struct _ReportError { - OutputIterator _iter; - lexy::visualization_options _opts; - const char* _path; - - struct _sink { - OutputIterator _iter; - lexy::visualization_options _opts; - const char* _path; - std::size_t _count; - std::vector _errors; - - using return_type = std::vector; - - template - void operator()(const lexy::error_context& context, const lexy::error& error) { - _iter = lexy_ext::_detail::write_error(_iter, context, error, _opts, _path); - ++_count; - - // Convert the context location and error location into line/column information. - auto context_location = lexy::get_input_location(context.input(), context.position()); - auto location = lexy::get_input_location(context.input(), error.position(), context_location.anchor()); - - std::basic_stringstream message; - - // Write the main annotation. - if constexpr (std::is_same_v) { - auto string = lexy::_detail::make_literal_lexeme(error.string(), error.length()); - - message << "expected '" << string.data() << '\''; - } else if constexpr (std::is_same_v) { - auto string = lexy::_detail::make_literal_lexeme(error.string(), error.length()); - - message << "expected keyword '" << string.data() << '\''; - } else if constexpr (std::is_same_v) { - message << "expected " << error.name(); - } else { - message << error.message(); - } - - _errors.push_back( - ParseError { - ParseError::Type::Fatal, // TODO: distinguish recoverable errors from fatal errors - std::move(message.str()), - 0, // TODO: implement proper error codes - ParseData { - context.production(), - context_location.line_nr(), - context_location.column_nr(), - }, - location.line_nr(), - location.column_nr(), - }); - } - - return_type finish() && { - if (_count != 0) - *_iter++ = '\n'; - return _errors; - } - }; - constexpr auto sink() const { - return _sink { _iter, _opts, _path, 0 }; - } - - /// Specifies a path that will be printed alongside the diagnostic. - constexpr _ReportError path(const char* path) const { - return { _iter, _opts, path }; - } - - constexpr _ReportError path(const detail::HasCstr auto& path_object) const { - return path(path_object.c_str()); - } - - /// Specifies an output iterator where the errors are written to. - template - constexpr _ReportError to(OI out) const { - return { out, _opts, _path }; - } - - /// Overrides visualization options. - constexpr _ReportError opts(lexy::visualization_options opts) const { - return { _iter, opts, _path }; - } - }; - - constexpr auto ReporError = _ReportError {}; -} \ No newline at end of file diff --git a/include/openvic-dataloader/detail/OStreamOutputIterator.hpp b/include/openvic-dataloader/detail/OStreamOutputIterator.hpp index 8f120c7..81f6c89 100644 --- a/include/openvic-dataloader/detail/OStreamOutputIterator.hpp +++ b/include/openvic-dataloader/detail/OStreamOutputIterator.hpp @@ -1,6 +1,5 @@ #pragma once -#include #include namespace ovdl::detail { diff --git a/include/openvic-dataloader/detail/Utility.hpp b/include/openvic-dataloader/detail/Utility.hpp new file mode 100644 index 0000000..8d9e159 --- /dev/null +++ b/include/openvic-dataloader/detail/Utility.hpp @@ -0,0 +1,89 @@ +#pragma once + +#include +#include +#include +#include + +#include + +namespace ovdl::detail { + [[noreturn]] inline void unreachable() { + // Uses compiler specific extensions if possible. + // Even if no extension is used, undefined behavior is still raised by + // an empty function body and the noreturn attribute. +#ifdef __GNUC__ // GCC, Clang, ICC + __builtin_unreachable(); +#elif defined(_MSC_VER) // MSVC + __assume(false); +#endif + } + + template + requires std::is_enum_v + constexpr std::underlying_type_t to_underlying(EnumT e) { + return static_cast>(e); + } + + template + requires std::is_enum_v + constexpr EnumT from_underlying(std::underlying_type_t ut) { + return static_cast(ut); + } + + template + struct TypeRegister { + using tuple_type = std::tuple; + using variant_type = std::variant; + + template + struct _id_getter { + static constexpr std::uint32_t type_id() { + static_assert(any_of, "Cannot query an non-registered type"); + + if constexpr (std::is_same_v) return 0; + else return 1 + TypeRegister::template _id_getter::type_id(); + }; + }; + + template + static constexpr std::uint32_t type_id() { + + return _id_getter::type_id(); + } + + template + using type_by_id = std::tuple_element_t; + }; + + template + struct type_concat; + + template typename TT, typename... TTs> + struct type_concat, Ts...> { + using type = TT; + }; + + template + using type_concat_t = type_concat::type; + + template + struct type_prepend; + + template typename TT, typename... TTs> + struct type_prepend, Ts...> { + using type = TT; + }; + + template + using type_prepend_t = type_prepend::type; + + template typename Template> + struct is_instance_of : std::false_type {}; + + template typename Template> + struct is_instance_of, Template> : std::true_type {}; + + template typename Template> + static constexpr auto is_instance_of_v = is_instance_of::value; +} \ No newline at end of file diff --git a/include/openvic-dataloader/detail/utility/Concepts.hpp b/include/openvic-dataloader/detail/utility/Concepts.hpp deleted file mode 100644 index 0ba91cc..0000000 --- a/include/openvic-dataloader/detail/utility/Concepts.hpp +++ /dev/null @@ -1,45 +0,0 @@ -#pragma once - -#include -#include -#include -#include - -namespace ovdl { - struct NodeLocation; - struct File; - namespace detail { - enum class buffer_error : std::uint8_t; - } -} - -namespace ovdl::detail { - template - concept any_of = (std::same_as || ...); - - template - concept HasCstr = - requires(T t) { - { t.c_str() } -> std::same_as; - }; - - template - concept HasPath = requires(T& t) { - { t.path() } -> std::same_as; - }; - - template - concept LoadCallback = - requires(T&& t, Self&& self, Args&&... args) { - { std::invoke(std::forward(t), std::forward(self), std::forward(args)...) } -> std::same_as; - }; - - template - concept IsEncoding = requires(T t) { - typename T::char_type; - typename T::int_type; - { T::template is_secondary_char_type() } -> std::same_as; - { T::eof() } -> std::same_as; - { T::to_int_type(typename T::char_type {}) } -> std::same_as; - }; -} \ No newline at end of file diff --git a/include/openvic-dataloader/detail/utility/Constexpr.hpp b/include/openvic-dataloader/detail/utility/Constexpr.hpp deleted file mode 100644 index 49479c5..0000000 --- a/include/openvic-dataloader/detail/utility/Constexpr.hpp +++ /dev/null @@ -1,15 +0,0 @@ -#pragma once - -// THANK YOU APPLE FOR YOUR UTTER DISREGARD FOR C++20 - -#if __cpp_lib_optional >= 202106L -#define OVDL_OPTIONAL_CONSTEXPR constexpr -#else -#define OVDL_OPTIONAL_CONSTEXPR inline -#endif - -#if __cpp_lib_constexpr_vector >= 201907L -#define OVDL_VECTOR_CONSTEXPR constexpr -#else -#define OVDL_VECTOR_CONSTEXPR inline -#endif \ No newline at end of file diff --git a/include/openvic-dataloader/detail/utility/ErrorRange.hpp b/include/openvic-dataloader/detail/utility/ErrorRange.hpp deleted file mode 100644 index 7d5ca13..0000000 --- a/include/openvic-dataloader/detail/utility/ErrorRange.hpp +++ /dev/null @@ -1,10 +0,0 @@ -#pragma once - -#include - -#include - -namespace ovdl::detail { - template - using error_range = decltype(std::declval()->errors()); -} \ No newline at end of file diff --git a/include/openvic-dataloader/detail/utility/PointerHash.hpp b/include/openvic-dataloader/detail/utility/PointerHash.hpp deleted file mode 100644 index c0d28bc..0000000 --- a/include/openvic-dataloader/detail/utility/PointerHash.hpp +++ /dev/null @@ -1,23 +0,0 @@ -#pragma once - -#include - -namespace ovdl::detail { - /* hash any pointer */ - template - struct PointerHash { - using type = T; - using ptr_type = T*; - using const_type = const T; - using const_ptr_type = const T*; - using const_ptr_const_type = const const_ptr_type; - constexpr std::size_t operator()(const_ptr_const_type pointer) const { - auto addr = reinterpret_cast(pointer); -#if SIZE_MAX < UINTPTR_MAX - /* size_t is not large enough to hold the pointer’s memory address */ - addr %= SIZE_MAX; /* truncate the address so it is small enough to fit in a size_t */ -#endif - return addr; - } - }; -} \ No newline at end of file diff --git a/include/openvic-dataloader/detail/utility/SelfType.hpp b/include/openvic-dataloader/detail/utility/SelfType.hpp deleted file mode 100644 index 5209700..0000000 --- a/include/openvic-dataloader/detail/utility/SelfType.hpp +++ /dev/null @@ -1,28 +0,0 @@ -#pragma once - -#include - -namespace ovdl::detail { -#if !defined(_MSC_VER) -#pragma GCC diagnostic push -#pragma clang diagnostic ignored "-Wunknown-warning-option" -#pragma GCC diagnostic ignored "-Wnon-template-friend" -#endif - template - struct Reader { - friend auto adl_GetSelfType(Reader); - }; - - template - struct Writer { - friend auto adl_GetSelfType(Reader) { return U {}; } - }; -#if !defined(_MSC_VER) -#pragma GCC diagnostic pop -#endif - - inline void adl_GetSelfType() {} - - template - using Read = std::remove_pointer_t {}))>; -} diff --git a/include/openvic-dataloader/detail/utility/TypeName.hpp b/include/openvic-dataloader/detail/utility/TypeName.hpp deleted file mode 100644 index 1a34a0f..0000000 --- a/include/openvic-dataloader/detail/utility/TypeName.hpp +++ /dev/null @@ -1,52 +0,0 @@ -#pragma once - -#include -#include -#include -#include - -namespace ovdl::detail { - - template - constexpr auto substring_as_array(std::string_view str, std::index_sequence) { - return std::array { str[Idxs]... }; - } - - template - constexpr auto type_name_array() { -#if defined(__clang__) - constexpr auto prefix = std::string_view { "[T = " }; - constexpr auto suffix = std::string_view { "]" }; - constexpr auto function = std::string_view { __PRETTY_FUNCTION__ }; -#elif defined(__GNUC__) - constexpr auto prefix = std::string_view { "with T = " }; - constexpr auto suffix = std::string_view { "]" }; - constexpr auto function = std::string_view { __PRETTY_FUNCTION__ }; -#elif defined(_MSC_VER) - constexpr auto prefix = std::string_view { "type_name_array<" }; - constexpr auto suffix = std::string_view { ">(void)" }; - constexpr auto function = std::string_view { __FUNCSIG__ }; -#else -#error Unsupported compiler -#endif - - constexpr auto start = function.find(prefix) + prefix.size(); - constexpr auto end = function.rfind(suffix); - - static_assert(start < end); - - constexpr auto name = function.substr(start, (end - start)); - return substring_as_array(name, std::make_index_sequence {}); - } - - template - struct type_name_holder { - static inline constexpr auto value = type_name_array(); - }; - - template - constexpr auto type_name() -> std::string_view { - constexpr auto& value = type_name_holder::value; - return std::string_view { value.data(), value.size() }; - } -} \ No newline at end of file diff --git a/include/openvic-dataloader/detail/utility/Utility.hpp b/include/openvic-dataloader/detail/utility/Utility.hpp deleted file mode 100644 index 138a029..0000000 --- a/include/openvic-dataloader/detail/utility/Utility.hpp +++ /dev/null @@ -1,38 +0,0 @@ -#pragma once - -#include -#include - -#include "openvic-dataloader/detail/utility/TypeName.hpp" - -namespace ovdl::detail { - [[noreturn]] inline void unreachable() { - // Uses compiler specific extensions if possible. - // Even if no extension is used, undefined behavior is still raised by - // an empty function body and the noreturn attribute. -#ifdef __GNUC__ // GCC, Clang, ICC - __builtin_unreachable(); -#elif defined(_MSC_VER) // MSVC - __assume(false); -#endif - } - - template - constexpr std::string_view get_kind_name() { - constexpr auto name = type_name(); - - return name; - } - - template - requires std::is_enum_v - constexpr std::underlying_type_t to_underlying(EnumT e) { - return static_cast>(e); - } - - template - requires std::is_enum_v - constexpr EnumT from_underlying(std::underlying_type_t ut) { - return static_cast(ut); - } -} \ No newline at end of file -- cgit v1.2.3-56-ga3b1