diff options
author | Spartan322 <Megacake1234@gmail.com> | 2024-05-09 16:06:02 +0200 |
---|---|---|
committer | Spartan322 <Megacake1234@gmail.com> | 2024-06-18 01:31:12 +0200 |
commit | b0c3ba3f91926b0c95625bdbf4aab69269130b13 (patch) | |
tree | f15ebc47d6bf370031af28e4bb4814ae30ef46e1 /include/openvic-dataloader/detail | |
parent | 7b521d6023113372cf6b02e562828273c4040f0e (diff) |
Add runtime encoding detection and conversion (fix/char-detection)
Win-1251/1252 detection is a reduced C++ version of https://github.com/hsivonen/chardetng
Add manually-specified encoding fallback
Add default system encoding fallback
Add error recovery to v2script
Add unknown encoding detection warning
Remove csv::Parser templating
Fix lua files dropping data
Update lexy to foonathan/lexy@1e5d99fa3826b1c3c8628d3a11117fb4fb4cc0d0
Remove exclusive reliance on lexy::default_encoding for v2script
Move internal concepts to src/openvic-detail/InternalConcepts.hpp
Move contents of DetectUtf8.hpp to src/detail/Detect.hpp
Move openvic-dataloader/AbstractSyntaxTree.hpp to src
Move DiagnosticLogger.hpp to src
Move File.hpp to src
Move openvic-dataloader/detail/utility files to openvic-dataloader/detail
Add ovdl::utility::type_concat
Add ovdl::utility::type_prepend
Add ovdl::utility::is_instance_of
Overhaul parse error messages
Diffstat (limited to 'include/openvic-dataloader/detail')
-rw-r--r-- | include/openvic-dataloader/detail/Concepts.hpp (renamed from include/openvic-dataloader/detail/utility/Concepts.hpp) | 8 | ||||
-rw-r--r-- | include/openvic-dataloader/detail/Constexpr.hpp (renamed from include/openvic-dataloader/detail/utility/Constexpr.hpp) | 0 | ||||
-rw-r--r-- | include/openvic-dataloader/detail/Encoding.hpp | 13 | ||||
-rw-r--r-- | include/openvic-dataloader/detail/ErrorRange.hpp (renamed from include/openvic-dataloader/detail/utility/ErrorRange.hpp) | 0 | ||||
-rw-r--r-- | include/openvic-dataloader/detail/LexyFwdDeclaration.hpp | 8 | ||||
-rw-r--r-- | include/openvic-dataloader/detail/LexyReportError.hpp | 107 | ||||
-rw-r--r-- | include/openvic-dataloader/detail/OStreamOutputIterator.hpp | 1 | ||||
-rw-r--r-- | include/openvic-dataloader/detail/Utility.hpp | 89 | ||||
-rw-r--r-- | include/openvic-dataloader/detail/utility/PointerHash.hpp | 23 | ||||
-rw-r--r-- | include/openvic-dataloader/detail/utility/SelfType.hpp | 28 | ||||
-rw-r--r-- | include/openvic-dataloader/detail/utility/TypeName.hpp | 52 | ||||
-rw-r--r-- | include/openvic-dataloader/detail/utility/Utility.hpp | 38 |
12 files changed, 108 insertions, 259 deletions
diff --git a/include/openvic-dataloader/detail/utility/Concepts.hpp b/include/openvic-dataloader/detail/Concepts.hpp index 0ba91cc..79e04a9 100644 --- a/include/openvic-dataloader/detail/utility/Concepts.hpp +++ b/include/openvic-dataloader/detail/Concepts.hpp @@ -6,7 +6,6 @@ #include <utility> namespace ovdl { - struct NodeLocation; struct File; namespace detail { enum class buffer_error : std::uint8_t; @@ -15,7 +14,7 @@ namespace ovdl { namespace ovdl::detail { template<typename T, typename... Ts> - concept any_of = (std::same_as<T, Ts> || ...); + concept any_of = std::disjunction_v<std::is_same<T, Ts>...>; template<typename T> concept HasCstr = @@ -42,4 +41,9 @@ namespace ovdl::detail { { T::eof() } -> std::same_as<typename T::int_type>; { T::to_int_type(typename T::char_type {}) } -> std::same_as<typename T::int_type>; }; + + template<typename T, typename R, typename... Args> + concept Invocable_R = std::invocable<T, Args...> && requires(Args&&... args) { + { invoke(forward<Args>(args)...) } -> std::convertible_to<R>; + }; }
\ No newline at end of file diff --git a/include/openvic-dataloader/detail/utility/Constexpr.hpp b/include/openvic-dataloader/detail/Constexpr.hpp index 49479c5..49479c5 100644 --- a/include/openvic-dataloader/detail/utility/Constexpr.hpp +++ b/include/openvic-dataloader/detail/Constexpr.hpp diff --git a/include/openvic-dataloader/detail/Encoding.hpp b/include/openvic-dataloader/detail/Encoding.hpp new file mode 100644 index 0000000..12a0524 --- /dev/null +++ b/include/openvic-dataloader/detail/Encoding.hpp @@ -0,0 +1,13 @@ +#pragma once + +#include <cstdint> + +namespace ovdl::detail { + enum class Encoding : std::int8_t { + Unknown, + Ascii, + Utf8, + Windows1251, + Windows1252 + }; +}
\ No newline at end of file diff --git a/include/openvic-dataloader/detail/utility/ErrorRange.hpp b/include/openvic-dataloader/detail/ErrorRange.hpp index 7d5ca13..7d5ca13 100644 --- a/include/openvic-dataloader/detail/utility/ErrorRange.hpp +++ b/include/openvic-dataloader/detail/ErrorRange.hpp diff --git a/include/openvic-dataloader/detail/LexyFwdDeclaration.hpp b/include/openvic-dataloader/detail/LexyFwdDeclaration.hpp deleted file mode 100644 index 554c88d..0000000 --- a/include/openvic-dataloader/detail/LexyFwdDeclaration.hpp +++ /dev/null @@ -1,8 +0,0 @@ -#pragma once - -namespace lexy { - struct default_encoding; - - template<typename Encoding, typename MemoryResource> - struct buffer; -}
\ No newline at end of file diff --git a/include/openvic-dataloader/detail/LexyReportError.hpp b/include/openvic-dataloader/detail/LexyReportError.hpp deleted file mode 100644 index 3c32bd1..0000000 --- a/include/openvic-dataloader/detail/LexyReportError.hpp +++ /dev/null @@ -1,107 +0,0 @@ -#pragma once - -#include <cstddef> -#include <sstream> -#include <utility> -#include <vector> - -#include <openvic-dataloader/ParseData.hpp> -#include <openvic-dataloader/ParseError.hpp> - -#include <lexy/input_location.hpp> -#include <lexy/visualize.hpp> - -#include "openvic-dataloader/detail/utility/Concepts.hpp" - -#include <lexy_ext/report_error.hpp> - -namespace ovdl::detail { - template<typename OutputIterator> - struct _ReportError { - OutputIterator _iter; - lexy::visualization_options _opts; - const char* _path; - - struct _sink { - OutputIterator _iter; - lexy::visualization_options _opts; - const char* _path; - std::size_t _count; - std::vector<ParseError> _errors; - - using return_type = std::vector<ParseError>; - - template<typename Input, typename Reader, typename Tag> - void operator()(const lexy::error_context<Input>& context, const lexy::error<Reader, Tag>& error) { - _iter = lexy_ext::_detail::write_error(_iter, context, error, _opts, _path); - ++_count; - - // Convert the context location and error location into line/column information. - auto context_location = lexy::get_input_location(context.input(), context.position()); - auto location = lexy::get_input_location(context.input(), error.position(), context_location.anchor()); - - std::basic_stringstream<typename Reader::encoding::char_type> message; - - // Write the main annotation. 
- if constexpr (std::is_same_v<Tag, lexy::expected_literal>) { - auto string = lexy::_detail::make_literal_lexeme<typename Reader::encoding>(error.string(), error.length()); - - message << "expected '" << string.data() << '\''; - } else if constexpr (std::is_same_v<Tag, lexy::expected_keyword>) { - auto string = lexy::_detail::make_literal_lexeme<typename Reader::encoding>(error.string(), error.length()); - - message << "expected keyword '" << string.data() << '\''; - } else if constexpr (std::is_same_v<Tag, lexy::expected_char_class>) { - message << "expected " << error.name(); - } else { - message << error.message(); - } - - _errors.push_back( - ParseError { - ParseError::Type::Fatal, // TODO: distinguish recoverable errors from fatal errors - std::move(message.str()), - 0, // TODO: implement proper error codes - ParseData { - context.production(), - context_location.line_nr(), - context_location.column_nr(), - }, - location.line_nr(), - location.column_nr(), - }); - } - - return_type finish() && { - if (_count != 0) - *_iter++ = '\n'; - return _errors; - } - }; - constexpr auto sink() const { - return _sink { _iter, _opts, _path, 0 }; - } - - /// Specifies a path that will be printed alongside the diagnostic. - constexpr _ReportError path(const char* path) const { - return { _iter, _opts, path }; - } - - constexpr _ReportError path(const detail::HasCstr auto& path_object) const { - return path(path_object.c_str()); - } - - /// Specifies an output iterator where the errors are written to. - template<typename OI> - constexpr _ReportError<OI> to(OI out) const { - return { out, _opts, _path }; - } - - /// Overrides visualization options. - constexpr _ReportError opts(lexy::visualization_options opts) const { - return { _iter, opts, _path }; - } - }; - - constexpr auto ReporError = _ReportError<lexy::stderr_output_iterator> {}; -}
\ No newline at end of file diff --git a/include/openvic-dataloader/detail/OStreamOutputIterator.hpp b/include/openvic-dataloader/detail/OStreamOutputIterator.hpp index 8f120c7..81f6c89 100644 --- a/include/openvic-dataloader/detail/OStreamOutputIterator.hpp +++ b/include/openvic-dataloader/detail/OStreamOutputIterator.hpp @@ -1,6 +1,5 @@ #pragma once -#include <memory> #include <ostream> namespace ovdl::detail { diff --git a/include/openvic-dataloader/detail/Utility.hpp b/include/openvic-dataloader/detail/Utility.hpp new file mode 100644 index 0000000..8d9e159 --- /dev/null +++ b/include/openvic-dataloader/detail/Utility.hpp @@ -0,0 +1,89 @@ +#pragma once + +#include <cstdint> +#include <tuple> +#include <type_traits> +#include <variant> + +#include <openvic-dataloader/detail/Concepts.hpp> + +namespace ovdl::detail { + [[noreturn]] inline void unreachable() { + // Uses compiler specific extensions if possible. + // Even if no extension is used, undefined behavior is still raised by + // an empty function body and the noreturn attribute. +#ifdef __GNUC__ // GCC, Clang, ICC + __builtin_unreachable(); +#elif defined(_MSC_VER) // MSVC + __assume(false); +#endif + } + + template<typename EnumT> + requires std::is_enum_v<EnumT> + constexpr std::underlying_type_t<EnumT> to_underlying(EnumT e) { + return static_cast<std::underlying_type_t<EnumT>>(e); + } + + template<typename EnumT> + requires std::is_enum_v<EnumT> + constexpr EnumT from_underlying(std::underlying_type_t<EnumT> ut) { + return static_cast<EnumT>(ut); + } + + template<typename Type, typename... 
Types> + struct TypeRegister { + using tuple_type = std::tuple<Type, Types...>; + using variant_type = std::variant<Type, Types...>; + + template<typename QueriedType> + struct _id_getter { + static constexpr std::uint32_t type_id() { + static_assert(any_of<QueriedType, Type, Types...>, "Cannot query an non-registered type"); + + if constexpr (std::is_same_v<Type, QueriedType>) return 0; + else return 1 + TypeRegister<Types...>::template _id_getter<QueriedType>::type_id(); + }; + }; + + template<typename QueriedType> + static constexpr std::uint32_t type_id() { + + return _id_getter<QueriedType>::type_id(); + } + + template<std::uint32_t Id> + using type_by_id = std::tuple_element_t<Id, tuple_type>; + }; + + template<typename...> + struct type_concat; + + template<typename... Ts, template<typename...> typename TT, typename... TTs> + struct type_concat<TT<TTs...>, Ts...> { + using type = TT<TTs..., Ts...>; + }; + + template<typename... Ts> + using type_concat_t = type_concat<Ts...>::type; + + template<typename...> + struct type_prepend; + + template<typename... Ts, template<typename...> typename TT, typename... TTs> + struct type_prepend<TT<TTs...>, Ts...> { + using type = TT<Ts..., TTs...>; + }; + + template<typename... Ts> + using type_prepend_t = type_prepend<Ts...>::type; + + template<typename Type, template<typename...> typename Template> + struct is_instance_of : std::false_type {}; + + template<typename... Ts, template<typename...> typename Template> + struct is_instance_of<Template<Ts...>, Template> : std::true_type {}; + + template<typename Type, template<typename...> typename Template> + static constexpr auto is_instance_of_v = is_instance_of<Type, Template>::value; +}
\ No newline at end of file diff --git a/include/openvic-dataloader/detail/utility/PointerHash.hpp b/include/openvic-dataloader/detail/utility/PointerHash.hpp deleted file mode 100644 index c0d28bc..0000000 --- a/include/openvic-dataloader/detail/utility/PointerHash.hpp +++ /dev/null @@ -1,23 +0,0 @@ -#pragma once - -#include <cstdint> - -namespace ovdl::detail { - /* hash any pointer */ - template<typename T> - struct PointerHash { - using type = T; - using ptr_type = T*; - using const_type = const T; - using const_ptr_type = const T*; - using const_ptr_const_type = const const_ptr_type; - constexpr std::size_t operator()(const_ptr_const_type pointer) const { - auto addr = reinterpret_cast<uintptr_t>(pointer); -#if SIZE_MAX < UINTPTR_MAX - /* size_t is not large enough to hold the pointer’s memory address */ - addr %= SIZE_MAX; /* truncate the address so it is small enough to fit in a size_t */ -#endif - return addr; - } - }; -}
\ No newline at end of file diff --git a/include/openvic-dataloader/detail/utility/SelfType.hpp b/include/openvic-dataloader/detail/utility/SelfType.hpp deleted file mode 100644 index 5209700..0000000 --- a/include/openvic-dataloader/detail/utility/SelfType.hpp +++ /dev/null @@ -1,28 +0,0 @@ -#pragma once - -#include <type_traits> - -namespace ovdl::detail { -#if !defined(_MSC_VER) -#pragma GCC diagnostic push -#pragma clang diagnostic ignored "-Wunknown-warning-option" -#pragma GCC diagnostic ignored "-Wnon-template-friend" -#endif - template<typename T> - struct Reader { - friend auto adl_GetSelfType(Reader<T>); - }; - - template<typename T, typename U> - struct Writer { - friend auto adl_GetSelfType(Reader<T>) { return U {}; } - }; -#if !defined(_MSC_VER) -#pragma GCC diagnostic pop -#endif - - inline void adl_GetSelfType() {} - - template<typename T> - using Read = std::remove_pointer_t<decltype(adl_GetSelfType(Reader<T> {}))>; -} diff --git a/include/openvic-dataloader/detail/utility/TypeName.hpp b/include/openvic-dataloader/detail/utility/TypeName.hpp deleted file mode 100644 index 1a34a0f..0000000 --- a/include/openvic-dataloader/detail/utility/TypeName.hpp +++ /dev/null @@ -1,52 +0,0 @@ -#pragma once - -#include <array> -#include <cstddef> -#include <string_view> -#include <utility> - -namespace ovdl::detail { - - template<std::size_t... Idxs> - constexpr auto substring_as_array(std::string_view str, std::index_sequence<Idxs...>) { - return std::array { str[Idxs]... 
}; - } - - template<typename T> - constexpr auto type_name_array() { -#if defined(__clang__) - constexpr auto prefix = std::string_view { "[T = " }; - constexpr auto suffix = std::string_view { "]" }; - constexpr auto function = std::string_view { __PRETTY_FUNCTION__ }; -#elif defined(__GNUC__) - constexpr auto prefix = std::string_view { "with T = " }; - constexpr auto suffix = std::string_view { "]" }; - constexpr auto function = std::string_view { __PRETTY_FUNCTION__ }; -#elif defined(_MSC_VER) - constexpr auto prefix = std::string_view { "type_name_array<" }; - constexpr auto suffix = std::string_view { ">(void)" }; - constexpr auto function = std::string_view { __FUNCSIG__ }; -#else -#error Unsupported compiler -#endif - - constexpr auto start = function.find(prefix) + prefix.size(); - constexpr auto end = function.rfind(suffix); - - static_assert(start < end); - - constexpr auto name = function.substr(start, (end - start)); - return substring_as_array(name, std::make_index_sequence<name.size()> {}); - } - - template<typename T> - struct type_name_holder { - static inline constexpr auto value = type_name_array<T>(); - }; - - template<typename T> - constexpr auto type_name() -> std::string_view { - constexpr auto& value = type_name_holder<T>::value; - return std::string_view { value.data(), value.size() }; - } -}
\ No newline at end of file diff --git a/include/openvic-dataloader/detail/utility/Utility.hpp b/include/openvic-dataloader/detail/utility/Utility.hpp deleted file mode 100644 index 138a029..0000000 --- a/include/openvic-dataloader/detail/utility/Utility.hpp +++ /dev/null @@ -1,38 +0,0 @@ -#pragma once - -#include <string_view> -#include <type_traits> - -#include "openvic-dataloader/detail/utility/TypeName.hpp" - -namespace ovdl::detail { - [[noreturn]] inline void unreachable() { - // Uses compiler specific extensions if possible. - // Even if no extension is used, undefined behavior is still raised by - // an empty function body and the noreturn attribute. -#ifdef __GNUC__ // GCC, Clang, ICC - __builtin_unreachable(); -#elif defined(_MSC_VER) // MSVC - __assume(false); -#endif - } - - template<typename Kind> - constexpr std::string_view get_kind_name() { - constexpr auto name = type_name<Kind>(); - - return name; - } - - template<typename EnumT> - requires std::is_enum_v<EnumT> - constexpr std::underlying_type_t<EnumT> to_underlying(EnumT e) { - return static_cast<std::underlying_type_t<EnumT>>(e); - } - - template<typename EnumT> - requires std::is_enum_v<EnumT> - constexpr EnumT from_underlying(std::underlying_type_t<EnumT> ut) { - return static_cast<EnumT>(ut); - } -}
\ No newline at end of file |