diff options
author | Spartan322 <Megacake1234@gmail.com> | 2024-05-09 16:06:02 +0200 |
---|---|---|
committer | Spartan322 <Megacake1234@gmail.com> | 2024-06-18 01:31:12 +0200 |
commit | b0c3ba3f91926b0c95625bdbf4aab69269130b13 (patch) | |
tree | f15ebc47d6bf370031af28e4bb4814ae30ef46e1 /include/openvic-dataloader/csv | |
parent | 7b521d6023113372cf6b02e562828273c4040f0e (diff) |
Add runtime encoding detection and conversion (fix/char-detection)
Win-1251/1252 detection is a reduced C++ version of https://github.com/hsivonen/chardetng
Add manually-specified encoding fallback
Add default system encoding fallback
Add error recovery to v2script
Add unknown encoding detection warning
Remove csv::Parser templating
Fix lua files dropping data
Update lexy to foonathan/lexy@1e5d99fa3826b1c3c8628d3a11117fb4fb4cc0d0
Remove exclusive reliance on lexy::default_encoding for v2script
Move internal concepts to src/openvic-detail/InternalConcepts.hpp
Move contents of DetectUtf8.hpp to src/detail/Detect.hpp
Move openvic-dataloader/AbstractSyntaxTree.hpp to src
Move DiagnosticLogger.hpp to src
Move File.hpp to src
Move openvic-dataloader/detail/utility files to openvic-dataloader/detail
Add ovdl::utility::type_concat
Add ovdl::utility::type_prepend
Add ovdl::utility::is_instance_of
Overhaul parse error messages
Diffstat (limited to 'include/openvic-dataloader/csv')
-rw-r--r-- | include/openvic-dataloader/csv/LineObject.hpp | 2 | ||||
-rw-r--r-- | include/openvic-dataloader/csv/Parser.hpp | 41 |
2 files changed, 18 insertions, 25 deletions
diff --git a/include/openvic-dataloader/csv/LineObject.hpp b/include/openvic-dataloader/csv/LineObject.hpp index ca632cd..c839be2 100644 --- a/include/openvic-dataloader/csv/LineObject.hpp +++ b/include/openvic-dataloader/csv/LineObject.hpp @@ -13,7 +13,7 @@ #include <utility> #include <vector> -#include <openvic-dataloader/detail/utility/Constexpr.hpp> +#include <openvic-dataloader/detail/Constexpr.hpp> namespace ovdl::csv { /// LineObject should be able to recognize the differences between: diff --git a/include/openvic-dataloader/csv/Parser.hpp b/include/openvic-dataloader/csv/Parser.hpp index 06e7251..35421c8 100644 --- a/include/openvic-dataloader/csv/Parser.hpp +++ b/include/openvic-dataloader/csv/Parser.hpp @@ -1,41 +1,38 @@ #pragma once #include <filesystem> +#include <optional> #include <openvic-dataloader/Error.hpp> +#include <openvic-dataloader/NodeLocation.hpp> #include <openvic-dataloader/Parser.hpp> #include <openvic-dataloader/csv/LineObject.hpp> -#include <openvic-dataloader/detail/utility/Concepts.hpp> -#include <openvic-dataloader/detail/utility/ErrorRange.hpp> +#include <openvic-dataloader/detail/Concepts.hpp> +#include <openvic-dataloader/detail/Encoding.hpp> +#include <openvic-dataloader/detail/ErrorRange.hpp> #include <dryad/node.hpp> namespace ovdl::csv { - enum class EncodingType { - Windows1252, - Utf8 - }; - - template<EncodingType Encoding = EncodingType::Windows1252> class Parser final : public detail::BasicParser { public: Parser(); Parser(std::basic_ostream<char>& error_stream); - static Parser from_buffer(const char* data, std::size_t size); - static Parser from_buffer(const char* start, const char* end); - static Parser from_string(const std::string_view string); - static Parser from_file(const char* path); - static Parser from_file(const std::filesystem::path& path); + static Parser from_buffer(const char* data, std::size_t size, std::optional<detail::Encoding> encoding_fallback = std::nullopt); + static Parser from_buffer(const 
char* start, const char* end, std::optional<detail::Encoding> encoding_fallback = std::nullopt); + static Parser from_string(const std::string_view string, std::optional<detail::Encoding> encoding_fallback = std::nullopt); + static Parser from_file(const char* path, std::optional<detail::Encoding> encoding_fallback = std::nullopt); + static Parser from_file(const std::filesystem::path& path, std::optional<detail::Encoding> encoding_fallback = std::nullopt); - constexpr Parser& load_from_buffer(const char* data, std::size_t size); - constexpr Parser& load_from_buffer(const char* start, const char* end); - constexpr Parser& load_from_string(const std::string_view string); - Parser& load_from_file(const char* path); - Parser& load_from_file(const std::filesystem::path& path); + constexpr Parser& load_from_buffer(const char* data, std::size_t size, std::optional<detail::Encoding> encoding_fallback = std::nullopt); + constexpr Parser& load_from_buffer(const char* start, const char* end, std::optional<detail::Encoding> encoding_fallback = std::nullopt); + constexpr Parser& load_from_string(const std::string_view string, std::optional<detail::Encoding> encoding_fallback = std::nullopt); + Parser& load_from_file(const char* path, std::optional<detail::Encoding> encoding_fallback = std::nullopt); + Parser& load_from_file(const std::filesystem::path& path, std::optional<detail::Encoding> encoding_fallback = std::nullopt); - constexpr Parser& load_from_file(const detail::HasCstr auto& path) { - return load_from_file(path.c_str()); + constexpr Parser& load_from_file(const detail::HasCstr auto& path, std::optional<detail::Encoding> encoding_fallback = std::nullopt) { + return load_from_file(path.c_str(), encoding_fallback); } bool parse_csv(bool handle_strings = false); @@ -57,12 +54,8 @@ namespace ovdl::csv { private: class ParseHandler; std::unique_ptr<ParseHandler> _parse_handler; - std::vector<csv::LineObject> _lines; template<typename... 
Args> constexpr void _run_load_func(detail::LoadCallback<ParseHandler, Args...> auto func, Args... args); }; - - using Windows1252Parser = Parser<EncodingType::Windows1252>; - using Utf8Parser = Parser<EncodingType::Utf8>; }
\ No newline at end of file |