aboutsummaryrefslogtreecommitdiff
path: root/src/openvic-dataloader/detail
diff options
context:
space:
mode:
Diffstat (limited to 'src/openvic-dataloader/detail')
-rw-r--r-- src/openvic-dataloader/detail/DetectUtf8.hpp 52
-rw-r--r-- src/openvic-dataloader/detail/Errors.hpp 19
-rw-r--r-- src/openvic-dataloader/detail/LexyLitRange.hpp 16
-rw-r--r-- src/openvic-dataloader/detail/NullBuff.hpp 30
-rw-r--r-- src/openvic-dataloader/detail/Warnings.hpp 21
5 files changed, 138 insertions, 0 deletions
diff --git a/src/openvic-dataloader/detail/DetectUtf8.hpp b/src/openvic-dataloader/detail/DetectUtf8.hpp
new file mode 100644
index 0000000..b2969ce
--- /dev/null
+++ b/src/openvic-dataloader/detail/DetectUtf8.hpp
@@ -0,0 +1,52 @@
+#pragma once
+
+#include "detail/LexyLitRange.hpp"
+#include <lexy/action/match.hpp>
+#include <lexy/dsl.hpp>
+
+namespace ovdl::detail {
+ namespace detect_utf8 {
+
+ /// Lexy production that scans a whole byte stream for structurally valid UTF-8.
+ ///
+ /// A context flag records whether a multi-byte sequence has been consumed. The
+ /// flag starts out as INCLUDE_ASCII: with `true`, pure ASCII input matches;
+ /// with `false`, at least one multi-byte sequence is required.
+ ///
+ /// Only the lead-byte / continuation-byte structure is checked. The byte
+ /// ranges used below do not exclude overlong encodings (0xC0, 0xC1) or lead
+ /// bytes 0xF5-0xF7.
+ template<bool INCLUDE_ASCII>
+ struct DetectUtf8 {
+     /// Error tag raised when the flag is still unset after the scan.
+     struct not_utf8 {
+         static constexpr auto name = "not utf8";
+     };
+
+     static constexpr auto rule = [] {
+         // Set as soon as one complete multi-byte sequence has been consumed.
+         constexpr auto is_not_ascii_flag = lexy::dsl::context_flag<DetectUtf8>;
+
+         // & 0b10000000 == 0b00000000
+         constexpr auto ascii_values = lexydsl::make_range<0b00000000, 0b01111111>();
+         // & 0b11100000 == 0b11000000
+         constexpr auto two_byte = lexydsl::make_range<0b11000000, 0b11011111>();
+         // & 0b11110000 == 0b11100000
+         constexpr auto three_byte = lexydsl::make_range<0b11100000, 0b11101111>();
+         // & 0b11111000 == 0b11110000
+         constexpr auto four_byte = lexydsl::make_range<0b11110000, 0b11110111>();
+         // & 0b11000000 == 0b10000000
+         constexpr auto check_bytes = lexydsl::make_range<0b10000000, 0b10111111>();
+
+         // A lead byte commits to its branch; it must then be followed by the
+         // matching number of continuation bytes.
+         constexpr auto utf8_check =
+             ((four_byte >> lexy::dsl::times<3>(check_bytes)) |
+                 (three_byte >> lexy::dsl::times<2>(check_bytes)) |
+                 (two_byte >> lexy::dsl::times<1>(check_bytes))) >>
+             is_not_ascii_flag.set();
+
+         return is_not_ascii_flag.template create<INCLUDE_ASCII>() +
+                lexy::dsl::while_(utf8_check | ascii_values) +
+                // The loop simply stops at the first byte that starts neither
+                // alternative (stray continuation byte, 0xF8-0xFF). Require end
+                // of input here so such bytes make the match fail instead of
+                // being silently ignored.
+                lexy::dsl::eof +
+                lexy::dsl::must(is_not_ascii_flag.is_set()).template error<not_utf8>;
+     }();
+ };
+ }
+
+ /// Runs the DetectUtf8<false> production over `input` with lexy::match and
+ /// returns whether it matched. Because the flag is created unset, a match
+ /// requires at least one multi-byte sequence.
+ template<typename Input>
+ constexpr bool is_utf8_no_ascii(const Input& input) {
+     using production = detect_utf8::DetectUtf8<false>;
+     return lexy::match<production>(input);
+ }
+
+ /// Runs the DetectUtf8<true> production over `input` with lexy::match and
+ /// returns whether it matched. Because the flag is created already set, input
+ /// made only of ASCII bytes also matches.
+ template<typename Input>
+ constexpr bool is_utf8(const Input& input) {
+     using production = detect_utf8::DetectUtf8<true>;
+     return lexy::match<production>(input);
+ }
+} \ No newline at end of file
diff --git a/src/openvic-dataloader/detail/Errors.hpp b/src/openvic-dataloader/detail/Errors.hpp
new file mode 100644
index 0000000..f8ed21b
--- /dev/null
+++ b/src/openvic-dataloader/detail/Errors.hpp
@@ -0,0 +1,19 @@
+#pragma once
+
+#include "openvic-dataloader/v2script/Parser.hpp"
+
+namespace ovdl::v2script::errors {
+ /// Builds the fatal parser error reported when a file cannot be used.
+ /// @param file_path Path that was requested; may be null when none was given.
+ /// @return A Parser::Error of type Fatal carrying the message and the value 1.
+ inline const v2script::Parser::Error make_no_file_error(const char* file_path) {
+     // A null path gets its own wording; otherwise the path is quoted in the text.
+     std::string text = file_path
+         ? "File '" + std::string(file_path) + "' was not found."
+         : std::string("File path not specified.");
+
+     return v2script::Parser::Error { Parser::Error::Type::Fatal, text, 1 };
+ }
+}
+
+namespace ovdl::ovscript::errors {
+} \ No newline at end of file
diff --git a/src/openvic-dataloader/detail/LexyLitRange.hpp b/src/openvic-dataloader/detail/LexyLitRange.hpp
new file mode 100644
index 0000000..a6761a8
--- /dev/null
+++ b/src/openvic-dataloader/detail/LexyLitRange.hpp
@@ -0,0 +1,16 @@
+#pragma once
+
+#include <lexy/dsl/literal.hpp>
+
+namespace ovdl::detail::lexydsl {
+ /// Builds a lexy rule matching any single byte value in [LOW, HIGH].
+ ///
+ /// The result is assembled recursively with lexy's `/` operator as
+ /// lit_c<LOW> / lit_c<LOW + 1> / ... / lit_c<HIGH>.
+ template<unsigned char LOW, unsigned char HIGH>
+ consteval auto make_range() {
+     // Reversed bounds would otherwise recurse upwards until LOW + 1 no longer
+     // fits in unsigned char, producing a long and unhelpful diagnostic.
+     static_assert(LOW <= HIGH, "make_range requires LOW <= HIGH");
+
+     if constexpr (LOW >= HIGH) {
+         // Single value. Using >= also stops the recursion after the assertion
+         // above has fired, so only one error is reported.
+         return lexy::dsl::lit_c<LOW>;
+     } else {
+         // For LOW == HIGH - 1 this yields lit_c<LOW> / lit_c<HIGH>, so no
+         // separate two-element case is needed.
+         return lexy::dsl::lit_c<LOW> / make_range<LOW + 1, HIGH>();
+     }
+ }
+} \ No newline at end of file
diff --git a/src/openvic-dataloader/detail/NullBuff.hpp b/src/openvic-dataloader/detail/NullBuff.hpp
new file mode 100644
index 0000000..baf9e1b
--- /dev/null
+++ b/src/openvic-dataloader/detail/NullBuff.hpp
@@ -0,0 +1,30 @@
+#pragma once
+
+#include <ostream>
+
+namespace ovdl::detail {
+ /// Stream buffer that discards every character written to it.
+ ///
+ /// No put area is ever set up, so each write reaches overflow(), which
+ /// reports success without storing anything.
+ template<class cT, class traits = std::char_traits<cT>>
+ class basic_nullbuf : public std::basic_streambuf<cT, traits> {
+ protected:
+     /// @param c Character (or eof) handed over by the stream.
+     /// @return A value other than eof, signalling that the write succeeded.
+     typename traits::int_type overflow(typename traits::int_type c) override {
+         return traits::not_eof(c); // indicate success
+     }
+ };
+
+ /// Output stream that owns a basic_nullbuf, so everything written is discarded.
+ template<class cT, class traits = std::char_traits<cT>>
+ class basic_onullstream : public std::basic_ostream<cT, traits> {
+ public:
+     // basic_ostream's constructor already runs basic_ios::init() with the
+     // given buffer, so the stream is initialised exactly once here. Only the
+     // address of m_sbuf is used at this point; the member itself is
+     // constructed after the base class.
+     basic_onullstream() : std::basic_ostream<cT, traits>(&m_sbuf) {}
+
+ private:
+     basic_nullbuf<cT, traits> m_sbuf;
+ };
+
+ using onullstream = basic_onullstream<char>;
+ using wonullstream = basic_onullstream<wchar_t>;
+
+ /// Narrow-character stream that discards its output.
+ inline onullstream cnull;
+ /// Wide-character stream that discards its output; uses the wchar_t stream
+ /// type to match its name.
+ inline wonullstream wcnull;
+} \ No newline at end of file
diff --git a/src/openvic-dataloader/detail/Warnings.hpp b/src/openvic-dataloader/detail/Warnings.hpp
new file mode 100644
index 0000000..f854fa8
--- /dev/null
+++ b/src/openvic-dataloader/detail/Warnings.hpp
@@ -0,0 +1,21 @@
+#pragma once
+
+#include "openvic-dataloader/v2script/Parser.hpp"
+
+namespace ovdl::v2script::warnings {
+ /// Builds the parser warning issued when UTF-8 encoded input is detected.
+ /// @param file_path Path of the offending file; null when the input is a buffer.
+ /// @return A Parser::Warning carrying the message and the value 1.
+ inline const v2script::Parser::Warning make_utf8_warning(const char* file_path) {
+     constexpr std::string_view encoding_advice = "This may cause problems. Prefer Windows-1252 encoding.";
+
+     // Pick the subject of the sentence first, then append the shared advice.
+     std::string text = file_path
+         ? "File '" + std::string(file_path) + "' is a UTF-8 encoded file. "
+         : std::string("Buffer is a UTF-8 encoded string. ");
+     text += encoding_advice;
+
+     return v2script::Parser::Warning { text, 1 };
+ }
+}
+
+namespace ovdl::ovscript::warnings {
+} \ No newline at end of file