aboutsummaryrefslogtreecommitdiff
path: root/src/openvic-dataloader/detail/dsl.hpp
diff options
context:
space:
mode:
author Spartan322 <Megacake1234@gmail.com> 2024-05-09 16:06:02 +0200
committer Spartan322 <Megacake1234@gmail.com> 2024-06-18 01:31:12 +0200
commit b0c3ba3f91926b0c95625bdbf4aab69269130b13 (patch)
tree f15ebc47d6bf370031af28e4bb4814ae30ef46e1 /src/openvic-dataloader/detail/dsl.hpp
parent 7b521d6023113372cf6b02e562828273c4040f0e (diff)
Add runtime encoding detection and conversion (branch: fix/char-detection)
Win-1251/1252 detection is a reduced C++ version of https://github.com/hsivonen/chardetng. Add manually-specified encoding fallback. Add default system encoding fallback. Add error recovery to v2script. Add unknown encoding detection warning. Remove csv::Parser templating. Fix lua files dropping data. Update lexy to foonathan/lexy@1e5d99fa3826b1c3c8628d3a11117fb4fb4cc0d0. Remove exclusive reliance on lexy::default_encoding for v2script. Move internal concepts to src/openvic-detail/InternalConcepts.hpp. Move contents of DetectUtf8.hpp to src/detail/Detect.hpp. Move openvic-dataloader/AbstractSyntaxTree.hpp to src. Move DiagnosticLogger.hpp to src. Move File.hpp to src. Move openvic-dataloader/detail/utility files to openvic-dataloader/detail. Add ovdl::utility::type_concat. Add ovdl::utility::type_prepend. Add ovdl::utility::is_instance_of. Overhaul parse error messages.
Diffstat (limited to 'src/openvic-dataloader/detail/dsl.hpp')
-rw-r--r-- src/openvic-dataloader/detail/dsl.hpp 194
1 files changed, 163 insertions, 31 deletions
diff --git a/src/openvic-dataloader/detail/dsl.hpp b/src/openvic-dataloader/detail/dsl.hpp
index ccc1af6..fd8981a 100644
--- a/src/openvic-dataloader/detail/dsl.hpp
+++ b/src/openvic-dataloader/detail/dsl.hpp
@@ -1,16 +1,20 @@
#pragma once
+#include <concepts> // IWYU pragma: keep
#include <type_traits>
#include <openvic-dataloader/NodeLocation.hpp>
-#include <openvic-dataloader/ParseState.hpp>
+#include <lexy/_detail/config.hpp>
#include <lexy/callback/adapter.hpp>
#include <lexy/callback/bind.hpp>
#include <lexy/callback/container.hpp>
#include <lexy/callback/fold.hpp>
#include <lexy/dsl.hpp>
+#include <lexy/dsl/literal.hpp>
+#include <lexy/encoding.hpp>
+#include "detail/InternalConcepts.hpp"
#include "detail/StringLiteral.hpp"
namespace ovdl::dsl {
@@ -20,10 +24,46 @@ namespace ovdl::dsl {
}
// Wraps `sink` with lexy::bind_sink, binding lexy::parse_state to it.
// This hunk only renames the helper from `sink` to `bind_sink`; the body is unchanged.
template<typename Sink>
- constexpr auto sink(Sink sink) {
+ constexpr auto bind_sink(Sink sink) {
return lexy::bind_sink(sink, lexy::parse_state);
}
+ /// Sink adaptor that passes the parse state through to the wrapped sink.
+ ///
+ /// `ReturnT` is only exposed as `return_type`; `Sink` is the wrapped callback/sink object.
+ /// - Called directly, it forwards `state` plus all arguments to `_sink_cb`.
+ /// - `sink(state)` yields a `_sink_callback` that stores a reference to `state` and
+ ///   passes it as the first argument of every invocation of the inner sink callback.
+ template<typename ReturnT, typename Sink>
+ struct _sink_with_state {
+     using return_type = ReturnT;
+
+     LEXY_EMPTY_MEMBER Sink _sink_cb;
+
+     /// Sink callback handed out by sink(). It keeps `_state` by reference, so the
+     /// state object has to outlive the callback.
+     template<detail::IsStateType StateType, typename SinkCallback>
+     struct _sink_callback {
+         StateType& _state;
+         SinkCallback _sink_cb;
+
+         using return_type = decltype(LEXY_MOV(_sink_cb).finish());
+
+         /// Feeds one item to the inner sink callback, prefixed with the stored state.
+         template<typename... Args>
+         constexpr void operator()(Args&&... args) {
+             lexy::_detail::invoke(_sink_cb, _state, LEXY_FWD(args)...);
+         }
+
+         /// Finishes the inner sink callback and returns its result.
+         constexpr return_type finish() && { return LEXY_MOV(_sink_cb).finish(); }
+     };
+
+     /// Callback form: invokes the wrapped object with `state` followed by `args`.
+     /// Arguments are taken as forwarding references so LEXY_FWD forwards the caller's
+     /// values instead of moving from by-value copies.
+     template<typename... Args>
+     constexpr auto operator()(detail::IsStateType auto& state, Args&&... args) const -> decltype(_sink_cb(state, LEXY_FWD(args)...)) {
+         return _sink_cb(state, LEXY_FWD(args)...);
+     }
+
+     /// Sink form: creates the state-carrying sink callback.
+     /// remove_reference_t (rather than decay_t) keeps a const qualifier on the state
+     /// type, so `StateType& _state` can still bind when `state` is const.
+     constexpr auto sink(detail::IsStateType auto& state) const {
+         return _sink_callback<std::remove_reference_t<decltype(state)>, decltype(_sink_cb.sink())> { state, _sink_cb.sink() };
+     }
+ };
+
+ /// Wraps `sink` in a `_sink_with_state<ReturnT, ...>` and binds the parse state to it
+ /// through bind_sink. `ReturnT` must be given explicitly; it cannot be deduced.
+ ///
+ /// The wrapper stores a decayed copy of `sink`: without the decay, passing an lvalue
+ /// would deduce `Sink` as a reference type and the wrapper would hold a reference
+ /// member instead of owning its sink.
+ template<typename ReturnT, typename Sink>
+ constexpr auto sink(Sink&& sink) {
+     return bind_sink(_sink_with_state<ReturnT, std::decay_t<Sink>> { LEXY_FWD(sink) });
+ }
+
template<typename Container, typename Callback>
constexpr auto collect(Callback callback) {
return sink(lexy::collect<Container>(callback));
@@ -34,49 +74,76 @@ namespace ovdl::dsl {
return sink(lexy::collect(callback));
}
// Callback (built with callback<T*>) that creates a T node via state.ast().create<T>(...).
// Overload 1: location plus a single argument; if that argument is lexy::nullopt it is
//             dropped and only the location is passed to create<T>.
// Overload 2: location plus any number of arguments, all forwarded to create<T>.
// This hunk drops the StateType template parameter; the state is now taken as a
// `detail::IsParseState auto&` lambda parameter instead.
- template<IsParseState StateType, typename T>
+ template<typename T>
constexpr auto construct = callback<T*>(
- [](StateType& state, ovdl::NodeLocation loc, auto&& arg) {
- if constexpr (std::is_same_v<std::decay_t<decltype(arg)>, lexy::nullopt>)
+ [](detail::IsParseState auto& state, ovdl::NodeLocation loc, auto&& arg) {
+ if constexpr (std::same_as<std::decay_t<decltype(arg)>, lexy::nullopt>)
return state.ast().template create<T>(loc);
else
return state.ast().template create<T>(loc, DRYAD_FWD(arg));
},
- [](StateType& state, ovdl::NodeLocation loc, auto&&... args) {
+ [](detail::IsParseState auto& state, ovdl::NodeLocation loc, auto&&... args) {
return state.ast().template create<T>(loc, DRYAD_FWD(args)...);
});
// Callback that creates a T node whose location is NodeLocation::make_from(begin, end).
// Overloads, in order:
//   (state, begin, ListType&&, end)    -> create<T>(location, list)
//   (state, begin, lexy::nullopt, end) -> create<T>(location)
//   (state, begin, end)                -> create<T>(location)
//   (state)                            -> nullptr (overload added by this hunk)
// This is the primary template (DisableEmpty defaults to false).
- template<IsParseState StateType, typename T, typename ListType, bool DisableEmpty = false>
+ template<typename T, typename ListType, bool DisableEmpty = false>
constexpr auto construct_list = callback<T*>(
- [](StateType& state, const char* begin, ListType&& arg, const char* end) {
+ [](detail::IsParseState auto& state, const char* begin, ListType&& arg, const char* end) {
return state.ast().template create<T>(NodeLocation::make_from(begin, end), DRYAD_FWD(arg));
},
- [](StateType& state, const char* begin, lexy::nullopt, const char* end) {
+ [](detail::IsParseState auto& state, const char* begin, lexy::nullopt, const char* end) {
return state.ast().template create<T>(NodeLocation::make_from(begin, end));
},
- [](StateType& state, const char* begin, const char* end) {
+ [](detail::IsParseState auto& state, const char* begin, const char* end) {
return state.ast().template create<T>(NodeLocation::make_from(begin, end));
+ },
+ [](detail::IsParseState auto& state) {
+ return nullptr;
});
// Partial specialization of construct_list for DisableEmpty == true.
// Only two overloads are provided, both taking (state, begin, <list or nullopt>, end);
// the (state, begin, end) and (state) forms of the primary template are absent here.
- template<IsParseState StateType, typename T, typename ListType>
- constexpr auto construct_list<StateType, T, ListType, true> = callback<T*>(
- [](StateType& state, const char* begin, ListType&& arg, const char* end) {
+ template<typename T, typename ListType>
+ constexpr auto construct_list<T, ListType, true> = callback<T*>(
+ [](detail::IsParseState auto& state, const char* begin, ListType&& arg, const char* end) {
return state.ast().template create<T>(NodeLocation::make_from(begin, end), DRYAD_FWD(arg));
},
- [](StateType& state, const char* begin, lexy::nullopt, const char* end) {
+ [](detail::IsParseState auto& state, const char* begin, lexy::nullopt, const char* end) {
return state.ast().template create<T>(NodeLocation::make_from(begin, end));
});
- template<unsigned char LOW, unsigned char HIGH>
- consteval auto make_range() {
- if constexpr (LOW == HIGH) {
- return ::lexy::dsl::lit_c<LOW>;
- } else if constexpr (LOW == (HIGH - 1)) {
- return ::lexy::dsl::lit_c<LOW> / ::lexy::dsl::lit_c<HIGH>;
- } else {
- return ::lexy::dsl::lit_c<LOW> / make_range<LOW + 1, HIGH>();
+ /// Character class for the inclusive range [LowC, HighC] of `CharT` values.
+ ///
+ /// A range lying entirely within ASCII (both bounds <= 0x7F) is expressed through the
+ /// ASCII set returned by char_class_ascii(). Any other range leaves that set empty and
+ /// is matched by comparing the code point against the bounds in char_class_match_cp().
+ template<typename CharT, CharT LowC, CharT HighC>
+ struct _crange : lexyd::char_class_base<_crange<CharT, LowC, HighC>> {
+     static_assert(LowC >= 0, "LowC cannot be less than 0");
+     static_assert(HighC - LowC > 0, "LowC must be less than HighC");
+
+     static constexpr auto char_class_unicode() {
+         return LowC <= 0x7F && HighC <= 0x7F;
      }
- }
+
+     static LEXY_CONSTEVAL auto char_class_name() {
+         return "range";
+     }
+
+     /// Returns the ASCII members of the range (empty unless the whole range is ASCII).
+     static LEXY_CONSTEVAL auto char_class_ascii() {
+         lexy::_detail::ascii_set result;
+         if constexpr (LowC <= 0x7F && HighC <= 0x7F) {
+             // Count with an int, not CharT: for a signed 8-bit CharT with HighC == 0x7F
+             // the counter would wrap past HighC and the loop would never terminate.
+             for (int c = static_cast<int>(LowC); c <= static_cast<int>(HighC); ++c)
+                 result.insert(c);
+         }
+         return result;
+     }
+
+     /// For pure-ASCII ranges returns std::false_type (the ASCII set covers them);
+     /// otherwise returns whether `cp` lies within [LowC, HighC].
+     static constexpr auto char_class_match_cp([[maybe_unused]] char32_t cp) {
+         if constexpr (LowC <= 0x7F && HighC <= 0x7F)
+             return std::false_type {};
+         else
+             return LowC <= cp && cp <= HighC;
+     }
+ };
+
// `_crange` instance whose character type is the decayed type of `LowC`;
// `HighC` must have the same type as `LowC`.
+ template<auto LowC, decltype(LowC) HighC>
+ constexpr auto lit_c_range = _crange<LEXY_DECAY_DECLTYPE(LowC), LowC, HighC> {};
+
// `_crange` instance with the character type fixed to unsigned char.
+ template<unsigned char LowC, unsigned char HighC>
+ constexpr auto lit_b_range = _crange<unsigned char, LowC, HighC> {};
// lexy::dsl::brackets built from the literal characters Open and Close, each wrapped
// in lexy::dsl::position.
template<auto Open, auto Close>
constexpr auto position_brackets = lexy::dsl::brackets(lexy::dsl::position(lexy::dsl::lit_c<Open>), lexy::dsl::position(lexy::dsl::lit_c<Close>));
@@ -89,14 +156,13 @@ namespace ovdl::dsl {
// Shorthand for lexy::dsl::p<Production> wrapped in lexy::dsl::position.
template<typename Production>
constexpr auto p = lexy::dsl::position(lexy::dsl::p<Production>);
// Callback that creates a ReturnType node at `loc`, passing it the result of
// state.ast().intern(...) called with the Keyword string literal's data and size.
// This hunk drops the ParseType template parameter; the state is now taken as a
// `detail::IsParseState auto&` lambda parameter instead.
- template<IsParseState ParseType, typename ReturnType, ovdl::detail::string_literal Keyword>
+ template<typename ReturnType, ovdl::detail::string_literal Keyword>
static constexpr auto default_kw_value = dsl::callback<ReturnType*>(
- [](ParseType& state, NodeLocation loc) {
+ [](detail::IsParseState auto& state, NodeLocation loc) {
return state.ast().template create<ReturnType>(loc, state.ast().intern(Keyword.data(), Keyword.size()));
});
template<
- IsParseState ParseType,
auto Identifier,
typename RuleValue,
ovdl::detail::string_literal Keyword,
@@ -109,18 +175,17 @@ namespace ovdl::dsl {
static constexpr auto value = Value;
};
static constexpr auto rule = dsl::p<rule_t> >> Production;
- static constexpr auto value = construct<ParseType, RuleValue>;
+ static constexpr auto value = construct<RuleValue>;
};
template<
- IsParseState ParseType,
auto Identifier,
typename RuleValue,
ovdl::detail::string_literal Keyword,
auto Production,
auto Value>
- struct fkeyword_rule : keyword_rule<ParseType, Identifier, RuleValue, Keyword, Production, Value> {
- using base_type = keyword_rule<ParseType, Identifier, RuleValue, Keyword, Production, Value>;
+ struct fkeyword_rule : keyword_rule<Identifier, RuleValue, Keyword, Production, Value> {
+ using base_type = keyword_rule<Identifier, RuleValue, Keyword, Production, Value>;
struct context_t;
struct rule_t : base_type::rule_t {
static constexpr auto flag = lexy::dsl::context_flag<context_t>;
@@ -139,7 +204,7 @@ namespace ovdl::dsl {
static constexpr auto make_flag = rule_t::flag.create();
static constexpr auto rule = dsl::p<rule_t> >> (rule_t::must >> rule_t::flag.set()) >> Production;
- static constexpr auto value = construct<ParseType, RuleValue>;
+ static constexpr auto value = construct<RuleValue>;
};
template<typename... Args>
@@ -147,4 +212,71 @@ namespace ovdl::dsl {
static constexpr auto flags = (Args::make_flag + ...);
static constexpr auto p = (lexy::dsl::p<Args> | ...);
};
+
+ /// Encoding-aware peek rule: checks whether a rule would match at the current
+ /// position and chooses the rule by the reader's encoding.
+ ///
+ /// `Rule` is used when Reader::encoding is lexy::default_encoding or
+ /// lexy::byte_encoding; `RuleUtf` is used for every other encoding. `Tag` is the
+ /// error tag reported on failure in non-branch position; when it is `void`,
+ /// lexy::peek_failure is used instead.
+ template<typename Rule, typename RuleUtf, typename Tag>
+ struct _peek : lexyd::branch_base {
+     /// Branch parser: records where the peek started and where the token parser stopped.
+     template<typename Reader>
+     struct bp {
+         typename Reader::iterator begin;
+         typename Reader::marker end;
+
+         /// Tries to match the selected rule as a token. `reader` is taken by value,
+         /// so the caller's reader is left untouched.
+         constexpr bool try_parse(const void*, Reader reader) {
+             using encoding = typename Reader::encoding;
+
+             auto parser = [&] {
+                 if constexpr (std::same_as<encoding, lexy::default_encoding> || std::same_as<encoding, lexy::byte_encoding>) {
+                     // We need to match the entire rule.
+                     return lexy::token_parser_for<decltype(lexy::dsl::token(Rule {})), Reader> { reader };
+                 } else {
+                     // We need to match the entire rule.
+                     return lexy::token_parser_for<decltype(lexy::dsl::token(RuleUtf {})), Reader> { reader };
+                 }
+             }();
+
+             begin = reader.position();
+             auto result = parser.try_parse(reader);
+             end = parser.end;
+
+             return result;
+         }
+
+         /// Branch not taken: report the peeked span as backtracked.
+         template<typename Context>
+         constexpr void cancel(Context& context) {
+             context.on(lexyd::_ev::backtracked {}, begin, end.position());
+         }
+
+         /// Branch taken: report the peeked span as backtracked, then continue with
+         /// NextParser on the caller's reader.
+         template<typename NextParser, typename Context, typename... Args>
+         LEXY_PARSER_FUNC bool finish(Context& context, Reader& reader, Args&&... args) {
+             context.on(lexyd::_ev::backtracked {}, begin, end.position());
+             return NextParser::parse(context, reader, LEXY_FWD(args)...);
+         }
+     };
+
+     /// Non-branch parser: a failed peek is reported as an error, but parsing
+     /// continues with NextParser either way.
+     template<typename NextParser>
+     struct p {
+         template<typename Context, typename Reader, typename... Args>
+         LEXY_PARSER_FUNC static bool parse(Context& context, Reader& reader, Args&&... args) {
+             bp<Reader> impl {};
+             if (!impl.try_parse(context.control_block, reader)) {
+                 // Report that we've failed.
+                 using tag = lexy::_detail::type_or<Tag, lexy::peek_failure>;
+                 auto err = lexy::error<Reader, tag>(impl.begin, impl.end.position());
+                 context.on(lexyd::_ev::error {}, err);
+
+                 // But recover immediately, as we wouldn't have consumed anything either way.
+             }
+
+             // `end` is a Reader::marker; pass its position, as every other
+             // backtracked/error call in this struct does.
+             context.on(lexyd::_ev::backtracked {}, impl.begin, impl.end.position());
+             return NextParser::parse(context, reader, LEXY_FWD(args)...);
+         }
+     };
+
+     /// Same peek rule, reporting `Error` as the failure tag.
+     template<typename Error>
+     static constexpr _peek<Rule, RuleUtf, Error> error = {};
+ };
+
// Builds a `_peek` rule from two alternative rules with no custom error tag (Tag = void).
// Only the argument types are used; the rule objects themselves are not stored.
+ template<typename Rule, typename RuleUtf>
+ constexpr auto peek(Rule, RuleUtf) {
+ return _peek<Rule, RuleUtf, void> {};
+ }
} \ No newline at end of file