From 7d5d86e44ebbd907c690023b4546a0ae0a37de3a Mon Sep 17 00:00:00 2001 From: Spartan322 Date: Fri, 20 Oct 2023 16:39:26 -0400 Subject: Add grammar support for BEL, HT, LF, and CR characters Add stripping of LF and CR characters if `v2script::grammar::StringExpression`'s `Options::NoStringEscape` is false --- .../v2script/AbstractSyntaxTree.cpp | 38 ++++++++++++++++------ src/openvic-dataloader/v2script/SimpleGrammar.hpp | 9 ++--- 2 files changed, 33 insertions(+), 14 deletions(-) (limited to 'src') diff --git a/src/openvic-dataloader/v2script/AbstractSyntaxTree.cpp b/src/openvic-dataloader/v2script/AbstractSyntaxTree.cpp index c7a325b..5518e5d 100644 --- a/src/openvic-dataloader/v2script/AbstractSyntaxTree.cpp +++ b/src/openvic-dataloader/v2script/AbstractSyntaxTree.cpp @@ -14,6 +14,22 @@ using namespace ovdl::v2script::ast; +static void _handle_string_characters(std::string& string, bool allow_newline) { + size_t position = 0; + for (auto& c : string) { + switch (c) { + case '\r': + case '\n': + if (allow_newline) goto END_LOOP; + c = ' '; + break; + default: break; + } + END_LOOP: + position++; + } +} + void ovdl::v2script::ast::copy_into_node_ptr_vector(const std::vector& source, std::vector& dest) { dest.clear(); dest.reserve(source.size()); @@ -23,22 +39,24 @@ void ovdl::v2script::ast::copy_into_node_ptr_vector(const std::vector& } AbstractStringNode::AbstractStringNode() : Node({}) {} -AbstractStringNode::AbstractStringNode(NodeLocation location, std::string&& name) : Node(location), - _name(std::move(name)) {} +AbstractStringNode::AbstractStringNode(NodeLocation location, std::string&& name, bool allow_newline) : Node(location), + _name(std::move(name)) { + _handle_string_characters(_name, allow_newline); +} AbstractStringNode::AbstractStringNode(NodeLocation location) : Node(location) {} -AbstractStringNode::AbstractStringNode(std::string&& name) : AbstractStringNode({}, std::move(name)) {} +AbstractStringNode::AbstractStringNode(std::string&& name, bool 
allow_newline) : AbstractStringNode({}, std::move(name), allow_newline) {} std::ostream& AbstractStringNode::print(std::ostream& stream, size_t indent) const { return stream << _name; } -#define OVDL_AST_STRING_NODE_DEF(NAME, ...) \ - NAME::NAME() : AbstractStringNode() {} \ - NAME::NAME(std::string&& name) : AbstractStringNode(std::move(name)) {} \ - NAME::NAME(lexy::nullopt) : AbstractStringNode() {} \ - NAME::NAME(NodeLocation location) : AbstractStringNode(location) {} \ - NAME::NAME(NodeLocation location, std::string&& name) : AbstractStringNode(location, std::move(name)) {} \ - NAME::NAME(NodeLocation location, lexy::nullopt) : AbstractStringNode(location, {}) {} \ +#define OVDL_AST_STRING_NODE_DEF(NAME, ...) \ + NAME::NAME() : AbstractStringNode() {} \ + NAME::NAME(std::string&& name, bool allow_newline) : AbstractStringNode(std::move(name), allow_newline) {} \ + NAME::NAME(lexy::nullopt) : AbstractStringNode() {} \ + NAME::NAME(NodeLocation location) : AbstractStringNode(location) {} \ + NAME::NAME(NodeLocation location, std::string&& name, bool allow_newline) : AbstractStringNode(location, std::move(name), allow_newline) {} \ + NAME::NAME(NodeLocation location, lexy::nullopt) : AbstractStringNode(location, {}, true) {} \ std::ostream& NAME::print(std::ostream& stream, size_t indent) const __VA_ARGS__ OVDL_AST_STRING_NODE_DEF(IdentifierNode, { diff --git a/src/openvic-dataloader/v2script/SimpleGrammar.hpp b/src/openvic-dataloader/v2script/SimpleGrammar.hpp index 9bddabd..99bbfca 100644 --- a/src/openvic-dataloader/v2script/SimpleGrammar.hpp +++ b/src/openvic-dataloader/v2script/SimpleGrammar.hpp @@ -83,12 +83,13 @@ namespace ovdl::v2script::grammar { template struct StringExpression { static constexpr auto rule = [] { - // Arbitrary code points that aren't control characters. 
- auto c = ovdl::detail::lexydsl::make_range<0x20, 0xFF>() - lexy::dsl::ascii::control; if constexpr (Options.NoStringEscape) { + auto c = ovdl::detail::lexydsl::make_range<0x20, 0xFF>() / lexy::dsl::lit_b<0x07> / lexy::dsl::lit_b<0x09> / lexy::dsl::lit_b<0x0A> / lexy::dsl::lit_b<0x0D>; return lexy::dsl::delimited(lexy::dsl::position(lexy::dsl::lit_b<'"'>))(c); } else { + // Arbitrary code points that aren't control characters. + auto c = ovdl::detail::lexydsl::make_range<0x20, 0xFF>() - lexy::dsl::ascii::control; + // Escape sequences start with a backslash. // They either map one of the symbols, // or a Unicode code point of the form uXXXX. @@ -102,7 +103,7 @@ namespace ovdl::v2script::grammar { lexy::as_string >> lexy::callback( [](const char* begin, auto&& str, const char* end) { - return ast::make_node_ptr(ast::NodeLocation::make_from(begin, end), LEXY_MOV(str)); + return ast::make_node_ptr(ast::NodeLocation::make_from(begin, end), LEXY_MOV(str), Options.NoStringEscape); }); }; -- cgit v1.2.3-56-ga3b1