From 5068d34c08f951a7ea6257d305a1627b09a95817 Mon Sep 17 00:00:00 2001 From: Daniel Baumann Date: Sat, 4 May 2024 19:44:55 +0200 Subject: Adding upstream version 0.11.1. Signed-off-by: Daniel Baumann --- src/third-party/scnlib/include/scn/reader/string.h | 1336 ++++++++++++++++++++ 1 file changed, 1336 insertions(+) create mode 100644 src/third-party/scnlib/include/scn/reader/string.h (limited to 'src/third-party/scnlib/include/scn/reader/string.h') diff --git a/src/third-party/scnlib/include/scn/reader/string.h b/src/third-party/scnlib/include/scn/reader/string.h new file mode 100644 index 0000000..19727ee --- /dev/null +++ b/src/third-party/scnlib/include/scn/reader/string.h @@ -0,0 +1,1336 @@ +// Copyright 2017 Elias Kosunen +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// +// This file is a part of scnlib: +// https://github.com/eliaskosunen/scnlib + +#ifndef SCN_READER_STRING_H +#define SCN_READER_STRING_H + +#include "../util/small_vector.h" +#include "common.h" + +namespace scn { + SCN_BEGIN_NAMESPACE + namespace detail { + class set_parser_type { + public: + constexpr set_parser_type() = default; + + template + error parse_set(ParseCtx& pctx, bool& parsed) + { + using char_type = typename ParseCtx::char_type; + SCN_EXPECT(pctx.next_char() == ascii_widen('[')); + + pctx.advance_char(); + if (!pctx || pctx.check_arg_end()) { + return {error::invalid_format_string, + "Unexpected end of format string argument"}; + } + + get_option(flag::enabled) = true; + parsed = true; + + if (pctx.next_char() == ascii_widen('^')) { + // inverted + get_option(flag::inverted) = true; + pctx.advance_char(); + if (!pctx || pctx.check_arg_end()) { + return {error::invalid_format_string, + "Unexpected end of format string argument"}; + } + } + + if (pctx.next_char() == ascii_widen(']')) { + // end of range + get_option(flag::accept_all) = true; + pctx.advance_char(); + return {}; + } + + while (true) { + if (!pctx || pctx.check_arg_end()) { + return {error::invalid_format_string, + "Unexpected end of format string argument"}; + } + + const auto ch = pctx.next_char(); + if (ch == ascii_widen(']')) { + break; + } + + auto err = parse_next_char(pctx, true); + if (!err) { + return err; + } + + err = pctx.advance_cp(); + if (!err) { + pctx.advance_char(); + } + } + auto err = pctx.advance_cp(); + if (!err) { + pctx.advance_char(); + } + + return {}; + } + + error sanitize(bool localized) + { + // specifiers -> chars, if not localized + if (get_option(flag::use_specifiers)) { + if ((get_option(specifier::letters) || + get_option(specifier::alpha)) && + get_option(specifier::inverted_letters)) { + get_option(flag::accept_all) = true; + } + if (get_option(specifier::alnum_underscore) && + get_option(specifier::inverted_alnum_underscore)) { + get_option(flag::accept_all) = true; + } + if ((get_option(specifier::whitespace) || + get_option(specifier::space)) && + get_option(specifier::inverted_whitespace)) { + get_option(flag::accept_all) = true; + } + if ((get_option(specifier::numbers) || + get_option(specifier::digit)) && + get_option(specifier::inverted_numbers)) { + get_option(flag::accept_all) = true; + } + } + + if (get_option(flag::use_specifiers) && + !get_option(flag::accept_all)) { + if (localized) { + if (get_option(specifier::letters)) { + get_option(specifier::letters) = false; + get_option(specifier::alpha) = true; + } + if (get_option(specifier::alnum_underscore)) { + get_option(specifier::alnum_underscore) = false; + get_option(specifier::alnum) = true; + get_option('_') = true; + } + if (get_option(specifier::whitespace)) { + get_option(specifier::whitespace) = false; + get_option(specifier::space) = true; + } + if (get_option(specifier::numbers)) { + get_option(specifier::numbers) = false; + get_option(specifier::digit) = true; + } + } + else { + auto do_range = [&](char a, char b) { + for (; a < b; ++a) { + get_option(a) = true; + } + get_option(b) = true; + }; + auto do_lower = [&]() { + // a-z + do_range(0x61, 0x7a); + }; + auto do_upper = [&]() { + // A-Z + do_range(0x41, 0x5a); + }; + auto do_digit = [&]() { + // 0-9 + do_range(0x30, 0x39); + }; + + if (get_option(specifier::alnum)) { + do_lower(); + do_upper(); + do_digit(); + get_option(specifier::alnum) = false; + } + if (get_option(specifier::alpha)) { + do_lower(); + do_upper(); + get_option(specifier::alpha) = false; + } + if (get_option(specifier::blank)) { + get_option(' ') = true; + get_option('\t') = true; + get_option(specifier::blank) = false; + } + if (get_option(specifier::cntrl)) { + do_range(0, 0x1f); + get_option(0x7f) = true; + get_option(specifier::cntrl) = false; + } + if (get_option(specifier::digit)) { + do_digit(); + get_option(specifier::digit) = false; + } + if (get_option(specifier::graph)) { + do_range(0x21, 0x7e); + get_option(specifier::graph) = false; + } + if (get_option(specifier::lower)) { + do_lower(); + get_option(specifier::lower) = false; + } + if (get_option(specifier::print)) { + do_range(0x20, 0x7e); + get_option(specifier::print) = false; + } + if (get_option(specifier::punct)) { + do_range(0x21, 0x2f); + do_range(0x3a, 0x40); + do_range(0x5b, 0x60); + do_range(0x7b, 0x7e); + get_option(specifier::punct) = false; + } + if (get_option(specifier::space)) { + do_range(0x9, 0xd); + get_option(' ') = true; + get_option(specifier::space) = false; + } + if (get_option(specifier::upper)) { + do_upper(); + get_option(specifier::upper) = false; + } + if (get_option(specifier::xdigit)) { + do_digit(); + do_range(0x41, 0x46); + do_range(0x61, 0x66); + get_option(specifier::xdigit) = false; + } + if (get_option(specifier::letters)) { + do_upper(); + do_lower(); + get_option(specifier::letters) = false; + } + if (get_option(specifier::inverted_letters)) { + do_range(0x0, 0x2f); + do_range(0x3a, 0x40); + do_range(0x5b, 0x60); + do_range(0x7b, 0x7f); + get_option(specifier::inverted_letters) = false; + } + if (get_option(specifier::alnum_underscore)) { + do_digit(); + do_upper(); + do_lower(); + get_option('_') = true; + get_option(specifier::alnum_underscore) = false; + } + if (get_option(specifier::inverted_alnum_underscore)) { + bool underscore = get_option('_'); + do_range(0x0, 0x2f); + do_range(0x3a, 0x40); + do_range(0x5b, 0x60); + do_range(0x7b, 0x7f); + get_option('_') = underscore; // reset back + get_option(specifier::inverted_alnum_underscore) = + false; + } + if (get_option(specifier::whitespace)) { + do_range(0x9, 0xd); + get_option(' ') = true; + get_option(specifier::whitespace) = false; + } + if (get_option(specifier::inverted_whitespace)) { + do_range(0, 0x8); + do_range(0xe, 0x1f); + do_range(0x21, 0x7f); + get_option(specifier::inverted_whitespace) = false; + } + if (get_option(specifier::numbers)) { + do_digit(); + get_option(specifier::numbers) = false; + } + if (get_option(specifier::inverted_numbers)) { + do_range(0, 0x2f); + do_range(0x3a, 0x7f); + get_option(specifier::inverted_numbers) = false; + } + + { + bool first = get_option(0); + char i = 1; + for (; i < 0x7f; ++i) { + if (first != get_option(i)) { + break; + } + } + if (i == 0x7f && first == get_option(0x7f)) { + get_option(flag::accept_all) = true; + if (!first) { + get_option(flag::inverted) = true; + } + } + } + + get_option(flag::use_specifiers) = false; + get_option(flag::use_chars) = true; + } + } + + return {}; + } + + // true = char accepted + template + bool check_character(CharT ch, bool localized, const Locale& loc) + { + SCN_EXPECT(get_option(flag::enabled)); + + const bool not_inverted = !get_option(flag::inverted); + if (get_option(flag::accept_all)) { + return not_inverted; + } + + if (get_option(flag::use_specifiers)) { + SCN_EXPECT(localized); // ensured by sanitize() + SCN_UNUSED(localized); + SCN_CLANG_PUSH_IGNORE_UNDEFINED_TEMPLATE + if (get_option(specifier::alnum) && + loc.get_localized().is_alnum(ch)) { + return not_inverted; + } + if (get_option(specifier::alpha) && + loc.get_localized().is_alpha(ch)) { + return not_inverted; + } + if (get_option(specifier::blank) && + loc.get_localized().is_blank(ch)) { + return not_inverted; + } + if (get_option(specifier::cntrl) && + loc.get_localized().is_cntrl(ch)) { + return not_inverted; + } + if (get_option(specifier::digit) && + loc.get_localized().is_digit(ch)) { + return not_inverted; + } + if (get_option(specifier::graph) && + loc.get_localized().is_graph(ch)) { + return not_inverted; + } + if (get_option(specifier::lower) && + loc.get_localized().is_lower(ch)) { + return not_inverted; + } + if (get_option(specifier::print) && + loc.get_localized().is_print(ch)) { + return not_inverted; + } + if (get_option(specifier::punct) && + loc.get_localized().is_punct(ch)) { + return not_inverted; + } + if (get_option(specifier::space) && + loc.get_localized().is_space(ch)) { + return not_inverted; + } + if (get_option(specifier::upper) && + loc.get_localized().is_upper(ch)) { + return not_inverted; + } + if (get_option(specifier::xdigit) && + loc.get_localized().is_xdigit(ch)) { + return not_inverted; + } + SCN_CLANG_POP_IGNORE_UNDEFINED_TEMPLATE + } + if (get_option(flag::use_chars) && (ch >= 0 && ch <= 0x7f)) { + if (get_option(static_cast(ch))) { + return not_inverted; + } + } + if (get_option(flag::use_ranges)) { + const auto c = static_cast(ch); + for (const auto& e : set_extra_ranges) { + if (c >= e.begin && c <= e.end) { + return not_inverted; + } + } + } + return !not_inverted; + } + + enum class specifier : size_t { + alnum = 0x80, + alpha, + blank, + cntrl, + digit, + graph, + lower, + print, + punct, + space, + upper, + xdigit, + letters = 0x90, // \l + inverted_letters, // \L + alnum_underscore, // \w + inverted_alnum_underscore, // \W + whitespace, // \s + inverted_whitespace, // \S + numbers, // \d + inverted_numbers, // \D + last = 0x9f + }; + enum class flag : size_t { + enabled = 0xa0, // using [set] + accept_all, // empty [set] + inverted, // ^ flag + // 0x00 - 0x7f + use_chars, + // 0x80 - 0x8f + use_specifiers, + // set_extra_ranges + use_ranges, + last = 0xaf + }; + + bool& get_option(char ch) + { + SCN_GCC_PUSH + SCN_GCC_IGNORE("-Wtype-limits") + SCN_EXPECT(ch >= 0 && ch <= 0x7f); + SCN_GCC_POP + return set_options[static_cast(ch)]; + } + SCN_NODISCARD bool get_option(char ch) const + { + SCN_GCC_PUSH + SCN_GCC_IGNORE("-Wtype-limits") + SCN_EXPECT(ch >= 0 && ch <= 0x7f); + SCN_GCC_POP + return set_options[static_cast(ch)]; + } + + bool& get_option(specifier s) + { + return set_options[static_cast(s)]; + } + SCN_NODISCARD bool get_option(specifier s) const + { + return set_options[static_cast(s)]; + } + + bool& get_option(flag f) + { + return set_options[static_cast(f)]; + } + SCN_NODISCARD bool get_option(flag f) const + { + return set_options[static_cast(f)]; + } + + SCN_NODISCARD bool enabled() const + { + return get_option(flag::enabled); + } + + private: + void accept_char(char ch) + { + get_option(ch) = true; + get_option(flag::use_chars) = true; + } + void accept_char(code_point cp) + { + if (cp >= 0 && cp <= 0x7f) { + return accept_char(static_cast(cp)); + } + set_extra_ranges.push_back(set_range::single(cp)); + get_option(flag::use_ranges) = true; + } + void accept_char(wchar_t ch) + { + SCN_GCC_COMPAT_PUSH + SCN_GCC_COMPAT_IGNORE("-Wtype-limits") + if (ch >= 0 && ch <= 0x7f) { + return accept_char(static_cast(ch)); + } + SCN_GCC_COMPAT_POP + set_extra_ranges.push_back(set_range::single(ch)); + get_option(flag::use_ranges) = true; + } + + void accept_char_range(char first, char last) + { + SCN_EXPECT(first >= 0); + SCN_EXPECT(last >= 0); + SCN_EXPECT(first <= last); + get_option(flag::use_chars) = true; + for (; first != last; ++first) { + get_option(first) = true; + } + SCN_ENSURE(first == last); + get_option(last) = true; + } + void accept_char_range(code_point first, code_point last) + { + SCN_EXPECT(first <= last); + if (first >= 0 && last <= 0x7f) { + return accept_char_range(static_cast(first), + static_cast(last)); + } + set_extra_ranges.push_back(set_range::range(first, last)); + get_option(flag::use_ranges) = true; + } + void accept_char_range(wchar_t first, wchar_t last) + { + SCN_EXPECT(first <= last); + SCN_GCC_COMPAT_PUSH + SCN_GCC_COMPAT_IGNORE("-Wtype-limits") + if (first >= 0 && last <= 0x7f) { + return accept_char_range(static_cast(first), + static_cast(last)); + } + SCN_GCC_COMPAT_POP + set_extra_ranges.push_back(set_range::range(first, last)); + get_option(flag::use_ranges) = true; + } + + template + error parse_range(ParseCtx& pctx, code_point begin) + { + using char_type = typename ParseCtx::char_type; + SCN_EXPECT(pctx.next_char() == ascii_widen('-')); + if (pctx.can_peek_char() && + pctx.peek_char() == ascii_widen(']')) { + // Just a '-' + accept_char(begin); + accept_char(ascii_widen('-')); + return {}; + } + pctx.advance_char(); + if (!pctx || pctx.check_arg_end()) { + return {error::invalid_format_string, + "Unexpected end of format string argument"}; + } + return parse_next_char(pctx, false, begin); + } + template + error parse_literal(ParseCtx& pctx, + bool allow_range, + code_point begin = make_code_point(0)) + { + using char_type = typename ParseCtx::char_type; + if (allow_range) { + auto e = pctx.peek_cp(); + if (!e && e.error().code() != error::end_of_range) { + return e.error(); + } + if (e && e.value() == ascii_widen('-')) { + const auto cp = pctx.next_cp(); + if (!cp) { + return cp.error(); + } + auto err = pctx.advance_cp(); + if (!err) { + return err; + } + return parse_range(pctx, cp.value()); + } + } + const auto cp = pctx.next_cp(); + if (!cp) { + return cp.error(); + } + if (cp.value() >= 0 && cp.value() <= 0x7f) { + if (!allow_range) { + if (static_cast< + typename std::make_unsigned::type>( + cp.value()) < + static_cast< + typename std::make_unsigned::type>( + begin)) { + return {error::invalid_format_string, + "Last char in [set] range is less than the " + "first"}; + } + accept_char_range(begin, cp.value()); + } + else { + accept_char(cp.value()); + } + } + else { + if (!allow_range) { + if (static_cast< + typename std::make_unsigned::type>( + cp.value()) < + static_cast< + typename std::make_unsigned::type>( + begin)) { + return {error::invalid_format_string, + "Last char in [set] range is less than the " + "first"}; + } + set_extra_ranges.push_back( + set_range::range(begin, cp.value())); + } + else { + set_extra_ranges.push_back( + set_range::single(cp.value())); + } + get_option(flag::use_ranges) = true; + } + return {}; + } + template + error parse_colon_specifier(ParseCtx& pctx) + { + using char_type = typename ParseCtx::char_type; + SCN_EXPECT(pctx.next_char() == ascii_widen(':')); + pctx.advance_char(); + if (!pctx || pctx.check_arg_end()) { + return {error::invalid_format_string, + "Unexpected end of format string argument"}; + } + if (pctx.next_char() == ascii_widen(']')) { + return { + error::invalid_format_string, + "Unexpected end of [set] in format string after ':'"}; + } + + std::basic_string buf; + while (true) { + if (!pctx || pctx.check_arg_end()) { + return {error::invalid_format_string, + "Unexpected end of format string argument"}; + } + auto ch = pctx.next_char(); + if (ch == ascii_widen(':')) { + break; + } + if (ch == ascii_widen(']')) { + return {error::invalid_format_string, + "Unexpected end of [set] :specifier:, did you " + "forget a terminating colon?"}; + } + buf.push_back(ch); + pctx.advance_char(); + } + + auto ch = pctx.next_char(); + if (buf == all_str(ch)) { + get_option(flag::accept_all) = true; + return {}; + } + if (buf == alnum_str(ch)) { + get_option(specifier::alnum) = true; + get_option(flag::use_specifiers) = true; + return {}; + } + if (buf == alpha_str(ch)) { + get_option(specifier::alpha) = true; + get_option(flag::use_specifiers) = true; + return {}; + } + if (buf == blank_str(ch)) { + get_option(specifier::blank) = true; + get_option(flag::use_specifiers) = true; + return {}; + } + if (buf == cntrl_str(ch)) { + get_option(specifier::cntrl) = true; + get_option(flag::use_specifiers) = true; + return {}; + } + if (buf == digit_str(ch)) { + get_option(specifier::digit) = true; + get_option(flag::use_specifiers) = true; + return {}; + } + if (buf == graph_str(ch)) { + get_option(specifier::graph) = true; + get_option(flag::use_specifiers) = true; + return {}; + } + if (buf == lower_str(ch)) { + get_option(specifier::lower) = true; + get_option(flag::use_specifiers) = true; + return {}; + } + if (buf == print_str(ch)) { + get_option(specifier::print) = true; + get_option(flag::use_specifiers) = true; + return {}; + } + if (buf == punct_str(ch)) { + get_option(specifier::punct) = true; + get_option(flag::use_specifiers) = true; + return {}; + } + if (buf == space_str(ch)) { + get_option(specifier::space) = true; + get_option(flag::use_specifiers) = true; + return {}; + } + if (buf == upper_str(ch)) { + get_option(specifier::upper) = true; + get_option(flag::use_specifiers) = true; + return {}; + } + if (buf == xdigit_str(ch)) { + get_option(specifier::xdigit) = true; + get_option(flag::use_specifiers) = true; + return {}; + } + + return {error::invalid_format_string, + "Invalid :specifier: in [set]"}; + } + template + error parse_backslash_hex(ParseCtx& pctx, + bool allow_range, + code_point begin = make_code_point(0)) + { + using char_type = typename ParseCtx::char_type; + SCN_EXPECT(pctx.next_char() == ascii_widen('x') || + pctx.next_char() == ascii_widen('u') || + pctx.next_char() == ascii_widen('U')); + + const char_type flag_char = pctx.next_char(); + const int chars = [flag_char]() { + auto ch = static_cast(flag_char); + if (ch == 'x') { + return 2; + } + if (ch == 'u') { + return 4; + } + if (ch == 'U') { + return 8; + } + SCN_ENSURE(false); + SCN_UNREACHABLE; + }(); + + char_type str[8] = {0}; + for (int i = 0; i < chars; ++i) { + pctx.advance_char(); + if (!pctx || pctx.check_arg_end()) { + return {error::invalid_format_string, + "Unexpected end of format string argument " + "after '\\x', '\\u', or '\\U'"}; + } + if (pctx.next_char() == ascii_widen(']')) { + return {error::invalid_format_string, + "Unexpected end of [set] in format string " + "after '\\x', '\\u', or '\\U'"}; + } + str[i] = pctx.next_char(); + } + + auto scanner = simple_integer_scanner{}; + uint64_t i; + SCN_CLANG_PUSH_IGNORE_UNDEFINED_TEMPLATE + auto res = scanner.scan( + scn::make_span(str, static_cast(chars)).as_const(), + i, 16); + SCN_CLANG_POP_IGNORE_UNDEFINED_TEMPLATE + if (!res) { + return {error::invalid_format_string, + "Failed to parse '\\x', '\\u', or '\\U' flag in " + "format string"}; + } + const uint64_t min = 0; + const uint64_t max = [chars]() { + if (chars == 2) { + // \x + return uint64_t{0x7f}; + } + if (chars == 4) { + return uint64_t{0xffff}; + } + if (chars == 8) { + return uint64_t{0xffffffff}; + } + SCN_ENSURE(false); + SCN_UNREACHABLE; + }(); + if (i < min || i > max) { + return {error::invalid_format_string, + "'\\x', '\\u', or '\\U' option in format string " + "out of range"}; + } + + if (allow_range && pctx.can_peek_char() && + pctx.peek_char() == ascii_widen('-')) { + pctx.advance_char(); + return parse_range(pctx, make_code_point(i)); + } + if (!allow_range) { + accept_char_range(begin, make_code_point(i)); + } + else { + accept_char(make_code_point(i)); + } + return {}; + } + template + error parse_backslash_specifier( + ParseCtx& pctx, + bool allow_range, + code_point begin = make_code_point(0)) + { + using char_type = typename ParseCtx::char_type; + SCN_EXPECT(pctx.next_char() == ascii_widen('\\')); + pctx.advance_char(); + + if (!pctx || pctx.check_arg_end()) { + return {error::invalid_format_string, + "Unexpected end of format string argument"}; + } + if (pctx.next_char() == ascii_widen(']') && + pctx.can_peek_char() && + pctx.peek_char() == ascii_widen('}')) { + return {error::invalid_format_string, + "Unexpected end of [set] in format string"}; + } + + if (pctx.next_char() == ascii_widen('\\')) { + // Literal "\\" + accept_char(pctx.next_char()); + return {}; + } + + // specifiers + if (pctx.next_char() == ascii_widen('l')) { + // \l + get_option(specifier::letters) = true; + get_option(flag::use_specifiers) = true; + return {}; + } + if (pctx.next_char() == ascii_widen('L')) { + // \L + get_option(specifier::inverted_letters) = true; + get_option(flag::use_specifiers) = true; + return {}; + } + + if (pctx.next_char() == ascii_widen('w')) { + // \w + get_option(specifier::alnum_underscore) = true; + get_option(flag::use_specifiers) = true; + return {}; + } + if (pctx.next_char() == ascii_widen('W')) { + // \W + get_option(specifier::inverted_alnum_underscore) = true; + get_option(flag::use_specifiers) = true; + return {}; + } + + if (pctx.next_char() == ascii_widen('s')) { + // \s + get_option(specifier::whitespace) = true; + get_option(flag::use_specifiers) = true; + return {}; + } + if (pctx.next_char() == ascii_widen('S')) { + // \S + get_option(specifier::inverted_whitespace) = true; + get_option(flag::use_specifiers) = true; + return {}; + } + + if (pctx.next_char() == ascii_widen('d')) { + // \d + get_option(specifier::numbers) = true; + get_option(flag::use_specifiers) = true; + return {}; + } + if (pctx.next_char() == ascii_widen('D')) { + // \D + get_option(specifier::inverted_numbers) = true; + get_option(flag::use_specifiers) = true; + return {}; + } + + if (pctx.next_char() == ascii_widen('x') || + pctx.next_char() == ascii_widen('u') || + pctx.next_char() == ascii_widen('U')) { + // \x__, \u____, or \U________ + return parse_backslash_hex(pctx, allow_range, begin); + } + + // Literal, e.g. \: -> : + return parse_literal(pctx, true); + } + template + error parse_next_char(ParseCtx& pctx, + bool allow_range, + code_point begin = make_code_point(0)) + { + using char_type = typename ParseCtx::char_type; + const auto ch = pctx.next_char(); + if (ch == ascii_widen('\\')) { + return parse_backslash_specifier(pctx, allow_range, begin); + } + if (allow_range && ch == ascii_widen(':')) { + return parse_colon_specifier(pctx); + } + return parse_literal(pctx, allow_range, begin); + } + + SCN_NODISCARD static constexpr const char* all_str(char) + { + return "all"; + } + SCN_NODISCARD static constexpr const wchar_t* all_str(wchar_t) + { + return L"all"; + } + SCN_NODISCARD static constexpr const char* alnum_str(char) + { + return "alnum"; + } + SCN_NODISCARD static constexpr const wchar_t* alnum_str(wchar_t) + { + return L"alnum"; + } + SCN_NODISCARD static constexpr const char* alpha_str(char) + { + return "alpha"; + } + SCN_NODISCARD static constexpr const wchar_t* alpha_str(wchar_t) + { + return L"alpha"; + } + SCN_NODISCARD static constexpr const char* blank_str(char) + { + return "blank"; + } + SCN_NODISCARD static constexpr const wchar_t* blank_str(wchar_t) + { + return L"blank"; + } + SCN_NODISCARD static constexpr const char* cntrl_str(char) + { + return "cntrl"; + } + SCN_NODISCARD static constexpr const wchar_t* cntrl_str(wchar_t) + { + return L"cntrl"; + } + SCN_NODISCARD static constexpr const char* digit_str(char) + { + return "digit"; + } + SCN_NODISCARD static constexpr const wchar_t* digit_str(wchar_t) + { + return L"digit"; + } + SCN_NODISCARD static constexpr const char* graph_str(char) + { + return "graph"; + } + SCN_NODISCARD static constexpr const wchar_t* graph_str(wchar_t) + { + return L"graph"; + } + SCN_NODISCARD static constexpr const char* lower_str(char) + { + return "lower"; + } + SCN_NODISCARD static constexpr const wchar_t* lower_str(wchar_t) + { + return L"lower"; + } + SCN_NODISCARD static constexpr const char* print_str(char) + { + return "print"; + } + SCN_NODISCARD static constexpr const wchar_t* print_str(wchar_t) + { + return L"print"; + } + SCN_NODISCARD static constexpr const char* punct_str(char) + { + return "punct"; + } + SCN_NODISCARD static constexpr const wchar_t* punct_str(wchar_t) + { + return L"punct"; + } + SCN_NODISCARD static constexpr const char* space_str(char) + { + return "space"; + } + SCN_NODISCARD static constexpr const wchar_t* space_str(wchar_t) + { + return L"space"; + } + SCN_NODISCARD static constexpr const char* upper_str(char) + { + return "upper"; + } + SCN_NODISCARD static constexpr const wchar_t* upper_str(wchar_t) + { + return L"upper"; + } + SCN_NODISCARD static constexpr const char* xdigit_str(char) + { + return "xdigit"; + } + SCN_NODISCARD static constexpr const wchar_t* xdigit_str(wchar_t) + { + return L"xdigit"; + } + + // 0x00 - 0x7f, individual chars, true = accept + // 0x80 - 0x9f, specifiers, true = accept (if use_specifiers = true) + // 0xa0 - 0xaf, flags + array set_options{{false}}; + + struct set_range { + constexpr set_range(uint32_t b, uint32_t e) : begin(b), end(e) + { + } + + uint32_t begin{}; + uint32_t end{}; // inclusive + + static set_range single(code_point cp) + { + return {static_cast(cp), + static_cast(cp)}; + } + static set_range single(wchar_t ch) + { + return {static_cast(ch), + static_cast(ch)}; + } + + static set_range range(code_point begin, code_point end) + { + SCN_EXPECT(begin <= end); + return {static_cast(begin), + static_cast(end)}; + } + static set_range range(wchar_t begin, wchar_t end) + { + SCN_EXPECT(begin <= end); + return {static_cast(begin), + static_cast(end)}; + } + }; + // Used if set_options[use_ranges] = true + small_vector set_extra_ranges{}; + }; + + struct string_scanner : common_parser { + static constexpr bool skip_preceding_whitespace() + { + return false; + } + + template + error parse(ParseCtx& pctx) + { + using char_type = typename ParseCtx::char_type; + + auto s_flag = detail::ascii_widen('s'); + bool s_set{}; + + auto each = [&](ParseCtx& p, bool& parsed) -> error { + if (p.next_char() == ascii_widen('[')) { + if (set_parser.get_option( + set_parser_type::flag::enabled)) { + return {error::invalid_format_string, + "[set] already specified for this argument " + "in format string"}; + } + return set_parser.parse_set(p, parsed); + } + return {}; + }; + auto e = parse_common(pctx, span{&s_flag, 1}, + span{&s_set, 1}, each); + if (!e) { + return e; + } + if (set_parser.enabled()) { + bool loc = (common_options & localized) != 0; + return set_parser.sanitize(loc); + } + return {}; + } + + template + error scan( + std::basic_string, + Allocator>& val, + Context& ctx) + { + if (set_parser.enabled()) { + bool loc = (common_options & localized) != 0; + bool mb = (loc || set_parser.get_option( + set_parser_type::flag::use_ranges)) && + is_multichar_type(typename Context::char_type{}); + return do_scan(ctx, val, + pred{ctx, set_parser, loc, mb}); + } + + auto e = skip_range_whitespace(ctx, false); + if (!e) { + return e; + } + + auto is_space_pred = make_is_space_predicate( + ctx.locale(), (common_options & localized) != 0, + field_width); + return do_scan(ctx, val, is_space_pred); + } + + set_parser_type set_parser; + + protected: + template + error do_scan( + Context& ctx, + std::basic_string, + Allocator>& val, + Pred&& predicate) + { + using string_type = std::basic_string< + typename Context::char_type, + std::char_traits, Allocator>; + + if (Context::range_type::is_contiguous) { + auto s = read_until_space_zero_copy( + ctx.range(), SCN_FWD(predicate), false); + if (!s) { + return s.error(); + } + if (s.value().size() == 0) { + return {error::invalid_scanned_value, + "Empty string parsed"}; + } + val.assign(s.value().data(), s.value().size()); + return {}; + } + + string_type tmp(val.get_allocator()); + auto outputit = std::back_inserter(tmp); + auto ret = read_until_space(ctx.range(), outputit, + SCN_FWD(predicate), false); + if (SCN_UNLIKELY(!ret)) { + return ret; + } + if (SCN_UNLIKELY(tmp.empty())) { + return {error::invalid_scanned_value, + "Empty string parsed"}; + } + val = SCN_MOVE(tmp); + + return {}; + } + + template + struct pred { + Context& ctx; + set_parser_type& set_parser; + bool localized; + bool multibyte; + + bool operator()(span ch) const + { + SCN_EXPECT(ch.size() >= 1); + code_point cp{}; + auto it = parse_code_point(ch.begin(), ch.end(), cp); + if (!it) { + // todo: is this really a good idea + return !set_parser.check_character(ch[0], localized, + ctx.locale()); + } + return !set_parser.check_character(cp, localized, + ctx.locale()); + } + bool operator()(span ch) const + { + SCN_EXPECT(ch.size() == 1); + return !set_parser.check_character(ch[0], localized, + ctx.locale()); + } + constexpr bool is_localized() const + { + return localized; + } + constexpr bool is_multibyte() const + { + return multibyte; + } + }; + }; + + struct span_scanner : public string_scanner { + template + error scan(span& val, Context& ctx) + { + if (val.size() == 0) { + return {error::invalid_scanned_value, + "Cannot scan into an empty span"}; + } + + if (set_parser.enabled()) { + bool loc = (common_options & localized) != 0; + bool mb = (loc || set_parser.get_option( + set_parser_type::flag::use_ranges)) && + is_multichar_type(typename Context::char_type{}); + return do_scan(ctx, val, + string_scanner::pred{ + ctx, set_parser, loc, mb}); + } + + auto e = skip_range_whitespace(ctx, false); + if (!e) { + return e; + } + + auto is_space_pred = make_is_space_predicate( + ctx.locale(), (common_options & localized) != 0, + field_width != 0 ? min(field_width, val.size()) + : val.size()); + return do_scan(ctx, val, is_space_pred); + } + + protected: + template + error do_scan(Context& ctx, + span& val, + Pred&& predicate) + { + if (Context::range_type::is_contiguous) { + auto s = read_until_space_zero_copy( + ctx.range(), SCN_FWD(predicate), false); + if (!s) { + return s.error(); + } + if (s.value().size() == 0) { + return {error::invalid_scanned_value, + "Empty string parsed"}; + } + std::copy(s.value().begin(), s.value().end(), val.begin()); + val = val.first(s.value().size()); + return {}; + } + + std::basic_string tmp; + auto outputit = std::back_inserter(tmp); + auto ret = read_until_space(ctx.range(), outputit, + SCN_FWD(predicate), false); + if (SCN_UNLIKELY(!ret)) { + return ret; + } + if (SCN_UNLIKELY(tmp.empty())) { + return {error::invalid_scanned_value, + "Empty string parsed"}; + } + std::copy(tmp.begin(), tmp.end(), val.begin()); + val = val.first(tmp.size()); + + return {}; + } + }; + + struct string_view_scanner : string_scanner { + public: + template + error scan(basic_string_view& val, + Context& ctx) + { + if (!Context::range_type::is_contiguous) { + return {error::invalid_operation, + "Cannot read a string_view from a " + "non-contiguous_range"}; + } + + if (set_parser.enabled()) { + bool loc = (common_options & localized) != 0; + bool mb = (loc || set_parser.get_option( + set_parser_type::flag::use_ranges)) && + is_multichar_type(typename Context::char_type{}); + return do_scan(ctx, val, + string_scanner::pred{ + ctx, set_parser, loc, mb}); + } + + auto e = skip_range_whitespace(ctx, false); + if (!e) { + return e; + } + + auto is_space_pred = make_is_space_predicate( + ctx.locale(), (common_options & localized) != 0, + field_width); + return do_scan(ctx, val, is_space_pred); + } + + protected: + template + error do_scan(Context& ctx, + basic_string_view& val, + Pred&& predicate) + { + SCN_EXPECT(Context::range_type::is_contiguous); + + auto s = read_until_space_zero_copy(ctx.range(), + SCN_FWD(predicate), false); + if (!s) { + return s.error(); + } + if (s.value().size() == 0) { + return {error::invalid_scanned_value, + "Empty string parsed"}; + } + val = basic_string_view( + s.value().data(), s.value().size()); + return {}; + } + }; + +#if SCN_HAS_STRING_VIEW + struct std_string_view_scanner : string_view_scanner { + template + error scan(std::basic_string_view& val, + Context& ctx) + { + using char_type = typename Context::char_type; + auto sv = + ::scn::basic_string_view(val.data(), val.size()); + auto e = string_view_scanner::scan(sv, ctx); + if (e) { + val = + std::basic_string_view(sv.data(), sv.size()); + } + return e; + } + }; +#endif + } // namespace detail + SCN_END_NAMESPACE +} // namespace scn + +#endif -- cgit v1.2.3