From 62e4c68907d8d33709c2c1f92a161dff00b3d5f2 Mon Sep 17 00:00:00 2001 From: Daniel Baumann Date: Mon, 15 Apr 2024 22:01:36 +0200 Subject: Adding upstream version 0.11.2. Signed-off-by: Daniel Baumann --- src/third-party/scnlib/include/scn/reader/common.h | 1663 ++++++++++++++++++++ src/third-party/scnlib/include/scn/reader/float.h | 246 +++ src/third-party/scnlib/include/scn/reader/int.h | 537 +++++++ src/third-party/scnlib/include/scn/reader/reader.h | 111 ++ src/third-party/scnlib/include/scn/reader/string.h | 1336 ++++++++++++++++ src/third-party/scnlib/include/scn/reader/types.h | 220 +++ 6 files changed, 4113 insertions(+) create mode 100644 src/third-party/scnlib/include/scn/reader/common.h create mode 100644 src/third-party/scnlib/include/scn/reader/float.h create mode 100644 src/third-party/scnlib/include/scn/reader/int.h create mode 100644 src/third-party/scnlib/include/scn/reader/reader.h create mode 100644 src/third-party/scnlib/include/scn/reader/string.h create mode 100644 src/third-party/scnlib/include/scn/reader/types.h (limited to 'src/third-party/scnlib/include/scn/reader') diff --git a/src/third-party/scnlib/include/scn/reader/common.h b/src/third-party/scnlib/include/scn/reader/common.h new file mode 100644 index 0000000..0f2b83b --- /dev/null +++ b/src/third-party/scnlib/include/scn/reader/common.h @@ -0,0 +1,1663 @@ +// Copyright 2017 Elias Kosunen +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+// +// This file is a part of scnlib: +// https://github.com/eliaskosunen/scnlib + +#ifndef SCN_READER_COMMON_H +#define SCN_READER_COMMON_H + +#include "../detail/error.h" +#include "../detail/locale.h" +#include "../detail/range.h" +#include "../unicode/unicode.h" +#include "../util/algorithm.h" + +namespace scn { + SCN_BEGIN_NAMESPACE + + // read_code_unit + + namespace detail { + template + expected + read_code_unit_impl(WrappedRange& r, bool advance, std::true_type) + { + SCN_CLANG_PUSH + // clang 10 behaves weirdly + SCN_CLANG_IGNORE("-Wzero-as-null-pointer-constant") + SCN_EXPECT(r.begin() < r.end()); + SCN_CLANG_POP + auto ch = *r.begin(); + if (advance) { + r.advance(); + } + return {ch}; + } + template + expected + read_code_unit_impl(WrappedRange& r, bool advance, std::false_type) + { + SCN_EXPECT(r.begin() != r.end()); + auto ch = *r.begin(); + if (advance && ch) { + r.advance(); + } + return ch; + } + } // namespace detail + + /** + * Reads a single character (= code unit) from the range. + * Dereferences the begin iterator, wrapping it in an `expected` if + * necessary. + * + * Encoding-agnostic, doesn't care about code points, and may leave behind + * partial ones. + * + * \param r Range to read from + * \param advance If `true`, and the read was successful, the range is + * advanced by a single character, as if by calling `r.advance()`. + * + * \return The next character in the range, obtained as if by dereferencing + * the begin iterator `*r.begin()`. + * If `r.begin() == r.end()`, returns EOF. + * If `r` is direct, returns `*r.begin()` wrapped in an `expected`. + * If `r` is not direct, returns `*r.begin()` as-is, with any errors that + * may have been caused by the read. 
+ */ + template + expected read_code_unit( + WrappedRange& r, + bool advance = true) + { + if (r.begin() == r.end()) { + return error(error::end_of_range, "EOF"); + } + return detail::read_code_unit_impl( + r, advance, + std::integral_constant{}); + } + + // putback_n + + /// @{ + + /** + * Puts back `n` characters (= code units) into `r` as if by repeatedly + * calling `r.advance(-1)`. + * + * Encoding-agnostic, may leave behind partial code points. + * + * \param r Range to roll back + * \param n Characters to put back, must be less than or equal to the number + * of characters already read from `r`. + * + * \return If `r` is contiguous, will always return `error::good`. + * Otherwise, may return `error::unrecoverable_source_error`, if the putback + * fails. + */ + template < + typename WrappedRange, + typename std::enable_if::type* = nullptr> + error putback_n(WrappedRange& r, ranges::range_difference_t n) + { + SCN_EXPECT(n <= ranges::distance(r.begin_underlying(), r.begin())); + r.advance(-n); + return {}; + } + template < + typename WrappedRange, + typename std::enable_if::type* = nullptr> + error putback_n(WrappedRange& r, ranges::range_difference_t n) + { + for (ranges::range_difference_t i = 0; i < n; ++i) { + r.advance(-1); + if (r.begin() == r.end()) { + return {error::unrecoverable_source_error, "Putback failed"}; + } + } + return {}; + } + + /// @} + + // read_code_point + + /** + * Type returned by `read_code_point` + * \tparam CharT Character type of the range + */ + template + struct read_code_point_result { + /// Code units, may point to `writebuf` given to `read_code_point` + span chars; + /// Parsed code point + code_point cp; + }; + + namespace detail { + // contiguous && direct + template + expected> read_code_point_impl( + WrappedRange& r, + span writebuf, + std::true_type) + { + if (r.begin() == r.end()) { + return error(error::end_of_range, "EOF"); + } + + auto sbuf = r.get_buffer_and_advance(4 / sizeof(CharT)); + if (sbuf.size() == 0) { + 
auto ret = read_code_unit(r, true); + if (!ret) { + return ret.error(); + } + sbuf = writebuf.first(1); + writebuf[0] = ret.value(); + } + int len = ::scn::get_sequence_length(sbuf[0]); + if (SCN_UNLIKELY(len == 0)) { + return error(error::invalid_encoding, "Invalid code point"); + } + if (sbuf.ssize() > len) { + auto e = putback_n(r, sbuf.ssize() - len); + if (!e) { + return e; + } + sbuf = sbuf.first(static_cast(len)); + } + if (len == 1) { + // Single-char code point + return read_code_point_result{sbuf.first(1), + make_code_point(sbuf[0])}; + } + while (sbuf.ssize() < len) { + auto ret = read_code_unit(r, true); + if (!ret) { + auto e = putback_n(r, sbuf.ssize()); + if (!e) { + return e; + } + if (ret.error().code() == error::end_of_range) { + return error(error::invalid_encoding, + "Invalid code point"); + } + return ret.error(); + } + sbuf = make_span(writebuf.begin(), sbuf.size() + 1); + writebuf[sbuf.size() - 1] = ret.value(); + } + + code_point cp{}; + auto ret = parse_code_point(sbuf.begin(), sbuf.end(), cp); + if (!ret) { + return ret.error(); + } + return read_code_point_result{sbuf, cp}; + } + + template + expected> read_code_point_impl( + WrappedRange& r, + span writebuf, + std::false_type) + { + auto first = read_code_unit(r, false); + if (!first) { + return first.error(); + } + + auto len = + static_cast(::scn::get_sequence_length(first.value())); + if (SCN_UNLIKELY(len == 0)) { + return error(error::invalid_encoding, "Invalid code point"); + } + r.advance(); + + writebuf[0] = first.value(); + if (len == 1) { + // Single-char code point + return read_code_point_result{ + make_span(writebuf.data(), 1), + make_code_point(first.value())}; + } + + size_t index = 1; + + auto parse = [&]() -> expected> { + code_point cp{}; + auto ret = parse_code_point(writebuf.data(), + writebuf.data() + len, cp); + if (!ret) { + auto pb = putback_n(r, static_cast(len)); + if (!pb) { + return pb; + } + return ret.error(); + } + auto s = make_span(writebuf.data(), len); + 
return read_code_point_result{s, cp}; + }; + auto advance = [&]() -> error { + auto ret = read_code_unit(r, false); + if (!ret) { + auto pb = putback_n(r, static_cast(index)); + if (!pb) { + return pb; + } + return ret.error(); + } + writebuf[index] = ret.value(); + ++index; + r.advance(); + return {}; + }; + + while (index < 4) { + auto e = advance(); + if (!e) { + return e; + } + if (index == len) { + return parse(); + } + } + SCN_ENSURE(false); + SCN_UNREACHABLE; + } + } // namespace detail + + /** + * Read a single Unicode code point from `r` as if by repeatedly calling + * `read_code_unit()`. + * + * Advances the range past the read code point. On error, rolls back the + * range into the state it was before calling this function, as if by + * calling `putback_n()`. + * + * \param r Range to read from + * \param writebuf Buffer to use for reading into, if necessary. `BufValueT` + * can be any trivial type. Must be at least 4 bytes long. May be written + * over. + * + * \return An instance of `read_code_point_result`, wrapped in an + * `expected`. `chars` contains the code units read from `r`, which may + * point to `writebuf`. `cp` contains the code point parsed. + * If `r.begin() == r.end()`, returns EOF. + * If `read_code_unit()` or `putback_n()` fails, returns any errors returned + * by it. + * If the code point was not encoded correctly, returns + * `error::invalid_encoding`. 
+ */ + template + expected> + read_code_point(WrappedRange& r, span writebuf) + { + SCN_EXPECT(writebuf.size() * sizeof(BufValueT) >= 4); + using char_type = typename WrappedRange::char_type; + SCN_GCC_PUSH + SCN_GCC_IGNORE("-Wcast-align") // taken care of by the caller + return detail::read_code_point_impl( + r, + make_span(reinterpret_cast(writebuf.data()), + writebuf.size() * sizeof(BufValueT) / sizeof(char_type)), + std::integral_constant{}); + SCN_GCC_POP + } + + // read_zero_copy + + /// @{ + + /** + * Reads up to `n` characters (= code units) from `r`, as if by repeatedly + * incrementing `r.begin()`, and returns a `span` pointing into `r`. + * + * Let `count` be `min(r.size(), n)`. + * Reads, and advances `r` by `count` characters. + * `r.begin()` is in no point dereferenced. + * If `r.size()` is not defined, the range is not contiguous, and an empty + * span is returned. + * + * \return A `span` pointing to `r`, starting from `r.begin()` and with a + * size of `count`. + * If `r.begin() == r.end()`, returns EOF. + * If the range does not satisfy `contiguous_range`, returns an empty + * `span`. + */ + template ::type* = nullptr> + expected::type>> + read_zero_copy(WrappedRange& r, ranges::range_difference_t n) + { + if (r.begin() == r.end()) { + return error(error::end_of_range, "EOF"); + } + return r.get_buffer_and_advance(static_cast(n)); + } + template ::type* = nullptr> + expected::type>> + read_zero_copy(WrappedRange& r, ranges::range_difference_t) + { + if (r.begin() == r.end()) { + return error(error::end_of_range, "EOF"); + } + return span::type>{}; + } + /// @} + + // read_all_zero_copy + + /// @{ + /** + * Reads every character from `r`, as if by repeatedly incrementing + * `r.begin()`, and returns a `span` pointing into `r`. + * + * If there's no error, `r` is advanced to the end. + * `r.begin()` is in no point dereferenced. + * If `r.size()` is not defined, the range is not contiguous, and an empty + * span is returned. 
+ * + * \return A `span` pointing to `r`, starting at `r.begin()` and ending at + * `r.end()`. + * If `r.begin() == r.end()`, returns EOF. + * If the range does not satisfy `contiguous_range`, returns an empty + * `span`. + */ + template < + typename WrappedRange, + typename std::enable_if::type* = nullptr> + expected::type>> + read_all_zero_copy(WrappedRange& r) + { + if (r.begin() == r.end()) { + return error(error::end_of_range, "EOF"); + } + auto s = make_span(r.data(), static_cast(r.size())); + r.advance(r.size()); + return s; + } + template < + typename WrappedRange, + typename std::enable_if::type* = nullptr> + expected::type>> + read_all_zero_copy(WrappedRange& r) + { + if (r.begin() == r.end()) { + return error(error::end_of_range, "EOF"); + } + return span::type>{}; + } + /// @} + + // read_into + + namespace detail { + template + error read_into_impl(WrappedRange& r, + OutputIterator& it, + ranges::range_difference_t n) + { + for (; n != 0; --n) { + auto ret = read_code_unit(r, false); + if (!ret) { + return ret.error(); + } + *it = ret.value(); + r.advance(); + } + return {}; + } + } // namespace detail + + /// @{ + + /** + * Reads up to `n` characters (= code units) from `r`, as if by repeatedly + * calling `read_code_unit()`, and writing the characters into `it`. + * + * If reading fails at any point, the error is returned. + * `r` is advanced by as many characters that were successfully read. + * + * \param r Range to read + * \param it Iterator to write into, e.g. `std::back_insert_iterator`. Must + * satisfy `output_iterator`, and be incrementable by `n` times. + * \param n Characters to read from `r` + * + * \return `error::good` if `n` characters were read. + * If `r.begin() == r.end()` at any point before `n` characters has been + * read, returns EOF. + * Any error returned by `read_code_unit()` if one + * occurred. 
+ */ + template ::type* = nullptr> + error read_into(WrappedRange& r, + OutputIterator& it, + ranges::range_difference_t n) + { + while (n != 0) { + if (r.begin() == r.end()) { + return {error::end_of_range, "EOF"}; + } + auto s = read_zero_copy(r, n); + if (!s) { + return s.error(); + } + if (s.value().size() == 0) { + break; + } + it = std::copy(s.value().begin(), s.value().end(), it); + n -= s.value().ssize(); + } + if (n != 0) { + return detail::read_into_impl(r, it, n); + } + return {}; + } + template ::type* = nullptr> + error read_into(WrappedRange& r, + OutputIterator& it, + ranges::range_difference_t n) + { + if (r.begin() == r.end()) { + return {error::end_of_range, "EOF"}; + } + return detail::read_into_impl(r, it, n); + } + /// @} + + namespace detail { + template + expected> + read_until_pred_contiguous(WrappedRange& r, + Predicate&& pred, + bool pred_result_to_stop, + bool keep_final) + { + using span_type = span; + + if (r.begin() == r.end()) { + return error(error::end_of_range, "EOF"); + } + + if (!pred.is_multibyte()) { + for (auto it = r.begin(); it != r.end(); ++it) { + if (pred(make_span(&*it, 1)) == pred_result_to_stop) { + auto begin = r.data(); + auto end = keep_final ? it + 1 : it; + r.advance_to(end); + return span_type{ + begin, to_address_safe(end, r.begin(), r.end())}; + } + } + } + else { + for (auto it = r.begin(); it != r.end();) { + auto len = ::scn::get_sequence_length(*it); + if (len == 0 || ranges::distance(it, r.end()) < len) { + return error{error::invalid_encoding, + "Invalid code point"}; + } + auto span = + make_span(to_address_safe(it, r.begin(), r.end()), + static_cast(len)); + code_point cp{}; + auto i = parse_code_point(span.begin(), span.end(), cp); + if (!i) { + return i.error(); + } + if (i.value() != span.end()) { + return error{error::invalid_encoding, + "Invalid code point"}; + } + if (pred(span) == pred_result_to_stop) { + auto begin = r.data(); + auto end = keep_final ? 
it + len : it; + r.advance_to(end); + return span_type{ + begin, to_address_safe(end, r.begin(), r.end())}; + } + it += len; + } + } + auto begin = r.data(); + auto end = r.data() + r.size(); + r.advance_to(r.end()); + return span_type{begin, end}; + } + } // namespace detail + + // read_until_space_zero_copy + + namespace detail { + template + expected> + read_until_space_zero_copy_impl(WrappedRange& r, + Predicate&& is_space, + bool keep_final_space, + std::true_type) + { + return detail::read_until_pred_contiguous(r, SCN_FWD(is_space), + true, keep_final_space); + } + template + expected> + read_until_space_zero_copy_impl(WrappedRange& r, + Predicate&&, + bool, + std::false_type) + { + if (r.begin() == r.end()) { + return error(error::end_of_range, "EOF"); + } + return span{}; + } + } // namespace detail + + /** + * Reads code points from `r`, until a space, as determined by `is_space`, + * is found, and returns a `span` pointing to `r`. + * + * If no error occurs `r` is advanced past the returned span. + * On error, `r` is not advanced. + * + * \param r Range to read from + * + * \param is_space Predicate taking a span of code units encompassing a code + * point, and returning a `bool`, where `true` means that the character is a + * space. Additionally, it must have a member function + * `is_space.is_multibyte()`, returning a `bool`, where `true` means that a + * space character can encompass multiple code units. + * + * \param keep_final_space If `true`, the space code point found is included + * in the returned span, and it is advanced past in `r`. If `false`, it is + * not included, and `r.begin()` will point to the space. + * + * \return Span of code units, pointing to `r`, starting at `r.begin()`, and + * ending at the space character, the precise location determined by the + * `keep_final_space` parameter. + * If `r.begin() == r.end()`, returns EOF. + * `r` reaching its end before a space character is found is not considered + * an error. 
+ * If `r` contains invalid encoding, returns `error::invalid_encoding`. + * If the range is not contiguous, returns an empty `span`. + */ + template + expected> + read_until_space_zero_copy(WrappedRange& r, + Predicate&& is_space, + bool keep_final_space) + { + return detail::read_until_space_zero_copy_impl( + r, SCN_FWD(is_space), keep_final_space, + std::integral_constant{}); + } + + // read_until_space + + namespace detail { + template + error read_until_pred_buffer(WrappedRange& r, + Predicate&& pred, + bool pred_result_to_stop, + OutputIt& out, + OutputItCmp out_cmp, + bool keep_final, + bool& done, + std::true_type) + { + if (!pred.is_multibyte()) { + while (r.begin() != r.end() && !done) { + auto s = r.get_buffer_and_advance(); + for (auto it = s.begin(); it != s.end() && out_cmp(out); + ++it) { + if (pred(make_span(&*it, 1)) == pred_result_to_stop) { + if (keep_final) { + *out = *it; + ++out; + } + auto e = + putback_n(r, ranges::distance(it, s.end())); + if (!e) { + return e; + } + done = true; + break; + } + *out = *it; + ++out; + } + if (!done && out_cmp(out)) { + auto ret = read_code_unit(r, false); + if (!ret) { + if (ret.error() == error::end_of_range) { + return {}; + } + return ret.error(); + } + if (pred(make_span(&ret.value(), 1)) == + pred_result_to_stop) { + if (keep_final) { + r.advance(); + *out = ret.value(); + ++out; + } + done = true; + break; + } + r.advance(); + *out = ret.value(); + ++out; + } + } + } + else { + while (r.begin() != r.end() && !done) { + auto s = r.get_buffer_and_advance(); + for (auto it = s.begin(); it != s.end() && out_cmp(out);) { + auto len = ::scn::get_sequence_length(*it); + if (len == 0) { + return error{error::invalid_encoding, + "Invalid code point"}; + } + if (ranges::distance(it, s.end()) < len) { + auto e = putback_n(r, len); + if (!e) { + return e; + } + break; + } + auto cpspan = make_span(it, static_cast(len)); + code_point cp{}; + auto i = + parse_code_point(cpspan.begin(), cpspan.end(), cp); + if (!i) { 
+ return i.error(); + } + if (i.value() != cpspan.end()) { + return error{error::invalid_encoding, + "Invalid code point"}; + } + if (pred(cpspan) == pred_result_to_stop) { + if (keep_final) { + out = std::copy(cpspan.begin(), cpspan.end(), + out); + } + done = true; + break; + } + out = std::copy(cpspan.begin(), cpspan.end(), out); + } + + if (!done && out_cmp(out)) { + alignas(typename WrappedRange::char_type) unsigned char + buf[4] = {0}; + auto cpret = read_code_point(r, make_span(buf, 4)); + if (!cpret) { + if (cpret.error() == error::end_of_range) { + return {}; + } + return cpret.error(); + } + if (pred(cpret.value().chars) == pred_result_to_stop) { + if (keep_final) { + out = std::copy(cpret.value().chars.begin(), + cpret.value().chars.end(), out); + } + else { + return putback_n(r, + cpret.value().chars.ssize()); + } + done = true; + break; + } + out = std::copy(cpret.value().chars.begin(), + cpret.value().chars.end(), out); + } + } + } + return {}; + } + template + error read_until_pred_buffer(WrappedRange&, + Predicate&&, + bool, + OutputIt&, + OutputItCmp, + bool, + bool& done, + std::false_type) + { + done = false; + return {}; + } + + template + error read_until_pred_non_contiguous(WrappedRange& r, + Predicate&& pred, + bool pred_result_to_stop, + OutputIt& out, + OutputItCmp out_cmp, + bool keep_final) + { + if (r.begin() == r.end()) { + return {error::end_of_range, "EOF"}; + } + + { + bool done = false; + auto e = read_until_pred_buffer( + r, pred, pred_result_to_stop, out, out_cmp, keep_final, + done, + std::integral_constant< + bool, WrappedRange::provides_buffer_access>{}); + if (!e) { + return e; + } + if (done) { + return {}; + } + } + + if (!pred.is_multibyte()) { + while (r.begin() != r.end() && out_cmp(out)) { + auto cu = read_code_unit(r, false); + if (!cu) { + return cu.error(); + } + if (pred(make_span(&cu.value(), 1)) == + pred_result_to_stop) { + if (keep_final) { + r.advance(); + *out = cu.value(); + ++out; + } + return {}; + } + 
r.advance(); + *out = cu.value(); + ++out; + } + } + else { + unsigned char buf[4] = {0}; + while (r.begin() != r.end() && out_cmp(out)) { + auto cp = read_code_point(r, make_span(buf, 4)); + if (!cp) { + return cp.error(); + } + if (pred(cp.value().chars) == pred_result_to_stop) { + if (keep_final) { + out = std::copy(cp.value().chars.begin(), + cp.value().chars.end(), out); + return {}; + } + else { + return putback_n(r, cp.value().chars.ssize()); + } + } + out = std::copy(cp.value().chars.begin(), + cp.value().chars.end(), out); + } + } + return {}; + } + } // namespace detail + + /// @{ + + /** + * Reads code points from `r`, until a space, as determined by `is_space`, + * is found, and writes them into `out`, a single code unit at a time. + * + * If no error occurs, `r` is advanced past the last character written into + * `out`. + * + * On error, `r` is advanced an indeterminate amount, as if by calling + * `r.advance(n)`, where `n` is a non-negative integer. + * It is, however, not advanced past any space characters. + * + * \param r Range to read from + * + * \param out Iterator to write read characters into. Must satisfy + * `output_iterator`. + * + * \param is_space Predicate taking a span of code units encompassing a code + * point, and returning a `bool`, where `true` means that the character is a + * space. Additionally, it must have a member function + * `is_space.is_multibyte()`, returning a `bool`, where `true` means that a + * space character can encompass multiple code units. + * + * \param keep_final_space If `true`, the space code point found is written + * into `out`, and it is advanced past in `r`. If `false`, it is not + * included, and `r.begin()` will point to the space. + * + * \return `error::good` on success. + * If `r.begin() == r.end()`, returns EOF. + * `r` reaching its end before a space character is found is not considered + * an error. + * If `r` contains invalid encoding, returns `error::invalid_encoding`. 
+ */ + template < + typename WrappedRange, + typename OutputIterator, + typename Predicate, + typename std::enable_if::type* = nullptr> + error read_until_space(WrappedRange& r, + OutputIterator& out, + Predicate&& is_space, + bool keep_final_space) + { + auto s = + read_until_space_zero_copy(r, SCN_FWD(is_space), keep_final_space); + if (!s) { + return s.error(); + } + out = std::copy(s.value().begin(), s.value().end(), out); + return {}; + } + template < + typename WrappedRange, + typename OutputIterator, + typename Predicate, + typename std::enable_if::type* = nullptr> + error read_until_space(WrappedRange& r, + OutputIterator& out, + Predicate&& is_space, + bool keep_final_space) + { + return detail::read_until_pred_non_contiguous( + r, SCN_FWD(is_space), true, out, + [](const OutputIterator&) { return true; }, keep_final_space); + } + + /// @} + + // read_until_space_ranged + + /// @{ + + /** + * Otherwise equivalent to `read_until_space`, except will also stop reading + * if `out == end`. + * + * \see read_until_space + */ + template + error read_until_space_ranged(WrappedRange& r, + OutputIterator& out, + Sentinel end, + Predicate&& is_space, + bool keep_final_space) + { + return detail::read_until_pred_non_contiguous( + r, SCN_FWD(is_space), true, out, + [&end](const OutputIterator& it) { return it != end; }, + keep_final_space); + } + + /// @} + + namespace detail { + /** + * Predicate to pass to read_until_space etc. + */ + template + struct is_space_predicate { + using char_type = CharT; + using locale_type = basic_locale_ref; + + /** + * \param l Locale to use, fetched from `ctx.locale()` + * \param localized If `true`, use `l.get_custom()`, otherwise use + * `l.get_static()`. 
+ * \param width If `width != 0`, limit the number of code + * units to be read + */ + SCN_CONSTEXPR14 is_space_predicate(const locale_type& l, + bool localized, + size_t width) + : m_locale{nullptr}, + m_width{width}, + m_fn{get_fn(localized, width != 0)} + { + if (localized) { + l.prepare_localized(); + m_locale = l.get_localized_unsafe(); + } + } + + /** + * Returns `true` if `ch` is a code point according to the supplied + * locale, using either the static or custom locale, depending on + * the `localized` parameter given to the constructor. + * + * Returns also `true` if the maximum width, as determined by the + * `width` parameter given to the constructor, was reached. + */ + bool operator()(span ch) + { + SCN_EXPECT(m_fn); + SCN_EXPECT(ch.size() >= 1); + return m_fn(m_locale, ch, m_i, m_width); + } + + /** + * Returns `true`, if `*this` uses the custom locale for classifying + * space characters + */ + constexpr bool is_localized() const + { + return m_locale != nullptr; + } + /** + * Returns `true` if a space character can encompass multiple code + * units + */ + constexpr bool is_multibyte() const + { + return is_localized() && is_multichar_type(CharT{}); + } + + private: + using static_locale_type = typename locale_type::static_type; + using custom_locale_type = typename locale_type::custom_type; + const custom_locale_type* m_locale; + size_t m_width{0}, m_i{0}; + + constexpr static bool call(const custom_locale_type*, + span ch, + size_t&, + size_t) + { + return static_locale_type::is_space(ch); + } + static bool localized_call(const custom_locale_type* locale, + span ch, + size_t&, + size_t) + { + SCN_EXPECT(locale != nullptr); + return locale->is_space(ch); + } + SCN_CONSTEXPR14 static bool call_counting(const custom_locale_type*, + span ch, + size_t& i, + size_t max) + { + SCN_EXPECT(i <= max); + if (i == max || i + ch.size() > max) { + return true; + } + i += ch.size(); + return static_locale_type::is_space(ch); + } + static bool 
localized_call_counting( + const custom_locale_type* locale, + span ch, + size_t& i, + size_t max) + { + SCN_EXPECT(locale != nullptr); + SCN_EXPECT(i <= max); + if (i == max || i + ch.size() > max) { + return true; + } + i += ch.size(); + return locale->is_space(ch); + } + + using fn_type = bool (*)(const custom_locale_type*, + span, + size_t&, + size_t); + fn_type m_fn{nullptr}; + + static SCN_CONSTEXPR14 fn_type get_fn(bool localized, bool counting) + { + if (localized) { + return counting ? localized_call_counting : localized_call; + } + return counting ? call_counting : call; + } + }; + + template + is_space_predicate make_is_space_predicate( + const basic_locale_ref& locale, + bool localized, + size_t width = 0) + { + return {locale, localized, width}; + } + + template + struct basic_skipws_iterator { + using value_type = void; + using reference = void; + using pointer = void; + using size_type = size_t; + using difference_type = std::ptrdiff_t; + using iterator_category = std::output_iterator_tag; + + constexpr basic_skipws_iterator() = default; + + basic_skipws_iterator& operator=(CharT) + { + return *this; + } + basic_skipws_iterator& operator*() + { + return *this; + } + basic_skipws_iterator& operator++() + { + return *this; + } + }; + } // namespace detail + + // skip_range_whitespace + + /// @{ + + /** + * Reads code points from `ctx.range()`, as if by repeatedly calling + * `read_code_point()`, until a non-space character is found, or EOF is + * reached. That non-space character is then put back into the range. + * + * Whether a character is a space, is determined by `ctx.locale()` and the + * `localized` parameter. + * + * \param ctx Context to get the range and locale from. + * + * \param localized If `true`, `ctx.locale().get_custom()` is used. + * Otherwise, `ctx.locale().get_static()` is used. + * In practice, means whether locale-specific whitespace characters are + * accepted, or just those given by `std::isspace` with the `"C"` locale. 
+ * + * \return `error::good` on success. + * If `ctx.range().begin() == ctx.range().end()`, returns EOF. + * If `ctx.range()` contains invalid encoding, returns + * `error::invalid_encoding`. + */ + template ::type* = nullptr> + error skip_range_whitespace(Context& ctx, bool localized) noexcept + { + auto is_space_pred = + detail::make_is_space_predicate(ctx.locale(), localized); + auto it = detail::basic_skipws_iterator{}; + return detail::read_until_pred_non_contiguous( + ctx.range(), is_space_pred, false, it, + [](decltype(it)) { return true; }, false); + } + template ::type* = nullptr> + error skip_range_whitespace(Context& ctx, bool localized) noexcept + { + auto is_space_pred = + detail::make_is_space_predicate(ctx.locale(), localized); + return detail::read_until_pred_contiguous(ctx.range(), is_space_pred, + false, false) + .error(); + } + + /// @} + + namespace detail { + template + struct simple_integer_scanner { + template + static expected::iterator> scan( + span buf, + T& val, + int base = 10, + uint16_t flags = 0); + + template + static expected::iterator> scan_lower( + span buf, + T& val, + int base = 10, + uint16_t flags = 0); + }; + } // namespace detail + + /** + * A very simple parser base class, which only accepts empty format string + * specifiers, e.g. `{}`, `{:}` or `{1:}`. + */ + struct empty_parser : parser_base { + template + error parse(ParseCtx& pctx) + { + pctx.arg_begin(); + if (SCN_UNLIKELY(!pctx)) { + return {error::invalid_format_string, + "Unexpected format string end"}; + } + if (!pctx.check_arg_end()) { + return {error::invalid_format_string, "Expected argument end"}; + } + pctx.arg_end(); + return {}; + } + }; + + /** + * Provides a framework for building a format string parser. + * Does not provide a `parse()` member function, so not a parser on to its + * own. 
+ */ + struct common_parser : parser_base { + static constexpr bool support_align_and_fill() + { + return true; + } + + protected: + /** + * Parse the beginning of the argument. + * Returns `error::invalid_format_string` if `!pctx` (the format string + * ended) + */ + template + error parse_common_begin(ParseCtx& pctx) + { + pctx.arg_begin(); + if (SCN_UNLIKELY(!pctx)) { + return {error::invalid_format_string, + "Unexpected format string end"}; + } + return {}; + } + + /** + * Returns `error::invalid_format_string` if the format string or the + * argument has ended. + */ + template + error check_end(ParseCtx& pctx) + { + if (!pctx || pctx.check_arg_end()) { + return {error::invalid_format_string, + "Unexpected end of format string argument"}; + } + return {}; + } + + /** + * Parse alignment, fill, width, and localization flags, and populate + * appropriate member variables. + * + * Returns `error::invalid_format_string` if an error occurred. + */ + template + error parse_common_flags(ParseCtx& pctx) + { + SCN_EXPECT(check_end(pctx)); + using char_type = typename ParseCtx::char_type; + + auto ch = pctx.next_char(); + auto next_char = [&]() -> error { + pctx.advance_char(); + auto e = check_end(pctx); + if (!e) { + return e; + } + ch = pctx.next_char(); + return {}; + }; + auto parse_number = [&](size_t& n) -> error { + SCN_EXPECT(pctx.locale().get_static().is_digit(ch)); + + auto it = pctx.begin(); + for (; it != pctx.end(); ++it) { + if (!pctx.locale().get_static().is_digit(*it)) { + break; + } + } + auto buf = make_span(pctx.begin(), it); + + auto s = detail::simple_integer_scanner{}; + auto res = s.scan(buf, n, 10); + if (!res) { + return res.error(); + } + + for (it = pctx.begin(); it != res.value(); + pctx.advance_char(), it = pctx.begin()) {} + return {}; + }; + + auto get_align_char = [&](char_type c) -> common_options_type { + if (c == detail::ascii_widen('<')) { + return aligned_left; + } + if (c == detail::ascii_widen('>')) { + return aligned_right; + } + 
if (c == detail::ascii_widen('^')) { + return aligned_center; + } + return common_options_none; + }; + auto parse_align = [&](common_options_type align, char_type fill) { + if (align != common_options_none) { + common_options |= align; + } + fill_char = static_cast(fill); + }; + + // align and fill + common_options_type align{}; + bool align_set = false; + if (pctx.chars_left() > 1 && + ch != detail::ascii_widen('[')) { + const auto peek = pctx.peek_char(); + align = get_align_char(peek); + if (align != common_options_none) { + // Arg is like "{:_x}", where _ is some fill character, and + // x is an alignment flag + // -> we have both alignment and fill + parse_align(align, ch); + + auto e = next_char(); + SCN_ENSURE(e); + if (!next_char()) { + return {}; + } + align_set = true; + } + } + if (!align_set) { + align = get_align_char(ch); + if (align != common_options_none) { + // Arg is like "{:x}", where x is an alignment flag + // -> we have alignment with default fill (space ' ') + parse_align(align, detail::ascii_widen(' ')); + if (!next_char()) { + return {}; + } + } + } + + // digit -> width + if (pctx.locale().get_static().is_digit(ch)) { + common_options |= width_set; + + size_t w{}; + auto e = parse_number(w); + if (!e) { + return e; + } + field_width = w; + return {}; + } + // L -> localized + if (ch == detail::ascii_widen('L')) { + common_options |= localized; + + if (!next_char()) { + return {}; + } + } + + return {}; + } + + /** + * Parse argument end. + * + * Returns `error::invalid_format_string` if argument end was not found. + */ + template + error parse_common_end(ParseCtx& pctx) + { + if (!pctx || !pctx.check_arg_end()) { + return {error::invalid_format_string, "Expected argument end"}; + } + + pctx.arg_end(); + return {}; + } + + /** + * A null callback to pass to `parse_common`, doing nothing and + * returning `error::good`. 
+ */ + template + static error null_type_cb(ParseCtx&, bool&) + { + return {}; + } + + public: + /** + * Parse a format string argument, using `parse_common_begin`, + * `parse_common_flags`, `parse_common_end`, and the supplied type + * flags. + * + * `type_options.size() == type_flags.size()` must be `true`. + * `pctx` must be valid, and must start at the format string argument + * specifiers, e.g. in the case of `"{1:foo}"` -> `pctx == "foo}"` + * + * \param pctx Format string to parse + * \param type_options A span of characters, where each character + * corresponds to a valid type flag. For example, for characters, this + * span would be \c ['c'] + * \param type_flags A span of bools, where the values will be set to + * `true`, if a corresponding type flag from `type_options` was found. + * Should be initialized to all-`false`, as a `false` value will not be + * written. + * \param type_cb A callback to call, if none of the `type_options` + * matched. Must have the signature `(ParseCtx& pctx, bool& parsed) -> + * error`, where `parsed` is set to `true` if the flag at + * `pctx.next_char()` was parsed and advanced past. 
+ */ + template + error parse_common(ParseCtx& pctx, + span type_options, + span type_flags, + F&& type_cb) + { + SCN_EXPECT(type_options.size() == type_flags.size()); + + auto e = parse_common_begin(pctx); + if (!e) { + return e; + } + + if (!pctx) { + return {error::invalid_format_string, + "Unexpected end of format string"}; + } + if (pctx.check_arg_end()) { + return {}; + } + + e = parse_common_flags(pctx); + if (!e) { + return e; + } + + if (!pctx) { + return {error::invalid_format_string, + "Unexpected end of format string"}; + } + if (pctx.check_arg_end()) { + return {}; + } + + for (auto ch = pctx.next_char(); pctx && !pctx.check_arg_end(); + ch = pctx.next_char()) { + bool parsed = false; + for (std::size_t i = 0; i < type_options.size() && !parsed; + ++i) { + if (ch == type_options[i]) { + if (SCN_UNLIKELY(type_flags[i])) { + return {error::invalid_format_string, + "Repeat flag in format string"}; + } + type_flags[i] = true; + parsed = true; + } + } + if (parsed) { + pctx.advance_char(); + if (!pctx || pctx.check_arg_end()) { + break; + } + continue; + } + + e = type_cb(pctx, parsed); + if (!e) { + return e; + } + if (parsed) { + if (!pctx || pctx.check_arg_end()) { + break; + } + continue; + } + ch = pctx.next_char(); + + if (!parsed) { + return {error::invalid_format_string, + "Invalid character in format string"}; + } + if (!pctx || pctx.check_arg_end()) { + break; + } + } + + return parse_common_end(pctx); + } + + void make_localized() + { + common_options |= localized; + } + + /** + * Invoke `parse_common()` with default options (no type flags) + */ + template + error parse_default(ParseCtx& pctx) + { + return parse_common(pctx, {}, {}, null_type_cb); + } + + constexpr bool is_aligned_left() const noexcept + { + return (common_options & aligned_left) != 0 || + (common_options & aligned_center) != 0; + } + constexpr bool is_aligned_right() const noexcept + { + return (common_options & aligned_right) != 0 || + (common_options & aligned_center) != 0; + 
} + template + constexpr CharT get_fill_char() const noexcept + { + return static_cast(fill_char); + } + + size_t field_width{0}; + char32_t fill_char{0}; + enum common_options_type : uint8_t { + common_options_none = 0, + localized = 1, // 'L', + aligned_left = 2, // '<' + aligned_right = 4, // '>' + aligned_center = 8, // '^' + width_set = 16, // width + common_options_all = 31, + }; + uint8_t common_options{0}; + }; + + /** + * Derives from `common_parser`, and implements `parse()` with + * `parse_default()` + */ + struct common_parser_default : common_parser { + template + error parse(ParseCtx& pctx) + { + return parse_default(pctx); + } + }; + + namespace detail { + template ::type* = nullptr> + error scan_alignment(Context& ctx, + typename Context::char_type fill) noexcept + { + while (true) { + SCN_CLANG_PUSH_IGNORE_UNDEFINED_TEMPLATE + + auto ch = read_code_unit(ctx.range()); + if (SCN_UNLIKELY(!ch)) { + return ch.error(); + } + if (ch.value() != fill) { + auto pb = putback_n(ctx.range(), 1); + if (SCN_UNLIKELY(!pb)) { + return pb; + } + break; + } + + SCN_CLANG_POP_IGNORE_UNDEFINED_TEMPLATE + } + return {}; + } + template ::type* = nullptr> + error scan_alignment(Context& ctx, + typename Context::char_type fill) noexcept + { + SCN_CLANG_PUSH_IGNORE_UNDEFINED_TEMPLATE + const auto end = ctx.range().end(); + for (auto it = ctx.range().begin(); it != end; ++it) { + if (*it != fill) { + ctx.range().advance_to(it); + return {}; + } + } + ctx.range().advance_to(end); + return {}; + + SCN_CLANG_POP_IGNORE_UNDEFINED_TEMPLATE + } + + template + struct scanner_supports_alignment : std::false_type { + }; + template + struct scanner_supports_alignment< + Scanner, + typename std::enable_if::type> + : std::true_type { + }; + + template + error skip_alignment(Context& ctx, + Scanner& scanner, + bool left, + std::true_type) + { + if (left && !scanner.is_aligned_left()) { + return {}; + } + if (!left && !scanner.is_aligned_right()) { + return {}; + } + return 
scan_alignment( + ctx, + scanner.template get_fill_char()); + } + template + error skip_alignment(Context&, Scanner&, bool, std::false_type) + { + return {}; + } + + /** + * Scan an argument into `val` from `ctx`, using `Scanner` and `pctx`. + * + * Parses `pctx` for `Scanner`, skips whitespace and alignment if + * necessary, and scans the argument into `val`. + */ + template + error visitor_boilerplate(T& val, Context& ctx, ParseCtx& pctx) + { + Scanner scanner; + + auto err = pctx.parse(scanner); + if (!err) { + return err; + } + + if (scanner.skip_preceding_whitespace()) { + err = skip_range_whitespace(ctx, false); + if (!err) { + return err; + } + } + + err = skip_alignment(ctx, scanner, false, + scanner_supports_alignment{}); + if (!err) { + return err; + } + + err = scanner.scan(val, ctx); + if (!err) { + return err; + } + + return skip_alignment(ctx, scanner, true, + scanner_supports_alignment{}); + } + } // namespace detail + + SCN_END_NAMESPACE +} // namespace scn + +#endif diff --git a/src/third-party/scnlib/include/scn/reader/float.h b/src/third-party/scnlib/include/scn/reader/float.h new file mode 100644 index 0000000..24265a1 --- /dev/null +++ b/src/third-party/scnlib/include/scn/reader/float.h @@ -0,0 +1,246 @@ +// Copyright 2017 Elias Kosunen +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+// +// This file is a part of scnlib: +// https://github.com/eliaskosunen/scnlib + +#ifndef SCN_READER_FLOAT_H +#define SCN_READER_FLOAT_H + +#include "../util/small_vector.h" +#include "common.h" + +namespace scn { + SCN_BEGIN_NAMESPACE + namespace detail { + template + struct float_scanner_access; + + template + struct float_scanner : common_parser { + static_assert(std::is_floating_point::value, + "float_scanner requires a floating point type"); + + friend struct float_scanner_access; + + template + error parse(ParseCtx& pctx) + { + using char_type = typename ParseCtx::char_type; + + array options{ + {// hex + ascii_widen('a'), ascii_widen('A'), + // scientific + ascii_widen('e'), ascii_widen('E'), + // fixed + ascii_widen('f'), ascii_widen('F'), + // general + ascii_widen('g'), ascii_widen('G'), + // localized digits + ascii_widen('n'), + // thsep + ascii_widen('\'')}}; + bool flags[10] = {false}; + + auto e = parse_common( + pctx, span{options.begin(), options.end()}, + span{flags, 10}, null_type_cb); + if (!e) { + return e; + } + + if (flags[0] && flags[1]) { + return {error::invalid_format_string, + "Can't have both 'a' and 'A' flags with floats"}; + } + if (flags[2] && flags[3]) { + return {error::invalid_format_string, + "Can't have both 'e' and 'E' flags with floats"}; + } + if (flags[4] && flags[5]) { + return {error::invalid_format_string, + "Can't have both 'f' and 'F' flags with floats"}; + } + if (flags[6] && flags[7]) { + return {error::invalid_format_string, + "Can't have both 'g' and 'G' flags with floats"}; + } + + bool set_hex = flags[0] || flags[1]; + bool set_scientific = flags[2] || flags[3]; + bool set_fixed = flags[4] || flags[5]; + bool set_general = flags[6] || flags[7]; + if (set_general && set_fixed) { + return {error::invalid_format_string, + "General float already implies fixed"}; + } + if (set_general && set_scientific) { + return {error::invalid_format_string, + "General float already implies scientific"}; + } + + format_options = 
0; + if (set_hex) { + format_options |= allow_hex; + } + if (set_scientific) { + format_options |= allow_scientific; + } + if (set_fixed) { + format_options |= allow_fixed; + } + if (set_general) { + format_options |= allow_fixed | allow_scientific; + } + if (format_options == 0) { + format_options |= + allow_fixed | allow_scientific | allow_hex; + } + + // 'n' + if (flags[8]) { + common_options |= localized; + format_options |= localized_digits; + } + + // thsep + if (flags[9]) { + format_options |= allow_thsep; + } + + return {}; + } + + template + error scan(T& val, Context& ctx) + { + using char_type = typename Context::char_type; + + auto do_parse_float = [&](span s) -> error { + T tmp = 0; + expected ret{0}; + if (SCN_UNLIKELY((format_options & localized_digits) != 0 || + ((common_options & localized) != 0 && + (format_options & allow_hex) != 0))) { + // 'n' OR ('L' AND 'a') + // because none of our parsers support BOTH hexfloats + // and custom (localized) decimal points, + // so we have to fall back on iostreams + SCN_CLANG_PUSH_IGNORE_UNDEFINED_TEMPLATE + std::basic_string str(s.data(), s.size()); + ret = + ctx.locale().get_localized().read_num(tmp, str, 0); + SCN_CLANG_POP_IGNORE_UNDEFINED_TEMPLATE + } + else { + ret = _read_float( + tmp, s, + ctx.locale() + .get((common_options & localized) != 0) + .decimal_point()); + } + + if (!ret) { + return ret.error(); + } + if (ret.value() != s.ssize()) { + auto pb = + putback_n(ctx.range(), s.ssize() - ret.value()); + if (!pb) { + return pb; + } + } + val = tmp; + return {}; + }; + + auto is_space_pred = make_is_space_predicate( + ctx.locale(), (common_options & localized) != 0, + field_width); + + if (Context::range_type::is_contiguous) { + auto s = read_until_space_zero_copy(ctx.range(), + is_space_pred, false); + if (!s) { + return s.error(); + } + return do_parse_float(s.value()); + } + + small_vector buf; + auto outputit = std::back_inserter(buf); + auto e = read_until_space(ctx.range(), outputit, 
is_space_pred, + false); + if (!e && buf.empty()) { + return e; + } + + return do_parse_float(make_span(buf)); + } + + enum format_options_type { + allow_hex = 1, + allow_scientific = 2, + allow_fixed = 4, + localized_digits = 8, + allow_thsep = 16 + }; + uint8_t format_options{allow_hex | allow_scientific | allow_fixed}; + + private: + template + expected _read_float(T& val, + span s, + CharT locale_decimal_point) + { + size_t chars{}; + std::basic_string str(s.data(), s.size()); + SCN_CLANG_PUSH_IGNORE_UNDEFINED_TEMPLATE + auto ret = + _read_float_impl(str.data(), chars, locale_decimal_point); + SCN_CLANG_POP_IGNORE_UNDEFINED_TEMPLATE + if (!ret) { + return ret.error(); + } + val = ret.value(); + return static_cast(chars); + } + + template + expected _read_float_impl(const CharT* str, + size_t& chars, + CharT locale_decimal_point); + }; + + // instantiate + template struct float_scanner; + template struct float_scanner; + template struct float_scanner; + + template + struct float_scanner_access : public float_scanner { + using float_scanner::_read_float; + using float_scanner::_read_float_impl; + }; + } // namespace detail + SCN_END_NAMESPACE +} // namespace scn + +#if defined(SCN_HEADER_ONLY) && SCN_HEADER_ONLY && \ + !defined(SCN_READER_FLOAT_CPP) +#include "reader_float.cpp" +#endif + +#endif diff --git a/src/third-party/scnlib/include/scn/reader/int.h b/src/third-party/scnlib/include/scn/reader/int.h new file mode 100644 index 0000000..19bac44 --- /dev/null +++ b/src/third-party/scnlib/include/scn/reader/int.h @@ -0,0 +1,537 @@ +// Copyright 2017 Elias Kosunen +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// +// This file is a part of scnlib: +// https://github.com/eliaskosunen/scnlib + +#ifndef SCN_READER_INT_H +#define SCN_READER_INT_H + +#include "../util/math.h" +#include "common.h" + +namespace scn { + SCN_BEGIN_NAMESPACE + + namespace detail { + template + struct integer_scanner : common_parser { + static_assert(std::is_integral::value, + "integer_scanner requires an integral type"); + + friend struct simple_integer_scanner; + + bool skip_preceding_whitespace() + { + // if format_options == single_code_unit, + // then we're scanning a char -> don't skip + return format_options != single_code_unit; + } + + template + error parse(ParseCtx& pctx) + { + using char_type = typename ParseCtx::char_type; + + format_options = 0; + + int custom_base = 0; + auto each = [&](ParseCtx& p, bool& parsed) -> error { + parsed = false; + auto ch = pctx.next_char(); + + if (ch == detail::ascii_widen('B')) { + // Custom base + p.advance_char(); + if (SCN_UNLIKELY(!p)) { + return {error::invalid_format_string, + "Unexpected format string end"}; + } + if (SCN_UNLIKELY(p.check_arg_end())) { + return {error::invalid_format_string, + "Unexpected argument end"}; + } + ch = p.next_char(); + + const auto zero = detail::ascii_widen('0'), + nine = detail::ascii_widen('9'); + integer_type_for_char tmp = 0; + if (ch < zero || ch > nine) { + return {error::invalid_format_string, + "Invalid character after 'B', " + "expected digit"}; + } + tmp = static_cast>( + p.next_char() - zero); + if (tmp < 1) { + return {error::invalid_format_string, + "Invalid base, must be between 2 
and 36"}; + } + + p.advance_char(); + if (!p) { + return {error::invalid_format_string, + "Unexpected end of format string"}; + } + if (p.check_arg_end()) { + custom_base = static_cast(tmp); + parsed = true; + return {}; + } + ch = p.next_char(); + + if (ch < zero || ch > nine) { + return {error::invalid_format_string, + "Invalid character after 'B', " + "expected digit"}; + } + tmp *= 10; + tmp += static_cast>( + ch - zero); + if (tmp < 2 || tmp > 36) { + return {error::invalid_format_string, + "Invalid base, must be between 2 and 36"}; + } + custom_base = static_cast(tmp); + parsed = true; + pctx.advance_char(); + return {}; + } + + return {}; + }; + + array options{{// decimal + ascii_widen('d'), + // binary + ascii_widen('b'), + // octal + ascii_widen('o'), + // hex + ascii_widen('x'), + // detect base + ascii_widen('i'), + // unsigned decimal + ascii_widen('u'), + // code unit + ascii_widen('c'), + // localized digits + ascii_widen('n'), + // thsep + ascii_widen('\'')}}; + bool flags[9] = {false}; + + auto e = parse_common( + pctx, span{options.begin(), options.end()}, + span{flags, 9}, each); + if (!e) { + return e; + } + + int base_flags_set = int(flags[0]) + int(flags[1]) + + int(flags[2]) + int(flags[3]) + + int(flags[4]) + int(flags[5]) + + int(custom_base != 0); + if (SCN_UNLIKELY(base_flags_set > 1)) { + return {error::invalid_format_string, + "Up to one base flags ('d', 'i', 'u', 'b', 'o', " + "'x', 'B') allowed"}; + } + else if (base_flags_set == 0) { + // Default: + // 'c' for CharT + // 'd' otherwise + if (std::is_same::value) { + format_options = single_code_unit; + } + else { + base = 10; + } + } + else if (custom_base != 0) { + // B__ + base = static_cast(custom_base); + } + else if (flags[0]) { + // 'd' flag + base = 10; + } + else if (flags[1]) { + // 'b' flag + base = 2; + format_options |= allow_base_prefix; + } + else if (flags[2]) { + // 'o' flag + base = 8; + format_options |= allow_base_prefix; + } + else if (flags[3]) { + // 'x' flag + 
base = 16; + format_options |= allow_base_prefix; + } + else if (flags[4]) { + // 'i' flag + base = 0; + } + else if (flags[5]) { + // 'u' flag + base = 10; + format_options |= only_unsigned; + } + + // n set, implies L + if (flags[7]) { + common_options |= localized; + format_options |= localized_digits; + } + if ((format_options & localized_digits) != 0 && + (base != 0 && base != 10 && base != 8 && base != 16)) { + return {error::invalid_format_string, + "Localized integers can only be scanned in " + "bases 8, 10 and 16"}; + } + + // thsep flag + if (flags[8]) { + format_options |= allow_thsep; + } + + // 'c' flag -> no other options allowed + if (flags[6]) { + if (!(format_options == 0 || + format_options == single_code_unit) || + base_flags_set != 0) { + return {error::invalid_format_string, + "'c' flag cannot be used in conjunction with " + "any other flags"}; + } + format_options = single_code_unit; + } + + return {}; + } + + template + error scan(T& val, Context& ctx) + { + using char_type = typename Context::char_type; + auto do_parse_int = [&](span s) -> error { + T tmp = 0; + expected ret{0}; + if (SCN_UNLIKELY((format_options & localized_digits) != + 0)) { + SCN_CLANG_PUSH_IGNORE_UNDEFINED_TEMPLATE + int b{base}; + auto r = parse_base_prefix(s, b); + if (!r) { + return r.error(); + } + if (b == -1) { + // -1 means we read a '0' + tmp = 0; + return {}; + } + if (b != 10 && base != b && base != 0) { + return {error::invalid_scanned_value, + "Invalid base prefix"}; + } + if (base == 0) { + base = static_cast(b); + } + if (base != 8 && base != 10 && base != 16) { + return {error::invalid_scanned_value, + "Localized values have to be in base " + "8, 10 or 16"}; + } + + auto it = r.value(); + std::basic_string str(to_address(it), + s.size()); + ret = ctx.locale().get_localized().read_num( + tmp, str, static_cast(base)); + + if (tmp < T{0} && + (format_options & only_unsigned) != 0) { + return {error::invalid_scanned_value, + "Parsed negative value when type 
was 'u'"}; + } + SCN_CLANG_POP_IGNORE_UNDEFINED_TEMPLATE + } + else { + SCN_CLANG_PUSH_IGNORE_UNDEFINED_TEMPLATE + ret = _parse_int(tmp, s); + SCN_CLANG_POP_IGNORE_UNDEFINED_TEMPLATE + } + + if (!ret) { + return ret.error(); + } + if (ret.value() != s.ssize()) { + auto pb = + putback_n(ctx.range(), s.ssize() - ret.value()); + if (!pb) { + return pb; + } + } + val = tmp; + return {}; + }; + + if (format_options == single_code_unit) { + SCN_MSVC_PUSH + SCN_MSVC_IGNORE(4127) // conditional expression is constant + if (sizeof(T) < sizeof(char_type)) { + // sizeof(char_type) > 1 -> wide range + // Code unit might not fit + return error{error::invalid_operation, + "Cannot read this type as a code unit " + "from a wide range"}; + } + SCN_MSVC_POP + auto ch = read_code_unit(ctx.range()); + if (!ch) { + return ch.error(); + } + val = static_cast(ch.value()); + return {}; + } + + SCN_MSVC_PUSH + SCN_MSVC_IGNORE(4127) // conditional expression is constant + if ((std::is_same::value || + std::is_same::value) && + !std::is_same::value) { + // T is a character type, but not char_type: + // Trying to read a char from a wide range, or wchar_t from + // a narrow one + // Reading a code unit is allowed, however + return error{error::invalid_operation, + "Cannot read a char from a wide range, or a " + "wchar_t from a narrow one"}; + } + SCN_MSVC_POP + + std::basic_string buf{}; + span bufspan{}; + auto e = _read_source( + ctx, buf, bufspan, + std::integral_constant< + bool, Context::range_type::is_contiguous>{}); + if (!e) { + return e; + } + + return do_parse_int(bufspan); + } + + enum format_options_type : uint8_t { + // "n" option -> localized digits and digit grouping + localized_digits = 1, + // "'" option -> accept thsep + // if "L" use locale, default=',' + allow_thsep = 2, + // "u" option -> don't allow sign + only_unsigned = 4, + // Allow base prefix (e.g. 
0B and 0x) + allow_base_prefix = 8, + // "c" option -> scan a code unit + single_code_unit = 16, + }; + uint8_t format_options{default_format_options()}; + + // 0 = detect base + // Otherwise [2,36] + uint8_t base{0}; + + private: + static SCN_CONSTEXPR14 uint8_t default_format_options() + { + SCN_MSVC_PUSH + SCN_MSVC_IGNORE(4127) // conditional expression is constant + if (std::is_same::value || + std::is_same::value) { + return single_code_unit; + } + return 0; + SCN_MSVC_POP + } + + template + error _read_source(Context& ctx, + Buf& buf, + span& s, + std::false_type) + { + auto do_read = [&](Buf& b) -> error { + auto outputit = std::back_inserter(b); + auto is_space_pred = make_is_space_predicate( + ctx.locale(), (common_options & localized) != 0, + field_width); + auto e = read_until_space(ctx.range(), outputit, + is_space_pred, false); + if (!e && b.empty()) { + return e; + } + + return {}; + }; + + if (SCN_LIKELY((format_options & allow_thsep) == 0)) { + auto e = do_read(buf); + if (!e) { + return e; + } + s = make_span(buf.data(), buf.size()); + return {}; + } + + Buf tmp; + auto e = do_read(tmp); + if (!e) { + return e; + } + auto thsep = ctx.locale() + .get((common_options & localized) != 0) + .thousands_separator(); + + auto it = tmp.begin(); + for (; it != tmp.end(); ++it) { + if (*it == thsep) { + for (auto it2 = it; ++it2 != tmp.end();) { + *it++ = SCN_MOVE(*it2); + } + break; + } + } + + auto n = + static_cast(std::distance(tmp.begin(), it)); + if (n == 0) { + return {error::invalid_scanned_value, + "Only a thousands separator found"}; + } + + buf = SCN_MOVE(tmp); + s = make_span(buf.data(), n); + return {}; + } + + template + error _read_source(Context& ctx, + Buf& buf, + span& s, + std::true_type) + { + if (SCN_UNLIKELY((format_options & allow_thsep) != 0)) { + return _read_source(ctx, buf, s, std::false_type{}); + } + auto ret = read_zero_copy( + ctx.range(), field_width != 0 + ? 
static_cast(field_width) + : ctx.range().size()); + if (!ret) { + return ret.error(); + } + s = ret.value(); + return {}; + } + + template + expected::iterator> parse_base_prefix( + span s, + int& b) const; + + template + expected _parse_int(T& val, span s); + + template + expected::iterator> _parse_int_impl( + T& val, + bool minus_sign, + span buf) const; + }; + + // instantiate + template struct integer_scanner; + template struct integer_scanner; + template struct integer_scanner; + template struct integer_scanner; + template struct integer_scanner; + template struct integer_scanner; + template struct integer_scanner; + template struct integer_scanner; + template struct integer_scanner; + template struct integer_scanner; + template struct integer_scanner; + template struct integer_scanner; + + template + template + expected::iterator> + simple_integer_scanner::scan(span buf, + T& val, + int base, + uint16_t flags) + { + SCN_EXPECT(buf.size() != 0); + + integer_scanner s{}; + s.base = static_cast(base); + s.format_options = flags & 0xffu; + s.common_options = static_cast(flags >> 8u); + SCN_CLANG_PUSH_IGNORE_UNDEFINED_TEMPLATE + auto n = s._parse_int(val, buf); + SCN_CLANG_POP_IGNORE_UNDEFINED_TEMPLATE + if (!n) { + return n.error(); + } + return buf.begin() + n.value(); + } + template + template + expected::iterator> + simple_integer_scanner::scan_lower(span buf, + T& val, + int base, + uint16_t flags) + { + SCN_EXPECT(buf.size() != 0); + SCN_EXPECT(base > 0); + + integer_scanner s{}; + s.base = static_cast(base); + s.format_options = flags & 0xffu; + s.common_options = static_cast(flags >> 8u); + + bool minus_sign = false; + if (buf[0] == ascii_widen('-')) { + buf = buf.subspan(1); + minus_sign = true; + } + + SCN_CLANG_PUSH_IGNORE_UNDEFINED_TEMPLATE + return s._parse_int_impl(val, minus_sign, buf); + SCN_CLANG_POP_IGNORE_UNDEFINED_TEMPLATE + } + } // namespace detail + SCN_END_NAMESPACE +} // namespace scn + +#if defined(SCN_HEADER_ONLY) && SCN_HEADER_ONLY && 
!defined(SCN_READER_INT_CPP) +#include "reader_int.cpp" +#endif + +#endif diff --git a/src/third-party/scnlib/include/scn/reader/reader.h b/src/third-party/scnlib/include/scn/reader/reader.h new file mode 100644 index 0000000..cd955ce --- /dev/null +++ b/src/third-party/scnlib/include/scn/reader/reader.h @@ -0,0 +1,111 @@ +// Copyright 2017 Elias Kosunen +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// +// This file is a part of scnlib: +// https://github.com/eliaskosunen/scnlib + +#ifndef SCN_READER_READER_H +#define SCN_READER_READER_H + +#include "common.h" +#include "float.h" +#include "int.h" +#include "string.h" +#include "types.h" + +#include "../detail/args.h" + +namespace scn { + SCN_BEGIN_NAMESPACE + + template <> + struct scanner : public detail::code_point_scanner { + }; + template <> + struct scanner : public detail::bool_scanner { + }; + template <> + struct scanner : public detail::integer_scanner { + }; + template <> + struct scanner : public detail::integer_scanner { + }; + template <> + struct scanner : public detail::integer_scanner { + }; + template <> + struct scanner : public detail::integer_scanner { + }; + template <> + struct scanner : public detail::integer_scanner { + }; + template <> + struct scanner : public detail::integer_scanner { + }; + template <> + struct scanner : public detail::integer_scanner { + }; + template <> + struct scanner + : public detail::integer_scanner { + }; + template <> + struct scanner + : public 
detail::integer_scanner { + }; + template <> + struct scanner + : public detail::integer_scanner { + }; + template <> + struct scanner + : public detail::integer_scanner { + }; + template <> + struct scanner + : public detail::integer_scanner { + }; + template <> + struct scanner : public detail::float_scanner { + }; + template <> + struct scanner : public detail::float_scanner { + }; + template <> + struct scanner : public detail::float_scanner { + }; + template + struct scanner, Allocator>> + : public detail::string_scanner { + }; + template + struct scanner> : public detail::span_scanner { + }; + template + struct scanner> + : public detail::string_view_scanner { + }; +#if SCN_HAS_STRING_VIEW + template + struct scanner> + : public detail::std_string_view_scanner { + }; +#endif + template <> + struct scanner; + + SCN_END_NAMESPACE +} // namespace scn + +#endif diff --git a/src/third-party/scnlib/include/scn/reader/string.h b/src/third-party/scnlib/include/scn/reader/string.h new file mode 100644 index 0000000..19727ee --- /dev/null +++ b/src/third-party/scnlib/include/scn/reader/string.h @@ -0,0 +1,1336 @@ +// Copyright 2017 Elias Kosunen +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+// +// This file is a part of scnlib: +// https://github.com/eliaskosunen/scnlib + +#ifndef SCN_READER_STRING_H +#define SCN_READER_STRING_H + +#include "../util/small_vector.h" +#include "common.h" + +namespace scn { + SCN_BEGIN_NAMESPACE + namespace detail { + class set_parser_type { + public: + constexpr set_parser_type() = default; + + template + error parse_set(ParseCtx& pctx, bool& parsed) + { + using char_type = typename ParseCtx::char_type; + SCN_EXPECT(pctx.next_char() == ascii_widen('[')); + + pctx.advance_char(); + if (!pctx || pctx.check_arg_end()) { + return {error::invalid_format_string, + "Unexpected end of format string argument"}; + } + + get_option(flag::enabled) = true; + parsed = true; + + if (pctx.next_char() == ascii_widen('^')) { + // inverted + get_option(flag::inverted) = true; + pctx.advance_char(); + if (!pctx || pctx.check_arg_end()) { + return {error::invalid_format_string, + "Unexpected end of format string argument"}; + } + } + + if (pctx.next_char() == ascii_widen(']')) { + // end of range + get_option(flag::accept_all) = true; + pctx.advance_char(); + return {}; + } + + while (true) { + if (!pctx || pctx.check_arg_end()) { + return {error::invalid_format_string, + "Unexpected end of format string argument"}; + } + + const auto ch = pctx.next_char(); + if (ch == ascii_widen(']')) { + break; + } + + auto err = parse_next_char(pctx, true); + if (!err) { + return err; + } + + err = pctx.advance_cp(); + if (!err) { + pctx.advance_char(); + } + } + auto err = pctx.advance_cp(); + if (!err) { + pctx.advance_char(); + } + + return {}; + } + + error sanitize(bool localized) + { + // specifiers -> chars, if not localized + if (get_option(flag::use_specifiers)) { + if ((get_option(specifier::letters) || + get_option(specifier::alpha)) && + get_option(specifier::inverted_letters)) { + get_option(flag::accept_all) = true; + } + if (get_option(specifier::alnum_underscore) && + get_option(specifier::inverted_alnum_underscore)) { + 
get_option(flag::accept_all) = true; + } + if ((get_option(specifier::whitespace) || + get_option(specifier::space)) && + get_option(specifier::inverted_whitespace)) { + get_option(flag::accept_all) = true; + } + if ((get_option(specifier::numbers) || + get_option(specifier::digit)) && + get_option(specifier::inverted_numbers)) { + get_option(flag::accept_all) = true; + } + } + + if (get_option(flag::use_specifiers) && + !get_option(flag::accept_all)) { + if (localized) { + if (get_option(specifier::letters)) { + get_option(specifier::letters) = false; + get_option(specifier::alpha) = true; + } + if (get_option(specifier::alnum_underscore)) { + get_option(specifier::alnum_underscore) = false; + get_option(specifier::alnum) = true; + get_option('_') = true; + } + if (get_option(specifier::whitespace)) { + get_option(specifier::whitespace) = false; + get_option(specifier::space) = true; + } + if (get_option(specifier::numbers)) { + get_option(specifier::numbers) = false; + get_option(specifier::digit) = true; + } + } + else { + auto do_range = [&](char a, char b) { + for (; a < b; ++a) { + get_option(a) = true; + } + get_option(b) = true; + }; + auto do_lower = [&]() { + // a-z + do_range(0x61, 0x7a); + }; + auto do_upper = [&]() { + // A-Z + do_range(0x41, 0x5a); + }; + auto do_digit = [&]() { + // 0-9 + do_range(0x30, 0x39); + }; + + if (get_option(specifier::alnum)) { + do_lower(); + do_upper(); + do_digit(); + get_option(specifier::alnum) = false; + } + if (get_option(specifier::alpha)) { + do_lower(); + do_upper(); + get_option(specifier::alpha) = false; + } + if (get_option(specifier::blank)) { + get_option(' ') = true; + get_option('\t') = true; + get_option(specifier::blank) = false; + } + if (get_option(specifier::cntrl)) { + do_range(0, 0x1f); + get_option(0x7f) = true; + get_option(specifier::cntrl) = false; + } + if (get_option(specifier::digit)) { + do_digit(); + get_option(specifier::digit) = false; + } + if (get_option(specifier::graph)) { + 
do_range(0x21, 0x7e); + get_option(specifier::graph) = false; + } + if (get_option(specifier::lower)) { + do_lower(); + get_option(specifier::lower) = false; + } + if (get_option(specifier::print)) { + do_range(0x20, 0x7e); + get_option(specifier::print) = false; + } + if (get_option(specifier::punct)) { + do_range(0x21, 0x2f); + do_range(0x3a, 0x40); + do_range(0x5b, 0x60); + do_range(0x7b, 0x7e); + get_option(specifier::punct) = false; + } + if (get_option(specifier::space)) { + do_range(0x9, 0xd); + get_option(' ') = true; + get_option(specifier::space) = false; + } + if (get_option(specifier::upper)) { + do_upper(); + get_option(specifier::upper) = false; + } + if (get_option(specifier::xdigit)) { + do_digit(); + do_range(0x41, 0x46); + do_range(0x61, 0x66); + get_option(specifier::xdigit) = false; + } + if (get_option(specifier::letters)) { + do_upper(); + do_lower(); + get_option(specifier::letters) = false; + } + if (get_option(specifier::inverted_letters)) { + do_range(0x0, 0x2f); + do_range(0x3a, 0x40); + do_range(0x5b, 0x60); + do_range(0x7b, 0x7f); + get_option(specifier::inverted_letters) = false; + } + if (get_option(specifier::alnum_underscore)) { + do_digit(); + do_upper(); + do_lower(); + get_option('_') = true; + get_option(specifier::alnum_underscore) = false; + } + if (get_option(specifier::inverted_alnum_underscore)) { + bool underscore = get_option('_'); + do_range(0x0, 0x2f); + do_range(0x3a, 0x40); + do_range(0x5b, 0x60); + do_range(0x7b, 0x7f); + get_option('_') = underscore; // reset back + get_option(specifier::inverted_alnum_underscore) = + false; + } + if (get_option(specifier::whitespace)) { + do_range(0x9, 0xd); + get_option(' ') = true; + get_option(specifier::whitespace) = false; + } + if (get_option(specifier::inverted_whitespace)) { + do_range(0, 0x8); + do_range(0xe, 0x1f); + do_range(0x21, 0x7f); + get_option(specifier::inverted_whitespace) = false; + } + if (get_option(specifier::numbers)) { + do_digit(); + 
get_option(specifier::numbers) = false; + } + if (get_option(specifier::inverted_numbers)) { + do_range(0, 0x2f); + do_range(0x3a, 0x7f); + get_option(specifier::inverted_numbers) = false; + } + + { + bool first = get_option(0); + char i = 1; + for (; i < 0x7f; ++i) { + if (first != get_option(i)) { + break; + } + } + if (i == 0x7f && first == get_option(0x7f)) { + get_option(flag::accept_all) = true; + if (!first) { + get_option(flag::inverted) = true; + } + } + } + + get_option(flag::use_specifiers) = false; + get_option(flag::use_chars) = true; + } + } + + return {}; + } + + // true = char accepted + template + bool check_character(CharT ch, bool localized, const Locale& loc) + { + SCN_EXPECT(get_option(flag::enabled)); + + const bool not_inverted = !get_option(flag::inverted); + if (get_option(flag::accept_all)) { + return not_inverted; + } + + if (get_option(flag::use_specifiers)) { + SCN_EXPECT(localized); // ensured by sanitize() + SCN_UNUSED(localized); + SCN_CLANG_PUSH_IGNORE_UNDEFINED_TEMPLATE + if (get_option(specifier::alnum) && + loc.get_localized().is_alnum(ch)) { + return not_inverted; + } + if (get_option(specifier::alpha) && + loc.get_localized().is_alpha(ch)) { + return not_inverted; + } + if (get_option(specifier::blank) && + loc.get_localized().is_blank(ch)) { + return not_inverted; + } + if (get_option(specifier::cntrl) && + loc.get_localized().is_cntrl(ch)) { + return not_inverted; + } + if (get_option(specifier::digit) && + loc.get_localized().is_digit(ch)) { + return not_inverted; + } + if (get_option(specifier::graph) && + loc.get_localized().is_graph(ch)) { + return not_inverted; + } + if (get_option(specifier::lower) && + loc.get_localized().is_lower(ch)) { + return not_inverted; + } + if (get_option(specifier::print) && + loc.get_localized().is_print(ch)) { + return not_inverted; + } + if (get_option(specifier::punct) && + loc.get_localized().is_punct(ch)) { + return not_inverted; + } + if (get_option(specifier::space) && + 
loc.get_localized().is_space(ch)) { + return not_inverted; + } + if (get_option(specifier::upper) && + loc.get_localized().is_upper(ch)) { + return not_inverted; + } + if (get_option(specifier::xdigit) && + loc.get_localized().is_xdigit(ch)) { + return not_inverted; + } + SCN_CLANG_POP_IGNORE_UNDEFINED_TEMPLATE + } + if (get_option(flag::use_chars) && (ch >= 0 && ch <= 0x7f)) { + if (get_option(static_cast(ch))) { + return not_inverted; + } + } + if (get_option(flag::use_ranges)) { + const auto c = static_cast(ch); + for (const auto& e : set_extra_ranges) { + if (c >= e.begin && c <= e.end) { + return not_inverted; + } + } + } + return !not_inverted; + } + + enum class specifier : size_t { + alnum = 0x80, + alpha, + blank, + cntrl, + digit, + graph, + lower, + print, + punct, + space, + upper, + xdigit, + letters = 0x90, // \l + inverted_letters, // \L + alnum_underscore, // \w + inverted_alnum_underscore, // \W + whitespace, // \s + inverted_whitespace, // \S + numbers, // \d + inverted_numbers, // \D + last = 0x9f + }; + enum class flag : size_t { + enabled = 0xa0, // using [set] + accept_all, // empty [set] + inverted, // ^ flag + // 0x00 - 0x7f + use_chars, + // 0x80 - 0x8f + use_specifiers, + // set_extra_ranges + use_ranges, + last = 0xaf + }; + + bool& get_option(char ch) + { + SCN_GCC_PUSH + SCN_GCC_IGNORE("-Wtype-limits") + SCN_EXPECT(ch >= 0 && ch <= 0x7f); + SCN_GCC_POP + return set_options[static_cast(ch)]; + } + SCN_NODISCARD bool get_option(char ch) const + { + SCN_GCC_PUSH + SCN_GCC_IGNORE("-Wtype-limits") + SCN_EXPECT(ch >= 0 && ch <= 0x7f); + SCN_GCC_POP + return set_options[static_cast(ch)]; + } + + bool& get_option(specifier s) + { + return set_options[static_cast(s)]; + } + SCN_NODISCARD bool get_option(specifier s) const + { + return set_options[static_cast(s)]; + } + + bool& get_option(flag f) + { + return set_options[static_cast(f)]; + } + SCN_NODISCARD bool get_option(flag f) const + { + return set_options[static_cast(f)]; + } + + 
SCN_NODISCARD bool enabled() const + { + return get_option(flag::enabled); + } + + private: + void accept_char(char ch) + { + get_option(ch) = true; + get_option(flag::use_chars) = true; + } + void accept_char(code_point cp) + { + if (cp >= 0 && cp <= 0x7f) { + return accept_char(static_cast(cp)); + } + set_extra_ranges.push_back(set_range::single(cp)); + get_option(flag::use_ranges) = true; + } + void accept_char(wchar_t ch) + { + SCN_GCC_COMPAT_PUSH + SCN_GCC_COMPAT_IGNORE("-Wtype-limits") + if (ch >= 0 && ch <= 0x7f) { + return accept_char(static_cast(ch)); + } + SCN_GCC_COMPAT_POP + set_extra_ranges.push_back(set_range::single(ch)); + get_option(flag::use_ranges) = true; + } + + void accept_char_range(char first, char last) + { + SCN_EXPECT(first >= 0); + SCN_EXPECT(last >= 0); + SCN_EXPECT(first <= last); + get_option(flag::use_chars) = true; + for (; first != last; ++first) { + get_option(first) = true; + } + SCN_ENSURE(first == last); + get_option(last) = true; + } + void accept_char_range(code_point first, code_point last) + { + SCN_EXPECT(first <= last); + if (first >= 0 && last <= 0x7f) { + return accept_char_range(static_cast(first), + static_cast(last)); + } + set_extra_ranges.push_back(set_range::range(first, last)); + get_option(flag::use_ranges) = true; + } + void accept_char_range(wchar_t first, wchar_t last) + { + SCN_EXPECT(first <= last); + SCN_GCC_COMPAT_PUSH + SCN_GCC_COMPAT_IGNORE("-Wtype-limits") + if (first >= 0 && last <= 0x7f) { + return accept_char_range(static_cast(first), + static_cast(last)); + } + SCN_GCC_COMPAT_POP + set_extra_ranges.push_back(set_range::range(first, last)); + get_option(flag::use_ranges) = true; + } + + template + error parse_range(ParseCtx& pctx, code_point begin) + { + using char_type = typename ParseCtx::char_type; + SCN_EXPECT(pctx.next_char() == ascii_widen('-')); + if (pctx.can_peek_char() && + pctx.peek_char() == ascii_widen(']')) { + // Just a '-' + accept_char(begin); + accept_char(ascii_widen('-')); + 
return {}; + } + pctx.advance_char(); + if (!pctx || pctx.check_arg_end()) { + return {error::invalid_format_string, + "Unexpected end of format string argument"}; + } + return parse_next_char(pctx, false, begin); + } + template + error parse_literal(ParseCtx& pctx, + bool allow_range, + code_point begin = make_code_point(0)) + { + using char_type = typename ParseCtx::char_type; + if (allow_range) { + auto e = pctx.peek_cp(); + if (!e && e.error().code() != error::end_of_range) { + return e.error(); + } + if (e && e.value() == ascii_widen('-')) { + const auto cp = pctx.next_cp(); + if (!cp) { + return cp.error(); + } + auto err = pctx.advance_cp(); + if (!err) { + return err; + } + return parse_range(pctx, cp.value()); + } + } + const auto cp = pctx.next_cp(); + if (!cp) { + return cp.error(); + } + if (cp.value() >= 0 && cp.value() <= 0x7f) { + if (!allow_range) { + if (static_cast< + typename std::make_unsigned::type>( + cp.value()) < + static_cast< + typename std::make_unsigned::type>( + begin)) { + return {error::invalid_format_string, + "Last char in [set] range is less than the " + "first"}; + } + accept_char_range(begin, cp.value()); + } + else { + accept_char(cp.value()); + } + } + else { + if (!allow_range) { + if (static_cast< + typename std::make_unsigned::type>( + cp.value()) < + static_cast< + typename std::make_unsigned::type>( + begin)) { + return {error::invalid_format_string, + "Last char in [set] range is less than the " + "first"}; + } + set_extra_ranges.push_back( + set_range::range(begin, cp.value())); + } + else { + set_extra_ranges.push_back( + set_range::single(cp.value())); + } + get_option(flag::use_ranges) = true; + } + return {}; + } + template + error parse_colon_specifier(ParseCtx& pctx) + { + using char_type = typename ParseCtx::char_type; + SCN_EXPECT(pctx.next_char() == ascii_widen(':')); + pctx.advance_char(); + if (!pctx || pctx.check_arg_end()) { + return {error::invalid_format_string, + "Unexpected end of format string 
argument"}; + } + if (pctx.next_char() == ascii_widen(']')) { + return { + error::invalid_format_string, + "Unexpected end of [set] in format string after ':'"}; + } + + std::basic_string buf; + while (true) { + if (!pctx || pctx.check_arg_end()) { + return {error::invalid_format_string, + "Unexpected end of format string argument"}; + } + auto ch = pctx.next_char(); + if (ch == ascii_widen(':')) { + break; + } + if (ch == ascii_widen(']')) { + return {error::invalid_format_string, + "Unexpected end of [set] :specifier:, did you " + "forget a terminating colon?"}; + } + buf.push_back(ch); + pctx.advance_char(); + } + + auto ch = pctx.next_char(); + if (buf == all_str(ch)) { + get_option(flag::accept_all) = true; + return {}; + } + if (buf == alnum_str(ch)) { + get_option(specifier::alnum) = true; + get_option(flag::use_specifiers) = true; + return {}; + } + if (buf == alpha_str(ch)) { + get_option(specifier::alpha) = true; + get_option(flag::use_specifiers) = true; + return {}; + } + if (buf == blank_str(ch)) { + get_option(specifier::blank) = true; + get_option(flag::use_specifiers) = true; + return {}; + } + if (buf == cntrl_str(ch)) { + get_option(specifier::cntrl) = true; + get_option(flag::use_specifiers) = true; + return {}; + } + if (buf == digit_str(ch)) { + get_option(specifier::digit) = true; + get_option(flag::use_specifiers) = true; + return {}; + } + if (buf == graph_str(ch)) { + get_option(specifier::graph) = true; + get_option(flag::use_specifiers) = true; + return {}; + } + if (buf == lower_str(ch)) { + get_option(specifier::lower) = true; + get_option(flag::use_specifiers) = true; + return {}; + } + if (buf == print_str(ch)) { + get_option(specifier::print) = true; + get_option(flag::use_specifiers) = true; + return {}; + } + if (buf == punct_str(ch)) { + get_option(specifier::punct) = true; + get_option(flag::use_specifiers) = true; + return {}; + } + if (buf == space_str(ch)) { + get_option(specifier::space) = true; + 
get_option(flag::use_specifiers) = true; + return {}; + } + if (buf == upper_str(ch)) { + get_option(specifier::upper) = true; + get_option(flag::use_specifiers) = true; + return {}; + } + if (buf == xdigit_str(ch)) { + get_option(specifier::xdigit) = true; + get_option(flag::use_specifiers) = true; + return {}; + } + + return {error::invalid_format_string, + "Invalid :specifier: in [set]"}; + } + template + error parse_backslash_hex(ParseCtx& pctx, + bool allow_range, + code_point begin = make_code_point(0)) + { + using char_type = typename ParseCtx::char_type; + SCN_EXPECT(pctx.next_char() == ascii_widen('x') || + pctx.next_char() == ascii_widen('u') || + pctx.next_char() == ascii_widen('U')); + + const char_type flag_char = pctx.next_char(); + const int chars = [flag_char]() { + auto ch = static_cast(flag_char); + if (ch == 'x') { + return 2; + } + if (ch == 'u') { + return 4; + } + if (ch == 'U') { + return 8; + } + SCN_ENSURE(false); + SCN_UNREACHABLE; + }(); + + char_type str[8] = {0}; + for (int i = 0; i < chars; ++i) { + pctx.advance_char(); + if (!pctx || pctx.check_arg_end()) { + return {error::invalid_format_string, + "Unexpected end of format string argument " + "after '\\x', '\\u', or '\\U'"}; + } + if (pctx.next_char() == ascii_widen(']')) { + return {error::invalid_format_string, + "Unexpected end of [set] in format string " + "after '\\x', '\\u', or '\\U'"}; + } + str[i] = pctx.next_char(); + } + + auto scanner = simple_integer_scanner{}; + uint64_t i; + SCN_CLANG_PUSH_IGNORE_UNDEFINED_TEMPLATE + auto res = scanner.scan( + scn::make_span(str, static_cast(chars)).as_const(), + i, 16); + SCN_CLANG_POP_IGNORE_UNDEFINED_TEMPLATE + if (!res) { + return {error::invalid_format_string, + "Failed to parse '\\x', '\\u', or '\\U' flag in " + "format string"}; + } + const uint64_t min = 0; + const uint64_t max = [chars]() { + if (chars == 2) { + // \x + return uint64_t{0x7f}; + } + if (chars == 4) { + return uint64_t{0xffff}; + } + if (chars == 8) { + return 
uint64_t{0xffffffff}; + } + SCN_ENSURE(false); + SCN_UNREACHABLE; + }(); + if (i < min || i > max) { + return {error::invalid_format_string, + "'\\x', '\\u', or '\\U' option in format string " + "out of range"}; + } + + if (allow_range && pctx.can_peek_char() && + pctx.peek_char() == ascii_widen('-')) { + pctx.advance_char(); + return parse_range(pctx, make_code_point(i)); + } + if (!allow_range) { + accept_char_range(begin, make_code_point(i)); + } + else { + accept_char(make_code_point(i)); + } + return {}; + } + template + error parse_backslash_specifier( + ParseCtx& pctx, + bool allow_range, + code_point begin = make_code_point(0)) + { + using char_type = typename ParseCtx::char_type; + SCN_EXPECT(pctx.next_char() == ascii_widen('\\')); + pctx.advance_char(); + + if (!pctx || pctx.check_arg_end()) { + return {error::invalid_format_string, + "Unexpected end of format string argument"}; + } + if (pctx.next_char() == ascii_widen(']') && + pctx.can_peek_char() && + pctx.peek_char() == ascii_widen('}')) { + return {error::invalid_format_string, + "Unexpected end of [set] in format string"}; + } + + if (pctx.next_char() == ascii_widen('\\')) { + // Literal "\\" + accept_char(pctx.next_char()); + return {}; + } + + // specifiers + if (pctx.next_char() == ascii_widen('l')) { + // \l + get_option(specifier::letters) = true; + get_option(flag::use_specifiers) = true; + return {}; + } + if (pctx.next_char() == ascii_widen('L')) { + // \L + get_option(specifier::inverted_letters) = true; + get_option(flag::use_specifiers) = true; + return {}; + } + + if (pctx.next_char() == ascii_widen('w')) { + // \w + get_option(specifier::alnum_underscore) = true; + get_option(flag::use_specifiers) = true; + return {}; + } + if (pctx.next_char() == ascii_widen('W')) { + // \W + get_option(specifier::inverted_alnum_underscore) = true; + get_option(flag::use_specifiers) = true; + return {}; + } + + if (pctx.next_char() == ascii_widen('s')) { + // \s + get_option(specifier::whitespace) = 
true; + get_option(flag::use_specifiers) = true; + return {}; + } + if (pctx.next_char() == ascii_widen('S')) { + // \S + get_option(specifier::inverted_whitespace) = true; + get_option(flag::use_specifiers) = true; + return {}; + } + + if (pctx.next_char() == ascii_widen('d')) { + // \d + get_option(specifier::numbers) = true; + get_option(flag::use_specifiers) = true; + return {}; + } + if (pctx.next_char() == ascii_widen('D')) { + // \D + get_option(specifier::inverted_numbers) = true; + get_option(flag::use_specifiers) = true; + return {}; + } + + if (pctx.next_char() == ascii_widen('x') || + pctx.next_char() == ascii_widen('u') || + pctx.next_char() == ascii_widen('U')) { + // \x__, \u____, or \U________ + return parse_backslash_hex(pctx, allow_range, begin); + } + + // Literal, e.g. \: -> : + return parse_literal(pctx, true); + } + template + error parse_next_char(ParseCtx& pctx, + bool allow_range, + code_point begin = make_code_point(0)) + { + using char_type = typename ParseCtx::char_type; + const auto ch = pctx.next_char(); + if (ch == ascii_widen('\\')) { + return parse_backslash_specifier(pctx, allow_range, begin); + } + if (allow_range && ch == ascii_widen(':')) { + return parse_colon_specifier(pctx); + } + return parse_literal(pctx, allow_range, begin); + } + + SCN_NODISCARD static constexpr const char* all_str(char) + { + return "all"; + } + SCN_NODISCARD static constexpr const wchar_t* all_str(wchar_t) + { + return L"all"; + } + SCN_NODISCARD static constexpr const char* alnum_str(char) + { + return "alnum"; + } + SCN_NODISCARD static constexpr const wchar_t* alnum_str(wchar_t) + { + return L"alnum"; + } + SCN_NODISCARD static constexpr const char* alpha_str(char) + { + return "alpha"; + } + SCN_NODISCARD static constexpr const wchar_t* alpha_str(wchar_t) + { + return L"alpha"; + } + SCN_NODISCARD static constexpr const char* blank_str(char) + { + return "blank"; + } + SCN_NODISCARD static constexpr const wchar_t* blank_str(wchar_t) + { + return 
L"blank"; + } + SCN_NODISCARD static constexpr const char* cntrl_str(char) + { + return "cntrl"; + } + SCN_NODISCARD static constexpr const wchar_t* cntrl_str(wchar_t) + { + return L"cntrl"; + } + SCN_NODISCARD static constexpr const char* digit_str(char) + { + return "digit"; + } + SCN_NODISCARD static constexpr const wchar_t* digit_str(wchar_t) + { + return L"digit"; + } + SCN_NODISCARD static constexpr const char* graph_str(char) + { + return "graph"; + } + SCN_NODISCARD static constexpr const wchar_t* graph_str(wchar_t) + { + return L"graph"; + } + SCN_NODISCARD static constexpr const char* lower_str(char) + { + return "lower"; + } + SCN_NODISCARD static constexpr const wchar_t* lower_str(wchar_t) + { + return L"lower"; + } + SCN_NODISCARD static constexpr const char* print_str(char) + { + return "print"; + } + SCN_NODISCARD static constexpr const wchar_t* print_str(wchar_t) + { + return L"print"; + } + SCN_NODISCARD static constexpr const char* punct_str(char) + { + return "punct"; + } + SCN_NODISCARD static constexpr const wchar_t* punct_str(wchar_t) + { + return L"punct"; + } + SCN_NODISCARD static constexpr const char* space_str(char) + { + return "space"; + } + SCN_NODISCARD static constexpr const wchar_t* space_str(wchar_t) + { + return L"space"; + } + SCN_NODISCARD static constexpr const char* upper_str(char) + { + return "upper"; + } + SCN_NODISCARD static constexpr const wchar_t* upper_str(wchar_t) + { + return L"upper"; + } + SCN_NODISCARD static constexpr const char* xdigit_str(char) + { + return "xdigit"; + } + SCN_NODISCARD static constexpr const wchar_t* xdigit_str(wchar_t) + { + return L"xdigit"; + } + + // 0x00 - 0x7f, individual chars, true = accept + // 0x80 - 0x9f, specifiers, true = accept (if use_specifiers = true) + // 0xa0 - 0xaf, flags + array set_options{{false}}; + + struct set_range { + constexpr set_range(uint32_t b, uint32_t e) : begin(b), end(e) + { + } + + uint32_t begin{}; + uint32_t end{}; // inclusive + + static set_range 
single(code_point cp) + { + return {static_cast(cp), + static_cast(cp)}; + } + static set_range single(wchar_t ch) + { + return {static_cast(ch), + static_cast(ch)}; + } + + static set_range range(code_point begin, code_point end) + { + SCN_EXPECT(begin <= end); + return {static_cast(begin), + static_cast(end)}; + } + static set_range range(wchar_t begin, wchar_t end) + { + SCN_EXPECT(begin <= end); + return {static_cast(begin), + static_cast(end)}; + } + }; + // Used if set_options[use_ranges] = true + small_vector set_extra_ranges{}; + }; + + struct string_scanner : common_parser { + static constexpr bool skip_preceding_whitespace() + { + return false; + } + + template + error parse(ParseCtx& pctx) + { + using char_type = typename ParseCtx::char_type; + + auto s_flag = detail::ascii_widen('s'); + bool s_set{}; + + auto each = [&](ParseCtx& p, bool& parsed) -> error { + if (p.next_char() == ascii_widen('[')) { + if (set_parser.get_option( + set_parser_type::flag::enabled)) { + return {error::invalid_format_string, + "[set] already specified for this argument " + "in format string"}; + } + return set_parser.parse_set(p, parsed); + } + return {}; + }; + auto e = parse_common(pctx, span{&s_flag, 1}, + span{&s_set, 1}, each); + if (!e) { + return e; + } + if (set_parser.enabled()) { + bool loc = (common_options & localized) != 0; + return set_parser.sanitize(loc); + } + return {}; + } + + template + error scan( + std::basic_string, + Allocator>& val, + Context& ctx) + { + if (set_parser.enabled()) { + bool loc = (common_options & localized) != 0; + bool mb = (loc || set_parser.get_option( + set_parser_type::flag::use_ranges)) && + is_multichar_type(typename Context::char_type{}); + return do_scan(ctx, val, + pred{ctx, set_parser, loc, mb}); + } + + auto e = skip_range_whitespace(ctx, false); + if (!e) { + return e; + } + + auto is_space_pred = make_is_space_predicate( + ctx.locale(), (common_options & localized) != 0, + field_width); + return do_scan(ctx, val, 
is_space_pred); + } + + set_parser_type set_parser; + + protected: + template + error do_scan( + Context& ctx, + std::basic_string, + Allocator>& val, + Pred&& predicate) + { + using string_type = std::basic_string< + typename Context::char_type, + std::char_traits, Allocator>; + + if (Context::range_type::is_contiguous) { + auto s = read_until_space_zero_copy( + ctx.range(), SCN_FWD(predicate), false); + if (!s) { + return s.error(); + } + if (s.value().size() == 0) { + return {error::invalid_scanned_value, + "Empty string parsed"}; + } + val.assign(s.value().data(), s.value().size()); + return {}; + } + + string_type tmp(val.get_allocator()); + auto outputit = std::back_inserter(tmp); + auto ret = read_until_space(ctx.range(), outputit, + SCN_FWD(predicate), false); + if (SCN_UNLIKELY(!ret)) { + return ret; + } + if (SCN_UNLIKELY(tmp.empty())) { + return {error::invalid_scanned_value, + "Empty string parsed"}; + } + val = SCN_MOVE(tmp); + + return {}; + } + + template + struct pred { + Context& ctx; + set_parser_type& set_parser; + bool localized; + bool multibyte; + + bool operator()(span ch) const + { + SCN_EXPECT(ch.size() >= 1); + code_point cp{}; + auto it = parse_code_point(ch.begin(), ch.end(), cp); + if (!it) { + // todo: is this really a good idea + return !set_parser.check_character(ch[0], localized, + ctx.locale()); + } + return !set_parser.check_character(cp, localized, + ctx.locale()); + } + bool operator()(span ch) const + { + SCN_EXPECT(ch.size() == 1); + return !set_parser.check_character(ch[0], localized, + ctx.locale()); + } + constexpr bool is_localized() const + { + return localized; + } + constexpr bool is_multibyte() const + { + return multibyte; + } + }; + }; + + struct span_scanner : public string_scanner { + template + error scan(span& val, Context& ctx) + { + if (val.size() == 0) { + return {error::invalid_scanned_value, + "Cannot scan into an empty span"}; + } + + if (set_parser.enabled()) { + bool loc = (common_options & localized) != 
0; + bool mb = (loc || set_parser.get_option( + set_parser_type::flag::use_ranges)) && + is_multichar_type(typename Context::char_type{}); + return do_scan(ctx, val, + string_scanner::pred{ + ctx, set_parser, loc, mb}); + } + + auto e = skip_range_whitespace(ctx, false); + if (!e) { + return e; + } + + auto is_space_pred = make_is_space_predicate( + ctx.locale(), (common_options & localized) != 0, + field_width != 0 ? min(field_width, val.size()) + : val.size()); + return do_scan(ctx, val, is_space_pred); + } + + protected: + template + error do_scan(Context& ctx, + span& val, + Pred&& predicate) + { + if (Context::range_type::is_contiguous) { + auto s = read_until_space_zero_copy( + ctx.range(), SCN_FWD(predicate), false); + if (!s) { + return s.error(); + } + if (s.value().size() == 0) { + return {error::invalid_scanned_value, + "Empty string parsed"}; + } + std::copy(s.value().begin(), s.value().end(), val.begin()); + val = val.first(s.value().size()); + return {}; + } + + std::basic_string tmp; + auto outputit = std::back_inserter(tmp); + auto ret = read_until_space(ctx.range(), outputit, + SCN_FWD(predicate), false); + if (SCN_UNLIKELY(!ret)) { + return ret; + } + if (SCN_UNLIKELY(tmp.empty())) { + return {error::invalid_scanned_value, + "Empty string parsed"}; + } + std::copy(tmp.begin(), tmp.end(), val.begin()); + val = val.first(tmp.size()); + + return {}; + } + }; + + struct string_view_scanner : string_scanner { + public: + template + error scan(basic_string_view& val, + Context& ctx) + { + if (!Context::range_type::is_contiguous) { + return {error::invalid_operation, + "Cannot read a string_view from a " + "non-contiguous_range"}; + } + + if (set_parser.enabled()) { + bool loc = (common_options & localized) != 0; + bool mb = (loc || set_parser.get_option( + set_parser_type::flag::use_ranges)) && + is_multichar_type(typename Context::char_type{}); + return do_scan(ctx, val, + string_scanner::pred{ + ctx, set_parser, loc, mb}); + } + + auto e = 
skip_range_whitespace(ctx, false); + if (!e) { + return e; + } + + auto is_space_pred = make_is_space_predicate( + ctx.locale(), (common_options & localized) != 0, + field_width); + return do_scan(ctx, val, is_space_pred); + } + + protected: + template + error do_scan(Context& ctx, + basic_string_view& val, + Pred&& predicate) + { + SCN_EXPECT(Context::range_type::is_contiguous); + + auto s = read_until_space_zero_copy(ctx.range(), + SCN_FWD(predicate), false); + if (!s) { + return s.error(); + } + if (s.value().size() == 0) { + return {error::invalid_scanned_value, + "Empty string parsed"}; + } + val = basic_string_view( + s.value().data(), s.value().size()); + return {}; + } + }; + +#if SCN_HAS_STRING_VIEW + struct std_string_view_scanner : string_view_scanner { + template + error scan(std::basic_string_view& val, + Context& ctx) + { + using char_type = typename Context::char_type; + auto sv = + ::scn::basic_string_view(val.data(), val.size()); + auto e = string_view_scanner::scan(sv, ctx); + if (e) { + val = + std::basic_string_view(sv.data(), sv.size()); + } + return e; + } + }; +#endif + } // namespace detail + SCN_END_NAMESPACE +} // namespace scn + +#endif diff --git a/src/third-party/scnlib/include/scn/reader/types.h b/src/third-party/scnlib/include/scn/reader/types.h new file mode 100644 index 0000000..047d10d --- /dev/null +++ b/src/third-party/scnlib/include/scn/reader/types.h @@ -0,0 +1,220 @@ +// Copyright 2017 Elias Kosunen +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+// See the License for the specific language governing permissions and +// limitations under the License. +// +// This file is a part of scnlib: +// https://github.com/eliaskosunen/scnlib + +#ifndef SCN_READER_TYPES_H +#define SCN_READER_TYPES_H + +#include "int.h" + +namespace scn { + SCN_BEGIN_NAMESPACE + namespace detail { + struct code_point_scanner : common_parser { + static constexpr bool skip_preceding_whitespace() + { + return false; + } + + template + error parse(ParseCtx& pctx) + { + using char_type = typename ParseCtx::char_type; + + auto c_flag = detail::ascii_widen('c'); + bool c_set{}; + return parse_common(pctx, span{&c_flag, 1}, + span{&c_set, 1}, + null_type_cb); + } + + template + error scan(code_point& val, Context& ctx) + { + unsigned char buf[4] = {0}; + auto cp = read_code_point(ctx.range(), make_span(buf, 4)); + if (!cp) { + return cp.error(); + } + val = cp.value().cp; + return {}; + } + }; + + struct bool_scanner : common_parser { + template + error parse(ParseCtx& pctx) + { + using char_type = typename ParseCtx::char_type; + + array options{{ + // Only strings + ascii_widen('s'), + // Only ints + ascii_widen('i'), + // Localized digits + ascii_widen('n'), + }}; + bool flags[3] = {false}; + auto e = parse_common( + pctx, span{options.begin(), options.end()}, + span{flags, 3}, null_type_cb); + + if (!e) { + return e; + } + + format_options = 0; + // default ('s' + 'i') + if (!flags[0] && !flags[1]) { + format_options |= allow_string | allow_int; + } + // 's' + if (flags[0]) { + format_options |= allow_string; + } + // 'i' + if (flags[1]) { + format_options |= allow_int; + } + // 'n' + if (flags[2]) { + format_options |= localized_digits; + // 'n' implies 'L' + common_options |= localized; + } + return {}; + } + + template + error scan(bool& val, Context& ctx) + { + using char_type = typename Context::char_type; + + if ((format_options & allow_string) != 0) { + auto truename = ctx.locale().get_static().truename(); + auto falsename = 
ctx.locale().get_static().falsename(); + if ((common_options & localized) != 0) { + truename = ctx.locale().get_localized().truename(); + falsename = ctx.locale().get_localized().falsename(); + } + const auto max_len = + detail::max(truename.size(), falsename.size()); + std::basic_string buf; + buf.reserve(max_len); + + auto tmp_it = std::back_inserter(buf); + auto is_space_pred = make_is_space_predicate( + ctx.locale(), (common_options & localized) != 0, + field_width); + auto e = read_until_space(ctx.range(), tmp_it, + is_space_pred, false); + if (!e) { + return e; + } + + bool found = false; + if (buf.size() >= falsename.size()) { + if (std::equal(falsename.begin(), falsename.end(), + buf.begin())) { + val = false; + found = true; + } + } + if (!found && buf.size() >= truename.size()) { + if (std::equal(truename.begin(), truename.end(), + buf.begin())) { + val = true; + found = true; + } + } + if (found) { + return {}; + } + else { + auto pb = + putback_n(ctx.range(), + static_cast(buf.size())); + if (!pb) { + return pb; + } + } + } + + if ((format_options & allow_int) != 0) { + if ((format_options & localized_digits) != 0) { + int i{}; + auto s = integer_scanner{}; + s.common_options = integer_scanner::localized; + s.format_options = + integer_scanner::only_unsigned | + integer_scanner::localized_digits; + auto e = s.scan(i, ctx); + if (!e) { + return e; + } + if (SCN_UNLIKELY(i != 0 && i != 1)) { + return { + error::invalid_scanned_value, + "Scanned integral boolean not equal to 0 or 1"}; + } + else if (i == 0) { + val = false; + } + else { + val = true; + } + return {}; + } + + unsigned char buf[4] = {0}; + auto cp = read_code_point(ctx.range(), make_span(buf, 4)); + if (!cp) { + return cp.error(); + } + if (cp.value().cp == detail::ascii_widen('0')) { + val = false; + return {}; + } + if (cp.value().cp == detail::ascii_widen('1')) { + val = true; + return {}; + } + auto pb = putback_n(ctx.range(), cp.value().chars.ssize()); + if (!pb) { + return pb; + } + } 
+ + return {error::invalid_scanned_value, "Couldn't scan bool"}; + } + + enum format_options_type { + // 's' option + allow_string = 1, + // 'i' option + allow_int = 2, + // 'n' option + localized_digits = 4 + }; + uint8_t format_options{allow_string | allow_int}; + }; + + } // namespace detail + SCN_END_NAMESPACE +} // namespace scn + +#endif -- cgit v1.2.3