// Copyright 2017 Elias Kosunen // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. // You may obtain a copy of the License at // // https://www.apache.org/licenses/LICENSE-2.0 // // Unless required by applicable law or agreed to in writing, software // distributed under the License is distributed on an "AS IS" BASIS, // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. // See the License for the specific language governing permissions and // limitations under the License. // // This file is a part of scnlib: // https://github.com/eliaskosunen/scnlib #ifndef SCN_READER_INT_H #define SCN_READER_INT_H #include "../util/math.h" #include "common.h" namespace scn { SCN_BEGIN_NAMESPACE namespace detail { template struct integer_scanner : common_parser { static_assert(std::is_integral::value, "integer_scanner requires an integral type"); friend struct simple_integer_scanner; bool skip_preceding_whitespace() { // if format_options == single_code_unit, // then we're scanning a char -> don't skip return format_options != single_code_unit; } template error parse(ParseCtx& pctx) { using char_type = typename ParseCtx::char_type; format_options = 0; int custom_base = 0; auto each = [&](ParseCtx& p, bool& parsed) -> error { parsed = false; auto ch = pctx.next_char(); if (ch == detail::ascii_widen('B')) { // Custom base p.advance_char(); if (SCN_UNLIKELY(!p)) { return {error::invalid_format_string, "Unexpected format string end"}; } if (SCN_UNLIKELY(p.check_arg_end())) { return {error::invalid_format_string, "Unexpected argument end"}; } ch = p.next_char(); const auto zero = detail::ascii_widen('0'), nine = detail::ascii_widen('9'); integer_type_for_char tmp = 0; if (ch < zero || ch > nine) { return {error::invalid_format_string, "Invalid character after 'B', " "expected digit"}; } tmp = static_cast>( p.next_char() - zero); if (tmp < 1) { return {error::invalid_format_string, "Invalid base, must be between 2 and 36"}; } p.advance_char(); if (!p) { return {error::invalid_format_string, "Unexpected end of format string"}; } if (p.check_arg_end()) { custom_base = static_cast(tmp); parsed = true; return {}; } ch = p.next_char(); if (ch < zero || ch > nine) { return {error::invalid_format_string, "Invalid character after 'B', " "expected digit"}; } tmp *= 10; tmp += static_cast>( ch - zero); if (tmp < 2 || tmp > 36) { return {error::invalid_format_string, "Invalid base, must be between 2 and 36"}; } custom_base = static_cast(tmp); parsed = true; pctx.advance_char(); return {}; } return {}; }; array options{{// decimal ascii_widen('d'), // binary ascii_widen('b'), // octal ascii_widen('o'), // hex ascii_widen('x'), // detect base ascii_widen('i'), // unsigned decimal ascii_widen('u'), // code unit ascii_widen('c'), // localized digits ascii_widen('n'), // thsep ascii_widen('\'')}}; bool flags[9] = {false}; auto e = parse_common( pctx, span{options.begin(), options.end()}, span{flags, 9}, each); if (!e) { return e; } int base_flags_set = int(flags[0]) + int(flags[1]) + int(flags[2]) + int(flags[3]) + int(flags[4]) + int(flags[5]) + int(custom_base != 0); if (SCN_UNLIKELY(base_flags_set > 1)) { return {error::invalid_format_string, "Up to one base flags ('d', 'i', 'u', 'b', 'o', " "'x', 'B') allowed"}; } else if (base_flags_set == 0) { // Default: // 'c' for CharT // 'd' otherwise if (std::is_same::value) { format_options = single_code_unit; } else { base = 10; } } else if (custom_base != 0) { // B__ base = static_cast(custom_base); } else if (flags[0]) { // 'd' flag base = 10; } else if (flags[1]) { // 'b' flag base = 2; format_options |= allow_base_prefix; } else if (flags[2]) { // 'o' flag base = 8; format_options |= allow_base_prefix; } else if (flags[3]) { // 'x' flag base = 16; format_options |= allow_base_prefix; } else if (flags[4]) { // 'i' flag base = 0; } else if (flags[5]) { // 'u' flag base = 10; format_options |= only_unsigned; } // n set, implies L if (flags[7]) { common_options |= localized; format_options |= localized_digits; } if ((format_options & localized_digits) != 0 && (base != 0 && base != 10 && base != 8 && base != 16)) { return {error::invalid_format_string, "Localized integers can only be scanned in " "bases 8, 10 and 16"}; } // thsep flag if (flags[8]) { format_options |= allow_thsep; } // 'c' flag -> no other options allowed if (flags[6]) { if (!(format_options == 0 || format_options == single_code_unit) || base_flags_set != 0) { return {error::invalid_format_string, "'c' flag cannot be used in conjunction with " "any other flags"}; } format_options = single_code_unit; } return {}; } template error scan(T& val, Context& ctx) { using char_type = typename Context::char_type; auto do_parse_int = [&](span s) -> error { T tmp = 0; expected ret{0}; if (SCN_UNLIKELY((format_options & localized_digits) != 0)) { SCN_CLANG_PUSH_IGNORE_UNDEFINED_TEMPLATE int b{base}; auto r = parse_base_prefix(s, b); if (!r) { return r.error(); } if (b == -1) { // -1 means we read a '0' tmp = 0; return {}; } if (b != 10 && base != b && base != 0) { return {error::invalid_scanned_value, "Invalid base prefix"}; } if (base == 0) { base = static_cast(b); } if (base != 8 && base != 10 && base != 16) { return {error::invalid_scanned_value, "Localized values have to be in base " "8, 10 or 16"}; } auto it = r.value(); std::basic_string str(to_address(it), s.size()); ret = ctx.locale().get_localized().read_num( tmp, str, static_cast(base)); if (tmp < T{0} && (format_options & only_unsigned) != 0) { return {error::invalid_scanned_value, "Parsed negative value when type was 'u'"}; } SCN_CLANG_POP_IGNORE_UNDEFINED_TEMPLATE } else { SCN_CLANG_PUSH_IGNORE_UNDEFINED_TEMPLATE ret = _parse_int(tmp, s); SCN_CLANG_POP_IGNORE_UNDEFINED_TEMPLATE } if (!ret) { return ret.error(); } if (ret.value() != s.ssize()) { auto pb = putback_n(ctx.range(), s.ssize() - ret.value()); if (!pb) { return pb; } } val = tmp; return {}; }; if (format_options == single_code_unit) { SCN_MSVC_PUSH SCN_MSVC_IGNORE(4127) // conditional expression is constant if (sizeof(T) < sizeof(char_type)) { // sizeof(char_type) > 1 -> wide range // Code unit might not fit return error{error::invalid_operation, "Cannot read this type as a code unit " "from a wide range"}; } SCN_MSVC_POP auto ch = read_code_unit(ctx.range()); if (!ch) { return ch.error(); } val = static_cast(ch.value()); return {}; } SCN_MSVC_PUSH SCN_MSVC_IGNORE(4127) // conditional expression is constant if ((std::is_same::value || std::is_same::value) && !std::is_same::value) { // T is a character type, but not char_type: // Trying to read a char from a wide range, or wchar_t from // a narrow one // Reading a code unit is allowed, however return error{error::invalid_operation, "Cannot read a char from a wide range, or a " "wchar_t from a narrow one"}; } SCN_MSVC_POP std::basic_string buf{}; span bufspan{}; auto e = _read_source( ctx, buf, bufspan, std::integral_constant< bool, Context::range_type::is_contiguous>{}); if (!e) { return e; } return do_parse_int(bufspan); } enum format_options_type : uint8_t { // "n" option -> localized digits and digit grouping localized_digits = 1, // "'" option -> accept thsep // if "L" use locale, default=',' allow_thsep = 2, // "u" option -> don't allow sign only_unsigned = 4, // Allow base prefix (e.g. 0B and 0x) allow_base_prefix = 8, // "c" option -> scan a code unit single_code_unit = 16, }; uint8_t format_options{default_format_options()}; // 0 = detect base // Otherwise [2,36] uint8_t base{0}; private: static SCN_CONSTEXPR14 uint8_t default_format_options() { SCN_MSVC_PUSH SCN_MSVC_IGNORE(4127) // conditional expression is constant if (std::is_same::value || std::is_same::value) { return single_code_unit; } return 0; SCN_MSVC_POP } template error _read_source(Context& ctx, Buf& buf, span& s, std::false_type) { auto do_read = [&](Buf& b) -> error { auto outputit = std::back_inserter(b); auto is_space_pred = make_is_space_predicate( ctx.locale(), (common_options & localized) != 0, field_width); auto e = read_until_space(ctx.range(), outputit, is_space_pred, false); if (!e && b.empty()) { return e; } return {}; }; if (SCN_LIKELY((format_options & allow_thsep) == 0)) { auto e = do_read(buf); if (!e) { return e; } s = make_span(buf.data(), buf.size()); return {}; } Buf tmp; auto e = do_read(tmp); if (!e) { return e; } auto thsep = ctx.locale() .get((common_options & localized) != 0) .thousands_separator(); auto it = tmp.begin(); for (; it != tmp.end(); ++it) { if (*it == thsep) { for (auto it2 = it; ++it2 != tmp.end();) { *it++ = SCN_MOVE(*it2); } break; } } auto n = static_cast(std::distance(tmp.begin(), it)); if (n == 0) { return {error::invalid_scanned_value, "Only a thousands separator found"}; } buf = SCN_MOVE(tmp); s = make_span(buf.data(), n); return {}; } template error _read_source(Context& ctx, Buf& buf, span& s, std::true_type) { if (SCN_UNLIKELY((format_options & allow_thsep) != 0)) { return _read_source(ctx, buf, s, std::false_type{}); } auto ret = read_zero_copy( ctx.range(), field_width != 0 ? static_cast(field_width) : ctx.range().size()); if (!ret) { return ret.error(); } s = ret.value(); return {}; } template expected::iterator> parse_base_prefix( span s, int& b) const; template expected _parse_int(T& val, span s); template expected::iterator> _parse_int_impl( T& val, bool minus_sign, span buf) const; }; // instantiate template struct integer_scanner; template struct integer_scanner; template struct integer_scanner; template struct integer_scanner; template struct integer_scanner; template struct integer_scanner; template struct integer_scanner; template struct integer_scanner; template struct integer_scanner; template struct integer_scanner; template struct integer_scanner; template struct integer_scanner; template template expected::iterator> simple_integer_scanner::scan(span buf, T& val, int base, uint16_t flags) { SCN_EXPECT(buf.size() != 0); integer_scanner s{}; s.base = static_cast(base); s.format_options = flags & 0xffu; s.common_options = static_cast(flags >> 8u); SCN_CLANG_PUSH_IGNORE_UNDEFINED_TEMPLATE auto n = s._parse_int(val, buf); SCN_CLANG_POP_IGNORE_UNDEFINED_TEMPLATE if (!n) { return n.error(); } return buf.begin() + n.value(); } template template expected::iterator> simple_integer_scanner::scan_lower(span buf, T& val, int base, uint16_t flags) { SCN_EXPECT(buf.size() != 0); SCN_EXPECT(base > 0); integer_scanner s{}; s.base = static_cast(base); s.format_options = flags & 0xffu; s.common_options = static_cast(flags >> 8u); bool minus_sign = false; if (buf[0] == ascii_widen('-')) { buf = buf.subspan(1); minus_sign = true; } SCN_CLANG_PUSH_IGNORE_UNDEFINED_TEMPLATE return s._parse_int_impl(val, minus_sign, buf); SCN_CLANG_POP_IGNORE_UNDEFINED_TEMPLATE } } // namespace detail SCN_END_NAMESPACE } // namespace scn #if defined(SCN_HEADER_ONLY) && SCN_HEADER_ONLY && !defined(SCN_READER_INT_CPP) #include "reader_int.cpp" #endif #endif