diff options
Diffstat (limited to 'src/include/numeric_parser.hpp')
-rw-r--r-- | src/include/numeric_parser.hpp | 216 |
1 files changed, 216 insertions, 0 deletions
diff --git a/src/include/numeric_parser.hpp b/src/include/numeric_parser.hpp new file mode 100644 index 0000000..fc047b6 --- /dev/null +++ b/src/include/numeric_parser.hpp @@ -0,0 +1,216 @@ +/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */ +/* + * This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. + */ + +#ifndef INCLUDED_ORCUS_DETAIL_NUMERIC_PARSER_HPP +#define INCLUDED_ORCUS_DETAIL_NUMERIC_PARSER_HPP + +#include <mdds/global.hpp> +#include <limits> +#include <cmath> + +namespace orcus { namespace detail { + +struct generic_parser_trait {}; + +struct json_parser_trait {}; + +struct parser_state +{ + /** number of digits before the decimal point. */ + int int_digit_count = 0; + /** number of digits after the decimal point. */ + int frac_digit_count = 0; + /** first digit before the decimal point. */ + char first_int_digit = 0; + double parsed_value = 0.0; + double divisor = 1.0; + bool has_digit = false; + bool has_decimal = false; + bool negative_sign = false; +}; + +template<typename _Trait> +double make_final_value(const parser_state& state); + +template<> +inline double make_final_value<generic_parser_trait>(const parser_state& state) +{ + return state.negative_sign ? -state.parsed_value : state.parsed_value; +} + +template<> +inline double make_final_value<json_parser_trait>(const parser_state& state) +{ + if (state.int_digit_count > 1 && state.first_int_digit == 0) + // leading zeros not allowed. + return std::numeric_limits<double>::quiet_NaN(); + + if (state.has_decimal && (state.frac_digit_count == 0 || state.int_digit_count == 0)) + // at least one digit is required both before and after the decimal point. + return std::numeric_limits<double>::quiet_NaN(); + + return state.negative_sign ? -state.parsed_value : state.parsed_value; +} + +template<typename _Trait> +class numeric_parser +{ + using trait_type = _Trait; + + const char* mp_char; + const char* mp_end; + + parser_state m_state; + + bool check_sign() + { + bool negative_sign = false; + + // Check for presence of a sign. + if (mp_char != mp_end) + { + switch (*mp_char) + { + case '+': + ++mp_char; + break; + case '-': + negative_sign = true; + ++mp_char; + break; + default: + ; + } + } + + return negative_sign; + } + + /** + * Parse the exponent part of a numeric string. + * + * @return extra divisor to multiply to the original divisor, or 0.0 if the + * parsing fails. + */ + double parse_exponent() + { + const char* p0 = mp_char - 1; // original position to restore to in case of parsing failure. The e needs to be added back as well. + double exponent = 0.0; + bool valid = false; + + bool negative_sign = check_sign(); + + for (; mp_char != mp_end; ++mp_char) + { + if (*mp_char < '0' || '9' < *mp_char) + { + // Non-digit encountered. + break; + } + + valid = true; + exponent *= 10.0; + exponent += *mp_char - '0'; + } + + if (!valid) + { + // Restore the original position on failed parsing. + mp_char = p0; + return 0.0; + } + + if (!negative_sign) + exponent = -exponent; + + return std::pow(10.0, exponent); + } + +public: + numeric_parser(const char* p, const char* p_end) : + mp_char(p), + mp_end(p_end) {} + + /** + * Start parsing the string. + * + * @return a finite value upon successful parsing, else NaN is returned. + */ + double parse() + { + m_state.negative_sign = check_sign(); + + for (; mp_char != mp_end; ++mp_char) + { + if (*mp_char == '.') + { + if (m_state.has_decimal) + { + // Second '.' encountered. Terminate the parsing. + m_state.parsed_value /= m_state.divisor; + return make_final_value<trait_type>(m_state); + } + + m_state.has_decimal = true; + continue; + } + + if (m_state.has_digit && (*mp_char == 'e' || *mp_char == 'E')) + { + ++mp_char; + double extra_divisor = parse_exponent(); + if (extra_divisor) + m_state.divisor *= extra_divisor; + break; + } + + if (*mp_char < '0' || '9' < *mp_char) + { + if (!m_state.has_digit) // without a digit we have no numbers + return std::numeric_limits<double>::quiet_NaN(); + + m_state.parsed_value /= m_state.divisor; + return make_final_value<trait_type>(m_state); + } + + m_state.has_digit = true; + char digit = *mp_char - '0'; + + if (m_state.has_decimal) + ++m_state.frac_digit_count; + else + { + if (!m_state.int_digit_count) + m_state.first_int_digit = digit; + + ++m_state.int_digit_count; + } + + m_state.parsed_value *= 10.0; + m_state.parsed_value += digit; + + if (m_state.has_decimal) + m_state.divisor *= 10.0; + } + if (!m_state.has_digit) // without a digit we have no numbers + return std::numeric_limits<double>::quiet_NaN(); + + m_state.parsed_value /= m_state.divisor; + return make_final_value<trait_type>(m_state); + } + + const char* get_char_position() const + { + return mp_char; + } +}; + +}} // namespace orcus::detail + +#endif + +/* vim:set shiftwidth=4 softtabstop=4 expandtab: */ |