summaryrefslogtreecommitdiffstats
path: root/src/include/numeric_parser.hpp
diff options
context:
space:
mode:
Diffstat (limited to 'src/include/numeric_parser.hpp')
-rw-r--r--src/include/numeric_parser.hpp216
1 files changed, 216 insertions, 0 deletions
diff --git a/src/include/numeric_parser.hpp b/src/include/numeric_parser.hpp
new file mode 100644
index 0000000..fc047b6
--- /dev/null
+++ b/src/include/numeric_parser.hpp
@@ -0,0 +1,216 @@
+/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
+/*
+ * This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/.
+ */
+
+#ifndef INCLUDED_ORCUS_DETAIL_NUMERIC_PARSER_HPP
+#define INCLUDED_ORCUS_DETAIL_NUMERIC_PARSER_HPP
+
+#include <mdds/global.hpp>
+#include <limits>
+#include <cmath>
+
+namespace orcus { namespace detail {
+
+struct generic_parser_trait {};
+
+struct json_parser_trait {};
+
+struct parser_state
+{
+ /** number of digits before the decimal point. */
+ int int_digit_count = 0;
+ /** number of digits after the decimal point. */
+ int frac_digit_count = 0;
+ /** first digit before the decimal point. */
+ char first_int_digit = 0;
+ double parsed_value = 0.0;
+ double divisor = 1.0;
+ bool has_digit = false;
+ bool has_decimal = false;
+ bool negative_sign = false;
+};
+
+template<typename _Trait>
+double make_final_value(const parser_state& state);
+
+template<>
+inline double make_final_value<generic_parser_trait>(const parser_state& state)
+{
+ return state.negative_sign ? -state.parsed_value : state.parsed_value;
+}
+
+template<>
+inline double make_final_value<json_parser_trait>(const parser_state& state)
+{
+ if (state.int_digit_count > 1 && state.first_int_digit == 0)
+ // leading zeros not allowed.
+ return std::numeric_limits<double>::quiet_NaN();
+
+ if (state.has_decimal && (state.frac_digit_count == 0 || state.int_digit_count == 0))
+ // at least one digit is required both before and after the decimal point.
+ return std::numeric_limits<double>::quiet_NaN();
+
+ return state.negative_sign ? -state.parsed_value : state.parsed_value;
+}
+
+template<typename _Trait>
+class numeric_parser
+{
+ using trait_type = _Trait;
+
+ const char* mp_char;
+ const char* mp_end;
+
+ parser_state m_state;
+
+ bool check_sign()
+ {
+ bool negative_sign = false;
+
+ // Check for presence of a sign.
+ if (mp_char != mp_end)
+ {
+ switch (*mp_char)
+ {
+ case '+':
+ ++mp_char;
+ break;
+ case '-':
+ negative_sign = true;
+ ++mp_char;
+ break;
+ default:
+ ;
+ }
+ }
+
+ return negative_sign;
+ }
+
+ /**
+ * Parse the exponent part of a numeric string.
+ *
+ * @return extra divisor to multiply to the original divisor, or 0.0 if the
+ * parsing fails.
+ */
+ double parse_exponent()
+ {
+ const char* p0 = mp_char - 1; // original position to restore to in case of parsing failure. The e needs to be added back as well.
+ double exponent = 0.0;
+ bool valid = false;
+
+ bool negative_sign = check_sign();
+
+ for (; mp_char != mp_end; ++mp_char)
+ {
+ if (*mp_char < '0' || '9' < *mp_char)
+ {
+ // Non-digit encountered.
+ break;
+ }
+
+ valid = true;
+ exponent *= 10.0;
+ exponent += *mp_char - '0';
+ }
+
+ if (!valid)
+ {
+ // Restore the original position on failed parsing.
+ mp_char = p0;
+ return 0.0;
+ }
+
+ if (!negative_sign)
+ exponent = -exponent;
+
+ return std::pow(10.0, exponent);
+ }
+
+public:
+ numeric_parser(const char* p, const char* p_end) :
+ mp_char(p),
+ mp_end(p_end) {}
+
+ /**
+ * Start parsing the string.
+ *
+ * @return a finite value upon successful parsing, else NaN is returned.
+ */
+ double parse()
+ {
+ m_state.negative_sign = check_sign();
+
+ for (; mp_char != mp_end; ++mp_char)
+ {
+ if (*mp_char == '.')
+ {
+ if (m_state.has_decimal)
+ {
+ // Second '.' encountered. Terminate the parsing.
+ m_state.parsed_value /= m_state.divisor;
+ return make_final_value<trait_type>(m_state);
+ }
+
+ m_state.has_decimal = true;
+ continue;
+ }
+
+ if (m_state.has_digit && (*mp_char == 'e' || *mp_char == 'E'))
+ {
+ ++mp_char;
+ double extra_divisor = parse_exponent();
+ if (extra_divisor)
+ m_state.divisor *= extra_divisor;
+ break;
+ }
+
+ if (*mp_char < '0' || '9' < *mp_char)
+ {
+ if (!m_state.has_digit) // without a digit we have no numbers
+ return std::numeric_limits<double>::quiet_NaN();
+
+ m_state.parsed_value /= m_state.divisor;
+ return make_final_value<trait_type>(m_state);
+ }
+
+ m_state.has_digit = true;
+ char digit = *mp_char - '0';
+
+ if (m_state.has_decimal)
+ ++m_state.frac_digit_count;
+ else
+ {
+ if (!m_state.int_digit_count)
+ m_state.first_int_digit = digit;
+
+ ++m_state.int_digit_count;
+ }
+
+ m_state.parsed_value *= 10.0;
+ m_state.parsed_value += digit;
+
+ if (m_state.has_decimal)
+ m_state.divisor *= 10.0;
+ }
+ if (!m_state.has_digit) // without a digit we have no numbers
+ return std::numeric_limits<double>::quiet_NaN();
+
+ m_state.parsed_value /= m_state.divisor;
+ return make_final_value<trait_type>(m_state);
+ }
+
+ const char* get_char_position() const
+ {
+ return mp_char;
+ }
+};
+
+}} // namespace orcus::detail
+
+#endif
+
+/* vim:set shiftwidth=4 softtabstop=4 expandtab: */