diff options
Diffstat (limited to 'src/strconv/atof.go')
-rw-r--r-- | src/strconv/atof.go | 709 |
1 files changed, 709 insertions, 0 deletions
diff --git a/src/strconv/atof.go b/src/strconv/atof.go new file mode 100644 index 0000000..8fc9042 --- /dev/null +++ b/src/strconv/atof.go @@ -0,0 +1,709 @@ +// Copyright 2009 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package strconv + +// decimal to binary floating point conversion. +// Algorithm: +// 1) Store input in multiprecision decimal. +// 2) Multiply/divide decimal by powers of two until in range [0.5, 1) +// 3) Multiply by 2^precision and round to get mantissa. + +import "math" + +var optimize = true // set to false to force slow-path conversions for testing + +// commonPrefixLenIgnoreCase returns the length of the common +// prefix of s and prefix, with the character case of s ignored. +// The prefix argument must be all lower-case. +func commonPrefixLenIgnoreCase(s, prefix string) int { + n := len(prefix) + if n > len(s) { + n = len(s) + } + for i := 0; i < n; i++ { + c := s[i] + if 'A' <= c && c <= 'Z' { + c += 'a' - 'A' + } + if c != prefix[i] { + return i + } + } + return n +} + +// special returns the floating-point value for the special, +// possibly signed floating-point representations inf, infinity, +// and NaN. The result is ok if a prefix of s contains one +// of these representations and n is the length of that prefix. +// The character case is ignored. +func special(s string) (f float64, n int, ok bool) { + if len(s) == 0 { + return 0, 0, false + } + sign := 1 + nsign := 0 + switch s[0] { + case '+', '-': + if s[0] == '-' { + sign = -1 + } + nsign = 1 + s = s[1:] + fallthrough + case 'i', 'I': + n := commonPrefixLenIgnoreCase(s, "infinity") + // Anything longer than "inf" is ok, but if we + // don't have "infinity", only consume "inf". + if 3 < n && n < 8 { + n = 3 + } + if n == 3 || n == 8 { + return math.Inf(sign), nsign + n, true + } + case 'n', 'N': + if commonPrefixLenIgnoreCase(s, "nan") == 3 { + return math.NaN(), 3, true + } + } + return 0, 0, false +} + +func (b *decimal) set(s string) (ok bool) { + i := 0 + b.neg = false + b.trunc = false + + // optional sign + if i >= len(s) { + return + } + switch { + case s[i] == '+': + i++ + case s[i] == '-': + b.neg = true + i++ + } + + // digits + sawdot := false + sawdigits := false + for ; i < len(s); i++ { + switch { + case s[i] == '_': + // readFloat already checked underscores + continue + case s[i] == '.': + if sawdot { + return + } + sawdot = true + b.dp = b.nd + continue + + case '0' <= s[i] && s[i] <= '9': + sawdigits = true + if s[i] == '0' && b.nd == 0 { // ignore leading zeros + b.dp-- + continue + } + if b.nd < len(b.d) { + b.d[b.nd] = s[i] + b.nd++ + } else if s[i] != '0' { + b.trunc = true + } + continue + } + break + } + if !sawdigits { + return + } + if !sawdot { + b.dp = b.nd + } + + // optional exponent moves decimal point. + // if we read a very large, very long number, + // just be sure to move the decimal point by + // a lot (say, 100000). it doesn't matter if it's + // not the exact number. + if i < len(s) && lower(s[i]) == 'e' { + i++ + if i >= len(s) { + return + } + esign := 1 + if s[i] == '+' { + i++ + } else if s[i] == '-' { + i++ + esign = -1 + } + if i >= len(s) || s[i] < '0' || s[i] > '9' { + return + } + e := 0 + for ; i < len(s) && ('0' <= s[i] && s[i] <= '9' || s[i] == '_'); i++ { + if s[i] == '_' { + // readFloat already checked underscores + continue + } + if e < 10000 { + e = e*10 + int(s[i]) - '0' + } + } + b.dp += e * esign + } + + if i != len(s) { + return + } + + ok = true + return +} + +// readFloat reads a decimal or hexadecimal mantissa and exponent from a float +// string representation in s; the number may be followed by other characters. +// readFloat reports the number of bytes consumed (i), and whether the number +// is valid (ok). +func readFloat(s string) (mantissa uint64, exp int, neg, trunc, hex bool, i int, ok bool) { + underscores := false + + // optional sign + if i >= len(s) { + return + } + switch { + case s[i] == '+': + i++ + case s[i] == '-': + neg = true + i++ + } + + // digits + base := uint64(10) + maxMantDigits := 19 // 10^19 fits in uint64 + expChar := byte('e') + if i+2 < len(s) && s[i] == '0' && lower(s[i+1]) == 'x' { + base = 16 + maxMantDigits = 16 // 16^16 fits in uint64 + i += 2 + expChar = 'p' + hex = true + } + sawdot := false + sawdigits := false + nd := 0 + ndMant := 0 + dp := 0 +loop: + for ; i < len(s); i++ { + switch c := s[i]; true { + case c == '_': + underscores = true + continue + + case c == '.': + if sawdot { + break loop + } + sawdot = true + dp = nd + continue + + case '0' <= c && c <= '9': + sawdigits = true + if c == '0' && nd == 0 { // ignore leading zeros + dp-- + continue + } + nd++ + if ndMant < maxMantDigits { + mantissa *= base + mantissa += uint64(c - '0') + ndMant++ + } else if c != '0' { + trunc = true + } + continue + + case base == 16 && 'a' <= lower(c) && lower(c) <= 'f': + sawdigits = true + nd++ + if ndMant < maxMantDigits { + mantissa *= 16 + mantissa += uint64(lower(c) - 'a' + 10) + ndMant++ + } else { + trunc = true + } + continue + } + break + } + if !sawdigits { + return + } + if !sawdot { + dp = nd + } + + if base == 16 { + dp *= 4 + ndMant *= 4 + } + + // optional exponent moves decimal point. + // if we read a very large, very long number, + // just be sure to move the decimal point by + // a lot (say, 100000). it doesn't matter if it's + // not the exact number. + if i < len(s) && lower(s[i]) == expChar { + i++ + if i >= len(s) { + return + } + esign := 1 + if s[i] == '+' { + i++ + } else if s[i] == '-' { + i++ + esign = -1 + } + if i >= len(s) || s[i] < '0' || s[i] > '9' { + return + } + e := 0 + for ; i < len(s) && ('0' <= s[i] && s[i] <= '9' || s[i] == '_'); i++ { + if s[i] == '_' { + underscores = true + continue + } + if e < 10000 { + e = e*10 + int(s[i]) - '0' + } + } + dp += e * esign + } else if base == 16 { + // Must have exponent. + return + } + + if mantissa != 0 { + exp = dp - ndMant + } + + if underscores && !underscoreOK(s[:i]) { + return + } + + ok = true + return +} + +// decimal power of ten to binary power of two. +var powtab = []int{1, 3, 6, 9, 13, 16, 19, 23, 26} + +func (d *decimal) floatBits(flt *floatInfo) (b uint64, overflow bool) { + var exp int + var mant uint64 + + // Zero is always a special case. + if d.nd == 0 { + mant = 0 + exp = flt.bias + goto out + } + + // Obvious overflow/underflow. + // These bounds are for 64-bit floats. + // Will have to change if we want to support 80-bit floats in the future. + if d.dp > 310 { + goto overflow + } + if d.dp < -330 { + // zero + mant = 0 + exp = flt.bias + goto out + } + + // Scale by powers of two until in range [0.5, 1.0) + exp = 0 + for d.dp > 0 { + var n int + if d.dp >= len(powtab) { + n = 27 + } else { + n = powtab[d.dp] + } + d.Shift(-n) + exp += n + } + for d.dp < 0 || d.dp == 0 && d.d[0] < '5' { + var n int + if -d.dp >= len(powtab) { + n = 27 + } else { + n = powtab[-d.dp] + } + d.Shift(n) + exp -= n + } + + // Our range is [0.5,1) but floating point range is [1,2). + exp-- + + // Minimum representable exponent is flt.bias+1. + // If the exponent is smaller, move it up and + // adjust d accordingly. + if exp < flt.bias+1 { + n := flt.bias + 1 - exp + d.Shift(-n) + exp += n + } + + if exp-flt.bias >= 1<<flt.expbits-1 { + goto overflow + } + + // Extract 1+flt.mantbits bits. + d.Shift(int(1 + flt.mantbits)) + mant = d.RoundedInteger() + + // Rounding might have added a bit; shift down. + if mant == 2<<flt.mantbits { + mant >>= 1 + exp++ + if exp-flt.bias >= 1<<flt.expbits-1 { + goto overflow + } + } + + // Denormalized? + if mant&(1<<flt.mantbits) == 0 { + exp = flt.bias + } + goto out + +overflow: + // ±Inf + mant = 0 + exp = 1<<flt.expbits - 1 + flt.bias + overflow = true + +out: + // Assemble bits. + bits := mant & (uint64(1)<<flt.mantbits - 1) + bits |= uint64((exp-flt.bias)&(1<<flt.expbits-1)) << flt.mantbits + if d.neg { + bits |= 1 << flt.mantbits << flt.expbits + } + return bits, overflow +} + +// Exact powers of 10. +var float64pow10 = []float64{ + 1e0, 1e1, 1e2, 1e3, 1e4, 1e5, 1e6, 1e7, 1e8, 1e9, + 1e10, 1e11, 1e12, 1e13, 1e14, 1e15, 1e16, 1e17, 1e18, 1e19, + 1e20, 1e21, 1e22, +} +var float32pow10 = []float32{1e0, 1e1, 1e2, 1e3, 1e4, 1e5, 1e6, 1e7, 1e8, 1e9, 1e10} + +// If possible to convert decimal representation to 64-bit float f exactly, +// entirely in floating-point math, do so, avoiding the expense of decimalToFloatBits. +// Three common cases: +// +// value is exact integer +// value is exact integer * exact power of ten +// value is exact integer / exact power of ten +// +// These all produce potentially inexact but correctly rounded answers. +func atof64exact(mantissa uint64, exp int, neg bool) (f float64, ok bool) { + if mantissa>>float64info.mantbits != 0 { + return + } + f = float64(mantissa) + if neg { + f = -f + } + switch { + case exp == 0: + // an integer. + return f, true + // Exact integers are <= 10^15. + // Exact powers of ten are <= 10^22. + case exp > 0 && exp <= 15+22: // int * 10^k + // If exponent is big but number of digits is not, + // can move a few zeros into the integer part. + if exp > 22 { + f *= float64pow10[exp-22] + exp = 22 + } + if f > 1e15 || f < -1e15 { + // the exponent was really too large. + return + } + return f * float64pow10[exp], true + case exp < 0 && exp >= -22: // int / 10^k + return f / float64pow10[-exp], true + } + return +} + +// If possible to compute mantissa*10^exp to 32-bit float f exactly, +// entirely in floating-point math, do so, avoiding the machinery above. +func atof32exact(mantissa uint64, exp int, neg bool) (f float32, ok bool) { + if mantissa>>float32info.mantbits != 0 { + return + } + f = float32(mantissa) + if neg { + f = -f + } + switch { + case exp == 0: + return f, true + // Exact integers are <= 10^7. + // Exact powers of ten are <= 10^10. + case exp > 0 && exp <= 7+10: // int * 10^k + // If exponent is big but number of digits is not, + // can move a few zeros into the integer part. + if exp > 10 { + f *= float32pow10[exp-10] + exp = 10 + } + if f > 1e7 || f < -1e7 { + // the exponent was really too large. + return + } + return f * float32pow10[exp], true + case exp < 0 && exp >= -10: // int / 10^k + return f / float32pow10[-exp], true + } + return +} + +// atofHex converts the hex floating-point string s +// to a rounded float32 or float64 value (depending on flt==&float32info or flt==&float64info) +// and returns it as a float64. +// The string s has already been parsed into a mantissa, exponent, and sign (neg==true for negative). +// If trunc is true, trailing non-zero bits have been omitted from the mantissa. +func atofHex(s string, flt *floatInfo, mantissa uint64, exp int, neg, trunc bool) (float64, error) { + maxExp := 1<<flt.expbits + flt.bias - 2 + minExp := flt.bias + 1 + exp += int(flt.mantbits) // mantissa now implicitly divided by 2^mantbits. + + // Shift mantissa and exponent to bring representation into float range. + // Eventually we want a mantissa with a leading 1-bit followed by mantbits other bits. + // For rounding, we need two more, where the bottom bit represents + // whether that bit or any later bit was non-zero. + // (If the mantissa has already lost non-zero bits, trunc is true, + // and we OR in a 1 below after shifting left appropriately.) + for mantissa != 0 && mantissa>>(flt.mantbits+2) == 0 { + mantissa <<= 1 + exp-- + } + if trunc { + mantissa |= 1 + } + for mantissa>>(1+flt.mantbits+2) != 0 { + mantissa = mantissa>>1 | mantissa&1 + exp++ + } + + // If exponent is too negative, + // denormalize in hopes of making it representable. + // (The -2 is for the rounding bits.) + for mantissa > 1 && exp < minExp-2 { + mantissa = mantissa>>1 | mantissa&1 + exp++ + } + + // Round using two bottom bits. + round := mantissa & 3 + mantissa >>= 2 + round |= mantissa & 1 // round to even (round up if mantissa is odd) + exp += 2 + if round == 3 { + mantissa++ + if mantissa == 1<<(1+flt.mantbits) { + mantissa >>= 1 + exp++ + } + } + + if mantissa>>flt.mantbits == 0 { // Denormal or zero. + exp = flt.bias + } + var err error + if exp > maxExp { // infinity and range error + mantissa = 1 << flt.mantbits + exp = maxExp + 1 + err = rangeError(fnParseFloat, s) + } + + bits := mantissa & (1<<flt.mantbits - 1) + bits |= uint64((exp-flt.bias)&(1<<flt.expbits-1)) << flt.mantbits + if neg { + bits |= 1 << flt.mantbits << flt.expbits + } + if flt == &float32info { + return float64(math.Float32frombits(uint32(bits))), err + } + return math.Float64frombits(bits), err +} + +const fnParseFloat = "ParseFloat" + +func atof32(s string) (f float32, n int, err error) { + if val, n, ok := special(s); ok { + return float32(val), n, nil + } + + mantissa, exp, neg, trunc, hex, n, ok := readFloat(s) + if !ok { + return 0, n, syntaxError(fnParseFloat, s) + } + + if hex { + f, err := atofHex(s[:n], &float32info, mantissa, exp, neg, trunc) + return float32(f), n, err + } + + if optimize { + // Try pure floating-point arithmetic conversion, and if that fails, + // the Eisel-Lemire algorithm. + if !trunc { + if f, ok := atof32exact(mantissa, exp, neg); ok { + return f, n, nil + } + } + f, ok := eiselLemire32(mantissa, exp, neg) + if ok { + if !trunc { + return f, n, nil + } + // Even if the mantissa was truncated, we may + // have found the correct result. Confirm by + // converting the upper mantissa bound. + fUp, ok := eiselLemire32(mantissa+1, exp, neg) + if ok && f == fUp { + return f, n, nil + } + } + } + + // Slow fallback. + var d decimal + if !d.set(s[:n]) { + return 0, n, syntaxError(fnParseFloat, s) + } + b, ovf := d.floatBits(&float32info) + f = math.Float32frombits(uint32(b)) + if ovf { + err = rangeError(fnParseFloat, s) + } + return f, n, err +} + +func atof64(s string) (f float64, n int, err error) { + if val, n, ok := special(s); ok { + return val, n, nil + } + + mantissa, exp, neg, trunc, hex, n, ok := readFloat(s) + if !ok { + return 0, n, syntaxError(fnParseFloat, s) + } + + if hex { + f, err := atofHex(s[:n], &float64info, mantissa, exp, neg, trunc) + return f, n, err + } + + if optimize { + // Try pure floating-point arithmetic conversion, and if that fails, + // the Eisel-Lemire algorithm. + if !trunc { + if f, ok := atof64exact(mantissa, exp, neg); ok { + return f, n, nil + } + } + f, ok := eiselLemire64(mantissa, exp, neg) + if ok { + if !trunc { + return f, n, nil + } + // Even if the mantissa was truncated, we may + // have found the correct result. Confirm by + // converting the upper mantissa bound. + fUp, ok := eiselLemire64(mantissa+1, exp, neg) + if ok && f == fUp { + return f, n, nil + } + } + } + + // Slow fallback. + var d decimal + if !d.set(s[:n]) { + return 0, n, syntaxError(fnParseFloat, s) + } + b, ovf := d.floatBits(&float64info) + f = math.Float64frombits(b) + if ovf { + err = rangeError(fnParseFloat, s) + } + return f, n, err +} + +// ParseFloat converts the string s to a floating-point number +// with the precision specified by bitSize: 32 for float32, or 64 for float64. +// When bitSize=32, the result still has type float64, but it will be +// convertible to float32 without changing its value. +// +// ParseFloat accepts decimal and hexadecimal floating-point numbers +// as defined by the Go syntax for [floating-point literals]. +// If s is well-formed and near a valid floating-point number, +// ParseFloat returns the nearest floating-point number rounded +// using IEEE754 unbiased rounding. +// (Parsing a hexadecimal floating-point value only rounds when +// there are more bits in the hexadecimal representation than +// will fit in the mantissa.) +// +// The errors that ParseFloat returns have concrete type *NumError +// and include err.Num = s. +// +// If s is not syntactically well-formed, ParseFloat returns err.Err = ErrSyntax. +// +// If s is syntactically well-formed but is more than 1/2 ULP +// away from the largest floating point number of the given size, +// ParseFloat returns f = ±Inf, err.Err = ErrRange. +// +// ParseFloat recognizes the string "NaN", and the (possibly signed) strings "Inf" and "Infinity" +// as their respective special floating point values. It ignores case when matching. +// +// [floating-point literals]: https://go.dev/ref/spec#Floating-point_literals +func ParseFloat(s string, bitSize int) (float64, error) { + f, n, err := parseFloatPrefix(s, bitSize) + if n != len(s) && (err == nil || err.(*NumError).Err != ErrSyntax) { + return 0, syntaxError(fnParseFloat, s) + } + return f, err +} + +func parseFloatPrefix(s string, bitSize int) (float64, int, error) { + if bitSize == 32 { + f, n, err := atof32(s) + return float64(f), n, err + } + return atof64(s) +} |