diff options
Diffstat (limited to 'src/strconv/atoi.go')
-rw-r--r-- | src/strconv/atoi.go | 332 |
1 files changed, 332 insertions, 0 deletions
diff --git a/src/strconv/atoi.go b/src/strconv/atoi.go new file mode 100644 index 0000000..520d826 --- /dev/null +++ b/src/strconv/atoi.go @@ -0,0 +1,332 @@ +// Copyright 2009 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package strconv + +import "errors" + +// lower(c) is a lower-case letter if and only if +// c is either that lower-case letter or the equivalent upper-case letter. +// Instead of writing c == 'x' || c == 'X' one can write lower(c) == 'x'. +// Note that lower of non-letters can produce other non-letters. +func lower(c byte) byte { + return c | ('x' - 'X') +} + +// ErrRange indicates that a value is out of range for the target type. +var ErrRange = errors.New("value out of range") + +// ErrSyntax indicates that a value does not have the right syntax for the target type. +var ErrSyntax = errors.New("invalid syntax") + +// A NumError records a failed conversion. +type NumError struct { + Func string // the failing function (ParseBool, ParseInt, ParseUint, ParseFloat, ParseComplex) + Num string // the input + Err error // the reason the conversion failed (e.g. ErrRange, ErrSyntax, etc.) +} + +func (e *NumError) Error() string { + return "strconv." + e.Func + ": " + "parsing " + Quote(e.Num) + ": " + e.Err.Error() +} + +func (e *NumError) Unwrap() error { return e.Err } + +// cloneString returns a string copy of x. +// +// All ParseXXX functions allow the input string to escape to the error value. +// This hurts strconv.ParseXXX(string(b)) calls where b is []byte since +// the conversion from []byte must allocate a string on the heap. +// If we assume errors are infrequent, then we can avoid escaping the input +// back to the output by copying it first. This allows the compiler to call +// strconv.ParseXXX without a heap allocation for most []byte to string +// conversions, since it can now prove that the string cannot escape Parse. +// +// TODO: Use strings.Clone instead? However, we cannot depend on "strings" +// since it incurs a transitive dependency on "unicode". +// Either move strings.Clone to an internal/bytealg or make the +// "strings" to "unicode" dependency lighter (see https://go.dev/issue/54098). +func cloneString(x string) string { return string([]byte(x)) } + +func syntaxError(fn, str string) *NumError { + return &NumError{fn, cloneString(str), ErrSyntax} +} + +func rangeError(fn, str string) *NumError { + return &NumError{fn, cloneString(str), ErrRange} +} + +func baseError(fn, str string, base int) *NumError { + return &NumError{fn, cloneString(str), errors.New("invalid base " + Itoa(base))} +} + +func bitSizeError(fn, str string, bitSize int) *NumError { + return &NumError{fn, cloneString(str), errors.New("invalid bit size " + Itoa(bitSize))} +} + +const intSize = 32 << (^uint(0) >> 63) + +// IntSize is the size in bits of an int or uint value. +const IntSize = intSize + +const maxUint64 = 1<<64 - 1 + +// ParseUint is like ParseInt but for unsigned numbers. +// +// A sign prefix is not permitted. +func ParseUint(s string, base int, bitSize int) (uint64, error) { + const fnParseUint = "ParseUint" + + if s == "" { + return 0, syntaxError(fnParseUint, s) + } + + base0 := base == 0 + + s0 := s + switch { + case 2 <= base && base <= 36: + // valid base; nothing to do + + case base == 0: + // Look for octal, hex prefix. + base = 10 + if s[0] == '0' { + switch { + case len(s) >= 3 && lower(s[1]) == 'b': + base = 2 + s = s[2:] + case len(s) >= 3 && lower(s[1]) == 'o': + base = 8 + s = s[2:] + case len(s) >= 3 && lower(s[1]) == 'x': + base = 16 + s = s[2:] + default: + base = 8 + s = s[1:] + } + } + + default: + return 0, baseError(fnParseUint, s0, base) + } + + if bitSize == 0 { + bitSize = IntSize + } else if bitSize < 0 || bitSize > 64 { + return 0, bitSizeError(fnParseUint, s0, bitSize) + } + + // Cutoff is the smallest number such that cutoff*base > maxUint64. + // Use compile-time constants for common cases. + var cutoff uint64 + switch base { + case 10: + cutoff = maxUint64/10 + 1 + case 16: + cutoff = maxUint64/16 + 1 + default: + cutoff = maxUint64/uint64(base) + 1 + } + + maxVal := uint64(1)<<uint(bitSize) - 1 + + underscores := false + var n uint64 + for _, c := range []byte(s) { + var d byte + switch { + case c == '_' && base0: + underscores = true + continue + case '0' <= c && c <= '9': + d = c - '0' + case 'a' <= lower(c) && lower(c) <= 'z': + d = lower(c) - 'a' + 10 + default: + return 0, syntaxError(fnParseUint, s0) + } + + if d >= byte(base) { + return 0, syntaxError(fnParseUint, s0) + } + + if n >= cutoff { + // n*base overflows + return maxVal, rangeError(fnParseUint, s0) + } + n *= uint64(base) + + n1 := n + uint64(d) + if n1 < n || n1 > maxVal { + // n+d overflows + return maxVal, rangeError(fnParseUint, s0) + } + n = n1 + } + + if underscores && !underscoreOK(s0) { + return 0, syntaxError(fnParseUint, s0) + } + + return n, nil +} + +// ParseInt interprets a string s in the given base (0, 2 to 36) and +// bit size (0 to 64) and returns the corresponding value i. +// +// The string may begin with a leading sign: "+" or "-". +// +// If the base argument is 0, the true base is implied by the string's +// prefix following the sign (if present): 2 for "0b", 8 for "0" or "0o", +// 16 for "0x", and 10 otherwise. Also, for argument base 0 only, +// underscore characters are permitted as defined by the Go syntax for +// [integer literals]. +// +// The bitSize argument specifies the integer type +// that the result must fit into. Bit sizes 0, 8, 16, 32, and 64 +// correspond to int, int8, int16, int32, and int64. +// If bitSize is below 0 or above 64, an error is returned. +// +// The errors that ParseInt returns have concrete type *NumError +// and include err.Num = s. If s is empty or contains invalid +// digits, err.Err = ErrSyntax and the returned value is 0; +// if the value corresponding to s cannot be represented by a +// signed integer of the given size, err.Err = ErrRange and the +// returned value is the maximum magnitude integer of the +// appropriate bitSize and sign. +// +// [integer literals]: https://go.dev/ref/spec#Integer_literals +func ParseInt(s string, base int, bitSize int) (i int64, err error) { + const fnParseInt = "ParseInt" + + if s == "" { + return 0, syntaxError(fnParseInt, s) + } + + // Pick off leading sign. + s0 := s + neg := false + if s[0] == '+' { + s = s[1:] + } else if s[0] == '-' { + neg = true + s = s[1:] + } + + // Convert unsigned and check range. + var un uint64 + un, err = ParseUint(s, base, bitSize) + if err != nil && err.(*NumError).Err != ErrRange { + err.(*NumError).Func = fnParseInt + err.(*NumError).Num = cloneString(s0) + return 0, err + } + + if bitSize == 0 { + bitSize = IntSize + } + + cutoff := uint64(1 << uint(bitSize-1)) + if !neg && un >= cutoff { + return int64(cutoff - 1), rangeError(fnParseInt, s0) + } + if neg && un > cutoff { + return -int64(cutoff), rangeError(fnParseInt, s0) + } + n := int64(un) + if neg { + n = -n + } + return n, nil +} + +// Atoi is equivalent to ParseInt(s, 10, 0), converted to type int. +func Atoi(s string) (int, error) { + const fnAtoi = "Atoi" + + sLen := len(s) + if intSize == 32 && (0 < sLen && sLen < 10) || + intSize == 64 && (0 < sLen && sLen < 19) { + // Fast path for small integers that fit int type. + s0 := s + if s[0] == '-' || s[0] == '+' { + s = s[1:] + if len(s) < 1 { + return 0, syntaxError(fnAtoi, s0) + } + } + + n := 0 + for _, ch := range []byte(s) { + ch -= '0' + if ch > 9 { + return 0, syntaxError(fnAtoi, s0) + } + n = n*10 + int(ch) + } + if s0[0] == '-' { + n = -n + } + return n, nil + } + + // Slow path for invalid, big, or underscored integers. + i64, err := ParseInt(s, 10, 0) + if nerr, ok := err.(*NumError); ok { + nerr.Func = fnAtoi + } + return int(i64), err +} + +// underscoreOK reports whether the underscores in s are allowed. +// Checking them in this one function lets all the parsers skip over them simply. +// Underscore must appear only between digits or between a base prefix and a digit. +func underscoreOK(s string) bool { + // saw tracks the last character (class) we saw: + // ^ for beginning of number, + // 0 for a digit or base prefix, + // _ for an underscore, + // ! for none of the above. + saw := '^' + i := 0 + + // Optional sign. + if len(s) >= 1 && (s[0] == '-' || s[0] == '+') { + s = s[1:] + } + + // Optional base prefix. + hex := false + if len(s) >= 2 && s[0] == '0' && (lower(s[1]) == 'b' || lower(s[1]) == 'o' || lower(s[1]) == 'x') { + i = 2 + saw = '0' // base prefix counts as a digit for "underscore as digit separator" + hex = lower(s[1]) == 'x' + } + + // Number proper. + for ; i < len(s); i++ { + // Digits are always okay. + if '0' <= s[i] && s[i] <= '9' || hex && 'a' <= lower(s[i]) && lower(s[i]) <= 'f' { + saw = '0' + continue + } + // Underscore must follow digit. + if s[i] == '_' { + if saw != '0' { + return false + } + saw = '_' + continue + } + // Underscore must also be followed by digit. + if saw == '_' { + return false + } + // Saw non-digit, non-underscore. + saw = '!' + } + return saw != '_' +} |