summary refs log tree commit diff stats
path: root/src/strconv/atoi.go
diff options
context:
space:
mode:
Diffstat (limited to 'src/strconv/atoi.go')
-rw-r--r-- src/strconv/atoi.go 332
1 files changed, 332 insertions, 0 deletions
diff --git a/src/strconv/atoi.go b/src/strconv/atoi.go
new file mode 100644
index 0000000..520d826
--- /dev/null
+++ b/src/strconv/atoi.go
@@ -0,0 +1,332 @@
+// Copyright 2009 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package strconv
+
+import "errors"
+
+// lower(c) is a lower-case letter if and only if
+// c is either that lower-case letter or the equivalent upper-case letter.
+// Instead of writing c == 'x' || c == 'X' one can write lower(c) == 'x'.
+// Note that lower of non-letters can produce other non-letters.
+func lower(c byte) byte {
+ return c | ('x' - 'X')
+}
+
+// Sentinel errors placed in NumError.Err by the parsing functions.
+var (
+	// ErrRange indicates that a value is out of range for the target type.
+	ErrRange = errors.New("value out of range")
+
+	// ErrSyntax indicates that a value does not have the right syntax for the target type.
+	ErrSyntax = errors.New("invalid syntax")
+)
+
+// A NumError records a failed conversion.
+type NumError struct {
+	Func string // the failing function (ParseBool, ParseInt, ParseUint, ParseFloat, ParseComplex)
+	Num  string // the input
+	Err  error  // the reason the conversion failed (e.g. ErrRange, ErrSyntax, etc.)
+}
+
+// Error reports the failure in the form
+//
+//	strconv.<Func>: parsing <Quote(Num)>: <Err.Error()>
+func (e *NumError) Error() string {
+	quoted := Quote(e.Num)
+	reason := e.Err.Error()
+	return "strconv." + e.Func + ": parsing " + quoted + ": " + reason
+}
+
+// Unwrap returns e.Err, the reason the conversion failed.
+func (e *NumError) Unwrap() error {
+	return e.Err
+}
+
+// cloneString returns a copy of x backed by freshly allocated memory.
+//
+// Every ParseXXX function stores its input in the error it returns, which
+// by itself would make the input escape. That is costly for calls of the
+// form strconv.ParseXXX(string(b)) with b a []byte, because the converted
+// string would then have to be allocated on the heap. Errors are assumed
+// to be infrequent, so the error value is given a copy instead: the
+// original string no longer escapes, the compiler can prove as much, and
+// most []byte to string conversions passed to ParseXXX need no heap
+// allocation.
+//
+// TODO: Use strings.Clone instead? However, we cannot depend on "strings"
+// since it incurs a transitive dependency on "unicode".
+// Either move strings.Clone to an internal/bytealg or make the
+// "strings" to "unicode" dependency lighter (see https://go.dev/issue/54098).
+func cloneString(x string) string {
+	buf := []byte(x)
+	return string(buf)
+}
+
+// syntaxError builds the *NumError reported when fn rejects str as
+// malformed: Err is ErrSyntax and Num is a private copy of str.
+func syntaxError(fn, str string) *NumError {
+	return &NumError{
+		Func: fn,
+		Num:  cloneString(str),
+		Err:  ErrSyntax,
+	}
+}
+
+// rangeError builds the *NumError reported when the value of str does not
+// fit the type fn targets: Err is ErrRange and Num is a private copy of str.
+func rangeError(fn, str string) *NumError {
+	return &NumError{
+		Func: fn,
+		Num:  cloneString(str),
+		Err:  ErrRange,
+	}
+}
+
+// baseError builds the *NumError reported when fn is called with an
+// unsupported base. Its Err is a new error reading "invalid base "
+// followed by Itoa(base); Num is a private copy of str.
+func baseError(fn, str string, base int) *NumError {
+	reason := errors.New("invalid base " + Itoa(base))
+	return &NumError{Func: fn, Num: cloneString(str), Err: reason}
+}
+
+// bitSizeError builds the *NumError reported when fn is called with an
+// unsupported bit size. Its Err is a new error reading "invalid bit size "
+// followed by Itoa(bitSize); Num is a private copy of str.
+func bitSizeError(fn, str string, bitSize int) *NumError {
+	reason := errors.New("invalid bit size " + Itoa(bitSize))
+	return &NumError{Func: fn, Num: cloneString(str), Err: reason}
+}
+
+const (
+	// intSize is the width of int and uint in bits. ^uint(0) >> 63 is 1
+	// when uint is 64 bits wide and 0 when it is 32 bits wide, so the
+	// shift yields 64 or 32 respectively.
+	intSize = 32 << (^uint(0) >> 63)
+
+	// maxUint64 is the largest value a uint64 can hold.
+	maxUint64 = 1<<64 - 1
+)
+
+// IntSize is the size in bits of an int or uint value.
+const IntSize = intSize
+
+// ParseUint is like ParseInt but for unsigned numbers.
+//
+// A sign prefix is not permitted.
+//
+// For an empty string, an invalid digit, an invalid base or an invalid
+// bitSize the result is 0. When the value does not fit in bitSize bits the
+// result is the largest value of that size and the error's Err is ErrRange.
+func ParseUint(s string, base int, bitSize int) (uint64, error) {
+	const fnParseUint = "ParseUint"
+
+	if len(s) == 0 {
+		return 0, syntaxError(fnParseUint, s)
+	}
+
+	// input keeps the argument as given (prefix included) for error reports.
+	input := s
+	// Underscores are accepted only when the base is inferred from the prefix.
+	inferred := base == 0
+
+	if inferred {
+		// No leading "0" means decimal.
+		base = 10
+		if s[0] == '0' {
+			// A prefix letter counts only when at least one more byte
+			// follows it. marker stays 0 otherwise, which no case matches,
+			// so a bare leading "0" selects octal.
+			var marker byte
+			if len(s) >= 3 {
+				marker = lower(s[1])
+			}
+			switch marker {
+			case 'b':
+				base, s = 2, s[2:]
+			case 'o':
+				base, s = 8, s[2:]
+			case 'x':
+				base, s = 16, s[2:]
+			default:
+				base, s = 8, s[1:]
+			}
+		}
+	} else if base < 2 || base > 36 {
+		return 0, baseError(fnParseUint, input, base)
+	}
+
+	switch {
+	case bitSize == 0:
+		bitSize = IntSize
+	case bitSize < 0 || bitSize > 64:
+		return 0, bitSizeError(fnParseUint, input, bitSize)
+	}
+
+	// cutoff is the smallest number such that cutoff*base > maxUint64.
+	// The common bases use compile-time constants.
+	var cutoff uint64
+	switch base {
+	case 10:
+		cutoff = maxUint64/10 + 1
+	case 16:
+		cutoff = maxUint64/16 + 1
+	default:
+		cutoff = maxUint64/uint64(base) + 1
+	}
+
+	// limit is the largest value that fits in bitSize bits.
+	limit := uint64(1)<<uint(bitSize) - 1
+
+	sawUnderscore := false
+	var acc uint64
+	for i := 0; i < len(s); i++ {
+		c := s[i]
+		if c == '_' && inferred {
+			// Skip it now; placement is validated once, after the loop.
+			sawUnderscore = true
+			continue
+		}
+
+		var digit byte
+		lc := lower(c)
+		switch {
+		case '0' <= c && c <= '9':
+			digit = c - '0'
+		case 'a' <= lc && lc <= 'z':
+			digit = lc - 'a' + 10
+		default:
+			return 0, syntaxError(fnParseUint, input)
+		}
+		if digit >= byte(base) {
+			return 0, syntaxError(fnParseUint, input)
+		}
+
+		if acc >= cutoff {
+			// acc*base would overflow uint64.
+			return limit, rangeError(fnParseUint, input)
+		}
+		acc *= uint64(base)
+
+		next := acc + uint64(digit)
+		if next < acc || next > limit {
+			// acc+digit wrapped around or exceeds bitSize bits.
+			return limit, rangeError(fnParseUint, input)
+		}
+		acc = next
+	}
+
+	if sawUnderscore && !underscoreOK(input) {
+		return 0, syntaxError(fnParseUint, input)
+	}
+
+	return acc, nil
+}
+
+// ParseInt interprets a string s in the given base (0, 2 to 36) and
+// bit size (0 to 64) and returns the corresponding value i.
+//
+// The string may begin with a leading sign: "+" or "-".
+//
+// If the base argument is 0, the true base is implied by the string's
+// prefix following the sign (if present): 2 for "0b", 8 for "0" or "0o",
+// 16 for "0x", and 10 otherwise. Also, for argument base 0 only,
+// underscore characters are permitted as defined by the Go syntax for
+// [integer literals].
+//
+// The bitSize argument specifies the integer type
+// that the result must fit into. Bit sizes 0, 8, 16, 32, and 64
+// correspond to int, int8, int16, int32, and int64.
+// If bitSize is below 0 or above 64, an error is returned.
+//
+// The errors that ParseInt returns have concrete type *NumError
+// and include err.Num = s. If s is empty or contains invalid
+// digits, err.Err = ErrSyntax and the returned value is 0;
+// if the value corresponding to s cannot be represented by a
+// signed integer of the given size, err.Err = ErrRange and the
+// returned value is the maximum magnitude integer of the
+// appropriate bitSize and sign.
+//
+// [integer literals]: https://go.dev/ref/spec#Integer_literals
+func ParseInt(s string, base int, bitSize int) (i int64, err error) {
+	const fnParseInt = "ParseInt"
+
+	if len(s) == 0 {
+		return 0, syntaxError(fnParseInt, s)
+	}
+
+	// Split off an optional sign; digits is what ParseUint gets to see.
+	negative := false
+	digits := s
+	switch s[0] {
+	case '-':
+		negative = true
+		digits = s[1:]
+	case '+':
+		digits = s[1:]
+	}
+
+	// Parse the magnitude. A range error is not returned here: ParseUint
+	// hands back its maximum in that case, which the signed range checks
+	// below turn into a range error carrying the full input.
+	magnitude, perr := ParseUint(digits, base, bitSize)
+	if perr != nil {
+		if nerr := perr.(*NumError); nerr.Err != ErrRange {
+			// Re-label the error so it names this function and the
+			// complete input, sign included.
+			nerr.Func = fnParseInt
+			nerr.Num = cloneString(s)
+			return 0, nerr
+		}
+	}
+
+	if bitSize == 0 {
+		bitSize = IntSize
+	}
+
+	// limit is 2^(bitSize-1): one past the largest positive value and the
+	// magnitude of the most negative value.
+	limit := uint64(1 << uint(bitSize-1))
+	switch {
+	case negative && magnitude > limit:
+		return -int64(limit), rangeError(fnParseInt, s)
+	case !negative && magnitude >= limit:
+		return int64(limit - 1), rangeError(fnParseInt, s)
+	case negative:
+		return -int64(magnitude), nil
+	}
+	return int64(magnitude), nil
+}
+
+// Atoi is equivalent to ParseInt(s, 10, 0), converted to type int.
+func Atoi(s string) (int, error) {
+	const fnAtoi = "Atoi"
+
+	// Inputs of at most fastLimit bytes (sign included) take the fast path
+	// for small integers that fit int type: 9 bytes when int is 32 bits
+	// wide, 18 when it is 64 bits wide.
+	fastLimit := 18
+	if intSize == 32 {
+		fastLimit = 9
+	}
+
+	if len(s) == 0 || len(s) > fastLimit {
+		// Slow path for invalid, big, or underscored integers.
+		i64, err := ParseInt(s, 10, 0)
+		if nerr, ok := err.(*NumError); ok {
+			nerr.Func = fnAtoi
+		}
+		return int(i64), err
+	}
+
+	// Fast path: plain decimal digits after an optional sign.
+	digits := s
+	negative := s[0] == '-'
+	if negative || s[0] == '+' {
+		digits = s[1:]
+		if digits == "" {
+			// A sign with nothing after it.
+			return 0, syntaxError(fnAtoi, s)
+		}
+	}
+
+	total := 0
+	for i := 0; i < len(digits); i++ {
+		// Bytes below '0' wrap around to large values, so a single
+		// comparison rejects everything outside '0'..'9'.
+		d := digits[i] - '0'
+		if d > 9 {
+			return 0, syntaxError(fnAtoi, s)
+		}
+		total = total*10 + int(d)
+	}
+	if negative {
+		total = -total
+	}
+	return total, nil
+}
+
+// underscoreOK reports whether the underscores in s are allowed.
+// Checking them in this one function lets all the parsers skip over them simply.
+// Underscore must appear only between digits or between a base prefix and a digit.
+func underscoreOK(s string) bool {
+ // saw tracks the last character (class) we saw:
+ // ^ for beginning of number,
+ // 0 for a digit or base prefix,
+ // _ for an underscore,
+ // ! for none of the above.
+ saw := '^'
+ i := 0
+
+ // Optional sign.
+ if len(s) >= 1 && (s[0] == '-' || s[0] == '+') {
+ s = s[1:]
+ }
+
+ // Optional base prefix.
+ hex := false
+ if len(s) >= 2 && s[0] == '0' && (lower(s[1]) == 'b' || lower(s[1]) == 'o' || lower(s[1]) == 'x') {
+ i = 2
+ saw = '0' // base prefix counts as a digit for "underscore as digit separator"
+ hex = lower(s[1]) == 'x'
+ }
+
+ // Number proper.
+ for ; i < len(s); i++ {
+ // Digits are always okay.
+ if '0' <= s[i] && s[i] <= '9' || hex && 'a' <= lower(s[i]) && lower(s[i]) <= 'f' {
+ saw = '0'
+ continue
+ }
+ // Underscore must follow digit.
+ if s[i] == '_' {
+ if saw != '0' {
+ return false
+ }
+ saw = '_'
+ continue
+ }
+ // Underscore must also be followed by digit.
+ if saw == '_' {
+ return false
+ }
+ // Saw non-digit, non-underscore.
+ saw = '!'
+ }
+ return saw != '_'
+}