diff options
author | Daniel Baumann <daniel.baumann@progress-linux.org> | 2024-04-16 19:23:18 +0000 |
---|---|---|
committer | Daniel Baumann <daniel.baumann@progress-linux.org> | 2024-04-16 19:23:18 +0000 |
commit | 43a123c1ae6613b3efeed291fa552ecd909d3acf (patch) | |
tree | fd92518b7024bc74031f78a1cf9e454b65e73665 /src/archive/tar/strconv.go | |
parent | Initial commit. (diff) | |
download | golang-1.20-43a123c1ae6613b3efeed291fa552ecd909d3acf.tar.xz golang-1.20-43a123c1ae6613b3efeed291fa552ecd909d3acf.zip |
Adding upstream version 1.20.14.upstream/1.20.14upstream
Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
Diffstat (limited to 'src/archive/tar/strconv.go')
-rw-r--r-- | src/archive/tar/strconv.go | 327 |
1 files changed, 327 insertions, 0 deletions
diff --git a/src/archive/tar/strconv.go b/src/archive/tar/strconv.go new file mode 100644 index 0000000..ac31963 --- /dev/null +++ b/src/archive/tar/strconv.go @@ -0,0 +1,327 @@ +// Copyright 2016 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package tar + +import ( + "bytes" + "fmt" + "strconv" + "strings" + "time" +) + +// hasNUL reports whether the NUL character exists within s. +func hasNUL(s string) bool { + return strings.Contains(s, "\x00") +} + +// isASCII reports whether the input is an ASCII C-style string. +func isASCII(s string) bool { + for _, c := range s { + if c >= 0x80 || c == 0x00 { + return false + } + } + return true +} + +// toASCII converts the input to an ASCII C-style string. +// This is a best effort conversion, so invalid characters are dropped. +func toASCII(s string) string { + if isASCII(s) { + return s + } + b := make([]byte, 0, len(s)) + for _, c := range s { + if c < 0x80 && c != 0x00 { + b = append(b, byte(c)) + } + } + return string(b) +} + +type parser struct { + err error // Last error seen +} + +type formatter struct { + err error // Last error seen +} + +// parseString parses bytes as a NUL-terminated C-style string. +// If a NUL byte is not found then the whole slice is returned as a string. +func (*parser) parseString(b []byte) string { + if i := bytes.IndexByte(b, 0); i >= 0 { + return string(b[:i]) + } + return string(b) +} + +// formatString copies s into b, NUL-terminating if possible. +func (f *formatter) formatString(b []byte, s string) { + if len(s) > len(b) { + f.err = ErrFieldTooLong + } + copy(b, s) + if len(s) < len(b) { + b[len(s)] = 0 + } + + // Some buggy readers treat regular files with a trailing slash + // in the V7 path field as a directory even though the full path + // recorded elsewhere (e.g., via PAX record) contains no trailing slash. + if len(s) > len(b) && b[len(b)-1] == '/' { + n := len(strings.TrimRight(s[:len(b)], "/")) + b[n] = 0 // Replace trailing slash with NUL terminator + } +} + +// fitsInBase256 reports whether x can be encoded into n bytes using base-256 +// encoding. Unlike octal encoding, base-256 encoding does not require that the +// string ends with a NUL character. Thus, all n bytes are available for output. +// +// If operating in binary mode, this assumes strict GNU binary mode; which means +// that the first byte can only be either 0x80 or 0xff. Thus, the first byte is +// equivalent to the sign bit in two's complement form. +func fitsInBase256(n int, x int64) bool { + binBits := uint(n-1) * 8 + return n >= 9 || (x >= -1<<binBits && x < 1<<binBits) +} + +// parseNumeric parses the input as being encoded in either base-256 or octal. +// This function may return negative numbers. +// If parsing fails or an integer overflow occurs, err will be set. +func (p *parser) parseNumeric(b []byte) int64 { + // Check for base-256 (binary) format first. + // If the first bit is set, then all following bits constitute a two's + // complement encoded number in big-endian byte order. + if len(b) > 0 && b[0]&0x80 != 0 { + // Handling negative numbers relies on the following identity: + // -a-1 == ^a + // + // If the number is negative, we use an inversion mask to invert the + // data bytes and treat the value as an unsigned number. + var inv byte // 0x00 if positive or zero, 0xff if negative + if b[0]&0x40 != 0 { + inv = 0xff + } + + var x uint64 + for i, c := range b { + c ^= inv // Inverts c only if inv is 0xff, otherwise does nothing + if i == 0 { + c &= 0x7f // Ignore signal bit in first byte + } + if (x >> 56) > 0 { + p.err = ErrHeader // Integer overflow + return 0 + } + x = x<<8 | uint64(c) + } + if (x >> 63) > 0 { + p.err = ErrHeader // Integer overflow + return 0 + } + if inv == 0xff { + return ^int64(x) + } + return int64(x) + } + + // Normal case is base-8 (octal) format. + return p.parseOctal(b) +} + +// formatNumeric encodes x into b using base-8 (octal) encoding if possible. +// Otherwise it will attempt to use base-256 (binary) encoding. +func (f *formatter) formatNumeric(b []byte, x int64) { + if fitsInOctal(len(b), x) { + f.formatOctal(b, x) + return + } + + if fitsInBase256(len(b), x) { + for i := len(b) - 1; i >= 0; i-- { + b[i] = byte(x) + x >>= 8 + } + b[0] |= 0x80 // Highest bit indicates binary format + return + } + + f.formatOctal(b, 0) // Last resort, just write zero + f.err = ErrFieldTooLong +} + +func (p *parser) parseOctal(b []byte) int64 { + // Because unused fields are filled with NULs, we need + // to skip leading NULs. Fields may also be padded with + // spaces or NULs. + // So we remove leading and trailing NULs and spaces to + // be sure. + b = bytes.Trim(b, " \x00") + + if len(b) == 0 { + return 0 + } + x, perr := strconv.ParseUint(p.parseString(b), 8, 64) + if perr != nil { + p.err = ErrHeader + } + return int64(x) +} + +func (f *formatter) formatOctal(b []byte, x int64) { + if !fitsInOctal(len(b), x) { + x = 0 // Last resort, just write zero + f.err = ErrFieldTooLong + } + + s := strconv.FormatInt(x, 8) + // Add leading zeros, but leave room for a NUL. + if n := len(b) - len(s) - 1; n > 0 { + s = strings.Repeat("0", n) + s + } + f.formatString(b, s) +} + +// fitsInOctal reports whether the integer x fits in a field n-bytes long +// using octal encoding with the appropriate NUL terminator. +func fitsInOctal(n int, x int64) bool { + octBits := uint(n-1) * 3 + return x >= 0 && (n >= 22 || x < 1<<octBits) +} + +// parsePAXTime takes a string of the form %d.%d as described in the PAX +// specification. Note that this implementation allows for negative timestamps, +// which is allowed for by the PAX specification, but not always portable. +func parsePAXTime(s string) (time.Time, error) { + const maxNanoSecondDigits = 9 + + // Split string into seconds and sub-seconds parts. + ss, sn, _ := strings.Cut(s, ".") + + // Parse the seconds. + secs, err := strconv.ParseInt(ss, 10, 64) + if err != nil { + return time.Time{}, ErrHeader + } + if len(sn) == 0 { + return time.Unix(secs, 0), nil // No sub-second values + } + + // Parse the nanoseconds. + if strings.Trim(sn, "0123456789") != "" { + return time.Time{}, ErrHeader + } + if len(sn) < maxNanoSecondDigits { + sn += strings.Repeat("0", maxNanoSecondDigits-len(sn)) // Right pad + } else { + sn = sn[:maxNanoSecondDigits] // Right truncate + } + nsecs, _ := strconv.ParseInt(sn, 10, 64) // Must succeed + if len(ss) > 0 && ss[0] == '-' { + return time.Unix(secs, -1*nsecs), nil // Negative correction + } + return time.Unix(secs, nsecs), nil +} + +// formatPAXTime converts ts into a time of the form %d.%d as described in the +// PAX specification. This function is capable of negative timestamps. +func formatPAXTime(ts time.Time) (s string) { + secs, nsecs := ts.Unix(), ts.Nanosecond() + if nsecs == 0 { + return strconv.FormatInt(secs, 10) + } + + // If seconds is negative, then perform correction. + sign := "" + if secs < 0 { + sign = "-" // Remember sign + secs = -(secs + 1) // Add a second to secs + nsecs = -(nsecs - 1e9) // Take that second away from nsecs + } + return strings.TrimRight(fmt.Sprintf("%s%d.%09d", sign, secs, nsecs), "0") +} + +// parsePAXRecord parses the input PAX record string into a key-value pair. +// If parsing is successful, it will slice off the currently read record and +// return the remainder as r. +func parsePAXRecord(s string) (k, v, r string, err error) { + // The size field ends at the first space. + nStr, rest, ok := strings.Cut(s, " ") + if !ok { + return "", "", s, ErrHeader + } + + // Parse the first token as a decimal integer. + n, perr := strconv.ParseInt(nStr, 10, 0) // Intentionally parse as native int + if perr != nil || n < 5 || n > int64(len(s)) { + return "", "", s, ErrHeader + } + n -= int64(len(nStr) + 1) // convert from index in s to index in rest + if n <= 0 { + return "", "", s, ErrHeader + } + + // Extract everything between the space and the final newline. + rec, nl, rem := rest[:n-1], rest[n-1:n], rest[n:] + if nl != "\n" { + return "", "", s, ErrHeader + } + + // The first equals separates the key from the value. + k, v, ok = strings.Cut(rec, "=") + if !ok { + return "", "", s, ErrHeader + } + + if !validPAXRecord(k, v) { + return "", "", s, ErrHeader + } + return k, v, rem, nil +} + +// formatPAXRecord formats a single PAX record, prefixing it with the +// appropriate length. +func formatPAXRecord(k, v string) (string, error) { + if !validPAXRecord(k, v) { + return "", ErrHeader + } + + const padding = 3 // Extra padding for ' ', '=', and '\n' + size := len(k) + len(v) + padding + size += len(strconv.Itoa(size)) + record := strconv.Itoa(size) + " " + k + "=" + v + "\n" + + // Final adjustment if adding size field increased the record size. + if len(record) != size { + size = len(record) + record = strconv.Itoa(size) + " " + k + "=" + v + "\n" + } + return record, nil +} + +// validPAXRecord reports whether the key-value pair is valid where each +// record is formatted as: +// +// "%d %s=%s\n" % (size, key, value) +// +// Keys and values should be UTF-8, but the number of bad writers out there +// forces us to be a more liberal. +// Thus, we only reject all keys with NUL, and only reject NULs in values +// for the PAX version of the USTAR string fields. +// The key must not contain an '=' character. +func validPAXRecord(k, v string) bool { + if k == "" || strings.Contains(k, "=") { + return false + } + switch k { + case paxPath, paxLinkpath, paxUname, paxGname: + return !hasNUL(v) + default: + return !hasNUL(k) + } +} |