diff options
author | Daniel Baumann <daniel.baumann@progress-linux.org> | 2024-04-28 13:16:40 +0000 |
---|---|---|
committer | Daniel Baumann <daniel.baumann@progress-linux.org> | 2024-04-28 13:16:40 +0000 |
commit | 47ab3d4a42e9ab51c465c4322d2ec233f6324e6b (patch) | |
tree | a61a0ffd83f4a3def4b36e5c8e99630c559aa723 /src/archive | |
parent | Initial commit. (diff) | |
download | golang-1.18-upstream.tar.xz golang-1.18-upstream.zip |
Adding upstream version 1.18.10.upstream/1.18.10upstream
Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
Diffstat (limited to '')
96 files changed, 12458 insertions, 0 deletions
diff --git a/src/archive/tar/common.go b/src/archive/tar/common.go new file mode 100644 index 0000000..c99b5c1 --- /dev/null +++ b/src/archive/tar/common.go @@ -0,0 +1,723 @@ +// Copyright 2009 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +// Package tar implements access to tar archives. +// +// Tape archives (tar) are a file format for storing a sequence of files that +// can be read and written in a streaming manner. +// This package aims to cover most variations of the format, +// including those produced by GNU and BSD tar tools. +package tar + +import ( + "errors" + "fmt" + "io/fs" + "math" + "path" + "reflect" + "strconv" + "strings" + "time" +) + +// BUG: Use of the Uid and Gid fields in Header could overflow on 32-bit +// architectures. If a large value is encountered when decoding, the result +// stored in Header will be the truncated version. + +var ( + ErrHeader = errors.New("archive/tar: invalid tar header") + ErrWriteTooLong = errors.New("archive/tar: write too long") + ErrFieldTooLong = errors.New("archive/tar: header field too long") + ErrWriteAfterClose = errors.New("archive/tar: write after close") + errMissData = errors.New("archive/tar: sparse file references non-existent data") + errUnrefData = errors.New("archive/tar: sparse file contains unreferenced data") + errWriteHole = errors.New("archive/tar: write non-NUL byte in sparse hole") +) + +type headerError []string + +func (he headerError) Error() string { + const prefix = "archive/tar: cannot encode header" + var ss []string + for _, s := range he { + if s != "" { + ss = append(ss, s) + } + } + if len(ss) == 0 { + return prefix + } + return fmt.Sprintf("%s: %v", prefix, strings.Join(ss, "; and ")) +} + +// Type flags for Header.Typeflag. +const ( + // Type '0' indicates a regular file. + TypeReg = '0' + TypeRegA = '\x00' // Deprecated: Use TypeReg instead. + + // Type '1' to '6' are header-only flags and may not have a data body. + TypeLink = '1' // Hard link + TypeSymlink = '2' // Symbolic link + TypeChar = '3' // Character device node + TypeBlock = '4' // Block device node + TypeDir = '5' // Directory + TypeFifo = '6' // FIFO node + + // Type '7' is reserved. + TypeCont = '7' + + // Type 'x' is used by the PAX format to store key-value records that + // are only relevant to the next file. + // This package transparently handles these types. + TypeXHeader = 'x' + + // Type 'g' is used by the PAX format to store key-value records that + // are relevant to all subsequent files. + // This package only supports parsing and composing such headers, + // but does not currently support persisting the global state across files. + TypeXGlobalHeader = 'g' + + // Type 'S' indicates a sparse file in the GNU format. + TypeGNUSparse = 'S' + + // Types 'L' and 'K' are used by the GNU format for a meta file + // used to store the path or link name for the next file. + // This package transparently handles these types. + TypeGNULongName = 'L' + TypeGNULongLink = 'K' +) + +// Keywords for PAX extended header records. +const ( + paxNone = "" // Indicates that no PAX key is suitable + paxPath = "path" + paxLinkpath = "linkpath" + paxSize = "size" + paxUid = "uid" + paxGid = "gid" + paxUname = "uname" + paxGname = "gname" + paxMtime = "mtime" + paxAtime = "atime" + paxCtime = "ctime" // Removed from later revision of PAX spec, but was valid + paxCharset = "charset" // Currently unused + paxComment = "comment" // Currently unused + + paxSchilyXattr = "SCHILY.xattr." + + // Keywords for GNU sparse files in a PAX extended header. + paxGNUSparse = "GNU.sparse." + paxGNUSparseNumBlocks = "GNU.sparse.numblocks" + paxGNUSparseOffset = "GNU.sparse.offset" + paxGNUSparseNumBytes = "GNU.sparse.numbytes" + paxGNUSparseMap = "GNU.sparse.map" + paxGNUSparseName = "GNU.sparse.name" + paxGNUSparseMajor = "GNU.sparse.major" + paxGNUSparseMinor = "GNU.sparse.minor" + paxGNUSparseSize = "GNU.sparse.size" + paxGNUSparseRealSize = "GNU.sparse.realsize" +) + +// basicKeys is a set of the PAX keys for which we have built-in support. +// This does not contain "charset" or "comment", which are both PAX-specific, +// so adding them as first-class features of Header is unlikely. +// Users can use the PAXRecords field to set it themselves. +var basicKeys = map[string]bool{ + paxPath: true, paxLinkpath: true, paxSize: true, paxUid: true, paxGid: true, + paxUname: true, paxGname: true, paxMtime: true, paxAtime: true, paxCtime: true, +} + +// A Header represents a single header in a tar archive. +// Some fields may not be populated. +// +// For forward compatibility, users that retrieve a Header from Reader.Next, +// mutate it in some ways, and then pass it back to Writer.WriteHeader +// should do so by creating a new Header and copying the fields +// that they are interested in preserving. +type Header struct { + // Typeflag is the type of header entry. + // The zero value is automatically promoted to either TypeReg or TypeDir + // depending on the presence of a trailing slash in Name. + Typeflag byte + + Name string // Name of file entry + Linkname string // Target name of link (valid for TypeLink or TypeSymlink) + + Size int64 // Logical file size in bytes + Mode int64 // Permission and mode bits + Uid int // User ID of owner + Gid int // Group ID of owner + Uname string // User name of owner + Gname string // Group name of owner + + // If the Format is unspecified, then Writer.WriteHeader rounds ModTime + // to the nearest second and ignores the AccessTime and ChangeTime fields. + // + // To use AccessTime or ChangeTime, specify the Format as PAX or GNU. + // To use sub-second resolution, specify the Format as PAX. + ModTime time.Time // Modification time + AccessTime time.Time // Access time (requires either PAX or GNU support) + ChangeTime time.Time // Change time (requires either PAX or GNU support) + + Devmajor int64 // Major device number (valid for TypeChar or TypeBlock) + Devminor int64 // Minor device number (valid for TypeChar or TypeBlock) + + // Xattrs stores extended attributes as PAX records under the + // "SCHILY.xattr." namespace. + // + // The following are semantically equivalent: + // h.Xattrs[key] = value + // h.PAXRecords["SCHILY.xattr."+key] = value + // + // When Writer.WriteHeader is called, the contents of Xattrs will take + // precedence over those in PAXRecords. + // + // Deprecated: Use PAXRecords instead. + Xattrs map[string]string + + // PAXRecords is a map of PAX extended header records. + // + // User-defined records should have keys of the following form: + // VENDOR.keyword + // Where VENDOR is some namespace in all uppercase, and keyword may + // not contain the '=' character (e.g., "GOLANG.pkg.version"). + // The key and value should be non-empty UTF-8 strings. + // + // When Writer.WriteHeader is called, PAX records derived from the + // other fields in Header take precedence over PAXRecords. + PAXRecords map[string]string + + // Format specifies the format of the tar header. + // + // This is set by Reader.Next as a best-effort guess at the format. + // Since the Reader liberally reads some non-compliant files, + // it is possible for this to be FormatUnknown. + // + // If the format is unspecified when Writer.WriteHeader is called, + // then it uses the first format (in the order of USTAR, PAX, GNU) + // capable of encoding this Header (see Format). + Format Format +} + +// sparseEntry represents a Length-sized fragment at Offset in the file. +type sparseEntry struct{ Offset, Length int64 } + +func (s sparseEntry) endOffset() int64 { return s.Offset + s.Length } + +// A sparse file can be represented as either a sparseDatas or a sparseHoles. +// As long as the total size is known, they are equivalent and one can be +// converted to the other form and back. The various tar formats with sparse +// file support represent sparse files in the sparseDatas form. That is, they +// specify the fragments in the file that has data, and treat everything else as +// having zero bytes. As such, the encoding and decoding logic in this package +// deals with sparseDatas. +// +// However, the external API uses sparseHoles instead of sparseDatas because the +// zero value of sparseHoles logically represents a normal file (i.e., there are +// no holes in it). On the other hand, the zero value of sparseDatas implies +// that the file has no data in it, which is rather odd. +// +// As an example, if the underlying raw file contains the 10-byte data: +// var compactFile = "abcdefgh" +// +// And the sparse map has the following entries: +// var spd sparseDatas = []sparseEntry{ +// {Offset: 2, Length: 5}, // Data fragment for 2..6 +// {Offset: 18, Length: 3}, // Data fragment for 18..20 +// } +// var sph sparseHoles = []sparseEntry{ +// {Offset: 0, Length: 2}, // Hole fragment for 0..1 +// {Offset: 7, Length: 11}, // Hole fragment for 7..17 +// {Offset: 21, Length: 4}, // Hole fragment for 21..24 +// } +// +// Then the content of the resulting sparse file with a Header.Size of 25 is: +// var sparseFile = "\x00"*2 + "abcde" + "\x00"*11 + "fgh" + "\x00"*4 +type ( + sparseDatas []sparseEntry + sparseHoles []sparseEntry +) + +// validateSparseEntries reports whether sp is a valid sparse map. +// It does not matter whether sp represents data fragments or hole fragments. +func validateSparseEntries(sp []sparseEntry, size int64) bool { + // Validate all sparse entries. These are the same checks as performed by + // the BSD tar utility. + if size < 0 { + return false + } + var pre sparseEntry + for _, cur := range sp { + switch { + case cur.Offset < 0 || cur.Length < 0: + return false // Negative values are never okay + case cur.Offset > math.MaxInt64-cur.Length: + return false // Integer overflow with large length + case cur.endOffset() > size: + return false // Region extends beyond the actual size + case pre.endOffset() > cur.Offset: + return false // Regions cannot overlap and must be in order + } + pre = cur + } + return true +} + +// alignSparseEntries mutates src and returns dst where each fragment's +// starting offset is aligned up to the nearest block edge, and each +// ending offset is aligned down to the nearest block edge. +// +// Even though the Go tar Reader and the BSD tar utility can handle entries +// with arbitrary offsets and lengths, the GNU tar utility can only handle +// offsets and lengths that are multiples of blockSize. +func alignSparseEntries(src []sparseEntry, size int64) []sparseEntry { + dst := src[:0] + for _, s := range src { + pos, end := s.Offset, s.endOffset() + pos += blockPadding(+pos) // Round-up to nearest blockSize + if end != size { + end -= blockPadding(-end) // Round-down to nearest blockSize + } + if pos < end { + dst = append(dst, sparseEntry{Offset: pos, Length: end - pos}) + } + } + return dst +} + +// invertSparseEntries converts a sparse map from one form to the other. +// If the input is sparseHoles, then it will output sparseDatas and vice-versa. +// The input must have been already validated. +// +// This function mutates src and returns a normalized map where: +// * adjacent fragments are coalesced together +// * only the last fragment may be empty +// * the endOffset of the last fragment is the total size +func invertSparseEntries(src []sparseEntry, size int64) []sparseEntry { + dst := src[:0] + var pre sparseEntry + for _, cur := range src { + if cur.Length == 0 { + continue // Skip empty fragments + } + pre.Length = cur.Offset - pre.Offset + if pre.Length > 0 { + dst = append(dst, pre) // Only add non-empty fragments + } + pre.Offset = cur.endOffset() + } + pre.Length = size - pre.Offset // Possibly the only empty fragment + return append(dst, pre) +} + +// fileState tracks the number of logical (includes sparse holes) and physical +// (actual in tar archive) bytes remaining for the current file. +// +// Invariant: logicalRemaining >= physicalRemaining +type fileState interface { + logicalRemaining() int64 + physicalRemaining() int64 +} + +// allowedFormats determines which formats can be used. +// The value returned is the logical OR of multiple possible formats. +// If the value is FormatUnknown, then the input Header cannot be encoded +// and an error is returned explaining why. +// +// As a by-product of checking the fields, this function returns paxHdrs, which +// contain all fields that could not be directly encoded. +// A value receiver ensures that this method does not mutate the source Header. +func (h Header) allowedFormats() (format Format, paxHdrs map[string]string, err error) { + format = FormatUSTAR | FormatPAX | FormatGNU + paxHdrs = make(map[string]string) + + var whyNoUSTAR, whyNoPAX, whyNoGNU string + var preferPAX bool // Prefer PAX over USTAR + verifyString := func(s string, size int, name, paxKey string) { + // NUL-terminator is optional for path and linkpath. + // Technically, it is required for uname and gname, + // but neither GNU nor BSD tar checks for it. + tooLong := len(s) > size + allowLongGNU := paxKey == paxPath || paxKey == paxLinkpath + if hasNUL(s) || (tooLong && !allowLongGNU) { + whyNoGNU = fmt.Sprintf("GNU cannot encode %s=%q", name, s) + format.mustNotBe(FormatGNU) + } + if !isASCII(s) || tooLong { + canSplitUSTAR := paxKey == paxPath + if _, _, ok := splitUSTARPath(s); !canSplitUSTAR || !ok { + whyNoUSTAR = fmt.Sprintf("USTAR cannot encode %s=%q", name, s) + format.mustNotBe(FormatUSTAR) + } + if paxKey == paxNone { + whyNoPAX = fmt.Sprintf("PAX cannot encode %s=%q", name, s) + format.mustNotBe(FormatPAX) + } else { + paxHdrs[paxKey] = s + } + } + if v, ok := h.PAXRecords[paxKey]; ok && v == s { + paxHdrs[paxKey] = v + } + } + verifyNumeric := func(n int64, size int, name, paxKey string) { + if !fitsInBase256(size, n) { + whyNoGNU = fmt.Sprintf("GNU cannot encode %s=%d", name, n) + format.mustNotBe(FormatGNU) + } + if !fitsInOctal(size, n) { + whyNoUSTAR = fmt.Sprintf("USTAR cannot encode %s=%d", name, n) + format.mustNotBe(FormatUSTAR) + if paxKey == paxNone { + whyNoPAX = fmt.Sprintf("PAX cannot encode %s=%d", name, n) + format.mustNotBe(FormatPAX) + } else { + paxHdrs[paxKey] = strconv.FormatInt(n, 10) + } + } + if v, ok := h.PAXRecords[paxKey]; ok && v == strconv.FormatInt(n, 10) { + paxHdrs[paxKey] = v + } + } + verifyTime := func(ts time.Time, size int, name, paxKey string) { + if ts.IsZero() { + return // Always okay + } + if !fitsInBase256(size, ts.Unix()) { + whyNoGNU = fmt.Sprintf("GNU cannot encode %s=%v", name, ts) + format.mustNotBe(FormatGNU) + } + isMtime := paxKey == paxMtime + fitsOctal := fitsInOctal(size, ts.Unix()) + if (isMtime && !fitsOctal) || !isMtime { + whyNoUSTAR = fmt.Sprintf("USTAR cannot encode %s=%v", name, ts) + format.mustNotBe(FormatUSTAR) + } + needsNano := ts.Nanosecond() != 0 + if !isMtime || !fitsOctal || needsNano { + preferPAX = true // USTAR may truncate sub-second measurements + if paxKey == paxNone { + whyNoPAX = fmt.Sprintf("PAX cannot encode %s=%v", name, ts) + format.mustNotBe(FormatPAX) + } else { + paxHdrs[paxKey] = formatPAXTime(ts) + } + } + if v, ok := h.PAXRecords[paxKey]; ok && v == formatPAXTime(ts) { + paxHdrs[paxKey] = v + } + } + + // Check basic fields. + var blk block + v7 := blk.toV7() + ustar := blk.toUSTAR() + gnu := blk.toGNU() + verifyString(h.Name, len(v7.name()), "Name", paxPath) + verifyString(h.Linkname, len(v7.linkName()), "Linkname", paxLinkpath) + verifyString(h.Uname, len(ustar.userName()), "Uname", paxUname) + verifyString(h.Gname, len(ustar.groupName()), "Gname", paxGname) + verifyNumeric(h.Mode, len(v7.mode()), "Mode", paxNone) + verifyNumeric(int64(h.Uid), len(v7.uid()), "Uid", paxUid) + verifyNumeric(int64(h.Gid), len(v7.gid()), "Gid", paxGid) + verifyNumeric(h.Size, len(v7.size()), "Size", paxSize) + verifyNumeric(h.Devmajor, len(ustar.devMajor()), "Devmajor", paxNone) + verifyNumeric(h.Devminor, len(ustar.devMinor()), "Devminor", paxNone) + verifyTime(h.ModTime, len(v7.modTime()), "ModTime", paxMtime) + verifyTime(h.AccessTime, len(gnu.accessTime()), "AccessTime", paxAtime) + verifyTime(h.ChangeTime, len(gnu.changeTime()), "ChangeTime", paxCtime) + + // Check for header-only types. + var whyOnlyPAX, whyOnlyGNU string + switch h.Typeflag { + case TypeReg, TypeChar, TypeBlock, TypeFifo, TypeGNUSparse: + // Exclude TypeLink and TypeSymlink, since they may reference directories. + if strings.HasSuffix(h.Name, "/") { + return FormatUnknown, nil, headerError{"filename may not have trailing slash"} + } + case TypeXHeader, TypeGNULongName, TypeGNULongLink: + return FormatUnknown, nil, headerError{"cannot manually encode TypeXHeader, TypeGNULongName, or TypeGNULongLink headers"} + case TypeXGlobalHeader: + h2 := Header{Name: h.Name, Typeflag: h.Typeflag, Xattrs: h.Xattrs, PAXRecords: h.PAXRecords, Format: h.Format} + if !reflect.DeepEqual(h, h2) { + return FormatUnknown, nil, headerError{"only PAXRecords should be set for TypeXGlobalHeader"} + } + whyOnlyPAX = "only PAX supports TypeXGlobalHeader" + format.mayOnlyBe(FormatPAX) + } + if !isHeaderOnlyType(h.Typeflag) && h.Size < 0 { + return FormatUnknown, nil, headerError{"negative size on header-only type"} + } + + // Check PAX records. + if len(h.Xattrs) > 0 { + for k, v := range h.Xattrs { + paxHdrs[paxSchilyXattr+k] = v + } + whyOnlyPAX = "only PAX supports Xattrs" + format.mayOnlyBe(FormatPAX) + } + if len(h.PAXRecords) > 0 { + for k, v := range h.PAXRecords { + switch _, exists := paxHdrs[k]; { + case exists: + continue // Do not overwrite existing records + case h.Typeflag == TypeXGlobalHeader: + paxHdrs[k] = v // Copy all records + case !basicKeys[k] && !strings.HasPrefix(k, paxGNUSparse): + paxHdrs[k] = v // Ignore local records that may conflict + } + } + whyOnlyPAX = "only PAX supports PAXRecords" + format.mayOnlyBe(FormatPAX) + } + for k, v := range paxHdrs { + if !validPAXRecord(k, v) { + return FormatUnknown, nil, headerError{fmt.Sprintf("invalid PAX record: %q", k+" = "+v)} + } + } + + // TODO(dsnet): Re-enable this when adding sparse support. + // See https://golang.org/issue/22735 + /* + // Check sparse files. + if len(h.SparseHoles) > 0 || h.Typeflag == TypeGNUSparse { + if isHeaderOnlyType(h.Typeflag) { + return FormatUnknown, nil, headerError{"header-only type cannot be sparse"} + } + if !validateSparseEntries(h.SparseHoles, h.Size) { + return FormatUnknown, nil, headerError{"invalid sparse holes"} + } + if h.Typeflag == TypeGNUSparse { + whyOnlyGNU = "only GNU supports TypeGNUSparse" + format.mayOnlyBe(FormatGNU) + } else { + whyNoGNU = "GNU supports sparse files only with TypeGNUSparse" + format.mustNotBe(FormatGNU) + } + whyNoUSTAR = "USTAR does not support sparse files" + format.mustNotBe(FormatUSTAR) + } + */ + + // Check desired format. + if wantFormat := h.Format; wantFormat != FormatUnknown { + if wantFormat.has(FormatPAX) && !preferPAX { + wantFormat.mayBe(FormatUSTAR) // PAX implies USTAR allowed too + } + format.mayOnlyBe(wantFormat) // Set union of formats allowed and format wanted + } + if format == FormatUnknown { + switch h.Format { + case FormatUSTAR: + err = headerError{"Format specifies USTAR", whyNoUSTAR, whyOnlyPAX, whyOnlyGNU} + case FormatPAX: + err = headerError{"Format specifies PAX", whyNoPAX, whyOnlyGNU} + case FormatGNU: + err = headerError{"Format specifies GNU", whyNoGNU, whyOnlyPAX} + default: + err = headerError{whyNoUSTAR, whyNoPAX, whyNoGNU, whyOnlyPAX, whyOnlyGNU} + } + } + return format, paxHdrs, err +} + +// FileInfo returns an fs.FileInfo for the Header. +func (h *Header) FileInfo() fs.FileInfo { + return headerFileInfo{h} +} + +// headerFileInfo implements fs.FileInfo. +type headerFileInfo struct { + h *Header +} + +func (fi headerFileInfo) Size() int64 { return fi.h.Size } +func (fi headerFileInfo) IsDir() bool { return fi.Mode().IsDir() } +func (fi headerFileInfo) ModTime() time.Time { return fi.h.ModTime } +func (fi headerFileInfo) Sys() any { return fi.h } + +// Name returns the base name of the file. +func (fi headerFileInfo) Name() string { + if fi.IsDir() { + return path.Base(path.Clean(fi.h.Name)) + } + return path.Base(fi.h.Name) +} + +// Mode returns the permission and mode bits for the headerFileInfo. +func (fi headerFileInfo) Mode() (mode fs.FileMode) { + // Set file permission bits. + mode = fs.FileMode(fi.h.Mode).Perm() + + // Set setuid, setgid and sticky bits. + if fi.h.Mode&c_ISUID != 0 { + mode |= fs.ModeSetuid + } + if fi.h.Mode&c_ISGID != 0 { + mode |= fs.ModeSetgid + } + if fi.h.Mode&c_ISVTX != 0 { + mode |= fs.ModeSticky + } + + // Set file mode bits; clear perm, setuid, setgid, and sticky bits. + switch m := fs.FileMode(fi.h.Mode) &^ 07777; m { + case c_ISDIR: + mode |= fs.ModeDir + case c_ISFIFO: + mode |= fs.ModeNamedPipe + case c_ISLNK: + mode |= fs.ModeSymlink + case c_ISBLK: + mode |= fs.ModeDevice + case c_ISCHR: + mode |= fs.ModeDevice + mode |= fs.ModeCharDevice + case c_ISSOCK: + mode |= fs.ModeSocket + } + + switch fi.h.Typeflag { + case TypeSymlink: + mode |= fs.ModeSymlink + case TypeChar: + mode |= fs.ModeDevice + mode |= fs.ModeCharDevice + case TypeBlock: + mode |= fs.ModeDevice + case TypeDir: + mode |= fs.ModeDir + case TypeFifo: + mode |= fs.ModeNamedPipe + } + + return mode +} + +// sysStat, if non-nil, populates h from system-dependent fields of fi. +var sysStat func(fi fs.FileInfo, h *Header) error + +const ( + // Mode constants from the USTAR spec: + // See http://pubs.opengroup.org/onlinepubs/9699919799/utilities/pax.html#tag_20_92_13_06 + c_ISUID = 04000 // Set uid + c_ISGID = 02000 // Set gid + c_ISVTX = 01000 // Save text (sticky bit) + + // Common Unix mode constants; these are not defined in any common tar standard. + // Header.FileInfo understands these, but FileInfoHeader will never produce these. + c_ISDIR = 040000 // Directory + c_ISFIFO = 010000 // FIFO + c_ISREG = 0100000 // Regular file + c_ISLNK = 0120000 // Symbolic link + c_ISBLK = 060000 // Block special file + c_ISCHR = 020000 // Character special file + c_ISSOCK = 0140000 // Socket +) + +// FileInfoHeader creates a partially-populated Header from fi. +// If fi describes a symlink, FileInfoHeader records link as the link target. +// If fi describes a directory, a slash is appended to the name. +// +// Since fs.FileInfo's Name method only returns the base name of +// the file it describes, it may be necessary to modify Header.Name +// to provide the full path name of the file. +func FileInfoHeader(fi fs.FileInfo, link string) (*Header, error) { + if fi == nil { + return nil, errors.New("archive/tar: FileInfo is nil") + } + fm := fi.Mode() + h := &Header{ + Name: fi.Name(), + ModTime: fi.ModTime(), + Mode: int64(fm.Perm()), // or'd with c_IS* constants later + } + switch { + case fm.IsRegular(): + h.Typeflag = TypeReg + h.Size = fi.Size() + case fi.IsDir(): + h.Typeflag = TypeDir + h.Name += "/" + case fm&fs.ModeSymlink != 0: + h.Typeflag = TypeSymlink + h.Linkname = link + case fm&fs.ModeDevice != 0: + if fm&fs.ModeCharDevice != 0 { + h.Typeflag = TypeChar + } else { + h.Typeflag = TypeBlock + } + case fm&fs.ModeNamedPipe != 0: + h.Typeflag = TypeFifo + case fm&fs.ModeSocket != 0: + return nil, fmt.Errorf("archive/tar: sockets not supported") + default: + return nil, fmt.Errorf("archive/tar: unknown file mode %v", fm) + } + if fm&fs.ModeSetuid != 0 { + h.Mode |= c_ISUID + } + if fm&fs.ModeSetgid != 0 { + h.Mode |= c_ISGID + } + if fm&fs.ModeSticky != 0 { + h.Mode |= c_ISVTX + } + // If possible, populate additional fields from OS-specific + // FileInfo fields. + if sys, ok := fi.Sys().(*Header); ok { + // This FileInfo came from a Header (not the OS). Use the + // original Header to populate all remaining fields. + h.Uid = sys.Uid + h.Gid = sys.Gid + h.Uname = sys.Uname + h.Gname = sys.Gname + h.AccessTime = sys.AccessTime + h.ChangeTime = sys.ChangeTime + if sys.Xattrs != nil { + h.Xattrs = make(map[string]string) + for k, v := range sys.Xattrs { + h.Xattrs[k] = v + } + } + if sys.Typeflag == TypeLink { + // hard link + h.Typeflag = TypeLink + h.Size = 0 + h.Linkname = sys.Linkname + } + if sys.PAXRecords != nil { + h.PAXRecords = make(map[string]string) + for k, v := range sys.PAXRecords { + h.PAXRecords[k] = v + } + } + } + if sysStat != nil { + return h, sysStat(fi, h) + } + return h, nil +} + +// isHeaderOnlyType checks if the given type flag is of the type that has no +// data section even if a size is specified. +func isHeaderOnlyType(flag byte) bool { + switch flag { + case TypeLink, TypeSymlink, TypeChar, TypeBlock, TypeDir, TypeFifo: + return true + default: + return false + } +} + +func min(a, b int64) int64 { + if a < b { + return a + } + return b +} diff --git a/src/archive/tar/example_test.go b/src/archive/tar/example_test.go new file mode 100644 index 0000000..a2474b9 --- /dev/null +++ b/src/archive/tar/example_test.go @@ -0,0 +1,71 @@ +// Copyright 2013 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package tar_test + +import ( + "archive/tar" + "bytes" + "fmt" + "io" + "log" + "os" +) + +func Example_minimal() { + // Create and add some files to the archive. + var buf bytes.Buffer + tw := tar.NewWriter(&buf) + var files = []struct { + Name, Body string + }{ + {"readme.txt", "This archive contains some text files."}, + {"gopher.txt", "Gopher names:\nGeorge\nGeoffrey\nGonzo"}, + {"todo.txt", "Get animal handling license."}, + } + for _, file := range files { + hdr := &tar.Header{ + Name: file.Name, + Mode: 0600, + Size: int64(len(file.Body)), + } + if err := tw.WriteHeader(hdr); err != nil { + log.Fatal(err) + } + if _, err := tw.Write([]byte(file.Body)); err != nil { + log.Fatal(err) + } + } + if err := tw.Close(); err != nil { + log.Fatal(err) + } + + // Open and iterate through the files in the archive. + tr := tar.NewReader(&buf) + for { + hdr, err := tr.Next() + if err == io.EOF { + break // End of archive + } + if err != nil { + log.Fatal(err) + } + fmt.Printf("Contents of %s:\n", hdr.Name) + if _, err := io.Copy(os.Stdout, tr); err != nil { + log.Fatal(err) + } + fmt.Println() + } + + // Output: + // Contents of readme.txt: + // This archive contains some text files. + // Contents of gopher.txt: + // Gopher names: + // George + // Geoffrey + // Gonzo + // Contents of todo.txt: + // Get animal handling license. +} diff --git a/src/archive/tar/format.go b/src/archive/tar/format.go new file mode 100644 index 0000000..8898c43 --- /dev/null +++ b/src/archive/tar/format.go @@ -0,0 +1,307 @@ +// Copyright 2016 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package tar + +import "strings" + +// Format represents the tar archive format. +// +// The original tar format was introduced in Unix V7. +// Since then, there have been multiple competing formats attempting to +// standardize or extend the V7 format to overcome its limitations. +// The most common formats are the USTAR, PAX, and GNU formats, +// each with their own advantages and limitations. +// +// The following table captures the capabilities of each format: +// +// | USTAR | PAX | GNU +// ------------------+--------+-----------+---------- +// Name | 256B | unlimited | unlimited +// Linkname | 100B | unlimited | unlimited +// Size | uint33 | unlimited | uint89 +// Mode | uint21 | uint21 | uint57 +// Uid/Gid | uint21 | unlimited | uint57 +// Uname/Gname | 32B | unlimited | 32B +// ModTime | uint33 | unlimited | int89 +// AccessTime | n/a | unlimited | int89 +// ChangeTime | n/a | unlimited | int89 +// Devmajor/Devminor | uint21 | uint21 | uint57 +// ------------------+--------+-----------+---------- +// string encoding | ASCII | UTF-8 | binary +// sub-second times | no | yes | no +// sparse files | no | yes | yes +// +// The table's upper portion shows the Header fields, where each format reports +// the maximum number of bytes allowed for each string field and +// the integer type used to store each numeric field +// (where timestamps are stored as the number of seconds since the Unix epoch). +// +// The table's lower portion shows specialized features of each format, +// such as supported string encodings, support for sub-second timestamps, +// or support for sparse files. +// +// The Writer currently provides no support for sparse files. +type Format int + +// Constants to identify various tar formats. +const ( + // Deliberately hide the meaning of constants from public API. + _ Format = (1 << iota) / 4 // Sequence of 0, 0, 1, 2, 4, 8, etc... + + // FormatUnknown indicates that the format is unknown. + FormatUnknown + + // The format of the original Unix V7 tar tool prior to standardization. + formatV7 + + // FormatUSTAR represents the USTAR header format defined in POSIX.1-1988. + // + // While this format is compatible with most tar readers, + // the format has several limitations making it unsuitable for some usages. + // Most notably, it cannot support sparse files, files larger than 8GiB, + // filenames larger than 256 characters, and non-ASCII filenames. + // + // Reference: + // http://pubs.opengroup.org/onlinepubs/9699919799/utilities/pax.html#tag_20_92_13_06 + FormatUSTAR + + // FormatPAX represents the PAX header format defined in POSIX.1-2001. + // + // PAX extends USTAR by writing a special file with Typeflag TypeXHeader + // preceding the original header. This file contains a set of key-value + // records, which are used to overcome USTAR's shortcomings, in addition to + // providing the ability to have sub-second resolution for timestamps. + // + // Some newer formats add their own extensions to PAX by defining their + // own keys and assigning certain semantic meaning to the associated values. + // For example, sparse file support in PAX is implemented using keys + // defined by the GNU manual (e.g., "GNU.sparse.map"). + // + // Reference: + // http://pubs.opengroup.org/onlinepubs/009695399/utilities/pax.html + FormatPAX + + // FormatGNU represents the GNU header format. + // + // The GNU header format is older than the USTAR and PAX standards and + // is not compatible with them. The GNU format supports + // arbitrary file sizes, filenames of arbitrary encoding and length, + // sparse files, and other features. + // + // It is recommended that PAX be chosen over GNU unless the target + // application can only parse GNU formatted archives. + // + // Reference: + // https://www.gnu.org/software/tar/manual/html_node/Standard.html + FormatGNU + + // Schily's tar format, which is incompatible with USTAR. + // This does not cover STAR extensions to the PAX format; these fall under + // the PAX format. + formatSTAR + + formatMax +) + +func (f Format) has(f2 Format) bool { return f&f2 != 0 } +func (f *Format) mayBe(f2 Format) { *f |= f2 } +func (f *Format) mayOnlyBe(f2 Format) { *f &= f2 } +func (f *Format) mustNotBe(f2 Format) { *f &^= f2 } + +var formatNames = map[Format]string{ + formatV7: "V7", FormatUSTAR: "USTAR", FormatPAX: "PAX", FormatGNU: "GNU", formatSTAR: "STAR", +} + +func (f Format) String() string { + var ss []string + for f2 := Format(1); f2 < formatMax; f2 <<= 1 { + if f.has(f2) { + ss = append(ss, formatNames[f2]) + } + } + switch len(ss) { + case 0: + return "<unknown>" + case 1: + return ss[0] + default: + return "(" + strings.Join(ss, " | ") + ")" + } +} + +// Magics used to identify various formats. +const ( + magicGNU, versionGNU = "ustar ", " \x00" + magicUSTAR, versionUSTAR = "ustar\x00", "00" + trailerSTAR = "tar\x00" +) + +// Size constants from various tar specifications. +const ( + blockSize = 512 // Size of each block in a tar stream + nameSize = 100 // Max length of the name field in USTAR format + prefixSize = 155 // Max length of the prefix field in USTAR format + + // Max length of a special file (PAX header, GNU long name or link). + // This matches the limit used by libarchive. + maxSpecialFileSize = 1 << 20 +) + +// blockPadding computes the number of bytes needed to pad offset up to the +// nearest block edge where 0 <= n < blockSize. +func blockPadding(offset int64) (n int64) { + return -offset & (blockSize - 1) +} + +var zeroBlock block + +type block [blockSize]byte + +// Convert block to any number of formats. +func (b *block) toV7() *headerV7 { return (*headerV7)(b) } +func (b *block) toGNU() *headerGNU { return (*headerGNU)(b) } +func (b *block) toSTAR() *headerSTAR { return (*headerSTAR)(b) } +func (b *block) toUSTAR() *headerUSTAR { return (*headerUSTAR)(b) } +func (b *block) toSparse() sparseArray { return sparseArray(b[:]) } + +// GetFormat checks that the block is a valid tar header based on the checksum. +// It then attempts to guess the specific format based on magic values. +// If the checksum fails, then FormatUnknown is returned. +func (b *block) getFormat() Format { + // Verify checksum. + var p parser + value := p.parseOctal(b.toV7().chksum()) + chksum1, chksum2 := b.computeChecksum() + if p.err != nil || (value != chksum1 && value != chksum2) { + return FormatUnknown + } + + // Guess the magic values. + magic := string(b.toUSTAR().magic()) + version := string(b.toUSTAR().version()) + trailer := string(b.toSTAR().trailer()) + switch { + case magic == magicUSTAR && trailer == trailerSTAR: + return formatSTAR + case magic == magicUSTAR: + return FormatUSTAR | FormatPAX + case magic == magicGNU && version == versionGNU: + return FormatGNU + default: + return formatV7 + } +} + +// setFormat writes the magic values necessary for specified format +// and then updates the checksum accordingly. +func (b *block) setFormat(format Format) { + // Set the magic values. + switch { + case format.has(formatV7): + // Do nothing. + case format.has(FormatGNU): + copy(b.toGNU().magic(), magicGNU) + copy(b.toGNU().version(), versionGNU) + case format.has(formatSTAR): + copy(b.toSTAR().magic(), magicUSTAR) + copy(b.toSTAR().version(), versionUSTAR) + copy(b.toSTAR().trailer(), trailerSTAR) + case format.has(FormatUSTAR | FormatPAX): + copy(b.toUSTAR().magic(), magicUSTAR) + copy(b.toUSTAR().version(), versionUSTAR) + default: + panic("invalid format") + } + + // Update checksum. + // This field is special in that it is terminated by a NULL then space. + var f formatter + field := b.toV7().chksum() + chksum, _ := b.computeChecksum() // Possible values are 256..128776 + f.formatOctal(field[:7], chksum) // Never fails since 128776 < 262143 + field[7] = ' ' +} + +// computeChecksum computes the checksum for the header block. +// POSIX specifies a sum of the unsigned byte values, but the Sun tar used +// signed byte values. +// We compute and return both. +func (b *block) computeChecksum() (unsigned, signed int64) { + for i, c := range b { + if 148 <= i && i < 156 { + c = ' ' // Treat the checksum field itself as all spaces. + } + unsigned += int64(c) + signed += int64(int8(c)) + } + return unsigned, signed +} + +// Reset clears the block with all zeros. +func (b *block) reset() { + *b = block{} +} + +type headerV7 [blockSize]byte + +func (h *headerV7) name() []byte { return h[000:][:100] } +func (h *headerV7) mode() []byte { return h[100:][:8] } +func (h *headerV7) uid() []byte { return h[108:][:8] } +func (h *headerV7) gid() []byte { return h[116:][:8] } +func (h *headerV7) size() []byte { return h[124:][:12] } +func (h *headerV7) modTime() []byte { return h[136:][:12] } +func (h *headerV7) chksum() []byte { return h[148:][:8] } +func (h *headerV7) typeFlag() []byte { return h[156:][:1] } +func (h *headerV7) linkName() []byte { return h[157:][:100] } + +type headerGNU [blockSize]byte + +func (h *headerGNU) v7() *headerV7 { return (*headerV7)(h) } +func (h *headerGNU) magic() []byte { return h[257:][:6] } +func (h *headerGNU) version() []byte { return h[263:][:2] } +func (h *headerGNU) userName() []byte { return h[265:][:32] } +func (h *headerGNU) groupName() []byte { return h[297:][:32] } +func (h *headerGNU) devMajor() []byte { return h[329:][:8] } +func (h *headerGNU) devMinor() []byte { return h[337:][:8] } +func (h *headerGNU) accessTime() []byte { return h[345:][:12] } +func (h *headerGNU) changeTime() []byte { return h[357:][:12] } +func (h *headerGNU) sparse() sparseArray { return sparseArray(h[386:][:24*4+1]) } +func (h *headerGNU) realSize() []byte { return h[483:][:12] } + +type headerSTAR [blockSize]byte + +func (h *headerSTAR) v7() *headerV7 { return (*headerV7)(h) } +func (h *headerSTAR) magic() []byte { return h[257:][:6] } +func (h *headerSTAR) version() []byte { return h[263:][:2] } +func (h *headerSTAR) userName() []byte { return h[265:][:32] } +func (h *headerSTAR) groupName() []byte { return h[297:][:32] } +func (h *headerSTAR) devMajor() []byte { return h[329:][:8] } +func (h *headerSTAR) devMinor() []byte { return h[337:][:8] } +func (h *headerSTAR) prefix() []byte { return h[345:][:131] } +func (h *headerSTAR) accessTime() []byte { return h[476:][:12] } +func (h *headerSTAR) changeTime() []byte { return h[488:][:12] } +func (h *headerSTAR) trailer() []byte { return h[508:][:4] } + +type headerUSTAR [blockSize]byte + +func (h *headerUSTAR) v7() *headerV7 { return (*headerV7)(h) } +func (h *headerUSTAR) magic() []byte { return h[257:][:6] } +func (h *headerUSTAR) version() []byte { return h[263:][:2] } +func (h *headerUSTAR) userName() []byte { return h[265:][:32] } +func (h *headerUSTAR) groupName() []byte { return h[297:][:32] } +func (h *headerUSTAR) devMajor() []byte { return h[329:][:8] } +func (h *headerUSTAR) devMinor() []byte { return h[337:][:8] } +func (h *headerUSTAR) prefix() []byte { return h[345:][:155] } + +type sparseArray []byte + +func (s sparseArray) entry(i int) sparseElem { return sparseElem(s[i*24:]) } +func (s sparseArray) isExtended() []byte { return s[24*s.maxEntries():][:1] } +func (s sparseArray) maxEntries() int { return len(s) / 24 } + +type sparseElem []byte + +func (s sparseElem) offset() []byte { return s[00:][:12] } +func (s sparseElem) length() []byte { return s[12:][:12] } diff --git a/src/archive/tar/fuzz_test.go b/src/archive/tar/fuzz_test.go new file mode 100644 index 0000000..e73e0d2 --- /dev/null +++ b/src/archive/tar/fuzz_test.go @@ -0,0 +1,80 @@ +// Copyright 2021 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package tar + +import ( + "bytes" + "io" + "testing" +) + +func FuzzReader(f *testing.F) { + b := bytes.NewBuffer(nil) + w := NewWriter(b) + inp := []byte("Lorem ipsum dolor sit amet, consectetur adipiscing elit, sed do eiusmod tempor incididunt ut labore et dolore magna aliqua. Ut enim ad minim veniam, quis nostrud exercitation ullamco laboris nisi ut aliquip ex ea commodo consequat. Duis aute irure dolor in reprehenderit in voluptate velit esse cillum dolore eu fugiat nulla pariatur. Excepteur sint occaecat cupidatat non proident, sunt in culpa qui officia deserunt mollit anim id est laborum.") + err := w.WriteHeader(&Header{ + Name: "lorem.txt", + Mode: 0600, + Size: int64(len(inp)), + }) + if err != nil { + f.Fatalf("failed to create writer: %s", err) + } + _, err = w.Write(inp) + if err != nil { + f.Fatalf("failed to write file to archive: %s", err) + } + if err := w.Close(); err != nil { + f.Fatalf("failed to write archive: %s", err) + } + f.Add(b.Bytes()) + + f.Fuzz(func(t *testing.T, b []byte) { + r := NewReader(bytes.NewReader(b)) + type file struct { + header *Header + content []byte + } + files := []file{} + for { + hdr, err := r.Next() + if err == io.EOF { + break + } + if err != nil { + return + } + buf := bytes.NewBuffer(nil) + if _, err := io.Copy(buf, r); err != nil { + continue + } + files = append(files, file{header: hdr, content: buf.Bytes()}) + } + + // If we were unable to read anything out of the archive don't + // bother trying to roundtrip it. + if len(files) == 0 { + return + } + + out := bytes.NewBuffer(nil) + w := NewWriter(out) + for _, f := range files { + if err := w.WriteHeader(f.header); err != nil { + t.Fatalf("unable to write previously parsed header: %s", err) + } + if _, err := w.Write(f.content); err != nil { + t.Fatalf("unable to write previously parsed content: %s", err) + } + } + if err := w.Close(); err != nil { + t.Fatalf("Unable to write archive: %s", err) + } + + // TODO: We may want to check if the archive roundtrips. This would require + // taking into account addition of the two zero trailer blocks that Writer.Close + // appends. + }) +} diff --git a/src/archive/tar/reader.go b/src/archive/tar/reader.go new file mode 100644 index 0000000..e609c15 --- /dev/null +++ b/src/archive/tar/reader.go @@ -0,0 +1,869 @@ +// Copyright 2009 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package tar + +import ( + "bytes" + "io" + "strconv" + "strings" + "time" +) + +// Reader provides sequential access to the contents of a tar archive. +// Reader.Next advances to the next file in the archive (including the first), +// and then Reader can be treated as an io.Reader to access the file's data. +type Reader struct { + r io.Reader + pad int64 // Amount of padding (ignored) after current file entry + curr fileReader // Reader for current file entry + blk block // Buffer to use as temporary local storage + + // err is a persistent error. + // It is only the responsibility of every exported method of Reader to + // ensure that this error is sticky. + err error +} + +type fileReader interface { + io.Reader + fileState + + WriteTo(io.Writer) (int64, error) +} + +// NewReader creates a new Reader reading from r. +func NewReader(r io.Reader) *Reader { + return &Reader{r: r, curr: ®FileReader{r, 0}} +} + +// Next advances to the next entry in the tar archive. +// The Header.Size determines how many bytes can be read for the next file. +// Any remaining data in the current file is automatically discarded. +// +// io.EOF is returned at the end of the input. +func (tr *Reader) Next() (*Header, error) { + if tr.err != nil { + return nil, tr.err + } + hdr, err := tr.next() + tr.err = err + return hdr, err +} + +func (tr *Reader) next() (*Header, error) { + var paxHdrs map[string]string + var gnuLongName, gnuLongLink string + + // Externally, Next iterates through the tar archive as if it is a series of + // files. Internally, the tar format often uses fake "files" to add meta + // data that describes the next file. These meta data "files" should not + // normally be visible to the outside. As such, this loop iterates through + // one or more "header files" until it finds a "normal file". + format := FormatUSTAR | FormatPAX | FormatGNU + for { + // Discard the remainder of the file and any padding. + if err := discard(tr.r, tr.curr.physicalRemaining()); err != nil { + return nil, err + } + if _, err := tryReadFull(tr.r, tr.blk[:tr.pad]); err != nil { + return nil, err + } + tr.pad = 0 + + hdr, rawHdr, err := tr.readHeader() + if err != nil { + return nil, err + } + if err := tr.handleRegularFile(hdr); err != nil { + return nil, err + } + format.mayOnlyBe(hdr.Format) + + // Check for PAX/GNU special headers and files. + switch hdr.Typeflag { + case TypeXHeader, TypeXGlobalHeader: + format.mayOnlyBe(FormatPAX) + paxHdrs, err = parsePAX(tr) + if err != nil { + return nil, err + } + if hdr.Typeflag == TypeXGlobalHeader { + mergePAX(hdr, paxHdrs) + return &Header{ + Name: hdr.Name, + Typeflag: hdr.Typeflag, + Xattrs: hdr.Xattrs, + PAXRecords: hdr.PAXRecords, + Format: format, + }, nil + } + continue // This is a meta header affecting the next header + case TypeGNULongName, TypeGNULongLink: + format.mayOnlyBe(FormatGNU) + realname, err := readSpecialFile(tr) + if err != nil { + return nil, err + } + + var p parser + switch hdr.Typeflag { + case TypeGNULongName: + gnuLongName = p.parseString(realname) + case TypeGNULongLink: + gnuLongLink = p.parseString(realname) + } + continue // This is a meta header affecting the next header + default: + // The old GNU sparse format is handled here since it is technically + // just a regular file with additional attributes. + + if err := mergePAX(hdr, paxHdrs); err != nil { + return nil, err + } + if gnuLongName != "" { + hdr.Name = gnuLongName + } + if gnuLongLink != "" { + hdr.Linkname = gnuLongLink + } + if hdr.Typeflag == TypeRegA { + if strings.HasSuffix(hdr.Name, "/") { + hdr.Typeflag = TypeDir // Legacy archives use trailing slash for directories + } else { + hdr.Typeflag = TypeReg + } + } + + // The extended headers may have updated the size. + // Thus, setup the regFileReader again after merging PAX headers. + if err := tr.handleRegularFile(hdr); err != nil { + return nil, err + } + + // Sparse formats rely on being able to read from the logical data + // section; there must be a preceding call to handleRegularFile. + if err := tr.handleSparseFile(hdr, rawHdr); err != nil { + return nil, err + } + + // Set the final guess at the format. + if format.has(FormatUSTAR) && format.has(FormatPAX) { + format.mayOnlyBe(FormatUSTAR) + } + hdr.Format = format + return hdr, nil // This is a file, so stop + } + } +} + +// handleRegularFile sets up the current file reader and padding such that it +// can only read the following logical data section. It will properly handle +// special headers that contain no data section. +func (tr *Reader) handleRegularFile(hdr *Header) error { + nb := hdr.Size + if isHeaderOnlyType(hdr.Typeflag) { + nb = 0 + } + if nb < 0 { + return ErrHeader + } + + tr.pad = blockPadding(nb) + tr.curr = ®FileReader{r: tr.r, nb: nb} + return nil +} + +// handleSparseFile checks if the current file is a sparse format of any type +// and sets the curr reader appropriately. +func (tr *Reader) handleSparseFile(hdr *Header, rawHdr *block) error { + var spd sparseDatas + var err error + if hdr.Typeflag == TypeGNUSparse { + spd, err = tr.readOldGNUSparseMap(hdr, rawHdr) + } else { + spd, err = tr.readGNUSparsePAXHeaders(hdr) + } + + // If sp is non-nil, then this is a sparse file. + // Note that it is possible for len(sp) == 0. + if err == nil && spd != nil { + if isHeaderOnlyType(hdr.Typeflag) || !validateSparseEntries(spd, hdr.Size) { + return ErrHeader + } + sph := invertSparseEntries(spd, hdr.Size) + tr.curr = &sparseFileReader{tr.curr, sph, 0} + } + return err +} + +// readGNUSparsePAXHeaders checks the PAX headers for GNU sparse headers. +// If they are found, then this function reads the sparse map and returns it. +// This assumes that 0.0 headers have already been converted to 0.1 headers +// by the PAX header parsing logic. +func (tr *Reader) readGNUSparsePAXHeaders(hdr *Header) (sparseDatas, error) { + // Identify the version of GNU headers. + var is1x0 bool + major, minor := hdr.PAXRecords[paxGNUSparseMajor], hdr.PAXRecords[paxGNUSparseMinor] + switch { + case major == "0" && (minor == "0" || minor == "1"): + is1x0 = false + case major == "1" && minor == "0": + is1x0 = true + case major != "" || minor != "": + return nil, nil // Unknown GNU sparse PAX version + case hdr.PAXRecords[paxGNUSparseMap] != "": + is1x0 = false // 0.0 and 0.1 did not have explicit version records, so guess + default: + return nil, nil // Not a PAX format GNU sparse file. + } + hdr.Format.mayOnlyBe(FormatPAX) + + // Update hdr from GNU sparse PAX headers. + if name := hdr.PAXRecords[paxGNUSparseName]; name != "" { + hdr.Name = name + } + size := hdr.PAXRecords[paxGNUSparseSize] + if size == "" { + size = hdr.PAXRecords[paxGNUSparseRealSize] + } + if size != "" { + n, err := strconv.ParseInt(size, 10, 64) + if err != nil { + return nil, ErrHeader + } + hdr.Size = n + } + + // Read the sparse map according to the appropriate format. + if is1x0 { + return readGNUSparseMap1x0(tr.curr) + } + return readGNUSparseMap0x1(hdr.PAXRecords) +} + +// mergePAX merges paxHdrs into hdr for all relevant fields of Header. +func mergePAX(hdr *Header, paxHdrs map[string]string) (err error) { + for k, v := range paxHdrs { + if v == "" { + continue // Keep the original USTAR value + } + var id64 int64 + switch k { + case paxPath: + hdr.Name = v + case paxLinkpath: + hdr.Linkname = v + case paxUname: + hdr.Uname = v + case paxGname: + hdr.Gname = v + case paxUid: + id64, err = strconv.ParseInt(v, 10, 64) + hdr.Uid = int(id64) // Integer overflow possible + case paxGid: + id64, err = strconv.ParseInt(v, 10, 64) + hdr.Gid = int(id64) // Integer overflow possible + case paxAtime: + hdr.AccessTime, err = parsePAXTime(v) + case paxMtime: + hdr.ModTime, err = parsePAXTime(v) + case paxCtime: + hdr.ChangeTime, err = parsePAXTime(v) + case paxSize: + hdr.Size, err = strconv.ParseInt(v, 10, 64) + default: + if strings.HasPrefix(k, paxSchilyXattr) { + if hdr.Xattrs == nil { + hdr.Xattrs = make(map[string]string) + } + hdr.Xattrs[k[len(paxSchilyXattr):]] = v + } + } + if err != nil { + return ErrHeader + } + } + hdr.PAXRecords = paxHdrs + return nil +} + +// parsePAX parses PAX headers. +// If an extended header (type 'x') is invalid, ErrHeader is returned +func parsePAX(r io.Reader) (map[string]string, error) { + buf, err := readSpecialFile(r) + if err != nil { + return nil, err + } + sbuf := string(buf) + + // For GNU PAX sparse format 0.0 support. + // This function transforms the sparse format 0.0 headers into format 0.1 + // headers since 0.0 headers were not PAX compliant. + var sparseMap []string + + paxHdrs := make(map[string]string) + for len(sbuf) > 0 { + key, value, residual, err := parsePAXRecord(sbuf) + if err != nil { + return nil, ErrHeader + } + sbuf = residual + + switch key { + case paxGNUSparseOffset, paxGNUSparseNumBytes: + // Validate sparse header order and value. + if (len(sparseMap)%2 == 0 && key != paxGNUSparseOffset) || + (len(sparseMap)%2 == 1 && key != paxGNUSparseNumBytes) || + strings.Contains(value, ",") { + return nil, ErrHeader + } + sparseMap = append(sparseMap, value) + default: + paxHdrs[key] = value + } + } + if len(sparseMap) > 0 { + paxHdrs[paxGNUSparseMap] = strings.Join(sparseMap, ",") + } + return paxHdrs, nil +} + +// readHeader reads the next block header and assumes that the underlying reader +// is already aligned to a block boundary. It returns the raw block of the +// header in case further processing is required. +// +// The err will be set to io.EOF only when one of the following occurs: +// * Exactly 0 bytes are read and EOF is hit. +// * Exactly 1 block of zeros is read and EOF is hit. +// * At least 2 blocks of zeros are read. +func (tr *Reader) readHeader() (*Header, *block, error) { + // Two blocks of zero bytes marks the end of the archive. + if _, err := io.ReadFull(tr.r, tr.blk[:]); err != nil { + return nil, nil, err // EOF is okay here; exactly 0 bytes read + } + if bytes.Equal(tr.blk[:], zeroBlock[:]) { + if _, err := io.ReadFull(tr.r, tr.blk[:]); err != nil { + return nil, nil, err // EOF is okay here; exactly 1 block of zeros read + } + if bytes.Equal(tr.blk[:], zeroBlock[:]) { + return nil, nil, io.EOF // normal EOF; exactly 2 block of zeros read + } + return nil, nil, ErrHeader // Zero block and then non-zero block + } + + // Verify the header matches a known format. + format := tr.blk.getFormat() + if format == FormatUnknown { + return nil, nil, ErrHeader + } + + var p parser + hdr := new(Header) + + // Unpack the V7 header. + v7 := tr.blk.toV7() + hdr.Typeflag = v7.typeFlag()[0] + hdr.Name = p.parseString(v7.name()) + hdr.Linkname = p.parseString(v7.linkName()) + hdr.Size = p.parseNumeric(v7.size()) + hdr.Mode = p.parseNumeric(v7.mode()) + hdr.Uid = int(p.parseNumeric(v7.uid())) + hdr.Gid = int(p.parseNumeric(v7.gid())) + hdr.ModTime = time.Unix(p.parseNumeric(v7.modTime()), 0) + + // Unpack format specific fields. + if format > formatV7 { + ustar := tr.blk.toUSTAR() + hdr.Uname = p.parseString(ustar.userName()) + hdr.Gname = p.parseString(ustar.groupName()) + hdr.Devmajor = p.parseNumeric(ustar.devMajor()) + hdr.Devminor = p.parseNumeric(ustar.devMinor()) + + var prefix string + switch { + case format.has(FormatUSTAR | FormatPAX): + hdr.Format = format + ustar := tr.blk.toUSTAR() + prefix = p.parseString(ustar.prefix()) + + // For Format detection, check if block is properly formatted since + // the parser is more liberal than what USTAR actually permits. + notASCII := func(r rune) bool { return r >= 0x80 } + if bytes.IndexFunc(tr.blk[:], notASCII) >= 0 { + hdr.Format = FormatUnknown // Non-ASCII characters in block. + } + nul := func(b []byte) bool { return int(b[len(b)-1]) == 0 } + if !(nul(v7.size()) && nul(v7.mode()) && nul(v7.uid()) && nul(v7.gid()) && + nul(v7.modTime()) && nul(ustar.devMajor()) && nul(ustar.devMinor())) { + hdr.Format = FormatUnknown // Numeric fields must end in NUL + } + case format.has(formatSTAR): + star := tr.blk.toSTAR() + prefix = p.parseString(star.prefix()) + hdr.AccessTime = time.Unix(p.parseNumeric(star.accessTime()), 0) + hdr.ChangeTime = time.Unix(p.parseNumeric(star.changeTime()), 0) + case format.has(FormatGNU): + hdr.Format = format + var p2 parser + gnu := tr.blk.toGNU() + if b := gnu.accessTime(); b[0] != 0 { + hdr.AccessTime = time.Unix(p2.parseNumeric(b), 0) + } + if b := gnu.changeTime(); b[0] != 0 { + hdr.ChangeTime = time.Unix(p2.parseNumeric(b), 0) + } + + // Prior to Go1.8, the Writer had a bug where it would output + // an invalid tar file in certain rare situations because the logic + // incorrectly believed that the old GNU format had a prefix field. + // This is wrong and leads to an output file that mangles the + // atime and ctime fields, which are often left unused. + // + // In order to continue reading tar files created by former, buggy + // versions of Go, we skeptically parse the atime and ctime fields. + // If we are unable to parse them and the prefix field looks like + // an ASCII string, then we fallback on the pre-Go1.8 behavior + // of treating these fields as the USTAR prefix field. + // + // Note that this will not use the fallback logic for all possible + // files generated by a pre-Go1.8 toolchain. If the generated file + // happened to have a prefix field that parses as valid + // atime and ctime fields (e.g., when they are valid octal strings), + // then it is impossible to distinguish between a valid GNU file + // and an invalid pre-Go1.8 file. + // + // See https://golang.org/issues/12594 + // See https://golang.org/issues/21005 + if p2.err != nil { + hdr.AccessTime, hdr.ChangeTime = time.Time{}, time.Time{} + ustar := tr.blk.toUSTAR() + if s := p.parseString(ustar.prefix()); isASCII(s) { + prefix = s + } + hdr.Format = FormatUnknown // Buggy file is not GNU + } + } + if len(prefix) > 0 { + hdr.Name = prefix + "/" + hdr.Name + } + } + return hdr, &tr.blk, p.err +} + +// readOldGNUSparseMap reads the sparse map from the old GNU sparse format. +// The sparse map is stored in the tar header if it's small enough. +// If it's larger than four entries, then one or more extension headers are used +// to store the rest of the sparse map. +// +// The Header.Size does not reflect the size of any extended headers used. +// Thus, this function will read from the raw io.Reader to fetch extra headers. +// This method mutates blk in the process. +func (tr *Reader) readOldGNUSparseMap(hdr *Header, blk *block) (sparseDatas, error) { + // Make sure that the input format is GNU. + // Unfortunately, the STAR format also has a sparse header format that uses + // the same type flag but has a completely different layout. + if blk.getFormat() != FormatGNU { + return nil, ErrHeader + } + hdr.Format.mayOnlyBe(FormatGNU) + + var p parser + hdr.Size = p.parseNumeric(blk.toGNU().realSize()) + if p.err != nil { + return nil, p.err + } + s := blk.toGNU().sparse() + spd := make(sparseDatas, 0, s.maxEntries()) + for { + for i := 0; i < s.maxEntries(); i++ { + // This termination condition is identical to GNU and BSD tar. + if s.entry(i).offset()[0] == 0x00 { + break // Don't return, need to process extended headers (even if empty) + } + offset := p.parseNumeric(s.entry(i).offset()) + length := p.parseNumeric(s.entry(i).length()) + if p.err != nil { + return nil, p.err + } + spd = append(spd, sparseEntry{Offset: offset, Length: length}) + } + + if s.isExtended()[0] > 0 { + // There are more entries. Read an extension header and parse its entries. + if _, err := mustReadFull(tr.r, blk[:]); err != nil { + return nil, err + } + s = blk.toSparse() + continue + } + return spd, nil // Done + } +} + +// readGNUSparseMap1x0 reads the sparse map as stored in GNU's PAX sparse format +// version 1.0. The format of the sparse map consists of a series of +// newline-terminated numeric fields. The first field is the number of entries +// and is always present. Following this are the entries, consisting of two +// fields (offset, length). This function must stop reading at the end +// boundary of the block containing the last newline. +// +// Note that the GNU manual says that numeric values should be encoded in octal +// format. However, the GNU tar utility itself outputs these values in decimal. +// As such, this library treats values as being encoded in decimal. +func readGNUSparseMap1x0(r io.Reader) (sparseDatas, error) { + var ( + cntNewline int64 + buf bytes.Buffer + blk block + ) + + // feedTokens copies data in blocks from r into buf until there are + // at least cnt newlines in buf. It will not read more blocks than needed. + feedTokens := func(n int64) error { + for cntNewline < n { + if _, err := mustReadFull(r, blk[:]); err != nil { + return err + } + buf.Write(blk[:]) + for _, c := range blk { + if c == '\n' { + cntNewline++ + } + } + } + return nil + } + + // nextToken gets the next token delimited by a newline. This assumes that + // at least one newline exists in the buffer. + nextToken := func() string { + cntNewline-- + tok, _ := buf.ReadString('\n') + return strings.TrimRight(tok, "\n") + } + + // Parse for the number of entries. + // Use integer overflow resistant math to check this. + if err := feedTokens(1); err != nil { + return nil, err + } + numEntries, err := strconv.ParseInt(nextToken(), 10, 0) // Intentionally parse as native int + if err != nil || numEntries < 0 || int(2*numEntries) < int(numEntries) { + return nil, ErrHeader + } + + // Parse for all member entries. + // numEntries is trusted after this since a potential attacker must have + // committed resources proportional to what this library used. + if err := feedTokens(2 * numEntries); err != nil { + return nil, err + } + spd := make(sparseDatas, 0, numEntries) + for i := int64(0); i < numEntries; i++ { + offset, err1 := strconv.ParseInt(nextToken(), 10, 64) + length, err2 := strconv.ParseInt(nextToken(), 10, 64) + if err1 != nil || err2 != nil { + return nil, ErrHeader + } + spd = append(spd, sparseEntry{Offset: offset, Length: length}) + } + return spd, nil +} + +// readGNUSparseMap0x1 reads the sparse map as stored in GNU's PAX sparse format +// version 0.1. The sparse map is stored in the PAX headers. +func readGNUSparseMap0x1(paxHdrs map[string]string) (sparseDatas, error) { + // Get number of entries. + // Use integer overflow resistant math to check this. + numEntriesStr := paxHdrs[paxGNUSparseNumBlocks] + numEntries, err := strconv.ParseInt(numEntriesStr, 10, 0) // Intentionally parse as native int + if err != nil || numEntries < 0 || int(2*numEntries) < int(numEntries) { + return nil, ErrHeader + } + + // There should be two numbers in sparseMap for each entry. + sparseMap := strings.Split(paxHdrs[paxGNUSparseMap], ",") + if len(sparseMap) == 1 && sparseMap[0] == "" { + sparseMap = sparseMap[:0] + } + if int64(len(sparseMap)) != 2*numEntries { + return nil, ErrHeader + } + + // Loop through the entries in the sparse map. + // numEntries is trusted now. + spd := make(sparseDatas, 0, numEntries) + for len(sparseMap) >= 2 { + offset, err1 := strconv.ParseInt(sparseMap[0], 10, 64) + length, err2 := strconv.ParseInt(sparseMap[1], 10, 64) + if err1 != nil || err2 != nil { + return nil, ErrHeader + } + spd = append(spd, sparseEntry{Offset: offset, Length: length}) + sparseMap = sparseMap[2:] + } + return spd, nil +} + +// Read reads from the current file in the tar archive. +// It returns (0, io.EOF) when it reaches the end of that file, +// until Next is called to advance to the next file. +// +// If the current file is sparse, then the regions marked as a hole +// are read back as NUL-bytes. +// +// Calling Read on special types like TypeLink, TypeSymlink, TypeChar, +// TypeBlock, TypeDir, and TypeFifo returns (0, io.EOF) regardless of what +// the Header.Size claims. +func (tr *Reader) Read(b []byte) (int, error) { + if tr.err != nil { + return 0, tr.err + } + n, err := tr.curr.Read(b) + if err != nil && err != io.EOF { + tr.err = err + } + return n, err +} + +// writeTo writes the content of the current file to w. +// The bytes written matches the number of remaining bytes in the current file. +// +// If the current file is sparse and w is an io.WriteSeeker, +// then writeTo uses Seek to skip past holes defined in Header.SparseHoles, +// assuming that skipped regions are filled with NULs. +// This always writes the last byte to ensure w is the right size. +// +// TODO(dsnet): Re-export this when adding sparse file support. +// See https://golang.org/issue/22735 +func (tr *Reader) writeTo(w io.Writer) (int64, error) { + if tr.err != nil { + return 0, tr.err + } + n, err := tr.curr.WriteTo(w) + if err != nil { + tr.err = err + } + return n, err +} + +// regFileReader is a fileReader for reading data from a regular file entry. +type regFileReader struct { + r io.Reader // Underlying Reader + nb int64 // Number of remaining bytes to read +} + +func (fr *regFileReader) Read(b []byte) (n int, err error) { + if int64(len(b)) > fr.nb { + b = b[:fr.nb] + } + if len(b) > 0 { + n, err = fr.r.Read(b) + fr.nb -= int64(n) + } + switch { + case err == io.EOF && fr.nb > 0: + return n, io.ErrUnexpectedEOF + case err == nil && fr.nb == 0: + return n, io.EOF + default: + return n, err + } +} + +func (fr *regFileReader) WriteTo(w io.Writer) (int64, error) { + return io.Copy(w, struct{ io.Reader }{fr}) +} + +// logicalRemaining implements fileState.logicalRemaining. +func (fr regFileReader) logicalRemaining() int64 { + return fr.nb +} + +// logicalRemaining implements fileState.physicalRemaining. +func (fr regFileReader) physicalRemaining() int64 { + return fr.nb +} + +// sparseFileReader is a fileReader for reading data from a sparse file entry. +type sparseFileReader struct { + fr fileReader // Underlying fileReader + sp sparseHoles // Normalized list of sparse holes + pos int64 // Current position in sparse file +} + +func (sr *sparseFileReader) Read(b []byte) (n int, err error) { + finished := int64(len(b)) >= sr.logicalRemaining() + if finished { + b = b[:sr.logicalRemaining()] + } + + b0 := b + endPos := sr.pos + int64(len(b)) + for endPos > sr.pos && err == nil { + var nf int // Bytes read in fragment + holeStart, holeEnd := sr.sp[0].Offset, sr.sp[0].endOffset() + if sr.pos < holeStart { // In a data fragment + bf := b[:min(int64(len(b)), holeStart-sr.pos)] + nf, err = tryReadFull(sr.fr, bf) + } else { // In a hole fragment + bf := b[:min(int64(len(b)), holeEnd-sr.pos)] + nf, err = tryReadFull(zeroReader{}, bf) + } + b = b[nf:] + sr.pos += int64(nf) + if sr.pos >= holeEnd && len(sr.sp) > 1 { + sr.sp = sr.sp[1:] // Ensure last fragment always remains + } + } + + n = len(b0) - len(b) + switch { + case err == io.EOF: + return n, errMissData // Less data in dense file than sparse file + case err != nil: + return n, err + case sr.logicalRemaining() == 0 && sr.physicalRemaining() > 0: + return n, errUnrefData // More data in dense file than sparse file + case finished: + return n, io.EOF + default: + return n, nil + } +} + +func (sr *sparseFileReader) WriteTo(w io.Writer) (n int64, err error) { + ws, ok := w.(io.WriteSeeker) + if ok { + if _, err := ws.Seek(0, io.SeekCurrent); err != nil { + ok = false // Not all io.Seeker can really seek + } + } + if !ok { + return io.Copy(w, struct{ io.Reader }{sr}) + } + + var writeLastByte bool + pos0 := sr.pos + for sr.logicalRemaining() > 0 && !writeLastByte && err == nil { + var nf int64 // Size of fragment + holeStart, holeEnd := sr.sp[0].Offset, sr.sp[0].endOffset() + if sr.pos < holeStart { // In a data fragment + nf = holeStart - sr.pos + nf, err = io.CopyN(ws, sr.fr, nf) + } else { // In a hole fragment + nf = holeEnd - sr.pos + if sr.physicalRemaining() == 0 { + writeLastByte = true + nf-- + } + _, err = ws.Seek(nf, io.SeekCurrent) + } + sr.pos += nf + if sr.pos >= holeEnd && len(sr.sp) > 1 { + sr.sp = sr.sp[1:] // Ensure last fragment always remains + } + } + + // If the last fragment is a hole, then seek to 1-byte before EOF, and + // write a single byte to ensure the file is the right size. + if writeLastByte && err == nil { + _, err = ws.Write([]byte{0}) + sr.pos++ + } + + n = sr.pos - pos0 + switch { + case err == io.EOF: + return n, errMissData // Less data in dense file than sparse file + case err != nil: + return n, err + case sr.logicalRemaining() == 0 && sr.physicalRemaining() > 0: + return n, errUnrefData // More data in dense file than sparse file + default: + return n, nil + } +} + +func (sr sparseFileReader) logicalRemaining() int64 { + return sr.sp[len(sr.sp)-1].endOffset() - sr.pos +} +func (sr sparseFileReader) physicalRemaining() int64 { + return sr.fr.physicalRemaining() +} + +type zeroReader struct{} + +func (zeroReader) Read(b []byte) (int, error) { + for i := range b { + b[i] = 0 + } + return len(b), nil +} + +// mustReadFull is like io.ReadFull except it returns +// io.ErrUnexpectedEOF when io.EOF is hit before len(b) bytes are read. +func mustReadFull(r io.Reader, b []byte) (int, error) { + n, err := tryReadFull(r, b) + if err == io.EOF { + err = io.ErrUnexpectedEOF + } + return n, err +} + +// tryReadFull is like io.ReadFull except it returns +// io.EOF when it is hit before len(b) bytes are read. +func tryReadFull(r io.Reader, b []byte) (n int, err error) { + for len(b) > n && err == nil { + var nn int + nn, err = r.Read(b[n:]) + n += nn + } + if len(b) == n && err == io.EOF { + err = nil + } + return n, err +} + +// readSpecialFile is like io.ReadAll except it returns +// ErrFieldTooLong if more than maxSpecialFileSize is read. +func readSpecialFile(r io.Reader) ([]byte, error) { + buf, err := io.ReadAll(io.LimitReader(r, maxSpecialFileSize+1)) + if len(buf) > maxSpecialFileSize { + return nil, ErrFieldTooLong + } + return buf, err +} + +// discard skips n bytes in r, reporting an error if unable to do so. +func discard(r io.Reader, n int64) error { + // If possible, Seek to the last byte before the end of the data section. + // Do this because Seek is often lazy about reporting errors; this will mask + // the fact that the stream may be truncated. We can rely on the + // io.CopyN done shortly afterwards to trigger any IO errors. + var seekSkipped int64 // Number of bytes skipped via Seek + if sr, ok := r.(io.Seeker); ok && n > 1 { + // Not all io.Seeker can actually Seek. For example, os.Stdin implements + // io.Seeker, but calling Seek always returns an error and performs + // no action. Thus, we try an innocent seek to the current position + // to see if Seek is really supported. + pos1, err := sr.Seek(0, io.SeekCurrent) + if pos1 >= 0 && err == nil { + // Seek seems supported, so perform the real Seek. + pos2, err := sr.Seek(n-1, io.SeekCurrent) + if pos2 < 0 || err != nil { + return err + } + seekSkipped = pos2 - pos1 + } + } + + copySkipped, err := io.CopyN(io.Discard, r, n-seekSkipped) + if err == io.EOF && seekSkipped+copySkipped < n { + err = io.ErrUnexpectedEOF + } + return err +} diff --git a/src/archive/tar/reader_test.go b/src/archive/tar/reader_test.go new file mode 100644 index 0000000..140c736 --- /dev/null +++ b/src/archive/tar/reader_test.go @@ -0,0 +1,1619 @@ +// Copyright 2009 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package tar + +import ( + "bytes" + "compress/bzip2" + "crypto/md5" + "errors" + "fmt" + "io" + "math" + "os" + "path" + "reflect" + "strconv" + "strings" + "testing" + "time" +) + +func TestReader(t *testing.T) { + vectors := []struct { + file string // Test input file + headers []*Header // Expected output headers + chksums []string // MD5 checksum of files, leave as nil if not checked + err error // Expected error to occur + }{{ + file: "testdata/gnu.tar", + headers: []*Header{{ + Name: "small.txt", + Mode: 0640, + Uid: 73025, + Gid: 5000, + Size: 5, + ModTime: time.Unix(1244428340, 0), + Typeflag: '0', + Uname: "dsymonds", + Gname: "eng", + Format: FormatGNU, + }, { + Name: "small2.txt", + Mode: 0640, + Uid: 73025, + Gid: 5000, + Size: 11, + ModTime: time.Unix(1244436044, 0), + Typeflag: '0', + Uname: "dsymonds", + Gname: "eng", + Format: FormatGNU, + }}, + chksums: []string{ + "e38b27eaccb4391bdec553a7f3ae6b2f", + "c65bd2e50a56a2138bf1716f2fd56fe9", + }, + }, { + file: "testdata/sparse-formats.tar", + headers: []*Header{{ + Name: "sparse-gnu", + Mode: 420, + Uid: 1000, + Gid: 1000, + Size: 200, + ModTime: time.Unix(1392395740, 0), + Typeflag: 0x53, + Linkname: "", + Uname: "david", + Gname: "david", + Devmajor: 0, + Devminor: 0, + Format: FormatGNU, + }, { + Name: "sparse-posix-0.0", + Mode: 420, + Uid: 1000, + Gid: 1000, + Size: 200, + ModTime: time.Unix(1392342187, 0), + Typeflag: 0x30, + Linkname: "", + Uname: "david", + Gname: "david", + Devmajor: 0, + Devminor: 0, + PAXRecords: map[string]string{ + "GNU.sparse.size": "200", + "GNU.sparse.numblocks": "95", + "GNU.sparse.map": "1,1,3,1,5,1,7,1,9,1,11,1,13,1,15,1,17,1,19,1,21,1,23,1,25,1,27,1,29,1,31,1,33,1,35,1,37,1,39,1,41,1,43,1,45,1,47,1,49,1,51,1,53,1,55,1,57,1,59,1,61,1,63,1,65,1,67,1,69,1,71,1,73,1,75,1,77,1,79,1,81,1,83,1,85,1,87,1,89,1,91,1,93,1,95,1,97,1,99,1,101,1,103,1,105,1,107,1,109,1,111,1,113,1,115,1,117,1,119,1,121,1,123,1,125,1,127,1,129,1,131,1,133,1,135,1,137,1,139,1,141,1,143,1,145,1,147,1,149,1,151,1,153,1,155,1,157,1,159,1,161,1,163,1,165,1,167,1,169,1,171,1,173,1,175,1,177,1,179,1,181,1,183,1,185,1,187,1,189,1", + }, + Format: FormatPAX, + }, { + Name: "sparse-posix-0.1", + Mode: 420, + Uid: 1000, + Gid: 1000, + Size: 200, + ModTime: time.Unix(1392340456, 0), + Typeflag: 0x30, + Linkname: "", + Uname: "david", + Gname: "david", + Devmajor: 0, + Devminor: 0, + PAXRecords: map[string]string{ + "GNU.sparse.size": "200", + "GNU.sparse.numblocks": "95", + "GNU.sparse.map": "1,1,3,1,5,1,7,1,9,1,11,1,13,1,15,1,17,1,19,1,21,1,23,1,25,1,27,1,29,1,31,1,33,1,35,1,37,1,39,1,41,1,43,1,45,1,47,1,49,1,51,1,53,1,55,1,57,1,59,1,61,1,63,1,65,1,67,1,69,1,71,1,73,1,75,1,77,1,79,1,81,1,83,1,85,1,87,1,89,1,91,1,93,1,95,1,97,1,99,1,101,1,103,1,105,1,107,1,109,1,111,1,113,1,115,1,117,1,119,1,121,1,123,1,125,1,127,1,129,1,131,1,133,1,135,1,137,1,139,1,141,1,143,1,145,1,147,1,149,1,151,1,153,1,155,1,157,1,159,1,161,1,163,1,165,1,167,1,169,1,171,1,173,1,175,1,177,1,179,1,181,1,183,1,185,1,187,1,189,1", + "GNU.sparse.name": "sparse-posix-0.1", + }, + Format: FormatPAX, + }, { + Name: "sparse-posix-1.0", + Mode: 420, + Uid: 1000, + Gid: 1000, + Size: 200, + ModTime: time.Unix(1392337404, 0), + Typeflag: 0x30, + Linkname: "", + Uname: "david", + Gname: "david", + Devmajor: 0, + Devminor: 0, + PAXRecords: map[string]string{ + "GNU.sparse.major": "1", + "GNU.sparse.minor": "0", + "GNU.sparse.realsize": "200", + "GNU.sparse.name": "sparse-posix-1.0", + }, + Format: FormatPAX, + }, { + Name: "end", + Mode: 420, + Uid: 1000, + Gid: 1000, + Size: 4, + ModTime: time.Unix(1392398319, 0), + Typeflag: 0x30, + Linkname: "", + Uname: "david", + Gname: "david", + Devmajor: 0, + Devminor: 0, + Format: FormatGNU, + }}, + chksums: []string{ + "6f53234398c2449fe67c1812d993012f", + "6f53234398c2449fe67c1812d993012f", + "6f53234398c2449fe67c1812d993012f", + "6f53234398c2449fe67c1812d993012f", + "b0061974914468de549a2af8ced10316", + }, + }, { + file: "testdata/star.tar", + headers: []*Header{{ + Name: "small.txt", + Mode: 0640, + Uid: 73025, + Gid: 5000, + Size: 5, + ModTime: time.Unix(1244592783, 0), + Typeflag: '0', + Uname: "dsymonds", + Gname: "eng", + AccessTime: time.Unix(1244592783, 0), + ChangeTime: time.Unix(1244592783, 0), + }, { + Name: "small2.txt", + Mode: 0640, + Uid: 73025, + Gid: 5000, + Size: 11, + ModTime: time.Unix(1244592783, 0), + Typeflag: '0', + Uname: "dsymonds", + Gname: "eng", + AccessTime: time.Unix(1244592783, 0), + ChangeTime: time.Unix(1244592783, 0), + }}, + }, { + file: "testdata/v7.tar", + headers: []*Header{{ + Name: "small.txt", + Mode: 0444, + Uid: 73025, + Gid: 5000, + Size: 5, + ModTime: time.Unix(1244593104, 0), + Typeflag: '0', + }, { + Name: "small2.txt", + Mode: 0444, + Uid: 73025, + Gid: 5000, + Size: 11, + ModTime: time.Unix(1244593104, 0), + Typeflag: '0', + }}, + }, { + file: "testdata/pax.tar", + headers: []*Header{{ + Name: "a/123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100", + Mode: 0664, + Uid: 1000, + Gid: 1000, + Uname: "shane", + Gname: "shane", + Size: 7, + ModTime: time.Unix(1350244992, 23960108), + ChangeTime: time.Unix(1350244992, 23960108), + AccessTime: time.Unix(1350244992, 23960108), + Typeflag: TypeReg, + PAXRecords: map[string]string{ + "path": "a/123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100", + "mtime": "1350244992.023960108", + "atime": "1350244992.023960108", + "ctime": "1350244992.023960108", + }, + Format: FormatPAX, + }, { + Name: "a/b", + Mode: 0777, + Uid: 1000, + Gid: 1000, + Uname: "shane", + Gname: "shane", + Size: 0, + ModTime: time.Unix(1350266320, 910238425), + ChangeTime: time.Unix(1350266320, 910238425), + AccessTime: time.Unix(1350266320, 910238425), + Typeflag: TypeSymlink, + Linkname: "123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100", + PAXRecords: map[string]string{ + "linkpath": "123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100", + "mtime": "1350266320.910238425", + "atime": "1350266320.910238425", + "ctime": "1350266320.910238425", + }, + Format: FormatPAX, + }}, + }, { + file: "testdata/pax-bad-hdr-file.tar", + err: ErrHeader, + }, { + file: "testdata/pax-bad-hdr-large.tar.bz2", + err: ErrFieldTooLong, + }, { + file: "testdata/pax-bad-mtime-file.tar", + err: ErrHeader, + }, { + file: "testdata/pax-pos-size-file.tar", + headers: []*Header{{ + Name: "foo", + Mode: 0640, + Uid: 319973, + Gid: 5000, + Size: 999, + ModTime: time.Unix(1442282516, 0), + Typeflag: '0', + Uname: "joetsai", + Gname: "eng", + PAXRecords: map[string]string{ + "size": "000000000000000000000999", + }, + Format: FormatPAX, + }}, + chksums: []string{ + "0afb597b283fe61b5d4879669a350556", + }, + }, { + file: "testdata/pax-records.tar", + headers: []*Header{{ + Typeflag: TypeReg, + Name: "file", + Uname: strings.Repeat("long", 10), + ModTime: time.Unix(0, 0), + PAXRecords: map[string]string{ + "GOLANG.pkg": "tar", + "comment": "Hello, 世界", + "uname": strings.Repeat("long", 10), + }, + Format: FormatPAX, + }}, + }, { + file: "testdata/pax-global-records.tar", + headers: []*Header{{ + Typeflag: TypeXGlobalHeader, + Name: "global1", + PAXRecords: map[string]string{"path": "global1", "mtime": "1500000000.0"}, + Format: FormatPAX, + }, { + Typeflag: TypeReg, + Name: "file1", + ModTime: time.Unix(0, 0), + Format: FormatUSTAR, + }, { + Typeflag: TypeReg, + Name: "file2", + PAXRecords: map[string]string{"path": "file2"}, + ModTime: time.Unix(0, 0), + Format: FormatPAX, + }, { + Typeflag: TypeXGlobalHeader, + Name: "GlobalHead.0.0", + PAXRecords: map[string]string{"path": ""}, + Format: FormatPAX, + }, { + Typeflag: TypeReg, + Name: "file3", + ModTime: time.Unix(0, 0), + Format: FormatUSTAR, + }, { + Typeflag: TypeReg, + Name: "file4", + ModTime: time.Unix(1400000000, 0), + PAXRecords: map[string]string{"mtime": "1400000000"}, + Format: FormatPAX, + }}, + }, { + file: "testdata/nil-uid.tar", // golang.org/issue/5290 + headers: []*Header{{ + Name: "P1050238.JPG.log", + Mode: 0664, + Uid: 0, + Gid: 0, + Size: 14, + ModTime: time.Unix(1365454838, 0), + Typeflag: TypeReg, + Linkname: "", + Uname: "eyefi", + Gname: "eyefi", + Devmajor: 0, + Devminor: 0, + Format: FormatGNU, + }}, + }, { + file: "testdata/xattrs.tar", + headers: []*Header{{ + Name: "small.txt", + Mode: 0644, + Uid: 1000, + Gid: 10, + Size: 5, + ModTime: time.Unix(1386065770, 448252320), + Typeflag: '0', + Uname: "alex", + Gname: "wheel", + AccessTime: time.Unix(1389782991, 419875220), + ChangeTime: time.Unix(1389782956, 794414986), + Xattrs: map[string]string{ + "user.key": "value", + "user.key2": "value2", + // Interestingly, selinux encodes the terminating null inside the xattr + "security.selinux": "unconfined_u:object_r:default_t:s0\x00", + }, + PAXRecords: map[string]string{ + "mtime": "1386065770.44825232", + "atime": "1389782991.41987522", + "ctime": "1389782956.794414986", + "SCHILY.xattr.user.key": "value", + "SCHILY.xattr.user.key2": "value2", + "SCHILY.xattr.security.selinux": "unconfined_u:object_r:default_t:s0\x00", + }, + Format: FormatPAX, + }, { + Name: "small2.txt", + Mode: 0644, + Uid: 1000, + Gid: 10, + Size: 11, + ModTime: time.Unix(1386065770, 449252304), + Typeflag: '0', + Uname: "alex", + Gname: "wheel", + AccessTime: time.Unix(1389782991, 419875220), + ChangeTime: time.Unix(1386065770, 449252304), + Xattrs: map[string]string{ + "security.selinux": "unconfined_u:object_r:default_t:s0\x00", + }, + PAXRecords: map[string]string{ + "mtime": "1386065770.449252304", + "atime": "1389782991.41987522", + "ctime": "1386065770.449252304", + "SCHILY.xattr.security.selinux": "unconfined_u:object_r:default_t:s0\x00", + }, + Format: FormatPAX, + }}, + }, { + // Matches the behavior of GNU, BSD, and STAR tar utilities. + file: "testdata/gnu-multi-hdrs.tar", + headers: []*Header{{ + Name: "GNU2/GNU2/long-path-name", + Linkname: "GNU4/GNU4/long-linkpath-name", + ModTime: time.Unix(0, 0), + Typeflag: '2', + Format: FormatGNU, + }}, + }, { + // GNU tar file with atime and ctime fields set. + // Created with the GNU tar v1.27.1. + // tar --incremental -S -cvf gnu-incremental.tar test2 + file: "testdata/gnu-incremental.tar", + headers: []*Header{{ + Name: "test2/", + Mode: 16877, + Uid: 1000, + Gid: 1000, + Size: 14, + ModTime: time.Unix(1441973427, 0), + Typeflag: 'D', + Uname: "rawr", + Gname: "dsnet", + AccessTime: time.Unix(1441974501, 0), + ChangeTime: time.Unix(1441973436, 0), + Format: FormatGNU, + }, { + Name: "test2/foo", + Mode: 33188, + Uid: 1000, + Gid: 1000, + Size: 64, + ModTime: time.Unix(1441973363, 0), + Typeflag: '0', + Uname: "rawr", + Gname: "dsnet", + AccessTime: time.Unix(1441974501, 0), + ChangeTime: time.Unix(1441973436, 0), + Format: FormatGNU, + }, { + Name: "test2/sparse", + Mode: 33188, + Uid: 1000, + Gid: 1000, + Size: 536870912, + ModTime: time.Unix(1441973427, 0), + Typeflag: 'S', + Uname: "rawr", + Gname: "dsnet", + AccessTime: time.Unix(1441991948, 0), + ChangeTime: time.Unix(1441973436, 0), + Format: FormatGNU, + }}, + }, { + // Matches the behavior of GNU and BSD tar utilities. + file: "testdata/pax-multi-hdrs.tar", + headers: []*Header{{ + Name: "bar", + Linkname: "PAX4/PAX4/long-linkpath-name", + ModTime: time.Unix(0, 0), + Typeflag: '2', + PAXRecords: map[string]string{ + "linkpath": "PAX4/PAX4/long-linkpath-name", + }, + Format: FormatPAX, + }}, + }, { + // Both BSD and GNU tar truncate long names at first NUL even + // if there is data following that NUL character. + // This is reasonable as GNU long names are C-strings. + file: "testdata/gnu-long-nul.tar", + headers: []*Header{{ + Name: "0123456789", + Mode: 0644, + Uid: 1000, + Gid: 1000, + ModTime: time.Unix(1486082191, 0), + Typeflag: '0', + Uname: "rawr", + Gname: "dsnet", + Format: FormatGNU, + }}, + }, { + // This archive was generated by Writer but is readable by both + // GNU and BSD tar utilities. + // The archive generated by GNU is nearly byte-for-byte identical + // to the Go version except the Go version sets a negative Devminor + // just to force the GNU format. + file: "testdata/gnu-utf8.tar", + headers: []*Header{{ + Name: "☺☻☹☺☻☹☺☻☹☺☻☹☺☻☹☺☻☹☺☻☹☺☻☹☺☻☹☺☻☹☺☻☹☺☻☹☺☻☹☺☻☹☺☻☹☺☻☹☺☻☹☺☻☹", + Mode: 0644, + Uid: 1000, Gid: 1000, + ModTime: time.Unix(0, 0), + Typeflag: '0', + Uname: "☺", + Gname: "⚹", + Format: FormatGNU, + }}, + }, { + // This archive was generated by Writer but is readable by both + // GNU and BSD tar utilities. + // The archive generated by GNU is nearly byte-for-byte identical + // to the Go version except the Go version sets a negative Devminor + // just to force the GNU format. + file: "testdata/gnu-not-utf8.tar", + headers: []*Header{{ + Name: "hi\x80\x81\x82\x83bye", + Mode: 0644, + Uid: 1000, + Gid: 1000, + ModTime: time.Unix(0, 0), + Typeflag: '0', + Uname: "rawr", + Gname: "dsnet", + Format: FormatGNU, + }}, + }, { + // BSD tar v3.1.2 and GNU tar v1.27.1 both rejects PAX records + // with NULs in the key. + file: "testdata/pax-nul-xattrs.tar", + err: ErrHeader, + }, { + // BSD tar v3.1.2 rejects a PAX path with NUL in the value, while + // GNU tar v1.27.1 simply truncates at first NUL. + // We emulate the behavior of BSD since it is strange doing NUL + // truncations since PAX records are length-prefix strings instead + // of NUL-terminated C-strings. + file: "testdata/pax-nul-path.tar", + err: ErrHeader, + }, { + file: "testdata/neg-size.tar", + err: ErrHeader, + }, { + file: "testdata/issue10968.tar", + err: ErrHeader, + }, { + file: "testdata/issue11169.tar", + err: ErrHeader, + }, { + file: "testdata/issue12435.tar", + err: ErrHeader, + }, { + // Ensure that we can read back the original Header as written with + // a buggy pre-Go1.8 tar.Writer. + file: "testdata/invalid-go17.tar", + headers: []*Header{{ + Name: "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa/foo", + Uid: 010000000, + ModTime: time.Unix(0, 0), + Typeflag: '0', + }}, + }, { + // USTAR archive with a regular entry with non-zero device numbers. + file: "testdata/ustar-file-devs.tar", + headers: []*Header{{ + Name: "file", + Mode: 0644, + Typeflag: '0', + ModTime: time.Unix(0, 0), + Devmajor: 1, + Devminor: 1, + Format: FormatUSTAR, + }}, + }, { + // Generated by Go, works on BSD tar v3.1.2 and GNU tar v.1.27.1. + file: "testdata/gnu-nil-sparse-data.tar", + headers: []*Header{{ + Name: "sparse.db", + Typeflag: TypeGNUSparse, + Size: 1000, + ModTime: time.Unix(0, 0), + Format: FormatGNU, + }}, + }, { + // Generated by Go, works on BSD tar v3.1.2 and GNU tar v.1.27.1. + file: "testdata/gnu-nil-sparse-hole.tar", + headers: []*Header{{ + Name: "sparse.db", + Typeflag: TypeGNUSparse, + Size: 1000, + ModTime: time.Unix(0, 0), + Format: FormatGNU, + }}, + }, { + // Generated by Go, works on BSD tar v3.1.2 and GNU tar v.1.27.1. + file: "testdata/pax-nil-sparse-data.tar", + headers: []*Header{{ + Name: "sparse.db", + Typeflag: TypeReg, + Size: 1000, + ModTime: time.Unix(0, 0), + PAXRecords: map[string]string{ + "size": "1512", + "GNU.sparse.major": "1", + "GNU.sparse.minor": "0", + "GNU.sparse.realsize": "1000", + "GNU.sparse.name": "sparse.db", + }, + Format: FormatPAX, + }}, + }, { + // Generated by Go, works on BSD tar v3.1.2 and GNU tar v.1.27.1. + file: "testdata/pax-nil-sparse-hole.tar", + headers: []*Header{{ + Name: "sparse.db", + Typeflag: TypeReg, + Size: 1000, + ModTime: time.Unix(0, 0), + PAXRecords: map[string]string{ + "size": "512", + "GNU.sparse.major": "1", + "GNU.sparse.minor": "0", + "GNU.sparse.realsize": "1000", + "GNU.sparse.name": "sparse.db", + }, + Format: FormatPAX, + }}, + }, { + file: "testdata/trailing-slash.tar", + headers: []*Header{{ + Typeflag: TypeDir, + Name: strings.Repeat("123456789/", 30), + ModTime: time.Unix(0, 0), + PAXRecords: map[string]string{ + "path": strings.Repeat("123456789/", 30), + }, + Format: FormatPAX, + }}, + }} + + for _, v := range vectors { + t.Run(path.Base(v.file), func(t *testing.T) { + f, err := os.Open(v.file) + if err != nil { + t.Fatalf("unexpected error: %v", err) + } + defer f.Close() + + var fr io.Reader = f + if strings.HasSuffix(v.file, ".bz2") { + fr = bzip2.NewReader(fr) + } + + // Capture all headers and checksums. + var ( + tr = NewReader(fr) + hdrs []*Header + chksums []string + rdbuf = make([]byte, 8) + ) + for { + var hdr *Header + hdr, err = tr.Next() + if err != nil { + if err == io.EOF { + err = nil // Expected error + } + break + } + hdrs = append(hdrs, hdr) + + if v.chksums == nil { + continue + } + h := md5.New() + _, err = io.CopyBuffer(h, tr, rdbuf) // Effectively an incremental read + if err != nil { + break + } + chksums = append(chksums, fmt.Sprintf("%x", h.Sum(nil))) + } + + for i, hdr := range hdrs { + if i >= len(v.headers) { + t.Fatalf("entry %d: unexpected header:\ngot %+v", i, *hdr) + continue + } + if !reflect.DeepEqual(*hdr, *v.headers[i]) { + t.Fatalf("entry %d: incorrect header:\ngot %+v\nwant %+v", i, *hdr, *v.headers[i]) + } + } + if len(hdrs) != len(v.headers) { + t.Fatalf("got %d headers, want %d headers", len(hdrs), len(v.headers)) + } + + for i, sum := range chksums { + if i >= len(v.chksums) { + t.Fatalf("entry %d: unexpected sum: got %s", i, sum) + continue + } + if sum != v.chksums[i] { + t.Fatalf("entry %d: incorrect checksum: got %s, want %s", i, sum, v.chksums[i]) + } + } + + if err != v.err { + t.Fatalf("unexpected error: got %v, want %v", err, v.err) + } + f.Close() + }) + } +} + +func TestPartialRead(t *testing.T) { + type testCase struct { + cnt int // Number of bytes to read + output string // Expected value of string read + } + vectors := []struct { + file string + cases []testCase + }{{ + file: "testdata/gnu.tar", + cases: []testCase{ + {4, "Kilt"}, + {6, "Google"}, + }, + }, { + file: "testdata/sparse-formats.tar", + cases: []testCase{ + {2, "\x00G"}, + {4, "\x00G\x00o"}, + {6, "\x00G\x00o\x00G"}, + {8, "\x00G\x00o\x00G\x00o"}, + {4, "end\n"}, + }, + }} + + for _, v := range vectors { + t.Run(path.Base(v.file), func(t *testing.T) { + f, err := os.Open(v.file) + if err != nil { + t.Fatalf("Open() error: %v", err) + } + defer f.Close() + + tr := NewReader(f) + for i, tc := range v.cases { + hdr, err := tr.Next() + if err != nil || hdr == nil { + t.Fatalf("entry %d, Next(): got %v, want %v", i, err, nil) + } + buf := make([]byte, tc.cnt) + if _, err := io.ReadFull(tr, buf); err != nil { + t.Fatalf("entry %d, ReadFull(): got %v, want %v", i, err, nil) + } + if string(buf) != tc.output { + t.Fatalf("entry %d, ReadFull(): got %q, want %q", i, string(buf), tc.output) + } + } + + if _, err := tr.Next(); err != io.EOF { + t.Fatalf("Next(): got %v, want EOF", err) + } + }) + } +} + +func TestUninitializedRead(t *testing.T) { + f, err := os.Open("testdata/gnu.tar") + if err != nil { + t.Fatalf("Unexpected error: %v", err) + } + defer f.Close() + + tr := NewReader(f) + _, err = tr.Read([]byte{}) + if err == nil || err != io.EOF { + t.Errorf("Unexpected error: %v, wanted %v", err, io.EOF) + } + +} + +type reader struct{ io.Reader } +type readSeeker struct{ io.ReadSeeker } +type readBadSeeker struct{ io.ReadSeeker } + +func (rbs *readBadSeeker) Seek(int64, int) (int64, error) { return 0, fmt.Errorf("illegal seek") } + +// TestReadTruncation test the ending condition on various truncated files and +// that truncated files are still detected even if the underlying io.Reader +// satisfies io.Seeker. +func TestReadTruncation(t *testing.T) { + var ss []string + for _, p := range []string{ + "testdata/gnu.tar", + "testdata/ustar-file-reg.tar", + "testdata/pax-path-hdr.tar", + "testdata/sparse-formats.tar", + } { + buf, err := os.ReadFile(p) + if err != nil { + t.Fatalf("unexpected error: %v", err) + } + ss = append(ss, string(buf)) + } + + data1, data2, pax, sparse := ss[0], ss[1], ss[2], ss[3] + data2 += strings.Repeat("\x00", 10*512) + trash := strings.Repeat("garbage ", 64) // Exactly 512 bytes + + vectors := []struct { + input string // Input stream + cnt int // Expected number of headers read + err error // Expected error outcome + }{ + {"", 0, io.EOF}, // Empty file is a "valid" tar file + {data1[:511], 0, io.ErrUnexpectedEOF}, + {data1[:512], 1, io.ErrUnexpectedEOF}, + {data1[:1024], 1, io.EOF}, + {data1[:1536], 2, io.ErrUnexpectedEOF}, + {data1[:2048], 2, io.EOF}, + {data1, 2, io.EOF}, + {data1[:2048] + data2[:1536], 3, io.EOF}, + {data2[:511], 0, io.ErrUnexpectedEOF}, + {data2[:512], 1, io.ErrUnexpectedEOF}, + {data2[:1195], 1, io.ErrUnexpectedEOF}, + {data2[:1196], 1, io.EOF}, // Exact end of data and start of padding + {data2[:1200], 1, io.EOF}, + {data2[:1535], 1, io.EOF}, + {data2[:1536], 1, io.EOF}, // Exact end of padding + {data2[:1536] + trash[:1], 1, io.ErrUnexpectedEOF}, + {data2[:1536] + trash[:511], 1, io.ErrUnexpectedEOF}, + {data2[:1536] + trash, 1, ErrHeader}, + {data2[:2048], 1, io.EOF}, // Exactly 1 empty block + {data2[:2048] + trash[:1], 1, io.ErrUnexpectedEOF}, + {data2[:2048] + trash[:511], 1, io.ErrUnexpectedEOF}, + {data2[:2048] + trash, 1, ErrHeader}, + {data2[:2560], 1, io.EOF}, // Exactly 2 empty blocks (normal end-of-stream) + {data2[:2560] + trash[:1], 1, io.EOF}, + {data2[:2560] + trash[:511], 1, io.EOF}, + {data2[:2560] + trash, 1, io.EOF}, + {data2[:3072], 1, io.EOF}, + {pax, 0, io.EOF}, // PAX header without data is a "valid" tar file + {pax + trash[:1], 0, io.ErrUnexpectedEOF}, + {pax + trash[:511], 0, io.ErrUnexpectedEOF}, + {sparse[:511], 0, io.ErrUnexpectedEOF}, + {sparse[:512], 0, io.ErrUnexpectedEOF}, + {sparse[:3584], 1, io.EOF}, + {sparse[:9200], 1, io.EOF}, // Terminate in padding of sparse header + {sparse[:9216], 1, io.EOF}, + {sparse[:9728], 2, io.ErrUnexpectedEOF}, + {sparse[:10240], 2, io.EOF}, + {sparse[:11264], 2, io.ErrUnexpectedEOF}, + {sparse, 5, io.EOF}, + {sparse + trash, 5, io.EOF}, + } + + for i, v := range vectors { + for j := 0; j < 6; j++ { + var tr *Reader + var s1, s2 string + + switch j { + case 0: + tr = NewReader(&reader{strings.NewReader(v.input)}) + s1, s2 = "io.Reader", "auto" + case 1: + tr = NewReader(&reader{strings.NewReader(v.input)}) + s1, s2 = "io.Reader", "manual" + case 2: + tr = NewReader(&readSeeker{strings.NewReader(v.input)}) + s1, s2 = "io.ReadSeeker", "auto" + case 3: + tr = NewReader(&readSeeker{strings.NewReader(v.input)}) + s1, s2 = "io.ReadSeeker", "manual" + case 4: + tr = NewReader(&readBadSeeker{strings.NewReader(v.input)}) + s1, s2 = "ReadBadSeeker", "auto" + case 5: + tr = NewReader(&readBadSeeker{strings.NewReader(v.input)}) + s1, s2 = "ReadBadSeeker", "manual" + } + + var cnt int + var err error + for { + if _, err = tr.Next(); err != nil { + break + } + cnt++ + if s2 == "manual" { + if _, err = tr.writeTo(io.Discard); err != nil { + break + } + } + } + if err != v.err { + t.Errorf("test %d, NewReader(%s) with %s discard: got %v, want %v", + i, s1, s2, err, v.err) + } + if cnt != v.cnt { + t.Errorf("test %d, NewReader(%s) with %s discard: got %d headers, want %d headers", + i, s1, s2, cnt, v.cnt) + } + } + } +} + +// TestReadHeaderOnly tests that Reader does not attempt to read special +// header-only files. +func TestReadHeaderOnly(t *testing.T) { + f, err := os.Open("testdata/hdr-only.tar") + if err != nil { + t.Fatalf("unexpected error: %v", err) + } + defer f.Close() + + var hdrs []*Header + tr := NewReader(f) + for { + hdr, err := tr.Next() + if err == io.EOF { + break + } + if err != nil { + t.Errorf("Next(): got %v, want %v", err, nil) + continue + } + hdrs = append(hdrs, hdr) + + // If a special flag, we should read nothing. + cnt, _ := io.ReadFull(tr, []byte{0}) + if cnt > 0 && hdr.Typeflag != TypeReg { + t.Errorf("ReadFull(...): got %d bytes, want 0 bytes", cnt) + } + } + + // File is crafted with 16 entries. The later 8 are identical to the first + // 8 except that the size is set. + if len(hdrs) != 16 { + t.Fatalf("len(hdrs): got %d, want %d", len(hdrs), 16) + } + for i := 0; i < 8; i++ { + hdr1, hdr2 := hdrs[i+0], hdrs[i+8] + hdr1.Size, hdr2.Size = 0, 0 + if !reflect.DeepEqual(*hdr1, *hdr2) { + t.Errorf("incorrect header:\ngot %+v\nwant %+v", *hdr1, *hdr2) + } + } +} + +func TestMergePAX(t *testing.T) { + vectors := []struct { + in map[string]string + want *Header + ok bool + }{{ + in: map[string]string{ + "path": "a/b/c", + "uid": "1000", + "mtime": "1350244992.023960108", + }, + want: &Header{ + Name: "a/b/c", + Uid: 1000, + ModTime: time.Unix(1350244992, 23960108), + PAXRecords: map[string]string{ + "path": "a/b/c", + "uid": "1000", + "mtime": "1350244992.023960108", + }, + }, + ok: true, + }, { + in: map[string]string{ + "gid": "gtgergergersagersgers", + }, + ok: false, + }, { + in: map[string]string{ + "missing": "missing", + "SCHILY.xattr.key": "value", + }, + want: &Header{ + Xattrs: map[string]string{"key": "value"}, + PAXRecords: map[string]string{ + "missing": "missing", + "SCHILY.xattr.key": "value", + }, + }, + ok: true, + }} + + for i, v := range vectors { + got := new(Header) + err := mergePAX(got, v.in) + if v.ok && !reflect.DeepEqual(*got, *v.want) { + t.Errorf("test %d, mergePAX(...):\ngot %+v\nwant %+v", i, *got, *v.want) + } + if ok := err == nil; ok != v.ok { + t.Errorf("test %d, mergePAX(...): got %v, want %v", i, ok, v.ok) + } + } +} + +func TestParsePAX(t *testing.T) { + vectors := []struct { + in string + want map[string]string + ok bool + }{ + {"", nil, true}, + {"6 k=1\n", map[string]string{"k": "1"}, true}, + {"10 a=name\n", map[string]string{"a": "name"}, true}, + {"9 a=name\n", map[string]string{"a": "name"}, true}, + {"30 mtime=1350244992.023960108\n", map[string]string{"mtime": "1350244992.023960108"}, true}, + {"3 somelongkey=\n", nil, false}, + {"50 tooshort=\n", nil, false}, + {"13 key1=haha\n13 key2=nana\n13 key3=kaka\n", + map[string]string{"key1": "haha", "key2": "nana", "key3": "kaka"}, true}, + {"13 key1=val1\n13 key2=val2\n8 key1=\n", + map[string]string{"key1": "", "key2": "val2"}, true}, + {"22 GNU.sparse.size=10\n26 GNU.sparse.numblocks=2\n" + + "23 GNU.sparse.offset=1\n25 GNU.sparse.numbytes=2\n" + + "23 GNU.sparse.offset=3\n25 GNU.sparse.numbytes=4\n", + map[string]string{paxGNUSparseSize: "10", paxGNUSparseNumBlocks: "2", paxGNUSparseMap: "1,2,3,4"}, true}, + {"22 GNU.sparse.size=10\n26 GNU.sparse.numblocks=1\n" + + "25 GNU.sparse.numbytes=2\n23 GNU.sparse.offset=1\n", + nil, false}, + {"22 GNU.sparse.size=10\n26 GNU.sparse.numblocks=1\n" + + "25 GNU.sparse.offset=1,2\n25 GNU.sparse.numbytes=2\n", + nil, false}, + } + + for i, v := range vectors { + r := strings.NewReader(v.in) + got, err := parsePAX(r) + if !reflect.DeepEqual(got, v.want) && !(len(got) == 0 && len(v.want) == 0) { + t.Errorf("test %d, parsePAX():\ngot %v\nwant %v", i, got, v.want) + } + if ok := err == nil; ok != v.ok { + t.Errorf("test %d, parsePAX(): got %v, want %v", i, ok, v.ok) + } + } +} + +func TestReadOldGNUSparseMap(t *testing.T) { + populateSparseMap := func(sa sparseArray, sps []string) []string { + for i := 0; len(sps) > 0 && i < sa.maxEntries(); i++ { + copy(sa.entry(i), sps[0]) + sps = sps[1:] + } + if len(sps) > 0 { + copy(sa.isExtended(), "\x80") + } + return sps + } + + makeInput := func(format Format, size string, sps ...string) (out []byte) { + // Write the initial GNU header. + var blk block + gnu := blk.toGNU() + sparse := gnu.sparse() + copy(gnu.realSize(), size) + sps = populateSparseMap(sparse, sps) + if format != FormatUnknown { + blk.setFormat(format) + } + out = append(out, blk[:]...) + + // Write extended sparse blocks. + for len(sps) > 0 { + var blk block + sps = populateSparseMap(blk.toSparse(), sps) + out = append(out, blk[:]...) + } + return out + } + + makeSparseStrings := func(sp []sparseEntry) (out []string) { + var f formatter + for _, s := range sp { + var b [24]byte + f.formatNumeric(b[:12], s.Offset) + f.formatNumeric(b[12:], s.Length) + out = append(out, string(b[:])) + } + return out + } + + vectors := []struct { + input []byte + wantMap sparseDatas + wantSize int64 + wantErr error + }{{ + input: makeInput(FormatUnknown, ""), + wantErr: ErrHeader, + }, { + input: makeInput(FormatGNU, "1234", "fewa"), + wantSize: 01234, + wantErr: ErrHeader, + }, { + input: makeInput(FormatGNU, "0031"), + wantSize: 031, + }, { + input: makeInput(FormatGNU, "80"), + wantErr: ErrHeader, + }, { + input: makeInput(FormatGNU, "1234", + makeSparseStrings(sparseDatas{{0, 0}, {1, 1}})...), + wantMap: sparseDatas{{0, 0}, {1, 1}}, + wantSize: 01234, + }, { + input: makeInput(FormatGNU, "1234", + append(makeSparseStrings(sparseDatas{{0, 0}, {1, 1}}), []string{"", "blah"}...)...), + wantMap: sparseDatas{{0, 0}, {1, 1}}, + wantSize: 01234, + }, { + input: makeInput(FormatGNU, "3333", + makeSparseStrings(sparseDatas{{0, 1}, {2, 1}, {4, 1}, {6, 1}})...), + wantMap: sparseDatas{{0, 1}, {2, 1}, {4, 1}, {6, 1}}, + wantSize: 03333, + }, { + input: makeInput(FormatGNU, "", + append(append( + makeSparseStrings(sparseDatas{{0, 1}, {2, 1}}), + []string{"", ""}...), + makeSparseStrings(sparseDatas{{4, 1}, {6, 1}})...)...), + wantMap: sparseDatas{{0, 1}, {2, 1}, {4, 1}, {6, 1}}, + }, { + input: makeInput(FormatGNU, "", + makeSparseStrings(sparseDatas{{0, 1}, {2, 1}, {4, 1}, {6, 1}, {8, 1}, {10, 1}})...)[:blockSize], + wantErr: io.ErrUnexpectedEOF, + }, { + input: makeInput(FormatGNU, "", + makeSparseStrings(sparseDatas{{0, 1}, {2, 1}, {4, 1}, {6, 1}, {8, 1}, {10, 1}})...)[:3*blockSize/2], + wantErr: io.ErrUnexpectedEOF, + }, { + input: makeInput(FormatGNU, "", + makeSparseStrings(sparseDatas{{0, 1}, {2, 1}, {4, 1}, {6, 1}, {8, 1}, {10, 1}})...), + wantMap: sparseDatas{{0, 1}, {2, 1}, {4, 1}, {6, 1}, {8, 1}, {10, 1}}, + }, { + input: makeInput(FormatGNU, "", + makeSparseStrings(sparseDatas{{10 << 30, 512}, {20 << 30, 512}})...), + wantMap: sparseDatas{{10 << 30, 512}, {20 << 30, 512}}, + }} + + for i, v := range vectors { + var blk block + var hdr Header + v.input = v.input[copy(blk[:], v.input):] + tr := Reader{r: bytes.NewReader(v.input)} + got, err := tr.readOldGNUSparseMap(&hdr, &blk) + if !equalSparseEntries(got, v.wantMap) { + t.Errorf("test %d, readOldGNUSparseMap(): got %v, want %v", i, got, v.wantMap) + } + if err != v.wantErr { + t.Errorf("test %d, readOldGNUSparseMap() = %v, want %v", i, err, v.wantErr) + } + if hdr.Size != v.wantSize { + t.Errorf("test %d, Header.Size = %d, want %d", i, hdr.Size, v.wantSize) + } + } +} + +func TestReadGNUSparsePAXHeaders(t *testing.T) { + padInput := func(s string) string { + return s + string(zeroBlock[:blockPadding(int64(len(s)))]) + } + + vectors := []struct { + inputData string + inputHdrs map[string]string + wantMap sparseDatas + wantSize int64 + wantName string + wantErr error + }{{ + inputHdrs: nil, + wantErr: nil, + }, { + inputHdrs: map[string]string{ + paxGNUSparseNumBlocks: strconv.FormatInt(math.MaxInt64, 10), + paxGNUSparseMap: "0,1,2,3", + }, + wantErr: ErrHeader, + }, { + inputHdrs: map[string]string{ + paxGNUSparseNumBlocks: "4\x00", + paxGNUSparseMap: "0,1,2,3", + }, + wantErr: ErrHeader, + }, { + inputHdrs: map[string]string{ + paxGNUSparseNumBlocks: "4", + paxGNUSparseMap: "0,1,2,3", + }, + wantErr: ErrHeader, + }, { + inputHdrs: map[string]string{ + paxGNUSparseNumBlocks: "2", + paxGNUSparseMap: "0,1,2,3", + }, + wantMap: sparseDatas{{0, 1}, {2, 3}}, + }, { + inputHdrs: map[string]string{ + paxGNUSparseNumBlocks: "2", + paxGNUSparseMap: "0, 1,2,3", + }, + wantErr: ErrHeader, + }, { + inputHdrs: map[string]string{ + paxGNUSparseNumBlocks: "2", + paxGNUSparseMap: "0,1,02,3", + paxGNUSparseRealSize: "4321", + }, + wantMap: sparseDatas{{0, 1}, {2, 3}}, + wantSize: 4321, + }, { + inputHdrs: map[string]string{ + paxGNUSparseNumBlocks: "2", + paxGNUSparseMap: "0,one1,2,3", + }, + wantErr: ErrHeader, + }, { + inputHdrs: map[string]string{ + paxGNUSparseMajor: "0", + paxGNUSparseMinor: "0", + paxGNUSparseNumBlocks: "2", + paxGNUSparseMap: "0,1,2,3", + paxGNUSparseSize: "1234", + paxGNUSparseRealSize: "4321", + paxGNUSparseName: "realname", + }, + wantMap: sparseDatas{{0, 1}, {2, 3}}, + wantSize: 1234, + wantName: "realname", + }, { + inputHdrs: map[string]string{ + paxGNUSparseMajor: "0", + paxGNUSparseMinor: "0", + paxGNUSparseNumBlocks: "1", + paxGNUSparseMap: "10737418240,512", + paxGNUSparseSize: "10737418240", + paxGNUSparseName: "realname", + }, + wantMap: sparseDatas{{10737418240, 512}}, + wantSize: 10737418240, + wantName: "realname", + }, { + inputHdrs: map[string]string{ + paxGNUSparseMajor: "0", + paxGNUSparseMinor: "0", + paxGNUSparseNumBlocks: "0", + paxGNUSparseMap: "", + }, + wantMap: sparseDatas{}, + }, { + inputHdrs: map[string]string{ + paxGNUSparseMajor: "0", + paxGNUSparseMinor: "1", + paxGNUSparseNumBlocks: "4", + paxGNUSparseMap: "0,5,10,5,20,5,30,5", + }, + wantMap: sparseDatas{{0, 5}, {10, 5}, {20, 5}, {30, 5}}, + }, { + inputHdrs: map[string]string{ + paxGNUSparseMajor: "1", + paxGNUSparseMinor: "0", + paxGNUSparseNumBlocks: "4", + paxGNUSparseMap: "0,5,10,5,20,5,30,5", + }, + wantErr: io.ErrUnexpectedEOF, + }, { + inputData: padInput("0\n"), + inputHdrs: map[string]string{paxGNUSparseMajor: "1", paxGNUSparseMinor: "0"}, + wantMap: sparseDatas{}, + }, { + inputData: padInput("0\n")[:blockSize-1] + "#", + inputHdrs: map[string]string{paxGNUSparseMajor: "1", paxGNUSparseMinor: "0"}, + wantMap: sparseDatas{}, + }, { + inputData: padInput("0"), + inputHdrs: map[string]string{paxGNUSparseMajor: "1", paxGNUSparseMinor: "0"}, + wantErr: io.ErrUnexpectedEOF, + }, { + inputData: padInput("ab\n"), + inputHdrs: map[string]string{paxGNUSparseMajor: "1", paxGNUSparseMinor: "0"}, + wantErr: ErrHeader, + }, { + inputData: padInput("1\n2\n3\n"), + inputHdrs: map[string]string{paxGNUSparseMajor: "1", paxGNUSparseMinor: "0"}, + wantMap: sparseDatas{{2, 3}}, + }, { + inputData: padInput("1\n2\n"), + inputHdrs: map[string]string{paxGNUSparseMajor: "1", paxGNUSparseMinor: "0"}, + wantErr: io.ErrUnexpectedEOF, + }, { + inputData: padInput("1\n2\n\n"), + inputHdrs: map[string]string{paxGNUSparseMajor: "1", paxGNUSparseMinor: "0"}, + wantErr: ErrHeader, + }, { + inputData: string(zeroBlock[:]) + padInput("0\n"), + inputHdrs: map[string]string{paxGNUSparseMajor: "1", paxGNUSparseMinor: "0"}, + wantErr: ErrHeader, + }, { + inputData: strings.Repeat("0", blockSize) + padInput("1\n5\n1\n"), + inputHdrs: map[string]string{paxGNUSparseMajor: "1", paxGNUSparseMinor: "0"}, + wantMap: sparseDatas{{5, 1}}, + }, { + inputData: padInput(fmt.Sprintf("%d\n", int64(math.MaxInt64))), + inputHdrs: map[string]string{paxGNUSparseMajor: "1", paxGNUSparseMinor: "0"}, + wantErr: ErrHeader, + }, { + inputData: padInput(strings.Repeat("0", 300) + "1\n" + strings.Repeat("0", 1000) + "5\n" + strings.Repeat("0", 800) + "2\n"), + inputHdrs: map[string]string{paxGNUSparseMajor: "1", paxGNUSparseMinor: "0"}, + wantMap: sparseDatas{{5, 2}}, + }, { + inputData: padInput("2\n10737418240\n512\n21474836480\n512\n"), + inputHdrs: map[string]string{paxGNUSparseMajor: "1", paxGNUSparseMinor: "0"}, + wantMap: sparseDatas{{10737418240, 512}, {21474836480, 512}}, + }, { + inputData: padInput("100\n" + func() string { + var ss []string + for i := 0; i < 100; i++ { + ss = append(ss, fmt.Sprintf("%d\n%d\n", int64(i)<<30, 512)) + } + return strings.Join(ss, "") + }()), + inputHdrs: map[string]string{paxGNUSparseMajor: "1", paxGNUSparseMinor: "0"}, + wantMap: func() (spd sparseDatas) { + for i := 0; i < 100; i++ { + spd = append(spd, sparseEntry{int64(i) << 30, 512}) + } + return spd + }(), + }} + + for i, v := range vectors { + var hdr Header + hdr.PAXRecords = v.inputHdrs + r := strings.NewReader(v.inputData + "#") // Add canary byte + tr := Reader{curr: ®FileReader{r, int64(r.Len())}} + got, err := tr.readGNUSparsePAXHeaders(&hdr) + if !equalSparseEntries(got, v.wantMap) { + t.Errorf("test %d, readGNUSparsePAXHeaders(): got %v, want %v", i, got, v.wantMap) + } + if err != v.wantErr { + t.Errorf("test %d, readGNUSparsePAXHeaders() = %v, want %v", i, err, v.wantErr) + } + if hdr.Size != v.wantSize { + t.Errorf("test %d, Header.Size = %d, want %d", i, hdr.Size, v.wantSize) + } + if hdr.Name != v.wantName { + t.Errorf("test %d, Header.Name = %s, want %s", i, hdr.Name, v.wantName) + } + if v.wantErr == nil && r.Len() == 0 { + t.Errorf("test %d, canary byte unexpectedly consumed", i) + } + } +} + +// testNonEmptyReader wraps an io.Reader and ensures that +// Read is never called with an empty buffer. +type testNonEmptyReader struct{ io.Reader } + +func (r testNonEmptyReader) Read(b []byte) (int, error) { + if len(b) == 0 { + return 0, errors.New("unexpected empty Read call") + } + return r.Reader.Read(b) +} + +func TestFileReader(t *testing.T) { + type ( + testRead struct { // Read(cnt) == (wantStr, wantErr) + cnt int + wantStr string + wantErr error + } + testWriteTo struct { // WriteTo(testFile{ops}) == (wantCnt, wantErr) + ops fileOps + wantCnt int64 + wantErr error + } + testRemaining struct { // logicalRemaining() == wantLCnt, physicalRemaining() == wantPCnt + wantLCnt int64 + wantPCnt int64 + } + testFnc any // testRead | testWriteTo | testRemaining + ) + + type ( + makeReg struct { + str string + size int64 + } + makeSparse struct { + makeReg makeReg + spd sparseDatas + size int64 + } + fileMaker any // makeReg | makeSparse + ) + + vectors := []struct { + maker fileMaker + tests []testFnc + }{{ + maker: makeReg{"", 0}, + tests: []testFnc{ + testRemaining{0, 0}, + testRead{0, "", io.EOF}, + testRead{1, "", io.EOF}, + testWriteTo{nil, 0, nil}, + testRemaining{0, 0}, + }, + }, { + maker: makeReg{"", 1}, + tests: []testFnc{ + testRemaining{1, 1}, + testRead{5, "", io.ErrUnexpectedEOF}, + testWriteTo{nil, 0, io.ErrUnexpectedEOF}, + testRemaining{1, 1}, + }, + }, { + maker: makeReg{"hello", 5}, + tests: []testFnc{ + testRemaining{5, 5}, + testRead{5, "hello", io.EOF}, + testRemaining{0, 0}, + }, + }, { + maker: makeReg{"hello, world", 50}, + tests: []testFnc{ + testRemaining{50, 50}, + testRead{7, "hello, ", nil}, + testRemaining{43, 43}, + testRead{5, "world", nil}, + testRemaining{38, 38}, + testWriteTo{nil, 0, io.ErrUnexpectedEOF}, + testRead{1, "", io.ErrUnexpectedEOF}, + testRemaining{38, 38}, + }, + }, { + maker: makeReg{"hello, world", 5}, + tests: []testFnc{ + testRemaining{5, 5}, + testRead{0, "", nil}, + testRead{4, "hell", nil}, + testRemaining{1, 1}, + testWriteTo{fileOps{"o"}, 1, nil}, + testRemaining{0, 0}, + testWriteTo{nil, 0, nil}, + testRead{0, "", io.EOF}, + }, + }, { + maker: makeSparse{makeReg{"abcde", 5}, sparseDatas{{0, 2}, {5, 3}}, 8}, + tests: []testFnc{ + testRemaining{8, 5}, + testRead{3, "ab\x00", nil}, + testRead{10, "\x00\x00cde", io.EOF}, + testRemaining{0, 0}, + }, + }, { + maker: makeSparse{makeReg{"abcde", 5}, sparseDatas{{0, 2}, {5, 3}}, 8}, + tests: []testFnc{ + testRemaining{8, 5}, + testWriteTo{fileOps{"ab", int64(3), "cde"}, 8, nil}, + testRemaining{0, 0}, + }, + }, { + maker: makeSparse{makeReg{"abcde", 5}, sparseDatas{{0, 2}, {5, 3}}, 10}, + tests: []testFnc{ + testRemaining{10, 5}, + testRead{100, "ab\x00\x00\x00cde\x00\x00", io.EOF}, + testRemaining{0, 0}, + }, + }, { + maker: makeSparse{makeReg{"abc", 5}, sparseDatas{{0, 2}, {5, 3}}, 10}, + tests: []testFnc{ + testRemaining{10, 5}, + testRead{100, "ab\x00\x00\x00c", io.ErrUnexpectedEOF}, + testRemaining{4, 2}, + }, + }, { + maker: makeSparse{makeReg{"abcde", 5}, sparseDatas{{1, 3}, {6, 2}}, 8}, + tests: []testFnc{ + testRemaining{8, 5}, + testRead{8, "\x00abc\x00\x00de", io.EOF}, + testRemaining{0, 0}, + }, + }, { + maker: makeSparse{makeReg{"abcde", 5}, sparseDatas{{1, 3}, {6, 0}, {6, 0}, {6, 2}}, 8}, + tests: []testFnc{ + testRemaining{8, 5}, + testRead{8, "\x00abc\x00\x00de", io.EOF}, + testRemaining{0, 0}, + }, + }, { + maker: makeSparse{makeReg{"abcde", 5}, sparseDatas{{1, 3}, {6, 0}, {6, 0}, {6, 2}}, 8}, + tests: []testFnc{ + testRemaining{8, 5}, + testWriteTo{fileOps{int64(1), "abc", int64(2), "de"}, 8, nil}, + testRemaining{0, 0}, + }, + }, { + maker: makeSparse{makeReg{"abcde", 5}, sparseDatas{{1, 3}, {6, 2}}, 10}, + tests: []testFnc{ + testRead{100, "\x00abc\x00\x00de\x00\x00", io.EOF}, + }, + }, { + maker: makeSparse{makeReg{"abcde", 5}, sparseDatas{{1, 3}, {6, 2}}, 10}, + tests: []testFnc{ + testWriteTo{fileOps{int64(1), "abc", int64(2), "de", int64(1), "\x00"}, 10, nil}, + }, + }, { + maker: makeSparse{makeReg{"abcde", 5}, sparseDatas{{1, 3}, {6, 2}, {8, 0}, {8, 0}, {8, 0}, {8, 0}}, 10}, + tests: []testFnc{ + testRead{100, "\x00abc\x00\x00de\x00\x00", io.EOF}, + }, + }, { + maker: makeSparse{makeReg{"", 0}, sparseDatas{}, 2}, + tests: []testFnc{ + testRead{100, "\x00\x00", io.EOF}, + }, + }, { + maker: makeSparse{makeReg{"", 8}, sparseDatas{{1, 3}, {6, 5}}, 15}, + tests: []testFnc{ + testRead{100, "\x00", io.ErrUnexpectedEOF}, + }, + }, { + maker: makeSparse{makeReg{"ab", 2}, sparseDatas{{1, 3}, {6, 5}}, 15}, + tests: []testFnc{ + testRead{100, "\x00ab", errMissData}, + }, + }, { + maker: makeSparse{makeReg{"ab", 8}, sparseDatas{{1, 3}, {6, 5}}, 15}, + tests: []testFnc{ + testRead{100, "\x00ab", io.ErrUnexpectedEOF}, + }, + }, { + maker: makeSparse{makeReg{"abc", 3}, sparseDatas{{1, 3}, {6, 5}}, 15}, + tests: []testFnc{ + testRead{100, "\x00abc\x00\x00", errMissData}, + }, + }, { + maker: makeSparse{makeReg{"abc", 8}, sparseDatas{{1, 3}, {6, 5}}, 15}, + tests: []testFnc{ + testRead{100, "\x00abc\x00\x00", io.ErrUnexpectedEOF}, + }, + }, { + maker: makeSparse{makeReg{"abcde", 5}, sparseDatas{{1, 3}, {6, 5}}, 15}, + tests: []testFnc{ + testRead{100, "\x00abc\x00\x00de", errMissData}, + }, + }, { + maker: makeSparse{makeReg{"abcde", 5}, sparseDatas{{1, 3}, {6, 5}}, 15}, + tests: []testFnc{ + testWriteTo{fileOps{int64(1), "abc", int64(2), "de"}, 8, errMissData}, + }, + }, { + maker: makeSparse{makeReg{"abcde", 8}, sparseDatas{{1, 3}, {6, 5}}, 15}, + tests: []testFnc{ + testRead{100, "\x00abc\x00\x00de", io.ErrUnexpectedEOF}, + }, + }, { + maker: makeSparse{makeReg{"abcdefghEXTRA", 13}, sparseDatas{{1, 3}, {6, 5}}, 15}, + tests: []testFnc{ + testRemaining{15, 13}, + testRead{100, "\x00abc\x00\x00defgh\x00\x00\x00\x00", errUnrefData}, + testWriteTo{nil, 0, errUnrefData}, + testRemaining{0, 5}, + }, + }, { + maker: makeSparse{makeReg{"abcdefghEXTRA", 13}, sparseDatas{{1, 3}, {6, 5}}, 15}, + tests: []testFnc{ + testRemaining{15, 13}, + testWriteTo{fileOps{int64(1), "abc", int64(2), "defgh", int64(4)}, 15, errUnrefData}, + testRead{100, "", errUnrefData}, + testRemaining{0, 5}, + }, + }} + + for i, v := range vectors { + var fr fileReader + switch maker := v.maker.(type) { + case makeReg: + r := testNonEmptyReader{strings.NewReader(maker.str)} + fr = ®FileReader{r, maker.size} + case makeSparse: + if !validateSparseEntries(maker.spd, maker.size) { + t.Fatalf("invalid sparse map: %v", maker.spd) + } + sph := invertSparseEntries(maker.spd, maker.size) + r := testNonEmptyReader{strings.NewReader(maker.makeReg.str)} + fr = ®FileReader{r, maker.makeReg.size} + fr = &sparseFileReader{fr, sph, 0} + default: + t.Fatalf("test %d, unknown make operation: %T", i, maker) + } + + for j, tf := range v.tests { + switch tf := tf.(type) { + case testRead: + b := make([]byte, tf.cnt) + n, err := fr.Read(b) + if got := string(b[:n]); got != tf.wantStr || err != tf.wantErr { + t.Errorf("test %d.%d, Read(%d):\ngot (%q, %v)\nwant (%q, %v)", i, j, tf.cnt, got, err, tf.wantStr, tf.wantErr) + } + case testWriteTo: + f := &testFile{ops: tf.ops} + got, err := fr.WriteTo(f) + if _, ok := err.(testError); ok { + t.Errorf("test %d.%d, WriteTo(): %v", i, j, err) + } else if got != tf.wantCnt || err != tf.wantErr { + t.Errorf("test %d.%d, WriteTo() = (%d, %v), want (%d, %v)", i, j, got, err, tf.wantCnt, tf.wantErr) + } + if len(f.ops) > 0 { + t.Errorf("test %d.%d, expected %d more operations", i, j, len(f.ops)) + } + case testRemaining: + if got := fr.logicalRemaining(); got != tf.wantLCnt { + t.Errorf("test %d.%d, logicalRemaining() = %d, want %d", i, j, got, tf.wantLCnt) + } + if got := fr.physicalRemaining(); got != tf.wantPCnt { + t.Errorf("test %d.%d, physicalRemaining() = %d, want %d", i, j, got, tf.wantPCnt) + } + default: + t.Fatalf("test %d.%d, unknown test operation: %T", i, j, tf) + } + } + } +} diff --git a/src/archive/tar/stat_actime1.go b/src/archive/tar/stat_actime1.go new file mode 100644 index 0000000..c4c2480 --- /dev/null +++ b/src/archive/tar/stat_actime1.go @@ -0,0 +1,20 @@ +// Copyright 2012 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +//go:build aix || linux || dragonfly || openbsd || solaris + +package tar + +import ( + "syscall" + "time" +) + +func statAtime(st *syscall.Stat_t) time.Time { + return time.Unix(st.Atim.Unix()) +} + +func statCtime(st *syscall.Stat_t) time.Time { + return time.Unix(st.Ctim.Unix()) +} diff --git a/src/archive/tar/stat_actime2.go b/src/archive/tar/stat_actime2.go new file mode 100644 index 0000000..f76d6be --- /dev/null +++ b/src/archive/tar/stat_actime2.go @@ -0,0 +1,20 @@ +// Copyright 2012 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +//go:build darwin || freebsd || netbsd + +package tar + +import ( + "syscall" + "time" +) + +func statAtime(st *syscall.Stat_t) time.Time { + return time.Unix(st.Atimespec.Unix()) +} + +func statCtime(st *syscall.Stat_t) time.Time { + return time.Unix(st.Ctimespec.Unix()) +} diff --git a/src/archive/tar/stat_unix.go b/src/archive/tar/stat_unix.go new file mode 100644 index 0000000..717a0b3 --- /dev/null +++ b/src/archive/tar/stat_unix.go @@ -0,0 +1,101 @@ +// Copyright 2012 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +//go:build aix || linux || darwin || dragonfly || freebsd || openbsd || netbsd || solaris + +package tar + +import ( + "io/fs" + "os/user" + "runtime" + "strconv" + "sync" + "syscall" +) + +func init() { + sysStat = statUnix +} + +// userMap and groupMap caches UID and GID lookups for performance reasons. +// The downside is that renaming uname or gname by the OS never takes effect. +var userMap, groupMap sync.Map // map[int]string + +func statUnix(fi fs.FileInfo, h *Header) error { + sys, ok := fi.Sys().(*syscall.Stat_t) + if !ok { + return nil + } + h.Uid = int(sys.Uid) + h.Gid = int(sys.Gid) + + // Best effort at populating Uname and Gname. + // The os/user functions may fail for any number of reasons + // (not implemented on that platform, cgo not enabled, etc). + if u, ok := userMap.Load(h.Uid); ok { + h.Uname = u.(string) + } else if u, err := user.LookupId(strconv.Itoa(h.Uid)); err == nil { + h.Uname = u.Username + userMap.Store(h.Uid, h.Uname) + } + if g, ok := groupMap.Load(h.Gid); ok { + h.Gname = g.(string) + } else if g, err := user.LookupGroupId(strconv.Itoa(h.Gid)); err == nil { + h.Gname = g.Name + groupMap.Store(h.Gid, h.Gname) + } + + h.AccessTime = statAtime(sys) + h.ChangeTime = statCtime(sys) + + // Best effort at populating Devmajor and Devminor. + if h.Typeflag == TypeChar || h.Typeflag == TypeBlock { + dev := uint64(sys.Rdev) // May be int32 or uint32 + switch runtime.GOOS { + case "aix": + var major, minor uint32 + major = uint32((dev & 0x3fffffff00000000) >> 32) + minor = uint32((dev & 0x00000000ffffffff) >> 0) + h.Devmajor, h.Devminor = int64(major), int64(minor) + case "linux": + // Copied from golang.org/x/sys/unix/dev_linux.go. + major := uint32((dev & 0x00000000000fff00) >> 8) + major |= uint32((dev & 0xfffff00000000000) >> 32) + minor := uint32((dev & 0x00000000000000ff) >> 0) + minor |= uint32((dev & 0x00000ffffff00000) >> 12) + h.Devmajor, h.Devminor = int64(major), int64(minor) + case "darwin", "ios": + // Copied from golang.org/x/sys/unix/dev_darwin.go. + major := uint32((dev >> 24) & 0xff) + minor := uint32(dev & 0xffffff) + h.Devmajor, h.Devminor = int64(major), int64(minor) + case "dragonfly": + // Copied from golang.org/x/sys/unix/dev_dragonfly.go. + major := uint32((dev >> 8) & 0xff) + minor := uint32(dev & 0xffff00ff) + h.Devmajor, h.Devminor = int64(major), int64(minor) + case "freebsd": + // Copied from golang.org/x/sys/unix/dev_freebsd.go. + major := uint32((dev >> 8) & 0xff) + minor := uint32(dev & 0xffff00ff) + h.Devmajor, h.Devminor = int64(major), int64(minor) + case "netbsd": + // Copied from golang.org/x/sys/unix/dev_netbsd.go. + major := uint32((dev & 0x000fff00) >> 8) + minor := uint32((dev & 0x000000ff) >> 0) + minor |= uint32((dev & 0xfff00000) >> 12) + h.Devmajor, h.Devminor = int64(major), int64(minor) + case "openbsd": + // Copied from golang.org/x/sys/unix/dev_openbsd.go. + major := uint32((dev & 0x0000ff00) >> 8) + minor := uint32((dev & 0x000000ff) >> 0) + minor |= uint32((dev & 0xffff0000) >> 8) + h.Devmajor, h.Devminor = int64(major), int64(minor) + default: + // TODO: Implement solaris (see https://golang.org/issue/8106) + } + } + return nil +} diff --git a/src/archive/tar/strconv.go b/src/archive/tar/strconv.go new file mode 100644 index 0000000..275db6f --- /dev/null +++ b/src/archive/tar/strconv.go @@ -0,0 +1,326 @@ +// Copyright 2016 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package tar + +import ( + "bytes" + "fmt" + "strconv" + "strings" + "time" +) + +// hasNUL reports whether the NUL character exists within s. +func hasNUL(s string) bool { + return strings.Contains(s, "\x00") +} + +// isASCII reports whether the input is an ASCII C-style string. +func isASCII(s string) bool { + for _, c := range s { + if c >= 0x80 || c == 0x00 { + return false + } + } + return true +} + +// toASCII converts the input to an ASCII C-style string. +// This is a best effort conversion, so invalid characters are dropped. +func toASCII(s string) string { + if isASCII(s) { + return s + } + b := make([]byte, 0, len(s)) + for _, c := range s { + if c < 0x80 && c != 0x00 { + b = append(b, byte(c)) + } + } + return string(b) +} + +type parser struct { + err error // Last error seen +} + +type formatter struct { + err error // Last error seen +} + +// parseString parses bytes as a NUL-terminated C-style string. +// If a NUL byte is not found then the whole slice is returned as a string. +func (*parser) parseString(b []byte) string { + if i := bytes.IndexByte(b, 0); i >= 0 { + return string(b[:i]) + } + return string(b) +} + +// formatString copies s into b, NUL-terminating if possible. +func (f *formatter) formatString(b []byte, s string) { + if len(s) > len(b) { + f.err = ErrFieldTooLong + } + copy(b, s) + if len(s) < len(b) { + b[len(s)] = 0 + } + + // Some buggy readers treat regular files with a trailing slash + // in the V7 path field as a directory even though the full path + // recorded elsewhere (e.g., via PAX record) contains no trailing slash. + if len(s) > len(b) && b[len(b)-1] == '/' { + n := len(strings.TrimRight(s[:len(b)], "/")) + b[n] = 0 // Replace trailing slash with NUL terminator + } +} + +// fitsInBase256 reports whether x can be encoded into n bytes using base-256 +// encoding. Unlike octal encoding, base-256 encoding does not require that the +// string ends with a NUL character. Thus, all n bytes are available for output. +// +// If operating in binary mode, this assumes strict GNU binary mode; which means +// that the first byte can only be either 0x80 or 0xff. Thus, the first byte is +// equivalent to the sign bit in two's complement form. +func fitsInBase256(n int, x int64) bool { + binBits := uint(n-1) * 8 + return n >= 9 || (x >= -1<<binBits && x < 1<<binBits) +} + +// parseNumeric parses the input as being encoded in either base-256 or octal. +// This function may return negative numbers. +// If parsing fails or an integer overflow occurs, err will be set. +func (p *parser) parseNumeric(b []byte) int64 { + // Check for base-256 (binary) format first. + // If the first bit is set, then all following bits constitute a two's + // complement encoded number in big-endian byte order. + if len(b) > 0 && b[0]&0x80 != 0 { + // Handling negative numbers relies on the following identity: + // -a-1 == ^a + // + // If the number is negative, we use an inversion mask to invert the + // data bytes and treat the value as an unsigned number. + var inv byte // 0x00 if positive or zero, 0xff if negative + if b[0]&0x40 != 0 { + inv = 0xff + } + + var x uint64 + for i, c := range b { + c ^= inv // Inverts c only if inv is 0xff, otherwise does nothing + if i == 0 { + c &= 0x7f // Ignore signal bit in first byte + } + if (x >> 56) > 0 { + p.err = ErrHeader // Integer overflow + return 0 + } + x = x<<8 | uint64(c) + } + if (x >> 63) > 0 { + p.err = ErrHeader // Integer overflow + return 0 + } + if inv == 0xff { + return ^int64(x) + } + return int64(x) + } + + // Normal case is base-8 (octal) format. + return p.parseOctal(b) +} + +// formatNumeric encodes x into b using base-8 (octal) encoding if possible. +// Otherwise it will attempt to use base-256 (binary) encoding. +func (f *formatter) formatNumeric(b []byte, x int64) { + if fitsInOctal(len(b), x) { + f.formatOctal(b, x) + return + } + + if fitsInBase256(len(b), x) { + for i := len(b) - 1; i >= 0; i-- { + b[i] = byte(x) + x >>= 8 + } + b[0] |= 0x80 // Highest bit indicates binary format + return + } + + f.formatOctal(b, 0) // Last resort, just write zero + f.err = ErrFieldTooLong +} + +func (p *parser) parseOctal(b []byte) int64 { + // Because unused fields are filled with NULs, we need + // to skip leading NULs. Fields may also be padded with + // spaces or NULs. + // So we remove leading and trailing NULs and spaces to + // be sure. + b = bytes.Trim(b, " \x00") + + if len(b) == 0 { + return 0 + } + x, perr := strconv.ParseUint(p.parseString(b), 8, 64) + if perr != nil { + p.err = ErrHeader + } + return int64(x) +} + +func (f *formatter) formatOctal(b []byte, x int64) { + if !fitsInOctal(len(b), x) { + x = 0 // Last resort, just write zero + f.err = ErrFieldTooLong + } + + s := strconv.FormatInt(x, 8) + // Add leading zeros, but leave room for a NUL. + if n := len(b) - len(s) - 1; n > 0 { + s = strings.Repeat("0", n) + s + } + f.formatString(b, s) +} + +// fitsInOctal reports whether the integer x fits in a field n-bytes long +// using octal encoding with the appropriate NUL terminator. +func fitsInOctal(n int, x int64) bool { + octBits := uint(n-1) * 3 + return x >= 0 && (n >= 22 || x < 1<<octBits) +} + +// parsePAXTime takes a string of the form %d.%d as described in the PAX +// specification. Note that this implementation allows for negative timestamps, +// which is allowed for by the PAX specification, but not always portable. +func parsePAXTime(s string) (time.Time, error) { + const maxNanoSecondDigits = 9 + + // Split string into seconds and sub-seconds parts. + ss, sn, _ := strings.Cut(s, ".") + + // Parse the seconds. + secs, err := strconv.ParseInt(ss, 10, 64) + if err != nil { + return time.Time{}, ErrHeader + } + if len(sn) == 0 { + return time.Unix(secs, 0), nil // No sub-second values + } + + // Parse the nanoseconds. + if strings.Trim(sn, "0123456789") != "" { + return time.Time{}, ErrHeader + } + if len(sn) < maxNanoSecondDigits { + sn += strings.Repeat("0", maxNanoSecondDigits-len(sn)) // Right pad + } else { + sn = sn[:maxNanoSecondDigits] // Right truncate + } + nsecs, _ := strconv.ParseInt(sn, 10, 64) // Must succeed + if len(ss) > 0 && ss[0] == '-' { + return time.Unix(secs, -1*nsecs), nil // Negative correction + } + return time.Unix(secs, nsecs), nil +} + +// formatPAXTime converts ts into a time of the form %d.%d as described in the +// PAX specification. This function is capable of negative timestamps. +func formatPAXTime(ts time.Time) (s string) { + secs, nsecs := ts.Unix(), ts.Nanosecond() + if nsecs == 0 { + return strconv.FormatInt(secs, 10) + } + + // If seconds is negative, then perform correction. + sign := "" + if secs < 0 { + sign = "-" // Remember sign + secs = -(secs + 1) // Add a second to secs + nsecs = -(nsecs - 1e9) // Take that second away from nsecs + } + return strings.TrimRight(fmt.Sprintf("%s%d.%09d", sign, secs, nsecs), "0") +} + +// parsePAXRecord parses the input PAX record string into a key-value pair. +// If parsing is successful, it will slice off the currently read record and +// return the remainder as r. +func parsePAXRecord(s string) (k, v, r string, err error) { + // The size field ends at the first space. + nStr, rest, ok := strings.Cut(s, " ") + if !ok { + return "", "", s, ErrHeader + } + + // Parse the first token as a decimal integer. + n, perr := strconv.ParseInt(nStr, 10, 0) // Intentionally parse as native int + if perr != nil || n < 5 || n > int64(len(s)) { + return "", "", s, ErrHeader + } + n -= int64(len(nStr) + 1) // convert from index in s to index in rest + if n <= 0 { + return "", "", s, ErrHeader + } + + // Extract everything between the space and the final newline. + rec, nl, rem := rest[:n-1], rest[n-1:n], rest[n:] + if nl != "\n" { + return "", "", s, ErrHeader + } + + // The first equals separates the key from the value. + k, v, ok = strings.Cut(rec, "=") + if !ok { + return "", "", s, ErrHeader + } + + if !validPAXRecord(k, v) { + return "", "", s, ErrHeader + } + return k, v, rem, nil +} + +// formatPAXRecord formats a single PAX record, prefixing it with the +// appropriate length. +func formatPAXRecord(k, v string) (string, error) { + if !validPAXRecord(k, v) { + return "", ErrHeader + } + + const padding = 3 // Extra padding for ' ', '=', and '\n' + size := len(k) + len(v) + padding + size += len(strconv.Itoa(size)) + record := strconv.Itoa(size) + " " + k + "=" + v + "\n" + + // Final adjustment if adding size field increased the record size. + if len(record) != size { + size = len(record) + record = strconv.Itoa(size) + " " + k + "=" + v + "\n" + } + return record, nil +} + +// validPAXRecord reports whether the key-value pair is valid where each +// record is formatted as: +// "%d %s=%s\n" % (size, key, value) +// +// Keys and values should be UTF-8, but the number of bad writers out there +// forces us to be a more liberal. +// Thus, we only reject all keys with NUL, and only reject NULs in values +// for the PAX version of the USTAR string fields. +// The key must not contain an '=' character. +func validPAXRecord(k, v string) bool { + if k == "" || strings.Contains(k, "=") { + return false + } + switch k { + case paxPath, paxLinkpath, paxUname, paxGname: + return !hasNUL(v) + default: + return !hasNUL(k) + } +} diff --git a/src/archive/tar/strconv_test.go b/src/archive/tar/strconv_test.go new file mode 100644 index 0000000..add65e2 --- /dev/null +++ b/src/archive/tar/strconv_test.go @@ -0,0 +1,441 @@ +// Copyright 2016 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package tar + +import ( + "math" + "strings" + "testing" + "time" +) + +func TestFitsInBase256(t *testing.T) { + vectors := []struct { + in int64 + width int + ok bool + }{ + {+1, 8, true}, + {0, 8, true}, + {-1, 8, true}, + {1 << 56, 8, false}, + {(1 << 56) - 1, 8, true}, + {-1 << 56, 8, true}, + {(-1 << 56) - 1, 8, false}, + {121654, 8, true}, + {-9849849, 8, true}, + {math.MaxInt64, 9, true}, + {0, 9, true}, + {math.MinInt64, 9, true}, + {math.MaxInt64, 12, true}, + {0, 12, true}, + {math.MinInt64, 12, true}, + } + + for _, v := range vectors { + ok := fitsInBase256(v.width, v.in) + if ok != v.ok { + t.Errorf("fitsInBase256(%d, %d): got %v, want %v", v.in, v.width, ok, v.ok) + } + } +} + +func TestParseNumeric(t *testing.T) { + vectors := []struct { + in string + want int64 + ok bool + }{ + // Test base-256 (binary) encoded values. + {"", 0, true}, + {"\x80", 0, true}, + {"\x80\x00", 0, true}, + {"\x80\x00\x00", 0, true}, + {"\xbf", (1 << 6) - 1, true}, + {"\xbf\xff", (1 << 14) - 1, true}, + {"\xbf\xff\xff", (1 << 22) - 1, true}, + {"\xff", -1, true}, + {"\xff\xff", -1, true}, + {"\xff\xff\xff", -1, true}, + {"\xc0", -1 * (1 << 6), true}, + {"\xc0\x00", -1 * (1 << 14), true}, + {"\xc0\x00\x00", -1 * (1 << 22), true}, + {"\x87\x76\xa2\x22\xeb\x8a\x72\x61", 537795476381659745, true}, + {"\x80\x00\x00\x00\x07\x76\xa2\x22\xeb\x8a\x72\x61", 537795476381659745, true}, + {"\xf7\x76\xa2\x22\xeb\x8a\x72\x61", -615126028225187231, true}, + {"\xff\xff\xff\xff\xf7\x76\xa2\x22\xeb\x8a\x72\x61", -615126028225187231, true}, + {"\x80\x7f\xff\xff\xff\xff\xff\xff\xff", math.MaxInt64, true}, + {"\x80\x80\x00\x00\x00\x00\x00\x00\x00", 0, false}, + {"\xff\x80\x00\x00\x00\x00\x00\x00\x00", math.MinInt64, true}, + {"\xff\x7f\xff\xff\xff\xff\xff\xff\xff", 0, false}, + {"\xf5\xec\xd1\xc7\x7e\x5f\x26\x48\x81\x9f\x8f\x9b", 0, false}, + + // Test base-8 (octal) encoded values. + {"0000000\x00", 0, true}, + {" \x0000000\x00", 0, true}, + {" \x0000003\x00", 3, true}, + {"00000000227\x00", 0227, true}, + {"032033\x00 ", 032033, true}, + {"320330\x00 ", 0320330, true}, + {"0000660\x00 ", 0660, true}, + {"\x00 0000660\x00 ", 0660, true}, + {"0123456789abcdef", 0, false}, + {"0123456789\x00abcdef", 0, false}, + {"01234567\x0089abcdef", 342391, true}, + {"0123\x7e\x5f\x264123", 0, false}, + } + + for _, v := range vectors { + var p parser + got := p.parseNumeric([]byte(v.in)) + ok := (p.err == nil) + if ok != v.ok { + if v.ok { + t.Errorf("parseNumeric(%q): got parsing failure, want success", v.in) + } else { + t.Errorf("parseNumeric(%q): got parsing success, want failure", v.in) + } + } + if ok && got != v.want { + t.Errorf("parseNumeric(%q): got %d, want %d", v.in, got, v.want) + } + } +} + +func TestFormatNumeric(t *testing.T) { + vectors := []struct { + in int64 + want string + ok bool + }{ + // Test base-8 (octal) encoded values. + {0, "0\x00", true}, + {7, "7\x00", true}, + {8, "\x80\x08", true}, + {077, "77\x00", true}, + {0100, "\x80\x00\x40", true}, + {0, "0000000\x00", true}, + {0123, "0000123\x00", true}, + {07654321, "7654321\x00", true}, + {07777777, "7777777\x00", true}, + {010000000, "\x80\x00\x00\x00\x00\x20\x00\x00", true}, + {0, "00000000000\x00", true}, + {000001234567, "00001234567\x00", true}, + {076543210321, "76543210321\x00", true}, + {012345670123, "12345670123\x00", true}, + {077777777777, "77777777777\x00", true}, + {0100000000000, "\x80\x00\x00\x00\x00\x00\x00\x02\x00\x00\x00\x00", true}, + {math.MaxInt64, "777777777777777777777\x00", true}, + + // Test base-256 (binary) encoded values. + {-1, "\xff", true}, + {-1, "\xff\xff", true}, + {-1, "\xff\xff\xff", true}, + {(1 << 0), "0", false}, + {(1 << 8) - 1, "\x80\xff", true}, + {(1 << 8), "0\x00", false}, + {(1 << 16) - 1, "\x80\xff\xff", true}, + {(1 << 16), "00\x00", false}, + {-1 * (1 << 0), "\xff", true}, + {-1*(1<<0) - 1, "0", false}, + {-1 * (1 << 8), "\xff\x00", true}, + {-1*(1<<8) - 1, "0\x00", false}, + {-1 * (1 << 16), "\xff\x00\x00", true}, + {-1*(1<<16) - 1, "00\x00", false}, + {537795476381659745, "0000000\x00", false}, + {537795476381659745, "\x80\x00\x00\x00\x07\x76\xa2\x22\xeb\x8a\x72\x61", true}, + {-615126028225187231, "0000000\x00", false}, + {-615126028225187231, "\xff\xff\xff\xff\xf7\x76\xa2\x22\xeb\x8a\x72\x61", true}, + {math.MaxInt64, "0000000\x00", false}, + {math.MaxInt64, "\x80\x00\x00\x00\x7f\xff\xff\xff\xff\xff\xff\xff", true}, + {math.MinInt64, "0000000\x00", false}, + {math.MinInt64, "\xff\xff\xff\xff\x80\x00\x00\x00\x00\x00\x00\x00", true}, + {math.MaxInt64, "\x80\x7f\xff\xff\xff\xff\xff\xff\xff", true}, + {math.MinInt64, "\xff\x80\x00\x00\x00\x00\x00\x00\x00", true}, + } + + for _, v := range vectors { + var f formatter + got := make([]byte, len(v.want)) + f.formatNumeric(got, v.in) + ok := (f.err == nil) + if ok != v.ok { + if v.ok { + t.Errorf("formatNumeric(%d): got formatting failure, want success", v.in) + } else { + t.Errorf("formatNumeric(%d): got formatting success, want failure", v.in) + } + } + if string(got) != v.want { + t.Errorf("formatNumeric(%d): got %q, want %q", v.in, got, v.want) + } + } +} + +func TestFitsInOctal(t *testing.T) { + vectors := []struct { + input int64 + width int + ok bool + }{ + {-1, 1, false}, + {-1, 2, false}, + {-1, 3, false}, + {0, 1, true}, + {0 + 1, 1, false}, + {0, 2, true}, + {07, 2, true}, + {07 + 1, 2, false}, + {0, 4, true}, + {0777, 4, true}, + {0777 + 1, 4, false}, + {0, 8, true}, + {07777777, 8, true}, + {07777777 + 1, 8, false}, + {0, 12, true}, + {077777777777, 12, true}, + {077777777777 + 1, 12, false}, + {math.MaxInt64, 22, true}, + {012345670123, 12, true}, + {01564164, 12, true}, + {-012345670123, 12, false}, + {-01564164, 12, false}, + {-1564164, 30, false}, + } + + for _, v := range vectors { + ok := fitsInOctal(v.width, v.input) + if ok != v.ok { + t.Errorf("checkOctal(%d, %d): got %v, want %v", v.input, v.width, ok, v.ok) + } + } +} + +func TestParsePAXTime(t *testing.T) { + vectors := []struct { + in string + want time.Time + ok bool + }{ + {"1350244992.023960108", time.Unix(1350244992, 23960108), true}, + {"1350244992.02396010", time.Unix(1350244992, 23960100), true}, + {"1350244992.0239601089", time.Unix(1350244992, 23960108), true}, + {"1350244992.3", time.Unix(1350244992, 300000000), true}, + {"1350244992", time.Unix(1350244992, 0), true}, + {"-1.000000001", time.Unix(-1, -1e0+0e0), true}, + {"-1.000001", time.Unix(-1, -1e3+0e0), true}, + {"-1.001000", time.Unix(-1, -1e6+0e0), true}, + {"-1", time.Unix(-1, -0e0+0e0), true}, + {"-1.999000", time.Unix(-1, -1e9+1e6), true}, + {"-1.999999", time.Unix(-1, -1e9+1e3), true}, + {"-1.999999999", time.Unix(-1, -1e9+1e0), true}, + {"0.000000001", time.Unix(0, 1e0+0e0), true}, + {"0.000001", time.Unix(0, 1e3+0e0), true}, + {"0.001000", time.Unix(0, 1e6+0e0), true}, + {"0", time.Unix(0, 0e0), true}, + {"0.999000", time.Unix(0, 1e9-1e6), true}, + {"0.999999", time.Unix(0, 1e9-1e3), true}, + {"0.999999999", time.Unix(0, 1e9-1e0), true}, + {"1.000000001", time.Unix(+1, +1e0-0e0), true}, + {"1.000001", time.Unix(+1, +1e3-0e0), true}, + {"1.001000", time.Unix(+1, +1e6-0e0), true}, + {"1", time.Unix(+1, +0e0-0e0), true}, + {"1.999000", time.Unix(+1, +1e9-1e6), true}, + {"1.999999", time.Unix(+1, +1e9-1e3), true}, + {"1.999999999", time.Unix(+1, +1e9-1e0), true}, + {"-1350244992.023960108", time.Unix(-1350244992, -23960108), true}, + {"-1350244992.02396010", time.Unix(-1350244992, -23960100), true}, + {"-1350244992.0239601089", time.Unix(-1350244992, -23960108), true}, + {"-1350244992.3", time.Unix(-1350244992, -300000000), true}, + {"-1350244992", time.Unix(-1350244992, 0), true}, + {"", time.Time{}, false}, + {"0", time.Unix(0, 0), true}, + {"1.", time.Unix(1, 0), true}, + {"0.0", time.Unix(0, 0), true}, + {".5", time.Time{}, false}, + {"-1.3", time.Unix(-1, -3e8), true}, + {"-1.0", time.Unix(-1, -0e0), true}, + {"-0.0", time.Unix(-0, -0e0), true}, + {"-0.1", time.Unix(-0, -1e8), true}, + {"-0.01", time.Unix(-0, -1e7), true}, + {"-0.99", time.Unix(-0, -99e7), true}, + {"-0.98", time.Unix(-0, -98e7), true}, + {"-1.1", time.Unix(-1, -1e8), true}, + {"-1.01", time.Unix(-1, -1e7), true}, + {"-2.99", time.Unix(-2, -99e7), true}, + {"-5.98", time.Unix(-5, -98e7), true}, + {"-", time.Time{}, false}, + {"+", time.Time{}, false}, + {"-1.-1", time.Time{}, false}, + {"99999999999999999999999999999999999999999999999", time.Time{}, false}, + {"0.123456789abcdef", time.Time{}, false}, + {"foo", time.Time{}, false}, + {"\x00", time.Time{}, false}, + {"𝟵𝟴𝟳𝟲𝟱.𝟰𝟯𝟮𝟭𝟬", time.Time{}, false}, // Unicode numbers (U+1D7EC to U+1D7F5) + {"98765﹒43210", time.Time{}, false}, // Unicode period (U+FE52) + } + + for _, v := range vectors { + ts, err := parsePAXTime(v.in) + ok := (err == nil) + if v.ok != ok { + if v.ok { + t.Errorf("parsePAXTime(%q): got parsing failure, want success", v.in) + } else { + t.Errorf("parsePAXTime(%q): got parsing success, want failure", v.in) + } + } + if ok && !ts.Equal(v.want) { + t.Errorf("parsePAXTime(%q): got (%ds %dns), want (%ds %dns)", + v.in, ts.Unix(), ts.Nanosecond(), v.want.Unix(), v.want.Nanosecond()) + } + } +} + +func TestFormatPAXTime(t *testing.T) { + vectors := []struct { + sec, nsec int64 + want string + }{ + {1350244992, 0, "1350244992"}, + {1350244992, 300000000, "1350244992.3"}, + {1350244992, 23960100, "1350244992.0239601"}, + {1350244992, 23960108, "1350244992.023960108"}, + {+1, +1e9 - 1e0, "1.999999999"}, + {+1, +1e9 - 1e3, "1.999999"}, + {+1, +1e9 - 1e6, "1.999"}, + {+1, +0e0 - 0e0, "1"}, + {+1, +1e6 - 0e0, "1.001"}, + {+1, +1e3 - 0e0, "1.000001"}, + {+1, +1e0 - 0e0, "1.000000001"}, + {0, 1e9 - 1e0, "0.999999999"}, + {0, 1e9 - 1e3, "0.999999"}, + {0, 1e9 - 1e6, "0.999"}, + {0, 0e0, "0"}, + {0, 1e6 + 0e0, "0.001"}, + {0, 1e3 + 0e0, "0.000001"}, + {0, 1e0 + 0e0, "0.000000001"}, + {-1, -1e9 + 1e0, "-1.999999999"}, + {-1, -1e9 + 1e3, "-1.999999"}, + {-1, -1e9 + 1e6, "-1.999"}, + {-1, -0e0 + 0e0, "-1"}, + {-1, -1e6 + 0e0, "-1.001"}, + {-1, -1e3 + 0e0, "-1.000001"}, + {-1, -1e0 + 0e0, "-1.000000001"}, + {-1350244992, 0, "-1350244992"}, + {-1350244992, -300000000, "-1350244992.3"}, + {-1350244992, -23960100, "-1350244992.0239601"}, + {-1350244992, -23960108, "-1350244992.023960108"}, + } + + for _, v := range vectors { + got := formatPAXTime(time.Unix(v.sec, v.nsec)) + if got != v.want { + t.Errorf("formatPAXTime(%ds, %dns): got %q, want %q", + v.sec, v.nsec, got, v.want) + } + } +} + +func TestParsePAXRecord(t *testing.T) { + medName := strings.Repeat("CD", 50) + longName := strings.Repeat("AB", 100) + + vectors := []struct { + in string + wantRes string + wantKey string + wantVal string + ok bool + }{ + {"6 k=v\n\n", "\n", "k", "v", true}, + {"19 path=/etc/hosts\n", "", "path", "/etc/hosts", true}, + {"210 path=" + longName + "\nabc", "abc", "path", longName, true}, + {"110 path=" + medName + "\n", "", "path", medName, true}, + {"9 foo=ba\n", "", "foo", "ba", true}, + {"11 foo=bar\n\x00", "\x00", "foo", "bar", true}, + {"18 foo=b=\nar=\n==\x00\n", "", "foo", "b=\nar=\n==\x00", true}, + {"27 foo=hello9 foo=ba\nworld\n", "", "foo", "hello9 foo=ba\nworld", true}, + {"27 ☺☻☹=日a本b語ç\nmeow mix", "meow mix", "☺☻☹", "日a本b語ç", true}, + {"17 \x00hello=\x00world\n", "17 \x00hello=\x00world\n", "", "", false}, + {"1 k=1\n", "1 k=1\n", "", "", false}, + {"6 k~1\n", "6 k~1\n", "", "", false}, + {"6_k=1\n", "6_k=1\n", "", "", false}, + {"6 k=1 ", "6 k=1 ", "", "", false}, + {"632 k=1\n", "632 k=1\n", "", "", false}, + {"16 longkeyname=hahaha\n", "16 longkeyname=hahaha\n", "", "", false}, + {"3 somelongkey=\n", "3 somelongkey=\n", "", "", false}, + {"50 tooshort=\n", "50 tooshort=\n", "", "", false}, + {"0000000000000000000000000000000030 mtime=1432668921.098285006\n30 ctime=2147483649.15163319", "0000000000000000000000000000000030 mtime=1432668921.098285006\n30 ctime=2147483649.15163319", "mtime", "1432668921.098285006", false}, + {"06 k=v\n", "06 k=v\n", "", "", false}, + {"00006 k=v\n", "00006 k=v\n", "", "", false}, + {"000006 k=v\n", "000006 k=v\n", "", "", false}, + {"000000 k=v\n", "000000 k=v\n", "", "", false}, + {"0 k=v\n", "0 k=v\n", "", "", false}, + {"+0000005 x=\n", "+0000005 x=\n", "", "", false}, + } + + for _, v := range vectors { + key, val, res, err := parsePAXRecord(v.in) + ok := (err == nil) + if ok != v.ok { + if v.ok { + t.Errorf("parsePAXRecord(%q): got parsing failure, want success", v.in) + } else { + t.Errorf("parsePAXRecord(%q): got parsing success, want failure", v.in) + } + } + if v.ok && (key != v.wantKey || val != v.wantVal) { + t.Errorf("parsePAXRecord(%q): got (%q: %q), want (%q: %q)", + v.in, key, val, v.wantKey, v.wantVal) + } + if res != v.wantRes { + t.Errorf("parsePAXRecord(%q): got residual %q, want residual %q", + v.in, res, v.wantRes) + } + } +} + +func TestFormatPAXRecord(t *testing.T) { + medName := strings.Repeat("CD", 50) + longName := strings.Repeat("AB", 100) + + vectors := []struct { + inKey string + inVal string + want string + ok bool + }{ + {"k", "v", "6 k=v\n", true}, + {"path", "/etc/hosts", "19 path=/etc/hosts\n", true}, + {"path", longName, "210 path=" + longName + "\n", true}, + {"path", medName, "110 path=" + medName + "\n", true}, + {"foo", "ba", "9 foo=ba\n", true}, + {"foo", "bar", "11 foo=bar\n", true}, + {"foo", "b=\nar=\n==\x00", "18 foo=b=\nar=\n==\x00\n", true}, + {"foo", "hello9 foo=ba\nworld", "27 foo=hello9 foo=ba\nworld\n", true}, + {"☺☻☹", "日a本b語ç", "27 ☺☻☹=日a本b語ç\n", true}, + {"xhello", "\x00world", "17 xhello=\x00world\n", true}, + {"path", "null\x00", "", false}, + {"null\x00", "value", "", false}, + {paxSchilyXattr + "key", "null\x00", "26 SCHILY.xattr.key=null\x00\n", true}, + } + + for _, v := range vectors { + got, err := formatPAXRecord(v.inKey, v.inVal) + ok := (err == nil) + if ok != v.ok { + if v.ok { + t.Errorf("formatPAXRecord(%q, %q): got format failure, want success", v.inKey, v.inVal) + } else { + t.Errorf("formatPAXRecord(%q, %q): got format success, want failure", v.inKey, v.inVal) + } + } + if got != v.want { + t.Errorf("formatPAXRecord(%q, %q): got %q, want %q", + v.inKey, v.inVal, got, v.want) + } + } +} diff --git a/src/archive/tar/tar_test.go b/src/archive/tar/tar_test.go new file mode 100644 index 0000000..a476f5e --- /dev/null +++ b/src/archive/tar/tar_test.go @@ -0,0 +1,850 @@ +// Copyright 2012 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package tar + +import ( + "bytes" + "errors" + "fmt" + "internal/testenv" + "io" + "io/fs" + "math" + "os" + "path" + "path/filepath" + "reflect" + "strings" + "testing" + "time" +) + +type testError struct{ error } + +type fileOps []any // []T where T is (string | int64) + +// testFile is an io.ReadWriteSeeker where the IO operations performed +// on it must match the list of operations in ops. +type testFile struct { + ops fileOps + pos int64 +} + +func (f *testFile) Read(b []byte) (int, error) { + if len(b) == 0 { + return 0, nil + } + if len(f.ops) == 0 { + return 0, io.EOF + } + s, ok := f.ops[0].(string) + if !ok { + return 0, errors.New("unexpected Read operation") + } + + n := copy(b, s) + if len(s) > n { + f.ops[0] = s[n:] + } else { + f.ops = f.ops[1:] + } + f.pos += int64(len(b)) + return n, nil +} + +func (f *testFile) Write(b []byte) (int, error) { + if len(b) == 0 { + return 0, nil + } + if len(f.ops) == 0 { + return 0, errors.New("unexpected Write operation") + } + s, ok := f.ops[0].(string) + if !ok { + return 0, errors.New("unexpected Write operation") + } + + if !strings.HasPrefix(s, string(b)) { + return 0, testError{fmt.Errorf("got Write(%q), want Write(%q)", b, s)} + } + if len(s) > len(b) { + f.ops[0] = s[len(b):] + } else { + f.ops = f.ops[1:] + } + f.pos += int64(len(b)) + return len(b), nil +} + +func (f *testFile) Seek(pos int64, whence int) (int64, error) { + if pos == 0 && whence == io.SeekCurrent { + return f.pos, nil + } + if len(f.ops) == 0 { + return 0, errors.New("unexpected Seek operation") + } + s, ok := f.ops[0].(int64) + if !ok { + return 0, errors.New("unexpected Seek operation") + } + + if s != pos || whence != io.SeekCurrent { + return 0, testError{fmt.Errorf("got Seek(%d, %d), want Seek(%d, %d)", pos, whence, s, io.SeekCurrent)} + } + f.pos += s + f.ops = f.ops[1:] + return f.pos, nil +} + +func equalSparseEntries(x, y []sparseEntry) bool { + return (len(x) == 0 && len(y) == 0) || reflect.DeepEqual(x, y) +} + +func TestSparseEntries(t *testing.T) { + vectors := []struct { + in []sparseEntry + size int64 + + wantValid bool // Result of validateSparseEntries + wantAligned []sparseEntry // Result of alignSparseEntries + wantInverted []sparseEntry // Result of invertSparseEntries + }{{ + in: []sparseEntry{}, size: 0, + wantValid: true, + wantInverted: []sparseEntry{{0, 0}}, + }, { + in: []sparseEntry{}, size: 5000, + wantValid: true, + wantInverted: []sparseEntry{{0, 5000}}, + }, { + in: []sparseEntry{{0, 5000}}, size: 5000, + wantValid: true, + wantAligned: []sparseEntry{{0, 5000}}, + wantInverted: []sparseEntry{{5000, 0}}, + }, { + in: []sparseEntry{{1000, 4000}}, size: 5000, + wantValid: true, + wantAligned: []sparseEntry{{1024, 3976}}, + wantInverted: []sparseEntry{{0, 1000}, {5000, 0}}, + }, { + in: []sparseEntry{{0, 3000}}, size: 5000, + wantValid: true, + wantAligned: []sparseEntry{{0, 2560}}, + wantInverted: []sparseEntry{{3000, 2000}}, + }, { + in: []sparseEntry{{3000, 2000}}, size: 5000, + wantValid: true, + wantAligned: []sparseEntry{{3072, 1928}}, + wantInverted: []sparseEntry{{0, 3000}, {5000, 0}}, + }, { + in: []sparseEntry{{2000, 2000}}, size: 5000, + wantValid: true, + wantAligned: []sparseEntry{{2048, 1536}}, + wantInverted: []sparseEntry{{0, 2000}, {4000, 1000}}, + }, { + in: []sparseEntry{{0, 2000}, {8000, 2000}}, size: 10000, + wantValid: true, + wantAligned: []sparseEntry{{0, 1536}, {8192, 1808}}, + wantInverted: []sparseEntry{{2000, 6000}, {10000, 0}}, + }, { + in: []sparseEntry{{0, 2000}, {2000, 2000}, {4000, 0}, {4000, 3000}, {7000, 1000}, {8000, 0}, {8000, 2000}}, size: 10000, + wantValid: true, + wantAligned: []sparseEntry{{0, 1536}, {2048, 1536}, {4096, 2560}, {7168, 512}, {8192, 1808}}, + wantInverted: []sparseEntry{{10000, 0}}, + }, { + in: []sparseEntry{{0, 0}, {1000, 0}, {2000, 0}, {3000, 0}, {4000, 0}, {5000, 0}}, size: 5000, + wantValid: true, + wantInverted: []sparseEntry{{0, 5000}}, + }, { + in: []sparseEntry{{1, 0}}, size: 0, + wantValid: false, + }, { + in: []sparseEntry{{-1, 0}}, size: 100, + wantValid: false, + }, { + in: []sparseEntry{{0, -1}}, size: 100, + wantValid: false, + }, { + in: []sparseEntry{{0, 0}}, size: -100, + wantValid: false, + }, { + in: []sparseEntry{{math.MaxInt64, 3}, {6, -5}}, size: 35, + wantValid: false, + }, { + in: []sparseEntry{{1, 3}, {6, -5}}, size: 35, + wantValid: false, + }, { + in: []sparseEntry{{math.MaxInt64, math.MaxInt64}}, size: math.MaxInt64, + wantValid: false, + }, { + in: []sparseEntry{{3, 3}}, size: 5, + wantValid: false, + }, { + in: []sparseEntry{{2, 0}, {1, 0}, {0, 0}}, size: 3, + wantValid: false, + }, { + in: []sparseEntry{{1, 3}, {2, 2}}, size: 10, + wantValid: false, + }} + + for i, v := range vectors { + gotValid := validateSparseEntries(v.in, v.size) + if gotValid != v.wantValid { + t.Errorf("test %d, validateSparseEntries() = %v, want %v", i, gotValid, v.wantValid) + } + if !v.wantValid { + continue + } + gotAligned := alignSparseEntries(append([]sparseEntry{}, v.in...), v.size) + if !equalSparseEntries(gotAligned, v.wantAligned) { + t.Errorf("test %d, alignSparseEntries():\ngot %v\nwant %v", i, gotAligned, v.wantAligned) + } + gotInverted := invertSparseEntries(append([]sparseEntry{}, v.in...), v.size) + if !equalSparseEntries(gotInverted, v.wantInverted) { + t.Errorf("test %d, inverseSparseEntries():\ngot %v\nwant %v", i, gotInverted, v.wantInverted) + } + } +} + +func TestFileInfoHeader(t *testing.T) { + fi, err := os.Stat("testdata/small.txt") + if err != nil { + t.Fatal(err) + } + h, err := FileInfoHeader(fi, "") + if err != nil { + t.Fatalf("FileInfoHeader: %v", err) + } + if g, e := h.Name, "small.txt"; g != e { + t.Errorf("Name = %q; want %q", g, e) + } + if g, e := h.Mode, int64(fi.Mode().Perm()); g != e { + t.Errorf("Mode = %#o; want %#o", g, e) + } + if g, e := h.Size, int64(5); g != e { + t.Errorf("Size = %v; want %v", g, e) + } + if g, e := h.ModTime, fi.ModTime(); !g.Equal(e) { + t.Errorf("ModTime = %v; want %v", g, e) + } + // FileInfoHeader should error when passing nil FileInfo + if _, err := FileInfoHeader(nil, ""); err == nil { + t.Fatalf("Expected error when passing nil to FileInfoHeader") + } +} + +func TestFileInfoHeaderDir(t *testing.T) { + fi, err := os.Stat("testdata") + if err != nil { + t.Fatal(err) + } + h, err := FileInfoHeader(fi, "") + if err != nil { + t.Fatalf("FileInfoHeader: %v", err) + } + if g, e := h.Name, "testdata/"; g != e { + t.Errorf("Name = %q; want %q", g, e) + } + // Ignoring c_ISGID for golang.org/issue/4867 + if g, e := h.Mode&^c_ISGID, int64(fi.Mode().Perm()); g != e { + t.Errorf("Mode = %#o; want %#o", g, e) + } + if g, e := h.Size, int64(0); g != e { + t.Errorf("Size = %v; want %v", g, e) + } + if g, e := h.ModTime, fi.ModTime(); !g.Equal(e) { + t.Errorf("ModTime = %v; want %v", g, e) + } +} + +func TestFileInfoHeaderSymlink(t *testing.T) { + testenv.MustHaveSymlink(t) + + tmpdir := t.TempDir() + + link := filepath.Join(tmpdir, "link") + target := tmpdir + if err := os.Symlink(target, link); err != nil { + t.Fatal(err) + } + fi, err := os.Lstat(link) + if err != nil { + t.Fatal(err) + } + + h, err := FileInfoHeader(fi, target) + if err != nil { + t.Fatal(err) + } + if g, e := h.Name, fi.Name(); g != e { + t.Errorf("Name = %q; want %q", g, e) + } + if g, e := h.Linkname, target; g != e { + t.Errorf("Linkname = %q; want %q", g, e) + } + if g, e := h.Typeflag, byte(TypeSymlink); g != e { + t.Errorf("Typeflag = %v; want %v", g, e) + } +} + +func TestRoundTrip(t *testing.T) { + data := []byte("some file contents") + + var b bytes.Buffer + tw := NewWriter(&b) + hdr := &Header{ + Name: "file.txt", + Uid: 1 << 21, // Too big for 8 octal digits + Size: int64(len(data)), + ModTime: time.Now().Round(time.Second), + PAXRecords: map[string]string{"uid": "2097152"}, + Format: FormatPAX, + Typeflag: TypeReg, + } + if err := tw.WriteHeader(hdr); err != nil { + t.Fatalf("tw.WriteHeader: %v", err) + } + if _, err := tw.Write(data); err != nil { + t.Fatalf("tw.Write: %v", err) + } + if err := tw.Close(); err != nil { + t.Fatalf("tw.Close: %v", err) + } + + // Read it back. + tr := NewReader(&b) + rHdr, err := tr.Next() + if err != nil { + t.Fatalf("tr.Next: %v", err) + } + if !reflect.DeepEqual(rHdr, hdr) { + t.Errorf("Header mismatch.\n got %+v\nwant %+v", rHdr, hdr) + } + rData, err := io.ReadAll(tr) + if err != nil { + t.Fatalf("Read: %v", err) + } + if !bytes.Equal(rData, data) { + t.Errorf("Data mismatch.\n got %q\nwant %q", rData, data) + } +} + +type headerRoundTripTest struct { + h *Header + fm fs.FileMode +} + +func TestHeaderRoundTrip(t *testing.T) { + vectors := []headerRoundTripTest{{ + // regular file. + h: &Header{ + Name: "test.txt", + Mode: 0644, + Size: 12, + ModTime: time.Unix(1360600916, 0), + Typeflag: TypeReg, + }, + fm: 0644, + }, { + // symbolic link. + h: &Header{ + Name: "link.txt", + Mode: 0777, + Size: 0, + ModTime: time.Unix(1360600852, 0), + Typeflag: TypeSymlink, + }, + fm: 0777 | fs.ModeSymlink, + }, { + // character device node. + h: &Header{ + Name: "dev/null", + Mode: 0666, + Size: 0, + ModTime: time.Unix(1360578951, 0), + Typeflag: TypeChar, + }, + fm: 0666 | fs.ModeDevice | fs.ModeCharDevice, + }, { + // block device node. + h: &Header{ + Name: "dev/sda", + Mode: 0660, + Size: 0, + ModTime: time.Unix(1360578954, 0), + Typeflag: TypeBlock, + }, + fm: 0660 | fs.ModeDevice, + }, { + // directory. + h: &Header{ + Name: "dir/", + Mode: 0755, + Size: 0, + ModTime: time.Unix(1360601116, 0), + Typeflag: TypeDir, + }, + fm: 0755 | fs.ModeDir, + }, { + // fifo node. + h: &Header{ + Name: "dev/initctl", + Mode: 0600, + Size: 0, + ModTime: time.Unix(1360578949, 0), + Typeflag: TypeFifo, + }, + fm: 0600 | fs.ModeNamedPipe, + }, { + // setuid. + h: &Header{ + Name: "bin/su", + Mode: 0755 | c_ISUID, + Size: 23232, + ModTime: time.Unix(1355405093, 0), + Typeflag: TypeReg, + }, + fm: 0755 | fs.ModeSetuid, + }, { + // setguid. + h: &Header{ + Name: "group.txt", + Mode: 0750 | c_ISGID, + Size: 0, + ModTime: time.Unix(1360602346, 0), + Typeflag: TypeReg, + }, + fm: 0750 | fs.ModeSetgid, + }, { + // sticky. + h: &Header{ + Name: "sticky.txt", + Mode: 0600 | c_ISVTX, + Size: 7, + ModTime: time.Unix(1360602540, 0), + Typeflag: TypeReg, + }, + fm: 0600 | fs.ModeSticky, + }, { + // hard link. + h: &Header{ + Name: "hard.txt", + Mode: 0644, + Size: 0, + Linkname: "file.txt", + ModTime: time.Unix(1360600916, 0), + Typeflag: TypeLink, + }, + fm: 0644, + }, { + // More information. + h: &Header{ + Name: "info.txt", + Mode: 0600, + Size: 0, + Uid: 1000, + Gid: 1000, + ModTime: time.Unix(1360602540, 0), + Uname: "slartibartfast", + Gname: "users", + Typeflag: TypeReg, + }, + fm: 0600, + }} + + for i, v := range vectors { + fi := v.h.FileInfo() + h2, err := FileInfoHeader(fi, "") + if err != nil { + t.Error(err) + continue + } + if strings.Contains(fi.Name(), "/") { + t.Errorf("FileInfo of %q contains slash: %q", v.h.Name, fi.Name()) + } + name := path.Base(v.h.Name) + if fi.IsDir() { + name += "/" + } + if got, want := h2.Name, name; got != want { + t.Errorf("i=%d: Name: got %v, want %v", i, got, want) + } + if got, want := h2.Size, v.h.Size; got != want { + t.Errorf("i=%d: Size: got %v, want %v", i, got, want) + } + if got, want := h2.Uid, v.h.Uid; got != want { + t.Errorf("i=%d: Uid: got %d, want %d", i, got, want) + } + if got, want := h2.Gid, v.h.Gid; got != want { + t.Errorf("i=%d: Gid: got %d, want %d", i, got, want) + } + if got, want := h2.Uname, v.h.Uname; got != want { + t.Errorf("i=%d: Uname: got %q, want %q", i, got, want) + } + if got, want := h2.Gname, v.h.Gname; got != want { + t.Errorf("i=%d: Gname: got %q, want %q", i, got, want) + } + if got, want := h2.Linkname, v.h.Linkname; got != want { + t.Errorf("i=%d: Linkname: got %v, want %v", i, got, want) + } + if got, want := h2.Typeflag, v.h.Typeflag; got != want { + t.Logf("%#v %#v", v.h, fi.Sys()) + t.Errorf("i=%d: Typeflag: got %q, want %q", i, got, want) + } + if got, want := h2.Mode, v.h.Mode; got != want { + t.Errorf("i=%d: Mode: got %o, want %o", i, got, want) + } + if got, want := fi.Mode(), v.fm; got != want { + t.Errorf("i=%d: fi.Mode: got %o, want %o", i, got, want) + } + if got, want := h2.AccessTime, v.h.AccessTime; got != want { + t.Errorf("i=%d: AccessTime: got %v, want %v", i, got, want) + } + if got, want := h2.ChangeTime, v.h.ChangeTime; got != want { + t.Errorf("i=%d: ChangeTime: got %v, want %v", i, got, want) + } + if got, want := h2.ModTime, v.h.ModTime; got != want { + t.Errorf("i=%d: ModTime: got %v, want %v", i, got, want) + } + if sysh, ok := fi.Sys().(*Header); !ok || sysh != v.h { + t.Errorf("i=%d: Sys didn't return original *Header", i) + } + } +} + +func TestHeaderAllowedFormats(t *testing.T) { + vectors := []struct { + header *Header // Input header + paxHdrs map[string]string // Expected PAX headers that may be needed + formats Format // Expected formats that can encode the header + }{{ + header: &Header{}, + formats: FormatUSTAR | FormatPAX | FormatGNU, + }, { + header: &Header{Size: 077777777777}, + formats: FormatUSTAR | FormatPAX | FormatGNU, + }, { + header: &Header{Size: 077777777777, Format: FormatUSTAR}, + formats: FormatUSTAR, + }, { + header: &Header{Size: 077777777777, Format: FormatPAX}, + formats: FormatUSTAR | FormatPAX, + }, { + header: &Header{Size: 077777777777, Format: FormatGNU}, + formats: FormatGNU, + }, { + header: &Header{Size: 077777777777 + 1}, + paxHdrs: map[string]string{paxSize: "8589934592"}, + formats: FormatPAX | FormatGNU, + }, { + header: &Header{Size: 077777777777 + 1, Format: FormatPAX}, + paxHdrs: map[string]string{paxSize: "8589934592"}, + formats: FormatPAX, + }, { + header: &Header{Size: 077777777777 + 1, Format: FormatGNU}, + paxHdrs: map[string]string{paxSize: "8589934592"}, + formats: FormatGNU, + }, { + header: &Header{Mode: 07777777}, + formats: FormatUSTAR | FormatPAX | FormatGNU, + }, { + header: &Header{Mode: 07777777 + 1}, + formats: FormatGNU, + }, { + header: &Header{Devmajor: -123}, + formats: FormatGNU, + }, { + header: &Header{Devmajor: 1<<56 - 1}, + formats: FormatGNU, + }, { + header: &Header{Devmajor: 1 << 56}, + formats: FormatUnknown, + }, { + header: &Header{Devmajor: -1 << 56}, + formats: FormatGNU, + }, { + header: &Header{Devmajor: -1<<56 - 1}, + formats: FormatUnknown, + }, { + header: &Header{Name: "用戶名", Devmajor: -1 << 56}, + formats: FormatGNU, + }, { + header: &Header{Size: math.MaxInt64}, + paxHdrs: map[string]string{paxSize: "9223372036854775807"}, + formats: FormatPAX | FormatGNU, + }, { + header: &Header{Size: math.MinInt64}, + paxHdrs: map[string]string{paxSize: "-9223372036854775808"}, + formats: FormatUnknown, + }, { + header: &Header{Uname: "0123456789abcdef0123456789abcdef"}, + formats: FormatUSTAR | FormatPAX | FormatGNU, + }, { + header: &Header{Uname: "0123456789abcdef0123456789abcdefx"}, + paxHdrs: map[string]string{paxUname: "0123456789abcdef0123456789abcdefx"}, + formats: FormatPAX, + }, { + header: &Header{Name: "foobar"}, + formats: FormatUSTAR | FormatPAX | FormatGNU, + }, { + header: &Header{Name: strings.Repeat("a", nameSize)}, + formats: FormatUSTAR | FormatPAX | FormatGNU, + }, { + header: &Header{Name: strings.Repeat("a", nameSize+1)}, + paxHdrs: map[string]string{paxPath: strings.Repeat("a", nameSize+1)}, + formats: FormatPAX | FormatGNU, + }, { + header: &Header{Linkname: "用戶名"}, + paxHdrs: map[string]string{paxLinkpath: "用戶名"}, + formats: FormatPAX | FormatGNU, + }, { + header: &Header{Linkname: strings.Repeat("用戶名\x00", nameSize)}, + paxHdrs: map[string]string{paxLinkpath: strings.Repeat("用戶名\x00", nameSize)}, + formats: FormatUnknown, + }, { + header: &Header{Linkname: "\x00hello"}, + paxHdrs: map[string]string{paxLinkpath: "\x00hello"}, + formats: FormatUnknown, + }, { + header: &Header{Uid: 07777777}, + formats: FormatUSTAR | FormatPAX | FormatGNU, + }, { + header: &Header{Uid: 07777777 + 1}, + paxHdrs: map[string]string{paxUid: "2097152"}, + formats: FormatPAX | FormatGNU, + }, { + header: &Header{Xattrs: nil}, + formats: FormatUSTAR | FormatPAX | FormatGNU, + }, { + header: &Header{Xattrs: map[string]string{"foo": "bar"}}, + paxHdrs: map[string]string{paxSchilyXattr + "foo": "bar"}, + formats: FormatPAX, + }, { + header: &Header{Xattrs: map[string]string{"foo": "bar"}, Format: FormatGNU}, + paxHdrs: map[string]string{paxSchilyXattr + "foo": "bar"}, + formats: FormatUnknown, + }, { + header: &Header{Xattrs: map[string]string{"用戶名": "\x00hello"}}, + paxHdrs: map[string]string{paxSchilyXattr + "用戶名": "\x00hello"}, + formats: FormatPAX, + }, { + header: &Header{Xattrs: map[string]string{"foo=bar": "baz"}}, + formats: FormatUnknown, + }, { + header: &Header{Xattrs: map[string]string{"foo": ""}}, + paxHdrs: map[string]string{paxSchilyXattr + "foo": ""}, + formats: FormatPAX, + }, { + header: &Header{ModTime: time.Unix(0, 0)}, + formats: FormatUSTAR | FormatPAX | FormatGNU, + }, { + header: &Header{ModTime: time.Unix(077777777777, 0)}, + formats: FormatUSTAR | FormatPAX | FormatGNU, + }, { + header: &Header{ModTime: time.Unix(077777777777+1, 0)}, + paxHdrs: map[string]string{paxMtime: "8589934592"}, + formats: FormatPAX | FormatGNU, + }, { + header: &Header{ModTime: time.Unix(math.MaxInt64, 0)}, + paxHdrs: map[string]string{paxMtime: "9223372036854775807"}, + formats: FormatPAX | FormatGNU, + }, { + header: &Header{ModTime: time.Unix(math.MaxInt64, 0), Format: FormatUSTAR}, + paxHdrs: map[string]string{paxMtime: "9223372036854775807"}, + formats: FormatUnknown, + }, { + header: &Header{ModTime: time.Unix(-1, 0)}, + paxHdrs: map[string]string{paxMtime: "-1"}, + formats: FormatPAX | FormatGNU, + }, { + header: &Header{ModTime: time.Unix(1, 500)}, + paxHdrs: map[string]string{paxMtime: "1.0000005"}, + formats: FormatUSTAR | FormatPAX | FormatGNU, + }, { + header: &Header{ModTime: time.Unix(1, 0)}, + formats: FormatUSTAR | FormatPAX | FormatGNU, + }, { + header: &Header{ModTime: time.Unix(1, 0), Format: FormatPAX}, + formats: FormatUSTAR | FormatPAX, + }, { + header: &Header{ModTime: time.Unix(1, 500), Format: FormatUSTAR}, + paxHdrs: map[string]string{paxMtime: "1.0000005"}, + formats: FormatUSTAR, + }, { + header: &Header{ModTime: time.Unix(1, 500), Format: FormatPAX}, + paxHdrs: map[string]string{paxMtime: "1.0000005"}, + formats: FormatPAX, + }, { + header: &Header{ModTime: time.Unix(1, 500), Format: FormatGNU}, + paxHdrs: map[string]string{paxMtime: "1.0000005"}, + formats: FormatGNU, + }, { + header: &Header{ModTime: time.Unix(-1, 500)}, + paxHdrs: map[string]string{paxMtime: "-0.9999995"}, + formats: FormatPAX | FormatGNU, + }, { + header: &Header{ModTime: time.Unix(-1, 500), Format: FormatGNU}, + paxHdrs: map[string]string{paxMtime: "-0.9999995"}, + formats: FormatGNU, + }, { + header: &Header{AccessTime: time.Unix(0, 0)}, + paxHdrs: map[string]string{paxAtime: "0"}, + formats: FormatPAX | FormatGNU, + }, { + header: &Header{AccessTime: time.Unix(0, 0), Format: FormatUSTAR}, + paxHdrs: map[string]string{paxAtime: "0"}, + formats: FormatUnknown, + }, { + header: &Header{AccessTime: time.Unix(0, 0), Format: FormatPAX}, + paxHdrs: map[string]string{paxAtime: "0"}, + formats: FormatPAX, + }, { + header: &Header{AccessTime: time.Unix(0, 0), Format: FormatGNU}, + paxHdrs: map[string]string{paxAtime: "0"}, + formats: FormatGNU, + }, { + header: &Header{AccessTime: time.Unix(-123, 0)}, + paxHdrs: map[string]string{paxAtime: "-123"}, + formats: FormatPAX | FormatGNU, + }, { + header: &Header{AccessTime: time.Unix(-123, 0), Format: FormatPAX}, + paxHdrs: map[string]string{paxAtime: "-123"}, + formats: FormatPAX, + }, { + header: &Header{ChangeTime: time.Unix(123, 456)}, + paxHdrs: map[string]string{paxCtime: "123.000000456"}, + formats: FormatPAX | FormatGNU, + }, { + header: &Header{ChangeTime: time.Unix(123, 456), Format: FormatUSTAR}, + paxHdrs: map[string]string{paxCtime: "123.000000456"}, + formats: FormatUnknown, + }, { + header: &Header{ChangeTime: time.Unix(123, 456), Format: FormatGNU}, + paxHdrs: map[string]string{paxCtime: "123.000000456"}, + formats: FormatGNU, + }, { + header: &Header{ChangeTime: time.Unix(123, 456), Format: FormatPAX}, + paxHdrs: map[string]string{paxCtime: "123.000000456"}, + formats: FormatPAX, + }, { + header: &Header{Name: "foo/", Typeflag: TypeDir}, + formats: FormatUSTAR | FormatPAX | FormatGNU, + }, { + header: &Header{Name: "foo/", Typeflag: TypeReg}, + formats: FormatUnknown, + }, { + header: &Header{Name: "foo/", Typeflag: TypeSymlink}, + formats: FormatUSTAR | FormatPAX | FormatGNU, + }} + + for i, v := range vectors { + formats, paxHdrs, err := v.header.allowedFormats() + if formats != v.formats { + t.Errorf("test %d, allowedFormats(): got %v, want %v", i, formats, v.formats) + } + if formats&FormatPAX > 0 && !reflect.DeepEqual(paxHdrs, v.paxHdrs) && !(len(paxHdrs) == 0 && len(v.paxHdrs) == 0) { + t.Errorf("test %d, allowedFormats():\ngot %v\nwant %s", i, paxHdrs, v.paxHdrs) + } + if (formats != FormatUnknown) && (err != nil) { + t.Errorf("test %d, unexpected error: %v", i, err) + } + if (formats == FormatUnknown) && (err == nil) { + t.Errorf("test %d, got nil-error, want non-nil error", i) + } + } +} + +func Benchmark(b *testing.B) { + type file struct { + hdr *Header + body []byte + } + + vectors := []struct { + label string + files []file + }{{ + "USTAR", + []file{{ + &Header{Name: "bar", Mode: 0640, Size: int64(3)}, + []byte("foo"), + }, { + &Header{Name: "world", Mode: 0640, Size: int64(5)}, + []byte("hello"), + }}, + }, { + "GNU", + []file{{ + &Header{Name: "bar", Mode: 0640, Size: int64(3), Devmajor: -1}, + []byte("foo"), + }, { + &Header{Name: "world", Mode: 0640, Size: int64(5), Devmajor: -1}, + []byte("hello"), + }}, + }, { + "PAX", + []file{{ + &Header{Name: "bar", Mode: 0640, Size: int64(3), Xattrs: map[string]string{"foo": "bar"}}, + []byte("foo"), + }, { + &Header{Name: "world", Mode: 0640, Size: int64(5), Xattrs: map[string]string{"foo": "bar"}}, + []byte("hello"), + }}, + }} + + b.Run("Writer", func(b *testing.B) { + for _, v := range vectors { + b.Run(v.label, func(b *testing.B) { + b.ReportAllocs() + for i := 0; i < b.N; i++ { + // Writing to io.Discard because we want to + // test purely the writer code and not bring in disk performance into this. + tw := NewWriter(io.Discard) + for _, file := range v.files { + if err := tw.WriteHeader(file.hdr); err != nil { + b.Errorf("unexpected WriteHeader error: %v", err) + } + if _, err := tw.Write(file.body); err != nil { + b.Errorf("unexpected Write error: %v", err) + } + } + if err := tw.Close(); err != nil { + b.Errorf("unexpected Close error: %v", err) + } + } + }) + } + }) + + b.Run("Reader", func(b *testing.B) { + for _, v := range vectors { + var buf bytes.Buffer + var r bytes.Reader + + // Write the archive to a byte buffer. + tw := NewWriter(&buf) + for _, file := range v.files { + tw.WriteHeader(file.hdr) + tw.Write(file.body) + } + tw.Close() + b.Run(v.label, func(b *testing.B) { + b.ReportAllocs() + // Read from the byte buffer. + for i := 0; i < b.N; i++ { + r.Reset(buf.Bytes()) + tr := NewReader(&r) + if _, err := tr.Next(); err != nil { + b.Errorf("unexpected Next error: %v", err) + } + if _, err := io.Copy(io.Discard, tr); err != nil { + b.Errorf("unexpected Copy error : %v", err) + } + } + }) + } + }) + +} diff --git a/src/archive/tar/testdata/file-and-dir.tar b/src/archive/tar/testdata/file-and-dir.tar Binary files differnew file mode 100644 index 0000000..c18d428 --- /dev/null +++ b/src/archive/tar/testdata/file-and-dir.tar diff --git a/src/archive/tar/testdata/gnu-incremental.tar b/src/archive/tar/testdata/gnu-incremental.tar Binary files differnew file mode 100644 index 0000000..4c442e5 --- /dev/null +++ b/src/archive/tar/testdata/gnu-incremental.tar diff --git a/src/archive/tar/testdata/gnu-long-nul.tar b/src/archive/tar/testdata/gnu-long-nul.tar Binary files differnew file mode 100644 index 0000000..28bc812 --- /dev/null +++ b/src/archive/tar/testdata/gnu-long-nul.tar diff --git a/src/archive/tar/testdata/gnu-multi-hdrs.tar b/src/archive/tar/testdata/gnu-multi-hdrs.tar Binary files differnew file mode 100644 index 0000000..8bcad55 --- /dev/null +++ b/src/archive/tar/testdata/gnu-multi-hdrs.tar diff --git a/src/archive/tar/testdata/gnu-nil-sparse-data.tar b/src/archive/tar/testdata/gnu-nil-sparse-data.tar Binary files differnew file mode 100644 index 0000000..df1aa83 --- /dev/null +++ b/src/archive/tar/testdata/gnu-nil-sparse-data.tar diff --git a/src/archive/tar/testdata/gnu-nil-sparse-hole.tar b/src/archive/tar/testdata/gnu-nil-sparse-hole.tar Binary files differnew file mode 100644 index 0000000..496abfe --- /dev/null +++ b/src/archive/tar/testdata/gnu-nil-sparse-hole.tar diff --git a/src/archive/tar/testdata/gnu-not-utf8.tar b/src/archive/tar/testdata/gnu-not-utf8.tar Binary files differnew file mode 100644 index 0000000..81cec67 --- /dev/null +++ b/src/archive/tar/testdata/gnu-not-utf8.tar diff --git a/src/archive/tar/testdata/gnu-sparse-big.tar b/src/archive/tar/testdata/gnu-sparse-big.tar Binary files differnew file mode 100644 index 0000000..1a5cfc9 --- /dev/null +++ b/src/archive/tar/testdata/gnu-sparse-big.tar diff --git a/src/archive/tar/testdata/gnu-utf8.tar b/src/archive/tar/testdata/gnu-utf8.tar Binary files differnew file mode 100644 index 0000000..2c9c807 --- /dev/null +++ b/src/archive/tar/testdata/gnu-utf8.tar diff --git a/src/archive/tar/testdata/gnu.tar b/src/archive/tar/testdata/gnu.tar Binary files differnew file mode 100644 index 0000000..fc899dc --- /dev/null +++ b/src/archive/tar/testdata/gnu.tar diff --git a/src/archive/tar/testdata/hardlink.tar b/src/archive/tar/testdata/hardlink.tar Binary files differnew file mode 100644 index 0000000..9cd1a26 --- /dev/null +++ b/src/archive/tar/testdata/hardlink.tar diff --git a/src/archive/tar/testdata/hdr-only.tar b/src/archive/tar/testdata/hdr-only.tar Binary files differnew file mode 100644 index 0000000..f250340 --- /dev/null +++ b/src/archive/tar/testdata/hdr-only.tar diff --git a/src/archive/tar/testdata/invalid-go17.tar b/src/archive/tar/testdata/invalid-go17.tar Binary files differnew file mode 100644 index 0000000..58f2488 --- /dev/null +++ b/src/archive/tar/testdata/invalid-go17.tar diff --git a/src/archive/tar/testdata/issue10968.tar b/src/archive/tar/testdata/issue10968.tar Binary files differnew file mode 100644 index 0000000..1cc837b --- /dev/null +++ b/src/archive/tar/testdata/issue10968.tar diff --git a/src/archive/tar/testdata/issue11169.tar b/src/archive/tar/testdata/issue11169.tar Binary files differnew file mode 100644 index 0000000..4d71fa1 --- /dev/null +++ b/src/archive/tar/testdata/issue11169.tar diff --git a/src/archive/tar/testdata/issue12435.tar b/src/archive/tar/testdata/issue12435.tar Binary files differnew file mode 100644 index 0000000..3542dd8 --- /dev/null +++ b/src/archive/tar/testdata/issue12435.tar diff --git a/src/archive/tar/testdata/neg-size.tar b/src/archive/tar/testdata/neg-size.tar Binary files differnew file mode 100644 index 0000000..21edf38 --- /dev/null +++ b/src/archive/tar/testdata/neg-size.tar diff --git a/src/archive/tar/testdata/nil-uid.tar b/src/archive/tar/testdata/nil-uid.tar Binary files differnew file mode 100644 index 0000000..cc9cfaa --- /dev/null +++ b/src/archive/tar/testdata/nil-uid.tar diff --git a/src/archive/tar/testdata/pax-bad-hdr-file.tar b/src/archive/tar/testdata/pax-bad-hdr-file.tar Binary files differnew file mode 100644 index 0000000..b97cc98 --- /dev/null +++ b/src/archive/tar/testdata/pax-bad-hdr-file.tar diff --git a/src/archive/tar/testdata/pax-bad-hdr-large.tar.bz2 b/src/archive/tar/testdata/pax-bad-hdr-large.tar.bz2 Binary files differnew file mode 100644 index 0000000..06bf710 --- /dev/null +++ b/src/archive/tar/testdata/pax-bad-hdr-large.tar.bz2 diff --git a/src/archive/tar/testdata/pax-bad-mtime-file.tar b/src/archive/tar/testdata/pax-bad-mtime-file.tar Binary files differnew file mode 100644 index 0000000..9b22f7e --- /dev/null +++ b/src/archive/tar/testdata/pax-bad-mtime-file.tar diff --git a/src/archive/tar/testdata/pax-global-records.tar b/src/archive/tar/testdata/pax-global-records.tar Binary files differnew file mode 100644 index 0000000..3d3d241 --- /dev/null +++ b/src/archive/tar/testdata/pax-global-records.tar diff --git a/src/archive/tar/testdata/pax-multi-hdrs.tar b/src/archive/tar/testdata/pax-multi-hdrs.tar Binary files differnew file mode 100644 index 0000000..14bc759 --- /dev/null +++ b/src/archive/tar/testdata/pax-multi-hdrs.tar diff --git a/src/archive/tar/testdata/pax-nil-sparse-data.tar b/src/archive/tar/testdata/pax-nil-sparse-data.tar Binary files differnew file mode 100644 index 0000000..e59bd94 --- /dev/null +++ b/src/archive/tar/testdata/pax-nil-sparse-data.tar diff --git a/src/archive/tar/testdata/pax-nil-sparse-hole.tar b/src/archive/tar/testdata/pax-nil-sparse-hole.tar Binary files differnew file mode 100644 index 0000000..b44327b --- /dev/null +++ b/src/archive/tar/testdata/pax-nil-sparse-hole.tar diff --git a/src/archive/tar/testdata/pax-nul-path.tar b/src/archive/tar/testdata/pax-nul-path.tar Binary files differnew file mode 100644 index 0000000..c78f82b --- /dev/null +++ b/src/archive/tar/testdata/pax-nul-path.tar diff --git a/src/archive/tar/testdata/pax-nul-xattrs.tar b/src/archive/tar/testdata/pax-nul-xattrs.tar Binary files differnew file mode 100644 index 0000000..881f517 --- /dev/null +++ b/src/archive/tar/testdata/pax-nul-xattrs.tar diff --git a/src/archive/tar/testdata/pax-path-hdr.tar b/src/archive/tar/testdata/pax-path-hdr.tar Binary files differnew file mode 100644 index 0000000..ab8fc32 --- /dev/null +++ b/src/archive/tar/testdata/pax-path-hdr.tar diff --git a/src/archive/tar/testdata/pax-pos-size-file.tar b/src/archive/tar/testdata/pax-pos-size-file.tar Binary files differnew file mode 100644 index 0000000..ea5ccf9 --- /dev/null +++ b/src/archive/tar/testdata/pax-pos-size-file.tar diff --git a/src/archive/tar/testdata/pax-records.tar b/src/archive/tar/testdata/pax-records.tar Binary files differnew file mode 100644 index 0000000..276c211 --- /dev/null +++ b/src/archive/tar/testdata/pax-records.tar diff --git a/src/archive/tar/testdata/pax-sparse-big.tar b/src/archive/tar/testdata/pax-sparse-big.tar Binary files differnew file mode 100644 index 0000000..65d1f8e --- /dev/null +++ b/src/archive/tar/testdata/pax-sparse-big.tar diff --git a/src/archive/tar/testdata/pax.tar b/src/archive/tar/testdata/pax.tar Binary files differnew file mode 100644 index 0000000..9bc24b6 --- /dev/null +++ b/src/archive/tar/testdata/pax.tar diff --git a/src/archive/tar/testdata/small.txt b/src/archive/tar/testdata/small.txt new file mode 100644 index 0000000..b249bfc --- /dev/null +++ b/src/archive/tar/testdata/small.txt @@ -0,0 +1 @@ +Kilts
\ No newline at end of file diff --git a/src/archive/tar/testdata/small2.txt b/src/archive/tar/testdata/small2.txt new file mode 100644 index 0000000..394ee3e --- /dev/null +++ b/src/archive/tar/testdata/small2.txt @@ -0,0 +1 @@ +Google.com diff --git a/src/archive/tar/testdata/sparse-formats.tar b/src/archive/tar/testdata/sparse-formats.tar Binary files differnew file mode 100644 index 0000000..8bd4e74 --- /dev/null +++ b/src/archive/tar/testdata/sparse-formats.tar diff --git a/src/archive/tar/testdata/star.tar b/src/archive/tar/testdata/star.tar Binary files differnew file mode 100644 index 0000000..59e2d4e --- /dev/null +++ b/src/archive/tar/testdata/star.tar diff --git a/src/archive/tar/testdata/trailing-slash.tar b/src/archive/tar/testdata/trailing-slash.tar Binary files differnew file mode 100644 index 0000000..93718b3 --- /dev/null +++ b/src/archive/tar/testdata/trailing-slash.tar diff --git a/src/archive/tar/testdata/ustar-file-devs.tar b/src/archive/tar/testdata/ustar-file-devs.tar Binary files differnew file mode 100644 index 0000000..146e25b --- /dev/null +++ b/src/archive/tar/testdata/ustar-file-devs.tar diff --git a/src/archive/tar/testdata/ustar-file-reg.tar b/src/archive/tar/testdata/ustar-file-reg.tar Binary files differnew file mode 100644 index 0000000..c84fa27 --- /dev/null +++ b/src/archive/tar/testdata/ustar-file-reg.tar diff --git a/src/archive/tar/testdata/ustar.tar b/src/archive/tar/testdata/ustar.tar Binary files differnew file mode 100644 index 0000000..29679d9 --- /dev/null +++ b/src/archive/tar/testdata/ustar.tar diff --git a/src/archive/tar/testdata/v7.tar b/src/archive/tar/testdata/v7.tar Binary files differnew file mode 100644 index 0000000..eb65fc9 --- /dev/null +++ b/src/archive/tar/testdata/v7.tar diff --git a/src/archive/tar/testdata/writer-big-long.tar b/src/archive/tar/testdata/writer-big-long.tar Binary files differnew file mode 100644 index 0000000..09fc5dd --- /dev/null +++ b/src/archive/tar/testdata/writer-big-long.tar diff --git a/src/archive/tar/testdata/writer-big.tar b/src/archive/tar/testdata/writer-big.tar Binary files differnew file mode 100644 index 0000000..435dcbc --- /dev/null +++ b/src/archive/tar/testdata/writer-big.tar diff --git a/src/archive/tar/testdata/writer.tar b/src/archive/tar/testdata/writer.tar Binary files differnew file mode 100644 index 0000000..e6d816a --- /dev/null +++ b/src/archive/tar/testdata/writer.tar diff --git a/src/archive/tar/testdata/xattrs.tar b/src/archive/tar/testdata/xattrs.tar Binary files differnew file mode 100644 index 0000000..9701950 --- /dev/null +++ b/src/archive/tar/testdata/xattrs.tar diff --git a/src/archive/tar/writer.go b/src/archive/tar/writer.go new file mode 100644 index 0000000..9b2e3e2 --- /dev/null +++ b/src/archive/tar/writer.go @@ -0,0 +1,659 @@ +// Copyright 2009 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package tar + +import ( + "fmt" + "io" + "path" + "sort" + "strings" + "time" +) + +// Writer provides sequential writing of a tar archive. +// Write.WriteHeader begins a new file with the provided Header, +// and then Writer can be treated as an io.Writer to supply that file's data. +type Writer struct { + w io.Writer + pad int64 // Amount of padding to write after current file entry + curr fileWriter // Writer for current file entry + hdr Header // Shallow copy of Header that is safe for mutations + blk block // Buffer to use as temporary local storage + + // err is a persistent error. + // It is only the responsibility of every exported method of Writer to + // ensure that this error is sticky. + err error +} + +// NewWriter creates a new Writer writing to w. +func NewWriter(w io.Writer) *Writer { + return &Writer{w: w, curr: ®FileWriter{w, 0}} +} + +type fileWriter interface { + io.Writer + fileState + + ReadFrom(io.Reader) (int64, error) +} + +// Flush finishes writing the current file's block padding. +// The current file must be fully written before Flush can be called. +// +// This is unnecessary as the next call to WriteHeader or Close +// will implicitly flush out the file's padding. +func (tw *Writer) Flush() error { + if tw.err != nil { + return tw.err + } + if nb := tw.curr.logicalRemaining(); nb > 0 { + return fmt.Errorf("archive/tar: missed writing %d bytes", nb) + } + if _, tw.err = tw.w.Write(zeroBlock[:tw.pad]); tw.err != nil { + return tw.err + } + tw.pad = 0 + return nil +} + +// WriteHeader writes hdr and prepares to accept the file's contents. +// The Header.Size determines how many bytes can be written for the next file. +// If the current file is not fully written, then this returns an error. +// This implicitly flushes any padding necessary before writing the header. +func (tw *Writer) WriteHeader(hdr *Header) error { + if err := tw.Flush(); err != nil { + return err + } + tw.hdr = *hdr // Shallow copy of Header + + // Avoid usage of the legacy TypeRegA flag, and automatically promote + // it to use TypeReg or TypeDir. + if tw.hdr.Typeflag == TypeRegA { + if strings.HasSuffix(tw.hdr.Name, "/") { + tw.hdr.Typeflag = TypeDir + } else { + tw.hdr.Typeflag = TypeReg + } + } + + // Round ModTime and ignore AccessTime and ChangeTime unless + // the format is explicitly chosen. + // This ensures nominal usage of WriteHeader (without specifying the format) + // does not always result in the PAX format being chosen, which + // causes a 1KiB increase to every header. + if tw.hdr.Format == FormatUnknown { + tw.hdr.ModTime = tw.hdr.ModTime.Round(time.Second) + tw.hdr.AccessTime = time.Time{} + tw.hdr.ChangeTime = time.Time{} + } + + allowedFormats, paxHdrs, err := tw.hdr.allowedFormats() + switch { + case allowedFormats.has(FormatUSTAR): + tw.err = tw.writeUSTARHeader(&tw.hdr) + return tw.err + case allowedFormats.has(FormatPAX): + tw.err = tw.writePAXHeader(&tw.hdr, paxHdrs) + return tw.err + case allowedFormats.has(FormatGNU): + tw.err = tw.writeGNUHeader(&tw.hdr) + return tw.err + default: + return err // Non-fatal error + } +} + +func (tw *Writer) writeUSTARHeader(hdr *Header) error { + // Check if we can use USTAR prefix/suffix splitting. + var namePrefix string + if prefix, suffix, ok := splitUSTARPath(hdr.Name); ok { + namePrefix, hdr.Name = prefix, suffix + } + + // Pack the main header. + var f formatter + blk := tw.templateV7Plus(hdr, f.formatString, f.formatOctal) + f.formatString(blk.toUSTAR().prefix(), namePrefix) + blk.setFormat(FormatUSTAR) + if f.err != nil { + return f.err // Should never happen since header is validated + } + return tw.writeRawHeader(blk, hdr.Size, hdr.Typeflag) +} + +func (tw *Writer) writePAXHeader(hdr *Header, paxHdrs map[string]string) error { + realName, realSize := hdr.Name, hdr.Size + + // TODO(dsnet): Re-enable this when adding sparse support. + // See https://golang.org/issue/22735 + /* + // Handle sparse files. + var spd sparseDatas + var spb []byte + if len(hdr.SparseHoles) > 0 { + sph := append([]sparseEntry{}, hdr.SparseHoles...) // Copy sparse map + sph = alignSparseEntries(sph, hdr.Size) + spd = invertSparseEntries(sph, hdr.Size) + + // Format the sparse map. + hdr.Size = 0 // Replace with encoded size + spb = append(strconv.AppendInt(spb, int64(len(spd)), 10), '\n') + for _, s := range spd { + hdr.Size += s.Length + spb = append(strconv.AppendInt(spb, s.Offset, 10), '\n') + spb = append(strconv.AppendInt(spb, s.Length, 10), '\n') + } + pad := blockPadding(int64(len(spb))) + spb = append(spb, zeroBlock[:pad]...) + hdr.Size += int64(len(spb)) // Accounts for encoded sparse map + + // Add and modify appropriate PAX records. + dir, file := path.Split(realName) + hdr.Name = path.Join(dir, "GNUSparseFile.0", file) + paxHdrs[paxGNUSparseMajor] = "1" + paxHdrs[paxGNUSparseMinor] = "0" + paxHdrs[paxGNUSparseName] = realName + paxHdrs[paxGNUSparseRealSize] = strconv.FormatInt(realSize, 10) + paxHdrs[paxSize] = strconv.FormatInt(hdr.Size, 10) + delete(paxHdrs, paxPath) // Recorded by paxGNUSparseName + } + */ + _ = realSize + + // Write PAX records to the output. + isGlobal := hdr.Typeflag == TypeXGlobalHeader + if len(paxHdrs) > 0 || isGlobal { + // Sort keys for deterministic ordering. + var keys []string + for k := range paxHdrs { + keys = append(keys, k) + } + sort.Strings(keys) + + // Write each record to a buffer. + var buf strings.Builder + for _, k := range keys { + rec, err := formatPAXRecord(k, paxHdrs[k]) + if err != nil { + return err + } + buf.WriteString(rec) + } + + // Write the extended header file. + var name string + var flag byte + if isGlobal { + name = realName + if name == "" { + name = "GlobalHead.0.0" + } + flag = TypeXGlobalHeader + } else { + dir, file := path.Split(realName) + name = path.Join(dir, "PaxHeaders.0", file) + flag = TypeXHeader + } + data := buf.String() + if len(data) > maxSpecialFileSize { + return ErrFieldTooLong + } + if err := tw.writeRawFile(name, data, flag, FormatPAX); err != nil || isGlobal { + return err // Global headers return here + } + } + + // Pack the main header. + var f formatter // Ignore errors since they are expected + fmtStr := func(b []byte, s string) { f.formatString(b, toASCII(s)) } + blk := tw.templateV7Plus(hdr, fmtStr, f.formatOctal) + blk.setFormat(FormatPAX) + if err := tw.writeRawHeader(blk, hdr.Size, hdr.Typeflag); err != nil { + return err + } + + // TODO(dsnet): Re-enable this when adding sparse support. + // See https://golang.org/issue/22735 + /* + // Write the sparse map and setup the sparse writer if necessary. + if len(spd) > 0 { + // Use tw.curr since the sparse map is accounted for in hdr.Size. + if _, err := tw.curr.Write(spb); err != nil { + return err + } + tw.curr = &sparseFileWriter{tw.curr, spd, 0} + } + */ + return nil +} + +func (tw *Writer) writeGNUHeader(hdr *Header) error { + // Use long-link files if Name or Linkname exceeds the field size. + const longName = "././@LongLink" + if len(hdr.Name) > nameSize { + data := hdr.Name + "\x00" + if err := tw.writeRawFile(longName, data, TypeGNULongName, FormatGNU); err != nil { + return err + } + } + if len(hdr.Linkname) > nameSize { + data := hdr.Linkname + "\x00" + if err := tw.writeRawFile(longName, data, TypeGNULongLink, FormatGNU); err != nil { + return err + } + } + + // Pack the main header. + var f formatter // Ignore errors since they are expected + var spd sparseDatas + var spb []byte + blk := tw.templateV7Plus(hdr, f.formatString, f.formatNumeric) + if !hdr.AccessTime.IsZero() { + f.formatNumeric(blk.toGNU().accessTime(), hdr.AccessTime.Unix()) + } + if !hdr.ChangeTime.IsZero() { + f.formatNumeric(blk.toGNU().changeTime(), hdr.ChangeTime.Unix()) + } + // TODO(dsnet): Re-enable this when adding sparse support. + // See https://golang.org/issue/22735 + /* + if hdr.Typeflag == TypeGNUSparse { + sph := append([]sparseEntry{}, hdr.SparseHoles...) // Copy sparse map + sph = alignSparseEntries(sph, hdr.Size) + spd = invertSparseEntries(sph, hdr.Size) + + // Format the sparse map. + formatSPD := func(sp sparseDatas, sa sparseArray) sparseDatas { + for i := 0; len(sp) > 0 && i < sa.MaxEntries(); i++ { + f.formatNumeric(sa.Entry(i).Offset(), sp[0].Offset) + f.formatNumeric(sa.Entry(i).Length(), sp[0].Length) + sp = sp[1:] + } + if len(sp) > 0 { + sa.IsExtended()[0] = 1 + } + return sp + } + sp2 := formatSPD(spd, blk.GNU().Sparse()) + for len(sp2) > 0 { + var spHdr block + sp2 = formatSPD(sp2, spHdr.Sparse()) + spb = append(spb, spHdr[:]...) + } + + // Update size fields in the header block. + realSize := hdr.Size + hdr.Size = 0 // Encoded size; does not account for encoded sparse map + for _, s := range spd { + hdr.Size += s.Length + } + copy(blk.V7().Size(), zeroBlock[:]) // Reset field + f.formatNumeric(blk.V7().Size(), hdr.Size) + f.formatNumeric(blk.GNU().RealSize(), realSize) + } + */ + blk.setFormat(FormatGNU) + if err := tw.writeRawHeader(blk, hdr.Size, hdr.Typeflag); err != nil { + return err + } + + // Write the extended sparse map and setup the sparse writer if necessary. + if len(spd) > 0 { + // Use tw.w since the sparse map is not accounted for in hdr.Size. + if _, err := tw.w.Write(spb); err != nil { + return err + } + tw.curr = &sparseFileWriter{tw.curr, spd, 0} + } + return nil +} + +type ( + stringFormatter func([]byte, string) + numberFormatter func([]byte, int64) +) + +// templateV7Plus fills out the V7 fields of a block using values from hdr. +// It also fills out fields (uname, gname, devmajor, devminor) that are +// shared in the USTAR, PAX, and GNU formats using the provided formatters. +// +// The block returned is only valid until the next call to +// templateV7Plus or writeRawFile. +func (tw *Writer) templateV7Plus(hdr *Header, fmtStr stringFormatter, fmtNum numberFormatter) *block { + tw.blk.reset() + + modTime := hdr.ModTime + if modTime.IsZero() { + modTime = time.Unix(0, 0) + } + + v7 := tw.blk.toV7() + v7.typeFlag()[0] = hdr.Typeflag + fmtStr(v7.name(), hdr.Name) + fmtStr(v7.linkName(), hdr.Linkname) + fmtNum(v7.mode(), hdr.Mode) + fmtNum(v7.uid(), int64(hdr.Uid)) + fmtNum(v7.gid(), int64(hdr.Gid)) + fmtNum(v7.size(), hdr.Size) + fmtNum(v7.modTime(), modTime.Unix()) + + ustar := tw.blk.toUSTAR() + fmtStr(ustar.userName(), hdr.Uname) + fmtStr(ustar.groupName(), hdr.Gname) + fmtNum(ustar.devMajor(), hdr.Devmajor) + fmtNum(ustar.devMinor(), hdr.Devminor) + + return &tw.blk +} + +// writeRawFile writes a minimal file with the given name and flag type. +// It uses format to encode the header format and will write data as the body. +// It uses default values for all of the other fields (as BSD and GNU tar does). +func (tw *Writer) writeRawFile(name, data string, flag byte, format Format) error { + tw.blk.reset() + + // Best effort for the filename. + name = toASCII(name) + if len(name) > nameSize { + name = name[:nameSize] + } + name = strings.TrimRight(name, "/") + + var f formatter + v7 := tw.blk.toV7() + v7.typeFlag()[0] = flag + f.formatString(v7.name(), name) + f.formatOctal(v7.mode(), 0) + f.formatOctal(v7.uid(), 0) + f.formatOctal(v7.gid(), 0) + f.formatOctal(v7.size(), int64(len(data))) // Must be < 8GiB + f.formatOctal(v7.modTime(), 0) + tw.blk.setFormat(format) + if f.err != nil { + return f.err // Only occurs if size condition is violated + } + + // Write the header and data. + if err := tw.writeRawHeader(&tw.blk, int64(len(data)), flag); err != nil { + return err + } + _, err := io.WriteString(tw, data) + return err +} + +// writeRawHeader writes the value of blk, regardless of its value. +// It sets up the Writer such that it can accept a file of the given size. +// If the flag is a special header-only flag, then the size is treated as zero. +func (tw *Writer) writeRawHeader(blk *block, size int64, flag byte) error { + if err := tw.Flush(); err != nil { + return err + } + if _, err := tw.w.Write(blk[:]); err != nil { + return err + } + if isHeaderOnlyType(flag) { + size = 0 + } + tw.curr = ®FileWriter{tw.w, size} + tw.pad = blockPadding(size) + return nil +} + +// splitUSTARPath splits a path according to USTAR prefix and suffix rules. +// If the path is not splittable, then it will return ("", "", false). +func splitUSTARPath(name string) (prefix, suffix string, ok bool) { + length := len(name) + if length <= nameSize || !isASCII(name) { + return "", "", false + } else if length > prefixSize+1 { + length = prefixSize + 1 + } else if name[length-1] == '/' { + length-- + } + + i := strings.LastIndex(name[:length], "/") + nlen := len(name) - i - 1 // nlen is length of suffix + plen := i // plen is length of prefix + if i <= 0 || nlen > nameSize || nlen == 0 || plen > prefixSize { + return "", "", false + } + return name[:i], name[i+1:], true +} + +// Write writes to the current file in the tar archive. +// Write returns the error ErrWriteTooLong if more than +// Header.Size bytes are written after WriteHeader. +// +// Calling Write on special types like TypeLink, TypeSymlink, TypeChar, +// TypeBlock, TypeDir, and TypeFifo returns (0, ErrWriteTooLong) regardless +// of what the Header.Size claims. +func (tw *Writer) Write(b []byte) (int, error) { + if tw.err != nil { + return 0, tw.err + } + n, err := tw.curr.Write(b) + if err != nil && err != ErrWriteTooLong { + tw.err = err + } + return n, err +} + +// readFrom populates the content of the current file by reading from r. +// The bytes read must match the number of remaining bytes in the current file. +// +// If the current file is sparse and r is an io.ReadSeeker, +// then readFrom uses Seek to skip past holes defined in Header.SparseHoles, +// assuming that skipped regions are all NULs. +// This always reads the last byte to ensure r is the right size. +// +// TODO(dsnet): Re-export this when adding sparse file support. +// See https://golang.org/issue/22735 +func (tw *Writer) readFrom(r io.Reader) (int64, error) { + if tw.err != nil { + return 0, tw.err + } + n, err := tw.curr.ReadFrom(r) + if err != nil && err != ErrWriteTooLong { + tw.err = err + } + return n, err +} + +// Close closes the tar archive by flushing the padding, and writing the footer. +// If the current file (from a prior call to WriteHeader) is not fully written, +// then this returns an error. +func (tw *Writer) Close() error { + if tw.err == ErrWriteAfterClose { + return nil + } + if tw.err != nil { + return tw.err + } + + // Trailer: two zero blocks. + err := tw.Flush() + for i := 0; i < 2 && err == nil; i++ { + _, err = tw.w.Write(zeroBlock[:]) + } + + // Ensure all future actions are invalid. + tw.err = ErrWriteAfterClose + return err // Report IO errors +} + +// regFileWriter is a fileWriter for writing data to a regular file entry. +type regFileWriter struct { + w io.Writer // Underlying Writer + nb int64 // Number of remaining bytes to write +} + +func (fw *regFileWriter) Write(b []byte) (n int, err error) { + overwrite := int64(len(b)) > fw.nb + if overwrite { + b = b[:fw.nb] + } + if len(b) > 0 { + n, err = fw.w.Write(b) + fw.nb -= int64(n) + } + switch { + case err != nil: + return n, err + case overwrite: + return n, ErrWriteTooLong + default: + return n, nil + } +} + +func (fw *regFileWriter) ReadFrom(r io.Reader) (int64, error) { + return io.Copy(struct{ io.Writer }{fw}, r) +} + +// logicalRemaining implements fileState.logicalRemaining. +func (fw regFileWriter) logicalRemaining() int64 { + return fw.nb +} + +// logicalRemaining implements fileState.physicalRemaining. +func (fw regFileWriter) physicalRemaining() int64 { + return fw.nb +} + +// sparseFileWriter is a fileWriter for writing data to a sparse file entry. +type sparseFileWriter struct { + fw fileWriter // Underlying fileWriter + sp sparseDatas // Normalized list of data fragments + pos int64 // Current position in sparse file +} + +func (sw *sparseFileWriter) Write(b []byte) (n int, err error) { + overwrite := int64(len(b)) > sw.logicalRemaining() + if overwrite { + b = b[:sw.logicalRemaining()] + } + + b0 := b + endPos := sw.pos + int64(len(b)) + for endPos > sw.pos && err == nil { + var nf int // Bytes written in fragment + dataStart, dataEnd := sw.sp[0].Offset, sw.sp[0].endOffset() + if sw.pos < dataStart { // In a hole fragment + bf := b[:min(int64(len(b)), dataStart-sw.pos)] + nf, err = zeroWriter{}.Write(bf) + } else { // In a data fragment + bf := b[:min(int64(len(b)), dataEnd-sw.pos)] + nf, err = sw.fw.Write(bf) + } + b = b[nf:] + sw.pos += int64(nf) + if sw.pos >= dataEnd && len(sw.sp) > 1 { + sw.sp = sw.sp[1:] // Ensure last fragment always remains + } + } + + n = len(b0) - len(b) + switch { + case err == ErrWriteTooLong: + return n, errMissData // Not possible; implies bug in validation logic + case err != nil: + return n, err + case sw.logicalRemaining() == 0 && sw.physicalRemaining() > 0: + return n, errUnrefData // Not possible; implies bug in validation logic + case overwrite: + return n, ErrWriteTooLong + default: + return n, nil + } +} + +func (sw *sparseFileWriter) ReadFrom(r io.Reader) (n int64, err error) { + rs, ok := r.(io.ReadSeeker) + if ok { + if _, err := rs.Seek(0, io.SeekCurrent); err != nil { + ok = false // Not all io.Seeker can really seek + } + } + if !ok { + return io.Copy(struct{ io.Writer }{sw}, r) + } + + var readLastByte bool + pos0 := sw.pos + for sw.logicalRemaining() > 0 && !readLastByte && err == nil { + var nf int64 // Size of fragment + dataStart, dataEnd := sw.sp[0].Offset, sw.sp[0].endOffset() + if sw.pos < dataStart { // In a hole fragment + nf = dataStart - sw.pos + if sw.physicalRemaining() == 0 { + readLastByte = true + nf-- + } + _, err = rs.Seek(nf, io.SeekCurrent) + } else { // In a data fragment + nf = dataEnd - sw.pos + nf, err = io.CopyN(sw.fw, rs, nf) + } + sw.pos += nf + if sw.pos >= dataEnd && len(sw.sp) > 1 { + sw.sp = sw.sp[1:] // Ensure last fragment always remains + } + } + + // If the last fragment is a hole, then seek to 1-byte before EOF, and + // read a single byte to ensure the file is the right size. + if readLastByte && err == nil { + _, err = mustReadFull(rs, []byte{0}) + sw.pos++ + } + + n = sw.pos - pos0 + switch { + case err == io.EOF: + return n, io.ErrUnexpectedEOF + case err == ErrWriteTooLong: + return n, errMissData // Not possible; implies bug in validation logic + case err != nil: + return n, err + case sw.logicalRemaining() == 0 && sw.physicalRemaining() > 0: + return n, errUnrefData // Not possible; implies bug in validation logic + default: + return n, ensureEOF(rs) + } +} + +func (sw sparseFileWriter) logicalRemaining() int64 { + return sw.sp[len(sw.sp)-1].endOffset() - sw.pos +} +func (sw sparseFileWriter) physicalRemaining() int64 { + return sw.fw.physicalRemaining() +} + +// zeroWriter may only be written with NULs, otherwise it returns errWriteHole. +type zeroWriter struct{} + +func (zeroWriter) Write(b []byte) (int, error) { + for i, c := range b { + if c != 0 { + return i, errWriteHole + } + } + return len(b), nil +} + +// ensureEOF checks whether r is at EOF, reporting ErrWriteTooLong if not so. +func ensureEOF(r io.Reader) error { + n, err := tryReadFull(r, []byte{0}) + switch { + case n > 0: + return ErrWriteTooLong + case err == io.EOF: + return nil + default: + return err + } +} diff --git a/src/archive/tar/writer_test.go b/src/archive/tar/writer_test.go new file mode 100644 index 0000000..6402649 --- /dev/null +++ b/src/archive/tar/writer_test.go @@ -0,0 +1,1335 @@ +// Copyright 2009 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package tar + +import ( + "bytes" + "encoding/hex" + "errors" + "io" + "os" + "path" + "reflect" + "sort" + "strings" + "testing" + "testing/iotest" + "time" +) + +func bytediff(a, b []byte) string { + const ( + uniqueA = "- " + uniqueB = "+ " + identity = " " + ) + var ss []string + sa := strings.Split(strings.TrimSpace(hex.Dump(a)), "\n") + sb := strings.Split(strings.TrimSpace(hex.Dump(b)), "\n") + for len(sa) > 0 && len(sb) > 0 { + if sa[0] == sb[0] { + ss = append(ss, identity+sa[0]) + } else { + ss = append(ss, uniqueA+sa[0]) + ss = append(ss, uniqueB+sb[0]) + } + sa, sb = sa[1:], sb[1:] + } + for len(sa) > 0 { + ss = append(ss, uniqueA+sa[0]) + sa = sa[1:] + } + for len(sb) > 0 { + ss = append(ss, uniqueB+sb[0]) + sb = sb[1:] + } + return strings.Join(ss, "\n") +} + +func TestWriter(t *testing.T) { + type ( + testHeader struct { // WriteHeader(hdr) == wantErr + hdr Header + wantErr error + } + testWrite struct { // Write(str) == (wantCnt, wantErr) + str string + wantCnt int + wantErr error + } + testReadFrom struct { // ReadFrom(testFile{ops}) == (wantCnt, wantErr) + ops fileOps + wantCnt int64 + wantErr error + } + testClose struct { // Close() == wantErr + wantErr error + } + testFnc any // testHeader | testWrite | testReadFrom | testClose + ) + + vectors := []struct { + file string // Optional filename of expected output + tests []testFnc + }{{ + // The writer test file was produced with this command: + // tar (GNU tar) 1.26 + // ln -s small.txt link.txt + // tar -b 1 --format=ustar -c -f writer.tar small.txt small2.txt link.txt + file: "testdata/writer.tar", + tests: []testFnc{ + testHeader{Header{ + Typeflag: TypeReg, + Name: "small.txt", + Size: 5, + Mode: 0640, + Uid: 73025, + Gid: 5000, + Uname: "dsymonds", + Gname: "eng", + ModTime: time.Unix(1246508266, 0), + }, nil}, + testWrite{"Kilts", 5, nil}, + + testHeader{Header{ + Typeflag: TypeReg, + Name: "small2.txt", + Size: 11, + Mode: 0640, + Uid: 73025, + Uname: "dsymonds", + Gname: "eng", + Gid: 5000, + ModTime: time.Unix(1245217492, 0), + }, nil}, + testWrite{"Google.com\n", 11, nil}, + + testHeader{Header{ + Typeflag: TypeSymlink, + Name: "link.txt", + Linkname: "small.txt", + Mode: 0777, + Uid: 1000, + Gid: 1000, + Uname: "strings", + Gname: "strings", + ModTime: time.Unix(1314603082, 0), + }, nil}, + testWrite{"", 0, nil}, + + testClose{nil}, + }, + }, { + // The truncated test file was produced using these commands: + // dd if=/dev/zero bs=1048576 count=16384 > /tmp/16gig.txt + // tar -b 1 -c -f- /tmp/16gig.txt | dd bs=512 count=8 > writer-big.tar + file: "testdata/writer-big.tar", + tests: []testFnc{ + testHeader{Header{ + Typeflag: TypeReg, + Name: "tmp/16gig.txt", + Size: 16 << 30, + Mode: 0640, + Uid: 73025, + Gid: 5000, + Uname: "dsymonds", + Gname: "eng", + ModTime: time.Unix(1254699560, 0), + Format: FormatGNU, + }, nil}, + }, + }, { + // This truncated file was produced using this library. + // It was verified to work with GNU tar 1.27.1 and BSD tar 3.1.2. + // dd if=/dev/zero bs=1G count=16 >> writer-big-long.tar + // gnutar -xvf writer-big-long.tar + // bsdtar -xvf writer-big-long.tar + // + // This file is in PAX format. + file: "testdata/writer-big-long.tar", + tests: []testFnc{ + testHeader{Header{ + Typeflag: TypeReg, + Name: strings.Repeat("longname/", 15) + "16gig.txt", + Size: 16 << 30, + Mode: 0644, + Uid: 1000, + Gid: 1000, + Uname: "guillaume", + Gname: "guillaume", + ModTime: time.Unix(1399583047, 0), + }, nil}, + }, + }, { + // This file was produced using GNU tar v1.17. + // gnutar -b 4 --format=ustar (longname/)*15 + file.txt + file: "testdata/ustar.tar", + tests: []testFnc{ + testHeader{Header{ + Typeflag: TypeReg, + Name: strings.Repeat("longname/", 15) + "file.txt", + Size: 6, + Mode: 0644, + Uid: 501, + Gid: 20, + Uname: "shane", + Gname: "staff", + ModTime: time.Unix(1360135598, 0), + }, nil}, + testWrite{"hello\n", 6, nil}, + testClose{nil}, + }, + }, { + // This file was produced using GNU tar v1.26: + // echo "Slartibartfast" > file.txt + // ln file.txt hard.txt + // tar -b 1 --format=ustar -c -f hardlink.tar file.txt hard.txt + file: "testdata/hardlink.tar", + tests: []testFnc{ + testHeader{Header{ + Typeflag: TypeReg, + Name: "file.txt", + Size: 15, + Mode: 0644, + Uid: 1000, + Gid: 100, + Uname: "vbatts", + Gname: "users", + ModTime: time.Unix(1425484303, 0), + }, nil}, + testWrite{"Slartibartfast\n", 15, nil}, + + testHeader{Header{ + Typeflag: TypeLink, + Name: "hard.txt", + Linkname: "file.txt", + Mode: 0644, + Uid: 1000, + Gid: 100, + Uname: "vbatts", + Gname: "users", + ModTime: time.Unix(1425484303, 0), + }, nil}, + testWrite{"", 0, nil}, + + testClose{nil}, + }, + }, { + tests: []testFnc{ + testHeader{Header{ + Typeflag: TypeReg, + Name: "bad-null.txt", + Xattrs: map[string]string{"null\x00null\x00": "fizzbuzz"}, + }, headerError{}}, + }, + }, { + tests: []testFnc{ + testHeader{Header{ + Typeflag: TypeReg, + Name: "null\x00.txt", + }, headerError{}}, + }, + }, { + file: "testdata/pax-records.tar", + tests: []testFnc{ + testHeader{Header{ + Typeflag: TypeReg, + Name: "file", + Uname: strings.Repeat("long", 10), + PAXRecords: map[string]string{ + "path": "FILE", // Should be ignored + "GNU.sparse.map": "0,0", // Should be ignored + "comment": "Hello, 世界", + "GOLANG.pkg": "tar", + }, + }, nil}, + testClose{nil}, + }, + }, { + // Craft a theoretically valid PAX archive with global headers. + // The GNU and BSD tar tools do not parse these the same way. + // + // BSD tar v3.1.2 parses and ignores all global headers; + // the behavior is verified by researching the source code. + // + // $ bsdtar -tvf pax-global-records.tar + // ---------- 0 0 0 0 Dec 31 1969 file1 + // ---------- 0 0 0 0 Dec 31 1969 file2 + // ---------- 0 0 0 0 Dec 31 1969 file3 + // ---------- 0 0 0 0 May 13 2014 file4 + // + // GNU tar v1.27.1 applies global headers to subsequent records, + // but does not do the following properly: + // * It does not treat an empty record as deletion. + // * It does not use subsequent global headers to update previous ones. + // + // $ gnutar -tvf pax-global-records.tar + // ---------- 0/0 0 2017-07-13 19:40 global1 + // ---------- 0/0 0 2017-07-13 19:40 file2 + // gnutar: Substituting `.' for empty member name + // ---------- 0/0 0 1969-12-31 16:00 + // gnutar: Substituting `.' for empty member name + // ---------- 0/0 0 2014-05-13 09:53 + // + // According to the PAX specification, this should have been the result: + // ---------- 0/0 0 2017-07-13 19:40 global1 + // ---------- 0/0 0 2017-07-13 19:40 file2 + // ---------- 0/0 0 2017-07-13 19:40 file3 + // ---------- 0/0 0 2014-05-13 09:53 file4 + file: "testdata/pax-global-records.tar", + tests: []testFnc{ + testHeader{Header{ + Typeflag: TypeXGlobalHeader, + PAXRecords: map[string]string{"path": "global1", "mtime": "1500000000.0"}, + }, nil}, + testHeader{Header{ + Typeflag: TypeReg, Name: "file1", + }, nil}, + testHeader{Header{ + Typeflag: TypeReg, + Name: "file2", + PAXRecords: map[string]string{"path": "file2"}, + }, nil}, + testHeader{Header{ + Typeflag: TypeXGlobalHeader, + PAXRecords: map[string]string{"path": ""}, // Should delete "path", but keep "mtime" + }, nil}, + testHeader{Header{ + Typeflag: TypeReg, Name: "file3", + }, nil}, + testHeader{Header{ + Typeflag: TypeReg, + Name: "file4", + ModTime: time.Unix(1400000000, 0), + PAXRecords: map[string]string{"mtime": "1400000000"}, + }, nil}, + testClose{nil}, + }, + }, { + file: "testdata/gnu-utf8.tar", + tests: []testFnc{ + testHeader{Header{ + Typeflag: TypeReg, + Name: "☺☻☹☺☻☹☺☻☹☺☻☹☺☻☹☺☻☹☺☻☹☺☻☹☺☻☹☺☻☹☺☻☹☺☻☹☺☻☹☺☻☹☺☻☹☺☻☹☺☻☹☺☻☹", + Mode: 0644, + Uid: 1000, Gid: 1000, + Uname: "☺", + Gname: "⚹", + ModTime: time.Unix(0, 0), + Format: FormatGNU, + }, nil}, + testClose{nil}, + }, + }, { + file: "testdata/gnu-not-utf8.tar", + tests: []testFnc{ + testHeader{Header{ + Typeflag: TypeReg, + Name: "hi\x80\x81\x82\x83bye", + Mode: 0644, + Uid: 1000, + Gid: 1000, + Uname: "rawr", + Gname: "dsnet", + ModTime: time.Unix(0, 0), + Format: FormatGNU, + }, nil}, + testClose{nil}, + }, + // TODO(dsnet): Re-enable this test when adding sparse support. + // See https://golang.org/issue/22735 + /* + }, { + file: "testdata/gnu-nil-sparse-data.tar", + tests: []testFnc{ + testHeader{Header{ + Typeflag: TypeGNUSparse, + Name: "sparse.db", + Size: 1000, + SparseHoles: []sparseEntry{{Offset: 1000, Length: 0}}, + }, nil}, + testWrite{strings.Repeat("0123456789", 100), 1000, nil}, + testClose{}, + }, + }, { + file: "testdata/gnu-nil-sparse-hole.tar", + tests: []testFnc{ + testHeader{Header{ + Typeflag: TypeGNUSparse, + Name: "sparse.db", + Size: 1000, + SparseHoles: []sparseEntry{{Offset: 0, Length: 1000}}, + }, nil}, + testWrite{strings.Repeat("\x00", 1000), 1000, nil}, + testClose{}, + }, + }, { + file: "testdata/pax-nil-sparse-data.tar", + tests: []testFnc{ + testHeader{Header{ + Typeflag: TypeReg, + Name: "sparse.db", + Size: 1000, + SparseHoles: []sparseEntry{{Offset: 1000, Length: 0}}, + }, nil}, + testWrite{strings.Repeat("0123456789", 100), 1000, nil}, + testClose{}, + }, + }, { + file: "testdata/pax-nil-sparse-hole.tar", + tests: []testFnc{ + testHeader{Header{ + Typeflag: TypeReg, + Name: "sparse.db", + Size: 1000, + SparseHoles: []sparseEntry{{Offset: 0, Length: 1000}}, + }, nil}, + testWrite{strings.Repeat("\x00", 1000), 1000, nil}, + testClose{}, + }, + }, { + file: "testdata/gnu-sparse-big.tar", + tests: []testFnc{ + testHeader{Header{ + Typeflag: TypeGNUSparse, + Name: "gnu-sparse", + Size: 6e10, + SparseHoles: []sparseEntry{ + {Offset: 0e10, Length: 1e10 - 100}, + {Offset: 1e10, Length: 1e10 - 100}, + {Offset: 2e10, Length: 1e10 - 100}, + {Offset: 3e10, Length: 1e10 - 100}, + {Offset: 4e10, Length: 1e10 - 100}, + {Offset: 5e10, Length: 1e10 - 100}, + }, + }, nil}, + testReadFrom{fileOps{ + int64(1e10 - blockSize), + strings.Repeat("\x00", blockSize-100) + strings.Repeat("0123456789", 10), + int64(1e10 - blockSize), + strings.Repeat("\x00", blockSize-100) + strings.Repeat("0123456789", 10), + int64(1e10 - blockSize), + strings.Repeat("\x00", blockSize-100) + strings.Repeat("0123456789", 10), + int64(1e10 - blockSize), + strings.Repeat("\x00", blockSize-100) + strings.Repeat("0123456789", 10), + int64(1e10 - blockSize), + strings.Repeat("\x00", blockSize-100) + strings.Repeat("0123456789", 10), + int64(1e10 - blockSize), + strings.Repeat("\x00", blockSize-100) + strings.Repeat("0123456789", 10), + }, 6e10, nil}, + testClose{nil}, + }, + }, { + file: "testdata/pax-sparse-big.tar", + tests: []testFnc{ + testHeader{Header{ + Typeflag: TypeReg, + Name: "pax-sparse", + Size: 6e10, + SparseHoles: []sparseEntry{ + {Offset: 0e10, Length: 1e10 - 100}, + {Offset: 1e10, Length: 1e10 - 100}, + {Offset: 2e10, Length: 1e10 - 100}, + {Offset: 3e10, Length: 1e10 - 100}, + {Offset: 4e10, Length: 1e10 - 100}, + {Offset: 5e10, Length: 1e10 - 100}, + }, + }, nil}, + testReadFrom{fileOps{ + int64(1e10 - blockSize), + strings.Repeat("\x00", blockSize-100) + strings.Repeat("0123456789", 10), + int64(1e10 - blockSize), + strings.Repeat("\x00", blockSize-100) + strings.Repeat("0123456789", 10), + int64(1e10 - blockSize), + strings.Repeat("\x00", blockSize-100) + strings.Repeat("0123456789", 10), + int64(1e10 - blockSize), + strings.Repeat("\x00", blockSize-100) + strings.Repeat("0123456789", 10), + int64(1e10 - blockSize), + strings.Repeat("\x00", blockSize-100) + strings.Repeat("0123456789", 10), + int64(1e10 - blockSize), + strings.Repeat("\x00", blockSize-100) + strings.Repeat("0123456789", 10), + }, 6e10, nil}, + testClose{nil}, + }, + */ + }, { + file: "testdata/trailing-slash.tar", + tests: []testFnc{ + testHeader{Header{Name: strings.Repeat("123456789/", 30)}, nil}, + testClose{nil}, + }, + }, { + // Automatically promote zero value of Typeflag depending on the name. + file: "testdata/file-and-dir.tar", + tests: []testFnc{ + testHeader{Header{Name: "small.txt", Size: 5}, nil}, + testWrite{"Kilts", 5, nil}, + testHeader{Header{Name: "dir/"}, nil}, + testClose{nil}, + }, + }} + + equalError := func(x, y error) bool { + _, ok1 := x.(headerError) + _, ok2 := y.(headerError) + if ok1 || ok2 { + return ok1 && ok2 + } + return x == y + } + for _, v := range vectors { + t.Run(path.Base(v.file), func(t *testing.T) { + const maxSize = 10 << 10 // 10KiB + buf := new(bytes.Buffer) + tw := NewWriter(iotest.TruncateWriter(buf, maxSize)) + + for i, tf := range v.tests { + switch tf := tf.(type) { + case testHeader: + err := tw.WriteHeader(&tf.hdr) + if !equalError(err, tf.wantErr) { + t.Fatalf("test %d, WriteHeader() = %v, want %v", i, err, tf.wantErr) + } + case testWrite: + got, err := tw.Write([]byte(tf.str)) + if got != tf.wantCnt || !equalError(err, tf.wantErr) { + t.Fatalf("test %d, Write() = (%d, %v), want (%d, %v)", i, got, err, tf.wantCnt, tf.wantErr) + } + case testReadFrom: + f := &testFile{ops: tf.ops} + got, err := tw.readFrom(f) + if _, ok := err.(testError); ok { + t.Errorf("test %d, ReadFrom(): %v", i, err) + } else if got != tf.wantCnt || !equalError(err, tf.wantErr) { + t.Errorf("test %d, ReadFrom() = (%d, %v), want (%d, %v)", i, got, err, tf.wantCnt, tf.wantErr) + } + if len(f.ops) > 0 { + t.Errorf("test %d, expected %d more operations", i, len(f.ops)) + } + case testClose: + err := tw.Close() + if !equalError(err, tf.wantErr) { + t.Fatalf("test %d, Close() = %v, want %v", i, err, tf.wantErr) + } + default: + t.Fatalf("test %d, unknown test operation: %T", i, tf) + } + } + + if v.file != "" { + want, err := os.ReadFile(v.file) + if err != nil { + t.Fatalf("ReadFile() = %v, want nil", err) + } + got := buf.Bytes() + if !bytes.Equal(want, got) { + t.Fatalf("incorrect result: (-got +want)\n%v", bytediff(got, want)) + } + } + }) + } +} + +func TestPax(t *testing.T) { + // Create an archive with a large name + fileinfo, err := os.Stat("testdata/small.txt") + if err != nil { + t.Fatal(err) + } + hdr, err := FileInfoHeader(fileinfo, "") + if err != nil { + t.Fatalf("os.Stat: %v", err) + } + // Force a PAX long name to be written + longName := strings.Repeat("ab", 100) + contents := strings.Repeat(" ", int(hdr.Size)) + hdr.Name = longName + var buf bytes.Buffer + writer := NewWriter(&buf) + if err := writer.WriteHeader(hdr); err != nil { + t.Fatal(err) + } + if _, err = writer.Write([]byte(contents)); err != nil { + t.Fatal(err) + } + if err := writer.Close(); err != nil { + t.Fatal(err) + } + // Simple test to make sure PAX extensions are in effect + if !bytes.Contains(buf.Bytes(), []byte("PaxHeaders.0")) { + t.Fatal("Expected at least one PAX header to be written.") + } + // Test that we can get a long name back out of the archive. + reader := NewReader(&buf) + hdr, err = reader.Next() + if err != nil { + t.Fatal(err) + } + if hdr.Name != longName { + t.Fatal("Couldn't recover long file name") + } +} + +func TestPaxSymlink(t *testing.T) { + // Create an archive with a large linkname + fileinfo, err := os.Stat("testdata/small.txt") + if err != nil { + t.Fatal(err) + } + hdr, err := FileInfoHeader(fileinfo, "") + hdr.Typeflag = TypeSymlink + if err != nil { + t.Fatalf("os.Stat:1 %v", err) + } + // Force a PAX long linkname to be written + longLinkname := strings.Repeat("1234567890/1234567890", 10) + hdr.Linkname = longLinkname + + hdr.Size = 0 + var buf bytes.Buffer + writer := NewWriter(&buf) + if err := writer.WriteHeader(hdr); err != nil { + t.Fatal(err) + } + if err := writer.Close(); err != nil { + t.Fatal(err) + } + // Simple test to make sure PAX extensions are in effect + if !bytes.Contains(buf.Bytes(), []byte("PaxHeaders.0")) { + t.Fatal("Expected at least one PAX header to be written.") + } + // Test that we can get a long name back out of the archive. + reader := NewReader(&buf) + hdr, err = reader.Next() + if err != nil { + t.Fatal(err) + } + if hdr.Linkname != longLinkname { + t.Fatal("Couldn't recover long link name") + } +} + +func TestPaxNonAscii(t *testing.T) { + // Create an archive with non ascii. These should trigger a pax header + // because pax headers have a defined utf-8 encoding. + fileinfo, err := os.Stat("testdata/small.txt") + if err != nil { + t.Fatal(err) + } + + hdr, err := FileInfoHeader(fileinfo, "") + if err != nil { + t.Fatalf("os.Stat:1 %v", err) + } + + // some sample data + chineseFilename := "文件名" + chineseGroupname := "組" + chineseUsername := "用戶名" + + hdr.Name = chineseFilename + hdr.Gname = chineseGroupname + hdr.Uname = chineseUsername + + contents := strings.Repeat(" ", int(hdr.Size)) + + var buf bytes.Buffer + writer := NewWriter(&buf) + if err := writer.WriteHeader(hdr); err != nil { + t.Fatal(err) + } + if _, err = writer.Write([]byte(contents)); err != nil { + t.Fatal(err) + } + if err := writer.Close(); err != nil { + t.Fatal(err) + } + // Simple test to make sure PAX extensions are in effect + if !bytes.Contains(buf.Bytes(), []byte("PaxHeaders.0")) { + t.Fatal("Expected at least one PAX header to be written.") + } + // Test that we can get a long name back out of the archive. + reader := NewReader(&buf) + hdr, err = reader.Next() + if err != nil { + t.Fatal(err) + } + if hdr.Name != chineseFilename { + t.Fatal("Couldn't recover unicode name") + } + if hdr.Gname != chineseGroupname { + t.Fatal("Couldn't recover unicode group") + } + if hdr.Uname != chineseUsername { + t.Fatal("Couldn't recover unicode user") + } +} + +func TestPaxXattrs(t *testing.T) { + xattrs := map[string]string{ + "user.key": "value", + } + + // Create an archive with an xattr + fileinfo, err := os.Stat("testdata/small.txt") + if err != nil { + t.Fatal(err) + } + hdr, err := FileInfoHeader(fileinfo, "") + if err != nil { + t.Fatalf("os.Stat: %v", err) + } + contents := "Kilts" + hdr.Xattrs = xattrs + var buf bytes.Buffer + writer := NewWriter(&buf) + if err := writer.WriteHeader(hdr); err != nil { + t.Fatal(err) + } + if _, err = writer.Write([]byte(contents)); err != nil { + t.Fatal(err) + } + if err := writer.Close(); err != nil { + t.Fatal(err) + } + // Test that we can get the xattrs back out of the archive. + reader := NewReader(&buf) + hdr, err = reader.Next() + if err != nil { + t.Fatal(err) + } + if !reflect.DeepEqual(hdr.Xattrs, xattrs) { + t.Fatalf("xattrs did not survive round trip: got %+v, want %+v", + hdr.Xattrs, xattrs) + } +} + +func TestPaxHeadersSorted(t *testing.T) { + fileinfo, err := os.Stat("testdata/small.txt") + if err != nil { + t.Fatal(err) + } + hdr, err := FileInfoHeader(fileinfo, "") + if err != nil { + t.Fatalf("os.Stat: %v", err) + } + contents := strings.Repeat(" ", int(hdr.Size)) + + hdr.Xattrs = map[string]string{ + "foo": "foo", + "bar": "bar", + "baz": "baz", + "qux": "qux", + } + + var buf bytes.Buffer + writer := NewWriter(&buf) + if err := writer.WriteHeader(hdr); err != nil { + t.Fatal(err) + } + if _, err = writer.Write([]byte(contents)); err != nil { + t.Fatal(err) + } + if err := writer.Close(); err != nil { + t.Fatal(err) + } + // Simple test to make sure PAX extensions are in effect + if !bytes.Contains(buf.Bytes(), []byte("PaxHeaders.0")) { + t.Fatal("Expected at least one PAX header to be written.") + } + + // xattr bar should always appear before others + indices := []int{ + bytes.Index(buf.Bytes(), []byte("bar=bar")), + bytes.Index(buf.Bytes(), []byte("baz=baz")), + bytes.Index(buf.Bytes(), []byte("foo=foo")), + bytes.Index(buf.Bytes(), []byte("qux=qux")), + } + if !sort.IntsAreSorted(indices) { + t.Fatal("PAX headers are not sorted") + } +} + +func TestUSTARLongName(t *testing.T) { + // Create an archive with a path that failed to split with USTAR extension in previous versions. + fileinfo, err := os.Stat("testdata/small.txt") + if err != nil { + t.Fatal(err) + } + hdr, err := FileInfoHeader(fileinfo, "") + hdr.Typeflag = TypeDir + if err != nil { + t.Fatalf("os.Stat:1 %v", err) + } + // Force a PAX long name to be written. The name was taken from a practical example + // that fails and replaced ever char through numbers to anonymize the sample. + longName := "/0000_0000000/00000-000000000/0000_0000000/00000-0000000000000/0000_0000000/00000-0000000-00000000/0000_0000000/00000000/0000_0000000/000/0000_0000000/00000000v00/0000_0000000/000000/0000_0000000/0000000/0000_0000000/00000y-00/0000/0000/00000000/0x000000/" + hdr.Name = longName + + hdr.Size = 0 + var buf bytes.Buffer + writer := NewWriter(&buf) + if err := writer.WriteHeader(hdr); err != nil { + t.Fatal(err) + } + if err := writer.Close(); err != nil { + t.Fatal(err) + } + // Test that we can get a long name back out of the archive. + reader := NewReader(&buf) + hdr, err = reader.Next() + if err != nil { + t.Fatal(err) + } + if hdr.Name != longName { + t.Fatal("Couldn't recover long name") + } +} + +func TestValidTypeflagWithPAXHeader(t *testing.T) { + var buffer bytes.Buffer + tw := NewWriter(&buffer) + + fileName := strings.Repeat("ab", 100) + + hdr := &Header{ + Name: fileName, + Size: 4, + Typeflag: 0, + } + if err := tw.WriteHeader(hdr); err != nil { + t.Fatalf("Failed to write header: %s", err) + } + if _, err := tw.Write([]byte("fooo")); err != nil { + t.Fatalf("Failed to write the file's data: %s", err) + } + tw.Close() + + tr := NewReader(&buffer) + + for { + header, err := tr.Next() + if err == io.EOF { + break + } + if err != nil { + t.Fatalf("Failed to read header: %s", err) + } + if header.Typeflag != TypeReg { + t.Fatalf("Typeflag should've been %d, found %d", TypeReg, header.Typeflag) + } + } +} + +// failOnceWriter fails exactly once and then always reports success. +type failOnceWriter bool + +func (w *failOnceWriter) Write(b []byte) (int, error) { + if !*w { + return 0, io.ErrShortWrite + } + *w = true + return len(b), nil +} + +func TestWriterErrors(t *testing.T) { + t.Run("HeaderOnly", func(t *testing.T) { + tw := NewWriter(new(bytes.Buffer)) + hdr := &Header{Name: "dir/", Typeflag: TypeDir} + if err := tw.WriteHeader(hdr); err != nil { + t.Fatalf("WriteHeader() = %v, want nil", err) + } + if _, err := tw.Write([]byte{0x00}); err != ErrWriteTooLong { + t.Fatalf("Write() = %v, want %v", err, ErrWriteTooLong) + } + }) + + t.Run("NegativeSize", func(t *testing.T) { + tw := NewWriter(new(bytes.Buffer)) + hdr := &Header{Name: "small.txt", Size: -1} + if err := tw.WriteHeader(hdr); err == nil { + t.Fatalf("WriteHeader() = nil, want non-nil error") + } + }) + + t.Run("BeforeHeader", func(t *testing.T) { + tw := NewWriter(new(bytes.Buffer)) + if _, err := tw.Write([]byte("Kilts")); err != ErrWriteTooLong { + t.Fatalf("Write() = %v, want %v", err, ErrWriteTooLong) + } + }) + + t.Run("AfterClose", func(t *testing.T) { + tw := NewWriter(new(bytes.Buffer)) + hdr := &Header{Name: "small.txt"} + if err := tw.WriteHeader(hdr); err != nil { + t.Fatalf("WriteHeader() = %v, want nil", err) + } + if err := tw.Close(); err != nil { + t.Fatalf("Close() = %v, want nil", err) + } + if _, err := tw.Write([]byte("Kilts")); err != ErrWriteAfterClose { + t.Fatalf("Write() = %v, want %v", err, ErrWriteAfterClose) + } + if err := tw.Flush(); err != ErrWriteAfterClose { + t.Fatalf("Flush() = %v, want %v", err, ErrWriteAfterClose) + } + if err := tw.Close(); err != nil { + t.Fatalf("Close() = %v, want nil", err) + } + }) + + t.Run("PrematureFlush", func(t *testing.T) { + tw := NewWriter(new(bytes.Buffer)) + hdr := &Header{Name: "small.txt", Size: 5} + if err := tw.WriteHeader(hdr); err != nil { + t.Fatalf("WriteHeader() = %v, want nil", err) + } + if err := tw.Flush(); err == nil { + t.Fatalf("Flush() = %v, want non-nil error", err) + } + }) + + t.Run("PrematureClose", func(t *testing.T) { + tw := NewWriter(new(bytes.Buffer)) + hdr := &Header{Name: "small.txt", Size: 5} + if err := tw.WriteHeader(hdr); err != nil { + t.Fatalf("WriteHeader() = %v, want nil", err) + } + if err := tw.Close(); err == nil { + t.Fatalf("Close() = %v, want non-nil error", err) + } + }) + + t.Run("Persistence", func(t *testing.T) { + tw := NewWriter(new(failOnceWriter)) + if err := tw.WriteHeader(&Header{}); err != io.ErrShortWrite { + t.Fatalf("WriteHeader() = %v, want %v", err, io.ErrShortWrite) + } + if err := tw.WriteHeader(&Header{Name: "small.txt"}); err == nil { + t.Errorf("WriteHeader() = got %v, want non-nil error", err) + } + if _, err := tw.Write(nil); err == nil { + t.Errorf("Write() = %v, want non-nil error", err) + } + if err := tw.Flush(); err == nil { + t.Errorf("Flush() = %v, want non-nil error", err) + } + if err := tw.Close(); err == nil { + t.Errorf("Close() = %v, want non-nil error", err) + } + }) +} + +func TestSplitUSTARPath(t *testing.T) { + sr := strings.Repeat + + vectors := []struct { + input string // Input path + prefix string // Expected output prefix + suffix string // Expected output suffix + ok bool // Split success? + }{ + {"", "", "", false}, + {"abc", "", "", false}, + {"用戶名", "", "", false}, + {sr("a", nameSize), "", "", false}, + {sr("a", nameSize) + "/", "", "", false}, + {sr("a", nameSize) + "/a", sr("a", nameSize), "a", true}, + {sr("a", prefixSize) + "/", "", "", false}, + {sr("a", prefixSize) + "/a", sr("a", prefixSize), "a", true}, + {sr("a", nameSize+1), "", "", false}, + {sr("/", nameSize+1), sr("/", nameSize-1), "/", true}, + {sr("a", prefixSize) + "/" + sr("b", nameSize), + sr("a", prefixSize), sr("b", nameSize), true}, + {sr("a", prefixSize) + "//" + sr("b", nameSize), "", "", false}, + {sr("a/", nameSize), sr("a/", 77) + "a", sr("a/", 22), true}, + } + + for _, v := range vectors { + prefix, suffix, ok := splitUSTARPath(v.input) + if prefix != v.prefix || suffix != v.suffix || ok != v.ok { + t.Errorf("splitUSTARPath(%q):\ngot (%q, %q, %v)\nwant (%q, %q, %v)", + v.input, prefix, suffix, ok, v.prefix, v.suffix, v.ok) + } + } +} + +// TestIssue12594 tests that the Writer does not attempt to populate the prefix +// field when encoding a header in the GNU format. The prefix field is valid +// in USTAR and PAX, but not GNU. +func TestIssue12594(t *testing.T) { + names := []string{ + "0/1/2/3/4/5/6/7/8/9/10/11/12/13/14/15/16/17/18/19/20/21/22/23/24/25/26/27/28/29/30/file.txt", + "0/1/2/3/4/5/6/7/8/9/10/11/12/13/14/15/16/17/18/19/20/21/22/23/24/25/26/27/28/29/30/31/32/33/file.txt", + "0/1/2/3/4/5/6/7/8/9/10/11/12/13/14/15/16/17/18/19/20/21/22/23/24/25/26/27/28/29/30/31/32/333/file.txt", + "0/1/2/3/4/5/6/7/8/9/10/11/12/13/14/15/16/17/18/19/20/21/22/23/24/25/26/27/28/29/30/31/32/33/34/35/36/37/38/39/40/file.txt", + "0000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000/file.txt", + "/home/support/.openoffice.org/3/user/uno_packages/cache/registry/com.sun.star.comp.deployment.executable.PackageRegistryBackend", + } + + for i, name := range names { + var b bytes.Buffer + + tw := NewWriter(&b) + if err := tw.WriteHeader(&Header{ + Name: name, + Uid: 1 << 25, // Prevent USTAR format + }); err != nil { + t.Errorf("test %d, unexpected WriteHeader error: %v", i, err) + } + if err := tw.Close(); err != nil { + t.Errorf("test %d, unexpected Close error: %v", i, err) + } + + // The prefix field should never appear in the GNU format. + var blk block + copy(blk[:], b.Bytes()) + prefix := string(blk.toUSTAR().prefix()) + prefix, _, _ = strings.Cut(prefix, "\x00") // Truncate at the NUL terminator + if blk.getFormat() == FormatGNU && len(prefix) > 0 && strings.HasPrefix(name, prefix) { + t.Errorf("test %d, found prefix in GNU format: %s", i, prefix) + } + + tr := NewReader(&b) + hdr, err := tr.Next() + if err != nil { + t.Errorf("test %d, unexpected Next error: %v", i, err) + } + if hdr.Name != name { + t.Errorf("test %d, hdr.Name = %s, want %s", i, hdr.Name, name) + } + } +} + +func TestWriteLongHeader(t *testing.T) { + for _, test := range []struct { + name string + h *Header + }{{ + name: "name too long", + h: &Header{Name: strings.Repeat("a", maxSpecialFileSize)}, + }, { + name: "linkname too long", + h: &Header{Linkname: strings.Repeat("a", maxSpecialFileSize)}, + }, { + name: "uname too long", + h: &Header{Uname: strings.Repeat("a", maxSpecialFileSize)}, + }, { + name: "gname too long", + h: &Header{Gname: strings.Repeat("a", maxSpecialFileSize)}, + }, { + name: "PAX header too long", + h: &Header{PAXRecords: map[string]string{"GOLANG.x": strings.Repeat("a", maxSpecialFileSize)}}, + }} { + w := NewWriter(io.Discard) + if err := w.WriteHeader(test.h); err != ErrFieldTooLong { + t.Errorf("%v: w.WriteHeader() = %v, want ErrFieldTooLong", test.name, err) + } + } +} + +// testNonEmptyWriter wraps an io.Writer and ensures that +// Write is never called with an empty buffer. +type testNonEmptyWriter struct{ io.Writer } + +func (w testNonEmptyWriter) Write(b []byte) (int, error) { + if len(b) == 0 { + return 0, errors.New("unexpected empty Write call") + } + return w.Writer.Write(b) +} + +func TestFileWriter(t *testing.T) { + type ( + testWrite struct { // Write(str) == (wantCnt, wantErr) + str string + wantCnt int + wantErr error + } + testReadFrom struct { // ReadFrom(testFile{ops}) == (wantCnt, wantErr) + ops fileOps + wantCnt int64 + wantErr error + } + testRemaining struct { // logicalRemaining() == wantLCnt, physicalRemaining() == wantPCnt + wantLCnt int64 + wantPCnt int64 + } + testFnc any // testWrite | testReadFrom | testRemaining + ) + + type ( + makeReg struct { + size int64 + wantStr string + } + makeSparse struct { + makeReg makeReg + sph sparseHoles + size int64 + } + fileMaker any // makeReg | makeSparse + ) + + vectors := []struct { + maker fileMaker + tests []testFnc + }{{ + maker: makeReg{0, ""}, + tests: []testFnc{ + testRemaining{0, 0}, + testWrite{"", 0, nil}, + testWrite{"a", 0, ErrWriteTooLong}, + testReadFrom{fileOps{""}, 0, nil}, + testReadFrom{fileOps{"a"}, 0, ErrWriteTooLong}, + testRemaining{0, 0}, + }, + }, { + maker: makeReg{1, "a"}, + tests: []testFnc{ + testRemaining{1, 1}, + testWrite{"", 0, nil}, + testWrite{"a", 1, nil}, + testWrite{"bcde", 0, ErrWriteTooLong}, + testWrite{"", 0, nil}, + testReadFrom{fileOps{""}, 0, nil}, + testReadFrom{fileOps{"a"}, 0, ErrWriteTooLong}, + testRemaining{0, 0}, + }, + }, { + maker: makeReg{5, "hello"}, + tests: []testFnc{ + testRemaining{5, 5}, + testWrite{"hello", 5, nil}, + testRemaining{0, 0}, + }, + }, { + maker: makeReg{5, "\x00\x00\x00\x00\x00"}, + tests: []testFnc{ + testRemaining{5, 5}, + testReadFrom{fileOps{"\x00\x00\x00\x00\x00"}, 5, nil}, + testRemaining{0, 0}, + }, + }, { + maker: makeReg{5, "\x00\x00\x00\x00\x00"}, + tests: []testFnc{ + testRemaining{5, 5}, + testReadFrom{fileOps{"\x00\x00\x00\x00\x00extra"}, 5, ErrWriteTooLong}, + testRemaining{0, 0}, + }, + }, { + maker: makeReg{5, "abc\x00\x00"}, + tests: []testFnc{ + testRemaining{5, 5}, + testWrite{"abc", 3, nil}, + testRemaining{2, 2}, + testReadFrom{fileOps{"\x00\x00"}, 2, nil}, + testRemaining{0, 0}, + }, + }, { + maker: makeReg{5, "\x00\x00abc"}, + tests: []testFnc{ + testRemaining{5, 5}, + testWrite{"\x00\x00", 2, nil}, + testRemaining{3, 3}, + testWrite{"abc", 3, nil}, + testReadFrom{fileOps{"z"}, 0, ErrWriteTooLong}, + testWrite{"z", 0, ErrWriteTooLong}, + testRemaining{0, 0}, + }, + }, { + maker: makeSparse{makeReg{5, "abcde"}, sparseHoles{{2, 3}}, 8}, + tests: []testFnc{ + testRemaining{8, 5}, + testWrite{"ab\x00\x00\x00cde", 8, nil}, + testWrite{"a", 0, ErrWriteTooLong}, + testRemaining{0, 0}, + }, + }, { + maker: makeSparse{makeReg{5, "abcde"}, sparseHoles{{2, 3}}, 8}, + tests: []testFnc{ + testWrite{"ab\x00\x00\x00cdez", 8, ErrWriteTooLong}, + testRemaining{0, 0}, + }, + }, { + maker: makeSparse{makeReg{5, "abcde"}, sparseHoles{{2, 3}}, 8}, + tests: []testFnc{ + testWrite{"ab\x00", 3, nil}, + testRemaining{5, 3}, + testWrite{"\x00\x00cde", 5, nil}, + testWrite{"a", 0, ErrWriteTooLong}, + testRemaining{0, 0}, + }, + }, { + maker: makeSparse{makeReg{5, "abcde"}, sparseHoles{{2, 3}}, 8}, + tests: []testFnc{ + testWrite{"ab", 2, nil}, + testRemaining{6, 3}, + testReadFrom{fileOps{int64(3), "cde"}, 6, nil}, + testRemaining{0, 0}, + }, + }, { + maker: makeSparse{makeReg{5, "abcde"}, sparseHoles{{2, 3}}, 8}, + tests: []testFnc{ + testReadFrom{fileOps{"ab", int64(3), "cde"}, 8, nil}, + testRemaining{0, 0}, + }, + }, { + maker: makeSparse{makeReg{5, "abcde"}, sparseHoles{{2, 3}}, 8}, + tests: []testFnc{ + testReadFrom{fileOps{"ab", int64(3), "cdeX"}, 8, ErrWriteTooLong}, + testRemaining{0, 0}, + }, + }, { + maker: makeSparse{makeReg{4, "abcd"}, sparseHoles{{2, 3}}, 8}, + tests: []testFnc{ + testReadFrom{fileOps{"ab", int64(3), "cd"}, 7, io.ErrUnexpectedEOF}, + testRemaining{1, 0}, + }, + }, { + maker: makeSparse{makeReg{4, "abcd"}, sparseHoles{{2, 3}}, 8}, + tests: []testFnc{ + testReadFrom{fileOps{"ab", int64(3), "cde"}, 7, errMissData}, + testRemaining{1, 0}, + }, + }, { + maker: makeSparse{makeReg{6, "abcde"}, sparseHoles{{2, 3}}, 8}, + tests: []testFnc{ + testReadFrom{fileOps{"ab", int64(3), "cde"}, 8, errUnrefData}, + testRemaining{0, 1}, + }, + }, { + maker: makeSparse{makeReg{4, "abcd"}, sparseHoles{{2, 3}}, 8}, + tests: []testFnc{ + testWrite{"ab", 2, nil}, + testRemaining{6, 2}, + testWrite{"\x00\x00\x00", 3, nil}, + testRemaining{3, 2}, + testWrite{"cde", 2, errMissData}, + testRemaining{1, 0}, + }, + }, { + maker: makeSparse{makeReg{6, "abcde"}, sparseHoles{{2, 3}}, 8}, + tests: []testFnc{ + testWrite{"ab", 2, nil}, + testRemaining{6, 4}, + testWrite{"\x00\x00\x00", 3, nil}, + testRemaining{3, 4}, + testWrite{"cde", 3, errUnrefData}, + testRemaining{0, 1}, + }, + }, { + maker: makeSparse{makeReg{3, "abc"}, sparseHoles{{0, 2}, {5, 2}}, 7}, + tests: []testFnc{ + testRemaining{7, 3}, + testWrite{"\x00\x00abc\x00\x00", 7, nil}, + testRemaining{0, 0}, + }, + }, { + maker: makeSparse{makeReg{3, "abc"}, sparseHoles{{0, 2}, {5, 2}}, 7}, + tests: []testFnc{ + testRemaining{7, 3}, + testReadFrom{fileOps{int64(2), "abc", int64(1), "\x00"}, 7, nil}, + testRemaining{0, 0}, + }, + }, { + maker: makeSparse{makeReg{3, ""}, sparseHoles{{0, 2}, {5, 2}}, 7}, + tests: []testFnc{ + testWrite{"abcdefg", 0, errWriteHole}, + }, + }, { + maker: makeSparse{makeReg{3, "abc"}, sparseHoles{{0, 2}, {5, 2}}, 7}, + tests: []testFnc{ + testWrite{"\x00\x00abcde", 5, errWriteHole}, + }, + }, { + maker: makeSparse{makeReg{3, "abc"}, sparseHoles{{0, 2}, {5, 2}}, 7}, + tests: []testFnc{ + testWrite{"\x00\x00abc\x00\x00z", 7, ErrWriteTooLong}, + testRemaining{0, 0}, + }, + }, { + maker: makeSparse{makeReg{3, "abc"}, sparseHoles{{0, 2}, {5, 2}}, 7}, + tests: []testFnc{ + testWrite{"\x00\x00", 2, nil}, + testRemaining{5, 3}, + testWrite{"abc", 3, nil}, + testRemaining{2, 0}, + testWrite{"\x00\x00", 2, nil}, + testRemaining{0, 0}, + }, + }, { + maker: makeSparse{makeReg{2, "ab"}, sparseHoles{{0, 2}, {5, 2}}, 7}, + tests: []testFnc{ + testWrite{"\x00\x00", 2, nil}, + testWrite{"abc", 2, errMissData}, + testWrite{"\x00\x00", 0, errMissData}, + }, + }, { + maker: makeSparse{makeReg{4, "abc"}, sparseHoles{{0, 2}, {5, 2}}, 7}, + tests: []testFnc{ + testWrite{"\x00\x00", 2, nil}, + testWrite{"abc", 3, nil}, + testWrite{"\x00\x00", 2, errUnrefData}, + }, + }} + + for i, v := range vectors { + var wantStr string + bb := new(bytes.Buffer) + w := testNonEmptyWriter{bb} + var fw fileWriter + switch maker := v.maker.(type) { + case makeReg: + fw = ®FileWriter{w, maker.size} + wantStr = maker.wantStr + case makeSparse: + if !validateSparseEntries(maker.sph, maker.size) { + t.Fatalf("invalid sparse map: %v", maker.sph) + } + spd := invertSparseEntries(maker.sph, maker.size) + fw = ®FileWriter{w, maker.makeReg.size} + fw = &sparseFileWriter{fw, spd, 0} + wantStr = maker.makeReg.wantStr + default: + t.Fatalf("test %d, unknown make operation: %T", i, maker) + } + + for j, tf := range v.tests { + switch tf := tf.(type) { + case testWrite: + got, err := fw.Write([]byte(tf.str)) + if got != tf.wantCnt || err != tf.wantErr { + t.Errorf("test %d.%d, Write(%s):\ngot (%d, %v)\nwant (%d, %v)", i, j, tf.str, got, err, tf.wantCnt, tf.wantErr) + } + case testReadFrom: + f := &testFile{ops: tf.ops} + got, err := fw.ReadFrom(f) + if _, ok := err.(testError); ok { + t.Errorf("test %d.%d, ReadFrom(): %v", i, j, err) + } else if got != tf.wantCnt || err != tf.wantErr { + t.Errorf("test %d.%d, ReadFrom() = (%d, %v), want (%d, %v)", i, j, got, err, tf.wantCnt, tf.wantErr) + } + if len(f.ops) > 0 { + t.Errorf("test %d.%d, expected %d more operations", i, j, len(f.ops)) + } + case testRemaining: + if got := fw.logicalRemaining(); got != tf.wantLCnt { + t.Errorf("test %d.%d, logicalRemaining() = %d, want %d", i, j, got, tf.wantLCnt) + } + if got := fw.physicalRemaining(); got != tf.wantPCnt { + t.Errorf("test %d.%d, physicalRemaining() = %d, want %d", i, j, got, tf.wantPCnt) + } + default: + t.Fatalf("test %d.%d, unknown test operation: %T", i, j, tf) + } + } + + if got := bb.String(); got != wantStr { + t.Fatalf("test %d, String() = %q, want %q", i, got, wantStr) + } + } +} diff --git a/src/archive/zip/example_test.go b/src/archive/zip/example_test.go new file mode 100644 index 0000000..1eed304 --- /dev/null +++ b/src/archive/zip/example_test.go @@ -0,0 +1,93 @@ +// Copyright 2012 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package zip_test + +import ( + "archive/zip" + "bytes" + "compress/flate" + "fmt" + "io" + "log" + "os" +) + +func ExampleWriter() { + // Create a buffer to write our archive to. + buf := new(bytes.Buffer) + + // Create a new zip archive. + w := zip.NewWriter(buf) + + // Add some files to the archive. + var files = []struct { + Name, Body string + }{ + {"readme.txt", "This archive contains some text files."}, + {"gopher.txt", "Gopher names:\nGeorge\nGeoffrey\nGonzo"}, + {"todo.txt", "Get animal handling licence.\nWrite more examples."}, + } + for _, file := range files { + f, err := w.Create(file.Name) + if err != nil { + log.Fatal(err) + } + _, err = f.Write([]byte(file.Body)) + if err != nil { + log.Fatal(err) + } + } + + // Make sure to check the error on Close. + err := w.Close() + if err != nil { + log.Fatal(err) + } +} + +func ExampleReader() { + // Open a zip archive for reading. + r, err := zip.OpenReader("testdata/readme.zip") + if err != nil { + log.Fatal(err) + } + defer r.Close() + + // Iterate through the files in the archive, + // printing some of their contents. + for _, f := range r.File { + fmt.Printf("Contents of %s:\n", f.Name) + rc, err := f.Open() + if err != nil { + log.Fatal(err) + } + _, err = io.CopyN(os.Stdout, rc, 68) + if err != nil { + log.Fatal(err) + } + rc.Close() + fmt.Println() + } + // Output: + // Contents of README: + // This is the source code repository for the Go programming language. +} + +func ExampleWriter_RegisterCompressor() { + // Override the default Deflate compressor with a higher compression level. + + // Create a buffer to write our archive to. + buf := new(bytes.Buffer) + + // Create a new zip archive. + w := zip.NewWriter(buf) + + // Register a custom Deflate compressor. + w.RegisterCompressor(zip.Deflate, func(out io.Writer) (io.WriteCloser, error) { + return flate.NewWriter(out, flate.BestCompression) + }) + + // Proceed to add files to w. +} diff --git a/src/archive/zip/fuzz_test.go b/src/archive/zip/fuzz_test.go new file mode 100644 index 0000000..7dffde6 --- /dev/null +++ b/src/archive/zip/fuzz_test.go @@ -0,0 +1,81 @@ +// Copyright 2021 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package zip + +import ( + "bytes" + "io" + "os" + "path/filepath" + "testing" +) + +func FuzzReader(f *testing.F) { + testdata, err := os.ReadDir("testdata") + if err != nil { + f.Fatalf("failed to read testdata directory: %s", err) + } + for _, de := range testdata { + if de.IsDir() { + continue + } + b, err := os.ReadFile(filepath.Join("testdata", de.Name())) + if err != nil { + f.Fatalf("failed to read testdata: %s", err) + } + f.Add(b) + } + + f.Fuzz(func(t *testing.T, b []byte) { + r, err := NewReader(bytes.NewReader(b), int64(len(b))) + if err != nil { + return + } + + type file struct { + header *FileHeader + content []byte + } + files := []file{} + + for _, f := range r.File { + fr, err := f.Open() + if err != nil { + continue + } + content, err := io.ReadAll(fr) + if err != nil { + continue + } + files = append(files, file{header: &f.FileHeader, content: content}) + if _, err := r.Open(f.Name); err != nil { + continue + } + } + + // If we were unable to read anything out of the archive don't + // bother trying to roundtrip it. + if len(files) == 0 { + return + } + + w := NewWriter(io.Discard) + for _, f := range files { + ww, err := w.CreateHeader(f.header) + if err != nil { + t.Fatalf("unable to write previously parsed header: %s", err) + } + if _, err := ww.Write(f.content); err != nil { + t.Fatalf("unable to write previously parsed content: %s", err) + } + } + + if err := w.Close(); err != nil { + t.Fatalf("Unable to write archive: %s", err) + } + + // TODO: We may want to check if the archive roundtrips. + }) +} diff --git a/src/archive/zip/reader.go b/src/archive/zip/reader.go new file mode 100644 index 0000000..92fd6f6 --- /dev/null +++ b/src/archive/zip/reader.go @@ -0,0 +1,845 @@ +// Copyright 2010 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package zip + +import ( + "bufio" + "encoding/binary" + "errors" + "hash" + "hash/crc32" + "io" + "io/fs" + "os" + "path" + "sort" + "strings" + "sync" + "time" +) + +var ( + ErrFormat = errors.New("zip: not a valid zip file") + ErrAlgorithm = errors.New("zip: unsupported compression algorithm") + ErrChecksum = errors.New("zip: checksum error") +) + +// A Reader serves content from a ZIP archive. +type Reader struct { + r io.ReaderAt + File []*File + Comment string + decompressors map[uint16]Decompressor + + // fileList is a list of files sorted by ename, + // for use by the Open method. + fileListOnce sync.Once + fileList []fileListEntry +} + +// A ReadCloser is a Reader that must be closed when no longer needed. +type ReadCloser struct { + f *os.File + Reader +} + +// A File is a single file in a ZIP archive. +// The file information is in the embedded FileHeader. +// The file content can be accessed by calling Open. +type File struct { + FileHeader + zip *Reader + zipr io.ReaderAt + headerOffset int64 + zip64 bool // zip64 extended information extra field presence + descErr error // error reading the data descriptor during init +} + +// OpenReader will open the Zip file specified by name and return a ReadCloser. +func OpenReader(name string) (*ReadCloser, error) { + f, err := os.Open(name) + if err != nil { + return nil, err + } + fi, err := f.Stat() + if err != nil { + f.Close() + return nil, err + } + r := new(ReadCloser) + if err := r.init(f, fi.Size()); err != nil { + f.Close() + return nil, err + } + r.f = f + return r, nil +} + +// NewReader returns a new Reader reading from r, which is assumed to +// have the given size in bytes. +func NewReader(r io.ReaderAt, size int64) (*Reader, error) { + if size < 0 { + return nil, errors.New("zip: size cannot be negative") + } + zr := new(Reader) + if err := zr.init(r, size); err != nil { + return nil, err + } + return zr, nil +} + +func (z *Reader) init(r io.ReaderAt, size int64) error { + end, err := readDirectoryEnd(r, size) + if err != nil { + return err + } + z.r = r + // Since the number of directory records is not validated, it is not + // safe to preallocate z.File without first checking that the specified + // number of files is reasonable, since a malformed archive may + // indicate it contains up to 1 << 128 - 1 files. Since each file has a + // header which will be _at least_ 30 bytes we can safely preallocate + // if (data size / 30) >= end.directoryRecords. + if end.directorySize < uint64(size) && (uint64(size)-end.directorySize)/30 >= end.directoryRecords { + z.File = make([]*File, 0, end.directoryRecords) + } + z.Comment = end.comment + rs := io.NewSectionReader(r, 0, size) + if _, err = rs.Seek(int64(end.directoryOffset), io.SeekStart); err != nil { + return err + } + buf := bufio.NewReader(rs) + + // The count of files inside a zip is truncated to fit in a uint16. + // Gloss over this by reading headers until we encounter + // a bad one, and then only report an ErrFormat or UnexpectedEOF if + // the file count modulo 65536 is incorrect. + for { + f := &File{zip: z, zipr: r} + err = readDirectoryHeader(f, buf) + if err == ErrFormat || err == io.ErrUnexpectedEOF { + break + } + if err != nil { + return err + } + z.File = append(z.File, f) + } + if uint16(len(z.File)) != uint16(end.directoryRecords) { // only compare 16 bits here + // Return the readDirectoryHeader error if we read + // the wrong number of directory entries. + return err + } + return nil +} + +// RegisterDecompressor registers or overrides a custom decompressor for a +// specific method ID. If a decompressor for a given method is not found, +// Reader will default to looking up the decompressor at the package level. +func (z *Reader) RegisterDecompressor(method uint16, dcomp Decompressor) { + if z.decompressors == nil { + z.decompressors = make(map[uint16]Decompressor) + } + z.decompressors[method] = dcomp +} + +func (z *Reader) decompressor(method uint16) Decompressor { + dcomp := z.decompressors[method] + if dcomp == nil { + dcomp = decompressor(method) + } + return dcomp +} + +// Close closes the Zip file, rendering it unusable for I/O. +func (rc *ReadCloser) Close() error { + return rc.f.Close() +} + +// DataOffset returns the offset of the file's possibly-compressed +// data, relative to the beginning of the zip file. +// +// Most callers should instead use Open, which transparently +// decompresses data and verifies checksums. +func (f *File) DataOffset() (offset int64, err error) { + bodyOffset, err := f.findBodyOffset() + if err != nil { + return + } + return f.headerOffset + bodyOffset, nil +} + +// Open returns a ReadCloser that provides access to the File's contents. +// Multiple files may be read concurrently. +func (f *File) Open() (io.ReadCloser, error) { + bodyOffset, err := f.findBodyOffset() + if err != nil { + return nil, err + } + size := int64(f.CompressedSize64) + r := io.NewSectionReader(f.zipr, f.headerOffset+bodyOffset, size) + dcomp := f.zip.decompressor(f.Method) + if dcomp == nil { + return nil, ErrAlgorithm + } + var rc io.ReadCloser = dcomp(r) + var desr io.Reader + if f.hasDataDescriptor() { + desr = io.NewSectionReader(f.zipr, f.headerOffset+bodyOffset+size, dataDescriptorLen) + } + rc = &checksumReader{ + rc: rc, + hash: crc32.NewIEEE(), + f: f, + desr: desr, + } + return rc, nil +} + +// OpenRaw returns a Reader that provides access to the File's contents without +// decompression. +func (f *File) OpenRaw() (io.Reader, error) { + bodyOffset, err := f.findBodyOffset() + if err != nil { + return nil, err + } + r := io.NewSectionReader(f.zipr, f.headerOffset+bodyOffset, int64(f.CompressedSize64)) + return r, nil +} + +type checksumReader struct { + rc io.ReadCloser + hash hash.Hash32 + nread uint64 // number of bytes read so far + f *File + desr io.Reader // if non-nil, where to read the data descriptor + err error // sticky error +} + +func (r *checksumReader) Stat() (fs.FileInfo, error) { + return headerFileInfo{&r.f.FileHeader}, nil +} + +func (r *checksumReader) Read(b []byte) (n int, err error) { + if r.err != nil { + return 0, r.err + } + n, err = r.rc.Read(b) + r.hash.Write(b[:n]) + r.nread += uint64(n) + if err == nil { + return + } + if err == io.EOF { + if r.nread != r.f.UncompressedSize64 { + return 0, io.ErrUnexpectedEOF + } + if r.desr != nil { + if err1 := readDataDescriptor(r.desr, r.f); err1 != nil { + if err1 == io.EOF { + err = io.ErrUnexpectedEOF + } else { + err = err1 + } + } else if r.hash.Sum32() != r.f.CRC32 { + err = ErrChecksum + } + } else { + // If there's not a data descriptor, we still compare + // the CRC32 of what we've read against the file header + // or TOC's CRC32, if it seems like it was set. + if r.f.CRC32 != 0 && r.hash.Sum32() != r.f.CRC32 { + err = ErrChecksum + } + } + } + r.err = err + return +} + +func (r *checksumReader) Close() error { return r.rc.Close() } + +// findBodyOffset does the minimum work to verify the file has a header +// and returns the file body offset. +func (f *File) findBodyOffset() (int64, error) { + var buf [fileHeaderLen]byte + if _, err := f.zipr.ReadAt(buf[:], f.headerOffset); err != nil { + return 0, err + } + b := readBuf(buf[:]) + if sig := b.uint32(); sig != fileHeaderSignature { + return 0, ErrFormat + } + b = b[22:] // skip over most of the header + filenameLen := int(b.uint16()) + extraLen := int(b.uint16()) + return int64(fileHeaderLen + filenameLen + extraLen), nil +} + +// readDirectoryHeader attempts to read a directory header from r. +// It returns io.ErrUnexpectedEOF if it cannot read a complete header, +// and ErrFormat if it doesn't find a valid header signature. +func readDirectoryHeader(f *File, r io.Reader) error { + var buf [directoryHeaderLen]byte + if _, err := io.ReadFull(r, buf[:]); err != nil { + return err + } + b := readBuf(buf[:]) + if sig := b.uint32(); sig != directoryHeaderSignature { + return ErrFormat + } + f.CreatorVersion = b.uint16() + f.ReaderVersion = b.uint16() + f.Flags = b.uint16() + f.Method = b.uint16() + f.ModifiedTime = b.uint16() + f.ModifiedDate = b.uint16() + f.CRC32 = b.uint32() + f.CompressedSize = b.uint32() + f.UncompressedSize = b.uint32() + f.CompressedSize64 = uint64(f.CompressedSize) + f.UncompressedSize64 = uint64(f.UncompressedSize) + filenameLen := int(b.uint16()) + extraLen := int(b.uint16()) + commentLen := int(b.uint16()) + b = b[4:] // skipped start disk number and internal attributes (2x uint16) + f.ExternalAttrs = b.uint32() + f.headerOffset = int64(b.uint32()) + d := make([]byte, filenameLen+extraLen+commentLen) + if _, err := io.ReadFull(r, d); err != nil { + return err + } + f.Name = string(d[:filenameLen]) + f.Extra = d[filenameLen : filenameLen+extraLen] + f.Comment = string(d[filenameLen+extraLen:]) + + // Determine the character encoding. + utf8Valid1, utf8Require1 := detectUTF8(f.Name) + utf8Valid2, utf8Require2 := detectUTF8(f.Comment) + switch { + case !utf8Valid1 || !utf8Valid2: + // Name and Comment definitely not UTF-8. + f.NonUTF8 = true + case !utf8Require1 && !utf8Require2: + // Name and Comment use only single-byte runes that overlap with UTF-8. + f.NonUTF8 = false + default: + // Might be UTF-8, might be some other encoding; preserve existing flag. + // Some ZIP writers use UTF-8 encoding without setting the UTF-8 flag. + // Since it is impossible to always distinguish valid UTF-8 from some + // other encoding (e.g., GBK or Shift-JIS), we trust the flag. + f.NonUTF8 = f.Flags&0x800 == 0 + } + + needUSize := f.UncompressedSize == ^uint32(0) + needCSize := f.CompressedSize == ^uint32(0) + needHeaderOffset := f.headerOffset == int64(^uint32(0)) + + // Best effort to find what we need. + // Other zip authors might not even follow the basic format, + // and we'll just ignore the Extra content in that case. + var modified time.Time +parseExtras: + for extra := readBuf(f.Extra); len(extra) >= 4; { // need at least tag and size + fieldTag := extra.uint16() + fieldSize := int(extra.uint16()) + if len(extra) < fieldSize { + break + } + fieldBuf := extra.sub(fieldSize) + + switch fieldTag { + case zip64ExtraID: + f.zip64 = true + + // update directory values from the zip64 extra block. + // They should only be consulted if the sizes read earlier + // are maxed out. + // See golang.org/issue/13367. + if needUSize { + needUSize = false + if len(fieldBuf) < 8 { + return ErrFormat + } + f.UncompressedSize64 = fieldBuf.uint64() + } + if needCSize { + needCSize = false + if len(fieldBuf) < 8 { + return ErrFormat + } + f.CompressedSize64 = fieldBuf.uint64() + } + if needHeaderOffset { + needHeaderOffset = false + if len(fieldBuf) < 8 { + return ErrFormat + } + f.headerOffset = int64(fieldBuf.uint64()) + } + case ntfsExtraID: + if len(fieldBuf) < 4 { + continue parseExtras + } + fieldBuf.uint32() // reserved (ignored) + for len(fieldBuf) >= 4 { // need at least tag and size + attrTag := fieldBuf.uint16() + attrSize := int(fieldBuf.uint16()) + if len(fieldBuf) < attrSize { + continue parseExtras + } + attrBuf := fieldBuf.sub(attrSize) + if attrTag != 1 || attrSize != 24 { + continue // Ignore irrelevant attributes + } + + const ticksPerSecond = 1e7 // Windows timestamp resolution + ts := int64(attrBuf.uint64()) // ModTime since Windows epoch + secs := int64(ts / ticksPerSecond) + nsecs := (1e9 / ticksPerSecond) * int64(ts%ticksPerSecond) + epoch := time.Date(1601, time.January, 1, 0, 0, 0, 0, time.UTC) + modified = time.Unix(epoch.Unix()+secs, nsecs) + } + case unixExtraID, infoZipUnixExtraID: + if len(fieldBuf) < 8 { + continue parseExtras + } + fieldBuf.uint32() // AcTime (ignored) + ts := int64(fieldBuf.uint32()) // ModTime since Unix epoch + modified = time.Unix(ts, 0) + case extTimeExtraID: + if len(fieldBuf) < 5 || fieldBuf.uint8()&1 == 0 { + continue parseExtras + } + ts := int64(fieldBuf.uint32()) // ModTime since Unix epoch + modified = time.Unix(ts, 0) + } + } + + msdosModified := msDosTimeToTime(f.ModifiedDate, f.ModifiedTime) + f.Modified = msdosModified + if !modified.IsZero() { + f.Modified = modified.UTC() + + // If legacy MS-DOS timestamps are set, we can use the delta between + // the legacy and extended versions to estimate timezone offset. + // + // A non-UTC timezone is always used (even if offset is zero). + // Thus, FileHeader.Modified.Location() == time.UTC is useful for + // determining whether extended timestamps are present. + // This is necessary for users that need to do additional time + // calculations when dealing with legacy ZIP formats. + if f.ModifiedTime != 0 || f.ModifiedDate != 0 { + f.Modified = modified.In(timeZone(msdosModified.Sub(modified))) + } + } + + // Assume that uncompressed size 2³²-1 could plausibly happen in + // an old zip32 file that was sharding inputs into the largest chunks + // possible (or is just malicious; search the web for 42.zip). + // If needUSize is true still, it means we didn't see a zip64 extension. + // As long as the compressed size is not also 2³²-1 (implausible) + // and the header is not also 2³²-1 (equally implausible), + // accept the uncompressed size 2³²-1 as valid. + // If nothing else, this keeps archive/zip working with 42.zip. + _ = needUSize + + if needCSize || needHeaderOffset { + return ErrFormat + } + + return nil +} + +func readDataDescriptor(r io.Reader, f *File) error { + var buf [dataDescriptorLen]byte + // The spec says: "Although not originally assigned a + // signature, the value 0x08074b50 has commonly been adopted + // as a signature value for the data descriptor record. + // Implementers should be aware that ZIP files may be + // encountered with or without this signature marking data + // descriptors and should account for either case when reading + // ZIP files to ensure compatibility." + // + // dataDescriptorLen includes the size of the signature but + // first read just those 4 bytes to see if it exists. + if _, err := io.ReadFull(r, buf[:4]); err != nil { + return err + } + off := 0 + maybeSig := readBuf(buf[:4]) + if maybeSig.uint32() != dataDescriptorSignature { + // No data descriptor signature. Keep these four + // bytes. + off += 4 + } + if _, err := io.ReadFull(r, buf[off:12]); err != nil { + return err + } + b := readBuf(buf[:12]) + if b.uint32() != f.CRC32 { + return ErrChecksum + } + + // The two sizes that follow here can be either 32 bits or 64 bits + // but the spec is not very clear on this and different + // interpretations has been made causing incompatibilities. We + // already have the sizes from the central directory so we can + // just ignore these. + + return nil +} + +func readDirectoryEnd(r io.ReaderAt, size int64) (dir *directoryEnd, err error) { + // look for directoryEndSignature in the last 1k, then in the last 65k + var buf []byte + var directoryEndOffset int64 + for i, bLen := range []int64{1024, 65 * 1024} { + if bLen > size { + bLen = size + } + buf = make([]byte, int(bLen)) + if _, err := r.ReadAt(buf, size-bLen); err != nil && err != io.EOF { + return nil, err + } + if p := findSignatureInBlock(buf); p >= 0 { + buf = buf[p:] + directoryEndOffset = size - bLen + int64(p) + break + } + if i == 1 || bLen == size { + return nil, ErrFormat + } + } + + // read header into struct + b := readBuf(buf[4:]) // skip signature + d := &directoryEnd{ + diskNbr: uint32(b.uint16()), + dirDiskNbr: uint32(b.uint16()), + dirRecordsThisDisk: uint64(b.uint16()), + directoryRecords: uint64(b.uint16()), + directorySize: uint64(b.uint32()), + directoryOffset: uint64(b.uint32()), + commentLen: b.uint16(), + } + l := int(d.commentLen) + if l > len(b) { + return nil, errors.New("zip: invalid comment length") + } + d.comment = string(b[:l]) + + // These values mean that the file can be a zip64 file + if d.directoryRecords == 0xffff || d.directorySize == 0xffff || d.directoryOffset == 0xffffffff { + p, err := findDirectory64End(r, directoryEndOffset) + if err == nil && p >= 0 { + err = readDirectory64End(r, p, d) + } + if err != nil { + return nil, err + } + } + // Make sure directoryOffset points to somewhere in our file. + if o := int64(d.directoryOffset); o < 0 || o >= size { + return nil, ErrFormat + } + return d, nil +} + +// findDirectory64End tries to read the zip64 locator just before the +// directory end and returns the offset of the zip64 directory end if +// found. +func findDirectory64End(r io.ReaderAt, directoryEndOffset int64) (int64, error) { + locOffset := directoryEndOffset - directory64LocLen + if locOffset < 0 { + return -1, nil // no need to look for a header outside the file + } + buf := make([]byte, directory64LocLen) + if _, err := r.ReadAt(buf, locOffset); err != nil { + return -1, err + } + b := readBuf(buf) + if sig := b.uint32(); sig != directory64LocSignature { + return -1, nil + } + if b.uint32() != 0 { // number of the disk with the start of the zip64 end of central directory + return -1, nil // the file is not a valid zip64-file + } + p := b.uint64() // relative offset of the zip64 end of central directory record + if b.uint32() != 1 { // total number of disks + return -1, nil // the file is not a valid zip64-file + } + return int64(p), nil +} + +// readDirectory64End reads the zip64 directory end and updates the +// directory end with the zip64 directory end values. +func readDirectory64End(r io.ReaderAt, offset int64, d *directoryEnd) (err error) { + buf := make([]byte, directory64EndLen) + if _, err := r.ReadAt(buf, offset); err != nil { + return err + } + + b := readBuf(buf) + if sig := b.uint32(); sig != directory64EndSignature { + return ErrFormat + } + + b = b[12:] // skip dir size, version and version needed (uint64 + 2x uint16) + d.diskNbr = b.uint32() // number of this disk + d.dirDiskNbr = b.uint32() // number of the disk with the start of the central directory + d.dirRecordsThisDisk = b.uint64() // total number of entries in the central directory on this disk + d.directoryRecords = b.uint64() // total number of entries in the central directory + d.directorySize = b.uint64() // size of the central directory + d.directoryOffset = b.uint64() // offset of start of central directory with respect to the starting disk number + + return nil +} + +func findSignatureInBlock(b []byte) int { + for i := len(b) - directoryEndLen; i >= 0; i-- { + // defined from directoryEndSignature in struct.go + if b[i] == 'P' && b[i+1] == 'K' && b[i+2] == 0x05 && b[i+3] == 0x06 { + // n is length of comment + n := int(b[i+directoryEndLen-2]) | int(b[i+directoryEndLen-1])<<8 + if n+directoryEndLen+i <= len(b) { + return i + } + } + } + return -1 +} + +type readBuf []byte + +func (b *readBuf) uint8() uint8 { + v := (*b)[0] + *b = (*b)[1:] + return v +} + +func (b *readBuf) uint16() uint16 { + v := binary.LittleEndian.Uint16(*b) + *b = (*b)[2:] + return v +} + +func (b *readBuf) uint32() uint32 { + v := binary.LittleEndian.Uint32(*b) + *b = (*b)[4:] + return v +} + +func (b *readBuf) uint64() uint64 { + v := binary.LittleEndian.Uint64(*b) + *b = (*b)[8:] + return v +} + +func (b *readBuf) sub(n int) readBuf { + b2 := (*b)[:n] + *b = (*b)[n:] + return b2 +} + +// A fileListEntry is a File and its ename. +// If file == nil, the fileListEntry describes a directory without metadata. +type fileListEntry struct { + name string + file *File + isDir bool +} + +type fileInfoDirEntry interface { + fs.FileInfo + fs.DirEntry +} + +func (e *fileListEntry) stat() fileInfoDirEntry { + if !e.isDir { + return headerFileInfo{&e.file.FileHeader} + } + return e +} + +// Only used for directories. +func (f *fileListEntry) Name() string { _, elem, _ := split(f.name); return elem } +func (f *fileListEntry) Size() int64 { return 0 } +func (f *fileListEntry) Mode() fs.FileMode { return fs.ModeDir | 0555 } +func (f *fileListEntry) Type() fs.FileMode { return fs.ModeDir } +func (f *fileListEntry) IsDir() bool { return true } +func (f *fileListEntry) Sys() any { return nil } + +func (f *fileListEntry) ModTime() time.Time { + if f.file == nil { + return time.Time{} + } + return f.file.FileHeader.Modified.UTC() +} + +func (f *fileListEntry) Info() (fs.FileInfo, error) { return f, nil } + +// toValidName coerces name to be a valid name for fs.FS.Open. +func toValidName(name string) string { + name = strings.ReplaceAll(name, `\`, `/`) + p := path.Clean(name) + if strings.HasPrefix(p, "/") { + p = p[len("/"):] + } + for strings.HasPrefix(p, "../") { + p = p[len("../"):] + } + return p +} + +func (r *Reader) initFileList() { + r.fileListOnce.Do(func() { + dirs := make(map[string]bool) + knownDirs := make(map[string]bool) + for _, file := range r.File { + isDir := len(file.Name) > 0 && file.Name[len(file.Name)-1] == '/' + name := toValidName(file.Name) + if name == "" { + continue + } + for dir := path.Dir(name); dir != "."; dir = path.Dir(dir) { + dirs[dir] = true + } + entry := fileListEntry{ + name: name, + file: file, + isDir: isDir, + } + r.fileList = append(r.fileList, entry) + if isDir { + knownDirs[name] = true + } + } + for dir := range dirs { + if !knownDirs[dir] { + entry := fileListEntry{ + name: dir, + file: nil, + isDir: true, + } + r.fileList = append(r.fileList, entry) + } + } + + sort.Slice(r.fileList, func(i, j int) bool { return fileEntryLess(r.fileList[i].name, r.fileList[j].name) }) + }) +} + +func fileEntryLess(x, y string) bool { + xdir, xelem, _ := split(x) + ydir, yelem, _ := split(y) + return xdir < ydir || xdir == ydir && xelem < yelem +} + +// Open opens the named file in the ZIP archive, +// using the semantics of fs.FS.Open: +// paths are always slash separated, with no +// leading / or ../ elements. +func (r *Reader) Open(name string) (fs.File, error) { + r.initFileList() + + if !fs.ValidPath(name) { + return nil, &fs.PathError{Op: "open", Path: name, Err: fs.ErrInvalid} + } + e := r.openLookup(name) + if e == nil { + return nil, &fs.PathError{Op: "open", Path: name, Err: fs.ErrNotExist} + } + if e.isDir { + return &openDir{e, r.openReadDir(name), 0}, nil + } + rc, err := e.file.Open() + if err != nil { + return nil, err + } + return rc.(fs.File), nil +} + +func split(name string) (dir, elem string, isDir bool) { + if len(name) > 0 && name[len(name)-1] == '/' { + isDir = true + name = name[:len(name)-1] + } + i := len(name) - 1 + for i >= 0 && name[i] != '/' { + i-- + } + if i < 0 { + return ".", name, isDir + } + return name[:i], name[i+1:], isDir +} + +var dotFile = &fileListEntry{name: "./", isDir: true} + +func (r *Reader) openLookup(name string) *fileListEntry { + if name == "." { + return dotFile + } + + dir, elem, _ := split(name) + files := r.fileList + i := sort.Search(len(files), func(i int) bool { + idir, ielem, _ := split(files[i].name) + return idir > dir || idir == dir && ielem >= elem + }) + if i < len(files) { + fname := files[i].name + if fname == name || len(fname) == len(name)+1 && fname[len(name)] == '/' && fname[:len(name)] == name { + return &files[i] + } + } + return nil +} + +func (r *Reader) openReadDir(dir string) []fileListEntry { + files := r.fileList + i := sort.Search(len(files), func(i int) bool { + idir, _, _ := split(files[i].name) + return idir >= dir + }) + j := sort.Search(len(files), func(j int) bool { + jdir, _, _ := split(files[j].name) + return jdir > dir + }) + return files[i:j] +} + +type openDir struct { + e *fileListEntry + files []fileListEntry + offset int +} + +func (d *openDir) Close() error { return nil } +func (d *openDir) Stat() (fs.FileInfo, error) { return d.e.stat(), nil } + +func (d *openDir) Read([]byte) (int, error) { + return 0, &fs.PathError{Op: "read", Path: d.e.name, Err: errors.New("is a directory")} +} + +func (d *openDir) ReadDir(count int) ([]fs.DirEntry, error) { + n := len(d.files) - d.offset + if count > 0 && n > count { + n = count + } + if n == 0 { + if count <= 0 { + return nil, nil + } + return nil, io.EOF + } + list := make([]fs.DirEntry, n) + for i := range list { + list[i] = d.files[d.offset+i].stat() + } + d.offset += n + return list, nil +} diff --git a/src/archive/zip/reader_test.go b/src/archive/zip/reader_test.go new file mode 100644 index 0000000..d1a9bdd --- /dev/null +++ b/src/archive/zip/reader_test.go @@ -0,0 +1,1410 @@ +// Copyright 2010 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package zip + +import ( + "bytes" + "encoding/binary" + "encoding/hex" + "internal/obscuretestdata" + "io" + "io/fs" + "os" + "path/filepath" + "reflect" + "regexp" + "strings" + "testing" + "testing/fstest" + "time" +) + +type ZipTest struct { + Name string + Source func() (r io.ReaderAt, size int64) // if non-nil, used instead of testdata/<Name> file + Comment string + File []ZipTestFile + Obscured bool // needed for Apple notarization (golang.org/issue/34986) + Error error // the error that Opening this file should return +} + +type ZipTestFile struct { + Name string + Mode fs.FileMode + NonUTF8 bool + ModTime time.Time + Modified time.Time + + // Information describing expected zip file content. + // First, reading the entire content should produce the error ContentErr. + // Second, if ContentErr==nil, the content should match Content. + // If content is large, an alternative to setting Content is to set File, + // which names a file in the testdata/ directory containing the + // uncompressed expected content. + // If content is very large, an alternative to setting Content or File + // is to set Size, which will then be checked against the header-reported size + // but will bypass the decompressing of the actual data. + // This last option is used for testing very large (multi-GB) compressed files. + ContentErr error + Content []byte + File string + Size uint64 +} + +var tests = []ZipTest{ + { + Name: "test.zip", + Comment: "This is a zipfile comment.", + File: []ZipTestFile{ + { + Name: "test.txt", + Content: []byte("This is a test text file.\n"), + Modified: time.Date(2010, 9, 5, 12, 12, 1, 0, timeZone(+10*time.Hour)), + Mode: 0644, + }, + { + Name: "gophercolor16x16.png", + File: "gophercolor16x16.png", + Modified: time.Date(2010, 9, 5, 15, 52, 58, 0, timeZone(+10*time.Hour)), + Mode: 0644, + }, + }, + }, + { + Name: "test-trailing-junk.zip", + Comment: "This is a zipfile comment.", + File: []ZipTestFile{ + { + Name: "test.txt", + Content: []byte("This is a test text file.\n"), + Modified: time.Date(2010, 9, 5, 12, 12, 1, 0, timeZone(+10*time.Hour)), + Mode: 0644, + }, + { + Name: "gophercolor16x16.png", + File: "gophercolor16x16.png", + Modified: time.Date(2010, 9, 5, 15, 52, 58, 0, timeZone(+10*time.Hour)), + Mode: 0644, + }, + }, + }, + { + Name: "r.zip", + Source: returnRecursiveZip, + File: []ZipTestFile{ + { + Name: "r/r.zip", + Content: rZipBytes(), + Modified: time.Date(2010, 3, 4, 0, 24, 16, 0, time.UTC), + Mode: 0666, + }, + }, + }, + { + Name: "symlink.zip", + File: []ZipTestFile{ + { + Name: "symlink", + Content: []byte("../target"), + Modified: time.Date(2012, 2, 3, 19, 56, 48, 0, timeZone(-2*time.Hour)), + Mode: 0777 | fs.ModeSymlink, + }, + }, + }, + { + Name: "readme.zip", + }, + { + Name: "readme.notzip", + Error: ErrFormat, + }, + { + Name: "dd.zip", + File: []ZipTestFile{ + { + Name: "filename", + Content: []byte("This is a test textfile.\n"), + Modified: time.Date(2011, 2, 2, 13, 6, 20, 0, time.UTC), + Mode: 0666, + }, + }, + }, + { + // created in windows XP file manager. + Name: "winxp.zip", + File: []ZipTestFile{ + { + Name: "hello", + Content: []byte("world \r\n"), + Modified: time.Date(2011, 12, 8, 10, 4, 24, 0, time.UTC), + Mode: 0666, + }, + { + Name: "dir/bar", + Content: []byte("foo \r\n"), + Modified: time.Date(2011, 12, 8, 10, 4, 50, 0, time.UTC), + Mode: 0666, + }, + { + Name: "dir/empty/", + Content: []byte{}, + Modified: time.Date(2011, 12, 8, 10, 8, 6, 0, time.UTC), + Mode: fs.ModeDir | 0777, + }, + { + Name: "readonly", + Content: []byte("important \r\n"), + Modified: time.Date(2011, 12, 8, 10, 6, 8, 0, time.UTC), + Mode: 0444, + }, + }, + }, + { + // created by Zip 3.0 under Linux + Name: "unix.zip", + File: []ZipTestFile{ + { + Name: "hello", + Content: []byte("world \r\n"), + Modified: time.Date(2011, 12, 8, 10, 4, 24, 0, timeZone(0)), + Mode: 0666, + }, + { + Name: "dir/bar", + Content: []byte("foo \r\n"), + Modified: time.Date(2011, 12, 8, 10, 4, 50, 0, timeZone(0)), + Mode: 0666, + }, + { + Name: "dir/empty/", + Content: []byte{}, + Modified: time.Date(2011, 12, 8, 10, 8, 6, 0, timeZone(0)), + Mode: fs.ModeDir | 0777, + }, + { + Name: "readonly", + Content: []byte("important \r\n"), + Modified: time.Date(2011, 12, 8, 10, 6, 8, 0, timeZone(0)), + Mode: 0444, + }, + }, + }, + { + // created by Go, before we wrote the "optional" data + // descriptor signatures (which are required by macOS). + // Use obscured file to avoid Apple’s notarization service + // rejecting the toolchain due to an inability to unzip this archive. + // See golang.org/issue/34986 + Name: "go-no-datadesc-sig.zip.base64", + Obscured: true, + File: []ZipTestFile{ + { + Name: "foo.txt", + Content: []byte("foo\n"), + Modified: time.Date(2012, 3, 8, 16, 59, 10, 0, timeZone(-8*time.Hour)), + Mode: 0644, + }, + { + Name: "bar.txt", + Content: []byte("bar\n"), + Modified: time.Date(2012, 3, 8, 16, 59, 12, 0, timeZone(-8*time.Hour)), + Mode: 0644, + }, + }, + }, + { + // created by Go, after we wrote the "optional" data + // descriptor signatures (which are required by macOS) + Name: "go-with-datadesc-sig.zip", + File: []ZipTestFile{ + { + Name: "foo.txt", + Content: []byte("foo\n"), + Modified: time.Date(1979, 11, 30, 0, 0, 0, 0, time.UTC), + Mode: 0666, + }, + { + Name: "bar.txt", + Content: []byte("bar\n"), + Modified: time.Date(1979, 11, 30, 0, 0, 0, 0, time.UTC), + Mode: 0666, + }, + }, + }, + { + Name: "Bad-CRC32-in-data-descriptor", + Source: returnCorruptCRC32Zip, + File: []ZipTestFile{ + { + Name: "foo.txt", + Content: []byte("foo\n"), + Modified: time.Date(1979, 11, 30, 0, 0, 0, 0, time.UTC), + Mode: 0666, + ContentErr: ErrChecksum, + }, + { + Name: "bar.txt", + Content: []byte("bar\n"), + Modified: time.Date(1979, 11, 30, 0, 0, 0, 0, time.UTC), + Mode: 0666, + }, + }, + }, + // Tests that we verify (and accept valid) crc32s on files + // with crc32s in their file header (not in data descriptors) + { + Name: "crc32-not-streamed.zip", + File: []ZipTestFile{ + { + Name: "foo.txt", + Content: []byte("foo\n"), + Modified: time.Date(2012, 3, 8, 16, 59, 10, 0, timeZone(-8*time.Hour)), + Mode: 0644, + }, + { + Name: "bar.txt", + Content: []byte("bar\n"), + Modified: time.Date(2012, 3, 8, 16, 59, 12, 0, timeZone(-8*time.Hour)), + Mode: 0644, + }, + }, + }, + // Tests that we verify (and reject invalid) crc32s on files + // with crc32s in their file header (not in data descriptors) + { + Name: "crc32-not-streamed.zip", + Source: returnCorruptNotStreamedZip, + File: []ZipTestFile{ + { + Name: "foo.txt", + Content: []byte("foo\n"), + Modified: time.Date(2012, 3, 8, 16, 59, 10, 0, timeZone(-8*time.Hour)), + Mode: 0644, + ContentErr: ErrChecksum, + }, + { + Name: "bar.txt", + Content: []byte("bar\n"), + Modified: time.Date(2012, 3, 8, 16, 59, 12, 0, timeZone(-8*time.Hour)), + Mode: 0644, + }, + }, + }, + { + Name: "zip64.zip", + File: []ZipTestFile{ + { + Name: "README", + Content: []byte("This small file is in ZIP64 format.\n"), + Modified: time.Date(2012, 8, 10, 14, 33, 32, 0, time.UTC), + Mode: 0644, + }, + }, + }, + // Another zip64 file with different Extras fields. (golang.org/issue/7069) + { + Name: "zip64-2.zip", + File: []ZipTestFile{ + { + Name: "README", + Content: []byte("This small file is in ZIP64 format.\n"), + Modified: time.Date(2012, 8, 10, 14, 33, 32, 0, timeZone(-4*time.Hour)), + Mode: 0644, + }, + }, + }, + // Largest possible non-zip64 file, with no zip64 header. + { + Name: "big.zip", + Source: returnBigZipBytes, + File: []ZipTestFile{ + { + Name: "big.file", + Content: nil, + Size: 1<<32 - 1, + Modified: time.Date(1979, 11, 30, 0, 0, 0, 0, time.UTC), + Mode: 0666, + }, + }, + }, + { + Name: "utf8-7zip.zip", + File: []ZipTestFile{ + { + Name: "世界", + Content: []byte{}, + Mode: 0666, + Modified: time.Date(2017, 11, 6, 13, 9, 27, 867862500, timeZone(-8*time.Hour)), + }, + }, + }, + { + Name: "utf8-infozip.zip", + File: []ZipTestFile{ + { + Name: "世界", + Content: []byte{}, + Mode: 0644, + // Name is valid UTF-8, but format does not have UTF-8 flag set. + // We don't do UTF-8 detection for multi-byte runes due to + // false-positives with other encodings (e.g., Shift-JIS). + // Format says encoding is not UTF-8, so we trust it. + NonUTF8: true, + Modified: time.Date(2017, 11, 6, 13, 9, 27, 0, timeZone(-8*time.Hour)), + }, + }, + }, + { + Name: "utf8-osx.zip", + File: []ZipTestFile{ + { + Name: "世界", + Content: []byte{}, + Mode: 0644, + // Name is valid UTF-8, but format does not have UTF-8 set. + NonUTF8: true, + Modified: time.Date(2017, 11, 6, 13, 9, 27, 0, timeZone(-8*time.Hour)), + }, + }, + }, + { + Name: "utf8-winrar.zip", + File: []ZipTestFile{ + { + Name: "世界", + Content: []byte{}, + Mode: 0666, + Modified: time.Date(2017, 11, 6, 13, 9, 27, 867862500, timeZone(-8*time.Hour)), + }, + }, + }, + { + Name: "utf8-winzip.zip", + File: []ZipTestFile{ + { + Name: "世界", + Content: []byte{}, + Mode: 0666, + Modified: time.Date(2017, 11, 6, 13, 9, 27, 867000000, timeZone(-8*time.Hour)), + }, + }, + }, + { + Name: "time-7zip.zip", + File: []ZipTestFile{ + { + Name: "test.txt", + Content: []byte{}, + Size: 1<<32 - 1, + Modified: time.Date(2017, 10, 31, 21, 11, 57, 244817900, timeZone(-7*time.Hour)), + Mode: 0666, + }, + }, + }, + { + Name: "time-infozip.zip", + File: []ZipTestFile{ + { + Name: "test.txt", + Content: []byte{}, + Size: 1<<32 - 1, + Modified: time.Date(2017, 10, 31, 21, 11, 57, 0, timeZone(-7*time.Hour)), + Mode: 0644, + }, + }, + }, + { + Name: "time-osx.zip", + File: []ZipTestFile{ + { + Name: "test.txt", + Content: []byte{}, + Size: 1<<32 - 1, + Modified: time.Date(2017, 10, 31, 21, 11, 57, 0, timeZone(-7*time.Hour)), + Mode: 0644, + }, + }, + }, + { + Name: "time-win7.zip", + File: []ZipTestFile{ + { + Name: "test.txt", + Content: []byte{}, + Size: 1<<32 - 1, + Modified: time.Date(2017, 10, 31, 21, 11, 58, 0, time.UTC), + Mode: 0666, + }, + }, + }, + { + Name: "time-winrar.zip", + File: []ZipTestFile{ + { + Name: "test.txt", + Content: []byte{}, + Size: 1<<32 - 1, + Modified: time.Date(2017, 10, 31, 21, 11, 57, 244817900, timeZone(-7*time.Hour)), + Mode: 0666, + }, + }, + }, + { + Name: "time-winzip.zip", + File: []ZipTestFile{ + { + Name: "test.txt", + Content: []byte{}, + Size: 1<<32 - 1, + Modified: time.Date(2017, 10, 31, 21, 11, 57, 244000000, timeZone(-7*time.Hour)), + Mode: 0666, + }, + }, + }, + { + Name: "time-go.zip", + File: []ZipTestFile{ + { + Name: "test.txt", + Content: []byte{}, + Size: 1<<32 - 1, + Modified: time.Date(2017, 10, 31, 21, 11, 57, 0, timeZone(-7*time.Hour)), + Mode: 0666, + }, + }, + }, + { + Name: "time-22738.zip", + File: []ZipTestFile{ + { + Name: "file", + Content: []byte{}, + Mode: 0666, + Modified: time.Date(1999, 12, 31, 19, 0, 0, 0, timeZone(-5*time.Hour)), + ModTime: time.Date(1999, 12, 31, 19, 0, 0, 0, time.UTC), + }, + }, + }, +} + +func TestReader(t *testing.T) { + for _, zt := range tests { + t.Run(zt.Name, func(t *testing.T) { + readTestZip(t, zt) + }) + } +} + +func readTestZip(t *testing.T, zt ZipTest) { + var z *Reader + var err error + var raw []byte + if zt.Source != nil { + rat, size := zt.Source() + z, err = NewReader(rat, size) + raw = make([]byte, size) + if _, err := rat.ReadAt(raw, 0); err != nil { + t.Errorf("ReadAt error=%v", err) + return + } + } else { + path := filepath.Join("testdata", zt.Name) + if zt.Obscured { + tf, err := obscuretestdata.DecodeToTempFile(path) + if err != nil { + t.Errorf("obscuretestdata.DecodeToTempFile(%s): %v", path, err) + return + } + defer os.Remove(tf) + path = tf + } + var rc *ReadCloser + rc, err = OpenReader(path) + if err == nil { + defer rc.Close() + z = &rc.Reader + } + var err2 error + raw, err2 = os.ReadFile(path) + if err2 != nil { + t.Errorf("ReadFile(%s) error=%v", path, err2) + return + } + } + if err != zt.Error { + t.Errorf("error=%v, want %v", err, zt.Error) + return + } + + // bail if file is not zip + if err == ErrFormat { + return + } + + // bail here if no Files expected to be tested + // (there may actually be files in the zip, but we don't care) + if zt.File == nil { + return + } + + if z.Comment != zt.Comment { + t.Errorf("comment=%q, want %q", z.Comment, zt.Comment) + } + if len(z.File) != len(zt.File) { + t.Fatalf("file count=%d, want %d", len(z.File), len(zt.File)) + } + + // test read of each file + for i, ft := range zt.File { + readTestFile(t, zt, ft, z.File[i], raw) + } + if t.Failed() { + return + } + + // test simultaneous reads + n := 0 + done := make(chan bool) + for i := 0; i < 5; i++ { + for j, ft := range zt.File { + go func(j int, ft ZipTestFile) { + readTestFile(t, zt, ft, z.File[j], raw) + done <- true + }(j, ft) + n++ + } + } + for ; n > 0; n-- { + <-done + } +} + +func equalTimeAndZone(t1, t2 time.Time) bool { + name1, offset1 := t1.Zone() + name2, offset2 := t2.Zone() + return t1.Equal(t2) && name1 == name2 && offset1 == offset2 +} + +func readTestFile(t *testing.T, zt ZipTest, ft ZipTestFile, f *File, raw []byte) { + if f.Name != ft.Name { + t.Errorf("name=%q, want %q", f.Name, ft.Name) + } + if !ft.Modified.IsZero() && !equalTimeAndZone(f.Modified, ft.Modified) { + t.Errorf("%s: Modified=%s, want %s", f.Name, f.Modified, ft.Modified) + } + if !ft.ModTime.IsZero() && !equalTimeAndZone(f.ModTime(), ft.ModTime) { + t.Errorf("%s: ModTime=%s, want %s", f.Name, f.ModTime(), ft.ModTime) + } + + testFileMode(t, f, ft.Mode) + + size := uint64(f.UncompressedSize) + if size == uint32max { + size = f.UncompressedSize64 + } else if size != f.UncompressedSize64 { + t.Errorf("%v: UncompressedSize=%#x does not match UncompressedSize64=%#x", f.Name, size, f.UncompressedSize64) + } + + // Check that OpenRaw returns the correct byte segment + rw, err := f.OpenRaw() + if err != nil { + t.Errorf("%v: OpenRaw error=%v", f.Name, err) + return + } + start, err := f.DataOffset() + if err != nil { + t.Errorf("%v: DataOffset error=%v", f.Name, err) + return + } + got, err := io.ReadAll(rw) + if err != nil { + t.Errorf("%v: OpenRaw ReadAll error=%v", f.Name, err) + return + } + end := uint64(start) + f.CompressedSize64 + want := raw[start:end] + if !bytes.Equal(got, want) { + t.Logf("got %q", got) + t.Logf("want %q", want) + t.Errorf("%v: OpenRaw returned unexpected bytes", f.Name) + return + } + + r, err := f.Open() + if err != nil { + t.Errorf("%v", err) + return + } + + // For very large files, just check that the size is correct. + // The content is expected to be all zeros. + // Don't bother uncompressing: too big. + if ft.Content == nil && ft.File == "" && ft.Size > 0 { + if size != ft.Size { + t.Errorf("%v: uncompressed size %#x, want %#x", ft.Name, size, ft.Size) + } + r.Close() + return + } + + var b bytes.Buffer + _, err = io.Copy(&b, r) + if err != ft.ContentErr { + t.Errorf("copying contents: %v (want %v)", err, ft.ContentErr) + } + if err != nil { + return + } + r.Close() + + if g := uint64(b.Len()); g != size { + t.Errorf("%v: read %v bytes but f.UncompressedSize == %v", f.Name, g, size) + } + + var c []byte + if ft.Content != nil { + c = ft.Content + } else if c, err = os.ReadFile("testdata/" + ft.File); err != nil { + t.Error(err) + return + } + + if b.Len() != len(c) { + t.Errorf("%s: len=%d, want %d", f.Name, b.Len(), len(c)) + return + } + + for i, b := range b.Bytes() { + if b != c[i] { + t.Errorf("%s: content[%d]=%q want %q", f.Name, i, b, c[i]) + return + } + } +} + +func testFileMode(t *testing.T, f *File, want fs.FileMode) { + mode := f.Mode() + if want == 0 { + t.Errorf("%s mode: got %v, want none", f.Name, mode) + } else if mode != want { + t.Errorf("%s mode: want %v, got %v", f.Name, want, mode) + } +} + +func TestInvalidFiles(t *testing.T) { + const size = 1024 * 70 // 70kb + b := make([]byte, size) + + // zeroes + _, err := NewReader(bytes.NewReader(b), size) + if err != ErrFormat { + t.Errorf("zeroes: error=%v, want %v", err, ErrFormat) + } + + // repeated directoryEndSignatures + sig := make([]byte, 4) + binary.LittleEndian.PutUint32(sig, directoryEndSignature) + for i := 0; i < size-4; i += 4 { + copy(b[i:i+4], sig) + } + _, err = NewReader(bytes.NewReader(b), size) + if err != ErrFormat { + t.Errorf("sigs: error=%v, want %v", err, ErrFormat) + } + + // negative size + _, err = NewReader(bytes.NewReader([]byte("foobar")), -1) + if err == nil { + t.Errorf("archive/zip.NewReader: expected error when negative size is passed") + } +} + +func messWith(fileName string, corrupter func(b []byte)) (r io.ReaderAt, size int64) { + data, err := os.ReadFile(filepath.Join("testdata", fileName)) + if err != nil { + panic("Error reading " + fileName + ": " + err.Error()) + } + corrupter(data) + return bytes.NewReader(data), int64(len(data)) +} + +func returnCorruptCRC32Zip() (r io.ReaderAt, size int64) { + return messWith("go-with-datadesc-sig.zip", func(b []byte) { + // Corrupt one of the CRC32s in the data descriptor: + b[0x2d]++ + }) +} + +func returnCorruptNotStreamedZip() (r io.ReaderAt, size int64) { + return messWith("crc32-not-streamed.zip", func(b []byte) { + // Corrupt foo.txt's final crc32 byte, in both + // the file header and TOC. (0x7e -> 0x7f) + b[0x11]++ + b[0x9d]++ + + // TODO(bradfitz): add a new test that only corrupts + // one of these values, and verify that that's also an + // error. Currently, the reader code doesn't verify the + // fileheader and TOC's crc32 match if they're both + // non-zero and only the second line above, the TOC, + // is what matters. + }) +} + +// rZipBytes returns the bytes of a recursive zip file, without +// putting it on disk and triggering certain virus scanners. +func rZipBytes() []byte { + s := ` +0000000 50 4b 03 04 14 00 00 00 08 00 08 03 64 3c f9 f4 +0000010 89 64 48 01 00 00 b8 01 00 00 07 00 00 00 72 2f +0000020 72 2e 7a 69 70 00 25 00 da ff 50 4b 03 04 14 00 +0000030 00 00 08 00 08 03 64 3c f9 f4 89 64 48 01 00 00 +0000040 b8 01 00 00 07 00 00 00 72 2f 72 2e 7a 69 70 00 +0000050 2f 00 d0 ff 00 25 00 da ff 50 4b 03 04 14 00 00 +0000060 00 08 00 08 03 64 3c f9 f4 89 64 48 01 00 00 b8 +0000070 01 00 00 07 00 00 00 72 2f 72 2e 7a 69 70 00 2f +0000080 00 d0 ff c2 54 8e 57 39 00 05 00 fa ff c2 54 8e +0000090 57 39 00 05 00 fa ff 00 05 00 fa ff 00 14 00 eb +00000a0 ff c2 54 8e 57 39 00 05 00 fa ff 00 05 00 fa ff +00000b0 00 14 00 eb ff 42 88 21 c4 00 00 14 00 eb ff 42 +00000c0 88 21 c4 00 00 14 00 eb ff 42 88 21 c4 00 00 14 +00000d0 00 eb ff 42 88 21 c4 00 00 14 00 eb ff 42 88 21 +00000e0 c4 00 00 00 00 ff ff 00 00 00 ff ff 00 34 00 cb +00000f0 ff 42 88 21 c4 00 00 00 00 ff ff 00 00 00 ff ff +0000100 00 34 00 cb ff 42 e8 21 5e 0f 00 00 00 ff ff 0a +0000110 f0 66 64 12 61 c0 15 dc e8 a0 48 bf 48 af 2a b3 +0000120 20 c0 9b 95 0d c4 67 04 42 53 06 06 06 40 00 06 +0000130 00 f9 ff 6d 01 00 00 00 00 42 e8 21 5e 0f 00 00 +0000140 00 ff ff 0a f0 66 64 12 61 c0 15 dc e8 a0 48 bf +0000150 48 af 2a b3 20 c0 9b 95 0d c4 67 04 42 53 06 06 +0000160 06 40 00 06 00 f9 ff 6d 01 00 00 00 00 50 4b 01 +0000170 02 14 00 14 00 00 00 08 00 08 03 64 3c f9 f4 89 +0000180 64 48 01 00 00 b8 01 00 00 07 00 00 00 00 00 00 +0000190 00 00 00 00 00 00 00 00 00 00 00 72 2f 72 2e 7a +00001a0 69 70 50 4b 05 06 00 00 00 00 01 00 01 00 35 00 +00001b0 00 00 6d 01 00 00 00 00` + s = regexp.MustCompile(`[0-9a-f]{7}`).ReplaceAllString(s, "") + s = regexp.MustCompile(`\s+`).ReplaceAllString(s, "") + b, err := hex.DecodeString(s) + if err != nil { + panic(err) + } + return b +} + +func returnRecursiveZip() (r io.ReaderAt, size int64) { + b := rZipBytes() + return bytes.NewReader(b), int64(len(b)) +} + +// biggestZipBytes returns the bytes of a zip file biggest.zip +// that contains a zip file bigger.zip that contains a zip file +// big.zip that contains big.file, which contains 2³²-1 zeros. +// The big.zip file is interesting because it has no zip64 header, +// much like the innermost zip files in the well-known 42.zip. +// +// biggest.zip was generated by changing isZip64 to use > uint32max +// instead of >= uint32max and then running this program: +// +// package main +// +// import ( +// "archive/zip" +// "bytes" +// "io" +// "log" +// "os" +// ) +// +// type zeros struct{} +// +// func (zeros) Read(b []byte) (int, error) { +// for i := range b { +// b[i] = 0 +// } +// return len(b), nil +// } +// +// func main() { +// bigZip := makeZip("big.file", io.LimitReader(zeros{}, 1<<32-1)) +// if err := os.WriteFile("/tmp/big.zip", bigZip, 0666); err != nil { +// log.Fatal(err) +// } +// +// biggerZip := makeZip("big.zip", bytes.NewReader(bigZip)) +// if err := os.WriteFile("/tmp/bigger.zip", biggerZip, 0666); err != nil { +// log.Fatal(err) +// } +// +// biggestZip := makeZip("bigger.zip", bytes.NewReader(biggerZip)) +// if err := os.WriteFile("/tmp/biggest.zip", biggestZip, 0666); err != nil { +// log.Fatal(err) +// } +// } +// +// func makeZip(name string, r io.Reader) []byte { +// var buf bytes.Buffer +// w := zip.NewWriter(&buf) +// wf, err := w.Create(name) +// if err != nil { +// log.Fatal(err) +// } +// if _, err = io.Copy(wf, r); err != nil { +// log.Fatal(err) +// } +// if err := w.Close(); err != nil { +// log.Fatal(err) +// } +// return buf.Bytes() +// } +// +// The 4 GB of zeros compresses to 4 MB, which compresses to 20 kB, +// which compresses to 1252 bytes (in the hex dump below). +// +// It's here in hex for the same reason as rZipBytes above: to avoid +// problems with on-disk virus scanners or other zip processors. +// +func biggestZipBytes() []byte { + s := ` +0000000 50 4b 03 04 14 00 08 00 08 00 00 00 00 00 00 00 +0000010 00 00 00 00 00 00 00 00 00 00 0a 00 00 00 62 69 +0000020 67 67 65 72 2e 7a 69 70 ec dc 6b 4c 53 67 18 07 +0000030 f0 16 c5 ca 65 2e cb b8 94 20 61 1f 44 33 c7 cd +0000040 c0 86 4a b5 c0 62 8a 61 05 c6 cd 91 b2 54 8c 1b +0000050 63 8b 03 9c 1b 95 52 5a e3 a0 19 6c b2 05 59 44 +0000060 64 9d 73 83 71 11 46 61 14 b9 1d 14 09 4a c3 60 +0000070 2e 4c 6e a5 60 45 02 62 81 95 b6 94 9e 9e 77 e7 +0000080 d0 43 b6 f8 71 df 96 3c e7 a4 69 ce bf cf e9 79 +0000090 ce ef 79 3f bf f1 31 db b6 bb 31 76 92 e7 f3 07 +00000a0 8b fc 9c ca cc 08 cc cb cc 5e d2 1c 88 d9 7e bb +00000b0 4f bb 3a 3f 75 f1 5d 7f 8f c2 68 67 77 8f 25 ff +00000c0 84 e2 93 2d ef a4 95 3d 71 4e 2c b9 b0 87 c3 be +00000d0 3d f8 a7 60 24 61 c5 ef ae 9e c8 6c 6d 4e 69 c8 +00000e0 67 65 34 f8 37 76 2d 76 5c 54 f3 95 65 49 c7 0f +00000f0 18 71 4b 7e 5b 6a d1 79 47 61 41 b0 4e 2a 74 45 +0000100 43 58 12 b2 5a a5 c6 7d 68 55 88 d4 98 75 18 6d +0000110 08 d1 1f 8f 5a 9e 96 ee 45 cf a4 84 4e 4b e8 50 +0000120 a7 13 d9 06 de 52 81 97 36 b2 d7 b8 fc 2b 5f 55 +0000130 23 1f 32 59 cf 30 27 fb e2 8a b9 de 45 dd 63 9c +0000140 4b b5 8b 96 4c 7a 62 62 cc a1 a7 cf fa f1 fe dd +0000150 54 62 11 bf 36 78 b3 c7 b1 b5 f2 61 4d 4e dd 66 +0000160 32 2e e6 70 34 5f f4 c9 e6 6c 43 6f da 6b c6 c3 +0000170 09 2c ce 09 57 7f d2 7e b4 23 ba 7c 1b 99 bc 22 +0000180 3e f1 de 91 2f e3 9c 1b 82 cc c2 84 39 aa e6 de +0000190 b4 69 fc cc cb 72 a6 61 45 f0 d3 1d 26 19 7c 8d +00001a0 29 c8 66 02 be 77 6a f9 3d 34 79 17 19 c8 96 24 +00001b0 a3 ac e4 dd 3b 1a 8e c6 fe 96 38 6b bf 67 5a 23 +00001c0 f4 16 f4 e6 8a b4 fc c2 cd bf 95 66 1d bb 35 aa +00001d0 92 7d 66 d8 08 8d a5 1f 54 2a af 09 cf 61 ff d2 +00001e0 85 9d 8f b6 d7 88 07 4a 86 03 db 64 f3 d9 92 73 +00001f0 df ec a7 fc 23 4c 8d 83 79 63 2a d9 fd 8d b3 c8 +0000200 8f 7e d4 19 85 e6 8d 1c 76 f0 8b 58 32 fd 9a d6 +0000210 85 e2 48 ad c3 d5 60 6f 7e 22 dd ef 09 49 7c 7f +0000220 3a 45 c3 71 b7 df f3 4c 63 fb b5 d9 31 5f 6e d6 +0000230 24 1d a4 4a fe 32 a7 5c 16 48 5c 3e 08 6b 8a d3 +0000240 25 1d a2 12 a5 59 24 ea 20 5f 52 6d ad 94 db 6b +0000250 94 b9 5d eb 4b a7 5c 44 bb 1e f2 3c 6b cf 52 c9 +0000260 e9 e5 ba 06 b9 c4 e5 0a d0 00 0d d0 00 0d d0 00 +0000270 0d d0 00 0d d0 00 0d d0 00 0d d0 00 0d d0 00 0d +0000280 d0 00 0d d0 00 0d d0 00 0d d0 00 0d d0 00 0d d0 +0000290 00 0d d0 00 0d d0 00 0d d0 00 0d d0 00 0d d0 00 +00002a0 0d d0 00 cd ff 9e 46 86 fa a7 7d 3a 43 d7 8e 10 +00002b0 52 e9 be e6 6e cf eb 9e 85 4d 65 ce cc 30 c1 44 +00002c0 c0 4e af bc 9c 6c 4b a0 d7 54 ff 1d d5 5c 89 fb +00002d0 b5 34 7e c4 c2 9e f5 a0 f6 5b 7e 6e ca 73 c7 ef +00002e0 5d be de f9 e8 81 eb a5 0a a5 63 54 2c d7 1c d1 +00002f0 89 17 85 f8 16 94 f2 8a b2 a3 f5 b6 6d df 75 cd +0000300 90 dd 64 bd 5d 55 4e f2 55 19 1b b7 cc ef 1b ea +0000310 2e 05 9c f4 aa 1e a8 cd a6 82 c7 59 0f 5e 9d e0 +0000320 bb fc 6c d6 99 23 eb 36 ad c6 c5 e1 d8 e1 e2 3e +0000330 d9 90 5a f7 91 5d 6f bc 33 6d 98 47 d2 7c 2e 2f +0000340 99 a4 25 72 85 49 2c be 0b 5b af 8f e5 6e 81 a6 +0000350 a3 5a 6f 39 53 3a ab 7a 8b 1e 26 f7 46 6c 7d 26 +0000360 53 b3 22 31 94 d3 83 f2 18 4d f5 92 33 27 53 97 +0000370 0f d3 e6 55 9c a6 c5 31 87 6f d3 f3 ae 39 6f 56 +0000380 10 7b ab 7e d0 b4 ca f2 b8 05 be 3f 0e 6e 5a 75 +0000390 ab 0c f5 37 0e ba 8e 75 71 7a aa ed 7a dd 6a 63 +00003a0 be 9b a0 97 27 6a 6f e7 d3 8b c4 7c ec d3 91 56 +00003b0 d9 ac 5e bf 16 42 2f 00 1f 93 a2 23 87 bd e2 59 +00003c0 a0 de 1a 66 c8 62 eb 55 8f 91 17 b4 61 42 7a 50 +00003d0 40 03 34 40 03 34 40 03 34 40 03 34 40 03 34 40 +00003e0 03 34 40 03 34 40 03 34 40 03 34 40 03 34 40 03 +00003f0 34 40 03 34 40 03 34 ff 85 86 90 8b ea 67 90 0d +0000400 e1 42 1b d2 61 d6 79 ec fd 3e 44 28 a4 51 6c 5c +0000410 fc d2 72 ca ba 82 18 46 16 61 cd 93 a9 0f d1 24 +0000420 17 99 e2 2c 71 16 84 0c c8 7a 13 0f 9a 5e c5 f0 +0000430 79 64 e2 12 4d c8 82 a1 81 19 2d aa 44 6d 87 54 +0000440 84 71 c1 f6 d4 ca 25 8c 77 b9 08 c7 c8 5e 10 8a +0000450 8f 61 ed 8c ba 30 1f 79 9a c7 60 34 2b b9 8c f8 +0000460 18 a6 83 1b e3 9f ad 79 fe fd 1b 8b f1 fc 41 6f +0000470 d4 13 1f e3 b8 83 ba 64 92 e7 eb e4 77 05 8f ba +0000480 fa 3b 00 00 ff ff 50 4b 07 08 a6 18 b1 91 5e 04 +0000490 00 00 e4 47 00 00 50 4b 01 02 14 00 14 00 08 00 +00004a0 08 00 00 00 00 00 a6 18 b1 91 5e 04 00 00 e4 47 +00004b0 00 00 0a 00 00 00 00 00 00 00 00 00 00 00 00 00 +00004c0 00 00 00 00 62 69 67 67 65 72 2e 7a 69 70 50 4b +00004d0 05 06 00 00 00 00 01 00 01 00 38 00 00 00 96 04 +00004e0 00 00 00 00` + s = regexp.MustCompile(`[0-9a-f]{7}`).ReplaceAllString(s, "") + s = regexp.MustCompile(`\s+`).ReplaceAllString(s, "") + b, err := hex.DecodeString(s) + if err != nil { + panic(err) + } + return b +} + +func returnBigZipBytes() (r io.ReaderAt, size int64) { + b := biggestZipBytes() + for i := 0; i < 2; i++ { + r, err := NewReader(bytes.NewReader(b), int64(len(b))) + if err != nil { + panic(err) + } + f, err := r.File[0].Open() + if err != nil { + panic(err) + } + b, err = io.ReadAll(f) + if err != nil { + panic(err) + } + } + return bytes.NewReader(b), int64(len(b)) +} + +func TestIssue8186(t *testing.T) { + // Directory headers & data found in the TOC of a JAR file. + dirEnts := []string{ + "PK\x01\x02\n\x00\n\x00\x00\b\x00\x004\x9d3?\xaa\x1b\x06\xf0\x81\x02\x00\x00\x81\x02\x00\x00-\x00\x05\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00res/drawable-xhdpi-v4/ic_actionbar_accept.png\xfe\xca\x00\x00\x00", + "PK\x01\x02\n\x00\n\x00\x00\b\x00\x004\x9d3?\x90K\x89\xc7t\n\x00\x00t\n\x00\x00\x0e\x00\x03\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xd1\x02\x00\x00resources.arsc\x00\x00\x00", + "PK\x01\x02\x14\x00\x14\x00\b\b\b\x004\x9d3?\xff$\x18\xed3\x03\x00\x00\xb4\b\x00\x00\x13\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00t\r\x00\x00AndroidManifest.xml", + "PK\x01\x02\x14\x00\x14\x00\b\b\b\x004\x9d3?\x14\xc5K\xab\x192\x02\x00\xc8\xcd\x04\x00\v\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xe8\x10\x00\x00classes.dex", + "PK\x01\x02\x14\x00\x14\x00\b\b\b\x004\x9d3?E\x96\nD\xac\x01\x00\x00P\x03\x00\x00&\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00:C\x02\x00res/layout/actionbar_set_wallpaper.xml", + "PK\x01\x02\x14\x00\x14\x00\b\b\b\x004\x9d3?Ļ\x14\xe3\xd8\x01\x00\x00\xd8\x03\x00\x00 \x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00:E\x02\x00res/layout/wallpaper_cropper.xml", + "PK\x01\x02\x14\x00\x14\x00\b\b\b\x004\x9d3?}\xc1\x15\x9eZ\x01\x00\x00!\x02\x00\x00\x14\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00`G\x02\x00META-INF/MANIFEST.MF", + "PK\x01\x02\x14\x00\x14\x00\b\b\b\x004\x9d3?\xe6\x98Ьo\x01\x00\x00\x84\x02\x00\x00\x10\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xfcH\x02\x00META-INF/CERT.SF", + "PK\x01\x02\x14\x00\x14\x00\b\b\b\x004\x9d3?\xbfP\x96b\x86\x04\x00\x00\xb2\x06\x00\x00\x11\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xa9J\x02\x00META-INF/CERT.RSA", + } + for i, s := range dirEnts { + var f File + err := readDirectoryHeader(&f, strings.NewReader(s)) + if err != nil { + t.Errorf("error reading #%d: %v", i, err) + } + } +} + +// Verify we return ErrUnexpectedEOF when length is short. +func TestIssue10957(t *testing.T) { + data := []byte("PK\x03\x040000000PK\x01\x0200000" + + "0000000000000000000\x00" + + "\x00\x00\x00\x00\x00000000000000PK\x01" + + "\x020000000000000000000" + + "00000\v\x00\x00\x00\x00\x00000000000" + + "00000000000000PK\x01\x0200" + + "00000000000000000000" + + "00\v\x00\x00\x00\x00\x00000000000000" + + "00000000000PK\x01\x020000<" + + "0\x00\x0000000000000000\v\x00\v" + + "\x00\x00\x00\x00\x0000000000\x00\x00\x00\x00000" + + "00000000PK\x01\x0200000000" + + "0000000000000000\v\x00\x00\x00" + + "\x00\x0000PK\x05\x06000000\x05\x000000" + + "\v\x00\x00\x00\x00\x00") + z, err := NewReader(bytes.NewReader(data), int64(len(data))) + if err != nil { + t.Fatal(err) + } + for i, f := range z.File { + r, err := f.Open() + if err != nil { + continue + } + if f.UncompressedSize64 < 1e6 { + n, err := io.Copy(io.Discard, r) + if i == 3 && err != io.ErrUnexpectedEOF { + t.Errorf("File[3] error = %v; want io.ErrUnexpectedEOF", err) + } + if err == nil && uint64(n) != f.UncompressedSize64 { + t.Errorf("file %d: bad size: copied=%d; want=%d", i, n, f.UncompressedSize64) + } + } + r.Close() + } +} + +// Verify that this particular malformed zip file is rejected. +func TestIssue10956(t *testing.T) { + data := []byte("PK\x06\x06PK\x06\a0000\x00\x00\x00\x00\x00\x00\x00\x00" + + "0000PK\x05\x06000000000000" + + "0000\v\x00000\x00\x00\x00\x00\x00\x00\x000") + r, err := NewReader(bytes.NewReader(data), int64(len(data))) + if err == nil { + t.Errorf("got nil error, want ErrFormat") + } + if r != nil { + t.Errorf("got non-nil Reader, want nil") + } +} + +// Verify we return ErrUnexpectedEOF when reading truncated data descriptor. +func TestIssue11146(t *testing.T) { + data := []byte("PK\x03\x040000000000000000" + + "000000\x01\x00\x00\x000\x01\x00\x00\xff\xff0000" + + "0000000000000000PK\x01\x02" + + "0000\b0\b\x00000000000000" + + "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x000000PK\x05\x06\x00\x00" + + "\x00\x0000\x01\x0000008\x00\x00\x00\x00\x00") + z, err := NewReader(bytes.NewReader(data), int64(len(data))) + if err != nil { + t.Fatal(err) + } + r, err := z.File[0].Open() + if err != nil { + t.Fatal(err) + } + _, err = io.ReadAll(r) + if err != io.ErrUnexpectedEOF { + t.Errorf("File[0] error = %v; want io.ErrUnexpectedEOF", err) + } + r.Close() +} + +// Verify we do not treat non-zip64 archives as zip64 +func TestIssue12449(t *testing.T) { + data := []byte{ + 0x50, 0x4b, 0x03, 0x04, 0x14, 0x00, 0x08, 0x00, + 0x00, 0x00, 0x6b, 0xb4, 0xba, 0x46, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x03, 0x00, 0x18, 0x00, 0xca, 0x64, + 0x55, 0x75, 0x78, 0x0b, 0x00, 0x50, 0x4b, 0x05, + 0x06, 0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x01, + 0x00, 0x49, 0x00, 0x00, 0x00, 0x44, 0x00, 0x00, + 0x00, 0x31, 0x31, 0x31, 0x32, 0x32, 0x32, 0x0a, + 0x50, 0x4b, 0x07, 0x08, 0x1d, 0x88, 0x77, 0xb0, + 0x07, 0x00, 0x00, 0x00, 0x07, 0x00, 0x00, 0x00, + 0x50, 0x4b, 0x01, 0x02, 0x14, 0x03, 0x14, 0x00, + 0x08, 0x00, 0x00, 0x00, 0x6b, 0xb4, 0xba, 0x46, + 0x1d, 0x88, 0x77, 0xb0, 0x07, 0x00, 0x00, 0x00, + 0x07, 0x00, 0x00, 0x00, 0x03, 0x00, 0x18, 0x00, + 0x20, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0xa0, 0x81, 0x00, 0x00, 0x00, 0x00, 0xca, 0x64, + 0x55, 0x75, 0x78, 0x0b, 0x00, 0x50, 0x4b, 0x05, + 0x06, 0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x01, + 0x00, 0x49, 0x00, 0x00, 0x00, 0x44, 0x00, 0x00, + 0x00, 0x97, 0x2b, 0x49, 0x23, 0x05, 0xc5, 0x0b, + 0xa7, 0xd1, 0x52, 0xa2, 0x9c, 0x50, 0x4b, 0x06, + 0x07, 0xc8, 0x19, 0xc1, 0xaf, 0x94, 0x9c, 0x61, + 0x44, 0xbe, 0x94, 0x19, 0x42, 0x58, 0x12, 0xc6, + 0x5b, 0x50, 0x4b, 0x05, 0x06, 0x00, 0x00, 0x00, + 0x00, 0x01, 0x00, 0x01, 0x00, 0x69, 0x00, 0x00, + 0x00, 0x50, 0x00, 0x00, 0x00, 0x00, 0x00, + } + // Read in the archive. + _, err := NewReader(bytes.NewReader([]byte(data)), int64(len(data))) + if err != nil { + t.Errorf("Error reading the archive: %v", err) + } +} + +func TestFS(t *testing.T) { + for _, test := range []struct { + file string + want []string + }{ + { + "testdata/unix.zip", + []string{"hello", "dir/bar", "readonly"}, + }, + { + "testdata/subdir.zip", + []string{"a/b/c"}, + }, + } { + t.Run(test.file, func(t *testing.T) { + t.Parallel() + z, err := OpenReader(test.file) + if err != nil { + t.Fatal(err) + } + defer z.Close() + if err := fstest.TestFS(z, test.want...); err != nil { + t.Error(err) + } + }) + } +} + +func TestFSModTime(t *testing.T) { + t.Parallel() + z, err := OpenReader("testdata/subdir.zip") + if err != nil { + t.Fatal(err) + } + defer z.Close() + + for _, test := range []struct { + name string + want time.Time + }{ + { + "a", + time.Date(2021, 4, 19, 12, 29, 56, 0, timeZone(-7*time.Hour)).UTC(), + }, + { + "a/b/c", + time.Date(2021, 4, 19, 12, 29, 59, 0, timeZone(-7*time.Hour)).UTC(), + }, + } { + fi, err := fs.Stat(z, test.name) + if err != nil { + t.Errorf("%s: %v", test.name, err) + continue + } + if got := fi.ModTime(); !got.Equal(test.want) { + t.Errorf("%s: got modtime %v, want %v", test.name, got, test.want) + } + } +} + +func TestCVE202127919(t *testing.T) { + // Archive containing only the file "../test.txt" + data := []byte{ + 0x50, 0x4b, 0x03, 0x04, 0x14, 0x00, 0x08, 0x00, + 0x08, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x0b, 0x00, 0x00, 0x00, 0x2e, 0x2e, + 0x2f, 0x74, 0x65, 0x73, 0x74, 0x2e, 0x74, 0x78, + 0x74, 0x0a, 0xc9, 0xc8, 0x2c, 0x56, 0xc8, 0x2c, + 0x56, 0x48, 0x54, 0x28, 0x49, 0x2d, 0x2e, 0x51, + 0x28, 0x49, 0xad, 0x28, 0x51, 0x48, 0xcb, 0xcc, + 0x49, 0xd5, 0xe3, 0x02, 0x04, 0x00, 0x00, 0xff, + 0xff, 0x50, 0x4b, 0x07, 0x08, 0xc0, 0xd7, 0xed, + 0xc3, 0x20, 0x00, 0x00, 0x00, 0x1a, 0x00, 0x00, + 0x00, 0x50, 0x4b, 0x01, 0x02, 0x14, 0x00, 0x14, + 0x00, 0x08, 0x00, 0x08, 0x00, 0x00, 0x00, 0x00, + 0x00, 0xc0, 0xd7, 0xed, 0xc3, 0x20, 0x00, 0x00, + 0x00, 0x1a, 0x00, 0x00, 0x00, 0x0b, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x2e, + 0x2e, 0x2f, 0x74, 0x65, 0x73, 0x74, 0x2e, 0x74, + 0x78, 0x74, 0x50, 0x4b, 0x05, 0x06, 0x00, 0x00, + 0x00, 0x00, 0x01, 0x00, 0x01, 0x00, 0x39, 0x00, + 0x00, 0x00, 0x59, 0x00, 0x00, 0x00, 0x00, 0x00, + } + r, err := NewReader(bytes.NewReader([]byte(data)), int64(len(data))) + if err != nil { + t.Fatalf("Error reading the archive: %v", err) + } + _, err = r.Open("test.txt") + if err != nil { + t.Errorf("Error reading file: %v", err) + } + if len(r.File) != 1 { + t.Fatalf("No entries in the file list") + } + if r.File[0].Name != "../test.txt" { + t.Errorf("Unexpected entry name: %s", r.File[0].Name) + } + if _, err := r.File[0].Open(); err != nil { + t.Errorf("Error opening file: %v", err) + } +} + +func TestCVE202133196(t *testing.T) { + // Archive that indicates it has 1 << 128 -1 files, + // this would previously cause a panic due to attempting + // to allocate a slice with 1 << 128 -1 elements. + data := []byte{ + 0x50, 0x4b, 0x03, 0x04, 0x14, 0x00, 0x08, 0x08, + 0x08, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, 0x01, 0x02, + 0x03, 0x62, 0x61, 0x65, 0x03, 0x04, 0x00, 0x00, + 0xff, 0xff, 0x50, 0x4b, 0x07, 0x08, 0xbe, 0x20, + 0x5c, 0x6c, 0x09, 0x00, 0x00, 0x00, 0x03, 0x00, + 0x00, 0x00, 0x50, 0x4b, 0x01, 0x02, 0x14, 0x00, + 0x14, 0x00, 0x08, 0x08, 0x08, 0x00, 0x00, 0x00, + 0x00, 0x00, 0xbe, 0x20, 0x5c, 0x6c, 0x09, 0x00, + 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, 0x03, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x01, 0x02, 0x03, 0x50, 0x4b, 0x06, 0x06, 0x2c, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x2d, + 0x00, 0x2d, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0x31, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x3a, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x50, 0x4b, 0x06, 0x07, 0x00, + 0x00, 0x00, 0x00, 0x6b, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x50, + 0x4b, 0x05, 0x06, 0x00, 0x00, 0x00, 0x00, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0x00, 0x00, + } + _, err := NewReader(bytes.NewReader(data), int64(len(data))) + if err != ErrFormat { + t.Fatalf("unexpected error, got: %v, want: %v", err, ErrFormat) + } + + // Also check that an archive containing a handful of empty + // files doesn't cause an issue + b := bytes.NewBuffer(nil) + w := NewWriter(b) + for i := 0; i < 5; i++ { + _, err := w.Create("") + if err != nil { + t.Fatalf("Writer.Create failed: %s", err) + } + } + if err := w.Close(); err != nil { + t.Fatalf("Writer.Close failed: %s", err) + } + r, err := NewReader(bytes.NewReader(b.Bytes()), int64(b.Len())) + if err != nil { + t.Fatalf("NewReader failed: %s", err) + } + if len(r.File) != 5 { + t.Errorf("Archive has unexpected number of files, got %d, want 5", len(r.File)) + } +} + +func TestCVE202139293(t *testing.T) { + // directory size is so large, that the check in Reader.init + // overflows when subtracting from the archive size, causing + // the pre-allocation check to be bypassed. + data := []byte{ + 0x50, 0x4b, 0x06, 0x06, 0x05, 0x06, 0x31, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x50, 0x4b, + 0x06, 0x07, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01, + 0x00, 0x00, 0x50, 0x4b, 0x05, 0x06, 0x00, 0x1a, 0x00, 0x00, 0x00, 0x00, 0x00, 0x50, 0x4b, + 0x06, 0x07, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01, + 0x00, 0x00, 0x00, 0x50, 0x4b, 0x05, 0x06, 0x00, 0x31, 0x00, 0x00, 0x00, 0x00, 0xff, 0xff, + 0xff, 0x50, 0xfe, 0x00, 0xff, 0x00, 0x3a, 0x00, 0x00, 0x00, 0xff, + } + _, err := NewReader(bytes.NewReader(data), int64(len(data))) + if err != ErrFormat { + t.Fatalf("unexpected error, got: %v, want: %v", err, ErrFormat) + } +} + +func TestCVE202141772(t *testing.T) { + // Archive contains a file whose name is exclusively made up of '/', '\' + // characters, or "../", "..\" paths, which would previously cause a panic. + // + // Length Method Size Cmpr Date Time CRC-32 Name + // -------- ------ ------- ---- ---------- ----- -------- ---- + // 0 Stored 0 0% 08-05-2021 18:32 00000000 / + // 0 Stored 0 0% 09-14-2021 12:59 00000000 // + // 0 Stored 0 0% 09-14-2021 12:59 00000000 \ + // 11 Stored 11 0% 09-14-2021 13:04 0d4a1185 /test.txt + // -------- ------- --- ------- + // 11 11 0% 4 files + data := []byte{ + 0x50, 0x4b, 0x03, 0x04, 0x0a, 0x00, 0x00, 0x08, + 0x00, 0x00, 0x06, 0x94, 0x05, 0x53, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x2f, 0x50, + 0x4b, 0x03, 0x04, 0x0a, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x78, 0x67, 0x2e, 0x53, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x02, 0x00, 0x00, 0x00, 0x2f, 0x2f, 0x50, + 0x4b, 0x03, 0x04, 0x0a, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x78, 0x67, 0x2e, 0x53, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x01, 0x00, 0x00, 0x00, 0x5c, 0x50, 0x4b, + 0x03, 0x04, 0x0a, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x91, 0x68, 0x2e, 0x53, 0x85, 0x11, 0x4a, 0x0d, + 0x0b, 0x00, 0x00, 0x00, 0x0b, 0x00, 0x00, 0x00, + 0x09, 0x00, 0x00, 0x00, 0x2f, 0x74, 0x65, 0x73, + 0x74, 0x2e, 0x74, 0x78, 0x74, 0x68, 0x65, 0x6c, + 0x6c, 0x6f, 0x20, 0x77, 0x6f, 0x72, 0x6c, 0x64, + 0x50, 0x4b, 0x01, 0x02, 0x14, 0x03, 0x0a, 0x00, + 0x00, 0x08, 0x00, 0x00, 0x06, 0x94, 0x05, 0x53, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x10, 0x00, + 0xed, 0x41, 0x00, 0x00, 0x00, 0x00, 0x2f, 0x50, + 0x4b, 0x01, 0x02, 0x3f, 0x00, 0x0a, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x78, 0x67, 0x2e, 0x53, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x02, 0x00, 0x24, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x20, 0x00, 0x00, + 0x00, 0x1f, 0x00, 0x00, 0x00, 0x2f, 0x2f, 0x0a, + 0x00, 0x20, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01, + 0x00, 0x18, 0x00, 0x93, 0x98, 0x25, 0x57, 0x25, + 0xa9, 0xd7, 0x01, 0x93, 0x98, 0x25, 0x57, 0x25, + 0xa9, 0xd7, 0x01, 0x93, 0x98, 0x25, 0x57, 0x25, + 0xa9, 0xd7, 0x01, 0x50, 0x4b, 0x01, 0x02, 0x3f, + 0x00, 0x0a, 0x00, 0x00, 0x00, 0x00, 0x00, 0x78, + 0x67, 0x2e, 0x53, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01, + 0x00, 0x24, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x20, 0x00, 0x00, 0x00, 0x3f, 0x00, 0x00, + 0x00, 0x5c, 0x0a, 0x00, 0x20, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x01, 0x00, 0x18, 0x00, 0x93, 0x98, + 0x25, 0x57, 0x25, 0xa9, 0xd7, 0x01, 0x93, 0x98, + 0x25, 0x57, 0x25, 0xa9, 0xd7, 0x01, 0x93, 0x98, + 0x25, 0x57, 0x25, 0xa9, 0xd7, 0x01, 0x50, 0x4b, + 0x01, 0x02, 0x3f, 0x00, 0x0a, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x91, 0x68, 0x2e, 0x53, 0x85, 0x11, + 0x4a, 0x0d, 0x0b, 0x00, 0x00, 0x00, 0x0b, 0x00, + 0x00, 0x00, 0x09, 0x00, 0x24, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x20, 0x00, 0x00, 0x00, + 0x5e, 0x00, 0x00, 0x00, 0x2f, 0x74, 0x65, 0x73, + 0x74, 0x2e, 0x74, 0x78, 0x74, 0x0a, 0x00, 0x20, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x18, + 0x00, 0xa9, 0x80, 0x51, 0x01, 0x26, 0xa9, 0xd7, + 0x01, 0x31, 0xd1, 0x57, 0x01, 0x26, 0xa9, 0xd7, + 0x01, 0xdf, 0x48, 0x85, 0xf9, 0x25, 0xa9, 0xd7, + 0x01, 0x50, 0x4b, 0x05, 0x06, 0x00, 0x00, 0x00, + 0x00, 0x04, 0x00, 0x04, 0x00, 0x31, 0x01, 0x00, + 0x00, 0x90, 0x00, 0x00, 0x00, 0x00, 0x00, + } + r, err := NewReader(bytes.NewReader([]byte(data)), int64(len(data))) + if err != nil { + t.Fatalf("Error reading the archive: %v", err) + } + entryNames := []string{`/`, `//`, `\`, `/test.txt`} + var names []string + for _, f := range r.File { + names = append(names, f.Name) + if _, err := f.Open(); err != nil { + t.Errorf("Error opening %q: %v", f.Name, err) + } + if _, err := r.Open(f.Name); err == nil { + t.Errorf("Opening %q with fs.FS API succeeded", f.Name) + } + } + if !reflect.DeepEqual(names, entryNames) { + t.Errorf("Unexpected file entries: %q", names) + } + if _, err := r.Open(""); err == nil { + t.Errorf("Opening %q with fs.FS API succeeded", "") + } + if _, err := r.Open("test.txt"); err != nil { + t.Errorf("Error opening %q with fs.FS API: %v", "test.txt", err) + } + dirEntries, err := fs.ReadDir(r, ".") + if err != nil { + t.Fatalf("Error reading the root directory: %v", err) + } + if len(dirEntries) != 1 || dirEntries[0].Name() != "test.txt" { + t.Errorf("Unexpected directory entries") + for _, dirEntry := range dirEntries { + _, err := r.Open(dirEntry.Name()) + t.Logf("%q (Open error: %v)", dirEntry.Name(), err) + } + t.FailNow() + } + info, err := dirEntries[0].Info() + if err != nil { + t.Fatalf("Error reading info entry: %v", err) + } + if name := info.Name(); name != "test.txt" { + t.Errorf("Inconsistent name in info entry: %v", name) + } +} diff --git a/src/archive/zip/register.go b/src/archive/zip/register.go new file mode 100644 index 0000000..4389246 --- /dev/null +++ b/src/archive/zip/register.go @@ -0,0 +1,147 @@ +// Copyright 2010 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package zip + +import ( + "compress/flate" + "errors" + "io" + "sync" +) + +// A Compressor returns a new compressing writer, writing to w. +// The WriteCloser's Close method must be used to flush pending data to w. +// The Compressor itself must be safe to invoke from multiple goroutines +// simultaneously, but each returned writer will be used only by +// one goroutine at a time. +type Compressor func(w io.Writer) (io.WriteCloser, error) + +// A Decompressor returns a new decompressing reader, reading from r. +// The ReadCloser's Close method must be used to release associated resources. +// The Decompressor itself must be safe to invoke from multiple goroutines +// simultaneously, but each returned reader will be used only by +// one goroutine at a time. +type Decompressor func(r io.Reader) io.ReadCloser + +var flateWriterPool sync.Pool + +func newFlateWriter(w io.Writer) io.WriteCloser { + fw, ok := flateWriterPool.Get().(*flate.Writer) + if ok { + fw.Reset(w) + } else { + fw, _ = flate.NewWriter(w, 5) + } + return &pooledFlateWriter{fw: fw} +} + +type pooledFlateWriter struct { + mu sync.Mutex // guards Close and Write + fw *flate.Writer +} + +func (w *pooledFlateWriter) Write(p []byte) (n int, err error) { + w.mu.Lock() + defer w.mu.Unlock() + if w.fw == nil { + return 0, errors.New("Write after Close") + } + return w.fw.Write(p) +} + +func (w *pooledFlateWriter) Close() error { + w.mu.Lock() + defer w.mu.Unlock() + var err error + if w.fw != nil { + err = w.fw.Close() + flateWriterPool.Put(w.fw) + w.fw = nil + } + return err +} + +var flateReaderPool sync.Pool + +func newFlateReader(r io.Reader) io.ReadCloser { + fr, ok := flateReaderPool.Get().(io.ReadCloser) + if ok { + fr.(flate.Resetter).Reset(r, nil) + } else { + fr = flate.NewReader(r) + } + return &pooledFlateReader{fr: fr} +} + +type pooledFlateReader struct { + mu sync.Mutex // guards Close and Read + fr io.ReadCloser +} + +func (r *pooledFlateReader) Read(p []byte) (n int, err error) { + r.mu.Lock() + defer r.mu.Unlock() + if r.fr == nil { + return 0, errors.New("Read after Close") + } + return r.fr.Read(p) +} + +func (r *pooledFlateReader) Close() error { + r.mu.Lock() + defer r.mu.Unlock() + var err error + if r.fr != nil { + err = r.fr.Close() + flateReaderPool.Put(r.fr) + r.fr = nil + } + return err +} + +var ( + compressors sync.Map // map[uint16]Compressor + decompressors sync.Map // map[uint16]Decompressor +) + +func init() { + compressors.Store(Store, Compressor(func(w io.Writer) (io.WriteCloser, error) { return &nopCloser{w}, nil })) + compressors.Store(Deflate, Compressor(func(w io.Writer) (io.WriteCloser, error) { return newFlateWriter(w), nil })) + + decompressors.Store(Store, Decompressor(io.NopCloser)) + decompressors.Store(Deflate, Decompressor(newFlateReader)) +} + +// RegisterDecompressor allows custom decompressors for a specified method ID. +// The common methods Store and Deflate are built in. +func RegisterDecompressor(method uint16, dcomp Decompressor) { + if _, dup := decompressors.LoadOrStore(method, dcomp); dup { + panic("decompressor already registered") + } +} + +// RegisterCompressor registers custom compressors for a specified method ID. +// The common methods Store and Deflate are built in. +func RegisterCompressor(method uint16, comp Compressor) { + if _, dup := compressors.LoadOrStore(method, comp); dup { + panic("compressor already registered") + } +} + +func compressor(method uint16) Compressor { + ci, ok := compressors.Load(method) + if !ok { + return nil + } + return ci.(Compressor) +} + +func decompressor(method uint16) Decompressor { + di, ok := decompressors.Load(method) + if !ok { + return nil + } + return di.(Decompressor) +} diff --git a/src/archive/zip/struct.go b/src/archive/zip/struct.go new file mode 100644 index 0000000..6f73fb8 --- /dev/null +++ b/src/archive/zip/struct.go @@ -0,0 +1,392 @@ +// Copyright 2010 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +/* +Package zip provides support for reading and writing ZIP archives. + +See: https://www.pkware.com/appnote + +This package does not support disk spanning. + +A note about ZIP64: + +To be backwards compatible the FileHeader has both 32 and 64 bit Size +fields. The 64 bit fields will always contain the correct value and +for normal archives both fields will be the same. For files requiring +the ZIP64 format the 32 bit fields will be 0xffffffff and the 64 bit +fields must be used instead. +*/ +package zip + +import ( + "io/fs" + "path" + "time" +) + +// Compression methods. +const ( + Store uint16 = 0 // no compression + Deflate uint16 = 8 // DEFLATE compressed +) + +const ( + fileHeaderSignature = 0x04034b50 + directoryHeaderSignature = 0x02014b50 + directoryEndSignature = 0x06054b50 + directory64LocSignature = 0x07064b50 + directory64EndSignature = 0x06064b50 + dataDescriptorSignature = 0x08074b50 // de-facto standard; required by OS X Finder + fileHeaderLen = 30 // + filename + extra + directoryHeaderLen = 46 // + filename + extra + comment + directoryEndLen = 22 // + comment + dataDescriptorLen = 16 // four uint32: descriptor signature, crc32, compressed size, size + dataDescriptor64Len = 24 // two uint32: signature, crc32 | two uint64: compressed size, size + directory64LocLen = 20 // + directory64EndLen = 56 // + extra + + // Constants for the first byte in CreatorVersion. + creatorFAT = 0 + creatorUnix = 3 + creatorNTFS = 11 + creatorVFAT = 14 + creatorMacOSX = 19 + + // Version numbers. + zipVersion20 = 20 // 2.0 + zipVersion45 = 45 // 4.5 (reads and writes zip64 archives) + + // Limits for non zip64 files. + uint16max = (1 << 16) - 1 + uint32max = (1 << 32) - 1 + + // Extra header IDs. + // + // IDs 0..31 are reserved for official use by PKWARE. + // IDs above that range are defined by third-party vendors. + // Since ZIP lacked high precision timestamps (nor a official specification + // of the timezone used for the date fields), many competing extra fields + // have been invented. Pervasive use effectively makes them "official". + // + // See http://mdfs.net/Docs/Comp/Archiving/Zip/ExtraField + zip64ExtraID = 0x0001 // Zip64 extended information + ntfsExtraID = 0x000a // NTFS + unixExtraID = 0x000d // UNIX + extTimeExtraID = 0x5455 // Extended timestamp + infoZipUnixExtraID = 0x5855 // Info-ZIP Unix extension +) + +// FileHeader describes a file within a zip file. +// See the zip spec for details. +type FileHeader struct { + // Name is the name of the file. + // + // It must be a relative path, not start with a drive letter (such as "C:"), + // and must use forward slashes instead of back slashes. A trailing slash + // indicates that this file is a directory and should have no data. + // + // When reading zip files, the Name field is populated from + // the zip file directly and is not validated for correctness. + // It is the caller's responsibility to sanitize it as + // appropriate, including canonicalizing slash directions, + // validating that paths are relative, and preventing path + // traversal through filenames ("../../../"). + Name string + + // Comment is any arbitrary user-defined string shorter than 64KiB. + Comment string + + // NonUTF8 indicates that Name and Comment are not encoded in UTF-8. + // + // By specification, the only other encoding permitted should be CP-437, + // but historically many ZIP readers interpret Name and Comment as whatever + // the system's local character encoding happens to be. + // + // This flag should only be set if the user intends to encode a non-portable + // ZIP file for a specific localized region. Otherwise, the Writer + // automatically sets the ZIP format's UTF-8 flag for valid UTF-8 strings. + NonUTF8 bool + + CreatorVersion uint16 + ReaderVersion uint16 + Flags uint16 + + // Method is the compression method. If zero, Store is used. + Method uint16 + + // Modified is the modified time of the file. + // + // When reading, an extended timestamp is preferred over the legacy MS-DOS + // date field, and the offset between the times is used as the timezone. + // If only the MS-DOS date is present, the timezone is assumed to be UTC. + // + // When writing, an extended timestamp (which is timezone-agnostic) is + // always emitted. The legacy MS-DOS date field is encoded according to the + // location of the Modified time. + Modified time.Time + ModifiedTime uint16 // Deprecated: Legacy MS-DOS date; use Modified instead. + ModifiedDate uint16 // Deprecated: Legacy MS-DOS time; use Modified instead. + + CRC32 uint32 + CompressedSize uint32 // Deprecated: Use CompressedSize64 instead. + UncompressedSize uint32 // Deprecated: Use UncompressedSize64 instead. + CompressedSize64 uint64 + UncompressedSize64 uint64 + Extra []byte + ExternalAttrs uint32 // Meaning depends on CreatorVersion +} + +// FileInfo returns an fs.FileInfo for the FileHeader. +func (h *FileHeader) FileInfo() fs.FileInfo { + return headerFileInfo{h} +} + +// headerFileInfo implements fs.FileInfo. +type headerFileInfo struct { + fh *FileHeader +} + +func (fi headerFileInfo) Name() string { return path.Base(fi.fh.Name) } +func (fi headerFileInfo) Size() int64 { + if fi.fh.UncompressedSize64 > 0 { + return int64(fi.fh.UncompressedSize64) + } + return int64(fi.fh.UncompressedSize) +} +func (fi headerFileInfo) IsDir() bool { return fi.Mode().IsDir() } +func (fi headerFileInfo) ModTime() time.Time { + if fi.fh.Modified.IsZero() { + return fi.fh.ModTime() + } + return fi.fh.Modified.UTC() +} +func (fi headerFileInfo) Mode() fs.FileMode { return fi.fh.Mode() } +func (fi headerFileInfo) Type() fs.FileMode { return fi.fh.Mode().Type() } +func (fi headerFileInfo) Sys() any { return fi.fh } + +func (fi headerFileInfo) Info() (fs.FileInfo, error) { return fi, nil } + +// FileInfoHeader creates a partially-populated FileHeader from an +// fs.FileInfo. +// Because fs.FileInfo's Name method returns only the base name of +// the file it describes, it may be necessary to modify the Name field +// of the returned header to provide the full path name of the file. +// If compression is desired, callers should set the FileHeader.Method +// field; it is unset by default. +func FileInfoHeader(fi fs.FileInfo) (*FileHeader, error) { + size := fi.Size() + fh := &FileHeader{ + Name: fi.Name(), + UncompressedSize64: uint64(size), + } + fh.SetModTime(fi.ModTime()) + fh.SetMode(fi.Mode()) + if fh.UncompressedSize64 > uint32max { + fh.UncompressedSize = uint32max + } else { + fh.UncompressedSize = uint32(fh.UncompressedSize64) + } + return fh, nil +} + +type directoryEnd struct { + diskNbr uint32 // unused + dirDiskNbr uint32 // unused + dirRecordsThisDisk uint64 // unused + directoryRecords uint64 + directorySize uint64 + directoryOffset uint64 // relative to file + commentLen uint16 + comment string +} + +// timeZone returns a *time.Location based on the provided offset. +// If the offset is non-sensible, then this uses an offset of zero. +func timeZone(offset time.Duration) *time.Location { + const ( + minOffset = -12 * time.Hour // E.g., Baker island at -12:00 + maxOffset = +14 * time.Hour // E.g., Line island at +14:00 + offsetAlias = 15 * time.Minute // E.g., Nepal at +5:45 + ) + offset = offset.Round(offsetAlias) + if offset < minOffset || maxOffset < offset { + offset = 0 + } + return time.FixedZone("", int(offset/time.Second)) +} + +// msDosTimeToTime converts an MS-DOS date and time into a time.Time. +// The resolution is 2s. +// See: https://msdn.microsoft.com/en-us/library/ms724247(v=VS.85).aspx +func msDosTimeToTime(dosDate, dosTime uint16) time.Time { + return time.Date( + // date bits 0-4: day of month; 5-8: month; 9-15: years since 1980 + int(dosDate>>9+1980), + time.Month(dosDate>>5&0xf), + int(dosDate&0x1f), + + // time bits 0-4: second/2; 5-10: minute; 11-15: hour + int(dosTime>>11), + int(dosTime>>5&0x3f), + int(dosTime&0x1f*2), + 0, // nanoseconds + + time.UTC, + ) +} + +// timeToMsDosTime converts a time.Time to an MS-DOS date and time. +// The resolution is 2s. +// See: https://msdn.microsoft.com/en-us/library/ms724274(v=VS.85).aspx +func timeToMsDosTime(t time.Time) (fDate uint16, fTime uint16) { + fDate = uint16(t.Day() + int(t.Month())<<5 + (t.Year()-1980)<<9) + fTime = uint16(t.Second()/2 + t.Minute()<<5 + t.Hour()<<11) + return +} + +// ModTime returns the modification time in UTC using the legacy +// ModifiedDate and ModifiedTime fields. +// +// Deprecated: Use Modified instead. +func (h *FileHeader) ModTime() time.Time { + return msDosTimeToTime(h.ModifiedDate, h.ModifiedTime) +} + +// SetModTime sets the Modified, ModifiedTime, and ModifiedDate fields +// to the given time in UTC. +// +// Deprecated: Use Modified instead. +func (h *FileHeader) SetModTime(t time.Time) { + t = t.UTC() // Convert to UTC for compatibility + h.Modified = t + h.ModifiedDate, h.ModifiedTime = timeToMsDosTime(t) +} + +const ( + // Unix constants. The specification doesn't mention them, + // but these seem to be the values agreed on by tools. + s_IFMT = 0xf000 + s_IFSOCK = 0xc000 + s_IFLNK = 0xa000 + s_IFREG = 0x8000 + s_IFBLK = 0x6000 + s_IFDIR = 0x4000 + s_IFCHR = 0x2000 + s_IFIFO = 0x1000 + s_ISUID = 0x800 + s_ISGID = 0x400 + s_ISVTX = 0x200 + + msdosDir = 0x10 + msdosReadOnly = 0x01 +) + +// Mode returns the permission and mode bits for the FileHeader. +func (h *FileHeader) Mode() (mode fs.FileMode) { + switch h.CreatorVersion >> 8 { + case creatorUnix, creatorMacOSX: + mode = unixModeToFileMode(h.ExternalAttrs >> 16) + case creatorNTFS, creatorVFAT, creatorFAT: + mode = msdosModeToFileMode(h.ExternalAttrs) + } + if len(h.Name) > 0 && h.Name[len(h.Name)-1] == '/' { + mode |= fs.ModeDir + } + return mode +} + +// SetMode changes the permission and mode bits for the FileHeader. +func (h *FileHeader) SetMode(mode fs.FileMode) { + h.CreatorVersion = h.CreatorVersion&0xff | creatorUnix<<8 + h.ExternalAttrs = fileModeToUnixMode(mode) << 16 + + // set MSDOS attributes too, as the original zip does. + if mode&fs.ModeDir != 0 { + h.ExternalAttrs |= msdosDir + } + if mode&0200 == 0 { + h.ExternalAttrs |= msdosReadOnly + } +} + +// isZip64 reports whether the file size exceeds the 32 bit limit +func (h *FileHeader) isZip64() bool { + return h.CompressedSize64 >= uint32max || h.UncompressedSize64 >= uint32max +} + +func (f *FileHeader) hasDataDescriptor() bool { + return f.Flags&0x8 != 0 +} + +func msdosModeToFileMode(m uint32) (mode fs.FileMode) { + if m&msdosDir != 0 { + mode = fs.ModeDir | 0777 + } else { + mode = 0666 + } + if m&msdosReadOnly != 0 { + mode &^= 0222 + } + return mode +} + +func fileModeToUnixMode(mode fs.FileMode) uint32 { + var m uint32 + switch mode & fs.ModeType { + default: + m = s_IFREG + case fs.ModeDir: + m = s_IFDIR + case fs.ModeSymlink: + m = s_IFLNK + case fs.ModeNamedPipe: + m = s_IFIFO + case fs.ModeSocket: + m = s_IFSOCK + case fs.ModeDevice: + m = s_IFBLK + case fs.ModeDevice | fs.ModeCharDevice: + m = s_IFCHR + } + if mode&fs.ModeSetuid != 0 { + m |= s_ISUID + } + if mode&fs.ModeSetgid != 0 { + m |= s_ISGID + } + if mode&fs.ModeSticky != 0 { + m |= s_ISVTX + } + return m | uint32(mode&0777) +} + +func unixModeToFileMode(m uint32) fs.FileMode { + mode := fs.FileMode(m & 0777) + switch m & s_IFMT { + case s_IFBLK: + mode |= fs.ModeDevice + case s_IFCHR: + mode |= fs.ModeDevice | fs.ModeCharDevice + case s_IFDIR: + mode |= fs.ModeDir + case s_IFIFO: + mode |= fs.ModeNamedPipe + case s_IFLNK: + mode |= fs.ModeSymlink + case s_IFREG: + // nothing to do + case s_IFSOCK: + mode |= fs.ModeSocket + } + if m&s_ISGID != 0 { + mode |= fs.ModeSetgid + } + if m&s_ISUID != 0 { + mode |= fs.ModeSetuid + } + if m&s_ISVTX != 0 { + mode |= fs.ModeSticky + } + return mode +} diff --git a/src/archive/zip/testdata/crc32-not-streamed.zip b/src/archive/zip/testdata/crc32-not-streamed.zip Binary files differnew file mode 100644 index 0000000..f268d88 --- /dev/null +++ b/src/archive/zip/testdata/crc32-not-streamed.zip diff --git a/src/archive/zip/testdata/dd.zip b/src/archive/zip/testdata/dd.zip Binary files differnew file mode 100644 index 0000000..e53378b --- /dev/null +++ b/src/archive/zip/testdata/dd.zip diff --git a/src/archive/zip/testdata/go-no-datadesc-sig.zip.base64 b/src/archive/zip/testdata/go-no-datadesc-sig.zip.base64 new file mode 100644 index 0000000..1c2c071 --- /dev/null +++ b/src/archive/zip/testdata/go-no-datadesc-sig.zip.base64 @@ -0,0 +1 @@ +UEsDBBQACAAAAGWHaECoZTJ+BAAAAAQAAAAHABgAZm9vLnR4dFVUBQAD3lVZT3V4CwABBPUBAAAEFAAAAGZvbwqoZTJ+BAAAAAQAAABQSwMEFAAIAAAAZodoQOmzogQEAAAABAAAAAcAGABiYXIudHh0VVQFAAPgVVlPdXgLAAEE9QEAAAQUAAAAYmFyCumzogQEAAAABAAAAFBLAQIUAxQACAAAAGWHaECoZTJ+BAAAAAQAAAAHABgAAAAAAAAAAACkgQAAAABmb28udHh0VVQFAAPeVVlPdXgLAAEE9QEAAAQUAAAAUEsBAhQDFAAIAAAAZodoQOmzogQEAAAABAAAAAcAGAAAAAAAAAAAAKSBTQAAAGJhci50eHRVVAUAA+BVWU91eAsAAQT1AQAABBQAAABQSwUGAAAAAAIAAgCaAAAAmgAAAAAA diff --git a/src/archive/zip/testdata/go-with-datadesc-sig.zip b/src/archive/zip/testdata/go-with-datadesc-sig.zip Binary files differnew file mode 100644 index 0000000..bcfe121 --- /dev/null +++ b/src/archive/zip/testdata/go-with-datadesc-sig.zip diff --git a/src/archive/zip/testdata/gophercolor16x16.png b/src/archive/zip/testdata/gophercolor16x16.png Binary files differnew file mode 100644 index 0000000..48854ff --- /dev/null +++ b/src/archive/zip/testdata/gophercolor16x16.png diff --git a/src/archive/zip/testdata/readme.notzip b/src/archive/zip/testdata/readme.notzip Binary files differnew file mode 100644 index 0000000..8173727 --- /dev/null +++ b/src/archive/zip/testdata/readme.notzip diff --git a/src/archive/zip/testdata/readme.zip b/src/archive/zip/testdata/readme.zip Binary files differnew file mode 100644 index 0000000..5642a67 --- /dev/null +++ b/src/archive/zip/testdata/readme.zip diff --git a/src/archive/zip/testdata/subdir.zip b/src/archive/zip/testdata/subdir.zip Binary files differnew file mode 100644 index 0000000..324d06b --- /dev/null +++ b/src/archive/zip/testdata/subdir.zip diff --git a/src/archive/zip/testdata/symlink.zip b/src/archive/zip/testdata/symlink.zip Binary files differnew file mode 100644 index 0000000..af84693 --- /dev/null +++ b/src/archive/zip/testdata/symlink.zip diff --git a/src/archive/zip/testdata/test-trailing-junk.zip b/src/archive/zip/testdata/test-trailing-junk.zip Binary files differnew file mode 100644 index 0000000..42281b4 --- /dev/null +++ b/src/archive/zip/testdata/test-trailing-junk.zip diff --git a/src/archive/zip/testdata/test.zip b/src/archive/zip/testdata/test.zip Binary files differnew file mode 100644 index 0000000..03890c0 --- /dev/null +++ b/src/archive/zip/testdata/test.zip diff --git a/src/archive/zip/testdata/time-22738.zip b/src/archive/zip/testdata/time-22738.zip Binary files differnew file mode 100644 index 0000000..eb85b57 --- /dev/null +++ b/src/archive/zip/testdata/time-22738.zip diff --git a/src/archive/zip/testdata/time-7zip.zip b/src/archive/zip/testdata/time-7zip.zip Binary files differnew file mode 100644 index 0000000..4f74819 --- /dev/null +++ b/src/archive/zip/testdata/time-7zip.zip diff --git a/src/archive/zip/testdata/time-go.zip b/src/archive/zip/testdata/time-go.zip Binary files differnew file mode 100644 index 0000000..f008805 --- /dev/null +++ b/src/archive/zip/testdata/time-go.zip diff --git a/src/archive/zip/testdata/time-infozip.zip b/src/archive/zip/testdata/time-infozip.zip Binary files differnew file mode 100644 index 0000000..8e63948 --- /dev/null +++ b/src/archive/zip/testdata/time-infozip.zip diff --git a/src/archive/zip/testdata/time-osx.zip b/src/archive/zip/testdata/time-osx.zip Binary files differnew file mode 100644 index 0000000..e82c5c2 --- /dev/null +++ b/src/archive/zip/testdata/time-osx.zip diff --git a/src/archive/zip/testdata/time-win7.zip b/src/archive/zip/testdata/time-win7.zip Binary files differnew file mode 100644 index 0000000..8ba222b --- /dev/null +++ b/src/archive/zip/testdata/time-win7.zip diff --git a/src/archive/zip/testdata/time-winrar.zip b/src/archive/zip/testdata/time-winrar.zip Binary files differnew file mode 100644 index 0000000..a8a19b0 --- /dev/null +++ b/src/archive/zip/testdata/time-winrar.zip diff --git a/src/archive/zip/testdata/time-winzip.zip b/src/archive/zip/testdata/time-winzip.zip Binary files differnew file mode 100644 index 0000000..f6e8f8b --- /dev/null +++ b/src/archive/zip/testdata/time-winzip.zip diff --git a/src/archive/zip/testdata/unix.zip b/src/archive/zip/testdata/unix.zip Binary files differnew file mode 100644 index 0000000..ce1a981 --- /dev/null +++ b/src/archive/zip/testdata/unix.zip diff --git a/src/archive/zip/testdata/utf8-7zip.zip b/src/archive/zip/testdata/utf8-7zip.zip Binary files differnew file mode 100644 index 0000000..0e97884 --- /dev/null +++ b/src/archive/zip/testdata/utf8-7zip.zip diff --git a/src/archive/zip/testdata/utf8-infozip.zip b/src/archive/zip/testdata/utf8-infozip.zip Binary files differnew file mode 100644 index 0000000..25a8926 --- /dev/null +++ b/src/archive/zip/testdata/utf8-infozip.zip diff --git a/src/archive/zip/testdata/utf8-osx.zip b/src/archive/zip/testdata/utf8-osx.zip Binary files differnew file mode 100644 index 0000000..9b0c058 --- /dev/null +++ b/src/archive/zip/testdata/utf8-osx.zip diff --git a/src/archive/zip/testdata/utf8-winrar.zip b/src/archive/zip/testdata/utf8-winrar.zip Binary files differnew file mode 100644 index 0000000..4bad6c3 --- /dev/null +++ b/src/archive/zip/testdata/utf8-winrar.zip diff --git a/src/archive/zip/testdata/utf8-winzip.zip b/src/archive/zip/testdata/utf8-winzip.zip Binary files differnew file mode 100644 index 0000000..909d52e --- /dev/null +++ b/src/archive/zip/testdata/utf8-winzip.zip diff --git a/src/archive/zip/testdata/winxp.zip b/src/archive/zip/testdata/winxp.zip Binary files differnew file mode 100644 index 0000000..3919322 --- /dev/null +++ b/src/archive/zip/testdata/winxp.zip diff --git a/src/archive/zip/testdata/zip64-2.zip b/src/archive/zip/testdata/zip64-2.zip Binary files differnew file mode 100644 index 0000000..f844e35 --- /dev/null +++ b/src/archive/zip/testdata/zip64-2.zip diff --git a/src/archive/zip/testdata/zip64.zip b/src/archive/zip/testdata/zip64.zip Binary files differnew file mode 100644 index 0000000..a2ee1fa --- /dev/null +++ b/src/archive/zip/testdata/zip64.zip diff --git a/src/archive/zip/writer.go b/src/archive/zip/writer.go new file mode 100644 index 0000000..3b23cc3 --- /dev/null +++ b/src/archive/zip/writer.go @@ -0,0 +1,634 @@ +// Copyright 2011 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package zip + +import ( + "bufio" + "encoding/binary" + "errors" + "hash" + "hash/crc32" + "io" + "strings" + "unicode/utf8" +) + +var ( + errLongName = errors.New("zip: FileHeader.Name too long") + errLongExtra = errors.New("zip: FileHeader.Extra too long") +) + +// Writer implements a zip file writer. +type Writer struct { + cw *countWriter + dir []*header + last *fileWriter + closed bool + compressors map[uint16]Compressor + comment string + + // testHookCloseSizeOffset if non-nil is called with the size + // of offset of the central directory at Close. + testHookCloseSizeOffset func(size, offset uint64) +} + +type header struct { + *FileHeader + offset uint64 + raw bool +} + +// NewWriter returns a new Writer writing a zip file to w. +func NewWriter(w io.Writer) *Writer { + return &Writer{cw: &countWriter{w: bufio.NewWriter(w)}} +} + +// SetOffset sets the offset of the beginning of the zip data within the +// underlying writer. It should be used when the zip data is appended to an +// existing file, such as a binary executable. +// It must be called before any data is written. +func (w *Writer) SetOffset(n int64) { + if w.cw.count != 0 { + panic("zip: SetOffset called after data was written") + } + w.cw.count = n +} + +// Flush flushes any buffered data to the underlying writer. +// Calling Flush is not normally necessary; calling Close is sufficient. +func (w *Writer) Flush() error { + return w.cw.w.(*bufio.Writer).Flush() +} + +// SetComment sets the end-of-central-directory comment field. +// It can only be called before Close. +func (w *Writer) SetComment(comment string) error { + if len(comment) > uint16max { + return errors.New("zip: Writer.Comment too long") + } + w.comment = comment + return nil +} + +// Close finishes writing the zip file by writing the central directory. +// It does not close the underlying writer. +func (w *Writer) Close() error { + if w.last != nil && !w.last.closed { + if err := w.last.close(); err != nil { + return err + } + w.last = nil + } + if w.closed { + return errors.New("zip: writer closed twice") + } + w.closed = true + + // write central directory + start := w.cw.count + for _, h := range w.dir { + var buf [directoryHeaderLen]byte + b := writeBuf(buf[:]) + b.uint32(uint32(directoryHeaderSignature)) + b.uint16(h.CreatorVersion) + b.uint16(h.ReaderVersion) + b.uint16(h.Flags) + b.uint16(h.Method) + b.uint16(h.ModifiedTime) + b.uint16(h.ModifiedDate) + b.uint32(h.CRC32) + if h.isZip64() || h.offset >= uint32max { + // the file needs a zip64 header. store maxint in both + // 32 bit size fields (and offset later) to signal that the + // zip64 extra header should be used. + b.uint32(uint32max) // compressed size + b.uint32(uint32max) // uncompressed size + + // append a zip64 extra block to Extra + var buf [28]byte // 2x uint16 + 3x uint64 + eb := writeBuf(buf[:]) + eb.uint16(zip64ExtraID) + eb.uint16(24) // size = 3x uint64 + eb.uint64(h.UncompressedSize64) + eb.uint64(h.CompressedSize64) + eb.uint64(h.offset) + h.Extra = append(h.Extra, buf[:]...) + } else { + b.uint32(h.CompressedSize) + b.uint32(h.UncompressedSize) + } + + b.uint16(uint16(len(h.Name))) + b.uint16(uint16(len(h.Extra))) + b.uint16(uint16(len(h.Comment))) + b = b[4:] // skip disk number start and internal file attr (2x uint16) + b.uint32(h.ExternalAttrs) + if h.offset > uint32max { + b.uint32(uint32max) + } else { + b.uint32(uint32(h.offset)) + } + if _, err := w.cw.Write(buf[:]); err != nil { + return err + } + if _, err := io.WriteString(w.cw, h.Name); err != nil { + return err + } + if _, err := w.cw.Write(h.Extra); err != nil { + return err + } + if _, err := io.WriteString(w.cw, h.Comment); err != nil { + return err + } + } + end := w.cw.count + + records := uint64(len(w.dir)) + size := uint64(end - start) + offset := uint64(start) + + if f := w.testHookCloseSizeOffset; f != nil { + f(size, offset) + } + + if records >= uint16max || size >= uint32max || offset >= uint32max { + var buf [directory64EndLen + directory64LocLen]byte + b := writeBuf(buf[:]) + + // zip64 end of central directory record + b.uint32(directory64EndSignature) + b.uint64(directory64EndLen - 12) // length minus signature (uint32) and length fields (uint64) + b.uint16(zipVersion45) // version made by + b.uint16(zipVersion45) // version needed to extract + b.uint32(0) // number of this disk + b.uint32(0) // number of the disk with the start of the central directory + b.uint64(records) // total number of entries in the central directory on this disk + b.uint64(records) // total number of entries in the central directory + b.uint64(size) // size of the central directory + b.uint64(offset) // offset of start of central directory with respect to the starting disk number + + // zip64 end of central directory locator + b.uint32(directory64LocSignature) + b.uint32(0) // number of the disk with the start of the zip64 end of central directory + b.uint64(uint64(end)) // relative offset of the zip64 end of central directory record + b.uint32(1) // total number of disks + + if _, err := w.cw.Write(buf[:]); err != nil { + return err + } + + // store max values in the regular end record to signal + // that the zip64 values should be used instead + records = uint16max + size = uint32max + offset = uint32max + } + + // write end record + var buf [directoryEndLen]byte + b := writeBuf(buf[:]) + b.uint32(uint32(directoryEndSignature)) + b = b[4:] // skip over disk number and first disk number (2x uint16) + b.uint16(uint16(records)) // number of entries this disk + b.uint16(uint16(records)) // number of entries total + b.uint32(uint32(size)) // size of directory + b.uint32(uint32(offset)) // start of directory + b.uint16(uint16(len(w.comment))) // byte size of EOCD comment + if _, err := w.cw.Write(buf[:]); err != nil { + return err + } + if _, err := io.WriteString(w.cw, w.comment); err != nil { + return err + } + + return w.cw.w.(*bufio.Writer).Flush() +} + +// Create adds a file to the zip file using the provided name. +// It returns a Writer to which the file contents should be written. +// The file contents will be compressed using the Deflate method. +// The name must be a relative path: it must not start with a drive +// letter (e.g. C:) or leading slash, and only forward slashes are +// allowed. To create a directory instead of a file, add a trailing +// slash to the name. +// The file's contents must be written to the io.Writer before the next +// call to Create, CreateHeader, or Close. +func (w *Writer) Create(name string) (io.Writer, error) { + header := &FileHeader{ + Name: name, + Method: Deflate, + } + return w.CreateHeader(header) +} + +// detectUTF8 reports whether s is a valid UTF-8 string, and whether the string +// must be considered UTF-8 encoding (i.e., not compatible with CP-437, ASCII, +// or any other common encoding). +func detectUTF8(s string) (valid, require bool) { + for i := 0; i < len(s); { + r, size := utf8.DecodeRuneInString(s[i:]) + i += size + // Officially, ZIP uses CP-437, but many readers use the system's + // local character encoding. Most encoding are compatible with a large + // subset of CP-437, which itself is ASCII-like. + // + // Forbid 0x7e and 0x5c since EUC-KR and Shift-JIS replace those + // characters with localized currency and overline characters. + if r < 0x20 || r > 0x7d || r == 0x5c { + if !utf8.ValidRune(r) || (r == utf8.RuneError && size == 1) { + return false, false + } + require = true + } + } + return true, require +} + +// prepare performs the bookkeeping operations required at the start of +// CreateHeader and CreateRaw. +func (w *Writer) prepare(fh *FileHeader) error { + if w.last != nil && !w.last.closed { + if err := w.last.close(); err != nil { + return err + } + } + if len(w.dir) > 0 && w.dir[len(w.dir)-1].FileHeader == fh { + // See https://golang.org/issue/11144 confusion. + return errors.New("archive/zip: invalid duplicate FileHeader") + } + return nil +} + +// CreateHeader adds a file to the zip archive using the provided FileHeader +// for the file metadata. Writer takes ownership of fh and may mutate +// its fields. The caller must not modify fh after calling CreateHeader. +// +// This returns a Writer to which the file contents should be written. +// The file's contents must be written to the io.Writer before the next +// call to Create, CreateHeader, CreateRaw, or Close. +func (w *Writer) CreateHeader(fh *FileHeader) (io.Writer, error) { + if err := w.prepare(fh); err != nil { + return nil, err + } + + // The ZIP format has a sad state of affairs regarding character encoding. + // Officially, the name and comment fields are supposed to be encoded + // in CP-437 (which is mostly compatible with ASCII), unless the UTF-8 + // flag bit is set. However, there are several problems: + // + // * Many ZIP readers still do not support UTF-8. + // * If the UTF-8 flag is cleared, several readers simply interpret the + // name and comment fields as whatever the local system encoding is. + // + // In order to avoid breaking readers without UTF-8 support, + // we avoid setting the UTF-8 flag if the strings are CP-437 compatible. + // However, if the strings require multibyte UTF-8 encoding and is a + // valid UTF-8 string, then we set the UTF-8 bit. + // + // For the case, where the user explicitly wants to specify the encoding + // as UTF-8, they will need to set the flag bit themselves. + utf8Valid1, utf8Require1 := detectUTF8(fh.Name) + utf8Valid2, utf8Require2 := detectUTF8(fh.Comment) + switch { + case fh.NonUTF8: + fh.Flags &^= 0x800 + case (utf8Require1 || utf8Require2) && (utf8Valid1 && utf8Valid2): + fh.Flags |= 0x800 + } + + fh.CreatorVersion = fh.CreatorVersion&0xff00 | zipVersion20 // preserve compatibility byte + fh.ReaderVersion = zipVersion20 + + // If Modified is set, this takes precedence over MS-DOS timestamp fields. + if !fh.Modified.IsZero() { + // Contrary to the FileHeader.SetModTime method, we intentionally + // do not convert to UTC, because we assume the user intends to encode + // the date using the specified timezone. A user may want this control + // because many legacy ZIP readers interpret the timestamp according + // to the local timezone. + // + // The timezone is only non-UTC if a user directly sets the Modified + // field directly themselves. All other approaches sets UTC. + fh.ModifiedDate, fh.ModifiedTime = timeToMsDosTime(fh.Modified) + + // Use "extended timestamp" format since this is what Info-ZIP uses. + // Nearly every major ZIP implementation uses a different format, + // but at least most seem to be able to understand the other formats. + // + // This format happens to be identical for both local and central header + // if modification time is the only timestamp being encoded. + var mbuf [9]byte // 2*SizeOf(uint16) + SizeOf(uint8) + SizeOf(uint32) + mt := uint32(fh.Modified.Unix()) + eb := writeBuf(mbuf[:]) + eb.uint16(extTimeExtraID) + eb.uint16(5) // Size: SizeOf(uint8) + SizeOf(uint32) + eb.uint8(1) // Flags: ModTime + eb.uint32(mt) // ModTime + fh.Extra = append(fh.Extra, mbuf[:]...) + } + + var ( + ow io.Writer + fw *fileWriter + ) + h := &header{ + FileHeader: fh, + offset: uint64(w.cw.count), + } + + if strings.HasSuffix(fh.Name, "/") { + // Set the compression method to Store to ensure data length is truly zero, + // which the writeHeader method always encodes for the size fields. + // This is necessary as most compression formats have non-zero lengths + // even when compressing an empty string. + fh.Method = Store + fh.Flags &^= 0x8 // we will not write a data descriptor + + // Explicitly clear sizes as they have no meaning for directories. + fh.CompressedSize = 0 + fh.CompressedSize64 = 0 + fh.UncompressedSize = 0 + fh.UncompressedSize64 = 0 + + ow = dirWriter{} + } else { + fh.Flags |= 0x8 // we will write a data descriptor + + fw = &fileWriter{ + zipw: w.cw, + compCount: &countWriter{w: w.cw}, + crc32: crc32.NewIEEE(), + } + comp := w.compressor(fh.Method) + if comp == nil { + return nil, ErrAlgorithm + } + var err error + fw.comp, err = comp(fw.compCount) + if err != nil { + return nil, err + } + fw.rawCount = &countWriter{w: fw.comp} + fw.header = h + ow = fw + } + w.dir = append(w.dir, h) + if err := writeHeader(w.cw, h); err != nil { + return nil, err + } + // If we're creating a directory, fw is nil. + w.last = fw + return ow, nil +} + +func writeHeader(w io.Writer, h *header) error { + const maxUint16 = 1<<16 - 1 + if len(h.Name) > maxUint16 { + return errLongName + } + if len(h.Extra) > maxUint16 { + return errLongExtra + } + + var buf [fileHeaderLen]byte + b := writeBuf(buf[:]) + b.uint32(uint32(fileHeaderSignature)) + b.uint16(h.ReaderVersion) + b.uint16(h.Flags) + b.uint16(h.Method) + b.uint16(h.ModifiedTime) + b.uint16(h.ModifiedDate) + // In raw mode (caller does the compression), the values are either + // written here or in the trailing data descriptor based on the header + // flags. + if h.raw && !h.hasDataDescriptor() { + b.uint32(h.CRC32) + b.uint32(uint32(min64(h.CompressedSize64, uint32max))) + b.uint32(uint32(min64(h.UncompressedSize64, uint32max))) + } else { + // When this package handle the compression, these values are + // always written to the trailing data descriptor. + b.uint32(0) // crc32 + b.uint32(0) // compressed size + b.uint32(0) // uncompressed size + } + b.uint16(uint16(len(h.Name))) + b.uint16(uint16(len(h.Extra))) + if _, err := w.Write(buf[:]); err != nil { + return err + } + if _, err := io.WriteString(w, h.Name); err != nil { + return err + } + _, err := w.Write(h.Extra) + return err +} + +func min64(x, y uint64) uint64 { + if x < y { + return x + } + return y +} + +// CreateRaw adds a file to the zip archive using the provided FileHeader and +// returns a Writer to which the file contents should be written. The file's +// contents must be written to the io.Writer before the next call to Create, +// CreateHeader, CreateRaw, or Close. +// +// In contrast to CreateHeader, the bytes passed to Writer are not compressed. +func (w *Writer) CreateRaw(fh *FileHeader) (io.Writer, error) { + if err := w.prepare(fh); err != nil { + return nil, err + } + + fh.CompressedSize = uint32(min64(fh.CompressedSize64, uint32max)) + fh.UncompressedSize = uint32(min64(fh.UncompressedSize64, uint32max)) + + h := &header{ + FileHeader: fh, + offset: uint64(w.cw.count), + raw: true, + } + w.dir = append(w.dir, h) + if err := writeHeader(w.cw, h); err != nil { + return nil, err + } + + if strings.HasSuffix(fh.Name, "/") { + w.last = nil + return dirWriter{}, nil + } + + fw := &fileWriter{ + header: h, + zipw: w.cw, + } + w.last = fw + return fw, nil +} + +// Copy copies the file f (obtained from a Reader) into w. It copies the raw +// form directly bypassing decompression, compression, and validation. +func (w *Writer) Copy(f *File) error { + r, err := f.OpenRaw() + if err != nil { + return err + } + fw, err := w.CreateRaw(&f.FileHeader) + if err != nil { + return err + } + _, err = io.Copy(fw, r) + return err +} + +// RegisterCompressor registers or overrides a custom compressor for a specific +// method ID. If a compressor for a given method is not found, Writer will +// default to looking up the compressor at the package level. +func (w *Writer) RegisterCompressor(method uint16, comp Compressor) { + if w.compressors == nil { + w.compressors = make(map[uint16]Compressor) + } + w.compressors[method] = comp +} + +func (w *Writer) compressor(method uint16) Compressor { + comp := w.compressors[method] + if comp == nil { + comp = compressor(method) + } + return comp +} + +type dirWriter struct{} + +func (dirWriter) Write(b []byte) (int, error) { + if len(b) == 0 { + return 0, nil + } + return 0, errors.New("zip: write to directory") +} + +type fileWriter struct { + *header + zipw io.Writer + rawCount *countWriter + comp io.WriteCloser + compCount *countWriter + crc32 hash.Hash32 + closed bool +} + +func (w *fileWriter) Write(p []byte) (int, error) { + if w.closed { + return 0, errors.New("zip: write to closed file") + } + if w.raw { + return w.zipw.Write(p) + } + w.crc32.Write(p) + return w.rawCount.Write(p) +} + +func (w *fileWriter) close() error { + if w.closed { + return errors.New("zip: file closed twice") + } + w.closed = true + if w.raw { + return w.writeDataDescriptor() + } + if err := w.comp.Close(); err != nil { + return err + } + + // update FileHeader + fh := w.header.FileHeader + fh.CRC32 = w.crc32.Sum32() + fh.CompressedSize64 = uint64(w.compCount.count) + fh.UncompressedSize64 = uint64(w.rawCount.count) + + if fh.isZip64() { + fh.CompressedSize = uint32max + fh.UncompressedSize = uint32max + fh.ReaderVersion = zipVersion45 // requires 4.5 - File uses ZIP64 format extensions + } else { + fh.CompressedSize = uint32(fh.CompressedSize64) + fh.UncompressedSize = uint32(fh.UncompressedSize64) + } + + return w.writeDataDescriptor() +} + +func (w *fileWriter) writeDataDescriptor() error { + if !w.hasDataDescriptor() { + return nil + } + // Write data descriptor. This is more complicated than one would + // think, see e.g. comments in zipfile.c:putextended() and + // http://bugs.sun.com/bugdatabase/view_bug.do?bug_id=7073588. + // The approach here is to write 8 byte sizes if needed without + // adding a zip64 extra in the local header (too late anyway). + var buf []byte + if w.isZip64() { + buf = make([]byte, dataDescriptor64Len) + } else { + buf = make([]byte, dataDescriptorLen) + } + b := writeBuf(buf) + b.uint32(dataDescriptorSignature) // de-facto standard, required by OS X + b.uint32(w.CRC32) + if w.isZip64() { + b.uint64(w.CompressedSize64) + b.uint64(w.UncompressedSize64) + } else { + b.uint32(w.CompressedSize) + b.uint32(w.UncompressedSize) + } + _, err := w.zipw.Write(buf) + return err +} + +type countWriter struct { + w io.Writer + count int64 +} + +func (w *countWriter) Write(p []byte) (int, error) { + n, err := w.w.Write(p) + w.count += int64(n) + return n, err +} + +type nopCloser struct { + io.Writer +} + +func (w nopCloser) Close() error { + return nil +} + +type writeBuf []byte + +func (b *writeBuf) uint8(v uint8) { + (*b)[0] = v + *b = (*b)[1:] +} + +func (b *writeBuf) uint16(v uint16) { + binary.LittleEndian.PutUint16(*b, v) + *b = (*b)[2:] +} + +func (b *writeBuf) uint32(v uint32) { + binary.LittleEndian.PutUint32(*b, v) + *b = (*b)[4:] +} + +func (b *writeBuf) uint64(v uint64) { + binary.LittleEndian.PutUint64(*b, v) + *b = (*b)[8:] +} diff --git a/src/archive/zip/writer_test.go b/src/archive/zip/writer_test.go new file mode 100644 index 0000000..2b73eca --- /dev/null +++ b/src/archive/zip/writer_test.go @@ -0,0 +1,604 @@ +// Copyright 2011 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package zip + +import ( + "bytes" + "compress/flate" + "encoding/binary" + "fmt" + "hash/crc32" + "io" + "io/fs" + "math/rand" + "os" + "strings" + "testing" + "time" +) + +// TODO(adg): a more sophisticated test suite + +type WriteTest struct { + Name string + Data []byte + Method uint16 + Mode fs.FileMode +} + +var writeTests = []WriteTest{ + { + Name: "foo", + Data: []byte("Rabbits, guinea pigs, gophers, marsupial rats, and quolls."), + Method: Store, + Mode: 0666, + }, + { + Name: "bar", + Data: nil, // large data set in the test + Method: Deflate, + Mode: 0644, + }, + { + Name: "setuid", + Data: []byte("setuid file"), + Method: Deflate, + Mode: 0755 | fs.ModeSetuid, + }, + { + Name: "setgid", + Data: []byte("setgid file"), + Method: Deflate, + Mode: 0755 | fs.ModeSetgid, + }, + { + Name: "symlink", + Data: []byte("../link/target"), + Method: Deflate, + Mode: 0755 | fs.ModeSymlink, + }, + { + Name: "device", + Data: []byte("device file"), + Method: Deflate, + Mode: 0755 | fs.ModeDevice, + }, + { + Name: "chardevice", + Data: []byte("char device file"), + Method: Deflate, + Mode: 0755 | fs.ModeDevice | fs.ModeCharDevice, + }, +} + +func TestWriter(t *testing.T) { + largeData := make([]byte, 1<<17) + if _, err := rand.Read(largeData); err != nil { + t.Fatal("rand.Read failed:", err) + } + writeTests[1].Data = largeData + defer func() { + writeTests[1].Data = nil + }() + + // write a zip file + buf := new(bytes.Buffer) + w := NewWriter(buf) + + for _, wt := range writeTests { + testCreate(t, w, &wt) + } + + if err := w.Close(); err != nil { + t.Fatal(err) + } + + // read it back + r, err := NewReader(bytes.NewReader(buf.Bytes()), int64(buf.Len())) + if err != nil { + t.Fatal(err) + } + for i, wt := range writeTests { + testReadFile(t, r.File[i], &wt) + } +} + +// TestWriterComment is test for EOCD comment read/write. +func TestWriterComment(t *testing.T) { + var tests = []struct { + comment string + ok bool + }{ + {"hi, hello", true}, + {"hi, こんにちわ", true}, + {strings.Repeat("a", uint16max), true}, + {strings.Repeat("a", uint16max+1), false}, + } + + for _, test := range tests { + // write a zip file + buf := new(bytes.Buffer) + w := NewWriter(buf) + if err := w.SetComment(test.comment); err != nil { + if test.ok { + t.Fatalf("SetComment: unexpected error %v", err) + } + continue + } else { + if !test.ok { + t.Fatalf("SetComment: unexpected success, want error") + } + } + + if err := w.Close(); test.ok == (err != nil) { + t.Fatal(err) + } + + if w.closed != test.ok { + t.Fatalf("Writer.closed: got %v, want %v", w.closed, test.ok) + } + + // skip read test in failure cases + if !test.ok { + continue + } + + // read it back + r, err := NewReader(bytes.NewReader(buf.Bytes()), int64(buf.Len())) + if err != nil { + t.Fatal(err) + } + if r.Comment != test.comment { + t.Fatalf("Reader.Comment: got %v, want %v", r.Comment, test.comment) + } + } +} + +func TestWriterUTF8(t *testing.T) { + var utf8Tests = []struct { + name string + comment string + nonUTF8 bool + flags uint16 + }{ + { + name: "hi, hello", + comment: "in the world", + flags: 0x8, + }, + { + name: "hi, こんにちわ", + comment: "in the world", + flags: 0x808, + }, + { + name: "hi, こんにちわ", + comment: "in the world", + nonUTF8: true, + flags: 0x8, + }, + { + name: "hi, hello", + comment: "in the 世界", + flags: 0x808, + }, + { + name: "hi, こんにちわ", + comment: "in the 世界", + flags: 0x808, + }, + { + name: "the replacement rune is �", + comment: "the replacement rune is �", + flags: 0x808, + }, + { + // Name is Japanese encoded in Shift JIS. + name: "\x93\xfa\x96{\x8c\xea.txt", + comment: "in the 世界", + flags: 0x008, // UTF-8 must not be set + }, + } + + // write a zip file + buf := new(bytes.Buffer) + w := NewWriter(buf) + + for _, test := range utf8Tests { + h := &FileHeader{ + Name: test.name, + Comment: test.comment, + NonUTF8: test.nonUTF8, + Method: Deflate, + } + w, err := w.CreateHeader(h) + if err != nil { + t.Fatal(err) + } + w.Write([]byte{}) + } + + if err := w.Close(); err != nil { + t.Fatal(err) + } + + // read it back + r, err := NewReader(bytes.NewReader(buf.Bytes()), int64(buf.Len())) + if err != nil { + t.Fatal(err) + } + for i, test := range utf8Tests { + flags := r.File[i].Flags + if flags != test.flags { + t.Errorf("CreateHeader(name=%q comment=%q nonUTF8=%v): flags=%#x, want %#x", test.name, test.comment, test.nonUTF8, flags, test.flags) + } + } +} + +func TestWriterTime(t *testing.T) { + var buf bytes.Buffer + h := &FileHeader{ + Name: "test.txt", + Modified: time.Date(2017, 10, 31, 21, 11, 57, 0, timeZone(-7*time.Hour)), + } + w := NewWriter(&buf) + if _, err := w.CreateHeader(h); err != nil { + t.Fatalf("unexpected CreateHeader error: %v", err) + } + if err := w.Close(); err != nil { + t.Fatalf("unexpected Close error: %v", err) + } + + want, err := os.ReadFile("testdata/time-go.zip") + if err != nil { + t.Fatalf("unexpected ReadFile error: %v", err) + } + if got := buf.Bytes(); !bytes.Equal(got, want) { + fmt.Printf("%x\n%x\n", got, want) + t.Error("contents of time-go.zip differ") + } +} + +func TestWriterOffset(t *testing.T) { + largeData := make([]byte, 1<<17) + if _, err := rand.Read(largeData); err != nil { + t.Fatal("rand.Read failed:", err) + } + writeTests[1].Data = largeData + defer func() { + writeTests[1].Data = nil + }() + + // write a zip file + buf := new(bytes.Buffer) + existingData := []byte{1, 2, 3, 1, 2, 3, 1, 2, 3} + n, _ := buf.Write(existingData) + w := NewWriter(buf) + w.SetOffset(int64(n)) + + for _, wt := range writeTests { + testCreate(t, w, &wt) + } + + if err := w.Close(); err != nil { + t.Fatal(err) + } + + // read it back + r, err := NewReader(bytes.NewReader(buf.Bytes()), int64(buf.Len())) + if err != nil { + t.Fatal(err) + } + for i, wt := range writeTests { + testReadFile(t, r.File[i], &wt) + } +} + +func TestWriterFlush(t *testing.T) { + var buf bytes.Buffer + w := NewWriter(struct{ io.Writer }{&buf}) + _, err := w.Create("foo") + if err != nil { + t.Fatal(err) + } + if buf.Len() > 0 { + t.Fatalf("Unexpected %d bytes already in buffer", buf.Len()) + } + if err := w.Flush(); err != nil { + t.Fatal(err) + } + if buf.Len() == 0 { + t.Fatal("No bytes written after Flush") + } +} + +func TestWriterDir(t *testing.T) { + w := NewWriter(io.Discard) + dw, err := w.Create("dir/") + if err != nil { + t.Fatal(err) + } + if _, err := dw.Write(nil); err != nil { + t.Errorf("Write(nil) to directory: got %v, want nil", err) + } + if _, err := dw.Write([]byte("hello")); err == nil { + t.Error(`Write("hello") to directory: got nil error, want non-nil`) + } +} + +func TestWriterDirAttributes(t *testing.T) { + var buf bytes.Buffer + w := NewWriter(&buf) + if _, err := w.CreateHeader(&FileHeader{ + Name: "dir/", + Method: Deflate, + CompressedSize64: 1234, + UncompressedSize64: 5678, + }); err != nil { + t.Fatal(err) + } + if err := w.Close(); err != nil { + t.Fatal(err) + } + b := buf.Bytes() + + var sig [4]byte + binary.LittleEndian.PutUint32(sig[:], uint32(fileHeaderSignature)) + + idx := bytes.Index(b, sig[:]) + if idx == -1 { + t.Fatal("file header not found") + } + b = b[idx:] + + if !bytes.Equal(b[6:10], []byte{0, 0, 0, 0}) { // FileHeader.Flags: 0, FileHeader.Method: 0 + t.Errorf("unexpected method and flags: %v", b[6:10]) + } + + if !bytes.Equal(b[14:26], make([]byte, 12)) { // FileHeader.{CRC32,CompressSize,UncompressedSize} all zero. + t.Errorf("unexpected crc, compress and uncompressed size to be 0 was: %v", b[14:26]) + } + + binary.LittleEndian.PutUint32(sig[:], uint32(dataDescriptorSignature)) + if bytes.Contains(b, sig[:]) { + t.Error("there should be no data descriptor") + } +} + +func TestWriterCopy(t *testing.T) { + // make a zip file + buf := new(bytes.Buffer) + w := NewWriter(buf) + for _, wt := range writeTests { + testCreate(t, w, &wt) + } + if err := w.Close(); err != nil { + t.Fatal(err) + } + + // read it back + src, err := NewReader(bytes.NewReader(buf.Bytes()), int64(buf.Len())) + if err != nil { + t.Fatal(err) + } + for i, wt := range writeTests { + testReadFile(t, src.File[i], &wt) + } + + // make a new zip file copying the old compressed data. + buf2 := new(bytes.Buffer) + dst := NewWriter(buf2) + for _, f := range src.File { + if err := dst.Copy(f); err != nil { + t.Fatal(err) + } + } + if err := dst.Close(); err != nil { + t.Fatal(err) + } + + // read the new one back + r, err := NewReader(bytes.NewReader(buf2.Bytes()), int64(buf2.Len())) + if err != nil { + t.Fatal(err) + } + for i, wt := range writeTests { + testReadFile(t, r.File[i], &wt) + } +} + +func TestWriterCreateRaw(t *testing.T) { + files := []struct { + name string + content []byte + method uint16 + flags uint16 + crc32 uint32 + uncompressedSize uint64 + compressedSize uint64 + }{ + { + name: "small store w desc", + content: []byte("gophers"), + method: Store, + flags: 0x8, + }, + { + name: "small deflate wo desc", + content: bytes.Repeat([]byte("abcdefg"), 2048), + method: Deflate, + }, + } + + // write a zip file + archive := new(bytes.Buffer) + w := NewWriter(archive) + + for i := range files { + f := &files[i] + f.crc32 = crc32.ChecksumIEEE(f.content) + size := uint64(len(f.content)) + f.uncompressedSize = size + f.compressedSize = size + + var compressedContent []byte + if f.method == Deflate { + var buf bytes.Buffer + w, err := flate.NewWriter(&buf, flate.BestSpeed) + if err != nil { + t.Fatalf("flate.NewWriter err = %v", err) + } + _, err = w.Write(f.content) + if err != nil { + t.Fatalf("flate Write err = %v", err) + } + err = w.Close() + if err != nil { + t.Fatalf("flate Writer.Close err = %v", err) + } + compressedContent = buf.Bytes() + f.compressedSize = uint64(len(compressedContent)) + } + + h := &FileHeader{ + Name: f.name, + Method: f.method, + Flags: f.flags, + CRC32: f.crc32, + CompressedSize64: f.compressedSize, + UncompressedSize64: f.uncompressedSize, + } + w, err := w.CreateRaw(h) + if err != nil { + t.Fatal(err) + } + if compressedContent != nil { + _, err = w.Write(compressedContent) + } else { + _, err = w.Write(f.content) + } + if err != nil { + t.Fatalf("%s Write got %v; want nil", f.name, err) + } + } + + if err := w.Close(); err != nil { + t.Fatal(err) + } + + // read it back + r, err := NewReader(bytes.NewReader(archive.Bytes()), int64(archive.Len())) + if err != nil { + t.Fatal(err) + } + for i, want := range files { + got := r.File[i] + if got.Name != want.name { + t.Errorf("got Name %s; want %s", got.Name, want.name) + } + if got.Method != want.method { + t.Errorf("%s: got Method %#x; want %#x", want.name, got.Method, want.method) + } + if got.Flags != want.flags { + t.Errorf("%s: got Flags %#x; want %#x", want.name, got.Flags, want.flags) + } + if got.CRC32 != want.crc32 { + t.Errorf("%s: got CRC32 %#x; want %#x", want.name, got.CRC32, want.crc32) + } + if got.CompressedSize64 != want.compressedSize { + t.Errorf("%s: got CompressedSize64 %d; want %d", want.name, got.CompressedSize64, want.compressedSize) + } + if got.UncompressedSize64 != want.uncompressedSize { + t.Errorf("%s: got UncompressedSize64 %d; want %d", want.name, got.UncompressedSize64, want.uncompressedSize) + } + + r, err := got.Open() + if err != nil { + t.Errorf("%s: Open err = %v", got.Name, err) + continue + } + + buf, err := io.ReadAll(r) + if err != nil { + t.Errorf("%s: ReadAll err = %v", got.Name, err) + continue + } + + if !bytes.Equal(buf, want.content) { + t.Errorf("%v: ReadAll returned unexpected bytes", got.Name) + } + } +} + +func testCreate(t *testing.T, w *Writer, wt *WriteTest) { + header := &FileHeader{ + Name: wt.Name, + Method: wt.Method, + } + if wt.Mode != 0 { + header.SetMode(wt.Mode) + } + f, err := w.CreateHeader(header) + if err != nil { + t.Fatal(err) + } + _, err = f.Write(wt.Data) + if err != nil { + t.Fatal(err) + } +} + +func testReadFile(t *testing.T, f *File, wt *WriteTest) { + if f.Name != wt.Name { + t.Fatalf("File name: got %q, want %q", f.Name, wt.Name) + } + testFileMode(t, f, wt.Mode) + rc, err := f.Open() + if err != nil { + t.Fatalf("opening %s: %v", f.Name, err) + } + b, err := io.ReadAll(rc) + if err != nil { + t.Fatalf("reading %s: %v", f.Name, err) + } + err = rc.Close() + if err != nil { + t.Fatalf("closing %s: %v", f.Name, err) + } + if !bytes.Equal(b, wt.Data) { + t.Errorf("File contents %q, want %q", b, wt.Data) + } +} + +func BenchmarkCompressedZipGarbage(b *testing.B) { + bigBuf := bytes.Repeat([]byte("a"), 1<<20) + + runOnce := func(buf *bytes.Buffer) { + buf.Reset() + zw := NewWriter(buf) + for j := 0; j < 3; j++ { + w, _ := zw.CreateHeader(&FileHeader{ + Name: "foo", + Method: Deflate, + }) + w.Write(bigBuf) + } + zw.Close() + } + + b.ReportAllocs() + // Run once and then reset the timer. + // This effectively discards the very large initial flate setup cost, + // as well as the initialization of bigBuf. + runOnce(&bytes.Buffer{}) + b.ResetTimer() + + b.RunParallel(func(pb *testing.PB) { + var buf bytes.Buffer + for pb.Next() { + runOnce(&buf) + } + }) +} diff --git a/src/archive/zip/zip_test.go b/src/archive/zip/zip_test.go new file mode 100644 index 0000000..ead9cd3 --- /dev/null +++ b/src/archive/zip/zip_test.go @@ -0,0 +1,828 @@ +// Copyright 2011 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +// Tests that involve both reading and writing. + +package zip + +import ( + "bytes" + "errors" + "fmt" + "hash" + "internal/testenv" + "io" + "runtime" + "sort" + "strings" + "testing" + "time" +) + +func TestOver65kFiles(t *testing.T) { + if testing.Short() && testenv.Builder() == "" { + t.Skip("skipping in short mode") + } + buf := new(bytes.Buffer) + w := NewWriter(buf) + const nFiles = (1 << 16) + 42 + for i := 0; i < nFiles; i++ { + _, err := w.CreateHeader(&FileHeader{ + Name: fmt.Sprintf("%d.dat", i), + Method: Store, // avoid Issue 6136 and Issue 6138 + }) + if err != nil { + t.Fatalf("creating file %d: %v", i, err) + } + } + if err := w.Close(); err != nil { + t.Fatalf("Writer.Close: %v", err) + } + s := buf.String() + zr, err := NewReader(strings.NewReader(s), int64(len(s))) + if err != nil { + t.Fatalf("NewReader: %v", err) + } + if got := len(zr.File); got != nFiles { + t.Fatalf("File contains %d files, want %d", got, nFiles) + } + for i := 0; i < nFiles; i++ { + want := fmt.Sprintf("%d.dat", i) + if zr.File[i].Name != want { + t.Fatalf("File(%d) = %q, want %q", i, zr.File[i].Name, want) + } + } +} + +func TestModTime(t *testing.T) { + var testTime = time.Date(2009, time.November, 10, 23, 45, 58, 0, time.UTC) + fh := new(FileHeader) + fh.SetModTime(testTime) + outTime := fh.ModTime() + if !outTime.Equal(testTime) { + t.Errorf("times don't match: got %s, want %s", outTime, testTime) + } +} + +func testHeaderRoundTrip(fh *FileHeader, wantUncompressedSize uint32, wantUncompressedSize64 uint64, t *testing.T) { + fi := fh.FileInfo() + fh2, err := FileInfoHeader(fi) + if err != nil { + t.Fatal(err) + } + if got, want := fh2.Name, fh.Name; got != want { + t.Errorf("Name: got %s, want %s\n", got, want) + } + if got, want := fh2.UncompressedSize, wantUncompressedSize; got != want { + t.Errorf("UncompressedSize: got %d, want %d\n", got, want) + } + if got, want := fh2.UncompressedSize64, wantUncompressedSize64; got != want { + t.Errorf("UncompressedSize64: got %d, want %d\n", got, want) + } + if got, want := fh2.ModifiedTime, fh.ModifiedTime; got != want { + t.Errorf("ModifiedTime: got %d, want %d\n", got, want) + } + if got, want := fh2.ModifiedDate, fh.ModifiedDate; got != want { + t.Errorf("ModifiedDate: got %d, want %d\n", got, want) + } + + if sysfh, ok := fi.Sys().(*FileHeader); !ok && sysfh != fh { + t.Errorf("Sys didn't return original *FileHeader") + } +} + +func TestFileHeaderRoundTrip(t *testing.T) { + fh := &FileHeader{ + Name: "foo.txt", + UncompressedSize: 987654321, + ModifiedTime: 1234, + ModifiedDate: 5678, + } + testHeaderRoundTrip(fh, fh.UncompressedSize, uint64(fh.UncompressedSize), t) +} + +func TestFileHeaderRoundTrip64(t *testing.T) { + fh := &FileHeader{ + Name: "foo.txt", + UncompressedSize64: 9876543210, + ModifiedTime: 1234, + ModifiedDate: 5678, + } + testHeaderRoundTrip(fh, uint32max, fh.UncompressedSize64, t) +} + +func TestFileHeaderRoundTripModified(t *testing.T) { + fh := &FileHeader{ + Name: "foo.txt", + UncompressedSize: 987654321, + Modified: time.Now().Local(), + ModifiedTime: 1234, + ModifiedDate: 5678, + } + fi := fh.FileInfo() + fh2, err := FileInfoHeader(fi) + if err != nil { + t.Fatal(err) + } + if got, want := fh2.Modified, fh.Modified.UTC(); got != want { + t.Errorf("Modified: got %s, want %s\n", got, want) + } + if got, want := fi.ModTime(), fh.Modified.UTC(); got != want { + t.Errorf("Modified: got %s, want %s\n", got, want) + } +} + +func TestFileHeaderRoundTripWithoutModified(t *testing.T) { + fh := &FileHeader{ + Name: "foo.txt", + UncompressedSize: 987654321, + ModifiedTime: 1234, + ModifiedDate: 5678, + } + fi := fh.FileInfo() + fh2, err := FileInfoHeader(fi) + if err != nil { + t.Fatal(err) + } + if got, want := fh2.ModTime(), fh.ModTime(); got != want { + t.Errorf("Modified: got %s, want %s\n", got, want) + } + if got, want := fi.ModTime(), fh.ModTime(); got != want { + t.Errorf("Modified: got %s, want %s\n", got, want) + } +} + +type repeatedByte struct { + off int64 + b byte + n int64 +} + +// rleBuffer is a run-length-encoded byte buffer. +// It's an io.Writer (like a bytes.Buffer) and also an io.ReaderAt, +// allowing random-access reads. +type rleBuffer struct { + buf []repeatedByte +} + +func (r *rleBuffer) Size() int64 { + if len(r.buf) == 0 { + return 0 + } + last := &r.buf[len(r.buf)-1] + return last.off + last.n +} + +func (r *rleBuffer) Write(p []byte) (n int, err error) { + var rp *repeatedByte + if len(r.buf) > 0 { + rp = &r.buf[len(r.buf)-1] + // Fast path, if p is entirely the same byte repeated. + if lastByte := rp.b; len(p) > 0 && p[0] == lastByte { + if bytes.Count(p, []byte{lastByte}) == len(p) { + rp.n += int64(len(p)) + return len(p), nil + } + } + } + + for _, b := range p { + if rp == nil || rp.b != b { + r.buf = append(r.buf, repeatedByte{r.Size(), b, 1}) + rp = &r.buf[len(r.buf)-1] + } else { + rp.n++ + } + } + return len(p), nil +} + +func min(x, y int64) int64 { + if x < y { + return x + } + return y +} + +func memset(a []byte, b byte) { + if len(a) == 0 { + return + } + // Double, until we reach power of 2 >= len(a), same as bytes.Repeat, + // but without allocation. + a[0] = b + for i, l := 1, len(a); i < l; i *= 2 { + copy(a[i:], a[:i]) + } +} + +func (r *rleBuffer) ReadAt(p []byte, off int64) (n int, err error) { + if len(p) == 0 { + return + } + skipParts := sort.Search(len(r.buf), func(i int) bool { + part := &r.buf[i] + return part.off+part.n > off + }) + parts := r.buf[skipParts:] + if len(parts) > 0 { + skipBytes := off - parts[0].off + for _, part := range parts { + repeat := int(min(part.n-skipBytes, int64(len(p)-n))) + memset(p[n:n+repeat], part.b) + n += repeat + if n == len(p) { + return + } + skipBytes = 0 + } + } + if n != len(p) { + err = io.ErrUnexpectedEOF + } + return +} + +// Just testing the rleBuffer used in the Zip64 test above. Not used by the zip code. +func TestRLEBuffer(t *testing.T) { + b := new(rleBuffer) + var all []byte + writes := []string{"abcdeee", "eeeeeee", "eeeefghaaiii"} + for _, w := range writes { + b.Write([]byte(w)) + all = append(all, w...) + } + if len(b.buf) != 10 { + t.Fatalf("len(b.buf) = %d; want 10", len(b.buf)) + } + + for i := 0; i < len(all); i++ { + for j := 0; j < len(all)-i; j++ { + buf := make([]byte, j) + n, err := b.ReadAt(buf, int64(i)) + if err != nil || n != len(buf) { + t.Errorf("ReadAt(%d, %d) = %d, %v; want %d, nil", i, j, n, err, len(buf)) + } + if !bytes.Equal(buf, all[i:i+j]) { + t.Errorf("ReadAt(%d, %d) = %q; want %q", i, j, buf, all[i:i+j]) + } + } + } +} + +// fakeHash32 is a dummy Hash32 that always returns 0. +type fakeHash32 struct { + hash.Hash32 +} + +func (fakeHash32) Write(p []byte) (int, error) { return len(p), nil } +func (fakeHash32) Sum32() uint32 { return 0 } + +func TestZip64(t *testing.T) { + if testing.Short() { + t.Skip("slow test; skipping") + } + t.Parallel() + const size = 1 << 32 // before the "END\n" part + buf := testZip64(t, size) + testZip64DirectoryRecordLength(buf, t) +} + +func TestZip64EdgeCase(t *testing.T) { + if testing.Short() { + t.Skip("slow test; skipping") + } + t.Parallel() + // Test a zip file with uncompressed size 0xFFFFFFFF. + // That's the magic marker for a 64-bit file, so even though + // it fits in a 32-bit field we must use the 64-bit field. + // Go 1.5 and earlier got this wrong, + // writing an invalid zip file. + const size = 1<<32 - 1 - int64(len("END\n")) // before the "END\n" part + buf := testZip64(t, size) + testZip64DirectoryRecordLength(buf, t) +} + +// Tests that we generate a zip64 file if the directory at offset +// 0xFFFFFFFF, but not before. +func TestZip64DirectoryOffset(t *testing.T) { + if testing.Short() { + t.Skip("skipping in short mode") + } + t.Parallel() + const filename = "huge.txt" + gen := func(wantOff uint64) func(*Writer) { + return func(w *Writer) { + w.testHookCloseSizeOffset = func(size, off uint64) { + if off != wantOff { + t.Errorf("central directory offset = %d (%x); want %d", off, off, wantOff) + } + } + f, err := w.CreateHeader(&FileHeader{ + Name: filename, + Method: Store, + }) + if err != nil { + t.Fatal(err) + } + f.(*fileWriter).crc32 = fakeHash32{} + size := wantOff - fileHeaderLen - uint64(len(filename)) - dataDescriptorLen + if _, err := io.CopyN(f, zeros{}, int64(size)); err != nil { + t.Fatal(err) + } + if err := w.Close(); err != nil { + t.Fatal(err) + } + } + } + t.Run("uint32max-2_NoZip64", func(t *testing.T) { + t.Parallel() + if generatesZip64(t, gen(0xfffffffe)) { + t.Error("unexpected zip64") + } + }) + t.Run("uint32max-1_Zip64", func(t *testing.T) { + t.Parallel() + if !generatesZip64(t, gen(0xffffffff)) { + t.Error("expected zip64") + } + }) +} + +// At 16k records, we need to generate a zip64 file. +func TestZip64ManyRecords(t *testing.T) { + if testing.Short() { + t.Skip("skipping in short mode") + } + t.Parallel() + gen := func(numRec int) func(*Writer) { + return func(w *Writer) { + for i := 0; i < numRec; i++ { + _, err := w.CreateHeader(&FileHeader{ + Name: "a.txt", + Method: Store, + }) + if err != nil { + t.Fatal(err) + } + } + if err := w.Close(); err != nil { + t.Fatal(err) + } + } + } + // 16k-1 records shouldn't make a zip64: + t.Run("uint16max-1_NoZip64", func(t *testing.T) { + t.Parallel() + if generatesZip64(t, gen(0xfffe)) { + t.Error("unexpected zip64") + } + }) + // 16k records should make a zip64: + t.Run("uint16max_Zip64", func(t *testing.T) { + t.Parallel() + if !generatesZip64(t, gen(0xffff)) { + t.Error("expected zip64") + } + }) +} + +// suffixSaver is an io.Writer & io.ReaderAt that remembers the last 0 +// to 'keep' bytes of data written to it. Call Suffix to get the +// suffix bytes. +type suffixSaver struct { + keep int + buf []byte + start int + size int64 +} + +func (ss *suffixSaver) Size() int64 { return ss.size } + +var errDiscardedBytes = errors.New("ReadAt of discarded bytes") + +func (ss *suffixSaver) ReadAt(p []byte, off int64) (n int, err error) { + back := ss.size - off + if back > int64(ss.keep) { + return 0, errDiscardedBytes + } + suf := ss.Suffix() + n = copy(p, suf[len(suf)-int(back):]) + if n != len(p) { + err = io.EOF + } + return +} + +func (ss *suffixSaver) Suffix() []byte { + if len(ss.buf) < ss.keep { + return ss.buf + } + buf := make([]byte, ss.keep) + n := copy(buf, ss.buf[ss.start:]) + copy(buf[n:], ss.buf[:]) + return buf +} + +func (ss *suffixSaver) Write(p []byte) (n int, err error) { + n = len(p) + ss.size += int64(len(p)) + if len(ss.buf) < ss.keep { + space := ss.keep - len(ss.buf) + add := len(p) + if add > space { + add = space + } + ss.buf = append(ss.buf, p[:add]...) + p = p[add:] + } + for len(p) > 0 { + n := copy(ss.buf[ss.start:], p) + p = p[n:] + ss.start += n + if ss.start == ss.keep { + ss.start = 0 + } + } + return +} + +// generatesZip64 reports whether f wrote a zip64 file. +// f is also responsible for closing w. +func generatesZip64(t *testing.T, f func(w *Writer)) bool { + ss := &suffixSaver{keep: 10 << 20} + w := NewWriter(ss) + f(w) + return suffixIsZip64(t, ss) +} + +type sizedReaderAt interface { + io.ReaderAt + Size() int64 +} + +func suffixIsZip64(t *testing.T, zip sizedReaderAt) bool { + d := make([]byte, 1024) + if _, err := zip.ReadAt(d, zip.Size()-int64(len(d))); err != nil { + t.Fatalf("ReadAt: %v", err) + } + + sigOff := findSignatureInBlock(d) + if sigOff == -1 { + t.Errorf("failed to find signature in block") + return false + } + + dirOff, err := findDirectory64End(zip, zip.Size()-int64(len(d))+int64(sigOff)) + if err != nil { + t.Fatalf("findDirectory64End: %v", err) + } + if dirOff == -1 { + return false + } + + d = make([]byte, directory64EndLen) + if _, err := zip.ReadAt(d, dirOff); err != nil { + t.Fatalf("ReadAt(off=%d): %v", dirOff, err) + } + + b := readBuf(d) + if sig := b.uint32(); sig != directory64EndSignature { + return false + } + + size := b.uint64() + if size != directory64EndLen-12 { + t.Errorf("expected length of %d, got %d", directory64EndLen-12, size) + } + return true +} + +// Zip64 is required if the total size of the records is uint32max. +func TestZip64LargeDirectory(t *testing.T) { + if runtime.GOARCH == "wasm" { + t.Skip("too slow on wasm") + } + if testing.Short() { + t.Skip("skipping in short mode") + } + t.Parallel() + // gen returns a func that writes a zip with a wantLen bytes + // of central directory. + gen := func(wantLen int64) func(*Writer) { + return func(w *Writer) { + w.testHookCloseSizeOffset = func(size, off uint64) { + if size != uint64(wantLen) { + t.Errorf("Close central directory size = %d; want %d", size, wantLen) + } + } + + uint16string := strings.Repeat(".", uint16max) + remain := wantLen + for remain > 0 { + commentLen := int(uint16max) - directoryHeaderLen - 1 + thisRecLen := directoryHeaderLen + int(uint16max) + commentLen + if int64(thisRecLen) > remain { + remove := thisRecLen - int(remain) + commentLen -= remove + thisRecLen -= remove + } + remain -= int64(thisRecLen) + f, err := w.CreateHeader(&FileHeader{ + Name: uint16string, + Comment: uint16string[:commentLen], + }) + if err != nil { + t.Fatalf("CreateHeader: %v", err) + } + f.(*fileWriter).crc32 = fakeHash32{} + } + if err := w.Close(); err != nil { + t.Fatalf("Close: %v", err) + } + } + } + t.Run("uint32max-1_NoZip64", func(t *testing.T) { + t.Parallel() + if generatesZip64(t, gen(uint32max-1)) { + t.Error("unexpected zip64") + } + }) + t.Run("uint32max_HasZip64", func(t *testing.T) { + t.Parallel() + if !generatesZip64(t, gen(uint32max)) { + t.Error("expected zip64") + } + }) +} + +func testZip64(t testing.TB, size int64) *rleBuffer { + const chunkSize = 1024 + chunks := int(size / chunkSize) + // write size bytes plus "END\n" to a zip file + buf := new(rleBuffer) + w := NewWriter(buf) + f, err := w.CreateHeader(&FileHeader{ + Name: "huge.txt", + Method: Store, + }) + if err != nil { + t.Fatal(err) + } + f.(*fileWriter).crc32 = fakeHash32{} + chunk := make([]byte, chunkSize) + for i := range chunk { + chunk[i] = '.' + } + for i := 0; i < chunks; i++ { + _, err := f.Write(chunk) + if err != nil { + t.Fatal("write chunk:", err) + } + } + if frag := int(size % chunkSize); frag > 0 { + _, err := f.Write(chunk[:frag]) + if err != nil { + t.Fatal("write chunk:", err) + } + } + end := []byte("END\n") + _, err = f.Write(end) + if err != nil { + t.Fatal("write end:", err) + } + if err := w.Close(); err != nil { + t.Fatal(err) + } + + // read back zip file and check that we get to the end of it + r, err := NewReader(buf, int64(buf.Size())) + if err != nil { + t.Fatal("reader:", err) + } + f0 := r.File[0] + rc, err := f0.Open() + if err != nil { + t.Fatal("opening:", err) + } + rc.(*checksumReader).hash = fakeHash32{} + for i := 0; i < chunks; i++ { + _, err := io.ReadFull(rc, chunk) + if err != nil { + t.Fatal("read:", err) + } + } + if frag := int(size % chunkSize); frag > 0 { + _, err := io.ReadFull(rc, chunk[:frag]) + if err != nil { + t.Fatal("read:", err) + } + } + gotEnd, err := io.ReadAll(rc) + if err != nil { + t.Fatal("read end:", err) + } + if !bytes.Equal(gotEnd, end) { + t.Errorf("End of zip64 archive %q, want %q", gotEnd, end) + } + err = rc.Close() + if err != nil { + t.Fatal("closing:", err) + } + if size+int64(len("END\n")) >= 1<<32-1 { + if got, want := f0.UncompressedSize, uint32(uint32max); got != want { + t.Errorf("UncompressedSize %#x, want %#x", got, want) + } + } + + if got, want := f0.UncompressedSize64, uint64(size)+uint64(len(end)); got != want { + t.Errorf("UncompressedSize64 %#x, want %#x", got, want) + } + + return buf +} + +// Issue 9857 +func testZip64DirectoryRecordLength(buf *rleBuffer, t *testing.T) { + if !suffixIsZip64(t, buf) { + t.Fatal("not a zip64") + } +} + +func testValidHeader(h *FileHeader, t *testing.T) { + var buf bytes.Buffer + z := NewWriter(&buf) + + f, err := z.CreateHeader(h) + if err != nil { + t.Fatalf("error creating header: %v", err) + } + if _, err := f.Write([]byte("hi")); err != nil { + t.Fatalf("error writing content: %v", err) + } + if err := z.Close(); err != nil { + t.Fatalf("error closing zip writer: %v", err) + } + + b := buf.Bytes() + zf, err := NewReader(bytes.NewReader(b), int64(len(b))) + if err != nil { + t.Fatalf("got %v, expected nil", err) + } + zh := zf.File[0].FileHeader + if zh.Name != h.Name || zh.Method != h.Method || zh.UncompressedSize64 != uint64(len("hi")) { + t.Fatalf("got %q/%d/%d expected %q/%d/%d", zh.Name, zh.Method, zh.UncompressedSize64, h.Name, h.Method, len("hi")) + } +} + +// Issue 4302. +func TestHeaderInvalidTagAndSize(t *testing.T) { + const timeFormat = "20060102T150405.000.txt" + + ts := time.Now() + filename := ts.Format(timeFormat) + + h := FileHeader{ + Name: filename, + Method: Deflate, + Extra: []byte(ts.Format(time.RFC3339Nano)), // missing tag and len, but Extra is best-effort parsing + } + h.SetModTime(ts) + + testValidHeader(&h, t) +} + +func TestHeaderTooShort(t *testing.T) { + h := FileHeader{ + Name: "foo.txt", + Method: Deflate, + Extra: []byte{zip64ExtraID}, // missing size and second half of tag, but Extra is best-effort parsing + } + testValidHeader(&h, t) +} + +func TestHeaderTooLongErr(t *testing.T) { + var headerTests = []struct { + name string + extra []byte + wanterr error + }{ + { + name: strings.Repeat("x", 1<<16), + extra: []byte{}, + wanterr: errLongName, + }, + { + name: "long_extra", + extra: bytes.Repeat([]byte{0xff}, 1<<16), + wanterr: errLongExtra, + }, + } + + // write a zip file + buf := new(bytes.Buffer) + w := NewWriter(buf) + + for _, test := range headerTests { + h := &FileHeader{ + Name: test.name, + Extra: test.extra, + } + _, err := w.CreateHeader(h) + if err != test.wanterr { + t.Errorf("error=%v, want %v", err, test.wanterr) + } + } + + if err := w.Close(); err != nil { + t.Fatal(err) + } +} + +func TestHeaderIgnoredSize(t *testing.T) { + h := FileHeader{ + Name: "foo.txt", + Method: Deflate, + Extra: []byte{zip64ExtraID & 0xFF, zip64ExtraID >> 8, 24, 0, 1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8}, // bad size but shouldn't be consulted + } + testValidHeader(&h, t) +} + +// Issue 4393. It is valid to have an extra data header +// which contains no body. +func TestZeroLengthHeader(t *testing.T) { + h := FileHeader{ + Name: "extadata.txt", + Method: Deflate, + Extra: []byte{ + 85, 84, 5, 0, 3, 154, 144, 195, 77, // tag 21589 size 5 + 85, 120, 0, 0, // tag 30805 size 0 + }, + } + testValidHeader(&h, t) +} + +// Just benchmarking how fast the Zip64 test above is. Not related to +// our zip performance, since the test above disabled CRC32 and flate. +func BenchmarkZip64Test(b *testing.B) { + for i := 0; i < b.N; i++ { + testZip64(b, 1<<26) + } +} + +func BenchmarkZip64TestSizes(b *testing.B) { + for _, size := range []int64{1 << 12, 1 << 20, 1 << 26} { + b.Run(fmt.Sprint(size), func(b *testing.B) { + b.RunParallel(func(pb *testing.PB) { + for pb.Next() { + testZip64(b, size) + } + }) + }) + } +} + +func TestSuffixSaver(t *testing.T) { + const keep = 10 + ss := &suffixSaver{keep: keep} + ss.Write([]byte("abc")) + if got := string(ss.Suffix()); got != "abc" { + t.Errorf("got = %q; want abc", got) + } + ss.Write([]byte("defghijklmno")) + if got := string(ss.Suffix()); got != "fghijklmno" { + t.Errorf("got = %q; want fghijklmno", got) + } + if got, want := ss.Size(), int64(len("abc")+len("defghijklmno")); got != want { + t.Errorf("Size = %d; want %d", got, want) + } + buf := make([]byte, ss.Size()) + for off := int64(0); off < ss.Size(); off++ { + for size := 1; size <= int(ss.Size()-off); size++ { + readBuf := buf[:size] + n, err := ss.ReadAt(readBuf, off) + if off < ss.Size()-keep { + if err != errDiscardedBytes { + t.Errorf("off %d, size %d = %v, %v (%q); want errDiscardedBytes", off, size, n, err, readBuf[:n]) + } + continue + } + want := "abcdefghijklmno"[off : off+int64(size)] + got := string(readBuf[:n]) + if err != nil || got != want { + t.Errorf("off %d, size %d = %v, %v (%q); want %q", off, size, n, err, got, want) + } + } + } + +} + +type zeros struct{} + +func (zeros) Read(p []byte) (int, error) { + for i := range p { + p[i] = 0 + } + return len(p), nil +} |