diff options
author | Daniel Baumann <daniel.baumann@progress-linux.org> | 2024-04-28 13:14:23 +0000 |
---|---|---|
committer | Daniel Baumann <daniel.baumann@progress-linux.org> | 2024-04-28 13:14:23 +0000 |
commit | 73df946d56c74384511a194dd01dbe099584fd1a (patch) | |
tree | fd0bcea490dd81327ddfbb31e215439672c9a068 /src/archive | |
parent | Initial commit. (diff) | |
download | golang-1.16-73df946d56c74384511a194dd01dbe099584fd1a.tar.xz golang-1.16-73df946d56c74384511a194dd01dbe099584fd1a.zip |
Adding upstream version 1.16.10.upstream/1.16.10upstream
Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
Diffstat (limited to '')
93 files changed, 11945 insertions, 0 deletions
diff --git a/src/archive/tar/common.go b/src/archive/tar/common.go new file mode 100644 index 0000000..c667cfc --- /dev/null +++ b/src/archive/tar/common.go @@ -0,0 +1,723 @@ +// Copyright 2009 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +// Package tar implements access to tar archives. +// +// Tape archives (tar) are a file format for storing a sequence of files that +// can be read and written in a streaming manner. +// This package aims to cover most variations of the format, +// including those produced by GNU and BSD tar tools. +package tar + +import ( + "errors" + "fmt" + "io/fs" + "math" + "path" + "reflect" + "strconv" + "strings" + "time" +) + +// BUG: Use of the Uid and Gid fields in Header could overflow on 32-bit +// architectures. If a large value is encountered when decoding, the result +// stored in Header will be the truncated version. + +var ( + ErrHeader = errors.New("archive/tar: invalid tar header") + ErrWriteTooLong = errors.New("archive/tar: write too long") + ErrFieldTooLong = errors.New("archive/tar: header field too long") + ErrWriteAfterClose = errors.New("archive/tar: write after close") + errMissData = errors.New("archive/tar: sparse file references non-existent data") + errUnrefData = errors.New("archive/tar: sparse file contains unreferenced data") + errWriteHole = errors.New("archive/tar: write non-NUL byte in sparse hole") +) + +type headerError []string + +func (he headerError) Error() string { + const prefix = "archive/tar: cannot encode header" + var ss []string + for _, s := range he { + if s != "" { + ss = append(ss, s) + } + } + if len(ss) == 0 { + return prefix + } + return fmt.Sprintf("%s: %v", prefix, strings.Join(ss, "; and ")) +} + +// Type flags for Header.Typeflag. +const ( + // Type '0' indicates a regular file. + TypeReg = '0' + TypeRegA = '\x00' // Deprecated: Use TypeReg instead. + + // Type '1' to '6' are header-only flags and may not have a data body. + TypeLink = '1' // Hard link + TypeSymlink = '2' // Symbolic link + TypeChar = '3' // Character device node + TypeBlock = '4' // Block device node + TypeDir = '5' // Directory + TypeFifo = '6' // FIFO node + + // Type '7' is reserved. + TypeCont = '7' + + // Type 'x' is used by the PAX format to store key-value records that + // are only relevant to the next file. + // This package transparently handles these types. + TypeXHeader = 'x' + + // Type 'g' is used by the PAX format to store key-value records that + // are relevant to all subsequent files. + // This package only supports parsing and composing such headers, + // but does not currently support persisting the global state across files. + TypeXGlobalHeader = 'g' + + // Type 'S' indicates a sparse file in the GNU format. + TypeGNUSparse = 'S' + + // Types 'L' and 'K' are used by the GNU format for a meta file + // used to store the path or link name for the next file. + // This package transparently handles these types. + TypeGNULongName = 'L' + TypeGNULongLink = 'K' +) + +// Keywords for PAX extended header records. +const ( + paxNone = "" // Indicates that no PAX key is suitable + paxPath = "path" + paxLinkpath = "linkpath" + paxSize = "size" + paxUid = "uid" + paxGid = "gid" + paxUname = "uname" + paxGname = "gname" + paxMtime = "mtime" + paxAtime = "atime" + paxCtime = "ctime" // Removed from later revision of PAX spec, but was valid + paxCharset = "charset" // Currently unused + paxComment = "comment" // Currently unused + + paxSchilyXattr = "SCHILY.xattr." + + // Keywords for GNU sparse files in a PAX extended header. + paxGNUSparse = "GNU.sparse." + paxGNUSparseNumBlocks = "GNU.sparse.numblocks" + paxGNUSparseOffset = "GNU.sparse.offset" + paxGNUSparseNumBytes = "GNU.sparse.numbytes" + paxGNUSparseMap = "GNU.sparse.map" + paxGNUSparseName = "GNU.sparse.name" + paxGNUSparseMajor = "GNU.sparse.major" + paxGNUSparseMinor = "GNU.sparse.minor" + paxGNUSparseSize = "GNU.sparse.size" + paxGNUSparseRealSize = "GNU.sparse.realsize" +) + +// basicKeys is a set of the PAX keys for which we have built-in support. +// This does not contain "charset" or "comment", which are both PAX-specific, +// so adding them as first-class features of Header is unlikely. +// Users can use the PAXRecords field to set it themselves. +var basicKeys = map[string]bool{ + paxPath: true, paxLinkpath: true, paxSize: true, paxUid: true, paxGid: true, + paxUname: true, paxGname: true, paxMtime: true, paxAtime: true, paxCtime: true, +} + +// A Header represents a single header in a tar archive. +// Some fields may not be populated. +// +// For forward compatibility, users that retrieve a Header from Reader.Next, +// mutate it in some ways, and then pass it back to Writer.WriteHeader +// should do so by creating a new Header and copying the fields +// that they are interested in preserving. +type Header struct { + // Typeflag is the type of header entry. + // The zero value is automatically promoted to either TypeReg or TypeDir + // depending on the presence of a trailing slash in Name. + Typeflag byte + + Name string // Name of file entry + Linkname string // Target name of link (valid for TypeLink or TypeSymlink) + + Size int64 // Logical file size in bytes + Mode int64 // Permission and mode bits + Uid int // User ID of owner + Gid int // Group ID of owner + Uname string // User name of owner + Gname string // Group name of owner + + // If the Format is unspecified, then Writer.WriteHeader rounds ModTime + // to the nearest second and ignores the AccessTime and ChangeTime fields. + // + // To use AccessTime or ChangeTime, specify the Format as PAX or GNU. + // To use sub-second resolution, specify the Format as PAX. + ModTime time.Time // Modification time + AccessTime time.Time // Access time (requires either PAX or GNU support) + ChangeTime time.Time // Change time (requires either PAX or GNU support) + + Devmajor int64 // Major device number (valid for TypeChar or TypeBlock) + Devminor int64 // Minor device number (valid for TypeChar or TypeBlock) + + // Xattrs stores extended attributes as PAX records under the + // "SCHILY.xattr." namespace. + // + // The following are semantically equivalent: + // h.Xattrs[key] = value + // h.PAXRecords["SCHILY.xattr."+key] = value + // + // When Writer.WriteHeader is called, the contents of Xattrs will take + // precedence over those in PAXRecords. + // + // Deprecated: Use PAXRecords instead. + Xattrs map[string]string + + // PAXRecords is a map of PAX extended header records. + // + // User-defined records should have keys of the following form: + // VENDOR.keyword + // Where VENDOR is some namespace in all uppercase, and keyword may + // not contain the '=' character (e.g., "GOLANG.pkg.version"). + // The key and value should be non-empty UTF-8 strings. + // + // When Writer.WriteHeader is called, PAX records derived from the + // other fields in Header take precedence over PAXRecords. + PAXRecords map[string]string + + // Format specifies the format of the tar header. + // + // This is set by Reader.Next as a best-effort guess at the format. + // Since the Reader liberally reads some non-compliant files, + // it is possible for this to be FormatUnknown. + // + // If the format is unspecified when Writer.WriteHeader is called, + // then it uses the first format (in the order of USTAR, PAX, GNU) + // capable of encoding this Header (see Format). + Format Format +} + +// sparseEntry represents a Length-sized fragment at Offset in the file. +type sparseEntry struct{ Offset, Length int64 } + +func (s sparseEntry) endOffset() int64 { return s.Offset + s.Length } + +// A sparse file can be represented as either a sparseDatas or a sparseHoles. +// As long as the total size is known, they are equivalent and one can be +// converted to the other form and back. The various tar formats with sparse +// file support represent sparse files in the sparseDatas form. That is, they +// specify the fragments in the file that has data, and treat everything else as +// having zero bytes. As such, the encoding and decoding logic in this package +// deals with sparseDatas. +// +// However, the external API uses sparseHoles instead of sparseDatas because the +// zero value of sparseHoles logically represents a normal file (i.e., there are +// no holes in it). On the other hand, the zero value of sparseDatas implies +// that the file has no data in it, which is rather odd. +// +// As an example, if the underlying raw file contains the 10-byte data: +// var compactFile = "abcdefgh" +// +// And the sparse map has the following entries: +// var spd sparseDatas = []sparseEntry{ +// {Offset: 2, Length: 5}, // Data fragment for 2..6 +// {Offset: 18, Length: 3}, // Data fragment for 18..20 +// } +// var sph sparseHoles = []sparseEntry{ +// {Offset: 0, Length: 2}, // Hole fragment for 0..1 +// {Offset: 7, Length: 11}, // Hole fragment for 7..17 +// {Offset: 21, Length: 4}, // Hole fragment for 21..24 +// } +// +// Then the content of the resulting sparse file with a Header.Size of 25 is: +// var sparseFile = "\x00"*2 + "abcde" + "\x00"*11 + "fgh" + "\x00"*4 +type ( + sparseDatas []sparseEntry + sparseHoles []sparseEntry +) + +// validateSparseEntries reports whether sp is a valid sparse map. +// It does not matter whether sp represents data fragments or hole fragments. +func validateSparseEntries(sp []sparseEntry, size int64) bool { + // Validate all sparse entries. These are the same checks as performed by + // the BSD tar utility. + if size < 0 { + return false + } + var pre sparseEntry + for _, cur := range sp { + switch { + case cur.Offset < 0 || cur.Length < 0: + return false // Negative values are never okay + case cur.Offset > math.MaxInt64-cur.Length: + return false // Integer overflow with large length + case cur.endOffset() > size: + return false // Region extends beyond the actual size + case pre.endOffset() > cur.Offset: + return false // Regions cannot overlap and must be in order + } + pre = cur + } + return true +} + +// alignSparseEntries mutates src and returns dst where each fragment's +// starting offset is aligned up to the nearest block edge, and each +// ending offset is aligned down to the nearest block edge. +// +// Even though the Go tar Reader and the BSD tar utility can handle entries +// with arbitrary offsets and lengths, the GNU tar utility can only handle +// offsets and lengths that are multiples of blockSize. +func alignSparseEntries(src []sparseEntry, size int64) []sparseEntry { + dst := src[:0] + for _, s := range src { + pos, end := s.Offset, s.endOffset() + pos += blockPadding(+pos) // Round-up to nearest blockSize + if end != size { + end -= blockPadding(-end) // Round-down to nearest blockSize + } + if pos < end { + dst = append(dst, sparseEntry{Offset: pos, Length: end - pos}) + } + } + return dst +} + +// invertSparseEntries converts a sparse map from one form to the other. +// If the input is sparseHoles, then it will output sparseDatas and vice-versa. +// The input must have been already validated. +// +// This function mutates src and returns a normalized map where: +// * adjacent fragments are coalesced together +// * only the last fragment may be empty +// * the endOffset of the last fragment is the total size +func invertSparseEntries(src []sparseEntry, size int64) []sparseEntry { + dst := src[:0] + var pre sparseEntry + for _, cur := range src { + if cur.Length == 0 { + continue // Skip empty fragments + } + pre.Length = cur.Offset - pre.Offset + if pre.Length > 0 { + dst = append(dst, pre) // Only add non-empty fragments + } + pre.Offset = cur.endOffset() + } + pre.Length = size - pre.Offset // Possibly the only empty fragment + return append(dst, pre) +} + +// fileState tracks the number of logical (includes sparse holes) and physical +// (actual in tar archive) bytes remaining for the current file. +// +// Invariant: LogicalRemaining >= PhysicalRemaining +type fileState interface { + LogicalRemaining() int64 + PhysicalRemaining() int64 +} + +// allowedFormats determines which formats can be used. +// The value returned is the logical OR of multiple possible formats. +// If the value is FormatUnknown, then the input Header cannot be encoded +// and an error is returned explaining why. +// +// As a by-product of checking the fields, this function returns paxHdrs, which +// contain all fields that could not be directly encoded. +// A value receiver ensures that this method does not mutate the source Header. +func (h Header) allowedFormats() (format Format, paxHdrs map[string]string, err error) { + format = FormatUSTAR | FormatPAX | FormatGNU + paxHdrs = make(map[string]string) + + var whyNoUSTAR, whyNoPAX, whyNoGNU string + var preferPAX bool // Prefer PAX over USTAR + verifyString := func(s string, size int, name, paxKey string) { + // NUL-terminator is optional for path and linkpath. + // Technically, it is required for uname and gname, + // but neither GNU nor BSD tar checks for it. + tooLong := len(s) > size + allowLongGNU := paxKey == paxPath || paxKey == paxLinkpath + if hasNUL(s) || (tooLong && !allowLongGNU) { + whyNoGNU = fmt.Sprintf("GNU cannot encode %s=%q", name, s) + format.mustNotBe(FormatGNU) + } + if !isASCII(s) || tooLong { + canSplitUSTAR := paxKey == paxPath + if _, _, ok := splitUSTARPath(s); !canSplitUSTAR || !ok { + whyNoUSTAR = fmt.Sprintf("USTAR cannot encode %s=%q", name, s) + format.mustNotBe(FormatUSTAR) + } + if paxKey == paxNone { + whyNoPAX = fmt.Sprintf("PAX cannot encode %s=%q", name, s) + format.mustNotBe(FormatPAX) + } else { + paxHdrs[paxKey] = s + } + } + if v, ok := h.PAXRecords[paxKey]; ok && v == s { + paxHdrs[paxKey] = v + } + } + verifyNumeric := func(n int64, size int, name, paxKey string) { + if !fitsInBase256(size, n) { + whyNoGNU = fmt.Sprintf("GNU cannot encode %s=%d", name, n) + format.mustNotBe(FormatGNU) + } + if !fitsInOctal(size, n) { + whyNoUSTAR = fmt.Sprintf("USTAR cannot encode %s=%d", name, n) + format.mustNotBe(FormatUSTAR) + if paxKey == paxNone { + whyNoPAX = fmt.Sprintf("PAX cannot encode %s=%d", name, n) + format.mustNotBe(FormatPAX) + } else { + paxHdrs[paxKey] = strconv.FormatInt(n, 10) + } + } + if v, ok := h.PAXRecords[paxKey]; ok && v == strconv.FormatInt(n, 10) { + paxHdrs[paxKey] = v + } + } + verifyTime := func(ts time.Time, size int, name, paxKey string) { + if ts.IsZero() { + return // Always okay + } + if !fitsInBase256(size, ts.Unix()) { + whyNoGNU = fmt.Sprintf("GNU cannot encode %s=%v", name, ts) + format.mustNotBe(FormatGNU) + } + isMtime := paxKey == paxMtime + fitsOctal := fitsInOctal(size, ts.Unix()) + if (isMtime && !fitsOctal) || !isMtime { + whyNoUSTAR = fmt.Sprintf("USTAR cannot encode %s=%v", name, ts) + format.mustNotBe(FormatUSTAR) + } + needsNano := ts.Nanosecond() != 0 + if !isMtime || !fitsOctal || needsNano { + preferPAX = true // USTAR may truncate sub-second measurements + if paxKey == paxNone { + whyNoPAX = fmt.Sprintf("PAX cannot encode %s=%v", name, ts) + format.mustNotBe(FormatPAX) + } else { + paxHdrs[paxKey] = formatPAXTime(ts) + } + } + if v, ok := h.PAXRecords[paxKey]; ok && v == formatPAXTime(ts) { + paxHdrs[paxKey] = v + } + } + + // Check basic fields. + var blk block + v7 := blk.V7() + ustar := blk.USTAR() + gnu := blk.GNU() + verifyString(h.Name, len(v7.Name()), "Name", paxPath) + verifyString(h.Linkname, len(v7.LinkName()), "Linkname", paxLinkpath) + verifyString(h.Uname, len(ustar.UserName()), "Uname", paxUname) + verifyString(h.Gname, len(ustar.GroupName()), "Gname", paxGname) + verifyNumeric(h.Mode, len(v7.Mode()), "Mode", paxNone) + verifyNumeric(int64(h.Uid), len(v7.UID()), "Uid", paxUid) + verifyNumeric(int64(h.Gid), len(v7.GID()), "Gid", paxGid) + verifyNumeric(h.Size, len(v7.Size()), "Size", paxSize) + verifyNumeric(h.Devmajor, len(ustar.DevMajor()), "Devmajor", paxNone) + verifyNumeric(h.Devminor, len(ustar.DevMinor()), "Devminor", paxNone) + verifyTime(h.ModTime, len(v7.ModTime()), "ModTime", paxMtime) + verifyTime(h.AccessTime, len(gnu.AccessTime()), "AccessTime", paxAtime) + verifyTime(h.ChangeTime, len(gnu.ChangeTime()), "ChangeTime", paxCtime) + + // Check for header-only types. + var whyOnlyPAX, whyOnlyGNU string + switch h.Typeflag { + case TypeReg, TypeChar, TypeBlock, TypeFifo, TypeGNUSparse: + // Exclude TypeLink and TypeSymlink, since they may reference directories. + if strings.HasSuffix(h.Name, "/") { + return FormatUnknown, nil, headerError{"filename may not have trailing slash"} + } + case TypeXHeader, TypeGNULongName, TypeGNULongLink: + return FormatUnknown, nil, headerError{"cannot manually encode TypeXHeader, TypeGNULongName, or TypeGNULongLink headers"} + case TypeXGlobalHeader: + h2 := Header{Name: h.Name, Typeflag: h.Typeflag, Xattrs: h.Xattrs, PAXRecords: h.PAXRecords, Format: h.Format} + if !reflect.DeepEqual(h, h2) { + return FormatUnknown, nil, headerError{"only PAXRecords should be set for TypeXGlobalHeader"} + } + whyOnlyPAX = "only PAX supports TypeXGlobalHeader" + format.mayOnlyBe(FormatPAX) + } + if !isHeaderOnlyType(h.Typeflag) && h.Size < 0 { + return FormatUnknown, nil, headerError{"negative size on header-only type"} + } + + // Check PAX records. + if len(h.Xattrs) > 0 { + for k, v := range h.Xattrs { + paxHdrs[paxSchilyXattr+k] = v + } + whyOnlyPAX = "only PAX supports Xattrs" + format.mayOnlyBe(FormatPAX) + } + if len(h.PAXRecords) > 0 { + for k, v := range h.PAXRecords { + switch _, exists := paxHdrs[k]; { + case exists: + continue // Do not overwrite existing records + case h.Typeflag == TypeXGlobalHeader: + paxHdrs[k] = v // Copy all records + case !basicKeys[k] && !strings.HasPrefix(k, paxGNUSparse): + paxHdrs[k] = v // Ignore local records that may conflict + } + } + whyOnlyPAX = "only PAX supports PAXRecords" + format.mayOnlyBe(FormatPAX) + } + for k, v := range paxHdrs { + if !validPAXRecord(k, v) { + return FormatUnknown, nil, headerError{fmt.Sprintf("invalid PAX record: %q", k+" = "+v)} + } + } + + // TODO(dsnet): Re-enable this when adding sparse support. + // See https://golang.org/issue/22735 + /* + // Check sparse files. + if len(h.SparseHoles) > 0 || h.Typeflag == TypeGNUSparse { + if isHeaderOnlyType(h.Typeflag) { + return FormatUnknown, nil, headerError{"header-only type cannot be sparse"} + } + if !validateSparseEntries(h.SparseHoles, h.Size) { + return FormatUnknown, nil, headerError{"invalid sparse holes"} + } + if h.Typeflag == TypeGNUSparse { + whyOnlyGNU = "only GNU supports TypeGNUSparse" + format.mayOnlyBe(FormatGNU) + } else { + whyNoGNU = "GNU supports sparse files only with TypeGNUSparse" + format.mustNotBe(FormatGNU) + } + whyNoUSTAR = "USTAR does not support sparse files" + format.mustNotBe(FormatUSTAR) + } + */ + + // Check desired format. + if wantFormat := h.Format; wantFormat != FormatUnknown { + if wantFormat.has(FormatPAX) && !preferPAX { + wantFormat.mayBe(FormatUSTAR) // PAX implies USTAR allowed too + } + format.mayOnlyBe(wantFormat) // Set union of formats allowed and format wanted + } + if format == FormatUnknown { + switch h.Format { + case FormatUSTAR: + err = headerError{"Format specifies USTAR", whyNoUSTAR, whyOnlyPAX, whyOnlyGNU} + case FormatPAX: + err = headerError{"Format specifies PAX", whyNoPAX, whyOnlyGNU} + case FormatGNU: + err = headerError{"Format specifies GNU", whyNoGNU, whyOnlyPAX} + default: + err = headerError{whyNoUSTAR, whyNoPAX, whyNoGNU, whyOnlyPAX, whyOnlyGNU} + } + } + return format, paxHdrs, err +} + +// FileInfo returns an fs.FileInfo for the Header. +func (h *Header) FileInfo() fs.FileInfo { + return headerFileInfo{h} +} + +// headerFileInfo implements fs.FileInfo. +type headerFileInfo struct { + h *Header +} + +func (fi headerFileInfo) Size() int64 { return fi.h.Size } +func (fi headerFileInfo) IsDir() bool { return fi.Mode().IsDir() } +func (fi headerFileInfo) ModTime() time.Time { return fi.h.ModTime } +func (fi headerFileInfo) Sys() interface{} { return fi.h } + +// Name returns the base name of the file. +func (fi headerFileInfo) Name() string { + if fi.IsDir() { + return path.Base(path.Clean(fi.h.Name)) + } + return path.Base(fi.h.Name) +} + +// Mode returns the permission and mode bits for the headerFileInfo. +func (fi headerFileInfo) Mode() (mode fs.FileMode) { + // Set file permission bits. + mode = fs.FileMode(fi.h.Mode).Perm() + + // Set setuid, setgid and sticky bits. + if fi.h.Mode&c_ISUID != 0 { + mode |= fs.ModeSetuid + } + if fi.h.Mode&c_ISGID != 0 { + mode |= fs.ModeSetgid + } + if fi.h.Mode&c_ISVTX != 0 { + mode |= fs.ModeSticky + } + + // Set file mode bits; clear perm, setuid, setgid, and sticky bits. + switch m := fs.FileMode(fi.h.Mode) &^ 07777; m { + case c_ISDIR: + mode |= fs.ModeDir + case c_ISFIFO: + mode |= fs.ModeNamedPipe + case c_ISLNK: + mode |= fs.ModeSymlink + case c_ISBLK: + mode |= fs.ModeDevice + case c_ISCHR: + mode |= fs.ModeDevice + mode |= fs.ModeCharDevice + case c_ISSOCK: + mode |= fs.ModeSocket + } + + switch fi.h.Typeflag { + case TypeSymlink: + mode |= fs.ModeSymlink + case TypeChar: + mode |= fs.ModeDevice + mode |= fs.ModeCharDevice + case TypeBlock: + mode |= fs.ModeDevice + case TypeDir: + mode |= fs.ModeDir + case TypeFifo: + mode |= fs.ModeNamedPipe + } + + return mode +} + +// sysStat, if non-nil, populates h from system-dependent fields of fi. +var sysStat func(fi fs.FileInfo, h *Header) error + +const ( + // Mode constants from the USTAR spec: + // See http://pubs.opengroup.org/onlinepubs/9699919799/utilities/pax.html#tag_20_92_13_06 + c_ISUID = 04000 // Set uid + c_ISGID = 02000 // Set gid + c_ISVTX = 01000 // Save text (sticky bit) + + // Common Unix mode constants; these are not defined in any common tar standard. + // Header.FileInfo understands these, but FileInfoHeader will never produce these. + c_ISDIR = 040000 // Directory + c_ISFIFO = 010000 // FIFO + c_ISREG = 0100000 // Regular file + c_ISLNK = 0120000 // Symbolic link + c_ISBLK = 060000 // Block special file + c_ISCHR = 020000 // Character special file + c_ISSOCK = 0140000 // Socket +) + +// FileInfoHeader creates a partially-populated Header from fi. +// If fi describes a symlink, FileInfoHeader records link as the link target. +// If fi describes a directory, a slash is appended to the name. +// +// Since fs.FileInfo's Name method only returns the base name of +// the file it describes, it may be necessary to modify Header.Name +// to provide the full path name of the file. +func FileInfoHeader(fi fs.FileInfo, link string) (*Header, error) { + if fi == nil { + return nil, errors.New("archive/tar: FileInfo is nil") + } + fm := fi.Mode() + h := &Header{ + Name: fi.Name(), + ModTime: fi.ModTime(), + Mode: int64(fm.Perm()), // or'd with c_IS* constants later + } + switch { + case fm.IsRegular(): + h.Typeflag = TypeReg + h.Size = fi.Size() + case fi.IsDir(): + h.Typeflag = TypeDir + h.Name += "/" + case fm&fs.ModeSymlink != 0: + h.Typeflag = TypeSymlink + h.Linkname = link + case fm&fs.ModeDevice != 0: + if fm&fs.ModeCharDevice != 0 { + h.Typeflag = TypeChar + } else { + h.Typeflag = TypeBlock + } + case fm&fs.ModeNamedPipe != 0: + h.Typeflag = TypeFifo + case fm&fs.ModeSocket != 0: + return nil, fmt.Errorf("archive/tar: sockets not supported") + default: + return nil, fmt.Errorf("archive/tar: unknown file mode %v", fm) + } + if fm&fs.ModeSetuid != 0 { + h.Mode |= c_ISUID + } + if fm&fs.ModeSetgid != 0 { + h.Mode |= c_ISGID + } + if fm&fs.ModeSticky != 0 { + h.Mode |= c_ISVTX + } + // If possible, populate additional fields from OS-specific + // FileInfo fields. + if sys, ok := fi.Sys().(*Header); ok { + // This FileInfo came from a Header (not the OS). Use the + // original Header to populate all remaining fields. + h.Uid = sys.Uid + h.Gid = sys.Gid + h.Uname = sys.Uname + h.Gname = sys.Gname + h.AccessTime = sys.AccessTime + h.ChangeTime = sys.ChangeTime + if sys.Xattrs != nil { + h.Xattrs = make(map[string]string) + for k, v := range sys.Xattrs { + h.Xattrs[k] = v + } + } + if sys.Typeflag == TypeLink { + // hard link + h.Typeflag = TypeLink + h.Size = 0 + h.Linkname = sys.Linkname + } + if sys.PAXRecords != nil { + h.PAXRecords = make(map[string]string) + for k, v := range sys.PAXRecords { + h.PAXRecords[k] = v + } + } + } + if sysStat != nil { + return h, sysStat(fi, h) + } + return h, nil +} + +// isHeaderOnlyType checks if the given type flag is of the type that has no +// data section even if a size is specified. +func isHeaderOnlyType(flag byte) bool { + switch flag { + case TypeLink, TypeSymlink, TypeChar, TypeBlock, TypeDir, TypeFifo: + return true + default: + return false + } +} + +func min(a, b int64) int64 { + if a < b { + return a + } + return b +} diff --git a/src/archive/tar/example_test.go b/src/archive/tar/example_test.go new file mode 100644 index 0000000..a2474b9 --- /dev/null +++ b/src/archive/tar/example_test.go @@ -0,0 +1,71 @@ +// Copyright 2013 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package tar_test + +import ( + "archive/tar" + "bytes" + "fmt" + "io" + "log" + "os" +) + +func Example_minimal() { + // Create and add some files to the archive. + var buf bytes.Buffer + tw := tar.NewWriter(&buf) + var files = []struct { + Name, Body string + }{ + {"readme.txt", "This archive contains some text files."}, + {"gopher.txt", "Gopher names:\nGeorge\nGeoffrey\nGonzo"}, + {"todo.txt", "Get animal handling license."}, + } + for _, file := range files { + hdr := &tar.Header{ + Name: file.Name, + Mode: 0600, + Size: int64(len(file.Body)), + } + if err := tw.WriteHeader(hdr); err != nil { + log.Fatal(err) + } + if _, err := tw.Write([]byte(file.Body)); err != nil { + log.Fatal(err) + } + } + if err := tw.Close(); err != nil { + log.Fatal(err) + } + + // Open and iterate through the files in the archive. + tr := tar.NewReader(&buf) + for { + hdr, err := tr.Next() + if err == io.EOF { + break // End of archive + } + if err != nil { + log.Fatal(err) + } + fmt.Printf("Contents of %s:\n", hdr.Name) + if _, err := io.Copy(os.Stdout, tr); err != nil { + log.Fatal(err) + } + fmt.Println() + } + + // Output: + // Contents of readme.txt: + // This archive contains some text files. + // Contents of gopher.txt: + // Gopher names: + // George + // Geoffrey + // Gonzo + // Contents of todo.txt: + // Get animal handling license. +} diff --git a/src/archive/tar/format.go b/src/archive/tar/format.go new file mode 100644 index 0000000..cfe24a5 --- /dev/null +++ b/src/archive/tar/format.go @@ -0,0 +1,303 @@ +// Copyright 2016 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package tar + +import "strings" + +// Format represents the tar archive format. +// +// The original tar format was introduced in Unix V7. +// Since then, there have been multiple competing formats attempting to +// standardize or extend the V7 format to overcome its limitations. +// The most common formats are the USTAR, PAX, and GNU formats, +// each with their own advantages and limitations. +// +// The following table captures the capabilities of each format: +// +// | USTAR | PAX | GNU +// ------------------+--------+-----------+---------- +// Name | 256B | unlimited | unlimited +// Linkname | 100B | unlimited | unlimited +// Size | uint33 | unlimited | uint89 +// Mode | uint21 | uint21 | uint57 +// Uid/Gid | uint21 | unlimited | uint57 +// Uname/Gname | 32B | unlimited | 32B +// ModTime | uint33 | unlimited | int89 +// AccessTime | n/a | unlimited | int89 +// ChangeTime | n/a | unlimited | int89 +// Devmajor/Devminor | uint21 | uint21 | uint57 +// ------------------+--------+-----------+---------- +// string encoding | ASCII | UTF-8 | binary +// sub-second times | no | yes | no +// sparse files | no | yes | yes +// +// The table's upper portion shows the Header fields, where each format reports +// the maximum number of bytes allowed for each string field and +// the integer type used to store each numeric field +// (where timestamps are stored as the number of seconds since the Unix epoch). +// +// The table's lower portion shows specialized features of each format, +// such as supported string encodings, support for sub-second timestamps, +// or support for sparse files. +// +// The Writer currently provides no support for sparse files. +type Format int + +// Constants to identify various tar formats. +const ( + // Deliberately hide the meaning of constants from public API. + _ Format = (1 << iota) / 4 // Sequence of 0, 0, 1, 2, 4, 8, etc... + + // FormatUnknown indicates that the format is unknown. + FormatUnknown + + // The format of the original Unix V7 tar tool prior to standardization. + formatV7 + + // FormatUSTAR represents the USTAR header format defined in POSIX.1-1988. + // + // While this format is compatible with most tar readers, + // the format has several limitations making it unsuitable for some usages. + // Most notably, it cannot support sparse files, files larger than 8GiB, + // filenames larger than 256 characters, and non-ASCII filenames. + // + // Reference: + // http://pubs.opengroup.org/onlinepubs/9699919799/utilities/pax.html#tag_20_92_13_06 + FormatUSTAR + + // FormatPAX represents the PAX header format defined in POSIX.1-2001. + // + // PAX extends USTAR by writing a special file with Typeflag TypeXHeader + // preceding the original header. This file contains a set of key-value + // records, which are used to overcome USTAR's shortcomings, in addition to + // providing the ability to have sub-second resolution for timestamps. + // + // Some newer formats add their own extensions to PAX by defining their + // own keys and assigning certain semantic meaning to the associated values. + // For example, sparse file support in PAX is implemented using keys + // defined by the GNU manual (e.g., "GNU.sparse.map"). + // + // Reference: + // http://pubs.opengroup.org/onlinepubs/009695399/utilities/pax.html + FormatPAX + + // FormatGNU represents the GNU header format. + // + // The GNU header format is older than the USTAR and PAX standards and + // is not compatible with them. The GNU format supports + // arbitrary file sizes, filenames of arbitrary encoding and length, + // sparse files, and other features. + // + // It is recommended that PAX be chosen over GNU unless the target + // application can only parse GNU formatted archives. + // + // Reference: + // https://www.gnu.org/software/tar/manual/html_node/Standard.html + FormatGNU + + // Schily's tar format, which is incompatible with USTAR. + // This does not cover STAR extensions to the PAX format; these fall under + // the PAX format. + formatSTAR + + formatMax +) + +func (f Format) has(f2 Format) bool { return f&f2 != 0 } +func (f *Format) mayBe(f2 Format) { *f |= f2 } +func (f *Format) mayOnlyBe(f2 Format) { *f &= f2 } +func (f *Format) mustNotBe(f2 Format) { *f &^= f2 } + +var formatNames = map[Format]string{ + formatV7: "V7", FormatUSTAR: "USTAR", FormatPAX: "PAX", FormatGNU: "GNU", formatSTAR: "STAR", +} + +func (f Format) String() string { + var ss []string + for f2 := Format(1); f2 < formatMax; f2 <<= 1 { + if f.has(f2) { + ss = append(ss, formatNames[f2]) + } + } + switch len(ss) { + case 0: + return "<unknown>" + case 1: + return ss[0] + default: + return "(" + strings.Join(ss, " | ") + ")" + } +} + +// Magics used to identify various formats. +const ( + magicGNU, versionGNU = "ustar ", " \x00" + magicUSTAR, versionUSTAR = "ustar\x00", "00" + trailerSTAR = "tar\x00" +) + +// Size constants from various tar specifications. +const ( + blockSize = 512 // Size of each block in a tar stream + nameSize = 100 // Max length of the name field in USTAR format + prefixSize = 155 // Max length of the prefix field in USTAR format +) + +// blockPadding computes the number of bytes needed to pad offset up to the +// nearest block edge where 0 <= n < blockSize. +func blockPadding(offset int64) (n int64) { + return -offset & (blockSize - 1) +} + +var zeroBlock block + +type block [blockSize]byte + +// Convert block to any number of formats. +func (b *block) V7() *headerV7 { return (*headerV7)(b) } +func (b *block) GNU() *headerGNU { return (*headerGNU)(b) } +func (b *block) STAR() *headerSTAR { return (*headerSTAR)(b) } +func (b *block) USTAR() *headerUSTAR { return (*headerUSTAR)(b) } +func (b *block) Sparse() sparseArray { return sparseArray(b[:]) } + +// GetFormat checks that the block is a valid tar header based on the checksum. +// It then attempts to guess the specific format based on magic values. +// If the checksum fails, then FormatUnknown is returned. +func (b *block) GetFormat() Format { + // Verify checksum. + var p parser + value := p.parseOctal(b.V7().Chksum()) + chksum1, chksum2 := b.ComputeChecksum() + if p.err != nil || (value != chksum1 && value != chksum2) { + return FormatUnknown + } + + // Guess the magic values. + magic := string(b.USTAR().Magic()) + version := string(b.USTAR().Version()) + trailer := string(b.STAR().Trailer()) + switch { + case magic == magicUSTAR && trailer == trailerSTAR: + return formatSTAR + case magic == magicUSTAR: + return FormatUSTAR | FormatPAX + case magic == magicGNU && version == versionGNU: + return FormatGNU + default: + return formatV7 + } +} + +// SetFormat writes the magic values necessary for specified format +// and then updates the checksum accordingly. +func (b *block) SetFormat(format Format) { + // Set the magic values. + switch { + case format.has(formatV7): + // Do nothing. + case format.has(FormatGNU): + copy(b.GNU().Magic(), magicGNU) + copy(b.GNU().Version(), versionGNU) + case format.has(formatSTAR): + copy(b.STAR().Magic(), magicUSTAR) + copy(b.STAR().Version(), versionUSTAR) + copy(b.STAR().Trailer(), trailerSTAR) + case format.has(FormatUSTAR | FormatPAX): + copy(b.USTAR().Magic(), magicUSTAR) + copy(b.USTAR().Version(), versionUSTAR) + default: + panic("invalid format") + } + + // Update checksum. + // This field is special in that it is terminated by a NULL then space. + var f formatter + field := b.V7().Chksum() + chksum, _ := b.ComputeChecksum() // Possible values are 256..128776 + f.formatOctal(field[:7], chksum) // Never fails since 128776 < 262143 + field[7] = ' ' +} + +// ComputeChecksum computes the checksum for the header block. +// POSIX specifies a sum of the unsigned byte values, but the Sun tar used +// signed byte values. +// We compute and return both. +func (b *block) ComputeChecksum() (unsigned, signed int64) { + for i, c := range b { + if 148 <= i && i < 156 { + c = ' ' // Treat the checksum field itself as all spaces. + } + unsigned += int64(c) + signed += int64(int8(c)) + } + return unsigned, signed +} + +// Reset clears the block with all zeros. +func (b *block) Reset() { + *b = block{} +} + +type headerV7 [blockSize]byte + +func (h *headerV7) Name() []byte { return h[000:][:100] } +func (h *headerV7) Mode() []byte { return h[100:][:8] } +func (h *headerV7) UID() []byte { return h[108:][:8] } +func (h *headerV7) GID() []byte { return h[116:][:8] } +func (h *headerV7) Size() []byte { return h[124:][:12] } +func (h *headerV7) ModTime() []byte { return h[136:][:12] } +func (h *headerV7) Chksum() []byte { return h[148:][:8] } +func (h *headerV7) TypeFlag() []byte { return h[156:][:1] } +func (h *headerV7) LinkName() []byte { return h[157:][:100] } + +type headerGNU [blockSize]byte + +func (h *headerGNU) V7() *headerV7 { return (*headerV7)(h) } +func (h *headerGNU) Magic() []byte { return h[257:][:6] } +func (h *headerGNU) Version() []byte { return h[263:][:2] } +func (h *headerGNU) UserName() []byte { return h[265:][:32] } +func (h *headerGNU) GroupName() []byte { return h[297:][:32] } +func (h *headerGNU) DevMajor() []byte { return h[329:][:8] } +func (h *headerGNU) DevMinor() []byte { return h[337:][:8] } +func (h *headerGNU) AccessTime() []byte { return h[345:][:12] } +func (h *headerGNU) ChangeTime() []byte { return h[357:][:12] } +func (h *headerGNU) Sparse() sparseArray { return sparseArray(h[386:][:24*4+1]) } +func (h *headerGNU) RealSize() []byte { return h[483:][:12] } + +type headerSTAR [blockSize]byte + +func (h *headerSTAR) V7() *headerV7 { return (*headerV7)(h) } +func (h *headerSTAR) Magic() []byte { return h[257:][:6] } +func (h *headerSTAR) Version() []byte { return h[263:][:2] } +func (h *headerSTAR) UserName() []byte { return h[265:][:32] } +func (h *headerSTAR) GroupName() []byte { return h[297:][:32] } +func (h *headerSTAR) DevMajor() []byte { return h[329:][:8] } +func (h *headerSTAR) DevMinor() []byte { return h[337:][:8] } +func (h *headerSTAR) Prefix() []byte { return h[345:][:131] } +func (h *headerSTAR) AccessTime() []byte { return h[476:][:12] } +func (h *headerSTAR) ChangeTime() []byte { return h[488:][:12] } +func (h *headerSTAR) Trailer() []byte { return h[508:][:4] } + +type headerUSTAR [blockSize]byte + +func (h *headerUSTAR) V7() *headerV7 { return (*headerV7)(h) } +func (h *headerUSTAR) Magic() []byte { return h[257:][:6] } +func (h *headerUSTAR) Version() []byte { return h[263:][:2] } +func (h *headerUSTAR) UserName() []byte { return h[265:][:32] } +func (h *headerUSTAR) GroupName() []byte { return h[297:][:32] } +func (h *headerUSTAR) DevMajor() []byte { return h[329:][:8] } +func (h *headerUSTAR) DevMinor() []byte { return h[337:][:8] } +func (h *headerUSTAR) Prefix() []byte { return h[345:][:155] } + +type sparseArray []byte + +func (s sparseArray) Entry(i int) sparseElem { return sparseElem(s[i*24:]) } +func (s sparseArray) IsExtended() []byte { return s[24*s.MaxEntries():][:1] } +func (s sparseArray) MaxEntries() int { return len(s) / 24 } + +type sparseElem []byte + +func (s sparseElem) Offset() []byte { return s[00:][:12] } +func (s sparseElem) Length() []byte { return s[12:][:12] } diff --git a/src/archive/tar/reader.go b/src/archive/tar/reader.go new file mode 100644 index 0000000..1b1d5b4 --- /dev/null +++ b/src/archive/tar/reader.go @@ -0,0 +1,857 @@ +// Copyright 2009 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package tar + +import ( + "bytes" + "io" + "strconv" + "strings" + "time" +) + +// Reader provides sequential access to the contents of a tar archive. +// Reader.Next advances to the next file in the archive (including the first), +// and then Reader can be treated as an io.Reader to access the file's data. +type Reader struct { + r io.Reader + pad int64 // Amount of padding (ignored) after current file entry + curr fileReader // Reader for current file entry + blk block // Buffer to use as temporary local storage + + // err is a persistent error. + // It is only the responsibility of every exported method of Reader to + // ensure that this error is sticky. + err error +} + +type fileReader interface { + io.Reader + fileState + + WriteTo(io.Writer) (int64, error) +} + +// NewReader creates a new Reader reading from r. +func NewReader(r io.Reader) *Reader { + return &Reader{r: r, curr: ®FileReader{r, 0}} +} + +// Next advances to the next entry in the tar archive. +// The Header.Size determines how many bytes can be read for the next file. +// Any remaining data in the current file is automatically discarded. +// +// io.EOF is returned at the end of the input. +func (tr *Reader) Next() (*Header, error) { + if tr.err != nil { + return nil, tr.err + } + hdr, err := tr.next() + tr.err = err + return hdr, err +} + +func (tr *Reader) next() (*Header, error) { + var paxHdrs map[string]string + var gnuLongName, gnuLongLink string + + // Externally, Next iterates through the tar archive as if it is a series of + // files. Internally, the tar format often uses fake "files" to add meta + // data that describes the next file. These meta data "files" should not + // normally be visible to the outside. As such, this loop iterates through + // one or more "header files" until it finds a "normal file". + format := FormatUSTAR | FormatPAX | FormatGNU + for { + // Discard the remainder of the file and any padding. + if err := discard(tr.r, tr.curr.PhysicalRemaining()); err != nil { + return nil, err + } + if _, err := tryReadFull(tr.r, tr.blk[:tr.pad]); err != nil { + return nil, err + } + tr.pad = 0 + + hdr, rawHdr, err := tr.readHeader() + if err != nil { + return nil, err + } + if err := tr.handleRegularFile(hdr); err != nil { + return nil, err + } + format.mayOnlyBe(hdr.Format) + + // Check for PAX/GNU special headers and files. + switch hdr.Typeflag { + case TypeXHeader, TypeXGlobalHeader: + format.mayOnlyBe(FormatPAX) + paxHdrs, err = parsePAX(tr) + if err != nil { + return nil, err + } + if hdr.Typeflag == TypeXGlobalHeader { + mergePAX(hdr, paxHdrs) + return &Header{ + Name: hdr.Name, + Typeflag: hdr.Typeflag, + Xattrs: hdr.Xattrs, + PAXRecords: hdr.PAXRecords, + Format: format, + }, nil + } + continue // This is a meta header affecting the next header + case TypeGNULongName, TypeGNULongLink: + format.mayOnlyBe(FormatGNU) + realname, err := io.ReadAll(tr) + if err != nil { + return nil, err + } + + var p parser + switch hdr.Typeflag { + case TypeGNULongName: + gnuLongName = p.parseString(realname) + case TypeGNULongLink: + gnuLongLink = p.parseString(realname) + } + continue // This is a meta header affecting the next header + default: + // The old GNU sparse format is handled here since it is technically + // just a regular file with additional attributes. + + if err := mergePAX(hdr, paxHdrs); err != nil { + return nil, err + } + if gnuLongName != "" { + hdr.Name = gnuLongName + } + if gnuLongLink != "" { + hdr.Linkname = gnuLongLink + } + if hdr.Typeflag == TypeRegA { + if strings.HasSuffix(hdr.Name, "/") { + hdr.Typeflag = TypeDir // Legacy archives use trailing slash for directories + } else { + hdr.Typeflag = TypeReg + } + } + + // The extended headers may have updated the size. + // Thus, setup the regFileReader again after merging PAX headers. + if err := tr.handleRegularFile(hdr); err != nil { + return nil, err + } + + // Sparse formats rely on being able to read from the logical data + // section; there must be a preceding call to handleRegularFile. + if err := tr.handleSparseFile(hdr, rawHdr); err != nil { + return nil, err + } + + // Set the final guess at the format. + if format.has(FormatUSTAR) && format.has(FormatPAX) { + format.mayOnlyBe(FormatUSTAR) + } + hdr.Format = format + return hdr, nil // This is a file, so stop + } + } +} + +// handleRegularFile sets up the current file reader and padding such that it +// can only read the following logical data section. It will properly handle +// special headers that contain no data section. +func (tr *Reader) handleRegularFile(hdr *Header) error { + nb := hdr.Size + if isHeaderOnlyType(hdr.Typeflag) { + nb = 0 + } + if nb < 0 { + return ErrHeader + } + + tr.pad = blockPadding(nb) + tr.curr = ®FileReader{r: tr.r, nb: nb} + return nil +} + +// handleSparseFile checks if the current file is a sparse format of any type +// and sets the curr reader appropriately. +func (tr *Reader) handleSparseFile(hdr *Header, rawHdr *block) error { + var spd sparseDatas + var err error + if hdr.Typeflag == TypeGNUSparse { + spd, err = tr.readOldGNUSparseMap(hdr, rawHdr) + } else { + spd, err = tr.readGNUSparsePAXHeaders(hdr) + } + + // If sp is non-nil, then this is a sparse file. + // Note that it is possible for len(sp) == 0. + if err == nil && spd != nil { + if isHeaderOnlyType(hdr.Typeflag) || !validateSparseEntries(spd, hdr.Size) { + return ErrHeader + } + sph := invertSparseEntries(spd, hdr.Size) + tr.curr = &sparseFileReader{tr.curr, sph, 0} + } + return err +} + +// readGNUSparsePAXHeaders checks the PAX headers for GNU sparse headers. +// If they are found, then this function reads the sparse map and returns it. +// This assumes that 0.0 headers have already been converted to 0.1 headers +// by the PAX header parsing logic. +func (tr *Reader) readGNUSparsePAXHeaders(hdr *Header) (sparseDatas, error) { + // Identify the version of GNU headers. + var is1x0 bool + major, minor := hdr.PAXRecords[paxGNUSparseMajor], hdr.PAXRecords[paxGNUSparseMinor] + switch { + case major == "0" && (minor == "0" || minor == "1"): + is1x0 = false + case major == "1" && minor == "0": + is1x0 = true + case major != "" || minor != "": + return nil, nil // Unknown GNU sparse PAX version + case hdr.PAXRecords[paxGNUSparseMap] != "": + is1x0 = false // 0.0 and 0.1 did not have explicit version records, so guess + default: + return nil, nil // Not a PAX format GNU sparse file. + } + hdr.Format.mayOnlyBe(FormatPAX) + + // Update hdr from GNU sparse PAX headers. + if name := hdr.PAXRecords[paxGNUSparseName]; name != "" { + hdr.Name = name + } + size := hdr.PAXRecords[paxGNUSparseSize] + if size == "" { + size = hdr.PAXRecords[paxGNUSparseRealSize] + } + if size != "" { + n, err := strconv.ParseInt(size, 10, 64) + if err != nil { + return nil, ErrHeader + } + hdr.Size = n + } + + // Read the sparse map according to the appropriate format. + if is1x0 { + return readGNUSparseMap1x0(tr.curr) + } + return readGNUSparseMap0x1(hdr.PAXRecords) +} + +// mergePAX merges paxHdrs into hdr for all relevant fields of Header. +func mergePAX(hdr *Header, paxHdrs map[string]string) (err error) { + for k, v := range paxHdrs { + if v == "" { + continue // Keep the original USTAR value + } + var id64 int64 + switch k { + case paxPath: + hdr.Name = v + case paxLinkpath: + hdr.Linkname = v + case paxUname: + hdr.Uname = v + case paxGname: + hdr.Gname = v + case paxUid: + id64, err = strconv.ParseInt(v, 10, 64) + hdr.Uid = int(id64) // Integer overflow possible + case paxGid: + id64, err = strconv.ParseInt(v, 10, 64) + hdr.Gid = int(id64) // Integer overflow possible + case paxAtime: + hdr.AccessTime, err = parsePAXTime(v) + case paxMtime: + hdr.ModTime, err = parsePAXTime(v) + case paxCtime: + hdr.ChangeTime, err = parsePAXTime(v) + case paxSize: + hdr.Size, err = strconv.ParseInt(v, 10, 64) + default: + if strings.HasPrefix(k, paxSchilyXattr) { + if hdr.Xattrs == nil { + hdr.Xattrs = make(map[string]string) + } + hdr.Xattrs[k[len(paxSchilyXattr):]] = v + } + } + if err != nil { + return ErrHeader + } + } + hdr.PAXRecords = paxHdrs + return nil +} + +// parsePAX parses PAX headers. +// If an extended header (type 'x') is invalid, ErrHeader is returned +func parsePAX(r io.Reader) (map[string]string, error) { + buf, err := io.ReadAll(r) + if err != nil { + return nil, err + } + sbuf := string(buf) + + // For GNU PAX sparse format 0.0 support. + // This function transforms the sparse format 0.0 headers into format 0.1 + // headers since 0.0 headers were not PAX compliant. + var sparseMap []string + + paxHdrs := make(map[string]string) + for len(sbuf) > 0 { + key, value, residual, err := parsePAXRecord(sbuf) + if err != nil { + return nil, ErrHeader + } + sbuf = residual + + switch key { + case paxGNUSparseOffset, paxGNUSparseNumBytes: + // Validate sparse header order and value. + if (len(sparseMap)%2 == 0 && key != paxGNUSparseOffset) || + (len(sparseMap)%2 == 1 && key != paxGNUSparseNumBytes) || + strings.Contains(value, ",") { + return nil, ErrHeader + } + sparseMap = append(sparseMap, value) + default: + paxHdrs[key] = value + } + } + if len(sparseMap) > 0 { + paxHdrs[paxGNUSparseMap] = strings.Join(sparseMap, ",") + } + return paxHdrs, nil +} + +// readHeader reads the next block header and assumes that the underlying reader +// is already aligned to a block boundary. It returns the raw block of the +// header in case further processing is required. +// +// The err will be set to io.EOF only when one of the following occurs: +// * Exactly 0 bytes are read and EOF is hit. +// * Exactly 1 block of zeros is read and EOF is hit. +// * At least 2 blocks of zeros are read. +func (tr *Reader) readHeader() (*Header, *block, error) { + // Two blocks of zero bytes marks the end of the archive. + if _, err := io.ReadFull(tr.r, tr.blk[:]); err != nil { + return nil, nil, err // EOF is okay here; exactly 0 bytes read + } + if bytes.Equal(tr.blk[:], zeroBlock[:]) { + if _, err := io.ReadFull(tr.r, tr.blk[:]); err != nil { + return nil, nil, err // EOF is okay here; exactly 1 block of zeros read + } + if bytes.Equal(tr.blk[:], zeroBlock[:]) { + return nil, nil, io.EOF // normal EOF; exactly 2 block of zeros read + } + return nil, nil, ErrHeader // Zero block and then non-zero block + } + + // Verify the header matches a known format. + format := tr.blk.GetFormat() + if format == FormatUnknown { + return nil, nil, ErrHeader + } + + var p parser + hdr := new(Header) + + // Unpack the V7 header. + v7 := tr.blk.V7() + hdr.Typeflag = v7.TypeFlag()[0] + hdr.Name = p.parseString(v7.Name()) + hdr.Linkname = p.parseString(v7.LinkName()) + hdr.Size = p.parseNumeric(v7.Size()) + hdr.Mode = p.parseNumeric(v7.Mode()) + hdr.Uid = int(p.parseNumeric(v7.UID())) + hdr.Gid = int(p.parseNumeric(v7.GID())) + hdr.ModTime = time.Unix(p.parseNumeric(v7.ModTime()), 0) + + // Unpack format specific fields. + if format > formatV7 { + ustar := tr.blk.USTAR() + hdr.Uname = p.parseString(ustar.UserName()) + hdr.Gname = p.parseString(ustar.GroupName()) + hdr.Devmajor = p.parseNumeric(ustar.DevMajor()) + hdr.Devminor = p.parseNumeric(ustar.DevMinor()) + + var prefix string + switch { + case format.has(FormatUSTAR | FormatPAX): + hdr.Format = format + ustar := tr.blk.USTAR() + prefix = p.parseString(ustar.Prefix()) + + // For Format detection, check if block is properly formatted since + // the parser is more liberal than what USTAR actually permits. + notASCII := func(r rune) bool { return r >= 0x80 } + if bytes.IndexFunc(tr.blk[:], notASCII) >= 0 { + hdr.Format = FormatUnknown // Non-ASCII characters in block. + } + nul := func(b []byte) bool { return int(b[len(b)-1]) == 0 } + if !(nul(v7.Size()) && nul(v7.Mode()) && nul(v7.UID()) && nul(v7.GID()) && + nul(v7.ModTime()) && nul(ustar.DevMajor()) && nul(ustar.DevMinor())) { + hdr.Format = FormatUnknown // Numeric fields must end in NUL + } + case format.has(formatSTAR): + star := tr.blk.STAR() + prefix = p.parseString(star.Prefix()) + hdr.AccessTime = time.Unix(p.parseNumeric(star.AccessTime()), 0) + hdr.ChangeTime = time.Unix(p.parseNumeric(star.ChangeTime()), 0) + case format.has(FormatGNU): + hdr.Format = format + var p2 parser + gnu := tr.blk.GNU() + if b := gnu.AccessTime(); b[0] != 0 { + hdr.AccessTime = time.Unix(p2.parseNumeric(b), 0) + } + if b := gnu.ChangeTime(); b[0] != 0 { + hdr.ChangeTime = time.Unix(p2.parseNumeric(b), 0) + } + + // Prior to Go1.8, the Writer had a bug where it would output + // an invalid tar file in certain rare situations because the logic + // incorrectly believed that the old GNU format had a prefix field. + // This is wrong and leads to an output file that mangles the + // atime and ctime fields, which are often left unused. + // + // In order to continue reading tar files created by former, buggy + // versions of Go, we skeptically parse the atime and ctime fields. + // If we are unable to parse them and the prefix field looks like + // an ASCII string, then we fallback on the pre-Go1.8 behavior + // of treating these fields as the USTAR prefix field. + // + // Note that this will not use the fallback logic for all possible + // files generated by a pre-Go1.8 toolchain. If the generated file + // happened to have a prefix field that parses as valid + // atime and ctime fields (e.g., when they are valid octal strings), + // then it is impossible to distinguish between a valid GNU file + // and an invalid pre-Go1.8 file. + // + // See https://golang.org/issues/12594 + // See https://golang.org/issues/21005 + if p2.err != nil { + hdr.AccessTime, hdr.ChangeTime = time.Time{}, time.Time{} + ustar := tr.blk.USTAR() + if s := p.parseString(ustar.Prefix()); isASCII(s) { + prefix = s + } + hdr.Format = FormatUnknown // Buggy file is not GNU + } + } + if len(prefix) > 0 { + hdr.Name = prefix + "/" + hdr.Name + } + } + return hdr, &tr.blk, p.err +} + +// readOldGNUSparseMap reads the sparse map from the old GNU sparse format. +// The sparse map is stored in the tar header if it's small enough. +// If it's larger than four entries, then one or more extension headers are used +// to store the rest of the sparse map. +// +// The Header.Size does not reflect the size of any extended headers used. +// Thus, this function will read from the raw io.Reader to fetch extra headers. +// This method mutates blk in the process. +func (tr *Reader) readOldGNUSparseMap(hdr *Header, blk *block) (sparseDatas, error) { + // Make sure that the input format is GNU. + // Unfortunately, the STAR format also has a sparse header format that uses + // the same type flag but has a completely different layout. + if blk.GetFormat() != FormatGNU { + return nil, ErrHeader + } + hdr.Format.mayOnlyBe(FormatGNU) + + var p parser + hdr.Size = p.parseNumeric(blk.GNU().RealSize()) + if p.err != nil { + return nil, p.err + } + s := blk.GNU().Sparse() + spd := make(sparseDatas, 0, s.MaxEntries()) + for { + for i := 0; i < s.MaxEntries(); i++ { + // This termination condition is identical to GNU and BSD tar. + if s.Entry(i).Offset()[0] == 0x00 { + break // Don't return, need to process extended headers (even if empty) + } + offset := p.parseNumeric(s.Entry(i).Offset()) + length := p.parseNumeric(s.Entry(i).Length()) + if p.err != nil { + return nil, p.err + } + spd = append(spd, sparseEntry{Offset: offset, Length: length}) + } + + if s.IsExtended()[0] > 0 { + // There are more entries. Read an extension header and parse its entries. + if _, err := mustReadFull(tr.r, blk[:]); err != nil { + return nil, err + } + s = blk.Sparse() + continue + } + return spd, nil // Done + } +} + +// readGNUSparseMap1x0 reads the sparse map as stored in GNU's PAX sparse format +// version 1.0. The format of the sparse map consists of a series of +// newline-terminated numeric fields. The first field is the number of entries +// and is always present. Following this are the entries, consisting of two +// fields (offset, length). This function must stop reading at the end +// boundary of the block containing the last newline. +// +// Note that the GNU manual says that numeric values should be encoded in octal +// format. However, the GNU tar utility itself outputs these values in decimal. +// As such, this library treats values as being encoded in decimal. +func readGNUSparseMap1x0(r io.Reader) (sparseDatas, error) { + var ( + cntNewline int64 + buf bytes.Buffer + blk block + ) + + // feedTokens copies data in blocks from r into buf until there are + // at least cnt newlines in buf. It will not read more blocks than needed. + feedTokens := func(n int64) error { + for cntNewline < n { + if _, err := mustReadFull(r, blk[:]); err != nil { + return err + } + buf.Write(blk[:]) + for _, c := range blk { + if c == '\n' { + cntNewline++ + } + } + } + return nil + } + + // nextToken gets the next token delimited by a newline. This assumes that + // at least one newline exists in the buffer. + nextToken := func() string { + cntNewline-- + tok, _ := buf.ReadString('\n') + return strings.TrimRight(tok, "\n") + } + + // Parse for the number of entries. + // Use integer overflow resistant math to check this. + if err := feedTokens(1); err != nil { + return nil, err + } + numEntries, err := strconv.ParseInt(nextToken(), 10, 0) // Intentionally parse as native int + if err != nil || numEntries < 0 || int(2*numEntries) < int(numEntries) { + return nil, ErrHeader + } + + // Parse for all member entries. + // numEntries is trusted after this since a potential attacker must have + // committed resources proportional to what this library used. + if err := feedTokens(2 * numEntries); err != nil { + return nil, err + } + spd := make(sparseDatas, 0, numEntries) + for i := int64(0); i < numEntries; i++ { + offset, err1 := strconv.ParseInt(nextToken(), 10, 64) + length, err2 := strconv.ParseInt(nextToken(), 10, 64) + if err1 != nil || err2 != nil { + return nil, ErrHeader + } + spd = append(spd, sparseEntry{Offset: offset, Length: length}) + } + return spd, nil +} + +// readGNUSparseMap0x1 reads the sparse map as stored in GNU's PAX sparse format +// version 0.1. The sparse map is stored in the PAX headers. +func readGNUSparseMap0x1(paxHdrs map[string]string) (sparseDatas, error) { + // Get number of entries. + // Use integer overflow resistant math to check this. + numEntriesStr := paxHdrs[paxGNUSparseNumBlocks] + numEntries, err := strconv.ParseInt(numEntriesStr, 10, 0) // Intentionally parse as native int + if err != nil || numEntries < 0 || int(2*numEntries) < int(numEntries) { + return nil, ErrHeader + } + + // There should be two numbers in sparseMap for each entry. + sparseMap := strings.Split(paxHdrs[paxGNUSparseMap], ",") + if len(sparseMap) == 1 && sparseMap[0] == "" { + sparseMap = sparseMap[:0] + } + if int64(len(sparseMap)) != 2*numEntries { + return nil, ErrHeader + } + + // Loop through the entries in the sparse map. + // numEntries is trusted now. + spd := make(sparseDatas, 0, numEntries) + for len(sparseMap) >= 2 { + offset, err1 := strconv.ParseInt(sparseMap[0], 10, 64) + length, err2 := strconv.ParseInt(sparseMap[1], 10, 64) + if err1 != nil || err2 != nil { + return nil, ErrHeader + } + spd = append(spd, sparseEntry{Offset: offset, Length: length}) + sparseMap = sparseMap[2:] + } + return spd, nil +} + +// Read reads from the current file in the tar archive. +// It returns (0, io.EOF) when it reaches the end of that file, +// until Next is called to advance to the next file. +// +// If the current file is sparse, then the regions marked as a hole +// are read back as NUL-bytes. +// +// Calling Read on special types like TypeLink, TypeSymlink, TypeChar, +// TypeBlock, TypeDir, and TypeFifo returns (0, io.EOF) regardless of what +// the Header.Size claims. +func (tr *Reader) Read(b []byte) (int, error) { + if tr.err != nil { + return 0, tr.err + } + n, err := tr.curr.Read(b) + if err != nil && err != io.EOF { + tr.err = err + } + return n, err +} + +// writeTo writes the content of the current file to w. +// The bytes written matches the number of remaining bytes in the current file. +// +// If the current file is sparse and w is an io.WriteSeeker, +// then writeTo uses Seek to skip past holes defined in Header.SparseHoles, +// assuming that skipped regions are filled with NULs. +// This always writes the last byte to ensure w is the right size. +// +// TODO(dsnet): Re-export this when adding sparse file support. +// See https://golang.org/issue/22735 +func (tr *Reader) writeTo(w io.Writer) (int64, error) { + if tr.err != nil { + return 0, tr.err + } + n, err := tr.curr.WriteTo(w) + if err != nil { + tr.err = err + } + return n, err +} + +// regFileReader is a fileReader for reading data from a regular file entry. +type regFileReader struct { + r io.Reader // Underlying Reader + nb int64 // Number of remaining bytes to read +} + +func (fr *regFileReader) Read(b []byte) (n int, err error) { + if int64(len(b)) > fr.nb { + b = b[:fr.nb] + } + if len(b) > 0 { + n, err = fr.r.Read(b) + fr.nb -= int64(n) + } + switch { + case err == io.EOF && fr.nb > 0: + return n, io.ErrUnexpectedEOF + case err == nil && fr.nb == 0: + return n, io.EOF + default: + return n, err + } +} + +func (fr *regFileReader) WriteTo(w io.Writer) (int64, error) { + return io.Copy(w, struct{ io.Reader }{fr}) +} + +func (fr regFileReader) LogicalRemaining() int64 { + return fr.nb +} + +func (fr regFileReader) PhysicalRemaining() int64 { + return fr.nb +} + +// sparseFileReader is a fileReader for reading data from a sparse file entry. +type sparseFileReader struct { + fr fileReader // Underlying fileReader + sp sparseHoles // Normalized list of sparse holes + pos int64 // Current position in sparse file +} + +func (sr *sparseFileReader) Read(b []byte) (n int, err error) { + finished := int64(len(b)) >= sr.LogicalRemaining() + if finished { + b = b[:sr.LogicalRemaining()] + } + + b0 := b + endPos := sr.pos + int64(len(b)) + for endPos > sr.pos && err == nil { + var nf int // Bytes read in fragment + holeStart, holeEnd := sr.sp[0].Offset, sr.sp[0].endOffset() + if sr.pos < holeStart { // In a data fragment + bf := b[:min(int64(len(b)), holeStart-sr.pos)] + nf, err = tryReadFull(sr.fr, bf) + } else { // In a hole fragment + bf := b[:min(int64(len(b)), holeEnd-sr.pos)] + nf, err = tryReadFull(zeroReader{}, bf) + } + b = b[nf:] + sr.pos += int64(nf) + if sr.pos >= holeEnd && len(sr.sp) > 1 { + sr.sp = sr.sp[1:] // Ensure last fragment always remains + } + } + + n = len(b0) - len(b) + switch { + case err == io.EOF: + return n, errMissData // Less data in dense file than sparse file + case err != nil: + return n, err + case sr.LogicalRemaining() == 0 && sr.PhysicalRemaining() > 0: + return n, errUnrefData // More data in dense file than sparse file + case finished: + return n, io.EOF + default: + return n, nil + } +} + +func (sr *sparseFileReader) WriteTo(w io.Writer) (n int64, err error) { + ws, ok := w.(io.WriteSeeker) + if ok { + if _, err := ws.Seek(0, io.SeekCurrent); err != nil { + ok = false // Not all io.Seeker can really seek + } + } + if !ok { + return io.Copy(w, struct{ io.Reader }{sr}) + } + + var writeLastByte bool + pos0 := sr.pos + for sr.LogicalRemaining() > 0 && !writeLastByte && err == nil { + var nf int64 // Size of fragment + holeStart, holeEnd := sr.sp[0].Offset, sr.sp[0].endOffset() + if sr.pos < holeStart { // In a data fragment + nf = holeStart - sr.pos + nf, err = io.CopyN(ws, sr.fr, nf) + } else { // In a hole fragment + nf = holeEnd - sr.pos + if sr.PhysicalRemaining() == 0 { + writeLastByte = true + nf-- + } + _, err = ws.Seek(nf, io.SeekCurrent) + } + sr.pos += nf + if sr.pos >= holeEnd && len(sr.sp) > 1 { + sr.sp = sr.sp[1:] // Ensure last fragment always remains + } + } + + // If the last fragment is a hole, then seek to 1-byte before EOF, and + // write a single byte to ensure the file is the right size. + if writeLastByte && err == nil { + _, err = ws.Write([]byte{0}) + sr.pos++ + } + + n = sr.pos - pos0 + switch { + case err == io.EOF: + return n, errMissData // Less data in dense file than sparse file + case err != nil: + return n, err + case sr.LogicalRemaining() == 0 && sr.PhysicalRemaining() > 0: + return n, errUnrefData // More data in dense file than sparse file + default: + return n, nil + } +} + +func (sr sparseFileReader) LogicalRemaining() int64 { + return sr.sp[len(sr.sp)-1].endOffset() - sr.pos +} +func (sr sparseFileReader) PhysicalRemaining() int64 { + return sr.fr.PhysicalRemaining() +} + +type zeroReader struct{} + +func (zeroReader) Read(b []byte) (int, error) { + for i := range b { + b[i] = 0 + } + return len(b), nil +} + +// mustReadFull is like io.ReadFull except it returns +// io.ErrUnexpectedEOF when io.EOF is hit before len(b) bytes are read. +func mustReadFull(r io.Reader, b []byte) (int, error) { + n, err := tryReadFull(r, b) + if err == io.EOF { + err = io.ErrUnexpectedEOF + } + return n, err +} + +// tryReadFull is like io.ReadFull except it returns +// io.EOF when it is hit before len(b) bytes are read. +func tryReadFull(r io.Reader, b []byte) (n int, err error) { + for len(b) > n && err == nil { + var nn int + nn, err = r.Read(b[n:]) + n += nn + } + if len(b) == n && err == io.EOF { + err = nil + } + return n, err +} + +// discard skips n bytes in r, reporting an error if unable to do so. +func discard(r io.Reader, n int64) error { + // If possible, Seek to the last byte before the end of the data section. + // Do this because Seek is often lazy about reporting errors; this will mask + // the fact that the stream may be truncated. We can rely on the + // io.CopyN done shortly afterwards to trigger any IO errors. + var seekSkipped int64 // Number of bytes skipped via Seek + if sr, ok := r.(io.Seeker); ok && n > 1 { + // Not all io.Seeker can actually Seek. For example, os.Stdin implements + // io.Seeker, but calling Seek always returns an error and performs + // no action. Thus, we try an innocent seek to the current position + // to see if Seek is really supported. + pos1, err := sr.Seek(0, io.SeekCurrent) + if pos1 >= 0 && err == nil { + // Seek seems supported, so perform the real Seek. + pos2, err := sr.Seek(n-1, io.SeekCurrent) + if pos2 < 0 || err != nil { + return err + } + seekSkipped = pos2 - pos1 + } + } + + copySkipped, err := io.CopyN(io.Discard, r, n-seekSkipped) + if err == io.EOF && seekSkipped+copySkipped < n { + err = io.ErrUnexpectedEOF + } + return err +} diff --git a/src/archive/tar/reader_test.go b/src/archive/tar/reader_test.go new file mode 100644 index 0000000..789ddc1 --- /dev/null +++ b/src/archive/tar/reader_test.go @@ -0,0 +1,1610 @@ +// Copyright 2009 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package tar + +import ( + "bytes" + "crypto/md5" + "errors" + "fmt" + "io" + "math" + "os" + "path" + "reflect" + "strconv" + "strings" + "testing" + "time" +) + +func TestReader(t *testing.T) { + vectors := []struct { + file string // Test input file + headers []*Header // Expected output headers + chksums []string // MD5 checksum of files, leave as nil if not checked + err error // Expected error to occur + }{{ + file: "testdata/gnu.tar", + headers: []*Header{{ + Name: "small.txt", + Mode: 0640, + Uid: 73025, + Gid: 5000, + Size: 5, + ModTime: time.Unix(1244428340, 0), + Typeflag: '0', + Uname: "dsymonds", + Gname: "eng", + Format: FormatGNU, + }, { + Name: "small2.txt", + Mode: 0640, + Uid: 73025, + Gid: 5000, + Size: 11, + ModTime: time.Unix(1244436044, 0), + Typeflag: '0', + Uname: "dsymonds", + Gname: "eng", + Format: FormatGNU, + }}, + chksums: []string{ + "e38b27eaccb4391bdec553a7f3ae6b2f", + "c65bd2e50a56a2138bf1716f2fd56fe9", + }, + }, { + file: "testdata/sparse-formats.tar", + headers: []*Header{{ + Name: "sparse-gnu", + Mode: 420, + Uid: 1000, + Gid: 1000, + Size: 200, + ModTime: time.Unix(1392395740, 0), + Typeflag: 0x53, + Linkname: "", + Uname: "david", + Gname: "david", + Devmajor: 0, + Devminor: 0, + Format: FormatGNU, + }, { + Name: "sparse-posix-0.0", + Mode: 420, + Uid: 1000, + Gid: 1000, + Size: 200, + ModTime: time.Unix(1392342187, 0), + Typeflag: 0x30, + Linkname: "", + Uname: "david", + Gname: "david", + Devmajor: 0, + Devminor: 0, + PAXRecords: map[string]string{ + "GNU.sparse.size": "200", + "GNU.sparse.numblocks": "95", + "GNU.sparse.map": "1,1,3,1,5,1,7,1,9,1,11,1,13,1,15,1,17,1,19,1,21,1,23,1,25,1,27,1,29,1,31,1,33,1,35,1,37,1,39,1,41,1,43,1,45,1,47,1,49,1,51,1,53,1,55,1,57,1,59,1,61,1,63,1,65,1,67,1,69,1,71,1,73,1,75,1,77,1,79,1,81,1,83,1,85,1,87,1,89,1,91,1,93,1,95,1,97,1,99,1,101,1,103,1,105,1,107,1,109,1,111,1,113,1,115,1,117,1,119,1,121,1,123,1,125,1,127,1,129,1,131,1,133,1,135,1,137,1,139,1,141,1,143,1,145,1,147,1,149,1,151,1,153,1,155,1,157,1,159,1,161,1,163,1,165,1,167,1,169,1,171,1,173,1,175,1,177,1,179,1,181,1,183,1,185,1,187,1,189,1", + }, + Format: FormatPAX, + }, { + Name: "sparse-posix-0.1", + Mode: 420, + Uid: 1000, + Gid: 1000, + Size: 200, + ModTime: time.Unix(1392340456, 0), + Typeflag: 0x30, + Linkname: "", + Uname: "david", + Gname: "david", + Devmajor: 0, + Devminor: 0, + PAXRecords: map[string]string{ + "GNU.sparse.size": "200", + "GNU.sparse.numblocks": "95", + "GNU.sparse.map": "1,1,3,1,5,1,7,1,9,1,11,1,13,1,15,1,17,1,19,1,21,1,23,1,25,1,27,1,29,1,31,1,33,1,35,1,37,1,39,1,41,1,43,1,45,1,47,1,49,1,51,1,53,1,55,1,57,1,59,1,61,1,63,1,65,1,67,1,69,1,71,1,73,1,75,1,77,1,79,1,81,1,83,1,85,1,87,1,89,1,91,1,93,1,95,1,97,1,99,1,101,1,103,1,105,1,107,1,109,1,111,1,113,1,115,1,117,1,119,1,121,1,123,1,125,1,127,1,129,1,131,1,133,1,135,1,137,1,139,1,141,1,143,1,145,1,147,1,149,1,151,1,153,1,155,1,157,1,159,1,161,1,163,1,165,1,167,1,169,1,171,1,173,1,175,1,177,1,179,1,181,1,183,1,185,1,187,1,189,1", + "GNU.sparse.name": "sparse-posix-0.1", + }, + Format: FormatPAX, + }, { + Name: "sparse-posix-1.0", + Mode: 420, + Uid: 1000, + Gid: 1000, + Size: 200, + ModTime: time.Unix(1392337404, 0), + Typeflag: 0x30, + Linkname: "", + Uname: "david", + Gname: "david", + Devmajor: 0, + Devminor: 0, + PAXRecords: map[string]string{ + "GNU.sparse.major": "1", + "GNU.sparse.minor": "0", + "GNU.sparse.realsize": "200", + "GNU.sparse.name": "sparse-posix-1.0", + }, + Format: FormatPAX, + }, { + Name: "end", + Mode: 420, + Uid: 1000, + Gid: 1000, + Size: 4, + ModTime: time.Unix(1392398319, 0), + Typeflag: 0x30, + Linkname: "", + Uname: "david", + Gname: "david", + Devmajor: 0, + Devminor: 0, + Format: FormatGNU, + }}, + chksums: []string{ + "6f53234398c2449fe67c1812d993012f", + "6f53234398c2449fe67c1812d993012f", + "6f53234398c2449fe67c1812d993012f", + "6f53234398c2449fe67c1812d993012f", + "b0061974914468de549a2af8ced10316", + }, + }, { + file: "testdata/star.tar", + headers: []*Header{{ + Name: "small.txt", + Mode: 0640, + Uid: 73025, + Gid: 5000, + Size: 5, + ModTime: time.Unix(1244592783, 0), + Typeflag: '0', + Uname: "dsymonds", + Gname: "eng", + AccessTime: time.Unix(1244592783, 0), + ChangeTime: time.Unix(1244592783, 0), + }, { + Name: "small2.txt", + Mode: 0640, + Uid: 73025, + Gid: 5000, + Size: 11, + ModTime: time.Unix(1244592783, 0), + Typeflag: '0', + Uname: "dsymonds", + Gname: "eng", + AccessTime: time.Unix(1244592783, 0), + ChangeTime: time.Unix(1244592783, 0), + }}, + }, { + file: "testdata/v7.tar", + headers: []*Header{{ + Name: "small.txt", + Mode: 0444, + Uid: 73025, + Gid: 5000, + Size: 5, + ModTime: time.Unix(1244593104, 0), + Typeflag: '0', + }, { + Name: "small2.txt", + Mode: 0444, + Uid: 73025, + Gid: 5000, + Size: 11, + ModTime: time.Unix(1244593104, 0), + Typeflag: '0', + }}, + }, { + file: "testdata/pax.tar", + headers: []*Header{{ + Name: "a/123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100", + Mode: 0664, + Uid: 1000, + Gid: 1000, + Uname: "shane", + Gname: "shane", + Size: 7, + ModTime: time.Unix(1350244992, 23960108), + ChangeTime: time.Unix(1350244992, 23960108), + AccessTime: time.Unix(1350244992, 23960108), + Typeflag: TypeReg, + PAXRecords: map[string]string{ + "path": "a/123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100", + "mtime": "1350244992.023960108", + "atime": "1350244992.023960108", + "ctime": "1350244992.023960108", + }, + Format: FormatPAX, + }, { + Name: "a/b", + Mode: 0777, + Uid: 1000, + Gid: 1000, + Uname: "shane", + Gname: "shane", + Size: 0, + ModTime: time.Unix(1350266320, 910238425), + ChangeTime: time.Unix(1350266320, 910238425), + AccessTime: time.Unix(1350266320, 910238425), + Typeflag: TypeSymlink, + Linkname: "123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100", + PAXRecords: map[string]string{ + "linkpath": "123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100", + "mtime": "1350266320.910238425", + "atime": "1350266320.910238425", + "ctime": "1350266320.910238425", + }, + Format: FormatPAX, + }}, + }, { + file: "testdata/pax-bad-hdr-file.tar", + err: ErrHeader, + }, { + file: "testdata/pax-bad-mtime-file.tar", + err: ErrHeader, + }, { + file: "testdata/pax-pos-size-file.tar", + headers: []*Header{{ + Name: "foo", + Mode: 0640, + Uid: 319973, + Gid: 5000, + Size: 999, + ModTime: time.Unix(1442282516, 0), + Typeflag: '0', + Uname: "joetsai", + Gname: "eng", + PAXRecords: map[string]string{ + "size": "000000000000000000000999", + }, + Format: FormatPAX, + }}, + chksums: []string{ + "0afb597b283fe61b5d4879669a350556", + }, + }, { + file: "testdata/pax-records.tar", + headers: []*Header{{ + Typeflag: TypeReg, + Name: "file", + Uname: strings.Repeat("long", 10), + ModTime: time.Unix(0, 0), + PAXRecords: map[string]string{ + "GOLANG.pkg": "tar", + "comment": "Hello, 世界", + "uname": strings.Repeat("long", 10), + }, + Format: FormatPAX, + }}, + }, { + file: "testdata/pax-global-records.tar", + headers: []*Header{{ + Typeflag: TypeXGlobalHeader, + Name: "global1", + PAXRecords: map[string]string{"path": "global1", "mtime": "1500000000.0"}, + Format: FormatPAX, + }, { + Typeflag: TypeReg, + Name: "file1", + ModTime: time.Unix(0, 0), + Format: FormatUSTAR, + }, { + Typeflag: TypeReg, + Name: "file2", + PAXRecords: map[string]string{"path": "file2"}, + ModTime: time.Unix(0, 0), + Format: FormatPAX, + }, { + Typeflag: TypeXGlobalHeader, + Name: "GlobalHead.0.0", + PAXRecords: map[string]string{"path": ""}, + Format: FormatPAX, + }, { + Typeflag: TypeReg, + Name: "file3", + ModTime: time.Unix(0, 0), + Format: FormatUSTAR, + }, { + Typeflag: TypeReg, + Name: "file4", + ModTime: time.Unix(1400000000, 0), + PAXRecords: map[string]string{"mtime": "1400000000"}, + Format: FormatPAX, + }}, + }, { + file: "testdata/nil-uid.tar", // golang.org/issue/5290 + headers: []*Header{{ + Name: "P1050238.JPG.log", + Mode: 0664, + Uid: 0, + Gid: 0, + Size: 14, + ModTime: time.Unix(1365454838, 0), + Typeflag: TypeReg, + Linkname: "", + Uname: "eyefi", + Gname: "eyefi", + Devmajor: 0, + Devminor: 0, + Format: FormatGNU, + }}, + }, { + file: "testdata/xattrs.tar", + headers: []*Header{{ + Name: "small.txt", + Mode: 0644, + Uid: 1000, + Gid: 10, + Size: 5, + ModTime: time.Unix(1386065770, 448252320), + Typeflag: '0', + Uname: "alex", + Gname: "wheel", + AccessTime: time.Unix(1389782991, 419875220), + ChangeTime: time.Unix(1389782956, 794414986), + Xattrs: map[string]string{ + "user.key": "value", + "user.key2": "value2", + // Interestingly, selinux encodes the terminating null inside the xattr + "security.selinux": "unconfined_u:object_r:default_t:s0\x00", + }, + PAXRecords: map[string]string{ + "mtime": "1386065770.44825232", + "atime": "1389782991.41987522", + "ctime": "1389782956.794414986", + "SCHILY.xattr.user.key": "value", + "SCHILY.xattr.user.key2": "value2", + "SCHILY.xattr.security.selinux": "unconfined_u:object_r:default_t:s0\x00", + }, + Format: FormatPAX, + }, { + Name: "small2.txt", + Mode: 0644, + Uid: 1000, + Gid: 10, + Size: 11, + ModTime: time.Unix(1386065770, 449252304), + Typeflag: '0', + Uname: "alex", + Gname: "wheel", + AccessTime: time.Unix(1389782991, 419875220), + ChangeTime: time.Unix(1386065770, 449252304), + Xattrs: map[string]string{ + "security.selinux": "unconfined_u:object_r:default_t:s0\x00", + }, + PAXRecords: map[string]string{ + "mtime": "1386065770.449252304", + "atime": "1389782991.41987522", + "ctime": "1386065770.449252304", + "SCHILY.xattr.security.selinux": "unconfined_u:object_r:default_t:s0\x00", + }, + Format: FormatPAX, + }}, + }, { + // Matches the behavior of GNU, BSD, and STAR tar utilities. + file: "testdata/gnu-multi-hdrs.tar", + headers: []*Header{{ + Name: "GNU2/GNU2/long-path-name", + Linkname: "GNU4/GNU4/long-linkpath-name", + ModTime: time.Unix(0, 0), + Typeflag: '2', + Format: FormatGNU, + }}, + }, { + // GNU tar file with atime and ctime fields set. + // Created with the GNU tar v1.27.1. + // tar --incremental -S -cvf gnu-incremental.tar test2 + file: "testdata/gnu-incremental.tar", + headers: []*Header{{ + Name: "test2/", + Mode: 16877, + Uid: 1000, + Gid: 1000, + Size: 14, + ModTime: time.Unix(1441973427, 0), + Typeflag: 'D', + Uname: "rawr", + Gname: "dsnet", + AccessTime: time.Unix(1441974501, 0), + ChangeTime: time.Unix(1441973436, 0), + Format: FormatGNU, + }, { + Name: "test2/foo", + Mode: 33188, + Uid: 1000, + Gid: 1000, + Size: 64, + ModTime: time.Unix(1441973363, 0), + Typeflag: '0', + Uname: "rawr", + Gname: "dsnet", + AccessTime: time.Unix(1441974501, 0), + ChangeTime: time.Unix(1441973436, 0), + Format: FormatGNU, + }, { + Name: "test2/sparse", + Mode: 33188, + Uid: 1000, + Gid: 1000, + Size: 536870912, + ModTime: time.Unix(1441973427, 0), + Typeflag: 'S', + Uname: "rawr", + Gname: "dsnet", + AccessTime: time.Unix(1441991948, 0), + ChangeTime: time.Unix(1441973436, 0), + Format: FormatGNU, + }}, + }, { + // Matches the behavior of GNU and BSD tar utilities. + file: "testdata/pax-multi-hdrs.tar", + headers: []*Header{{ + Name: "bar", + Linkname: "PAX4/PAX4/long-linkpath-name", + ModTime: time.Unix(0, 0), + Typeflag: '2', + PAXRecords: map[string]string{ + "linkpath": "PAX4/PAX4/long-linkpath-name", + }, + Format: FormatPAX, + }}, + }, { + // Both BSD and GNU tar truncate long names at first NUL even + // if there is data following that NUL character. + // This is reasonable as GNU long names are C-strings. + file: "testdata/gnu-long-nul.tar", + headers: []*Header{{ + Name: "0123456789", + Mode: 0644, + Uid: 1000, + Gid: 1000, + ModTime: time.Unix(1486082191, 0), + Typeflag: '0', + Uname: "rawr", + Gname: "dsnet", + Format: FormatGNU, + }}, + }, { + // This archive was generated by Writer but is readable by both + // GNU and BSD tar utilities. + // The archive generated by GNU is nearly byte-for-byte identical + // to the Go version except the Go version sets a negative Devminor + // just to force the GNU format. + file: "testdata/gnu-utf8.tar", + headers: []*Header{{ + Name: "☺☻☹☺☻☹☺☻☹☺☻☹☺☻☹☺☻☹☺☻☹☺☻☹☺☻☹☺☻☹☺☻☹☺☻☹☺☻☹☺☻☹☺☻☹☺☻☹☺☻☹☺☻☹", + Mode: 0644, + Uid: 1000, Gid: 1000, + ModTime: time.Unix(0, 0), + Typeflag: '0', + Uname: "☺", + Gname: "⚹", + Format: FormatGNU, + }}, + }, { + // This archive was generated by Writer but is readable by both + // GNU and BSD tar utilities. + // The archive generated by GNU is nearly byte-for-byte identical + // to the Go version except the Go version sets a negative Devminor + // just to force the GNU format. + file: "testdata/gnu-not-utf8.tar", + headers: []*Header{{ + Name: "hi\x80\x81\x82\x83bye", + Mode: 0644, + Uid: 1000, + Gid: 1000, + ModTime: time.Unix(0, 0), + Typeflag: '0', + Uname: "rawr", + Gname: "dsnet", + Format: FormatGNU, + }}, + }, { + // BSD tar v3.1.2 and GNU tar v1.27.1 both rejects PAX records + // with NULs in the key. + file: "testdata/pax-nul-xattrs.tar", + err: ErrHeader, + }, { + // BSD tar v3.1.2 rejects a PAX path with NUL in the value, while + // GNU tar v1.27.1 simply truncates at first NUL. + // We emulate the behavior of BSD since it is strange doing NUL + // truncations since PAX records are length-prefix strings instead + // of NUL-terminated C-strings. + file: "testdata/pax-nul-path.tar", + err: ErrHeader, + }, { + file: "testdata/neg-size.tar", + err: ErrHeader, + }, { + file: "testdata/issue10968.tar", + err: ErrHeader, + }, { + file: "testdata/issue11169.tar", + err: ErrHeader, + }, { + file: "testdata/issue12435.tar", + err: ErrHeader, + }, { + // Ensure that we can read back the original Header as written with + // a buggy pre-Go1.8 tar.Writer. + file: "testdata/invalid-go17.tar", + headers: []*Header{{ + Name: "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa/foo", + Uid: 010000000, + ModTime: time.Unix(0, 0), + Typeflag: '0', + }}, + }, { + // USTAR archive with a regular entry with non-zero device numbers. + file: "testdata/ustar-file-devs.tar", + headers: []*Header{{ + Name: "file", + Mode: 0644, + Typeflag: '0', + ModTime: time.Unix(0, 0), + Devmajor: 1, + Devminor: 1, + Format: FormatUSTAR, + }}, + }, { + // Generated by Go, works on BSD tar v3.1.2 and GNU tar v.1.27.1. + file: "testdata/gnu-nil-sparse-data.tar", + headers: []*Header{{ + Name: "sparse.db", + Typeflag: TypeGNUSparse, + Size: 1000, + ModTime: time.Unix(0, 0), + Format: FormatGNU, + }}, + }, { + // Generated by Go, works on BSD tar v3.1.2 and GNU tar v.1.27.1. + file: "testdata/gnu-nil-sparse-hole.tar", + headers: []*Header{{ + Name: "sparse.db", + Typeflag: TypeGNUSparse, + Size: 1000, + ModTime: time.Unix(0, 0), + Format: FormatGNU, + }}, + }, { + // Generated by Go, works on BSD tar v3.1.2 and GNU tar v.1.27.1. + file: "testdata/pax-nil-sparse-data.tar", + headers: []*Header{{ + Name: "sparse.db", + Typeflag: TypeReg, + Size: 1000, + ModTime: time.Unix(0, 0), + PAXRecords: map[string]string{ + "size": "1512", + "GNU.sparse.major": "1", + "GNU.sparse.minor": "0", + "GNU.sparse.realsize": "1000", + "GNU.sparse.name": "sparse.db", + }, + Format: FormatPAX, + }}, + }, { + // Generated by Go, works on BSD tar v3.1.2 and GNU tar v.1.27.1. + file: "testdata/pax-nil-sparse-hole.tar", + headers: []*Header{{ + Name: "sparse.db", + Typeflag: TypeReg, + Size: 1000, + ModTime: time.Unix(0, 0), + PAXRecords: map[string]string{ + "size": "512", + "GNU.sparse.major": "1", + "GNU.sparse.minor": "0", + "GNU.sparse.realsize": "1000", + "GNU.sparse.name": "sparse.db", + }, + Format: FormatPAX, + }}, + }, { + file: "testdata/trailing-slash.tar", + headers: []*Header{{ + Typeflag: TypeDir, + Name: strings.Repeat("123456789/", 30), + ModTime: time.Unix(0, 0), + PAXRecords: map[string]string{ + "path": strings.Repeat("123456789/", 30), + }, + Format: FormatPAX, + }}, + }} + + for _, v := range vectors { + t.Run(path.Base(v.file), func(t *testing.T) { + f, err := os.Open(v.file) + if err != nil { + t.Fatalf("unexpected error: %v", err) + } + defer f.Close() + + // Capture all headers and checksums. + var ( + tr = NewReader(f) + hdrs []*Header + chksums []string + rdbuf = make([]byte, 8) + ) + for { + var hdr *Header + hdr, err = tr.Next() + if err != nil { + if err == io.EOF { + err = nil // Expected error + } + break + } + hdrs = append(hdrs, hdr) + + if v.chksums == nil { + continue + } + h := md5.New() + _, err = io.CopyBuffer(h, tr, rdbuf) // Effectively an incremental read + if err != nil { + break + } + chksums = append(chksums, fmt.Sprintf("%x", h.Sum(nil))) + } + + for i, hdr := range hdrs { + if i >= len(v.headers) { + t.Fatalf("entry %d: unexpected header:\ngot %+v", i, *hdr) + continue + } + if !reflect.DeepEqual(*hdr, *v.headers[i]) { + t.Fatalf("entry %d: incorrect header:\ngot %+v\nwant %+v", i, *hdr, *v.headers[i]) + } + } + if len(hdrs) != len(v.headers) { + t.Fatalf("got %d headers, want %d headers", len(hdrs), len(v.headers)) + } + + for i, sum := range chksums { + if i >= len(v.chksums) { + t.Fatalf("entry %d: unexpected sum: got %s", i, sum) + continue + } + if sum != v.chksums[i] { + t.Fatalf("entry %d: incorrect checksum: got %s, want %s", i, sum, v.chksums[i]) + } + } + + if err != v.err { + t.Fatalf("unexpected error: got %v, want %v", err, v.err) + } + f.Close() + }) + } +} + +func TestPartialRead(t *testing.T) { + type testCase struct { + cnt int // Number of bytes to read + output string // Expected value of string read + } + vectors := []struct { + file string + cases []testCase + }{{ + file: "testdata/gnu.tar", + cases: []testCase{ + {4, "Kilt"}, + {6, "Google"}, + }, + }, { + file: "testdata/sparse-formats.tar", + cases: []testCase{ + {2, "\x00G"}, + {4, "\x00G\x00o"}, + {6, "\x00G\x00o\x00G"}, + {8, "\x00G\x00o\x00G\x00o"}, + {4, "end\n"}, + }, + }} + + for _, v := range vectors { + t.Run(path.Base(v.file), func(t *testing.T) { + f, err := os.Open(v.file) + if err != nil { + t.Fatalf("Open() error: %v", err) + } + defer f.Close() + + tr := NewReader(f) + for i, tc := range v.cases { + hdr, err := tr.Next() + if err != nil || hdr == nil { + t.Fatalf("entry %d, Next(): got %v, want %v", i, err, nil) + } + buf := make([]byte, tc.cnt) + if _, err := io.ReadFull(tr, buf); err != nil { + t.Fatalf("entry %d, ReadFull(): got %v, want %v", i, err, nil) + } + if string(buf) != tc.output { + t.Fatalf("entry %d, ReadFull(): got %q, want %q", i, string(buf), tc.output) + } + } + + if _, err := tr.Next(); err != io.EOF { + t.Fatalf("Next(): got %v, want EOF", err) + } + }) + } +} + +func TestUninitializedRead(t *testing.T) { + f, err := os.Open("testdata/gnu.tar") + if err != nil { + t.Fatalf("Unexpected error: %v", err) + } + defer f.Close() + + tr := NewReader(f) + _, err = tr.Read([]byte{}) + if err == nil || err != io.EOF { + t.Errorf("Unexpected error: %v, wanted %v", err, io.EOF) + } + +} + +type reader struct{ io.Reader } +type readSeeker struct{ io.ReadSeeker } +type readBadSeeker struct{ io.ReadSeeker } + +func (rbs *readBadSeeker) Seek(int64, int) (int64, error) { return 0, fmt.Errorf("illegal seek") } + +// TestReadTruncation test the ending condition on various truncated files and +// that truncated files are still detected even if the underlying io.Reader +// satisfies io.Seeker. +func TestReadTruncation(t *testing.T) { + var ss []string + for _, p := range []string{ + "testdata/gnu.tar", + "testdata/ustar-file-reg.tar", + "testdata/pax-path-hdr.tar", + "testdata/sparse-formats.tar", + } { + buf, err := os.ReadFile(p) + if err != nil { + t.Fatalf("unexpected error: %v", err) + } + ss = append(ss, string(buf)) + } + + data1, data2, pax, sparse := ss[0], ss[1], ss[2], ss[3] + data2 += strings.Repeat("\x00", 10*512) + trash := strings.Repeat("garbage ", 64) // Exactly 512 bytes + + vectors := []struct { + input string // Input stream + cnt int // Expected number of headers read + err error // Expected error outcome + }{ + {"", 0, io.EOF}, // Empty file is a "valid" tar file + {data1[:511], 0, io.ErrUnexpectedEOF}, + {data1[:512], 1, io.ErrUnexpectedEOF}, + {data1[:1024], 1, io.EOF}, + {data1[:1536], 2, io.ErrUnexpectedEOF}, + {data1[:2048], 2, io.EOF}, + {data1, 2, io.EOF}, + {data1[:2048] + data2[:1536], 3, io.EOF}, + {data2[:511], 0, io.ErrUnexpectedEOF}, + {data2[:512], 1, io.ErrUnexpectedEOF}, + {data2[:1195], 1, io.ErrUnexpectedEOF}, + {data2[:1196], 1, io.EOF}, // Exact end of data and start of padding + {data2[:1200], 1, io.EOF}, + {data2[:1535], 1, io.EOF}, + {data2[:1536], 1, io.EOF}, // Exact end of padding + {data2[:1536] + trash[:1], 1, io.ErrUnexpectedEOF}, + {data2[:1536] + trash[:511], 1, io.ErrUnexpectedEOF}, + {data2[:1536] + trash, 1, ErrHeader}, + {data2[:2048], 1, io.EOF}, // Exactly 1 empty block + {data2[:2048] + trash[:1], 1, io.ErrUnexpectedEOF}, + {data2[:2048] + trash[:511], 1, io.ErrUnexpectedEOF}, + {data2[:2048] + trash, 1, ErrHeader}, + {data2[:2560], 1, io.EOF}, // Exactly 2 empty blocks (normal end-of-stream) + {data2[:2560] + trash[:1], 1, io.EOF}, + {data2[:2560] + trash[:511], 1, io.EOF}, + {data2[:2560] + trash, 1, io.EOF}, + {data2[:3072], 1, io.EOF}, + {pax, 0, io.EOF}, // PAX header without data is a "valid" tar file + {pax + trash[:1], 0, io.ErrUnexpectedEOF}, + {pax + trash[:511], 0, io.ErrUnexpectedEOF}, + {sparse[:511], 0, io.ErrUnexpectedEOF}, + {sparse[:512], 0, io.ErrUnexpectedEOF}, + {sparse[:3584], 1, io.EOF}, + {sparse[:9200], 1, io.EOF}, // Terminate in padding of sparse header + {sparse[:9216], 1, io.EOF}, + {sparse[:9728], 2, io.ErrUnexpectedEOF}, + {sparse[:10240], 2, io.EOF}, + {sparse[:11264], 2, io.ErrUnexpectedEOF}, + {sparse, 5, io.EOF}, + {sparse + trash, 5, io.EOF}, + } + + for i, v := range vectors { + for j := 0; j < 6; j++ { + var tr *Reader + var s1, s2 string + + switch j { + case 0: + tr = NewReader(&reader{strings.NewReader(v.input)}) + s1, s2 = "io.Reader", "auto" + case 1: + tr = NewReader(&reader{strings.NewReader(v.input)}) + s1, s2 = "io.Reader", "manual" + case 2: + tr = NewReader(&readSeeker{strings.NewReader(v.input)}) + s1, s2 = "io.ReadSeeker", "auto" + case 3: + tr = NewReader(&readSeeker{strings.NewReader(v.input)}) + s1, s2 = "io.ReadSeeker", "manual" + case 4: + tr = NewReader(&readBadSeeker{strings.NewReader(v.input)}) + s1, s2 = "ReadBadSeeker", "auto" + case 5: + tr = NewReader(&readBadSeeker{strings.NewReader(v.input)}) + s1, s2 = "ReadBadSeeker", "manual" + } + + var cnt int + var err error + for { + if _, err = tr.Next(); err != nil { + break + } + cnt++ + if s2 == "manual" { + if _, err = tr.writeTo(io.Discard); err != nil { + break + } + } + } + if err != v.err { + t.Errorf("test %d, NewReader(%s) with %s discard: got %v, want %v", + i, s1, s2, err, v.err) + } + if cnt != v.cnt { + t.Errorf("test %d, NewReader(%s) with %s discard: got %d headers, want %d headers", + i, s1, s2, cnt, v.cnt) + } + } + } +} + +// TestReadHeaderOnly tests that Reader does not attempt to read special +// header-only files. +func TestReadHeaderOnly(t *testing.T) { + f, err := os.Open("testdata/hdr-only.tar") + if err != nil { + t.Fatalf("unexpected error: %v", err) + } + defer f.Close() + + var hdrs []*Header + tr := NewReader(f) + for { + hdr, err := tr.Next() + if err == io.EOF { + break + } + if err != nil { + t.Errorf("Next(): got %v, want %v", err, nil) + continue + } + hdrs = append(hdrs, hdr) + + // If a special flag, we should read nothing. + cnt, _ := io.ReadFull(tr, []byte{0}) + if cnt > 0 && hdr.Typeflag != TypeReg { + t.Errorf("ReadFull(...): got %d bytes, want 0 bytes", cnt) + } + } + + // File is crafted with 16 entries. The later 8 are identical to the first + // 8 except that the size is set. + if len(hdrs) != 16 { + t.Fatalf("len(hdrs): got %d, want %d", len(hdrs), 16) + } + for i := 0; i < 8; i++ { + hdr1, hdr2 := hdrs[i+0], hdrs[i+8] + hdr1.Size, hdr2.Size = 0, 0 + if !reflect.DeepEqual(*hdr1, *hdr2) { + t.Errorf("incorrect header:\ngot %+v\nwant %+v", *hdr1, *hdr2) + } + } +} + +func TestMergePAX(t *testing.T) { + vectors := []struct { + in map[string]string + want *Header + ok bool + }{{ + in: map[string]string{ + "path": "a/b/c", + "uid": "1000", + "mtime": "1350244992.023960108", + }, + want: &Header{ + Name: "a/b/c", + Uid: 1000, + ModTime: time.Unix(1350244992, 23960108), + PAXRecords: map[string]string{ + "path": "a/b/c", + "uid": "1000", + "mtime": "1350244992.023960108", + }, + }, + ok: true, + }, { + in: map[string]string{ + "gid": "gtgergergersagersgers", + }, + ok: false, + }, { + in: map[string]string{ + "missing": "missing", + "SCHILY.xattr.key": "value", + }, + want: &Header{ + Xattrs: map[string]string{"key": "value"}, + PAXRecords: map[string]string{ + "missing": "missing", + "SCHILY.xattr.key": "value", + }, + }, + ok: true, + }} + + for i, v := range vectors { + got := new(Header) + err := mergePAX(got, v.in) + if v.ok && !reflect.DeepEqual(*got, *v.want) { + t.Errorf("test %d, mergePAX(...):\ngot %+v\nwant %+v", i, *got, *v.want) + } + if ok := err == nil; ok != v.ok { + t.Errorf("test %d, mergePAX(...): got %v, want %v", i, ok, v.ok) + } + } +} + +func TestParsePAX(t *testing.T) { + vectors := []struct { + in string + want map[string]string + ok bool + }{ + {"", nil, true}, + {"6 k=1\n", map[string]string{"k": "1"}, true}, + {"10 a=name\n", map[string]string{"a": "name"}, true}, + {"9 a=name\n", map[string]string{"a": "name"}, true}, + {"30 mtime=1350244992.023960108\n", map[string]string{"mtime": "1350244992.023960108"}, true}, + {"3 somelongkey=\n", nil, false}, + {"50 tooshort=\n", nil, false}, + {"13 key1=haha\n13 key2=nana\n13 key3=kaka\n", + map[string]string{"key1": "haha", "key2": "nana", "key3": "kaka"}, true}, + {"13 key1=val1\n13 key2=val2\n8 key1=\n", + map[string]string{"key1": "", "key2": "val2"}, true}, + {"22 GNU.sparse.size=10\n26 GNU.sparse.numblocks=2\n" + + "23 GNU.sparse.offset=1\n25 GNU.sparse.numbytes=2\n" + + "23 GNU.sparse.offset=3\n25 GNU.sparse.numbytes=4\n", + map[string]string{paxGNUSparseSize: "10", paxGNUSparseNumBlocks: "2", paxGNUSparseMap: "1,2,3,4"}, true}, + {"22 GNU.sparse.size=10\n26 GNU.sparse.numblocks=1\n" + + "25 GNU.sparse.numbytes=2\n23 GNU.sparse.offset=1\n", + nil, false}, + {"22 GNU.sparse.size=10\n26 GNU.sparse.numblocks=1\n" + + "25 GNU.sparse.offset=1,2\n25 GNU.sparse.numbytes=2\n", + nil, false}, + } + + for i, v := range vectors { + r := strings.NewReader(v.in) + got, err := parsePAX(r) + if !reflect.DeepEqual(got, v.want) && !(len(got) == 0 && len(v.want) == 0) { + t.Errorf("test %d, parsePAX():\ngot %v\nwant %v", i, got, v.want) + } + if ok := err == nil; ok != v.ok { + t.Errorf("test %d, parsePAX(): got %v, want %v", i, ok, v.ok) + } + } +} + +func TestReadOldGNUSparseMap(t *testing.T) { + populateSparseMap := func(sa sparseArray, sps []string) []string { + for i := 0; len(sps) > 0 && i < sa.MaxEntries(); i++ { + copy(sa.Entry(i), sps[0]) + sps = sps[1:] + } + if len(sps) > 0 { + copy(sa.IsExtended(), "\x80") + } + return sps + } + + makeInput := func(format Format, size string, sps ...string) (out []byte) { + // Write the initial GNU header. + var blk block + gnu := blk.GNU() + sparse := gnu.Sparse() + copy(gnu.RealSize(), size) + sps = populateSparseMap(sparse, sps) + if format != FormatUnknown { + blk.SetFormat(format) + } + out = append(out, blk[:]...) + + // Write extended sparse blocks. + for len(sps) > 0 { + var blk block + sps = populateSparseMap(blk.Sparse(), sps) + out = append(out, blk[:]...) + } + return out + } + + makeSparseStrings := func(sp []sparseEntry) (out []string) { + var f formatter + for _, s := range sp { + var b [24]byte + f.formatNumeric(b[:12], s.Offset) + f.formatNumeric(b[12:], s.Length) + out = append(out, string(b[:])) + } + return out + } + + vectors := []struct { + input []byte + wantMap sparseDatas + wantSize int64 + wantErr error + }{{ + input: makeInput(FormatUnknown, ""), + wantErr: ErrHeader, + }, { + input: makeInput(FormatGNU, "1234", "fewa"), + wantSize: 01234, + wantErr: ErrHeader, + }, { + input: makeInput(FormatGNU, "0031"), + wantSize: 031, + }, { + input: makeInput(FormatGNU, "80"), + wantErr: ErrHeader, + }, { + input: makeInput(FormatGNU, "1234", + makeSparseStrings(sparseDatas{{0, 0}, {1, 1}})...), + wantMap: sparseDatas{{0, 0}, {1, 1}}, + wantSize: 01234, + }, { + input: makeInput(FormatGNU, "1234", + append(makeSparseStrings(sparseDatas{{0, 0}, {1, 1}}), []string{"", "blah"}...)...), + wantMap: sparseDatas{{0, 0}, {1, 1}}, + wantSize: 01234, + }, { + input: makeInput(FormatGNU, "3333", + makeSparseStrings(sparseDatas{{0, 1}, {2, 1}, {4, 1}, {6, 1}})...), + wantMap: sparseDatas{{0, 1}, {2, 1}, {4, 1}, {6, 1}}, + wantSize: 03333, + }, { + input: makeInput(FormatGNU, "", + append(append( + makeSparseStrings(sparseDatas{{0, 1}, {2, 1}}), + []string{"", ""}...), + makeSparseStrings(sparseDatas{{4, 1}, {6, 1}})...)...), + wantMap: sparseDatas{{0, 1}, {2, 1}, {4, 1}, {6, 1}}, + }, { + input: makeInput(FormatGNU, "", + makeSparseStrings(sparseDatas{{0, 1}, {2, 1}, {4, 1}, {6, 1}, {8, 1}, {10, 1}})...)[:blockSize], + wantErr: io.ErrUnexpectedEOF, + }, { + input: makeInput(FormatGNU, "", + makeSparseStrings(sparseDatas{{0, 1}, {2, 1}, {4, 1}, {6, 1}, {8, 1}, {10, 1}})...)[:3*blockSize/2], + wantErr: io.ErrUnexpectedEOF, + }, { + input: makeInput(FormatGNU, "", + makeSparseStrings(sparseDatas{{0, 1}, {2, 1}, {4, 1}, {6, 1}, {8, 1}, {10, 1}})...), + wantMap: sparseDatas{{0, 1}, {2, 1}, {4, 1}, {6, 1}, {8, 1}, {10, 1}}, + }, { + input: makeInput(FormatGNU, "", + makeSparseStrings(sparseDatas{{10 << 30, 512}, {20 << 30, 512}})...), + wantMap: sparseDatas{{10 << 30, 512}, {20 << 30, 512}}, + }} + + for i, v := range vectors { + var blk block + var hdr Header + v.input = v.input[copy(blk[:], v.input):] + tr := Reader{r: bytes.NewReader(v.input)} + got, err := tr.readOldGNUSparseMap(&hdr, &blk) + if !equalSparseEntries(got, v.wantMap) { + t.Errorf("test %d, readOldGNUSparseMap(): got %v, want %v", i, got, v.wantMap) + } + if err != v.wantErr { + t.Errorf("test %d, readOldGNUSparseMap() = %v, want %v", i, err, v.wantErr) + } + if hdr.Size != v.wantSize { + t.Errorf("test %d, Header.Size = %d, want %d", i, hdr.Size, v.wantSize) + } + } +} + +func TestReadGNUSparsePAXHeaders(t *testing.T) { + padInput := func(s string) string { + return s + string(zeroBlock[:blockPadding(int64(len(s)))]) + } + + vectors := []struct { + inputData string + inputHdrs map[string]string + wantMap sparseDatas + wantSize int64 + wantName string + wantErr error + }{{ + inputHdrs: nil, + wantErr: nil, + }, { + inputHdrs: map[string]string{ + paxGNUSparseNumBlocks: strconv.FormatInt(math.MaxInt64, 10), + paxGNUSparseMap: "0,1,2,3", + }, + wantErr: ErrHeader, + }, { + inputHdrs: map[string]string{ + paxGNUSparseNumBlocks: "4\x00", + paxGNUSparseMap: "0,1,2,3", + }, + wantErr: ErrHeader, + }, { + inputHdrs: map[string]string{ + paxGNUSparseNumBlocks: "4", + paxGNUSparseMap: "0,1,2,3", + }, + wantErr: ErrHeader, + }, { + inputHdrs: map[string]string{ + paxGNUSparseNumBlocks: "2", + paxGNUSparseMap: "0,1,2,3", + }, + wantMap: sparseDatas{{0, 1}, {2, 3}}, + }, { + inputHdrs: map[string]string{ + paxGNUSparseNumBlocks: "2", + paxGNUSparseMap: "0, 1,2,3", + }, + wantErr: ErrHeader, + }, { + inputHdrs: map[string]string{ + paxGNUSparseNumBlocks: "2", + paxGNUSparseMap: "0,1,02,3", + paxGNUSparseRealSize: "4321", + }, + wantMap: sparseDatas{{0, 1}, {2, 3}}, + wantSize: 4321, + }, { + inputHdrs: map[string]string{ + paxGNUSparseNumBlocks: "2", + paxGNUSparseMap: "0,one1,2,3", + }, + wantErr: ErrHeader, + }, { + inputHdrs: map[string]string{ + paxGNUSparseMajor: "0", + paxGNUSparseMinor: "0", + paxGNUSparseNumBlocks: "2", + paxGNUSparseMap: "0,1,2,3", + paxGNUSparseSize: "1234", + paxGNUSparseRealSize: "4321", + paxGNUSparseName: "realname", + }, + wantMap: sparseDatas{{0, 1}, {2, 3}}, + wantSize: 1234, + wantName: "realname", + }, { + inputHdrs: map[string]string{ + paxGNUSparseMajor: "0", + paxGNUSparseMinor: "0", + paxGNUSparseNumBlocks: "1", + paxGNUSparseMap: "10737418240,512", + paxGNUSparseSize: "10737418240", + paxGNUSparseName: "realname", + }, + wantMap: sparseDatas{{10737418240, 512}}, + wantSize: 10737418240, + wantName: "realname", + }, { + inputHdrs: map[string]string{ + paxGNUSparseMajor: "0", + paxGNUSparseMinor: "0", + paxGNUSparseNumBlocks: "0", + paxGNUSparseMap: "", + }, + wantMap: sparseDatas{}, + }, { + inputHdrs: map[string]string{ + paxGNUSparseMajor: "0", + paxGNUSparseMinor: "1", + paxGNUSparseNumBlocks: "4", + paxGNUSparseMap: "0,5,10,5,20,5,30,5", + }, + wantMap: sparseDatas{{0, 5}, {10, 5}, {20, 5}, {30, 5}}, + }, { + inputHdrs: map[string]string{ + paxGNUSparseMajor: "1", + paxGNUSparseMinor: "0", + paxGNUSparseNumBlocks: "4", + paxGNUSparseMap: "0,5,10,5,20,5,30,5", + }, + wantErr: io.ErrUnexpectedEOF, + }, { + inputData: padInput("0\n"), + inputHdrs: map[string]string{paxGNUSparseMajor: "1", paxGNUSparseMinor: "0"}, + wantMap: sparseDatas{}, + }, { + inputData: padInput("0\n")[:blockSize-1] + "#", + inputHdrs: map[string]string{paxGNUSparseMajor: "1", paxGNUSparseMinor: "0"}, + wantMap: sparseDatas{}, + }, { + inputData: padInput("0"), + inputHdrs: map[string]string{paxGNUSparseMajor: "1", paxGNUSparseMinor: "0"}, + wantErr: io.ErrUnexpectedEOF, + }, { + inputData: padInput("ab\n"), + inputHdrs: map[string]string{paxGNUSparseMajor: "1", paxGNUSparseMinor: "0"}, + wantErr: ErrHeader, + }, { + inputData: padInput("1\n2\n3\n"), + inputHdrs: map[string]string{paxGNUSparseMajor: "1", paxGNUSparseMinor: "0"}, + wantMap: sparseDatas{{2, 3}}, + }, { + inputData: padInput("1\n2\n"), + inputHdrs: map[string]string{paxGNUSparseMajor: "1", paxGNUSparseMinor: "0"}, + wantErr: io.ErrUnexpectedEOF, + }, { + inputData: padInput("1\n2\n\n"), + inputHdrs: map[string]string{paxGNUSparseMajor: "1", paxGNUSparseMinor: "0"}, + wantErr: ErrHeader, + }, { + inputData: string(zeroBlock[:]) + padInput("0\n"), + inputHdrs: map[string]string{paxGNUSparseMajor: "1", paxGNUSparseMinor: "0"}, + wantErr: ErrHeader, + }, { + inputData: strings.Repeat("0", blockSize) + padInput("1\n5\n1\n"), + inputHdrs: map[string]string{paxGNUSparseMajor: "1", paxGNUSparseMinor: "0"}, + wantMap: sparseDatas{{5, 1}}, + }, { + inputData: padInput(fmt.Sprintf("%d\n", int64(math.MaxInt64))), + inputHdrs: map[string]string{paxGNUSparseMajor: "1", paxGNUSparseMinor: "0"}, + wantErr: ErrHeader, + }, { + inputData: padInput(strings.Repeat("0", 300) + "1\n" + strings.Repeat("0", 1000) + "5\n" + strings.Repeat("0", 800) + "2\n"), + inputHdrs: map[string]string{paxGNUSparseMajor: "1", paxGNUSparseMinor: "0"}, + wantMap: sparseDatas{{5, 2}}, + }, { + inputData: padInput("2\n10737418240\n512\n21474836480\n512\n"), + inputHdrs: map[string]string{paxGNUSparseMajor: "1", paxGNUSparseMinor: "0"}, + wantMap: sparseDatas{{10737418240, 512}, {21474836480, 512}}, + }, { + inputData: padInput("100\n" + func() string { + var ss []string + for i := 0; i < 100; i++ { + ss = append(ss, fmt.Sprintf("%d\n%d\n", int64(i)<<30, 512)) + } + return strings.Join(ss, "") + }()), + inputHdrs: map[string]string{paxGNUSparseMajor: "1", paxGNUSparseMinor: "0"}, + wantMap: func() (spd sparseDatas) { + for i := 0; i < 100; i++ { + spd = append(spd, sparseEntry{int64(i) << 30, 512}) + } + return spd + }(), + }} + + for i, v := range vectors { + var hdr Header + hdr.PAXRecords = v.inputHdrs + r := strings.NewReader(v.inputData + "#") // Add canary byte + tr := Reader{curr: ®FileReader{r, int64(r.Len())}} + got, err := tr.readGNUSparsePAXHeaders(&hdr) + if !equalSparseEntries(got, v.wantMap) { + t.Errorf("test %d, readGNUSparsePAXHeaders(): got %v, want %v", i, got, v.wantMap) + } + if err != v.wantErr { + t.Errorf("test %d, readGNUSparsePAXHeaders() = %v, want %v", i, err, v.wantErr) + } + if hdr.Size != v.wantSize { + t.Errorf("test %d, Header.Size = %d, want %d", i, hdr.Size, v.wantSize) + } + if hdr.Name != v.wantName { + t.Errorf("test %d, Header.Name = %s, want %s", i, hdr.Name, v.wantName) + } + if v.wantErr == nil && r.Len() == 0 { + t.Errorf("test %d, canary byte unexpectedly consumed", i) + } + } +} + +// testNonEmptyReader wraps an io.Reader and ensures that +// Read is never called with an empty buffer. +type testNonEmptyReader struct{ io.Reader } + +func (r testNonEmptyReader) Read(b []byte) (int, error) { + if len(b) == 0 { + return 0, errors.New("unexpected empty Read call") + } + return r.Reader.Read(b) +} + +func TestFileReader(t *testing.T) { + type ( + testRead struct { // Read(cnt) == (wantStr, wantErr) + cnt int + wantStr string + wantErr error + } + testWriteTo struct { // WriteTo(testFile{ops}) == (wantCnt, wantErr) + ops fileOps + wantCnt int64 + wantErr error + } + testRemaining struct { // LogicalRemaining() == wantLCnt, PhysicalRemaining() == wantPCnt + wantLCnt int64 + wantPCnt int64 + } + testFnc interface{} // testRead | testWriteTo | testRemaining + ) + + type ( + makeReg struct { + str string + size int64 + } + makeSparse struct { + makeReg makeReg + spd sparseDatas + size int64 + } + fileMaker interface{} // makeReg | makeSparse + ) + + vectors := []struct { + maker fileMaker + tests []testFnc + }{{ + maker: makeReg{"", 0}, + tests: []testFnc{ + testRemaining{0, 0}, + testRead{0, "", io.EOF}, + testRead{1, "", io.EOF}, + testWriteTo{nil, 0, nil}, + testRemaining{0, 0}, + }, + }, { + maker: makeReg{"", 1}, + tests: []testFnc{ + testRemaining{1, 1}, + testRead{5, "", io.ErrUnexpectedEOF}, + testWriteTo{nil, 0, io.ErrUnexpectedEOF}, + testRemaining{1, 1}, + }, + }, { + maker: makeReg{"hello", 5}, + tests: []testFnc{ + testRemaining{5, 5}, + testRead{5, "hello", io.EOF}, + testRemaining{0, 0}, + }, + }, { + maker: makeReg{"hello, world", 50}, + tests: []testFnc{ + testRemaining{50, 50}, + testRead{7, "hello, ", nil}, + testRemaining{43, 43}, + testRead{5, "world", nil}, + testRemaining{38, 38}, + testWriteTo{nil, 0, io.ErrUnexpectedEOF}, + testRead{1, "", io.ErrUnexpectedEOF}, + testRemaining{38, 38}, + }, + }, { + maker: makeReg{"hello, world", 5}, + tests: []testFnc{ + testRemaining{5, 5}, + testRead{0, "", nil}, + testRead{4, "hell", nil}, + testRemaining{1, 1}, + testWriteTo{fileOps{"o"}, 1, nil}, + testRemaining{0, 0}, + testWriteTo{nil, 0, nil}, + testRead{0, "", io.EOF}, + }, + }, { + maker: makeSparse{makeReg{"abcde", 5}, sparseDatas{{0, 2}, {5, 3}}, 8}, + tests: []testFnc{ + testRemaining{8, 5}, + testRead{3, "ab\x00", nil}, + testRead{10, "\x00\x00cde", io.EOF}, + testRemaining{0, 0}, + }, + }, { + maker: makeSparse{makeReg{"abcde", 5}, sparseDatas{{0, 2}, {5, 3}}, 8}, + tests: []testFnc{ + testRemaining{8, 5}, + testWriteTo{fileOps{"ab", int64(3), "cde"}, 8, nil}, + testRemaining{0, 0}, + }, + }, { + maker: makeSparse{makeReg{"abcde", 5}, sparseDatas{{0, 2}, {5, 3}}, 10}, + tests: []testFnc{ + testRemaining{10, 5}, + testRead{100, "ab\x00\x00\x00cde\x00\x00", io.EOF}, + testRemaining{0, 0}, + }, + }, { + maker: makeSparse{makeReg{"abc", 5}, sparseDatas{{0, 2}, {5, 3}}, 10}, + tests: []testFnc{ + testRemaining{10, 5}, + testRead{100, "ab\x00\x00\x00c", io.ErrUnexpectedEOF}, + testRemaining{4, 2}, + }, + }, { + maker: makeSparse{makeReg{"abcde", 5}, sparseDatas{{1, 3}, {6, 2}}, 8}, + tests: []testFnc{ + testRemaining{8, 5}, + testRead{8, "\x00abc\x00\x00de", io.EOF}, + testRemaining{0, 0}, + }, + }, { + maker: makeSparse{makeReg{"abcde", 5}, sparseDatas{{1, 3}, {6, 0}, {6, 0}, {6, 2}}, 8}, + tests: []testFnc{ + testRemaining{8, 5}, + testRead{8, "\x00abc\x00\x00de", io.EOF}, + testRemaining{0, 0}, + }, + }, { + maker: makeSparse{makeReg{"abcde", 5}, sparseDatas{{1, 3}, {6, 0}, {6, 0}, {6, 2}}, 8}, + tests: []testFnc{ + testRemaining{8, 5}, + testWriteTo{fileOps{int64(1), "abc", int64(2), "de"}, 8, nil}, + testRemaining{0, 0}, + }, + }, { + maker: makeSparse{makeReg{"abcde", 5}, sparseDatas{{1, 3}, {6, 2}}, 10}, + tests: []testFnc{ + testRead{100, "\x00abc\x00\x00de\x00\x00", io.EOF}, + }, + }, { + maker: makeSparse{makeReg{"abcde", 5}, sparseDatas{{1, 3}, {6, 2}}, 10}, + tests: []testFnc{ + testWriteTo{fileOps{int64(1), "abc", int64(2), "de", int64(1), "\x00"}, 10, nil}, + }, + }, { + maker: makeSparse{makeReg{"abcde", 5}, sparseDatas{{1, 3}, {6, 2}, {8, 0}, {8, 0}, {8, 0}, {8, 0}}, 10}, + tests: []testFnc{ + testRead{100, "\x00abc\x00\x00de\x00\x00", io.EOF}, + }, + }, { + maker: makeSparse{makeReg{"", 0}, sparseDatas{}, 2}, + tests: []testFnc{ + testRead{100, "\x00\x00", io.EOF}, + }, + }, { + maker: makeSparse{makeReg{"", 8}, sparseDatas{{1, 3}, {6, 5}}, 15}, + tests: []testFnc{ + testRead{100, "\x00", io.ErrUnexpectedEOF}, + }, + }, { + maker: makeSparse{makeReg{"ab", 2}, sparseDatas{{1, 3}, {6, 5}}, 15}, + tests: []testFnc{ + testRead{100, "\x00ab", errMissData}, + }, + }, { + maker: makeSparse{makeReg{"ab", 8}, sparseDatas{{1, 3}, {6, 5}}, 15}, + tests: []testFnc{ + testRead{100, "\x00ab", io.ErrUnexpectedEOF}, + }, + }, { + maker: makeSparse{makeReg{"abc", 3}, sparseDatas{{1, 3}, {6, 5}}, 15}, + tests: []testFnc{ + testRead{100, "\x00abc\x00\x00", errMissData}, + }, + }, { + maker: makeSparse{makeReg{"abc", 8}, sparseDatas{{1, 3}, {6, 5}}, 15}, + tests: []testFnc{ + testRead{100, "\x00abc\x00\x00", io.ErrUnexpectedEOF}, + }, + }, { + maker: makeSparse{makeReg{"abcde", 5}, sparseDatas{{1, 3}, {6, 5}}, 15}, + tests: []testFnc{ + testRead{100, "\x00abc\x00\x00de", errMissData}, + }, + }, { + maker: makeSparse{makeReg{"abcde", 5}, sparseDatas{{1, 3}, {6, 5}}, 15}, + tests: []testFnc{ + testWriteTo{fileOps{int64(1), "abc", int64(2), "de"}, 8, errMissData}, + }, + }, { + maker: makeSparse{makeReg{"abcde", 8}, sparseDatas{{1, 3}, {6, 5}}, 15}, + tests: []testFnc{ + testRead{100, "\x00abc\x00\x00de", io.ErrUnexpectedEOF}, + }, + }, { + maker: makeSparse{makeReg{"abcdefghEXTRA", 13}, sparseDatas{{1, 3}, {6, 5}}, 15}, + tests: []testFnc{ + testRemaining{15, 13}, + testRead{100, "\x00abc\x00\x00defgh\x00\x00\x00\x00", errUnrefData}, + testWriteTo{nil, 0, errUnrefData}, + testRemaining{0, 5}, + }, + }, { + maker: makeSparse{makeReg{"abcdefghEXTRA", 13}, sparseDatas{{1, 3}, {6, 5}}, 15}, + tests: []testFnc{ + testRemaining{15, 13}, + testWriteTo{fileOps{int64(1), "abc", int64(2), "defgh", int64(4)}, 15, errUnrefData}, + testRead{100, "", errUnrefData}, + testRemaining{0, 5}, + }, + }} + + for i, v := range vectors { + var fr fileReader + switch maker := v.maker.(type) { + case makeReg: + r := testNonEmptyReader{strings.NewReader(maker.str)} + fr = ®FileReader{r, maker.size} + case makeSparse: + if !validateSparseEntries(maker.spd, maker.size) { + t.Fatalf("invalid sparse map: %v", maker.spd) + } + sph := invertSparseEntries(maker.spd, maker.size) + r := testNonEmptyReader{strings.NewReader(maker.makeReg.str)} + fr = ®FileReader{r, maker.makeReg.size} + fr = &sparseFileReader{fr, sph, 0} + default: + t.Fatalf("test %d, unknown make operation: %T", i, maker) + } + + for j, tf := range v.tests { + switch tf := tf.(type) { + case testRead: + b := make([]byte, tf.cnt) + n, err := fr.Read(b) + if got := string(b[:n]); got != tf.wantStr || err != tf.wantErr { + t.Errorf("test %d.%d, Read(%d):\ngot (%q, %v)\nwant (%q, %v)", i, j, tf.cnt, got, err, tf.wantStr, tf.wantErr) + } + case testWriteTo: + f := &testFile{ops: tf.ops} + got, err := fr.WriteTo(f) + if _, ok := err.(testError); ok { + t.Errorf("test %d.%d, WriteTo(): %v", i, j, err) + } else if got != tf.wantCnt || err != tf.wantErr { + t.Errorf("test %d.%d, WriteTo() = (%d, %v), want (%d, %v)", i, j, got, err, tf.wantCnt, tf.wantErr) + } + if len(f.ops) > 0 { + t.Errorf("test %d.%d, expected %d more operations", i, j, len(f.ops)) + } + case testRemaining: + if got := fr.LogicalRemaining(); got != tf.wantLCnt { + t.Errorf("test %d.%d, LogicalRemaining() = %d, want %d", i, j, got, tf.wantLCnt) + } + if got := fr.PhysicalRemaining(); got != tf.wantPCnt { + t.Errorf("test %d.%d, PhysicalRemaining() = %d, want %d", i, j, got, tf.wantPCnt) + } + default: + t.Fatalf("test %d.%d, unknown test operation: %T", i, j, tf) + } + } + } +} diff --git a/src/archive/tar/stat_actime1.go b/src/archive/tar/stat_actime1.go new file mode 100644 index 0000000..1bdd1c9 --- /dev/null +++ b/src/archive/tar/stat_actime1.go @@ -0,0 +1,20 @@ +// Copyright 2012 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +// +build aix linux dragonfly openbsd solaris + +package tar + +import ( + "syscall" + "time" +) + +func statAtime(st *syscall.Stat_t) time.Time { + return time.Unix(st.Atim.Unix()) +} + +func statCtime(st *syscall.Stat_t) time.Time { + return time.Unix(st.Ctim.Unix()) +} diff --git a/src/archive/tar/stat_actime2.go b/src/archive/tar/stat_actime2.go new file mode 100644 index 0000000..6f17dbe --- /dev/null +++ b/src/archive/tar/stat_actime2.go @@ -0,0 +1,20 @@ +// Copyright 2012 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +// +build darwin freebsd netbsd + +package tar + +import ( + "syscall" + "time" +) + +func statAtime(st *syscall.Stat_t) time.Time { + return time.Unix(st.Atimespec.Unix()) +} + +func statCtime(st *syscall.Stat_t) time.Time { + return time.Unix(st.Ctimespec.Unix()) +} diff --git a/src/archive/tar/stat_unix.go b/src/archive/tar/stat_unix.go new file mode 100644 index 0000000..581d87d --- /dev/null +++ b/src/archive/tar/stat_unix.go @@ -0,0 +1,101 @@ +// Copyright 2012 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +// +build aix linux darwin dragonfly freebsd openbsd netbsd solaris + +package tar + +import ( + "io/fs" + "os/user" + "runtime" + "strconv" + "sync" + "syscall" +) + +func init() { + sysStat = statUnix +} + +// userMap and groupMap caches UID and GID lookups for performance reasons. +// The downside is that renaming uname or gname by the OS never takes effect. +var userMap, groupMap sync.Map // map[int]string + +func statUnix(fi fs.FileInfo, h *Header) error { + sys, ok := fi.Sys().(*syscall.Stat_t) + if !ok { + return nil + } + h.Uid = int(sys.Uid) + h.Gid = int(sys.Gid) + + // Best effort at populating Uname and Gname. + // The os/user functions may fail for any number of reasons + // (not implemented on that platform, cgo not enabled, etc). + if u, ok := userMap.Load(h.Uid); ok { + h.Uname = u.(string) + } else if u, err := user.LookupId(strconv.Itoa(h.Uid)); err == nil { + h.Uname = u.Username + userMap.Store(h.Uid, h.Uname) + } + if g, ok := groupMap.Load(h.Gid); ok { + h.Gname = g.(string) + } else if g, err := user.LookupGroupId(strconv.Itoa(h.Gid)); err == nil { + h.Gname = g.Name + groupMap.Store(h.Gid, h.Gname) + } + + h.AccessTime = statAtime(sys) + h.ChangeTime = statCtime(sys) + + // Best effort at populating Devmajor and Devminor. + if h.Typeflag == TypeChar || h.Typeflag == TypeBlock { + dev := uint64(sys.Rdev) // May be int32 or uint32 + switch runtime.GOOS { + case "aix": + var major, minor uint32 + major = uint32((dev & 0x3fffffff00000000) >> 32) + minor = uint32((dev & 0x00000000ffffffff) >> 0) + h.Devmajor, h.Devminor = int64(major), int64(minor) + case "linux": + // Copied from golang.org/x/sys/unix/dev_linux.go. + major := uint32((dev & 0x00000000000fff00) >> 8) + major |= uint32((dev & 0xfffff00000000000) >> 32) + minor := uint32((dev & 0x00000000000000ff) >> 0) + minor |= uint32((dev & 0x00000ffffff00000) >> 12) + h.Devmajor, h.Devminor = int64(major), int64(minor) + case "darwin", "ios": + // Copied from golang.org/x/sys/unix/dev_darwin.go. + major := uint32((dev >> 24) & 0xff) + minor := uint32(dev & 0xffffff) + h.Devmajor, h.Devminor = int64(major), int64(minor) + case "dragonfly": + // Copied from golang.org/x/sys/unix/dev_dragonfly.go. + major := uint32((dev >> 8) & 0xff) + minor := uint32(dev & 0xffff00ff) + h.Devmajor, h.Devminor = int64(major), int64(minor) + case "freebsd": + // Copied from golang.org/x/sys/unix/dev_freebsd.go. + major := uint32((dev >> 8) & 0xff) + minor := uint32(dev & 0xffff00ff) + h.Devmajor, h.Devminor = int64(major), int64(minor) + case "netbsd": + // Copied from golang.org/x/sys/unix/dev_netbsd.go. + major := uint32((dev & 0x000fff00) >> 8) + minor := uint32((dev & 0x000000ff) >> 0) + minor |= uint32((dev & 0xfff00000) >> 12) + h.Devmajor, h.Devminor = int64(major), int64(minor) + case "openbsd": + // Copied from golang.org/x/sys/unix/dev_openbsd.go. + major := uint32((dev & 0x0000ff00) >> 8) + minor := uint32((dev & 0x000000ff) >> 0) + minor |= uint32((dev & 0xffff0000) >> 8) + h.Devmajor, h.Devminor = int64(major), int64(minor) + default: + // TODO: Implement solaris (see https://golang.org/issue/8106) + } + } + return nil +} diff --git a/src/archive/tar/strconv.go b/src/archive/tar/strconv.go new file mode 100644 index 0000000..f0b61e6 --- /dev/null +++ b/src/archive/tar/strconv.go @@ -0,0 +1,345 @@ +// Copyright 2016 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package tar + +import ( + "bytes" + "fmt" + "strconv" + "strings" + "time" +) + +// hasNUL reports whether the NUL character exists within s. +func hasNUL(s string) bool { + return strings.IndexByte(s, 0) >= 0 +} + +// isASCII reports whether the input is an ASCII C-style string. +func isASCII(s string) bool { + for _, c := range s { + if c >= 0x80 || c == 0x00 { + return false + } + } + return true +} + +// toASCII converts the input to an ASCII C-style string. +// This is a best effort conversion, so invalid characters are dropped. +func toASCII(s string) string { + if isASCII(s) { + return s + } + b := make([]byte, 0, len(s)) + for _, c := range s { + if c < 0x80 && c != 0x00 { + b = append(b, byte(c)) + } + } + return string(b) +} + +type parser struct { + err error // Last error seen +} + +type formatter struct { + err error // Last error seen +} + +// parseString parses bytes as a NUL-terminated C-style string. +// If a NUL byte is not found then the whole slice is returned as a string. +func (*parser) parseString(b []byte) string { + if i := bytes.IndexByte(b, 0); i >= 0 { + return string(b[:i]) + } + return string(b) +} + +// formatString copies s into b, NUL-terminating if possible. +func (f *formatter) formatString(b []byte, s string) { + if len(s) > len(b) { + f.err = ErrFieldTooLong + } + copy(b, s) + if len(s) < len(b) { + b[len(s)] = 0 + } + + // Some buggy readers treat regular files with a trailing slash + // in the V7 path field as a directory even though the full path + // recorded elsewhere (e.g., via PAX record) contains no trailing slash. + if len(s) > len(b) && b[len(b)-1] == '/' { + n := len(strings.TrimRight(s[:len(b)], "/")) + b[n] = 0 // Replace trailing slash with NUL terminator + } +} + +// fitsInBase256 reports whether x can be encoded into n bytes using base-256 +// encoding. Unlike octal encoding, base-256 encoding does not require that the +// string ends with a NUL character. Thus, all n bytes are available for output. +// +// If operating in binary mode, this assumes strict GNU binary mode; which means +// that the first byte can only be either 0x80 or 0xff. Thus, the first byte is +// equivalent to the sign bit in two's complement form. +func fitsInBase256(n int, x int64) bool { + binBits := uint(n-1) * 8 + return n >= 9 || (x >= -1<<binBits && x < 1<<binBits) +} + +// parseNumeric parses the input as being encoded in either base-256 or octal. +// This function may return negative numbers. +// If parsing fails or an integer overflow occurs, err will be set. +func (p *parser) parseNumeric(b []byte) int64 { + // Check for base-256 (binary) format first. + // If the first bit is set, then all following bits constitute a two's + // complement encoded number in big-endian byte order. + if len(b) > 0 && b[0]&0x80 != 0 { + // Handling negative numbers relies on the following identity: + // -a-1 == ^a + // + // If the number is negative, we use an inversion mask to invert the + // data bytes and treat the value as an unsigned number. + var inv byte // 0x00 if positive or zero, 0xff if negative + if b[0]&0x40 != 0 { + inv = 0xff + } + + var x uint64 + for i, c := range b { + c ^= inv // Inverts c only if inv is 0xff, otherwise does nothing + if i == 0 { + c &= 0x7f // Ignore signal bit in first byte + } + if (x >> 56) > 0 { + p.err = ErrHeader // Integer overflow + return 0 + } + x = x<<8 | uint64(c) + } + if (x >> 63) > 0 { + p.err = ErrHeader // Integer overflow + return 0 + } + if inv == 0xff { + return ^int64(x) + } + return int64(x) + } + + // Normal case is base-8 (octal) format. + return p.parseOctal(b) +} + +// formatNumeric encodes x into b using base-8 (octal) encoding if possible. +// Otherwise it will attempt to use base-256 (binary) encoding. +func (f *formatter) formatNumeric(b []byte, x int64) { + if fitsInOctal(len(b), x) { + f.formatOctal(b, x) + return + } + + if fitsInBase256(len(b), x) { + for i := len(b) - 1; i >= 0; i-- { + b[i] = byte(x) + x >>= 8 + } + b[0] |= 0x80 // Highest bit indicates binary format + return + } + + f.formatOctal(b, 0) // Last resort, just write zero + f.err = ErrFieldTooLong +} + +func (p *parser) parseOctal(b []byte) int64 { + // Because unused fields are filled with NULs, we need + // to skip leading NULs. Fields may also be padded with + // spaces or NULs. + // So we remove leading and trailing NULs and spaces to + // be sure. + b = bytes.Trim(b, " \x00") + + if len(b) == 0 { + return 0 + } + x, perr := strconv.ParseUint(p.parseString(b), 8, 64) + if perr != nil { + p.err = ErrHeader + } + return int64(x) +} + +func (f *formatter) formatOctal(b []byte, x int64) { + if !fitsInOctal(len(b), x) { + x = 0 // Last resort, just write zero + f.err = ErrFieldTooLong + } + + s := strconv.FormatInt(x, 8) + // Add leading zeros, but leave room for a NUL. + if n := len(b) - len(s) - 1; n > 0 { + s = strings.Repeat("0", n) + s + } + f.formatString(b, s) +} + +// fitsInOctal reports whether the integer x fits in a field n-bytes long +// using octal encoding with the appropriate NUL terminator. +func fitsInOctal(n int, x int64) bool { + octBits := uint(n-1) * 3 + return x >= 0 && (n >= 22 || x < 1<<octBits) +} + +// parsePAXTime takes a string of the form %d.%d as described in the PAX +// specification. Note that this implementation allows for negative timestamps, +// which is allowed for by the PAX specification, but not always portable. +func parsePAXTime(s string) (time.Time, error) { + const maxNanoSecondDigits = 9 + + // Split string into seconds and sub-seconds parts. + ss, sn := s, "" + if pos := strings.IndexByte(s, '.'); pos >= 0 { + ss, sn = s[:pos], s[pos+1:] + } + + // Parse the seconds. + secs, err := strconv.ParseInt(ss, 10, 64) + if err != nil { + return time.Time{}, ErrHeader + } + if len(sn) == 0 { + return time.Unix(secs, 0), nil // No sub-second values + } + + // Parse the nanoseconds. + if strings.Trim(sn, "0123456789") != "" { + return time.Time{}, ErrHeader + } + if len(sn) < maxNanoSecondDigits { + sn += strings.Repeat("0", maxNanoSecondDigits-len(sn)) // Right pad + } else { + sn = sn[:maxNanoSecondDigits] // Right truncate + } + nsecs, _ := strconv.ParseInt(sn, 10, 64) // Must succeed + if len(ss) > 0 && ss[0] == '-' { + return time.Unix(secs, -1*nsecs), nil // Negative correction + } + return time.Unix(secs, nsecs), nil +} + +// formatPAXTime converts ts into a time of the form %d.%d as described in the +// PAX specification. This function is capable of negative timestamps. +func formatPAXTime(ts time.Time) (s string) { + secs, nsecs := ts.Unix(), ts.Nanosecond() + if nsecs == 0 { + return strconv.FormatInt(secs, 10) + } + + // If seconds is negative, then perform correction. + sign := "" + if secs < 0 { + sign = "-" // Remember sign + secs = -(secs + 1) // Add a second to secs + nsecs = -(nsecs - 1e9) // Take that second away from nsecs + } + return strings.TrimRight(fmt.Sprintf("%s%d.%09d", sign, secs, nsecs), "0") +} + +// parsePAXRecord parses the input PAX record string into a key-value pair. +// If parsing is successful, it will slice off the currently read record and +// return the remainder as r. +func parsePAXRecord(s string) (k, v, r string, err error) { + // The size field ends at the first space. + sp := strings.IndexByte(s, ' ') + if sp == -1 { + return "", "", s, ErrHeader + } + + // Parse the first token as a decimal integer. + n, perr := strconv.ParseInt(s[:sp], 10, 0) // Intentionally parse as native int + if perr != nil || n < 5 || int64(len(s)) < n { + return "", "", s, ErrHeader + } + + afterSpace := int64(sp + 1) + beforeLastNewLine := n - 1 + // In some cases, "length" was perhaps padded/malformed, and + // trying to index past where the space supposedly is goes past + // the end of the actual record. + // For example: + // "0000000000000000000000000000000030 mtime=1432668921.098285006\n30 ctime=2147483649.15163319" + // ^ ^ + // | | + // | afterSpace=35 + // | + // beforeLastNewLine=29 + // yet indexOf(firstSpace) MUST BE before endOfRecord. + // + // See https://golang.org/issues/40196. + if afterSpace >= beforeLastNewLine { + return "", "", s, ErrHeader + } + + // Extract everything between the space and the final newline. + rec, nl, rem := s[afterSpace:beforeLastNewLine], s[beforeLastNewLine:n], s[n:] + if nl != "\n" { + return "", "", s, ErrHeader + } + + // The first equals separates the key from the value. + eq := strings.IndexByte(rec, '=') + if eq == -1 { + return "", "", s, ErrHeader + } + k, v = rec[:eq], rec[eq+1:] + + if !validPAXRecord(k, v) { + return "", "", s, ErrHeader + } + return k, v, rem, nil +} + +// formatPAXRecord formats a single PAX record, prefixing it with the +// appropriate length. +func formatPAXRecord(k, v string) (string, error) { + if !validPAXRecord(k, v) { + return "", ErrHeader + } + + const padding = 3 // Extra padding for ' ', '=', and '\n' + size := len(k) + len(v) + padding + size += len(strconv.Itoa(size)) + record := strconv.Itoa(size) + " " + k + "=" + v + "\n" + + // Final adjustment if adding size field increased the record size. + if len(record) != size { + size = len(record) + record = strconv.Itoa(size) + " " + k + "=" + v + "\n" + } + return record, nil +} + +// validPAXRecord reports whether the key-value pair is valid where each +// record is formatted as: +// "%d %s=%s\n" % (size, key, value) +// +// Keys and values should be UTF-8, but the number of bad writers out there +// forces us to be a more liberal. +// Thus, we only reject all keys with NUL, and only reject NULs in values +// for the PAX version of the USTAR string fields. +// The key must not contain an '=' character. +func validPAXRecord(k, v string) bool { + if k == "" || strings.IndexByte(k, '=') >= 0 { + return false + } + switch k { + case paxPath, paxLinkpath, paxUname, paxGname: + return !hasNUL(v) + default: + return !hasNUL(k) + } +} diff --git a/src/archive/tar/strconv_test.go b/src/archive/tar/strconv_test.go new file mode 100644 index 0000000..add65e2 --- /dev/null +++ b/src/archive/tar/strconv_test.go @@ -0,0 +1,441 @@ +// Copyright 2016 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package tar + +import ( + "math" + "strings" + "testing" + "time" +) + +func TestFitsInBase256(t *testing.T) { + vectors := []struct { + in int64 + width int + ok bool + }{ + {+1, 8, true}, + {0, 8, true}, + {-1, 8, true}, + {1 << 56, 8, false}, + {(1 << 56) - 1, 8, true}, + {-1 << 56, 8, true}, + {(-1 << 56) - 1, 8, false}, + {121654, 8, true}, + {-9849849, 8, true}, + {math.MaxInt64, 9, true}, + {0, 9, true}, + {math.MinInt64, 9, true}, + {math.MaxInt64, 12, true}, + {0, 12, true}, + {math.MinInt64, 12, true}, + } + + for _, v := range vectors { + ok := fitsInBase256(v.width, v.in) + if ok != v.ok { + t.Errorf("fitsInBase256(%d, %d): got %v, want %v", v.in, v.width, ok, v.ok) + } + } +} + +func TestParseNumeric(t *testing.T) { + vectors := []struct { + in string + want int64 + ok bool + }{ + // Test base-256 (binary) encoded values. + {"", 0, true}, + {"\x80", 0, true}, + {"\x80\x00", 0, true}, + {"\x80\x00\x00", 0, true}, + {"\xbf", (1 << 6) - 1, true}, + {"\xbf\xff", (1 << 14) - 1, true}, + {"\xbf\xff\xff", (1 << 22) - 1, true}, + {"\xff", -1, true}, + {"\xff\xff", -1, true}, + {"\xff\xff\xff", -1, true}, + {"\xc0", -1 * (1 << 6), true}, + {"\xc0\x00", -1 * (1 << 14), true}, + {"\xc0\x00\x00", -1 * (1 << 22), true}, + {"\x87\x76\xa2\x22\xeb\x8a\x72\x61", 537795476381659745, true}, + {"\x80\x00\x00\x00\x07\x76\xa2\x22\xeb\x8a\x72\x61", 537795476381659745, true}, + {"\xf7\x76\xa2\x22\xeb\x8a\x72\x61", -615126028225187231, true}, + {"\xff\xff\xff\xff\xf7\x76\xa2\x22\xeb\x8a\x72\x61", -615126028225187231, true}, + {"\x80\x7f\xff\xff\xff\xff\xff\xff\xff", math.MaxInt64, true}, + {"\x80\x80\x00\x00\x00\x00\x00\x00\x00", 0, false}, + {"\xff\x80\x00\x00\x00\x00\x00\x00\x00", math.MinInt64, true}, + {"\xff\x7f\xff\xff\xff\xff\xff\xff\xff", 0, false}, + {"\xf5\xec\xd1\xc7\x7e\x5f\x26\x48\x81\x9f\x8f\x9b", 0, false}, + + // Test base-8 (octal) encoded values. + {"0000000\x00", 0, true}, + {" \x0000000\x00", 0, true}, + {" \x0000003\x00", 3, true}, + {"00000000227\x00", 0227, true}, + {"032033\x00 ", 032033, true}, + {"320330\x00 ", 0320330, true}, + {"0000660\x00 ", 0660, true}, + {"\x00 0000660\x00 ", 0660, true}, + {"0123456789abcdef", 0, false}, + {"0123456789\x00abcdef", 0, false}, + {"01234567\x0089abcdef", 342391, true}, + {"0123\x7e\x5f\x264123", 0, false}, + } + + for _, v := range vectors { + var p parser + got := p.parseNumeric([]byte(v.in)) + ok := (p.err == nil) + if ok != v.ok { + if v.ok { + t.Errorf("parseNumeric(%q): got parsing failure, want success", v.in) + } else { + t.Errorf("parseNumeric(%q): got parsing success, want failure", v.in) + } + } + if ok && got != v.want { + t.Errorf("parseNumeric(%q): got %d, want %d", v.in, got, v.want) + } + } +} + +func TestFormatNumeric(t *testing.T) { + vectors := []struct { + in int64 + want string + ok bool + }{ + // Test base-8 (octal) encoded values. + {0, "0\x00", true}, + {7, "7\x00", true}, + {8, "\x80\x08", true}, + {077, "77\x00", true}, + {0100, "\x80\x00\x40", true}, + {0, "0000000\x00", true}, + {0123, "0000123\x00", true}, + {07654321, "7654321\x00", true}, + {07777777, "7777777\x00", true}, + {010000000, "\x80\x00\x00\x00\x00\x20\x00\x00", true}, + {0, "00000000000\x00", true}, + {000001234567, "00001234567\x00", true}, + {076543210321, "76543210321\x00", true}, + {012345670123, "12345670123\x00", true}, + {077777777777, "77777777777\x00", true}, + {0100000000000, "\x80\x00\x00\x00\x00\x00\x00\x02\x00\x00\x00\x00", true}, + {math.MaxInt64, "777777777777777777777\x00", true}, + + // Test base-256 (binary) encoded values. + {-1, "\xff", true}, + {-1, "\xff\xff", true}, + {-1, "\xff\xff\xff", true}, + {(1 << 0), "0", false}, + {(1 << 8) - 1, "\x80\xff", true}, + {(1 << 8), "0\x00", false}, + {(1 << 16) - 1, "\x80\xff\xff", true}, + {(1 << 16), "00\x00", false}, + {-1 * (1 << 0), "\xff", true}, + {-1*(1<<0) - 1, "0", false}, + {-1 * (1 << 8), "\xff\x00", true}, + {-1*(1<<8) - 1, "0\x00", false}, + {-1 * (1 << 16), "\xff\x00\x00", true}, + {-1*(1<<16) - 1, "00\x00", false}, + {537795476381659745, "0000000\x00", false}, + {537795476381659745, "\x80\x00\x00\x00\x07\x76\xa2\x22\xeb\x8a\x72\x61", true}, + {-615126028225187231, "0000000\x00", false}, + {-615126028225187231, "\xff\xff\xff\xff\xf7\x76\xa2\x22\xeb\x8a\x72\x61", true}, + {math.MaxInt64, "0000000\x00", false}, + {math.MaxInt64, "\x80\x00\x00\x00\x7f\xff\xff\xff\xff\xff\xff\xff", true}, + {math.MinInt64, "0000000\x00", false}, + {math.MinInt64, "\xff\xff\xff\xff\x80\x00\x00\x00\x00\x00\x00\x00", true}, + {math.MaxInt64, "\x80\x7f\xff\xff\xff\xff\xff\xff\xff", true}, + {math.MinInt64, "\xff\x80\x00\x00\x00\x00\x00\x00\x00", true}, + } + + for _, v := range vectors { + var f formatter + got := make([]byte, len(v.want)) + f.formatNumeric(got, v.in) + ok := (f.err == nil) + if ok != v.ok { + if v.ok { + t.Errorf("formatNumeric(%d): got formatting failure, want success", v.in) + } else { + t.Errorf("formatNumeric(%d): got formatting success, want failure", v.in) + } + } + if string(got) != v.want { + t.Errorf("formatNumeric(%d): got %q, want %q", v.in, got, v.want) + } + } +} + +func TestFitsInOctal(t *testing.T) { + vectors := []struct { + input int64 + width int + ok bool + }{ + {-1, 1, false}, + {-1, 2, false}, + {-1, 3, false}, + {0, 1, true}, + {0 + 1, 1, false}, + {0, 2, true}, + {07, 2, true}, + {07 + 1, 2, false}, + {0, 4, true}, + {0777, 4, true}, + {0777 + 1, 4, false}, + {0, 8, true}, + {07777777, 8, true}, + {07777777 + 1, 8, false}, + {0, 12, true}, + {077777777777, 12, true}, + {077777777777 + 1, 12, false}, + {math.MaxInt64, 22, true}, + {012345670123, 12, true}, + {01564164, 12, true}, + {-012345670123, 12, false}, + {-01564164, 12, false}, + {-1564164, 30, false}, + } + + for _, v := range vectors { + ok := fitsInOctal(v.width, v.input) + if ok != v.ok { + t.Errorf("checkOctal(%d, %d): got %v, want %v", v.input, v.width, ok, v.ok) + } + } +} + +func TestParsePAXTime(t *testing.T) { + vectors := []struct { + in string + want time.Time + ok bool + }{ + {"1350244992.023960108", time.Unix(1350244992, 23960108), true}, + {"1350244992.02396010", time.Unix(1350244992, 23960100), true}, + {"1350244992.0239601089", time.Unix(1350244992, 23960108), true}, + {"1350244992.3", time.Unix(1350244992, 300000000), true}, + {"1350244992", time.Unix(1350244992, 0), true}, + {"-1.000000001", time.Unix(-1, -1e0+0e0), true}, + {"-1.000001", time.Unix(-1, -1e3+0e0), true}, + {"-1.001000", time.Unix(-1, -1e6+0e0), true}, + {"-1", time.Unix(-1, -0e0+0e0), true}, + {"-1.999000", time.Unix(-1, -1e9+1e6), true}, + {"-1.999999", time.Unix(-1, -1e9+1e3), true}, + {"-1.999999999", time.Unix(-1, -1e9+1e0), true}, + {"0.000000001", time.Unix(0, 1e0+0e0), true}, + {"0.000001", time.Unix(0, 1e3+0e0), true}, + {"0.001000", time.Unix(0, 1e6+0e0), true}, + {"0", time.Unix(0, 0e0), true}, + {"0.999000", time.Unix(0, 1e9-1e6), true}, + {"0.999999", time.Unix(0, 1e9-1e3), true}, + {"0.999999999", time.Unix(0, 1e9-1e0), true}, + {"1.000000001", time.Unix(+1, +1e0-0e0), true}, + {"1.000001", time.Unix(+1, +1e3-0e0), true}, + {"1.001000", time.Unix(+1, +1e6-0e0), true}, + {"1", time.Unix(+1, +0e0-0e0), true}, + {"1.999000", time.Unix(+1, +1e9-1e6), true}, + {"1.999999", time.Unix(+1, +1e9-1e3), true}, + {"1.999999999", time.Unix(+1, +1e9-1e0), true}, + {"-1350244992.023960108", time.Unix(-1350244992, -23960108), true}, + {"-1350244992.02396010", time.Unix(-1350244992, -23960100), true}, + {"-1350244992.0239601089", time.Unix(-1350244992, -23960108), true}, + {"-1350244992.3", time.Unix(-1350244992, -300000000), true}, + {"-1350244992", time.Unix(-1350244992, 0), true}, + {"", time.Time{}, false}, + {"0", time.Unix(0, 0), true}, + {"1.", time.Unix(1, 0), true}, + {"0.0", time.Unix(0, 0), true}, + {".5", time.Time{}, false}, + {"-1.3", time.Unix(-1, -3e8), true}, + {"-1.0", time.Unix(-1, -0e0), true}, + {"-0.0", time.Unix(-0, -0e0), true}, + {"-0.1", time.Unix(-0, -1e8), true}, + {"-0.01", time.Unix(-0, -1e7), true}, + {"-0.99", time.Unix(-0, -99e7), true}, + {"-0.98", time.Unix(-0, -98e7), true}, + {"-1.1", time.Unix(-1, -1e8), true}, + {"-1.01", time.Unix(-1, -1e7), true}, + {"-2.99", time.Unix(-2, -99e7), true}, + {"-5.98", time.Unix(-5, -98e7), true}, + {"-", time.Time{}, false}, + {"+", time.Time{}, false}, + {"-1.-1", time.Time{}, false}, + {"99999999999999999999999999999999999999999999999", time.Time{}, false}, + {"0.123456789abcdef", time.Time{}, false}, + {"foo", time.Time{}, false}, + {"\x00", time.Time{}, false}, + {"𝟵𝟴𝟳𝟲𝟱.𝟰𝟯𝟮𝟭𝟬", time.Time{}, false}, // Unicode numbers (U+1D7EC to U+1D7F5) + {"98765﹒43210", time.Time{}, false}, // Unicode period (U+FE52) + } + + for _, v := range vectors { + ts, err := parsePAXTime(v.in) + ok := (err == nil) + if v.ok != ok { + if v.ok { + t.Errorf("parsePAXTime(%q): got parsing failure, want success", v.in) + } else { + t.Errorf("parsePAXTime(%q): got parsing success, want failure", v.in) + } + } + if ok && !ts.Equal(v.want) { + t.Errorf("parsePAXTime(%q): got (%ds %dns), want (%ds %dns)", + v.in, ts.Unix(), ts.Nanosecond(), v.want.Unix(), v.want.Nanosecond()) + } + } +} + +func TestFormatPAXTime(t *testing.T) { + vectors := []struct { + sec, nsec int64 + want string + }{ + {1350244992, 0, "1350244992"}, + {1350244992, 300000000, "1350244992.3"}, + {1350244992, 23960100, "1350244992.0239601"}, + {1350244992, 23960108, "1350244992.023960108"}, + {+1, +1e9 - 1e0, "1.999999999"}, + {+1, +1e9 - 1e3, "1.999999"}, + {+1, +1e9 - 1e6, "1.999"}, + {+1, +0e0 - 0e0, "1"}, + {+1, +1e6 - 0e0, "1.001"}, + {+1, +1e3 - 0e0, "1.000001"}, + {+1, +1e0 - 0e0, "1.000000001"}, + {0, 1e9 - 1e0, "0.999999999"}, + {0, 1e9 - 1e3, "0.999999"}, + {0, 1e9 - 1e6, "0.999"}, + {0, 0e0, "0"}, + {0, 1e6 + 0e0, "0.001"}, + {0, 1e3 + 0e0, "0.000001"}, + {0, 1e0 + 0e0, "0.000000001"}, + {-1, -1e9 + 1e0, "-1.999999999"}, + {-1, -1e9 + 1e3, "-1.999999"}, + {-1, -1e9 + 1e6, "-1.999"}, + {-1, -0e0 + 0e0, "-1"}, + {-1, -1e6 + 0e0, "-1.001"}, + {-1, -1e3 + 0e0, "-1.000001"}, + {-1, -1e0 + 0e0, "-1.000000001"}, + {-1350244992, 0, "-1350244992"}, + {-1350244992, -300000000, "-1350244992.3"}, + {-1350244992, -23960100, "-1350244992.0239601"}, + {-1350244992, -23960108, "-1350244992.023960108"}, + } + + for _, v := range vectors { + got := formatPAXTime(time.Unix(v.sec, v.nsec)) + if got != v.want { + t.Errorf("formatPAXTime(%ds, %dns): got %q, want %q", + v.sec, v.nsec, got, v.want) + } + } +} + +func TestParsePAXRecord(t *testing.T) { + medName := strings.Repeat("CD", 50) + longName := strings.Repeat("AB", 100) + + vectors := []struct { + in string + wantRes string + wantKey string + wantVal string + ok bool + }{ + {"6 k=v\n\n", "\n", "k", "v", true}, + {"19 path=/etc/hosts\n", "", "path", "/etc/hosts", true}, + {"210 path=" + longName + "\nabc", "abc", "path", longName, true}, + {"110 path=" + medName + "\n", "", "path", medName, true}, + {"9 foo=ba\n", "", "foo", "ba", true}, + {"11 foo=bar\n\x00", "\x00", "foo", "bar", true}, + {"18 foo=b=\nar=\n==\x00\n", "", "foo", "b=\nar=\n==\x00", true}, + {"27 foo=hello9 foo=ba\nworld\n", "", "foo", "hello9 foo=ba\nworld", true}, + {"27 ☺☻☹=日a本b語ç\nmeow mix", "meow mix", "☺☻☹", "日a本b語ç", true}, + {"17 \x00hello=\x00world\n", "17 \x00hello=\x00world\n", "", "", false}, + {"1 k=1\n", "1 k=1\n", "", "", false}, + {"6 k~1\n", "6 k~1\n", "", "", false}, + {"6_k=1\n", "6_k=1\n", "", "", false}, + {"6 k=1 ", "6 k=1 ", "", "", false}, + {"632 k=1\n", "632 k=1\n", "", "", false}, + {"16 longkeyname=hahaha\n", "16 longkeyname=hahaha\n", "", "", false}, + {"3 somelongkey=\n", "3 somelongkey=\n", "", "", false}, + {"50 tooshort=\n", "50 tooshort=\n", "", "", false}, + {"0000000000000000000000000000000030 mtime=1432668921.098285006\n30 ctime=2147483649.15163319", "0000000000000000000000000000000030 mtime=1432668921.098285006\n30 ctime=2147483649.15163319", "mtime", "1432668921.098285006", false}, + {"06 k=v\n", "06 k=v\n", "", "", false}, + {"00006 k=v\n", "00006 k=v\n", "", "", false}, + {"000006 k=v\n", "000006 k=v\n", "", "", false}, + {"000000 k=v\n", "000000 k=v\n", "", "", false}, + {"0 k=v\n", "0 k=v\n", "", "", false}, + {"+0000005 x=\n", "+0000005 x=\n", "", "", false}, + } + + for _, v := range vectors { + key, val, res, err := parsePAXRecord(v.in) + ok := (err == nil) + if ok != v.ok { + if v.ok { + t.Errorf("parsePAXRecord(%q): got parsing failure, want success", v.in) + } else { + t.Errorf("parsePAXRecord(%q): got parsing success, want failure", v.in) + } + } + if v.ok && (key != v.wantKey || val != v.wantVal) { + t.Errorf("parsePAXRecord(%q): got (%q: %q), want (%q: %q)", + v.in, key, val, v.wantKey, v.wantVal) + } + if res != v.wantRes { + t.Errorf("parsePAXRecord(%q): got residual %q, want residual %q", + v.in, res, v.wantRes) + } + } +} + +func TestFormatPAXRecord(t *testing.T) { + medName := strings.Repeat("CD", 50) + longName := strings.Repeat("AB", 100) + + vectors := []struct { + inKey string + inVal string + want string + ok bool + }{ + {"k", "v", "6 k=v\n", true}, + {"path", "/etc/hosts", "19 path=/etc/hosts\n", true}, + {"path", longName, "210 path=" + longName + "\n", true}, + {"path", medName, "110 path=" + medName + "\n", true}, + {"foo", "ba", "9 foo=ba\n", true}, + {"foo", "bar", "11 foo=bar\n", true}, + {"foo", "b=\nar=\n==\x00", "18 foo=b=\nar=\n==\x00\n", true}, + {"foo", "hello9 foo=ba\nworld", "27 foo=hello9 foo=ba\nworld\n", true}, + {"☺☻☹", "日a本b語ç", "27 ☺☻☹=日a本b語ç\n", true}, + {"xhello", "\x00world", "17 xhello=\x00world\n", true}, + {"path", "null\x00", "", false}, + {"null\x00", "value", "", false}, + {paxSchilyXattr + "key", "null\x00", "26 SCHILY.xattr.key=null\x00\n", true}, + } + + for _, v := range vectors { + got, err := formatPAXRecord(v.inKey, v.inVal) + ok := (err == nil) + if ok != v.ok { + if v.ok { + t.Errorf("formatPAXRecord(%q, %q): got format failure, want success", v.inKey, v.inVal) + } else { + t.Errorf("formatPAXRecord(%q, %q): got format success, want failure", v.inKey, v.inVal) + } + } + if got != v.want { + t.Errorf("formatPAXRecord(%q, %q): got %q, want %q", + v.inKey, v.inVal, got, v.want) + } + } +} diff --git a/src/archive/tar/tar_test.go b/src/archive/tar/tar_test.go new file mode 100644 index 0000000..91b3840 --- /dev/null +++ b/src/archive/tar/tar_test.go @@ -0,0 +1,855 @@ +// Copyright 2012 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package tar + +import ( + "bytes" + "errors" + "fmt" + "internal/testenv" + "io" + "io/fs" + "math" + "os" + "path" + "path/filepath" + "reflect" + "strings" + "testing" + "time" +) + +type testError struct{ error } + +type fileOps []interface{} // []T where T is (string | int64) + +// testFile is an io.ReadWriteSeeker where the IO operations performed +// on it must match the list of operations in ops. +type testFile struct { + ops fileOps + pos int64 +} + +func (f *testFile) Read(b []byte) (int, error) { + if len(b) == 0 { + return 0, nil + } + if len(f.ops) == 0 { + return 0, io.EOF + } + s, ok := f.ops[0].(string) + if !ok { + return 0, errors.New("unexpected Read operation") + } + + n := copy(b, s) + if len(s) > n { + f.ops[0] = s[n:] + } else { + f.ops = f.ops[1:] + } + f.pos += int64(len(b)) + return n, nil +} + +func (f *testFile) Write(b []byte) (int, error) { + if len(b) == 0 { + return 0, nil + } + if len(f.ops) == 0 { + return 0, errors.New("unexpected Write operation") + } + s, ok := f.ops[0].(string) + if !ok { + return 0, errors.New("unexpected Write operation") + } + + if !strings.HasPrefix(s, string(b)) { + return 0, testError{fmt.Errorf("got Write(%q), want Write(%q)", b, s)} + } + if len(s) > len(b) { + f.ops[0] = s[len(b):] + } else { + f.ops = f.ops[1:] + } + f.pos += int64(len(b)) + return len(b), nil +} + +func (f *testFile) Seek(pos int64, whence int) (int64, error) { + if pos == 0 && whence == io.SeekCurrent { + return f.pos, nil + } + if len(f.ops) == 0 { + return 0, errors.New("unexpected Seek operation") + } + s, ok := f.ops[0].(int64) + if !ok { + return 0, errors.New("unexpected Seek operation") + } + + if s != pos || whence != io.SeekCurrent { + return 0, testError{fmt.Errorf("got Seek(%d, %d), want Seek(%d, %d)", pos, whence, s, io.SeekCurrent)} + } + f.pos += s + f.ops = f.ops[1:] + return f.pos, nil +} + +func equalSparseEntries(x, y []sparseEntry) bool { + return (len(x) == 0 && len(y) == 0) || reflect.DeepEqual(x, y) +} + +func TestSparseEntries(t *testing.T) { + vectors := []struct { + in []sparseEntry + size int64 + + wantValid bool // Result of validateSparseEntries + wantAligned []sparseEntry // Result of alignSparseEntries + wantInverted []sparseEntry // Result of invertSparseEntries + }{{ + in: []sparseEntry{}, size: 0, + wantValid: true, + wantInverted: []sparseEntry{{0, 0}}, + }, { + in: []sparseEntry{}, size: 5000, + wantValid: true, + wantInverted: []sparseEntry{{0, 5000}}, + }, { + in: []sparseEntry{{0, 5000}}, size: 5000, + wantValid: true, + wantAligned: []sparseEntry{{0, 5000}}, + wantInverted: []sparseEntry{{5000, 0}}, + }, { + in: []sparseEntry{{1000, 4000}}, size: 5000, + wantValid: true, + wantAligned: []sparseEntry{{1024, 3976}}, + wantInverted: []sparseEntry{{0, 1000}, {5000, 0}}, + }, { + in: []sparseEntry{{0, 3000}}, size: 5000, + wantValid: true, + wantAligned: []sparseEntry{{0, 2560}}, + wantInverted: []sparseEntry{{3000, 2000}}, + }, { + in: []sparseEntry{{3000, 2000}}, size: 5000, + wantValid: true, + wantAligned: []sparseEntry{{3072, 1928}}, + wantInverted: []sparseEntry{{0, 3000}, {5000, 0}}, + }, { + in: []sparseEntry{{2000, 2000}}, size: 5000, + wantValid: true, + wantAligned: []sparseEntry{{2048, 1536}}, + wantInverted: []sparseEntry{{0, 2000}, {4000, 1000}}, + }, { + in: []sparseEntry{{0, 2000}, {8000, 2000}}, size: 10000, + wantValid: true, + wantAligned: []sparseEntry{{0, 1536}, {8192, 1808}}, + wantInverted: []sparseEntry{{2000, 6000}, {10000, 0}}, + }, { + in: []sparseEntry{{0, 2000}, {2000, 2000}, {4000, 0}, {4000, 3000}, {7000, 1000}, {8000, 0}, {8000, 2000}}, size: 10000, + wantValid: true, + wantAligned: []sparseEntry{{0, 1536}, {2048, 1536}, {4096, 2560}, {7168, 512}, {8192, 1808}}, + wantInverted: []sparseEntry{{10000, 0}}, + }, { + in: []sparseEntry{{0, 0}, {1000, 0}, {2000, 0}, {3000, 0}, {4000, 0}, {5000, 0}}, size: 5000, + wantValid: true, + wantInverted: []sparseEntry{{0, 5000}}, + }, { + in: []sparseEntry{{1, 0}}, size: 0, + wantValid: false, + }, { + in: []sparseEntry{{-1, 0}}, size: 100, + wantValid: false, + }, { + in: []sparseEntry{{0, -1}}, size: 100, + wantValid: false, + }, { + in: []sparseEntry{{0, 0}}, size: -100, + wantValid: false, + }, { + in: []sparseEntry{{math.MaxInt64, 3}, {6, -5}}, size: 35, + wantValid: false, + }, { + in: []sparseEntry{{1, 3}, {6, -5}}, size: 35, + wantValid: false, + }, { + in: []sparseEntry{{math.MaxInt64, math.MaxInt64}}, size: math.MaxInt64, + wantValid: false, + }, { + in: []sparseEntry{{3, 3}}, size: 5, + wantValid: false, + }, { + in: []sparseEntry{{2, 0}, {1, 0}, {0, 0}}, size: 3, + wantValid: false, + }, { + in: []sparseEntry{{1, 3}, {2, 2}}, size: 10, + wantValid: false, + }} + + for i, v := range vectors { + gotValid := validateSparseEntries(v.in, v.size) + if gotValid != v.wantValid { + t.Errorf("test %d, validateSparseEntries() = %v, want %v", i, gotValid, v.wantValid) + } + if !v.wantValid { + continue + } + gotAligned := alignSparseEntries(append([]sparseEntry{}, v.in...), v.size) + if !equalSparseEntries(gotAligned, v.wantAligned) { + t.Errorf("test %d, alignSparseEntries():\ngot %v\nwant %v", i, gotAligned, v.wantAligned) + } + gotInverted := invertSparseEntries(append([]sparseEntry{}, v.in...), v.size) + if !equalSparseEntries(gotInverted, v.wantInverted) { + t.Errorf("test %d, inverseSparseEntries():\ngot %v\nwant %v", i, gotInverted, v.wantInverted) + } + } +} + +func TestFileInfoHeader(t *testing.T) { + fi, err := os.Stat("testdata/small.txt") + if err != nil { + t.Fatal(err) + } + h, err := FileInfoHeader(fi, "") + if err != nil { + t.Fatalf("FileInfoHeader: %v", err) + } + if g, e := h.Name, "small.txt"; g != e { + t.Errorf("Name = %q; want %q", g, e) + } + if g, e := h.Mode, int64(fi.Mode().Perm()); g != e { + t.Errorf("Mode = %#o; want %#o", g, e) + } + if g, e := h.Size, int64(5); g != e { + t.Errorf("Size = %v; want %v", g, e) + } + if g, e := h.ModTime, fi.ModTime(); !g.Equal(e) { + t.Errorf("ModTime = %v; want %v", g, e) + } + // FileInfoHeader should error when passing nil FileInfo + if _, err := FileInfoHeader(nil, ""); err == nil { + t.Fatalf("Expected error when passing nil to FileInfoHeader") + } +} + +func TestFileInfoHeaderDir(t *testing.T) { + fi, err := os.Stat("testdata") + if err != nil { + t.Fatal(err) + } + h, err := FileInfoHeader(fi, "") + if err != nil { + t.Fatalf("FileInfoHeader: %v", err) + } + if g, e := h.Name, "testdata/"; g != e { + t.Errorf("Name = %q; want %q", g, e) + } + // Ignoring c_ISGID for golang.org/issue/4867 + if g, e := h.Mode&^c_ISGID, int64(fi.Mode().Perm()); g != e { + t.Errorf("Mode = %#o; want %#o", g, e) + } + if g, e := h.Size, int64(0); g != e { + t.Errorf("Size = %v; want %v", g, e) + } + if g, e := h.ModTime, fi.ModTime(); !g.Equal(e) { + t.Errorf("ModTime = %v; want %v", g, e) + } +} + +func TestFileInfoHeaderSymlink(t *testing.T) { + testenv.MustHaveSymlink(t) + + tmpdir, err := os.MkdirTemp("", "TestFileInfoHeaderSymlink") + if err != nil { + t.Fatal(err) + } + defer os.RemoveAll(tmpdir) + + link := filepath.Join(tmpdir, "link") + target := tmpdir + err = os.Symlink(target, link) + if err != nil { + t.Fatal(err) + } + fi, err := os.Lstat(link) + if err != nil { + t.Fatal(err) + } + + h, err := FileInfoHeader(fi, target) + if err != nil { + t.Fatal(err) + } + if g, e := h.Name, fi.Name(); g != e { + t.Errorf("Name = %q; want %q", g, e) + } + if g, e := h.Linkname, target; g != e { + t.Errorf("Linkname = %q; want %q", g, e) + } + if g, e := h.Typeflag, byte(TypeSymlink); g != e { + t.Errorf("Typeflag = %v; want %v", g, e) + } +} + +func TestRoundTrip(t *testing.T) { + data := []byte("some file contents") + + var b bytes.Buffer + tw := NewWriter(&b) + hdr := &Header{ + Name: "file.txt", + Uid: 1 << 21, // Too big for 8 octal digits + Size: int64(len(data)), + ModTime: time.Now().Round(time.Second), + PAXRecords: map[string]string{"uid": "2097152"}, + Format: FormatPAX, + Typeflag: TypeReg, + } + if err := tw.WriteHeader(hdr); err != nil { + t.Fatalf("tw.WriteHeader: %v", err) + } + if _, err := tw.Write(data); err != nil { + t.Fatalf("tw.Write: %v", err) + } + if err := tw.Close(); err != nil { + t.Fatalf("tw.Close: %v", err) + } + + // Read it back. + tr := NewReader(&b) + rHdr, err := tr.Next() + if err != nil { + t.Fatalf("tr.Next: %v", err) + } + if !reflect.DeepEqual(rHdr, hdr) { + t.Errorf("Header mismatch.\n got %+v\nwant %+v", rHdr, hdr) + } + rData, err := io.ReadAll(tr) + if err != nil { + t.Fatalf("Read: %v", err) + } + if !bytes.Equal(rData, data) { + t.Errorf("Data mismatch.\n got %q\nwant %q", rData, data) + } +} + +type headerRoundTripTest struct { + h *Header + fm fs.FileMode +} + +func TestHeaderRoundTrip(t *testing.T) { + vectors := []headerRoundTripTest{{ + // regular file. + h: &Header{ + Name: "test.txt", + Mode: 0644, + Size: 12, + ModTime: time.Unix(1360600916, 0), + Typeflag: TypeReg, + }, + fm: 0644, + }, { + // symbolic link. + h: &Header{ + Name: "link.txt", + Mode: 0777, + Size: 0, + ModTime: time.Unix(1360600852, 0), + Typeflag: TypeSymlink, + }, + fm: 0777 | fs.ModeSymlink, + }, { + // character device node. + h: &Header{ + Name: "dev/null", + Mode: 0666, + Size: 0, + ModTime: time.Unix(1360578951, 0), + Typeflag: TypeChar, + }, + fm: 0666 | fs.ModeDevice | fs.ModeCharDevice, + }, { + // block device node. + h: &Header{ + Name: "dev/sda", + Mode: 0660, + Size: 0, + ModTime: time.Unix(1360578954, 0), + Typeflag: TypeBlock, + }, + fm: 0660 | fs.ModeDevice, + }, { + // directory. + h: &Header{ + Name: "dir/", + Mode: 0755, + Size: 0, + ModTime: time.Unix(1360601116, 0), + Typeflag: TypeDir, + }, + fm: 0755 | fs.ModeDir, + }, { + // fifo node. + h: &Header{ + Name: "dev/initctl", + Mode: 0600, + Size: 0, + ModTime: time.Unix(1360578949, 0), + Typeflag: TypeFifo, + }, + fm: 0600 | fs.ModeNamedPipe, + }, { + // setuid. + h: &Header{ + Name: "bin/su", + Mode: 0755 | c_ISUID, + Size: 23232, + ModTime: time.Unix(1355405093, 0), + Typeflag: TypeReg, + }, + fm: 0755 | fs.ModeSetuid, + }, { + // setguid. + h: &Header{ + Name: "group.txt", + Mode: 0750 | c_ISGID, + Size: 0, + ModTime: time.Unix(1360602346, 0), + Typeflag: TypeReg, + }, + fm: 0750 | fs.ModeSetgid, + }, { + // sticky. + h: &Header{ + Name: "sticky.txt", + Mode: 0600 | c_ISVTX, + Size: 7, + ModTime: time.Unix(1360602540, 0), + Typeflag: TypeReg, + }, + fm: 0600 | fs.ModeSticky, + }, { + // hard link. + h: &Header{ + Name: "hard.txt", + Mode: 0644, + Size: 0, + Linkname: "file.txt", + ModTime: time.Unix(1360600916, 0), + Typeflag: TypeLink, + }, + fm: 0644, + }, { + // More information. + h: &Header{ + Name: "info.txt", + Mode: 0600, + Size: 0, + Uid: 1000, + Gid: 1000, + ModTime: time.Unix(1360602540, 0), + Uname: "slartibartfast", + Gname: "users", + Typeflag: TypeReg, + }, + fm: 0600, + }} + + for i, v := range vectors { + fi := v.h.FileInfo() + h2, err := FileInfoHeader(fi, "") + if err != nil { + t.Error(err) + continue + } + if strings.Contains(fi.Name(), "/") { + t.Errorf("FileInfo of %q contains slash: %q", v.h.Name, fi.Name()) + } + name := path.Base(v.h.Name) + if fi.IsDir() { + name += "/" + } + if got, want := h2.Name, name; got != want { + t.Errorf("i=%d: Name: got %v, want %v", i, got, want) + } + if got, want := h2.Size, v.h.Size; got != want { + t.Errorf("i=%d: Size: got %v, want %v", i, got, want) + } + if got, want := h2.Uid, v.h.Uid; got != want { + t.Errorf("i=%d: Uid: got %d, want %d", i, got, want) + } + if got, want := h2.Gid, v.h.Gid; got != want { + t.Errorf("i=%d: Gid: got %d, want %d", i, got, want) + } + if got, want := h2.Uname, v.h.Uname; got != want { + t.Errorf("i=%d: Uname: got %q, want %q", i, got, want) + } + if got, want := h2.Gname, v.h.Gname; got != want { + t.Errorf("i=%d: Gname: got %q, want %q", i, got, want) + } + if got, want := h2.Linkname, v.h.Linkname; got != want { + t.Errorf("i=%d: Linkname: got %v, want %v", i, got, want) + } + if got, want := h2.Typeflag, v.h.Typeflag; got != want { + t.Logf("%#v %#v", v.h, fi.Sys()) + t.Errorf("i=%d: Typeflag: got %q, want %q", i, got, want) + } + if got, want := h2.Mode, v.h.Mode; got != want { + t.Errorf("i=%d: Mode: got %o, want %o", i, got, want) + } + if got, want := fi.Mode(), v.fm; got != want { + t.Errorf("i=%d: fi.Mode: got %o, want %o", i, got, want) + } + if got, want := h2.AccessTime, v.h.AccessTime; got != want { + t.Errorf("i=%d: AccessTime: got %v, want %v", i, got, want) + } + if got, want := h2.ChangeTime, v.h.ChangeTime; got != want { + t.Errorf("i=%d: ChangeTime: got %v, want %v", i, got, want) + } + if got, want := h2.ModTime, v.h.ModTime; got != want { + t.Errorf("i=%d: ModTime: got %v, want %v", i, got, want) + } + if sysh, ok := fi.Sys().(*Header); !ok || sysh != v.h { + t.Errorf("i=%d: Sys didn't return original *Header", i) + } + } +} + +func TestHeaderAllowedFormats(t *testing.T) { + vectors := []struct { + header *Header // Input header + paxHdrs map[string]string // Expected PAX headers that may be needed + formats Format // Expected formats that can encode the header + }{{ + header: &Header{}, + formats: FormatUSTAR | FormatPAX | FormatGNU, + }, { + header: &Header{Size: 077777777777}, + formats: FormatUSTAR | FormatPAX | FormatGNU, + }, { + header: &Header{Size: 077777777777, Format: FormatUSTAR}, + formats: FormatUSTAR, + }, { + header: &Header{Size: 077777777777, Format: FormatPAX}, + formats: FormatUSTAR | FormatPAX, + }, { + header: &Header{Size: 077777777777, Format: FormatGNU}, + formats: FormatGNU, + }, { + header: &Header{Size: 077777777777 + 1}, + paxHdrs: map[string]string{paxSize: "8589934592"}, + formats: FormatPAX | FormatGNU, + }, { + header: &Header{Size: 077777777777 + 1, Format: FormatPAX}, + paxHdrs: map[string]string{paxSize: "8589934592"}, + formats: FormatPAX, + }, { + header: &Header{Size: 077777777777 + 1, Format: FormatGNU}, + paxHdrs: map[string]string{paxSize: "8589934592"}, + formats: FormatGNU, + }, { + header: &Header{Mode: 07777777}, + formats: FormatUSTAR | FormatPAX | FormatGNU, + }, { + header: &Header{Mode: 07777777 + 1}, + formats: FormatGNU, + }, { + header: &Header{Devmajor: -123}, + formats: FormatGNU, + }, { + header: &Header{Devmajor: 1<<56 - 1}, + formats: FormatGNU, + }, { + header: &Header{Devmajor: 1 << 56}, + formats: FormatUnknown, + }, { + header: &Header{Devmajor: -1 << 56}, + formats: FormatGNU, + }, { + header: &Header{Devmajor: -1<<56 - 1}, + formats: FormatUnknown, + }, { + header: &Header{Name: "用戶名", Devmajor: -1 << 56}, + formats: FormatGNU, + }, { + header: &Header{Size: math.MaxInt64}, + paxHdrs: map[string]string{paxSize: "9223372036854775807"}, + formats: FormatPAX | FormatGNU, + }, { + header: &Header{Size: math.MinInt64}, + paxHdrs: map[string]string{paxSize: "-9223372036854775808"}, + formats: FormatUnknown, + }, { + header: &Header{Uname: "0123456789abcdef0123456789abcdef"}, + formats: FormatUSTAR | FormatPAX | FormatGNU, + }, { + header: &Header{Uname: "0123456789abcdef0123456789abcdefx"}, + paxHdrs: map[string]string{paxUname: "0123456789abcdef0123456789abcdefx"}, + formats: FormatPAX, + }, { + header: &Header{Name: "foobar"}, + formats: FormatUSTAR | FormatPAX | FormatGNU, + }, { + header: &Header{Name: strings.Repeat("a", nameSize)}, + formats: FormatUSTAR | FormatPAX | FormatGNU, + }, { + header: &Header{Name: strings.Repeat("a", nameSize+1)}, + paxHdrs: map[string]string{paxPath: strings.Repeat("a", nameSize+1)}, + formats: FormatPAX | FormatGNU, + }, { + header: &Header{Linkname: "用戶名"}, + paxHdrs: map[string]string{paxLinkpath: "用戶名"}, + formats: FormatPAX | FormatGNU, + }, { + header: &Header{Linkname: strings.Repeat("用戶名\x00", nameSize)}, + paxHdrs: map[string]string{paxLinkpath: strings.Repeat("用戶名\x00", nameSize)}, + formats: FormatUnknown, + }, { + header: &Header{Linkname: "\x00hello"}, + paxHdrs: map[string]string{paxLinkpath: "\x00hello"}, + formats: FormatUnknown, + }, { + header: &Header{Uid: 07777777}, + formats: FormatUSTAR | FormatPAX | FormatGNU, + }, { + header: &Header{Uid: 07777777 + 1}, + paxHdrs: map[string]string{paxUid: "2097152"}, + formats: FormatPAX | FormatGNU, + }, { + header: &Header{Xattrs: nil}, + formats: FormatUSTAR | FormatPAX | FormatGNU, + }, { + header: &Header{Xattrs: map[string]string{"foo": "bar"}}, + paxHdrs: map[string]string{paxSchilyXattr + "foo": "bar"}, + formats: FormatPAX, + }, { + header: &Header{Xattrs: map[string]string{"foo": "bar"}, Format: FormatGNU}, + paxHdrs: map[string]string{paxSchilyXattr + "foo": "bar"}, + formats: FormatUnknown, + }, { + header: &Header{Xattrs: map[string]string{"用戶名": "\x00hello"}}, + paxHdrs: map[string]string{paxSchilyXattr + "用戶名": "\x00hello"}, + formats: FormatPAX, + }, { + header: &Header{Xattrs: map[string]string{"foo=bar": "baz"}}, + formats: FormatUnknown, + }, { + header: &Header{Xattrs: map[string]string{"foo": ""}}, + paxHdrs: map[string]string{paxSchilyXattr + "foo": ""}, + formats: FormatPAX, + }, { + header: &Header{ModTime: time.Unix(0, 0)}, + formats: FormatUSTAR | FormatPAX | FormatGNU, + }, { + header: &Header{ModTime: time.Unix(077777777777, 0)}, + formats: FormatUSTAR | FormatPAX | FormatGNU, + }, { + header: &Header{ModTime: time.Unix(077777777777+1, 0)}, + paxHdrs: map[string]string{paxMtime: "8589934592"}, + formats: FormatPAX | FormatGNU, + }, { + header: &Header{ModTime: time.Unix(math.MaxInt64, 0)}, + paxHdrs: map[string]string{paxMtime: "9223372036854775807"}, + formats: FormatPAX | FormatGNU, + }, { + header: &Header{ModTime: time.Unix(math.MaxInt64, 0), Format: FormatUSTAR}, + paxHdrs: map[string]string{paxMtime: "9223372036854775807"}, + formats: FormatUnknown, + }, { + header: &Header{ModTime: time.Unix(-1, 0)}, + paxHdrs: map[string]string{paxMtime: "-1"}, + formats: FormatPAX | FormatGNU, + }, { + header: &Header{ModTime: time.Unix(1, 500)}, + paxHdrs: map[string]string{paxMtime: "1.0000005"}, + formats: FormatUSTAR | FormatPAX | FormatGNU, + }, { + header: &Header{ModTime: time.Unix(1, 0)}, + formats: FormatUSTAR | FormatPAX | FormatGNU, + }, { + header: &Header{ModTime: time.Unix(1, 0), Format: FormatPAX}, + formats: FormatUSTAR | FormatPAX, + }, { + header: &Header{ModTime: time.Unix(1, 500), Format: FormatUSTAR}, + paxHdrs: map[string]string{paxMtime: "1.0000005"}, + formats: FormatUSTAR, + }, { + header: &Header{ModTime: time.Unix(1, 500), Format: FormatPAX}, + paxHdrs: map[string]string{paxMtime: "1.0000005"}, + formats: FormatPAX, + }, { + header: &Header{ModTime: time.Unix(1, 500), Format: FormatGNU}, + paxHdrs: map[string]string{paxMtime: "1.0000005"}, + formats: FormatGNU, + }, { + header: &Header{ModTime: time.Unix(-1, 500)}, + paxHdrs: map[string]string{paxMtime: "-0.9999995"}, + formats: FormatPAX | FormatGNU, + }, { + header: &Header{ModTime: time.Unix(-1, 500), Format: FormatGNU}, + paxHdrs: map[string]string{paxMtime: "-0.9999995"}, + formats: FormatGNU, + }, { + header: &Header{AccessTime: time.Unix(0, 0)}, + paxHdrs: map[string]string{paxAtime: "0"}, + formats: FormatPAX | FormatGNU, + }, { + header: &Header{AccessTime: time.Unix(0, 0), Format: FormatUSTAR}, + paxHdrs: map[string]string{paxAtime: "0"}, + formats: FormatUnknown, + }, { + header: &Header{AccessTime: time.Unix(0, 0), Format: FormatPAX}, + paxHdrs: map[string]string{paxAtime: "0"}, + formats: FormatPAX, + }, { + header: &Header{AccessTime: time.Unix(0, 0), Format: FormatGNU}, + paxHdrs: map[string]string{paxAtime: "0"}, + formats: FormatGNU, + }, { + header: &Header{AccessTime: time.Unix(-123, 0)}, + paxHdrs: map[string]string{paxAtime: "-123"}, + formats: FormatPAX | FormatGNU, + }, { + header: &Header{AccessTime: time.Unix(-123, 0), Format: FormatPAX}, + paxHdrs: map[string]string{paxAtime: "-123"}, + formats: FormatPAX, + }, { + header: &Header{ChangeTime: time.Unix(123, 456)}, + paxHdrs: map[string]string{paxCtime: "123.000000456"}, + formats: FormatPAX | FormatGNU, + }, { + header: &Header{ChangeTime: time.Unix(123, 456), Format: FormatUSTAR}, + paxHdrs: map[string]string{paxCtime: "123.000000456"}, + formats: FormatUnknown, + }, { + header: &Header{ChangeTime: time.Unix(123, 456), Format: FormatGNU}, + paxHdrs: map[string]string{paxCtime: "123.000000456"}, + formats: FormatGNU, + }, { + header: &Header{ChangeTime: time.Unix(123, 456), Format: FormatPAX}, + paxHdrs: map[string]string{paxCtime: "123.000000456"}, + formats: FormatPAX, + }, { + header: &Header{Name: "foo/", Typeflag: TypeDir}, + formats: FormatUSTAR | FormatPAX | FormatGNU, + }, { + header: &Header{Name: "foo/", Typeflag: TypeReg}, + formats: FormatUnknown, + }, { + header: &Header{Name: "foo/", Typeflag: TypeSymlink}, + formats: FormatUSTAR | FormatPAX | FormatGNU, + }} + + for i, v := range vectors { + formats, paxHdrs, err := v.header.allowedFormats() + if formats != v.formats { + t.Errorf("test %d, allowedFormats(): got %v, want %v", i, formats, v.formats) + } + if formats&FormatPAX > 0 && !reflect.DeepEqual(paxHdrs, v.paxHdrs) && !(len(paxHdrs) == 0 && len(v.paxHdrs) == 0) { + t.Errorf("test %d, allowedFormats():\ngot %v\nwant %s", i, paxHdrs, v.paxHdrs) + } + if (formats != FormatUnknown) && (err != nil) { + t.Errorf("test %d, unexpected error: %v", i, err) + } + if (formats == FormatUnknown) && (err == nil) { + t.Errorf("test %d, got nil-error, want non-nil error", i) + } + } +} + +func Benchmark(b *testing.B) { + type file struct { + hdr *Header + body []byte + } + + vectors := []struct { + label string + files []file + }{{ + "USTAR", + []file{{ + &Header{Name: "bar", Mode: 0640, Size: int64(3)}, + []byte("foo"), + }, { + &Header{Name: "world", Mode: 0640, Size: int64(5)}, + []byte("hello"), + }}, + }, { + "GNU", + []file{{ + &Header{Name: "bar", Mode: 0640, Size: int64(3), Devmajor: -1}, + []byte("foo"), + }, { + &Header{Name: "world", Mode: 0640, Size: int64(5), Devmajor: -1}, + []byte("hello"), + }}, + }, { + "PAX", + []file{{ + &Header{Name: "bar", Mode: 0640, Size: int64(3), Xattrs: map[string]string{"foo": "bar"}}, + []byte("foo"), + }, { + &Header{Name: "world", Mode: 0640, Size: int64(5), Xattrs: map[string]string{"foo": "bar"}}, + []byte("hello"), + }}, + }} + + b.Run("Writer", func(b *testing.B) { + for _, v := range vectors { + b.Run(v.label, func(b *testing.B) { + b.ReportAllocs() + for i := 0; i < b.N; i++ { + // Writing to io.Discard because we want to + // test purely the writer code and not bring in disk performance into this. + tw := NewWriter(io.Discard) + for _, file := range v.files { + if err := tw.WriteHeader(file.hdr); err != nil { + b.Errorf("unexpected WriteHeader error: %v", err) + } + if _, err := tw.Write(file.body); err != nil { + b.Errorf("unexpected Write error: %v", err) + } + } + if err := tw.Close(); err != nil { + b.Errorf("unexpected Close error: %v", err) + } + } + }) + } + }) + + b.Run("Reader", func(b *testing.B) { + for _, v := range vectors { + var buf bytes.Buffer + var r bytes.Reader + + // Write the archive to a byte buffer. + tw := NewWriter(&buf) + for _, file := range v.files { + tw.WriteHeader(file.hdr) + tw.Write(file.body) + } + tw.Close() + b.Run(v.label, func(b *testing.B) { + b.ReportAllocs() + // Read from the byte buffer. + for i := 0; i < b.N; i++ { + r.Reset(buf.Bytes()) + tr := NewReader(&r) + if _, err := tr.Next(); err != nil { + b.Errorf("unexpected Next error: %v", err) + } + if _, err := io.Copy(io.Discard, tr); err != nil { + b.Errorf("unexpected Copy error : %v", err) + } + } + }) + } + }) + +} diff --git a/src/archive/tar/testdata/file-and-dir.tar b/src/archive/tar/testdata/file-and-dir.tar Binary files differnew file mode 100644 index 0000000..c18d428 --- /dev/null +++ b/src/archive/tar/testdata/file-and-dir.tar diff --git a/src/archive/tar/testdata/gnu-incremental.tar b/src/archive/tar/testdata/gnu-incremental.tar Binary files differnew file mode 100644 index 0000000..4c442e5 --- /dev/null +++ b/src/archive/tar/testdata/gnu-incremental.tar diff --git a/src/archive/tar/testdata/gnu-long-nul.tar b/src/archive/tar/testdata/gnu-long-nul.tar Binary files differnew file mode 100644 index 0000000..28bc812 --- /dev/null +++ b/src/archive/tar/testdata/gnu-long-nul.tar diff --git a/src/archive/tar/testdata/gnu-multi-hdrs.tar b/src/archive/tar/testdata/gnu-multi-hdrs.tar Binary files differnew file mode 100644 index 0000000..8bcad55 --- /dev/null +++ b/src/archive/tar/testdata/gnu-multi-hdrs.tar diff --git a/src/archive/tar/testdata/gnu-nil-sparse-data.tar b/src/archive/tar/testdata/gnu-nil-sparse-data.tar Binary files differnew file mode 100644 index 0000000..df1aa83 --- /dev/null +++ b/src/archive/tar/testdata/gnu-nil-sparse-data.tar diff --git a/src/archive/tar/testdata/gnu-nil-sparse-hole.tar b/src/archive/tar/testdata/gnu-nil-sparse-hole.tar Binary files differnew file mode 100644 index 0000000..496abfe --- /dev/null +++ b/src/archive/tar/testdata/gnu-nil-sparse-hole.tar diff --git a/src/archive/tar/testdata/gnu-not-utf8.tar b/src/archive/tar/testdata/gnu-not-utf8.tar Binary files differnew file mode 100644 index 0000000..81cec67 --- /dev/null +++ b/src/archive/tar/testdata/gnu-not-utf8.tar diff --git a/src/archive/tar/testdata/gnu-sparse-big.tar b/src/archive/tar/testdata/gnu-sparse-big.tar Binary files differnew file mode 100644 index 0000000..1a5cfc9 --- /dev/null +++ b/src/archive/tar/testdata/gnu-sparse-big.tar diff --git a/src/archive/tar/testdata/gnu-utf8.tar b/src/archive/tar/testdata/gnu-utf8.tar Binary files differnew file mode 100644 index 0000000..2c9c807 --- /dev/null +++ b/src/archive/tar/testdata/gnu-utf8.tar diff --git a/src/archive/tar/testdata/gnu.tar b/src/archive/tar/testdata/gnu.tar Binary files differnew file mode 100644 index 0000000..fc899dc --- /dev/null +++ b/src/archive/tar/testdata/gnu.tar diff --git a/src/archive/tar/testdata/hardlink.tar b/src/archive/tar/testdata/hardlink.tar Binary files differnew file mode 100644 index 0000000..9cd1a26 --- /dev/null +++ b/src/archive/tar/testdata/hardlink.tar diff --git a/src/archive/tar/testdata/hdr-only.tar b/src/archive/tar/testdata/hdr-only.tar Binary files differnew file mode 100644 index 0000000..f250340 --- /dev/null +++ b/src/archive/tar/testdata/hdr-only.tar diff --git a/src/archive/tar/testdata/invalid-go17.tar b/src/archive/tar/testdata/invalid-go17.tar Binary files differnew file mode 100644 index 0000000..58f2488 --- /dev/null +++ b/src/archive/tar/testdata/invalid-go17.tar diff --git a/src/archive/tar/testdata/issue10968.tar b/src/archive/tar/testdata/issue10968.tar Binary files differnew file mode 100644 index 0000000..1cc837b --- /dev/null +++ b/src/archive/tar/testdata/issue10968.tar diff --git a/src/archive/tar/testdata/issue11169.tar b/src/archive/tar/testdata/issue11169.tar Binary files differnew file mode 100644 index 0000000..4d71fa1 --- /dev/null +++ b/src/archive/tar/testdata/issue11169.tar diff --git a/src/archive/tar/testdata/issue12435.tar b/src/archive/tar/testdata/issue12435.tar Binary files differnew file mode 100644 index 0000000..3542dd8 --- /dev/null +++ b/src/archive/tar/testdata/issue12435.tar diff --git a/src/archive/tar/testdata/neg-size.tar b/src/archive/tar/testdata/neg-size.tar Binary files differnew file mode 100644 index 0000000..21edf38 --- /dev/null +++ b/src/archive/tar/testdata/neg-size.tar diff --git a/src/archive/tar/testdata/nil-uid.tar b/src/archive/tar/testdata/nil-uid.tar Binary files differnew file mode 100644 index 0000000..cc9cfaa --- /dev/null +++ b/src/archive/tar/testdata/nil-uid.tar diff --git a/src/archive/tar/testdata/pax-bad-hdr-file.tar b/src/archive/tar/testdata/pax-bad-hdr-file.tar Binary files differnew file mode 100644 index 0000000..b97cc98 --- /dev/null +++ b/src/archive/tar/testdata/pax-bad-hdr-file.tar diff --git a/src/archive/tar/testdata/pax-bad-mtime-file.tar b/src/archive/tar/testdata/pax-bad-mtime-file.tar Binary files differnew file mode 100644 index 0000000..9b22f7e --- /dev/null +++ b/src/archive/tar/testdata/pax-bad-mtime-file.tar diff --git a/src/archive/tar/testdata/pax-global-records.tar b/src/archive/tar/testdata/pax-global-records.tar Binary files differnew file mode 100644 index 0000000..3d3d241 --- /dev/null +++ b/src/archive/tar/testdata/pax-global-records.tar diff --git a/src/archive/tar/testdata/pax-multi-hdrs.tar b/src/archive/tar/testdata/pax-multi-hdrs.tar Binary files differnew file mode 100644 index 0000000..14bc759 --- /dev/null +++ b/src/archive/tar/testdata/pax-multi-hdrs.tar diff --git a/src/archive/tar/testdata/pax-nil-sparse-data.tar b/src/archive/tar/testdata/pax-nil-sparse-data.tar Binary files differnew file mode 100644 index 0000000..e59bd94 --- /dev/null +++ b/src/archive/tar/testdata/pax-nil-sparse-data.tar diff --git a/src/archive/tar/testdata/pax-nil-sparse-hole.tar b/src/archive/tar/testdata/pax-nil-sparse-hole.tar Binary files differnew file mode 100644 index 0000000..b44327b --- /dev/null +++ b/src/archive/tar/testdata/pax-nil-sparse-hole.tar diff --git a/src/archive/tar/testdata/pax-nul-path.tar b/src/archive/tar/testdata/pax-nul-path.tar Binary files differnew file mode 100644 index 0000000..c78f82b --- /dev/null +++ b/src/archive/tar/testdata/pax-nul-path.tar diff --git a/src/archive/tar/testdata/pax-nul-xattrs.tar b/src/archive/tar/testdata/pax-nul-xattrs.tar Binary files differnew file mode 100644 index 0000000..881f517 --- /dev/null +++ b/src/archive/tar/testdata/pax-nul-xattrs.tar diff --git a/src/archive/tar/testdata/pax-path-hdr.tar b/src/archive/tar/testdata/pax-path-hdr.tar Binary files differnew file mode 100644 index 0000000..ab8fc32 --- /dev/null +++ b/src/archive/tar/testdata/pax-path-hdr.tar diff --git a/src/archive/tar/testdata/pax-pos-size-file.tar b/src/archive/tar/testdata/pax-pos-size-file.tar Binary files differnew file mode 100644 index 0000000..ea5ccf9 --- /dev/null +++ b/src/archive/tar/testdata/pax-pos-size-file.tar diff --git a/src/archive/tar/testdata/pax-records.tar b/src/archive/tar/testdata/pax-records.tar Binary files differnew file mode 100644 index 0000000..276c211 --- /dev/null +++ b/src/archive/tar/testdata/pax-records.tar diff --git a/src/archive/tar/testdata/pax-sparse-big.tar b/src/archive/tar/testdata/pax-sparse-big.tar Binary files differnew file mode 100644 index 0000000..65d1f8e --- /dev/null +++ b/src/archive/tar/testdata/pax-sparse-big.tar diff --git a/src/archive/tar/testdata/pax.tar b/src/archive/tar/testdata/pax.tar Binary files differnew file mode 100644 index 0000000..9bc24b6 --- /dev/null +++ b/src/archive/tar/testdata/pax.tar diff --git a/src/archive/tar/testdata/small.txt b/src/archive/tar/testdata/small.txt new file mode 100644 index 0000000..b249bfc --- /dev/null +++ b/src/archive/tar/testdata/small.txt @@ -0,0 +1 @@ +Kilts
\ No newline at end of file diff --git a/src/archive/tar/testdata/small2.txt b/src/archive/tar/testdata/small2.txt new file mode 100644 index 0000000..394ee3e --- /dev/null +++ b/src/archive/tar/testdata/small2.txt @@ -0,0 +1 @@ +Google.com diff --git a/src/archive/tar/testdata/sparse-formats.tar b/src/archive/tar/testdata/sparse-formats.tar Binary files differnew file mode 100644 index 0000000..8bd4e74 --- /dev/null +++ b/src/archive/tar/testdata/sparse-formats.tar diff --git a/src/archive/tar/testdata/star.tar b/src/archive/tar/testdata/star.tar Binary files differnew file mode 100644 index 0000000..59e2d4e --- /dev/null +++ b/src/archive/tar/testdata/star.tar diff --git a/src/archive/tar/testdata/trailing-slash.tar b/src/archive/tar/testdata/trailing-slash.tar Binary files differnew file mode 100644 index 0000000..93718b3 --- /dev/null +++ b/src/archive/tar/testdata/trailing-slash.tar diff --git a/src/archive/tar/testdata/ustar-file-devs.tar b/src/archive/tar/testdata/ustar-file-devs.tar Binary files differnew file mode 100644 index 0000000..146e25b --- /dev/null +++ b/src/archive/tar/testdata/ustar-file-devs.tar diff --git a/src/archive/tar/testdata/ustar-file-reg.tar b/src/archive/tar/testdata/ustar-file-reg.tar Binary files differnew file mode 100644 index 0000000..c84fa27 --- /dev/null +++ b/src/archive/tar/testdata/ustar-file-reg.tar diff --git a/src/archive/tar/testdata/ustar.tar b/src/archive/tar/testdata/ustar.tar Binary files differnew file mode 100644 index 0000000..29679d9 --- /dev/null +++ b/src/archive/tar/testdata/ustar.tar diff --git a/src/archive/tar/testdata/v7.tar b/src/archive/tar/testdata/v7.tar Binary files differnew file mode 100644 index 0000000..eb65fc9 --- /dev/null +++ b/src/archive/tar/testdata/v7.tar diff --git a/src/archive/tar/testdata/writer-big-long.tar b/src/archive/tar/testdata/writer-big-long.tar Binary files differnew file mode 100644 index 0000000..09fc5dd --- /dev/null +++ b/src/archive/tar/testdata/writer-big-long.tar diff --git a/src/archive/tar/testdata/writer-big.tar b/src/archive/tar/testdata/writer-big.tar Binary files differnew file mode 100644 index 0000000..435dcbc --- /dev/null +++ b/src/archive/tar/testdata/writer-big.tar diff --git a/src/archive/tar/testdata/writer.tar b/src/archive/tar/testdata/writer.tar Binary files differnew file mode 100644 index 0000000..e6d816a --- /dev/null +++ b/src/archive/tar/testdata/writer.tar diff --git a/src/archive/tar/testdata/xattrs.tar b/src/archive/tar/testdata/xattrs.tar Binary files differnew file mode 100644 index 0000000..9701950 --- /dev/null +++ b/src/archive/tar/testdata/xattrs.tar diff --git a/src/archive/tar/writer.go b/src/archive/tar/writer.go new file mode 100644 index 0000000..e80498d --- /dev/null +++ b/src/archive/tar/writer.go @@ -0,0 +1,653 @@ +// Copyright 2009 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package tar + +import ( + "fmt" + "io" + "path" + "sort" + "strings" + "time" +) + +// Writer provides sequential writing of a tar archive. +// Write.WriteHeader begins a new file with the provided Header, +// and then Writer can be treated as an io.Writer to supply that file's data. +type Writer struct { + w io.Writer + pad int64 // Amount of padding to write after current file entry + curr fileWriter // Writer for current file entry + hdr Header // Shallow copy of Header that is safe for mutations + blk block // Buffer to use as temporary local storage + + // err is a persistent error. + // It is only the responsibility of every exported method of Writer to + // ensure that this error is sticky. + err error +} + +// NewWriter creates a new Writer writing to w. +func NewWriter(w io.Writer) *Writer { + return &Writer{w: w, curr: ®FileWriter{w, 0}} +} + +type fileWriter interface { + io.Writer + fileState + + ReadFrom(io.Reader) (int64, error) +} + +// Flush finishes writing the current file's block padding. +// The current file must be fully written before Flush can be called. +// +// This is unnecessary as the next call to WriteHeader or Close +// will implicitly flush out the file's padding. +func (tw *Writer) Flush() error { + if tw.err != nil { + return tw.err + } + if nb := tw.curr.LogicalRemaining(); nb > 0 { + return fmt.Errorf("archive/tar: missed writing %d bytes", nb) + } + if _, tw.err = tw.w.Write(zeroBlock[:tw.pad]); tw.err != nil { + return tw.err + } + tw.pad = 0 + return nil +} + +// WriteHeader writes hdr and prepares to accept the file's contents. +// The Header.Size determines how many bytes can be written for the next file. +// If the current file is not fully written, then this returns an error. +// This implicitly flushes any padding necessary before writing the header. +func (tw *Writer) WriteHeader(hdr *Header) error { + if err := tw.Flush(); err != nil { + return err + } + tw.hdr = *hdr // Shallow copy of Header + + // Avoid usage of the legacy TypeRegA flag, and automatically promote + // it to use TypeReg or TypeDir. + if tw.hdr.Typeflag == TypeRegA { + if strings.HasSuffix(tw.hdr.Name, "/") { + tw.hdr.Typeflag = TypeDir + } else { + tw.hdr.Typeflag = TypeReg + } + } + + // Round ModTime and ignore AccessTime and ChangeTime unless + // the format is explicitly chosen. + // This ensures nominal usage of WriteHeader (without specifying the format) + // does not always result in the PAX format being chosen, which + // causes a 1KiB increase to every header. + if tw.hdr.Format == FormatUnknown { + tw.hdr.ModTime = tw.hdr.ModTime.Round(time.Second) + tw.hdr.AccessTime = time.Time{} + tw.hdr.ChangeTime = time.Time{} + } + + allowedFormats, paxHdrs, err := tw.hdr.allowedFormats() + switch { + case allowedFormats.has(FormatUSTAR): + tw.err = tw.writeUSTARHeader(&tw.hdr) + return tw.err + case allowedFormats.has(FormatPAX): + tw.err = tw.writePAXHeader(&tw.hdr, paxHdrs) + return tw.err + case allowedFormats.has(FormatGNU): + tw.err = tw.writeGNUHeader(&tw.hdr) + return tw.err + default: + return err // Non-fatal error + } +} + +func (tw *Writer) writeUSTARHeader(hdr *Header) error { + // Check if we can use USTAR prefix/suffix splitting. + var namePrefix string + if prefix, suffix, ok := splitUSTARPath(hdr.Name); ok { + namePrefix, hdr.Name = prefix, suffix + } + + // Pack the main header. + var f formatter + blk := tw.templateV7Plus(hdr, f.formatString, f.formatOctal) + f.formatString(blk.USTAR().Prefix(), namePrefix) + blk.SetFormat(FormatUSTAR) + if f.err != nil { + return f.err // Should never happen since header is validated + } + return tw.writeRawHeader(blk, hdr.Size, hdr.Typeflag) +} + +func (tw *Writer) writePAXHeader(hdr *Header, paxHdrs map[string]string) error { + realName, realSize := hdr.Name, hdr.Size + + // TODO(dsnet): Re-enable this when adding sparse support. + // See https://golang.org/issue/22735 + /* + // Handle sparse files. + var spd sparseDatas + var spb []byte + if len(hdr.SparseHoles) > 0 { + sph := append([]sparseEntry{}, hdr.SparseHoles...) // Copy sparse map + sph = alignSparseEntries(sph, hdr.Size) + spd = invertSparseEntries(sph, hdr.Size) + + // Format the sparse map. + hdr.Size = 0 // Replace with encoded size + spb = append(strconv.AppendInt(spb, int64(len(spd)), 10), '\n') + for _, s := range spd { + hdr.Size += s.Length + spb = append(strconv.AppendInt(spb, s.Offset, 10), '\n') + spb = append(strconv.AppendInt(spb, s.Length, 10), '\n') + } + pad := blockPadding(int64(len(spb))) + spb = append(spb, zeroBlock[:pad]...) + hdr.Size += int64(len(spb)) // Accounts for encoded sparse map + + // Add and modify appropriate PAX records. + dir, file := path.Split(realName) + hdr.Name = path.Join(dir, "GNUSparseFile.0", file) + paxHdrs[paxGNUSparseMajor] = "1" + paxHdrs[paxGNUSparseMinor] = "0" + paxHdrs[paxGNUSparseName] = realName + paxHdrs[paxGNUSparseRealSize] = strconv.FormatInt(realSize, 10) + paxHdrs[paxSize] = strconv.FormatInt(hdr.Size, 10) + delete(paxHdrs, paxPath) // Recorded by paxGNUSparseName + } + */ + _ = realSize + + // Write PAX records to the output. + isGlobal := hdr.Typeflag == TypeXGlobalHeader + if len(paxHdrs) > 0 || isGlobal { + // Sort keys for deterministic ordering. + var keys []string + for k := range paxHdrs { + keys = append(keys, k) + } + sort.Strings(keys) + + // Write each record to a buffer. + var buf strings.Builder + for _, k := range keys { + rec, err := formatPAXRecord(k, paxHdrs[k]) + if err != nil { + return err + } + buf.WriteString(rec) + } + + // Write the extended header file. + var name string + var flag byte + if isGlobal { + name = realName + if name == "" { + name = "GlobalHead.0.0" + } + flag = TypeXGlobalHeader + } else { + dir, file := path.Split(realName) + name = path.Join(dir, "PaxHeaders.0", file) + flag = TypeXHeader + } + data := buf.String() + if err := tw.writeRawFile(name, data, flag, FormatPAX); err != nil || isGlobal { + return err // Global headers return here + } + } + + // Pack the main header. + var f formatter // Ignore errors since they are expected + fmtStr := func(b []byte, s string) { f.formatString(b, toASCII(s)) } + blk := tw.templateV7Plus(hdr, fmtStr, f.formatOctal) + blk.SetFormat(FormatPAX) + if err := tw.writeRawHeader(blk, hdr.Size, hdr.Typeflag); err != nil { + return err + } + + // TODO(dsnet): Re-enable this when adding sparse support. + // See https://golang.org/issue/22735 + /* + // Write the sparse map and setup the sparse writer if necessary. + if len(spd) > 0 { + // Use tw.curr since the sparse map is accounted for in hdr.Size. + if _, err := tw.curr.Write(spb); err != nil { + return err + } + tw.curr = &sparseFileWriter{tw.curr, spd, 0} + } + */ + return nil +} + +func (tw *Writer) writeGNUHeader(hdr *Header) error { + // Use long-link files if Name or Linkname exceeds the field size. + const longName = "././@LongLink" + if len(hdr.Name) > nameSize { + data := hdr.Name + "\x00" + if err := tw.writeRawFile(longName, data, TypeGNULongName, FormatGNU); err != nil { + return err + } + } + if len(hdr.Linkname) > nameSize { + data := hdr.Linkname + "\x00" + if err := tw.writeRawFile(longName, data, TypeGNULongLink, FormatGNU); err != nil { + return err + } + } + + // Pack the main header. + var f formatter // Ignore errors since they are expected + var spd sparseDatas + var spb []byte + blk := tw.templateV7Plus(hdr, f.formatString, f.formatNumeric) + if !hdr.AccessTime.IsZero() { + f.formatNumeric(blk.GNU().AccessTime(), hdr.AccessTime.Unix()) + } + if !hdr.ChangeTime.IsZero() { + f.formatNumeric(blk.GNU().ChangeTime(), hdr.ChangeTime.Unix()) + } + // TODO(dsnet): Re-enable this when adding sparse support. + // See https://golang.org/issue/22735 + /* + if hdr.Typeflag == TypeGNUSparse { + sph := append([]sparseEntry{}, hdr.SparseHoles...) // Copy sparse map + sph = alignSparseEntries(sph, hdr.Size) + spd = invertSparseEntries(sph, hdr.Size) + + // Format the sparse map. + formatSPD := func(sp sparseDatas, sa sparseArray) sparseDatas { + for i := 0; len(sp) > 0 && i < sa.MaxEntries(); i++ { + f.formatNumeric(sa.Entry(i).Offset(), sp[0].Offset) + f.formatNumeric(sa.Entry(i).Length(), sp[0].Length) + sp = sp[1:] + } + if len(sp) > 0 { + sa.IsExtended()[0] = 1 + } + return sp + } + sp2 := formatSPD(spd, blk.GNU().Sparse()) + for len(sp2) > 0 { + var spHdr block + sp2 = formatSPD(sp2, spHdr.Sparse()) + spb = append(spb, spHdr[:]...) + } + + // Update size fields in the header block. + realSize := hdr.Size + hdr.Size = 0 // Encoded size; does not account for encoded sparse map + for _, s := range spd { + hdr.Size += s.Length + } + copy(blk.V7().Size(), zeroBlock[:]) // Reset field + f.formatNumeric(blk.V7().Size(), hdr.Size) + f.formatNumeric(blk.GNU().RealSize(), realSize) + } + */ + blk.SetFormat(FormatGNU) + if err := tw.writeRawHeader(blk, hdr.Size, hdr.Typeflag); err != nil { + return err + } + + // Write the extended sparse map and setup the sparse writer if necessary. + if len(spd) > 0 { + // Use tw.w since the sparse map is not accounted for in hdr.Size. + if _, err := tw.w.Write(spb); err != nil { + return err + } + tw.curr = &sparseFileWriter{tw.curr, spd, 0} + } + return nil +} + +type ( + stringFormatter func([]byte, string) + numberFormatter func([]byte, int64) +) + +// templateV7Plus fills out the V7 fields of a block using values from hdr. +// It also fills out fields (uname, gname, devmajor, devminor) that are +// shared in the USTAR, PAX, and GNU formats using the provided formatters. +// +// The block returned is only valid until the next call to +// templateV7Plus or writeRawFile. +func (tw *Writer) templateV7Plus(hdr *Header, fmtStr stringFormatter, fmtNum numberFormatter) *block { + tw.blk.Reset() + + modTime := hdr.ModTime + if modTime.IsZero() { + modTime = time.Unix(0, 0) + } + + v7 := tw.blk.V7() + v7.TypeFlag()[0] = hdr.Typeflag + fmtStr(v7.Name(), hdr.Name) + fmtStr(v7.LinkName(), hdr.Linkname) + fmtNum(v7.Mode(), hdr.Mode) + fmtNum(v7.UID(), int64(hdr.Uid)) + fmtNum(v7.GID(), int64(hdr.Gid)) + fmtNum(v7.Size(), hdr.Size) + fmtNum(v7.ModTime(), modTime.Unix()) + + ustar := tw.blk.USTAR() + fmtStr(ustar.UserName(), hdr.Uname) + fmtStr(ustar.GroupName(), hdr.Gname) + fmtNum(ustar.DevMajor(), hdr.Devmajor) + fmtNum(ustar.DevMinor(), hdr.Devminor) + + return &tw.blk +} + +// writeRawFile writes a minimal file with the given name and flag type. +// It uses format to encode the header format and will write data as the body. +// It uses default values for all of the other fields (as BSD and GNU tar does). +func (tw *Writer) writeRawFile(name, data string, flag byte, format Format) error { + tw.blk.Reset() + + // Best effort for the filename. + name = toASCII(name) + if len(name) > nameSize { + name = name[:nameSize] + } + name = strings.TrimRight(name, "/") + + var f formatter + v7 := tw.blk.V7() + v7.TypeFlag()[0] = flag + f.formatString(v7.Name(), name) + f.formatOctal(v7.Mode(), 0) + f.formatOctal(v7.UID(), 0) + f.formatOctal(v7.GID(), 0) + f.formatOctal(v7.Size(), int64(len(data))) // Must be < 8GiB + f.formatOctal(v7.ModTime(), 0) + tw.blk.SetFormat(format) + if f.err != nil { + return f.err // Only occurs if size condition is violated + } + + // Write the header and data. + if err := tw.writeRawHeader(&tw.blk, int64(len(data)), flag); err != nil { + return err + } + _, err := io.WriteString(tw, data) + return err +} + +// writeRawHeader writes the value of blk, regardless of its value. +// It sets up the Writer such that it can accept a file of the given size. +// If the flag is a special header-only flag, then the size is treated as zero. +func (tw *Writer) writeRawHeader(blk *block, size int64, flag byte) error { + if err := tw.Flush(); err != nil { + return err + } + if _, err := tw.w.Write(blk[:]); err != nil { + return err + } + if isHeaderOnlyType(flag) { + size = 0 + } + tw.curr = ®FileWriter{tw.w, size} + tw.pad = blockPadding(size) + return nil +} + +// splitUSTARPath splits a path according to USTAR prefix and suffix rules. +// If the path is not splittable, then it will return ("", "", false). +func splitUSTARPath(name string) (prefix, suffix string, ok bool) { + length := len(name) + if length <= nameSize || !isASCII(name) { + return "", "", false + } else if length > prefixSize+1 { + length = prefixSize + 1 + } else if name[length-1] == '/' { + length-- + } + + i := strings.LastIndex(name[:length], "/") + nlen := len(name) - i - 1 // nlen is length of suffix + plen := i // plen is length of prefix + if i <= 0 || nlen > nameSize || nlen == 0 || plen > prefixSize { + return "", "", false + } + return name[:i], name[i+1:], true +} + +// Write writes to the current file in the tar archive. +// Write returns the error ErrWriteTooLong if more than +// Header.Size bytes are written after WriteHeader. +// +// Calling Write on special types like TypeLink, TypeSymlink, TypeChar, +// TypeBlock, TypeDir, and TypeFifo returns (0, ErrWriteTooLong) regardless +// of what the Header.Size claims. +func (tw *Writer) Write(b []byte) (int, error) { + if tw.err != nil { + return 0, tw.err + } + n, err := tw.curr.Write(b) + if err != nil && err != ErrWriteTooLong { + tw.err = err + } + return n, err +} + +// readFrom populates the content of the current file by reading from r. +// The bytes read must match the number of remaining bytes in the current file. +// +// If the current file is sparse and r is an io.ReadSeeker, +// then readFrom uses Seek to skip past holes defined in Header.SparseHoles, +// assuming that skipped regions are all NULs. +// This always reads the last byte to ensure r is the right size. +// +// TODO(dsnet): Re-export this when adding sparse file support. +// See https://golang.org/issue/22735 +func (tw *Writer) readFrom(r io.Reader) (int64, error) { + if tw.err != nil { + return 0, tw.err + } + n, err := tw.curr.ReadFrom(r) + if err != nil && err != ErrWriteTooLong { + tw.err = err + } + return n, err +} + +// Close closes the tar archive by flushing the padding, and writing the footer. +// If the current file (from a prior call to WriteHeader) is not fully written, +// then this returns an error. +func (tw *Writer) Close() error { + if tw.err == ErrWriteAfterClose { + return nil + } + if tw.err != nil { + return tw.err + } + + // Trailer: two zero blocks. + err := tw.Flush() + for i := 0; i < 2 && err == nil; i++ { + _, err = tw.w.Write(zeroBlock[:]) + } + + // Ensure all future actions are invalid. + tw.err = ErrWriteAfterClose + return err // Report IO errors +} + +// regFileWriter is a fileWriter for writing data to a regular file entry. +type regFileWriter struct { + w io.Writer // Underlying Writer + nb int64 // Number of remaining bytes to write +} + +func (fw *regFileWriter) Write(b []byte) (n int, err error) { + overwrite := int64(len(b)) > fw.nb + if overwrite { + b = b[:fw.nb] + } + if len(b) > 0 { + n, err = fw.w.Write(b) + fw.nb -= int64(n) + } + switch { + case err != nil: + return n, err + case overwrite: + return n, ErrWriteTooLong + default: + return n, nil + } +} + +func (fw *regFileWriter) ReadFrom(r io.Reader) (int64, error) { + return io.Copy(struct{ io.Writer }{fw}, r) +} + +func (fw regFileWriter) LogicalRemaining() int64 { + return fw.nb +} +func (fw regFileWriter) PhysicalRemaining() int64 { + return fw.nb +} + +// sparseFileWriter is a fileWriter for writing data to a sparse file entry. +type sparseFileWriter struct { + fw fileWriter // Underlying fileWriter + sp sparseDatas // Normalized list of data fragments + pos int64 // Current position in sparse file +} + +func (sw *sparseFileWriter) Write(b []byte) (n int, err error) { + overwrite := int64(len(b)) > sw.LogicalRemaining() + if overwrite { + b = b[:sw.LogicalRemaining()] + } + + b0 := b + endPos := sw.pos + int64(len(b)) + for endPos > sw.pos && err == nil { + var nf int // Bytes written in fragment + dataStart, dataEnd := sw.sp[0].Offset, sw.sp[0].endOffset() + if sw.pos < dataStart { // In a hole fragment + bf := b[:min(int64(len(b)), dataStart-sw.pos)] + nf, err = zeroWriter{}.Write(bf) + } else { // In a data fragment + bf := b[:min(int64(len(b)), dataEnd-sw.pos)] + nf, err = sw.fw.Write(bf) + } + b = b[nf:] + sw.pos += int64(nf) + if sw.pos >= dataEnd && len(sw.sp) > 1 { + sw.sp = sw.sp[1:] // Ensure last fragment always remains + } + } + + n = len(b0) - len(b) + switch { + case err == ErrWriteTooLong: + return n, errMissData // Not possible; implies bug in validation logic + case err != nil: + return n, err + case sw.LogicalRemaining() == 0 && sw.PhysicalRemaining() > 0: + return n, errUnrefData // Not possible; implies bug in validation logic + case overwrite: + return n, ErrWriteTooLong + default: + return n, nil + } +} + +func (sw *sparseFileWriter) ReadFrom(r io.Reader) (n int64, err error) { + rs, ok := r.(io.ReadSeeker) + if ok { + if _, err := rs.Seek(0, io.SeekCurrent); err != nil { + ok = false // Not all io.Seeker can really seek + } + } + if !ok { + return io.Copy(struct{ io.Writer }{sw}, r) + } + + var readLastByte bool + pos0 := sw.pos + for sw.LogicalRemaining() > 0 && !readLastByte && err == nil { + var nf int64 // Size of fragment + dataStart, dataEnd := sw.sp[0].Offset, sw.sp[0].endOffset() + if sw.pos < dataStart { // In a hole fragment + nf = dataStart - sw.pos + if sw.PhysicalRemaining() == 0 { + readLastByte = true + nf-- + } + _, err = rs.Seek(nf, io.SeekCurrent) + } else { // In a data fragment + nf = dataEnd - sw.pos + nf, err = io.CopyN(sw.fw, rs, nf) + } + sw.pos += nf + if sw.pos >= dataEnd && len(sw.sp) > 1 { + sw.sp = sw.sp[1:] // Ensure last fragment always remains + } + } + + // If the last fragment is a hole, then seek to 1-byte before EOF, and + // read a single byte to ensure the file is the right size. + if readLastByte && err == nil { + _, err = mustReadFull(rs, []byte{0}) + sw.pos++ + } + + n = sw.pos - pos0 + switch { + case err == io.EOF: + return n, io.ErrUnexpectedEOF + case err == ErrWriteTooLong: + return n, errMissData // Not possible; implies bug in validation logic + case err != nil: + return n, err + case sw.LogicalRemaining() == 0 && sw.PhysicalRemaining() > 0: + return n, errUnrefData // Not possible; implies bug in validation logic + default: + return n, ensureEOF(rs) + } +} + +func (sw sparseFileWriter) LogicalRemaining() int64 { + return sw.sp[len(sw.sp)-1].endOffset() - sw.pos +} +func (sw sparseFileWriter) PhysicalRemaining() int64 { + return sw.fw.PhysicalRemaining() +} + +// zeroWriter may only be written with NULs, otherwise it returns errWriteHole. +type zeroWriter struct{} + +func (zeroWriter) Write(b []byte) (int, error) { + for i, c := range b { + if c != 0 { + return i, errWriteHole + } + } + return len(b), nil +} + +// ensureEOF checks whether r is at EOF, reporting ErrWriteTooLong if not so. +func ensureEOF(r io.Reader) error { + n, err := tryReadFull(r, []byte{0}) + switch { + case n > 0: + return ErrWriteTooLong + case err == io.EOF: + return nil + default: + return err + } +} diff --git a/src/archive/tar/writer_test.go b/src/archive/tar/writer_test.go new file mode 100644 index 0000000..a00f02d --- /dev/null +++ b/src/archive/tar/writer_test.go @@ -0,0 +1,1310 @@ +// Copyright 2009 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package tar + +import ( + "bytes" + "encoding/hex" + "errors" + "io" + "os" + "path" + "reflect" + "sort" + "strings" + "testing" + "testing/iotest" + "time" +) + +func bytediff(a, b []byte) string { + const ( + uniqueA = "- " + uniqueB = "+ " + identity = " " + ) + var ss []string + sa := strings.Split(strings.TrimSpace(hex.Dump(a)), "\n") + sb := strings.Split(strings.TrimSpace(hex.Dump(b)), "\n") + for len(sa) > 0 && len(sb) > 0 { + if sa[0] == sb[0] { + ss = append(ss, identity+sa[0]) + } else { + ss = append(ss, uniqueA+sa[0]) + ss = append(ss, uniqueB+sb[0]) + } + sa, sb = sa[1:], sb[1:] + } + for len(sa) > 0 { + ss = append(ss, uniqueA+sa[0]) + sa = sa[1:] + } + for len(sb) > 0 { + ss = append(ss, uniqueB+sb[0]) + sb = sb[1:] + } + return strings.Join(ss, "\n") +} + +func TestWriter(t *testing.T) { + type ( + testHeader struct { // WriteHeader(hdr) == wantErr + hdr Header + wantErr error + } + testWrite struct { // Write(str) == (wantCnt, wantErr) + str string + wantCnt int + wantErr error + } + testReadFrom struct { // ReadFrom(testFile{ops}) == (wantCnt, wantErr) + ops fileOps + wantCnt int64 + wantErr error + } + testClose struct { // Close() == wantErr + wantErr error + } + testFnc interface{} // testHeader | testWrite | testReadFrom | testClose + ) + + vectors := []struct { + file string // Optional filename of expected output + tests []testFnc + }{{ + // The writer test file was produced with this command: + // tar (GNU tar) 1.26 + // ln -s small.txt link.txt + // tar -b 1 --format=ustar -c -f writer.tar small.txt small2.txt link.txt + file: "testdata/writer.tar", + tests: []testFnc{ + testHeader{Header{ + Typeflag: TypeReg, + Name: "small.txt", + Size: 5, + Mode: 0640, + Uid: 73025, + Gid: 5000, + Uname: "dsymonds", + Gname: "eng", + ModTime: time.Unix(1246508266, 0), + }, nil}, + testWrite{"Kilts", 5, nil}, + + testHeader{Header{ + Typeflag: TypeReg, + Name: "small2.txt", + Size: 11, + Mode: 0640, + Uid: 73025, + Uname: "dsymonds", + Gname: "eng", + Gid: 5000, + ModTime: time.Unix(1245217492, 0), + }, nil}, + testWrite{"Google.com\n", 11, nil}, + + testHeader{Header{ + Typeflag: TypeSymlink, + Name: "link.txt", + Linkname: "small.txt", + Mode: 0777, + Uid: 1000, + Gid: 1000, + Uname: "strings", + Gname: "strings", + ModTime: time.Unix(1314603082, 0), + }, nil}, + testWrite{"", 0, nil}, + + testClose{nil}, + }, + }, { + // The truncated test file was produced using these commands: + // dd if=/dev/zero bs=1048576 count=16384 > /tmp/16gig.txt + // tar -b 1 -c -f- /tmp/16gig.txt | dd bs=512 count=8 > writer-big.tar + file: "testdata/writer-big.tar", + tests: []testFnc{ + testHeader{Header{ + Typeflag: TypeReg, + Name: "tmp/16gig.txt", + Size: 16 << 30, + Mode: 0640, + Uid: 73025, + Gid: 5000, + Uname: "dsymonds", + Gname: "eng", + ModTime: time.Unix(1254699560, 0), + Format: FormatGNU, + }, nil}, + }, + }, { + // This truncated file was produced using this library. + // It was verified to work with GNU tar 1.27.1 and BSD tar 3.1.2. + // dd if=/dev/zero bs=1G count=16 >> writer-big-long.tar + // gnutar -xvf writer-big-long.tar + // bsdtar -xvf writer-big-long.tar + // + // This file is in PAX format. + file: "testdata/writer-big-long.tar", + tests: []testFnc{ + testHeader{Header{ + Typeflag: TypeReg, + Name: strings.Repeat("longname/", 15) + "16gig.txt", + Size: 16 << 30, + Mode: 0644, + Uid: 1000, + Gid: 1000, + Uname: "guillaume", + Gname: "guillaume", + ModTime: time.Unix(1399583047, 0), + }, nil}, + }, + }, { + // This file was produced using GNU tar v1.17. + // gnutar -b 4 --format=ustar (longname/)*15 + file.txt + file: "testdata/ustar.tar", + tests: []testFnc{ + testHeader{Header{ + Typeflag: TypeReg, + Name: strings.Repeat("longname/", 15) + "file.txt", + Size: 6, + Mode: 0644, + Uid: 501, + Gid: 20, + Uname: "shane", + Gname: "staff", + ModTime: time.Unix(1360135598, 0), + }, nil}, + testWrite{"hello\n", 6, nil}, + testClose{nil}, + }, + }, { + // This file was produced using GNU tar v1.26: + // echo "Slartibartfast" > file.txt + // ln file.txt hard.txt + // tar -b 1 --format=ustar -c -f hardlink.tar file.txt hard.txt + file: "testdata/hardlink.tar", + tests: []testFnc{ + testHeader{Header{ + Typeflag: TypeReg, + Name: "file.txt", + Size: 15, + Mode: 0644, + Uid: 1000, + Gid: 100, + Uname: "vbatts", + Gname: "users", + ModTime: time.Unix(1425484303, 0), + }, nil}, + testWrite{"Slartibartfast\n", 15, nil}, + + testHeader{Header{ + Typeflag: TypeLink, + Name: "hard.txt", + Linkname: "file.txt", + Mode: 0644, + Uid: 1000, + Gid: 100, + Uname: "vbatts", + Gname: "users", + ModTime: time.Unix(1425484303, 0), + }, nil}, + testWrite{"", 0, nil}, + + testClose{nil}, + }, + }, { + tests: []testFnc{ + testHeader{Header{ + Typeflag: TypeReg, + Name: "bad-null.txt", + Xattrs: map[string]string{"null\x00null\x00": "fizzbuzz"}, + }, headerError{}}, + }, + }, { + tests: []testFnc{ + testHeader{Header{ + Typeflag: TypeReg, + Name: "null\x00.txt", + }, headerError{}}, + }, + }, { + file: "testdata/pax-records.tar", + tests: []testFnc{ + testHeader{Header{ + Typeflag: TypeReg, + Name: "file", + Uname: strings.Repeat("long", 10), + PAXRecords: map[string]string{ + "path": "FILE", // Should be ignored + "GNU.sparse.map": "0,0", // Should be ignored + "comment": "Hello, 世界", + "GOLANG.pkg": "tar", + }, + }, nil}, + testClose{nil}, + }, + }, { + // Craft a theoretically valid PAX archive with global headers. + // The GNU and BSD tar tools do not parse these the same way. + // + // BSD tar v3.1.2 parses and ignores all global headers; + // the behavior is verified by researching the source code. + // + // $ bsdtar -tvf pax-global-records.tar + // ---------- 0 0 0 0 Dec 31 1969 file1 + // ---------- 0 0 0 0 Dec 31 1969 file2 + // ---------- 0 0 0 0 Dec 31 1969 file3 + // ---------- 0 0 0 0 May 13 2014 file4 + // + // GNU tar v1.27.1 applies global headers to subsequent records, + // but does not do the following properly: + // * It does not treat an empty record as deletion. + // * It does not use subsequent global headers to update previous ones. + // + // $ gnutar -tvf pax-global-records.tar + // ---------- 0/0 0 2017-07-13 19:40 global1 + // ---------- 0/0 0 2017-07-13 19:40 file2 + // gnutar: Substituting `.' for empty member name + // ---------- 0/0 0 1969-12-31 16:00 + // gnutar: Substituting `.' for empty member name + // ---------- 0/0 0 2014-05-13 09:53 + // + // According to the PAX specification, this should have been the result: + // ---------- 0/0 0 2017-07-13 19:40 global1 + // ---------- 0/0 0 2017-07-13 19:40 file2 + // ---------- 0/0 0 2017-07-13 19:40 file3 + // ---------- 0/0 0 2014-05-13 09:53 file4 + file: "testdata/pax-global-records.tar", + tests: []testFnc{ + testHeader{Header{ + Typeflag: TypeXGlobalHeader, + PAXRecords: map[string]string{"path": "global1", "mtime": "1500000000.0"}, + }, nil}, + testHeader{Header{ + Typeflag: TypeReg, Name: "file1", + }, nil}, + testHeader{Header{ + Typeflag: TypeReg, + Name: "file2", + PAXRecords: map[string]string{"path": "file2"}, + }, nil}, + testHeader{Header{ + Typeflag: TypeXGlobalHeader, + PAXRecords: map[string]string{"path": ""}, // Should delete "path", but keep "mtime" + }, nil}, + testHeader{Header{ + Typeflag: TypeReg, Name: "file3", + }, nil}, + testHeader{Header{ + Typeflag: TypeReg, + Name: "file4", + ModTime: time.Unix(1400000000, 0), + PAXRecords: map[string]string{"mtime": "1400000000"}, + }, nil}, + testClose{nil}, + }, + }, { + file: "testdata/gnu-utf8.tar", + tests: []testFnc{ + testHeader{Header{ + Typeflag: TypeReg, + Name: "☺☻☹☺☻☹☺☻☹☺☻☹☺☻☹☺☻☹☺☻☹☺☻☹☺☻☹☺☻☹☺☻☹☺☻☹☺☻☹☺☻☹☺☻☹☺☻☹☺☻☹☺☻☹", + Mode: 0644, + Uid: 1000, Gid: 1000, + Uname: "☺", + Gname: "⚹", + ModTime: time.Unix(0, 0), + Format: FormatGNU, + }, nil}, + testClose{nil}, + }, + }, { + file: "testdata/gnu-not-utf8.tar", + tests: []testFnc{ + testHeader{Header{ + Typeflag: TypeReg, + Name: "hi\x80\x81\x82\x83bye", + Mode: 0644, + Uid: 1000, + Gid: 1000, + Uname: "rawr", + Gname: "dsnet", + ModTime: time.Unix(0, 0), + Format: FormatGNU, + }, nil}, + testClose{nil}, + }, + // TODO(dsnet): Re-enable this test when adding sparse support. + // See https://golang.org/issue/22735 + /* + }, { + file: "testdata/gnu-nil-sparse-data.tar", + tests: []testFnc{ + testHeader{Header{ + Typeflag: TypeGNUSparse, + Name: "sparse.db", + Size: 1000, + SparseHoles: []sparseEntry{{Offset: 1000, Length: 0}}, + }, nil}, + testWrite{strings.Repeat("0123456789", 100), 1000, nil}, + testClose{}, + }, + }, { + file: "testdata/gnu-nil-sparse-hole.tar", + tests: []testFnc{ + testHeader{Header{ + Typeflag: TypeGNUSparse, + Name: "sparse.db", + Size: 1000, + SparseHoles: []sparseEntry{{Offset: 0, Length: 1000}}, + }, nil}, + testWrite{strings.Repeat("\x00", 1000), 1000, nil}, + testClose{}, + }, + }, { + file: "testdata/pax-nil-sparse-data.tar", + tests: []testFnc{ + testHeader{Header{ + Typeflag: TypeReg, + Name: "sparse.db", + Size: 1000, + SparseHoles: []sparseEntry{{Offset: 1000, Length: 0}}, + }, nil}, + testWrite{strings.Repeat("0123456789", 100), 1000, nil}, + testClose{}, + }, + }, { + file: "testdata/pax-nil-sparse-hole.tar", + tests: []testFnc{ + testHeader{Header{ + Typeflag: TypeReg, + Name: "sparse.db", + Size: 1000, + SparseHoles: []sparseEntry{{Offset: 0, Length: 1000}}, + }, nil}, + testWrite{strings.Repeat("\x00", 1000), 1000, nil}, + testClose{}, + }, + }, { + file: "testdata/gnu-sparse-big.tar", + tests: []testFnc{ + testHeader{Header{ + Typeflag: TypeGNUSparse, + Name: "gnu-sparse", + Size: 6e10, + SparseHoles: []sparseEntry{ + {Offset: 0e10, Length: 1e10 - 100}, + {Offset: 1e10, Length: 1e10 - 100}, + {Offset: 2e10, Length: 1e10 - 100}, + {Offset: 3e10, Length: 1e10 - 100}, + {Offset: 4e10, Length: 1e10 - 100}, + {Offset: 5e10, Length: 1e10 - 100}, + }, + }, nil}, + testReadFrom{fileOps{ + int64(1e10 - blockSize), + strings.Repeat("\x00", blockSize-100) + strings.Repeat("0123456789", 10), + int64(1e10 - blockSize), + strings.Repeat("\x00", blockSize-100) + strings.Repeat("0123456789", 10), + int64(1e10 - blockSize), + strings.Repeat("\x00", blockSize-100) + strings.Repeat("0123456789", 10), + int64(1e10 - blockSize), + strings.Repeat("\x00", blockSize-100) + strings.Repeat("0123456789", 10), + int64(1e10 - blockSize), + strings.Repeat("\x00", blockSize-100) + strings.Repeat("0123456789", 10), + int64(1e10 - blockSize), + strings.Repeat("\x00", blockSize-100) + strings.Repeat("0123456789", 10), + }, 6e10, nil}, + testClose{nil}, + }, + }, { + file: "testdata/pax-sparse-big.tar", + tests: []testFnc{ + testHeader{Header{ + Typeflag: TypeReg, + Name: "pax-sparse", + Size: 6e10, + SparseHoles: []sparseEntry{ + {Offset: 0e10, Length: 1e10 - 100}, + {Offset: 1e10, Length: 1e10 - 100}, + {Offset: 2e10, Length: 1e10 - 100}, + {Offset: 3e10, Length: 1e10 - 100}, + {Offset: 4e10, Length: 1e10 - 100}, + {Offset: 5e10, Length: 1e10 - 100}, + }, + }, nil}, + testReadFrom{fileOps{ + int64(1e10 - blockSize), + strings.Repeat("\x00", blockSize-100) + strings.Repeat("0123456789", 10), + int64(1e10 - blockSize), + strings.Repeat("\x00", blockSize-100) + strings.Repeat("0123456789", 10), + int64(1e10 - blockSize), + strings.Repeat("\x00", blockSize-100) + strings.Repeat("0123456789", 10), + int64(1e10 - blockSize), + strings.Repeat("\x00", blockSize-100) + strings.Repeat("0123456789", 10), + int64(1e10 - blockSize), + strings.Repeat("\x00", blockSize-100) + strings.Repeat("0123456789", 10), + int64(1e10 - blockSize), + strings.Repeat("\x00", blockSize-100) + strings.Repeat("0123456789", 10), + }, 6e10, nil}, + testClose{nil}, + }, + */ + }, { + file: "testdata/trailing-slash.tar", + tests: []testFnc{ + testHeader{Header{Name: strings.Repeat("123456789/", 30)}, nil}, + testClose{nil}, + }, + }, { + // Automatically promote zero value of Typeflag depending on the name. + file: "testdata/file-and-dir.tar", + tests: []testFnc{ + testHeader{Header{Name: "small.txt", Size: 5}, nil}, + testWrite{"Kilts", 5, nil}, + testHeader{Header{Name: "dir/"}, nil}, + testClose{nil}, + }, + }} + + equalError := func(x, y error) bool { + _, ok1 := x.(headerError) + _, ok2 := y.(headerError) + if ok1 || ok2 { + return ok1 && ok2 + } + return x == y + } + for _, v := range vectors { + t.Run(path.Base(v.file), func(t *testing.T) { + const maxSize = 10 << 10 // 10KiB + buf := new(bytes.Buffer) + tw := NewWriter(iotest.TruncateWriter(buf, maxSize)) + + for i, tf := range v.tests { + switch tf := tf.(type) { + case testHeader: + err := tw.WriteHeader(&tf.hdr) + if !equalError(err, tf.wantErr) { + t.Fatalf("test %d, WriteHeader() = %v, want %v", i, err, tf.wantErr) + } + case testWrite: + got, err := tw.Write([]byte(tf.str)) + if got != tf.wantCnt || !equalError(err, tf.wantErr) { + t.Fatalf("test %d, Write() = (%d, %v), want (%d, %v)", i, got, err, tf.wantCnt, tf.wantErr) + } + case testReadFrom: + f := &testFile{ops: tf.ops} + got, err := tw.readFrom(f) + if _, ok := err.(testError); ok { + t.Errorf("test %d, ReadFrom(): %v", i, err) + } else if got != tf.wantCnt || !equalError(err, tf.wantErr) { + t.Errorf("test %d, ReadFrom() = (%d, %v), want (%d, %v)", i, got, err, tf.wantCnt, tf.wantErr) + } + if len(f.ops) > 0 { + t.Errorf("test %d, expected %d more operations", i, len(f.ops)) + } + case testClose: + err := tw.Close() + if !equalError(err, tf.wantErr) { + t.Fatalf("test %d, Close() = %v, want %v", i, err, tf.wantErr) + } + default: + t.Fatalf("test %d, unknown test operation: %T", i, tf) + } + } + + if v.file != "" { + want, err := os.ReadFile(v.file) + if err != nil { + t.Fatalf("ReadFile() = %v, want nil", err) + } + got := buf.Bytes() + if !bytes.Equal(want, got) { + t.Fatalf("incorrect result: (-got +want)\n%v", bytediff(got, want)) + } + } + }) + } +} + +func TestPax(t *testing.T) { + // Create an archive with a large name + fileinfo, err := os.Stat("testdata/small.txt") + if err != nil { + t.Fatal(err) + } + hdr, err := FileInfoHeader(fileinfo, "") + if err != nil { + t.Fatalf("os.Stat: %v", err) + } + // Force a PAX long name to be written + longName := strings.Repeat("ab", 100) + contents := strings.Repeat(" ", int(hdr.Size)) + hdr.Name = longName + var buf bytes.Buffer + writer := NewWriter(&buf) + if err := writer.WriteHeader(hdr); err != nil { + t.Fatal(err) + } + if _, err = writer.Write([]byte(contents)); err != nil { + t.Fatal(err) + } + if err := writer.Close(); err != nil { + t.Fatal(err) + } + // Simple test to make sure PAX extensions are in effect + if !bytes.Contains(buf.Bytes(), []byte("PaxHeaders.0")) { + t.Fatal("Expected at least one PAX header to be written.") + } + // Test that we can get a long name back out of the archive. + reader := NewReader(&buf) + hdr, err = reader.Next() + if err != nil { + t.Fatal(err) + } + if hdr.Name != longName { + t.Fatal("Couldn't recover long file name") + } +} + +func TestPaxSymlink(t *testing.T) { + // Create an archive with a large linkname + fileinfo, err := os.Stat("testdata/small.txt") + if err != nil { + t.Fatal(err) + } + hdr, err := FileInfoHeader(fileinfo, "") + hdr.Typeflag = TypeSymlink + if err != nil { + t.Fatalf("os.Stat:1 %v", err) + } + // Force a PAX long linkname to be written + longLinkname := strings.Repeat("1234567890/1234567890", 10) + hdr.Linkname = longLinkname + + hdr.Size = 0 + var buf bytes.Buffer + writer := NewWriter(&buf) + if err := writer.WriteHeader(hdr); err != nil { + t.Fatal(err) + } + if err := writer.Close(); err != nil { + t.Fatal(err) + } + // Simple test to make sure PAX extensions are in effect + if !bytes.Contains(buf.Bytes(), []byte("PaxHeaders.0")) { + t.Fatal("Expected at least one PAX header to be written.") + } + // Test that we can get a long name back out of the archive. + reader := NewReader(&buf) + hdr, err = reader.Next() + if err != nil { + t.Fatal(err) + } + if hdr.Linkname != longLinkname { + t.Fatal("Couldn't recover long link name") + } +} + +func TestPaxNonAscii(t *testing.T) { + // Create an archive with non ascii. These should trigger a pax header + // because pax headers have a defined utf-8 encoding. + fileinfo, err := os.Stat("testdata/small.txt") + if err != nil { + t.Fatal(err) + } + + hdr, err := FileInfoHeader(fileinfo, "") + if err != nil { + t.Fatalf("os.Stat:1 %v", err) + } + + // some sample data + chineseFilename := "文件名" + chineseGroupname := "組" + chineseUsername := "用戶名" + + hdr.Name = chineseFilename + hdr.Gname = chineseGroupname + hdr.Uname = chineseUsername + + contents := strings.Repeat(" ", int(hdr.Size)) + + var buf bytes.Buffer + writer := NewWriter(&buf) + if err := writer.WriteHeader(hdr); err != nil { + t.Fatal(err) + } + if _, err = writer.Write([]byte(contents)); err != nil { + t.Fatal(err) + } + if err := writer.Close(); err != nil { + t.Fatal(err) + } + // Simple test to make sure PAX extensions are in effect + if !bytes.Contains(buf.Bytes(), []byte("PaxHeaders.0")) { + t.Fatal("Expected at least one PAX header to be written.") + } + // Test that we can get a long name back out of the archive. + reader := NewReader(&buf) + hdr, err = reader.Next() + if err != nil { + t.Fatal(err) + } + if hdr.Name != chineseFilename { + t.Fatal("Couldn't recover unicode name") + } + if hdr.Gname != chineseGroupname { + t.Fatal("Couldn't recover unicode group") + } + if hdr.Uname != chineseUsername { + t.Fatal("Couldn't recover unicode user") + } +} + +func TestPaxXattrs(t *testing.T) { + xattrs := map[string]string{ + "user.key": "value", + } + + // Create an archive with an xattr + fileinfo, err := os.Stat("testdata/small.txt") + if err != nil { + t.Fatal(err) + } + hdr, err := FileInfoHeader(fileinfo, "") + if err != nil { + t.Fatalf("os.Stat: %v", err) + } + contents := "Kilts" + hdr.Xattrs = xattrs + var buf bytes.Buffer + writer := NewWriter(&buf) + if err := writer.WriteHeader(hdr); err != nil { + t.Fatal(err) + } + if _, err = writer.Write([]byte(contents)); err != nil { + t.Fatal(err) + } + if err := writer.Close(); err != nil { + t.Fatal(err) + } + // Test that we can get the xattrs back out of the archive. + reader := NewReader(&buf) + hdr, err = reader.Next() + if err != nil { + t.Fatal(err) + } + if !reflect.DeepEqual(hdr.Xattrs, xattrs) { + t.Fatalf("xattrs did not survive round trip: got %+v, want %+v", + hdr.Xattrs, xattrs) + } +} + +func TestPaxHeadersSorted(t *testing.T) { + fileinfo, err := os.Stat("testdata/small.txt") + if err != nil { + t.Fatal(err) + } + hdr, err := FileInfoHeader(fileinfo, "") + if err != nil { + t.Fatalf("os.Stat: %v", err) + } + contents := strings.Repeat(" ", int(hdr.Size)) + + hdr.Xattrs = map[string]string{ + "foo": "foo", + "bar": "bar", + "baz": "baz", + "qux": "qux", + } + + var buf bytes.Buffer + writer := NewWriter(&buf) + if err := writer.WriteHeader(hdr); err != nil { + t.Fatal(err) + } + if _, err = writer.Write([]byte(contents)); err != nil { + t.Fatal(err) + } + if err := writer.Close(); err != nil { + t.Fatal(err) + } + // Simple test to make sure PAX extensions are in effect + if !bytes.Contains(buf.Bytes(), []byte("PaxHeaders.0")) { + t.Fatal("Expected at least one PAX header to be written.") + } + + // xattr bar should always appear before others + indices := []int{ + bytes.Index(buf.Bytes(), []byte("bar=bar")), + bytes.Index(buf.Bytes(), []byte("baz=baz")), + bytes.Index(buf.Bytes(), []byte("foo=foo")), + bytes.Index(buf.Bytes(), []byte("qux=qux")), + } + if !sort.IntsAreSorted(indices) { + t.Fatal("PAX headers are not sorted") + } +} + +func TestUSTARLongName(t *testing.T) { + // Create an archive with a path that failed to split with USTAR extension in previous versions. + fileinfo, err := os.Stat("testdata/small.txt") + if err != nil { + t.Fatal(err) + } + hdr, err := FileInfoHeader(fileinfo, "") + hdr.Typeflag = TypeDir + if err != nil { + t.Fatalf("os.Stat:1 %v", err) + } + // Force a PAX long name to be written. The name was taken from a practical example + // that fails and replaced ever char through numbers to anonymize the sample. + longName := "/0000_0000000/00000-000000000/0000_0000000/00000-0000000000000/0000_0000000/00000-0000000-00000000/0000_0000000/00000000/0000_0000000/000/0000_0000000/00000000v00/0000_0000000/000000/0000_0000000/0000000/0000_0000000/00000y-00/0000/0000/00000000/0x000000/" + hdr.Name = longName + + hdr.Size = 0 + var buf bytes.Buffer + writer := NewWriter(&buf) + if err := writer.WriteHeader(hdr); err != nil { + t.Fatal(err) + } + if err := writer.Close(); err != nil { + t.Fatal(err) + } + // Test that we can get a long name back out of the archive. + reader := NewReader(&buf) + hdr, err = reader.Next() + if err != nil { + t.Fatal(err) + } + if hdr.Name != longName { + t.Fatal("Couldn't recover long name") + } +} + +func TestValidTypeflagWithPAXHeader(t *testing.T) { + var buffer bytes.Buffer + tw := NewWriter(&buffer) + + fileName := strings.Repeat("ab", 100) + + hdr := &Header{ + Name: fileName, + Size: 4, + Typeflag: 0, + } + if err := tw.WriteHeader(hdr); err != nil { + t.Fatalf("Failed to write header: %s", err) + } + if _, err := tw.Write([]byte("fooo")); err != nil { + t.Fatalf("Failed to write the file's data: %s", err) + } + tw.Close() + + tr := NewReader(&buffer) + + for { + header, err := tr.Next() + if err == io.EOF { + break + } + if err != nil { + t.Fatalf("Failed to read header: %s", err) + } + if header.Typeflag != TypeReg { + t.Fatalf("Typeflag should've been %d, found %d", TypeReg, header.Typeflag) + } + } +} + +// failOnceWriter fails exactly once and then always reports success. +type failOnceWriter bool + +func (w *failOnceWriter) Write(b []byte) (int, error) { + if !*w { + return 0, io.ErrShortWrite + } + *w = true + return len(b), nil +} + +func TestWriterErrors(t *testing.T) { + t.Run("HeaderOnly", func(t *testing.T) { + tw := NewWriter(new(bytes.Buffer)) + hdr := &Header{Name: "dir/", Typeflag: TypeDir} + if err := tw.WriteHeader(hdr); err != nil { + t.Fatalf("WriteHeader() = %v, want nil", err) + } + if _, err := tw.Write([]byte{0x00}); err != ErrWriteTooLong { + t.Fatalf("Write() = %v, want %v", err, ErrWriteTooLong) + } + }) + + t.Run("NegativeSize", func(t *testing.T) { + tw := NewWriter(new(bytes.Buffer)) + hdr := &Header{Name: "small.txt", Size: -1} + if err := tw.WriteHeader(hdr); err == nil { + t.Fatalf("WriteHeader() = nil, want non-nil error") + } + }) + + t.Run("BeforeHeader", func(t *testing.T) { + tw := NewWriter(new(bytes.Buffer)) + if _, err := tw.Write([]byte("Kilts")); err != ErrWriteTooLong { + t.Fatalf("Write() = %v, want %v", err, ErrWriteTooLong) + } + }) + + t.Run("AfterClose", func(t *testing.T) { + tw := NewWriter(new(bytes.Buffer)) + hdr := &Header{Name: "small.txt"} + if err := tw.WriteHeader(hdr); err != nil { + t.Fatalf("WriteHeader() = %v, want nil", err) + } + if err := tw.Close(); err != nil { + t.Fatalf("Close() = %v, want nil", err) + } + if _, err := tw.Write([]byte("Kilts")); err != ErrWriteAfterClose { + t.Fatalf("Write() = %v, want %v", err, ErrWriteAfterClose) + } + if err := tw.Flush(); err != ErrWriteAfterClose { + t.Fatalf("Flush() = %v, want %v", err, ErrWriteAfterClose) + } + if err := tw.Close(); err != nil { + t.Fatalf("Close() = %v, want nil", err) + } + }) + + t.Run("PrematureFlush", func(t *testing.T) { + tw := NewWriter(new(bytes.Buffer)) + hdr := &Header{Name: "small.txt", Size: 5} + if err := tw.WriteHeader(hdr); err != nil { + t.Fatalf("WriteHeader() = %v, want nil", err) + } + if err := tw.Flush(); err == nil { + t.Fatalf("Flush() = %v, want non-nil error", err) + } + }) + + t.Run("PrematureClose", func(t *testing.T) { + tw := NewWriter(new(bytes.Buffer)) + hdr := &Header{Name: "small.txt", Size: 5} + if err := tw.WriteHeader(hdr); err != nil { + t.Fatalf("WriteHeader() = %v, want nil", err) + } + if err := tw.Close(); err == nil { + t.Fatalf("Close() = %v, want non-nil error", err) + } + }) + + t.Run("Persistence", func(t *testing.T) { + tw := NewWriter(new(failOnceWriter)) + if err := tw.WriteHeader(&Header{}); err != io.ErrShortWrite { + t.Fatalf("WriteHeader() = %v, want %v", err, io.ErrShortWrite) + } + if err := tw.WriteHeader(&Header{Name: "small.txt"}); err == nil { + t.Errorf("WriteHeader() = got %v, want non-nil error", err) + } + if _, err := tw.Write(nil); err == nil { + t.Errorf("Write() = %v, want non-nil error", err) + } + if err := tw.Flush(); err == nil { + t.Errorf("Flush() = %v, want non-nil error", err) + } + if err := tw.Close(); err == nil { + t.Errorf("Close() = %v, want non-nil error", err) + } + }) +} + +func TestSplitUSTARPath(t *testing.T) { + sr := strings.Repeat + + vectors := []struct { + input string // Input path + prefix string // Expected output prefix + suffix string // Expected output suffix + ok bool // Split success? + }{ + {"", "", "", false}, + {"abc", "", "", false}, + {"用戶名", "", "", false}, + {sr("a", nameSize), "", "", false}, + {sr("a", nameSize) + "/", "", "", false}, + {sr("a", nameSize) + "/a", sr("a", nameSize), "a", true}, + {sr("a", prefixSize) + "/", "", "", false}, + {sr("a", prefixSize) + "/a", sr("a", prefixSize), "a", true}, + {sr("a", nameSize+1), "", "", false}, + {sr("/", nameSize+1), sr("/", nameSize-1), "/", true}, + {sr("a", prefixSize) + "/" + sr("b", nameSize), + sr("a", prefixSize), sr("b", nameSize), true}, + {sr("a", prefixSize) + "//" + sr("b", nameSize), "", "", false}, + {sr("a/", nameSize), sr("a/", 77) + "a", sr("a/", 22), true}, + } + + for _, v := range vectors { + prefix, suffix, ok := splitUSTARPath(v.input) + if prefix != v.prefix || suffix != v.suffix || ok != v.ok { + t.Errorf("splitUSTARPath(%q):\ngot (%q, %q, %v)\nwant (%q, %q, %v)", + v.input, prefix, suffix, ok, v.prefix, v.suffix, v.ok) + } + } +} + +// TestIssue12594 tests that the Writer does not attempt to populate the prefix +// field when encoding a header in the GNU format. The prefix field is valid +// in USTAR and PAX, but not GNU. +func TestIssue12594(t *testing.T) { + names := []string{ + "0/1/2/3/4/5/6/7/8/9/10/11/12/13/14/15/16/17/18/19/20/21/22/23/24/25/26/27/28/29/30/file.txt", + "0/1/2/3/4/5/6/7/8/9/10/11/12/13/14/15/16/17/18/19/20/21/22/23/24/25/26/27/28/29/30/31/32/33/file.txt", + "0/1/2/3/4/5/6/7/8/9/10/11/12/13/14/15/16/17/18/19/20/21/22/23/24/25/26/27/28/29/30/31/32/333/file.txt", + "0/1/2/3/4/5/6/7/8/9/10/11/12/13/14/15/16/17/18/19/20/21/22/23/24/25/26/27/28/29/30/31/32/33/34/35/36/37/38/39/40/file.txt", + "0000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000/file.txt", + "/home/support/.openoffice.org/3/user/uno_packages/cache/registry/com.sun.star.comp.deployment.executable.PackageRegistryBackend", + } + + for i, name := range names { + var b bytes.Buffer + + tw := NewWriter(&b) + if err := tw.WriteHeader(&Header{ + Name: name, + Uid: 1 << 25, // Prevent USTAR format + }); err != nil { + t.Errorf("test %d, unexpected WriteHeader error: %v", i, err) + } + if err := tw.Close(); err != nil { + t.Errorf("test %d, unexpected Close error: %v", i, err) + } + + // The prefix field should never appear in the GNU format. + var blk block + copy(blk[:], b.Bytes()) + prefix := string(blk.USTAR().Prefix()) + if i := strings.IndexByte(prefix, 0); i >= 0 { + prefix = prefix[:i] // Truncate at the NUL terminator + } + if blk.GetFormat() == FormatGNU && len(prefix) > 0 && strings.HasPrefix(name, prefix) { + t.Errorf("test %d, found prefix in GNU format: %s", i, prefix) + } + + tr := NewReader(&b) + hdr, err := tr.Next() + if err != nil { + t.Errorf("test %d, unexpected Next error: %v", i, err) + } + if hdr.Name != name { + t.Errorf("test %d, hdr.Name = %s, want %s", i, hdr.Name, name) + } + } +} + +// testNonEmptyWriter wraps an io.Writer and ensures that +// Write is never called with an empty buffer. +type testNonEmptyWriter struct{ io.Writer } + +func (w testNonEmptyWriter) Write(b []byte) (int, error) { + if len(b) == 0 { + return 0, errors.New("unexpected empty Write call") + } + return w.Writer.Write(b) +} + +func TestFileWriter(t *testing.T) { + type ( + testWrite struct { // Write(str) == (wantCnt, wantErr) + str string + wantCnt int + wantErr error + } + testReadFrom struct { // ReadFrom(testFile{ops}) == (wantCnt, wantErr) + ops fileOps + wantCnt int64 + wantErr error + } + testRemaining struct { // LogicalRemaining() == wantLCnt, PhysicalRemaining() == wantPCnt + wantLCnt int64 + wantPCnt int64 + } + testFnc interface{} // testWrite | testReadFrom | testRemaining + ) + + type ( + makeReg struct { + size int64 + wantStr string + } + makeSparse struct { + makeReg makeReg + sph sparseHoles + size int64 + } + fileMaker interface{} // makeReg | makeSparse + ) + + vectors := []struct { + maker fileMaker + tests []testFnc + }{{ + maker: makeReg{0, ""}, + tests: []testFnc{ + testRemaining{0, 0}, + testWrite{"", 0, nil}, + testWrite{"a", 0, ErrWriteTooLong}, + testReadFrom{fileOps{""}, 0, nil}, + testReadFrom{fileOps{"a"}, 0, ErrWriteTooLong}, + testRemaining{0, 0}, + }, + }, { + maker: makeReg{1, "a"}, + tests: []testFnc{ + testRemaining{1, 1}, + testWrite{"", 0, nil}, + testWrite{"a", 1, nil}, + testWrite{"bcde", 0, ErrWriteTooLong}, + testWrite{"", 0, nil}, + testReadFrom{fileOps{""}, 0, nil}, + testReadFrom{fileOps{"a"}, 0, ErrWriteTooLong}, + testRemaining{0, 0}, + }, + }, { + maker: makeReg{5, "hello"}, + tests: []testFnc{ + testRemaining{5, 5}, + testWrite{"hello", 5, nil}, + testRemaining{0, 0}, + }, + }, { + maker: makeReg{5, "\x00\x00\x00\x00\x00"}, + tests: []testFnc{ + testRemaining{5, 5}, + testReadFrom{fileOps{"\x00\x00\x00\x00\x00"}, 5, nil}, + testRemaining{0, 0}, + }, + }, { + maker: makeReg{5, "\x00\x00\x00\x00\x00"}, + tests: []testFnc{ + testRemaining{5, 5}, + testReadFrom{fileOps{"\x00\x00\x00\x00\x00extra"}, 5, ErrWriteTooLong}, + testRemaining{0, 0}, + }, + }, { + maker: makeReg{5, "abc\x00\x00"}, + tests: []testFnc{ + testRemaining{5, 5}, + testWrite{"abc", 3, nil}, + testRemaining{2, 2}, + testReadFrom{fileOps{"\x00\x00"}, 2, nil}, + testRemaining{0, 0}, + }, + }, { + maker: makeReg{5, "\x00\x00abc"}, + tests: []testFnc{ + testRemaining{5, 5}, + testWrite{"\x00\x00", 2, nil}, + testRemaining{3, 3}, + testWrite{"abc", 3, nil}, + testReadFrom{fileOps{"z"}, 0, ErrWriteTooLong}, + testWrite{"z", 0, ErrWriteTooLong}, + testRemaining{0, 0}, + }, + }, { + maker: makeSparse{makeReg{5, "abcde"}, sparseHoles{{2, 3}}, 8}, + tests: []testFnc{ + testRemaining{8, 5}, + testWrite{"ab\x00\x00\x00cde", 8, nil}, + testWrite{"a", 0, ErrWriteTooLong}, + testRemaining{0, 0}, + }, + }, { + maker: makeSparse{makeReg{5, "abcde"}, sparseHoles{{2, 3}}, 8}, + tests: []testFnc{ + testWrite{"ab\x00\x00\x00cdez", 8, ErrWriteTooLong}, + testRemaining{0, 0}, + }, + }, { + maker: makeSparse{makeReg{5, "abcde"}, sparseHoles{{2, 3}}, 8}, + tests: []testFnc{ + testWrite{"ab\x00", 3, nil}, + testRemaining{5, 3}, + testWrite{"\x00\x00cde", 5, nil}, + testWrite{"a", 0, ErrWriteTooLong}, + testRemaining{0, 0}, + }, + }, { + maker: makeSparse{makeReg{5, "abcde"}, sparseHoles{{2, 3}}, 8}, + tests: []testFnc{ + testWrite{"ab", 2, nil}, + testRemaining{6, 3}, + testReadFrom{fileOps{int64(3), "cde"}, 6, nil}, + testRemaining{0, 0}, + }, + }, { + maker: makeSparse{makeReg{5, "abcde"}, sparseHoles{{2, 3}}, 8}, + tests: []testFnc{ + testReadFrom{fileOps{"ab", int64(3), "cde"}, 8, nil}, + testRemaining{0, 0}, + }, + }, { + maker: makeSparse{makeReg{5, "abcde"}, sparseHoles{{2, 3}}, 8}, + tests: []testFnc{ + testReadFrom{fileOps{"ab", int64(3), "cdeX"}, 8, ErrWriteTooLong}, + testRemaining{0, 0}, + }, + }, { + maker: makeSparse{makeReg{4, "abcd"}, sparseHoles{{2, 3}}, 8}, + tests: []testFnc{ + testReadFrom{fileOps{"ab", int64(3), "cd"}, 7, io.ErrUnexpectedEOF}, + testRemaining{1, 0}, + }, + }, { + maker: makeSparse{makeReg{4, "abcd"}, sparseHoles{{2, 3}}, 8}, + tests: []testFnc{ + testReadFrom{fileOps{"ab", int64(3), "cde"}, 7, errMissData}, + testRemaining{1, 0}, + }, + }, { + maker: makeSparse{makeReg{6, "abcde"}, sparseHoles{{2, 3}}, 8}, + tests: []testFnc{ + testReadFrom{fileOps{"ab", int64(3), "cde"}, 8, errUnrefData}, + testRemaining{0, 1}, + }, + }, { + maker: makeSparse{makeReg{4, "abcd"}, sparseHoles{{2, 3}}, 8}, + tests: []testFnc{ + testWrite{"ab", 2, nil}, + testRemaining{6, 2}, + testWrite{"\x00\x00\x00", 3, nil}, + testRemaining{3, 2}, + testWrite{"cde", 2, errMissData}, + testRemaining{1, 0}, + }, + }, { + maker: makeSparse{makeReg{6, "abcde"}, sparseHoles{{2, 3}}, 8}, + tests: []testFnc{ + testWrite{"ab", 2, nil}, + testRemaining{6, 4}, + testWrite{"\x00\x00\x00", 3, nil}, + testRemaining{3, 4}, + testWrite{"cde", 3, errUnrefData}, + testRemaining{0, 1}, + }, + }, { + maker: makeSparse{makeReg{3, "abc"}, sparseHoles{{0, 2}, {5, 2}}, 7}, + tests: []testFnc{ + testRemaining{7, 3}, + testWrite{"\x00\x00abc\x00\x00", 7, nil}, + testRemaining{0, 0}, + }, + }, { + maker: makeSparse{makeReg{3, "abc"}, sparseHoles{{0, 2}, {5, 2}}, 7}, + tests: []testFnc{ + testRemaining{7, 3}, + testReadFrom{fileOps{int64(2), "abc", int64(1), "\x00"}, 7, nil}, + testRemaining{0, 0}, + }, + }, { + maker: makeSparse{makeReg{3, ""}, sparseHoles{{0, 2}, {5, 2}}, 7}, + tests: []testFnc{ + testWrite{"abcdefg", 0, errWriteHole}, + }, + }, { + maker: makeSparse{makeReg{3, "abc"}, sparseHoles{{0, 2}, {5, 2}}, 7}, + tests: []testFnc{ + testWrite{"\x00\x00abcde", 5, errWriteHole}, + }, + }, { + maker: makeSparse{makeReg{3, "abc"}, sparseHoles{{0, 2}, {5, 2}}, 7}, + tests: []testFnc{ + testWrite{"\x00\x00abc\x00\x00z", 7, ErrWriteTooLong}, + testRemaining{0, 0}, + }, + }, { + maker: makeSparse{makeReg{3, "abc"}, sparseHoles{{0, 2}, {5, 2}}, 7}, + tests: []testFnc{ + testWrite{"\x00\x00", 2, nil}, + testRemaining{5, 3}, + testWrite{"abc", 3, nil}, + testRemaining{2, 0}, + testWrite{"\x00\x00", 2, nil}, + testRemaining{0, 0}, + }, + }, { + maker: makeSparse{makeReg{2, "ab"}, sparseHoles{{0, 2}, {5, 2}}, 7}, + tests: []testFnc{ + testWrite{"\x00\x00", 2, nil}, + testWrite{"abc", 2, errMissData}, + testWrite{"\x00\x00", 0, errMissData}, + }, + }, { + maker: makeSparse{makeReg{4, "abc"}, sparseHoles{{0, 2}, {5, 2}}, 7}, + tests: []testFnc{ + testWrite{"\x00\x00", 2, nil}, + testWrite{"abc", 3, nil}, + testWrite{"\x00\x00", 2, errUnrefData}, + }, + }} + + for i, v := range vectors { + var wantStr string + bb := new(bytes.Buffer) + w := testNonEmptyWriter{bb} + var fw fileWriter + switch maker := v.maker.(type) { + case makeReg: + fw = ®FileWriter{w, maker.size} + wantStr = maker.wantStr + case makeSparse: + if !validateSparseEntries(maker.sph, maker.size) { + t.Fatalf("invalid sparse map: %v", maker.sph) + } + spd := invertSparseEntries(maker.sph, maker.size) + fw = ®FileWriter{w, maker.makeReg.size} + fw = &sparseFileWriter{fw, spd, 0} + wantStr = maker.makeReg.wantStr + default: + t.Fatalf("test %d, unknown make operation: %T", i, maker) + } + + for j, tf := range v.tests { + switch tf := tf.(type) { + case testWrite: + got, err := fw.Write([]byte(tf.str)) + if got != tf.wantCnt || err != tf.wantErr { + t.Errorf("test %d.%d, Write(%s):\ngot (%d, %v)\nwant (%d, %v)", i, j, tf.str, got, err, tf.wantCnt, tf.wantErr) + } + case testReadFrom: + f := &testFile{ops: tf.ops} + got, err := fw.ReadFrom(f) + if _, ok := err.(testError); ok { + t.Errorf("test %d.%d, ReadFrom(): %v", i, j, err) + } else if got != tf.wantCnt || err != tf.wantErr { + t.Errorf("test %d.%d, ReadFrom() = (%d, %v), want (%d, %v)", i, j, got, err, tf.wantCnt, tf.wantErr) + } + if len(f.ops) > 0 { + t.Errorf("test %d.%d, expected %d more operations", i, j, len(f.ops)) + } + case testRemaining: + if got := fw.LogicalRemaining(); got != tf.wantLCnt { + t.Errorf("test %d.%d, LogicalRemaining() = %d, want %d", i, j, got, tf.wantLCnt) + } + if got := fw.PhysicalRemaining(); got != tf.wantPCnt { + t.Errorf("test %d.%d, PhysicalRemaining() = %d, want %d", i, j, got, tf.wantPCnt) + } + default: + t.Fatalf("test %d.%d, unknown test operation: %T", i, j, tf) + } + } + + if got := bb.String(); got != wantStr { + t.Fatalf("test %d, String() = %q, want %q", i, got, wantStr) + } + } +} diff --git a/src/archive/zip/example_test.go b/src/archive/zip/example_test.go new file mode 100644 index 0000000..1eed304 --- /dev/null +++ b/src/archive/zip/example_test.go @@ -0,0 +1,93 @@ +// Copyright 2012 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package zip_test + +import ( + "archive/zip" + "bytes" + "compress/flate" + "fmt" + "io" + "log" + "os" +) + +func ExampleWriter() { + // Create a buffer to write our archive to. + buf := new(bytes.Buffer) + + // Create a new zip archive. + w := zip.NewWriter(buf) + + // Add some files to the archive. + var files = []struct { + Name, Body string + }{ + {"readme.txt", "This archive contains some text files."}, + {"gopher.txt", "Gopher names:\nGeorge\nGeoffrey\nGonzo"}, + {"todo.txt", "Get animal handling licence.\nWrite more examples."}, + } + for _, file := range files { + f, err := w.Create(file.Name) + if err != nil { + log.Fatal(err) + } + _, err = f.Write([]byte(file.Body)) + if err != nil { + log.Fatal(err) + } + } + + // Make sure to check the error on Close. + err := w.Close() + if err != nil { + log.Fatal(err) + } +} + +func ExampleReader() { + // Open a zip archive for reading. + r, err := zip.OpenReader("testdata/readme.zip") + if err != nil { + log.Fatal(err) + } + defer r.Close() + + // Iterate through the files in the archive, + // printing some of their contents. + for _, f := range r.File { + fmt.Printf("Contents of %s:\n", f.Name) + rc, err := f.Open() + if err != nil { + log.Fatal(err) + } + _, err = io.CopyN(os.Stdout, rc, 68) + if err != nil { + log.Fatal(err) + } + rc.Close() + fmt.Println() + } + // Output: + // Contents of README: + // This is the source code repository for the Go programming language. +} + +func ExampleWriter_RegisterCompressor() { + // Override the default Deflate compressor with a higher compression level. + + // Create a buffer to write our archive to. + buf := new(bytes.Buffer) + + // Create a new zip archive. + w := zip.NewWriter(buf) + + // Register a custom Deflate compressor. + w.RegisterCompressor(zip.Deflate, func(out io.Writer) (io.WriteCloser, error) { + return flate.NewWriter(out, flate.BestCompression) + }) + + // Proceed to add files to w. +} diff --git a/src/archive/zip/reader.go b/src/archive/zip/reader.go new file mode 100644 index 0000000..b68d323 --- /dev/null +++ b/src/archive/zip/reader.go @@ -0,0 +1,836 @@ +// Copyright 2010 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package zip + +import ( + "bufio" + "encoding/binary" + "errors" + "hash" + "hash/crc32" + "io" + "io/fs" + "os" + "path" + "sort" + "strings" + "sync" + "time" +) + +var ( + ErrFormat = errors.New("zip: not a valid zip file") + ErrAlgorithm = errors.New("zip: unsupported compression algorithm") + ErrChecksum = errors.New("zip: checksum error") +) + +// A Reader serves content from a ZIP archive. +type Reader struct { + r io.ReaderAt + File []*File + Comment string + decompressors map[uint16]Decompressor + + // fileList is a list of files sorted by ename, + // for use by the Open method. + fileListOnce sync.Once + fileList []fileListEntry +} + +// A ReadCloser is a Reader that must be closed when no longer needed. +type ReadCloser struct { + f *os.File + Reader +} + +// A File is a single file in a ZIP archive. +// The file information is in the embedded FileHeader. +// The file content can be accessed by calling Open. +type File struct { + FileHeader + zip *Reader + zipr io.ReaderAt + zipsize int64 + headerOffset int64 +} + +func (f *File) hasDataDescriptor() bool { + return f.Flags&0x8 != 0 +} + +// OpenReader will open the Zip file specified by name and return a ReadCloser. +func OpenReader(name string) (*ReadCloser, error) { + f, err := os.Open(name) + if err != nil { + return nil, err + } + fi, err := f.Stat() + if err != nil { + f.Close() + return nil, err + } + r := new(ReadCloser) + if err := r.init(f, fi.Size()); err != nil { + f.Close() + return nil, err + } + r.f = f + return r, nil +} + +// NewReader returns a new Reader reading from r, which is assumed to +// have the given size in bytes. +func NewReader(r io.ReaderAt, size int64) (*Reader, error) { + if size < 0 { + return nil, errors.New("zip: size cannot be negative") + } + zr := new(Reader) + if err := zr.init(r, size); err != nil { + return nil, err + } + return zr, nil +} + +func (z *Reader) init(r io.ReaderAt, size int64) error { + end, err := readDirectoryEnd(r, size) + if err != nil { + return err + } + z.r = r + // Since the number of directory records is not validated, it is not + // safe to preallocate z.File without first checking that the specified + // number of files is reasonable, since a malformed archive may + // indicate it contains up to 1 << 128 - 1 files. Since each file has a + // header which will be _at least_ 30 bytes we can safely preallocate + // if (data size / 30) >= end.directoryRecords. + if end.directorySize < uint64(size) && (uint64(size)-end.directorySize)/30 >= end.directoryRecords { + z.File = make([]*File, 0, end.directoryRecords) + } + z.Comment = end.comment + rs := io.NewSectionReader(r, 0, size) + if _, err = rs.Seek(int64(end.directoryOffset), io.SeekStart); err != nil { + return err + } + buf := bufio.NewReader(rs) + + // The count of files inside a zip is truncated to fit in a uint16. + // Gloss over this by reading headers until we encounter + // a bad one, and then only report an ErrFormat or UnexpectedEOF if + // the file count modulo 65536 is incorrect. + for { + f := &File{zip: z, zipr: r, zipsize: size} + err = readDirectoryHeader(f, buf) + if err == ErrFormat || err == io.ErrUnexpectedEOF { + break + } + if err != nil { + return err + } + z.File = append(z.File, f) + } + if uint16(len(z.File)) != uint16(end.directoryRecords) { // only compare 16 bits here + // Return the readDirectoryHeader error if we read + // the wrong number of directory entries. + return err + } + return nil +} + +// RegisterDecompressor registers or overrides a custom decompressor for a +// specific method ID. If a decompressor for a given method is not found, +// Reader will default to looking up the decompressor at the package level. +func (z *Reader) RegisterDecompressor(method uint16, dcomp Decompressor) { + if z.decompressors == nil { + z.decompressors = make(map[uint16]Decompressor) + } + z.decompressors[method] = dcomp +} + +func (z *Reader) decompressor(method uint16) Decompressor { + dcomp := z.decompressors[method] + if dcomp == nil { + dcomp = decompressor(method) + } + return dcomp +} + +// Close closes the Zip file, rendering it unusable for I/O. +func (rc *ReadCloser) Close() error { + return rc.f.Close() +} + +// DataOffset returns the offset of the file's possibly-compressed +// data, relative to the beginning of the zip file. +// +// Most callers should instead use Open, which transparently +// decompresses data and verifies checksums. +func (f *File) DataOffset() (offset int64, err error) { + bodyOffset, err := f.findBodyOffset() + if err != nil { + return + } + return f.headerOffset + bodyOffset, nil +} + +// Open returns a ReadCloser that provides access to the File's contents. +// Multiple files may be read concurrently. +func (f *File) Open() (io.ReadCloser, error) { + bodyOffset, err := f.findBodyOffset() + if err != nil { + return nil, err + } + size := int64(f.CompressedSize64) + r := io.NewSectionReader(f.zipr, f.headerOffset+bodyOffset, size) + dcomp := f.zip.decompressor(f.Method) + if dcomp == nil { + return nil, ErrAlgorithm + } + var rc io.ReadCloser = dcomp(r) + var desr io.Reader + if f.hasDataDescriptor() { + desr = io.NewSectionReader(f.zipr, f.headerOffset+bodyOffset+size, dataDescriptorLen) + } + rc = &checksumReader{ + rc: rc, + hash: crc32.NewIEEE(), + f: f, + desr: desr, + } + return rc, nil +} + +type checksumReader struct { + rc io.ReadCloser + hash hash.Hash32 + nread uint64 // number of bytes read so far + f *File + desr io.Reader // if non-nil, where to read the data descriptor + err error // sticky error +} + +func (r *checksumReader) Stat() (fs.FileInfo, error) { + return headerFileInfo{&r.f.FileHeader}, nil +} + +func (r *checksumReader) Read(b []byte) (n int, err error) { + if r.err != nil { + return 0, r.err + } + n, err = r.rc.Read(b) + r.hash.Write(b[:n]) + r.nread += uint64(n) + if err == nil { + return + } + if err == io.EOF { + if r.nread != r.f.UncompressedSize64 { + return 0, io.ErrUnexpectedEOF + } + if r.desr != nil { + if err1 := readDataDescriptor(r.desr, r.f); err1 != nil { + if err1 == io.EOF { + err = io.ErrUnexpectedEOF + } else { + err = err1 + } + } else if r.hash.Sum32() != r.f.CRC32 { + err = ErrChecksum + } + } else { + // If there's not a data descriptor, we still compare + // the CRC32 of what we've read against the file header + // or TOC's CRC32, if it seems like it was set. + if r.f.CRC32 != 0 && r.hash.Sum32() != r.f.CRC32 { + err = ErrChecksum + } + } + } + r.err = err + return +} + +func (r *checksumReader) Close() error { return r.rc.Close() } + +// findBodyOffset does the minimum work to verify the file has a header +// and returns the file body offset. +func (f *File) findBodyOffset() (int64, error) { + var buf [fileHeaderLen]byte + if _, err := f.zipr.ReadAt(buf[:], f.headerOffset); err != nil { + return 0, err + } + b := readBuf(buf[:]) + if sig := b.uint32(); sig != fileHeaderSignature { + return 0, ErrFormat + } + b = b[22:] // skip over most of the header + filenameLen := int(b.uint16()) + extraLen := int(b.uint16()) + return int64(fileHeaderLen + filenameLen + extraLen), nil +} + +// readDirectoryHeader attempts to read a directory header from r. +// It returns io.ErrUnexpectedEOF if it cannot read a complete header, +// and ErrFormat if it doesn't find a valid header signature. +func readDirectoryHeader(f *File, r io.Reader) error { + var buf [directoryHeaderLen]byte + if _, err := io.ReadFull(r, buf[:]); err != nil { + return err + } + b := readBuf(buf[:]) + if sig := b.uint32(); sig != directoryHeaderSignature { + return ErrFormat + } + f.CreatorVersion = b.uint16() + f.ReaderVersion = b.uint16() + f.Flags = b.uint16() + f.Method = b.uint16() + f.ModifiedTime = b.uint16() + f.ModifiedDate = b.uint16() + f.CRC32 = b.uint32() + f.CompressedSize = b.uint32() + f.UncompressedSize = b.uint32() + f.CompressedSize64 = uint64(f.CompressedSize) + f.UncompressedSize64 = uint64(f.UncompressedSize) + filenameLen := int(b.uint16()) + extraLen := int(b.uint16()) + commentLen := int(b.uint16()) + b = b[4:] // skipped start disk number and internal attributes (2x uint16) + f.ExternalAttrs = b.uint32() + f.headerOffset = int64(b.uint32()) + d := make([]byte, filenameLen+extraLen+commentLen) + if _, err := io.ReadFull(r, d); err != nil { + return err + } + f.Name = string(d[:filenameLen]) + f.Extra = d[filenameLen : filenameLen+extraLen] + f.Comment = string(d[filenameLen+extraLen:]) + + // Determine the character encoding. + utf8Valid1, utf8Require1 := detectUTF8(f.Name) + utf8Valid2, utf8Require2 := detectUTF8(f.Comment) + switch { + case !utf8Valid1 || !utf8Valid2: + // Name and Comment definitely not UTF-8. + f.NonUTF8 = true + case !utf8Require1 && !utf8Require2: + // Name and Comment use only single-byte runes that overlap with UTF-8. + f.NonUTF8 = false + default: + // Might be UTF-8, might be some other encoding; preserve existing flag. + // Some ZIP writers use UTF-8 encoding without setting the UTF-8 flag. + // Since it is impossible to always distinguish valid UTF-8 from some + // other encoding (e.g., GBK or Shift-JIS), we trust the flag. + f.NonUTF8 = f.Flags&0x800 == 0 + } + + needUSize := f.UncompressedSize == ^uint32(0) + needCSize := f.CompressedSize == ^uint32(0) + needHeaderOffset := f.headerOffset == int64(^uint32(0)) + + // Best effort to find what we need. + // Other zip authors might not even follow the basic format, + // and we'll just ignore the Extra content in that case. + var modified time.Time +parseExtras: + for extra := readBuf(f.Extra); len(extra) >= 4; { // need at least tag and size + fieldTag := extra.uint16() + fieldSize := int(extra.uint16()) + if len(extra) < fieldSize { + break + } + fieldBuf := extra.sub(fieldSize) + + switch fieldTag { + case zip64ExtraID: + // update directory values from the zip64 extra block. + // They should only be consulted if the sizes read earlier + // are maxed out. + // See golang.org/issue/13367. + if needUSize { + needUSize = false + if len(fieldBuf) < 8 { + return ErrFormat + } + f.UncompressedSize64 = fieldBuf.uint64() + } + if needCSize { + needCSize = false + if len(fieldBuf) < 8 { + return ErrFormat + } + f.CompressedSize64 = fieldBuf.uint64() + } + if needHeaderOffset { + needHeaderOffset = false + if len(fieldBuf) < 8 { + return ErrFormat + } + f.headerOffset = int64(fieldBuf.uint64()) + } + case ntfsExtraID: + if len(fieldBuf) < 4 { + continue parseExtras + } + fieldBuf.uint32() // reserved (ignored) + for len(fieldBuf) >= 4 { // need at least tag and size + attrTag := fieldBuf.uint16() + attrSize := int(fieldBuf.uint16()) + if len(fieldBuf) < attrSize { + continue parseExtras + } + attrBuf := fieldBuf.sub(attrSize) + if attrTag != 1 || attrSize != 24 { + continue // Ignore irrelevant attributes + } + + const ticksPerSecond = 1e7 // Windows timestamp resolution + ts := int64(attrBuf.uint64()) // ModTime since Windows epoch + secs := int64(ts / ticksPerSecond) + nsecs := (1e9 / ticksPerSecond) * int64(ts%ticksPerSecond) + epoch := time.Date(1601, time.January, 1, 0, 0, 0, 0, time.UTC) + modified = time.Unix(epoch.Unix()+secs, nsecs) + } + case unixExtraID, infoZipUnixExtraID: + if len(fieldBuf) < 8 { + continue parseExtras + } + fieldBuf.uint32() // AcTime (ignored) + ts := int64(fieldBuf.uint32()) // ModTime since Unix epoch + modified = time.Unix(ts, 0) + case extTimeExtraID: + if len(fieldBuf) < 5 || fieldBuf.uint8()&1 == 0 { + continue parseExtras + } + ts := int64(fieldBuf.uint32()) // ModTime since Unix epoch + modified = time.Unix(ts, 0) + } + } + + msdosModified := msDosTimeToTime(f.ModifiedDate, f.ModifiedTime) + f.Modified = msdosModified + if !modified.IsZero() { + f.Modified = modified.UTC() + + // If legacy MS-DOS timestamps are set, we can use the delta between + // the legacy and extended versions to estimate timezone offset. + // + // A non-UTC timezone is always used (even if offset is zero). + // Thus, FileHeader.Modified.Location() == time.UTC is useful for + // determining whether extended timestamps are present. + // This is necessary for users that need to do additional time + // calculations when dealing with legacy ZIP formats. + if f.ModifiedTime != 0 || f.ModifiedDate != 0 { + f.Modified = modified.In(timeZone(msdosModified.Sub(modified))) + } + } + + // Assume that uncompressed size 2³²-1 could plausibly happen in + // an old zip32 file that was sharding inputs into the largest chunks + // possible (or is just malicious; search the web for 42.zip). + // If needUSize is true still, it means we didn't see a zip64 extension. + // As long as the compressed size is not also 2³²-1 (implausible) + // and the header is not also 2³²-1 (equally implausible), + // accept the uncompressed size 2³²-1 as valid. + // If nothing else, this keeps archive/zip working with 42.zip. + _ = needUSize + + if needCSize || needHeaderOffset { + return ErrFormat + } + + return nil +} + +func readDataDescriptor(r io.Reader, f *File) error { + var buf [dataDescriptorLen]byte + + // The spec says: "Although not originally assigned a + // signature, the value 0x08074b50 has commonly been adopted + // as a signature value for the data descriptor record. + // Implementers should be aware that ZIP files may be + // encountered with or without this signature marking data + // descriptors and should account for either case when reading + // ZIP files to ensure compatibility." + // + // dataDescriptorLen includes the size of the signature but + // first read just those 4 bytes to see if it exists. + if _, err := io.ReadFull(r, buf[:4]); err != nil { + return err + } + off := 0 + maybeSig := readBuf(buf[:4]) + if maybeSig.uint32() != dataDescriptorSignature { + // No data descriptor signature. Keep these four + // bytes. + off += 4 + } + if _, err := io.ReadFull(r, buf[off:12]); err != nil { + return err + } + b := readBuf(buf[:12]) + if b.uint32() != f.CRC32 { + return ErrChecksum + } + + // The two sizes that follow here can be either 32 bits or 64 bits + // but the spec is not very clear on this and different + // interpretations has been made causing incompatibilities. We + // already have the sizes from the central directory so we can + // just ignore these. + + return nil +} + +func readDirectoryEnd(r io.ReaderAt, size int64) (dir *directoryEnd, err error) { + // look for directoryEndSignature in the last 1k, then in the last 65k + var buf []byte + var directoryEndOffset int64 + for i, bLen := range []int64{1024, 65 * 1024} { + if bLen > size { + bLen = size + } + buf = make([]byte, int(bLen)) + if _, err := r.ReadAt(buf, size-bLen); err != nil && err != io.EOF { + return nil, err + } + if p := findSignatureInBlock(buf); p >= 0 { + buf = buf[p:] + directoryEndOffset = size - bLen + int64(p) + break + } + if i == 1 || bLen == size { + return nil, ErrFormat + } + } + + // read header into struct + b := readBuf(buf[4:]) // skip signature + d := &directoryEnd{ + diskNbr: uint32(b.uint16()), + dirDiskNbr: uint32(b.uint16()), + dirRecordsThisDisk: uint64(b.uint16()), + directoryRecords: uint64(b.uint16()), + directorySize: uint64(b.uint32()), + directoryOffset: uint64(b.uint32()), + commentLen: b.uint16(), + } + l := int(d.commentLen) + if l > len(b) { + return nil, errors.New("zip: invalid comment length") + } + d.comment = string(b[:l]) + + // These values mean that the file can be a zip64 file + if d.directoryRecords == 0xffff || d.directorySize == 0xffff || d.directoryOffset == 0xffffffff { + p, err := findDirectory64End(r, directoryEndOffset) + if err == nil && p >= 0 { + err = readDirectory64End(r, p, d) + } + if err != nil { + return nil, err + } + } + // Make sure directoryOffset points to somewhere in our file. + if o := int64(d.directoryOffset); o < 0 || o >= size { + return nil, ErrFormat + } + return d, nil +} + +// findDirectory64End tries to read the zip64 locator just before the +// directory end and returns the offset of the zip64 directory end if +// found. +func findDirectory64End(r io.ReaderAt, directoryEndOffset int64) (int64, error) { + locOffset := directoryEndOffset - directory64LocLen + if locOffset < 0 { + return -1, nil // no need to look for a header outside the file + } + buf := make([]byte, directory64LocLen) + if _, err := r.ReadAt(buf, locOffset); err != nil { + return -1, err + } + b := readBuf(buf) + if sig := b.uint32(); sig != directory64LocSignature { + return -1, nil + } + if b.uint32() != 0 { // number of the disk with the start of the zip64 end of central directory + return -1, nil // the file is not a valid zip64-file + } + p := b.uint64() // relative offset of the zip64 end of central directory record + if b.uint32() != 1 { // total number of disks + return -1, nil // the file is not a valid zip64-file + } + return int64(p), nil +} + +// readDirectory64End reads the zip64 directory end and updates the +// directory end with the zip64 directory end values. +func readDirectory64End(r io.ReaderAt, offset int64, d *directoryEnd) (err error) { + buf := make([]byte, directory64EndLen) + if _, err := r.ReadAt(buf, offset); err != nil { + return err + } + + b := readBuf(buf) + if sig := b.uint32(); sig != directory64EndSignature { + return ErrFormat + } + + b = b[12:] // skip dir size, version and version needed (uint64 + 2x uint16) + d.diskNbr = b.uint32() // number of this disk + d.dirDiskNbr = b.uint32() // number of the disk with the start of the central directory + d.dirRecordsThisDisk = b.uint64() // total number of entries in the central directory on this disk + d.directoryRecords = b.uint64() // total number of entries in the central directory + d.directorySize = b.uint64() // size of the central directory + d.directoryOffset = b.uint64() // offset of start of central directory with respect to the starting disk number + + return nil +} + +func findSignatureInBlock(b []byte) int { + for i := len(b) - directoryEndLen; i >= 0; i-- { + // defined from directoryEndSignature in struct.go + if b[i] == 'P' && b[i+1] == 'K' && b[i+2] == 0x05 && b[i+3] == 0x06 { + // n is length of comment + n := int(b[i+directoryEndLen-2]) | int(b[i+directoryEndLen-1])<<8 + if n+directoryEndLen+i <= len(b) { + return i + } + } + } + return -1 +} + +type readBuf []byte + +func (b *readBuf) uint8() uint8 { + v := (*b)[0] + *b = (*b)[1:] + return v +} + +func (b *readBuf) uint16() uint16 { + v := binary.LittleEndian.Uint16(*b) + *b = (*b)[2:] + return v +} + +func (b *readBuf) uint32() uint32 { + v := binary.LittleEndian.Uint32(*b) + *b = (*b)[4:] + return v +} + +func (b *readBuf) uint64() uint64 { + v := binary.LittleEndian.Uint64(*b) + *b = (*b)[8:] + return v +} + +func (b *readBuf) sub(n int) readBuf { + b2 := (*b)[:n] + *b = (*b)[n:] + return b2 +} + +// A fileListEntry is a File and its ename. +// If file == nil, the fileListEntry describes a directory without metadata. +type fileListEntry struct { + name string + file *File + isDir bool +} + +type fileInfoDirEntry interface { + fs.FileInfo + fs.DirEntry +} + +func (e *fileListEntry) stat() fileInfoDirEntry { + if !e.isDir { + return headerFileInfo{&e.file.FileHeader} + } + return e +} + +// Only used for directories. +func (f *fileListEntry) Name() string { _, elem, _ := split(f.name); return elem } +func (f *fileListEntry) Size() int64 { return 0 } +func (f *fileListEntry) Mode() fs.FileMode { return fs.ModeDir | 0555 } +func (f *fileListEntry) Type() fs.FileMode { return fs.ModeDir } +func (f *fileListEntry) IsDir() bool { return true } +func (f *fileListEntry) Sys() interface{} { return nil } + +func (f *fileListEntry) ModTime() time.Time { + if f.file == nil { + return time.Time{} + } + return f.file.FileHeader.Modified.UTC() +} + +func (f *fileListEntry) Info() (fs.FileInfo, error) { return f, nil } + +// toValidName coerces name to be a valid name for fs.FS.Open. +func toValidName(name string) string { + name = strings.ReplaceAll(name, `\`, `/`) + p := path.Clean(name) + if strings.HasPrefix(p, "/") { + p = p[len("/"):] + } + for strings.HasPrefix(p, "../") { + p = p[len("../"):] + } + return p +} + +func (r *Reader) initFileList() { + r.fileListOnce.Do(func() { + dirs := make(map[string]bool) + knownDirs := make(map[string]bool) + for _, file := range r.File { + isDir := len(file.Name) > 0 && file.Name[len(file.Name)-1] == '/' + name := toValidName(file.Name) + if name == "" { + continue + } + for dir := path.Dir(name); dir != "."; dir = path.Dir(dir) { + dirs[dir] = true + } + entry := fileListEntry{ + name: name, + file: file, + isDir: isDir, + } + r.fileList = append(r.fileList, entry) + if isDir { + knownDirs[name] = true + } + } + for dir := range dirs { + if !knownDirs[dir] { + entry := fileListEntry{ + name: dir, + file: nil, + isDir: true, + } + r.fileList = append(r.fileList, entry) + } + } + + sort.Slice(r.fileList, func(i, j int) bool { return fileEntryLess(r.fileList[i].name, r.fileList[j].name) }) + }) +} + +func fileEntryLess(x, y string) bool { + xdir, xelem, _ := split(x) + ydir, yelem, _ := split(y) + return xdir < ydir || xdir == ydir && xelem < yelem +} + +// Open opens the named file in the ZIP archive, +// using the semantics of fs.FS.Open: +// paths are always slash separated, with no +// leading / or ../ elements. +func (r *Reader) Open(name string) (fs.File, error) { + r.initFileList() + + if !fs.ValidPath(name) { + return nil, &fs.PathError{Op: "open", Path: name, Err: fs.ErrInvalid} + } + e := r.openLookup(name) + if e == nil { + return nil, &fs.PathError{Op: "open", Path: name, Err: fs.ErrNotExist} + } + if e.isDir { + return &openDir{e, r.openReadDir(name), 0}, nil + } + rc, err := e.file.Open() + if err != nil { + return nil, err + } + return rc.(fs.File), nil +} + +func split(name string) (dir, elem string, isDir bool) { + if len(name) > 0 && name[len(name)-1] == '/' { + isDir = true + name = name[:len(name)-1] + } + i := len(name) - 1 + for i >= 0 && name[i] != '/' { + i-- + } + if i < 0 { + return ".", name, isDir + } + return name[:i], name[i+1:], isDir +} + +var dotFile = &fileListEntry{name: "./", isDir: true} + +func (r *Reader) openLookup(name string) *fileListEntry { + if name == "." { + return dotFile + } + + dir, elem, _ := split(name) + files := r.fileList + i := sort.Search(len(files), func(i int) bool { + idir, ielem, _ := split(files[i].name) + return idir > dir || idir == dir && ielem >= elem + }) + if i < len(files) { + fname := files[i].name + if fname == name || len(fname) == len(name)+1 && fname[len(name)] == '/' && fname[:len(name)] == name { + return &files[i] + } + } + return nil +} + +func (r *Reader) openReadDir(dir string) []fileListEntry { + files := r.fileList + i := sort.Search(len(files), func(i int) bool { + idir, _, _ := split(files[i].name) + return idir >= dir + }) + j := sort.Search(len(files), func(j int) bool { + jdir, _, _ := split(files[j].name) + return jdir > dir + }) + return files[i:j] +} + +type openDir struct { + e *fileListEntry + files []fileListEntry + offset int +} + +func (d *openDir) Close() error { return nil } +func (d *openDir) Stat() (fs.FileInfo, error) { return d.e.stat(), nil } + +func (d *openDir) Read([]byte) (int, error) { + return 0, &fs.PathError{Op: "read", Path: d.e.name, Err: errors.New("is a directory")} +} + +func (d *openDir) ReadDir(count int) ([]fs.DirEntry, error) { + n := len(d.files) - d.offset + if count > 0 && n > count { + n = count + } + if n == 0 { + if count <= 0 { + return nil, nil + } + return nil, io.EOF + } + list := make([]fs.DirEntry, n) + for i := range list { + list[i] = d.files[d.offset+i].stat() + } + d.offset += n + return list, nil +} diff --git a/src/archive/zip/reader_test.go b/src/archive/zip/reader_test.go new file mode 100644 index 0000000..1682c58 --- /dev/null +++ b/src/archive/zip/reader_test.go @@ -0,0 +1,1373 @@ +// Copyright 2010 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package zip + +import ( + "bytes" + "encoding/binary" + "encoding/hex" + "internal/obscuretestdata" + "io" + "io/fs" + "os" + "path/filepath" + "reflect" + "regexp" + "strings" + "testing" + "testing/fstest" + "time" +) + +type ZipTest struct { + Name string + Source func() (r io.ReaderAt, size int64) // if non-nil, used instead of testdata/<Name> file + Comment string + File []ZipTestFile + Obscured bool // needed for Apple notarization (golang.org/issue/34986) + Error error // the error that Opening this file should return +} + +type ZipTestFile struct { + Name string + Mode fs.FileMode + NonUTF8 bool + ModTime time.Time + Modified time.Time + + // Information describing expected zip file content. + // First, reading the entire content should produce the error ContentErr. + // Second, if ContentErr==nil, the content should match Content. + // If content is large, an alternative to setting Content is to set File, + // which names a file in the testdata/ directory containing the + // uncompressed expected content. + // If content is very large, an alternative to setting Content or File + // is to set Size, which will then be checked against the header-reported size + // but will bypass the decompressing of the actual data. + // This last option is used for testing very large (multi-GB) compressed files. + ContentErr error + Content []byte + File string + Size uint64 +} + +var tests = []ZipTest{ + { + Name: "test.zip", + Comment: "This is a zipfile comment.", + File: []ZipTestFile{ + { + Name: "test.txt", + Content: []byte("This is a test text file.\n"), + Modified: time.Date(2010, 9, 5, 12, 12, 1, 0, timeZone(+10*time.Hour)), + Mode: 0644, + }, + { + Name: "gophercolor16x16.png", + File: "gophercolor16x16.png", + Modified: time.Date(2010, 9, 5, 15, 52, 58, 0, timeZone(+10*time.Hour)), + Mode: 0644, + }, + }, + }, + { + Name: "test-trailing-junk.zip", + Comment: "This is a zipfile comment.", + File: []ZipTestFile{ + { + Name: "test.txt", + Content: []byte("This is a test text file.\n"), + Modified: time.Date(2010, 9, 5, 12, 12, 1, 0, timeZone(+10*time.Hour)), + Mode: 0644, + }, + { + Name: "gophercolor16x16.png", + File: "gophercolor16x16.png", + Modified: time.Date(2010, 9, 5, 15, 52, 58, 0, timeZone(+10*time.Hour)), + Mode: 0644, + }, + }, + }, + { + Name: "r.zip", + Source: returnRecursiveZip, + File: []ZipTestFile{ + { + Name: "r/r.zip", + Content: rZipBytes(), + Modified: time.Date(2010, 3, 4, 0, 24, 16, 0, time.UTC), + Mode: 0666, + }, + }, + }, + { + Name: "symlink.zip", + File: []ZipTestFile{ + { + Name: "symlink", + Content: []byte("../target"), + Modified: time.Date(2012, 2, 3, 19, 56, 48, 0, timeZone(-2*time.Hour)), + Mode: 0777 | fs.ModeSymlink, + }, + }, + }, + { + Name: "readme.zip", + }, + { + Name: "readme.notzip", + Error: ErrFormat, + }, + { + Name: "dd.zip", + File: []ZipTestFile{ + { + Name: "filename", + Content: []byte("This is a test textfile.\n"), + Modified: time.Date(2011, 2, 2, 13, 6, 20, 0, time.UTC), + Mode: 0666, + }, + }, + }, + { + // created in windows XP file manager. + Name: "winxp.zip", + File: []ZipTestFile{ + { + Name: "hello", + Content: []byte("world \r\n"), + Modified: time.Date(2011, 12, 8, 10, 4, 24, 0, time.UTC), + Mode: 0666, + }, + { + Name: "dir/bar", + Content: []byte("foo \r\n"), + Modified: time.Date(2011, 12, 8, 10, 4, 50, 0, time.UTC), + Mode: 0666, + }, + { + Name: "dir/empty/", + Content: []byte{}, + Modified: time.Date(2011, 12, 8, 10, 8, 6, 0, time.UTC), + Mode: fs.ModeDir | 0777, + }, + { + Name: "readonly", + Content: []byte("important \r\n"), + Modified: time.Date(2011, 12, 8, 10, 6, 8, 0, time.UTC), + Mode: 0444, + }, + }, + }, + { + // created by Zip 3.0 under Linux + Name: "unix.zip", + File: []ZipTestFile{ + { + Name: "hello", + Content: []byte("world \r\n"), + Modified: time.Date(2011, 12, 8, 10, 4, 24, 0, timeZone(0)), + Mode: 0666, + }, + { + Name: "dir/bar", + Content: []byte("foo \r\n"), + Modified: time.Date(2011, 12, 8, 10, 4, 50, 0, timeZone(0)), + Mode: 0666, + }, + { + Name: "dir/empty/", + Content: []byte{}, + Modified: time.Date(2011, 12, 8, 10, 8, 6, 0, timeZone(0)), + Mode: fs.ModeDir | 0777, + }, + { + Name: "readonly", + Content: []byte("important \r\n"), + Modified: time.Date(2011, 12, 8, 10, 6, 8, 0, timeZone(0)), + Mode: 0444, + }, + }, + }, + { + // created by Go, before we wrote the "optional" data + // descriptor signatures (which are required by macOS). + // Use obscured file to avoid Apple’s notarization service + // rejecting the toolchain due to an inability to unzip this archive. + // See golang.org/issue/34986 + Name: "go-no-datadesc-sig.zip.base64", + Obscured: true, + File: []ZipTestFile{ + { + Name: "foo.txt", + Content: []byte("foo\n"), + Modified: time.Date(2012, 3, 8, 16, 59, 10, 0, timeZone(-8*time.Hour)), + Mode: 0644, + }, + { + Name: "bar.txt", + Content: []byte("bar\n"), + Modified: time.Date(2012, 3, 8, 16, 59, 12, 0, timeZone(-8*time.Hour)), + Mode: 0644, + }, + }, + }, + { + // created by Go, after we wrote the "optional" data + // descriptor signatures (which are required by macOS) + Name: "go-with-datadesc-sig.zip", + File: []ZipTestFile{ + { + Name: "foo.txt", + Content: []byte("foo\n"), + Modified: time.Date(1979, 11, 30, 0, 0, 0, 0, time.UTC), + Mode: 0666, + }, + { + Name: "bar.txt", + Content: []byte("bar\n"), + Modified: time.Date(1979, 11, 30, 0, 0, 0, 0, time.UTC), + Mode: 0666, + }, + }, + }, + { + Name: "Bad-CRC32-in-data-descriptor", + Source: returnCorruptCRC32Zip, + File: []ZipTestFile{ + { + Name: "foo.txt", + Content: []byte("foo\n"), + Modified: time.Date(1979, 11, 30, 0, 0, 0, 0, time.UTC), + Mode: 0666, + ContentErr: ErrChecksum, + }, + { + Name: "bar.txt", + Content: []byte("bar\n"), + Modified: time.Date(1979, 11, 30, 0, 0, 0, 0, time.UTC), + Mode: 0666, + }, + }, + }, + // Tests that we verify (and accept valid) crc32s on files + // with crc32s in their file header (not in data descriptors) + { + Name: "crc32-not-streamed.zip", + File: []ZipTestFile{ + { + Name: "foo.txt", + Content: []byte("foo\n"), + Modified: time.Date(2012, 3, 8, 16, 59, 10, 0, timeZone(-8*time.Hour)), + Mode: 0644, + }, + { + Name: "bar.txt", + Content: []byte("bar\n"), + Modified: time.Date(2012, 3, 8, 16, 59, 12, 0, timeZone(-8*time.Hour)), + Mode: 0644, + }, + }, + }, + // Tests that we verify (and reject invalid) crc32s on files + // with crc32s in their file header (not in data descriptors) + { + Name: "crc32-not-streamed.zip", + Source: returnCorruptNotStreamedZip, + File: []ZipTestFile{ + { + Name: "foo.txt", + Content: []byte("foo\n"), + Modified: time.Date(2012, 3, 8, 16, 59, 10, 0, timeZone(-8*time.Hour)), + Mode: 0644, + ContentErr: ErrChecksum, + }, + { + Name: "bar.txt", + Content: []byte("bar\n"), + Modified: time.Date(2012, 3, 8, 16, 59, 12, 0, timeZone(-8*time.Hour)), + Mode: 0644, + }, + }, + }, + { + Name: "zip64.zip", + File: []ZipTestFile{ + { + Name: "README", + Content: []byte("This small file is in ZIP64 format.\n"), + Modified: time.Date(2012, 8, 10, 14, 33, 32, 0, time.UTC), + Mode: 0644, + }, + }, + }, + // Another zip64 file with different Extras fields. (golang.org/issue/7069) + { + Name: "zip64-2.zip", + File: []ZipTestFile{ + { + Name: "README", + Content: []byte("This small file is in ZIP64 format.\n"), + Modified: time.Date(2012, 8, 10, 14, 33, 32, 0, timeZone(-4*time.Hour)), + Mode: 0644, + }, + }, + }, + // Largest possible non-zip64 file, with no zip64 header. + { + Name: "big.zip", + Source: returnBigZipBytes, + File: []ZipTestFile{ + { + Name: "big.file", + Content: nil, + Size: 1<<32 - 1, + Modified: time.Date(1979, 11, 30, 0, 0, 0, 0, time.UTC), + Mode: 0666, + }, + }, + }, + { + Name: "utf8-7zip.zip", + File: []ZipTestFile{ + { + Name: "世界", + Content: []byte{}, + Mode: 0666, + Modified: time.Date(2017, 11, 6, 13, 9, 27, 867862500, timeZone(-8*time.Hour)), + }, + }, + }, + { + Name: "utf8-infozip.zip", + File: []ZipTestFile{ + { + Name: "世界", + Content: []byte{}, + Mode: 0644, + // Name is valid UTF-8, but format does not have UTF-8 flag set. + // We don't do UTF-8 detection for multi-byte runes due to + // false-positives with other encodings (e.g., Shift-JIS). + // Format says encoding is not UTF-8, so we trust it. + NonUTF8: true, + Modified: time.Date(2017, 11, 6, 13, 9, 27, 0, timeZone(-8*time.Hour)), + }, + }, + }, + { + Name: "utf8-osx.zip", + File: []ZipTestFile{ + { + Name: "世界", + Content: []byte{}, + Mode: 0644, + // Name is valid UTF-8, but format does not have UTF-8 set. + NonUTF8: true, + Modified: time.Date(2017, 11, 6, 13, 9, 27, 0, timeZone(-8*time.Hour)), + }, + }, + }, + { + Name: "utf8-winrar.zip", + File: []ZipTestFile{ + { + Name: "世界", + Content: []byte{}, + Mode: 0666, + Modified: time.Date(2017, 11, 6, 13, 9, 27, 867862500, timeZone(-8*time.Hour)), + }, + }, + }, + { + Name: "utf8-winzip.zip", + File: []ZipTestFile{ + { + Name: "世界", + Content: []byte{}, + Mode: 0666, + Modified: time.Date(2017, 11, 6, 13, 9, 27, 867000000, timeZone(-8*time.Hour)), + }, + }, + }, + { + Name: "time-7zip.zip", + File: []ZipTestFile{ + { + Name: "test.txt", + Content: []byte{}, + Size: 1<<32 - 1, + Modified: time.Date(2017, 10, 31, 21, 11, 57, 244817900, timeZone(-7*time.Hour)), + Mode: 0666, + }, + }, + }, + { + Name: "time-infozip.zip", + File: []ZipTestFile{ + { + Name: "test.txt", + Content: []byte{}, + Size: 1<<32 - 1, + Modified: time.Date(2017, 10, 31, 21, 11, 57, 0, timeZone(-7*time.Hour)), + Mode: 0644, + }, + }, + }, + { + Name: "time-osx.zip", + File: []ZipTestFile{ + { + Name: "test.txt", + Content: []byte{}, + Size: 1<<32 - 1, + Modified: time.Date(2017, 10, 31, 21, 11, 57, 0, timeZone(-7*time.Hour)), + Mode: 0644, + }, + }, + }, + { + Name: "time-win7.zip", + File: []ZipTestFile{ + { + Name: "test.txt", + Content: []byte{}, + Size: 1<<32 - 1, + Modified: time.Date(2017, 10, 31, 21, 11, 58, 0, time.UTC), + Mode: 0666, + }, + }, + }, + { + Name: "time-winrar.zip", + File: []ZipTestFile{ + { + Name: "test.txt", + Content: []byte{}, + Size: 1<<32 - 1, + Modified: time.Date(2017, 10, 31, 21, 11, 57, 244817900, timeZone(-7*time.Hour)), + Mode: 0666, + }, + }, + }, + { + Name: "time-winzip.zip", + File: []ZipTestFile{ + { + Name: "test.txt", + Content: []byte{}, + Size: 1<<32 - 1, + Modified: time.Date(2017, 10, 31, 21, 11, 57, 244000000, timeZone(-7*time.Hour)), + Mode: 0666, + }, + }, + }, + { + Name: "time-go.zip", + File: []ZipTestFile{ + { + Name: "test.txt", + Content: []byte{}, + Size: 1<<32 - 1, + Modified: time.Date(2017, 10, 31, 21, 11, 57, 0, timeZone(-7*time.Hour)), + Mode: 0666, + }, + }, + }, + { + Name: "time-22738.zip", + File: []ZipTestFile{ + { + Name: "file", + Content: []byte{}, + Mode: 0666, + Modified: time.Date(1999, 12, 31, 19, 0, 0, 0, timeZone(-5*time.Hour)), + ModTime: time.Date(1999, 12, 31, 19, 0, 0, 0, time.UTC), + }, + }, + }, +} + +func TestReader(t *testing.T) { + for _, zt := range tests { + t.Run(zt.Name, func(t *testing.T) { + readTestZip(t, zt) + }) + } +} + +func readTestZip(t *testing.T, zt ZipTest) { + var z *Reader + var err error + if zt.Source != nil { + rat, size := zt.Source() + z, err = NewReader(rat, size) + } else { + path := filepath.Join("testdata", zt.Name) + if zt.Obscured { + tf, err := obscuretestdata.DecodeToTempFile(path) + if err != nil { + t.Errorf("obscuretestdata.DecodeToTempFile(%s): %v", path, err) + return + } + defer os.Remove(tf) + path = tf + } + var rc *ReadCloser + rc, err = OpenReader(path) + if err == nil { + defer rc.Close() + z = &rc.Reader + } + } + if err != zt.Error { + t.Errorf("error=%v, want %v", err, zt.Error) + return + } + + // bail if file is not zip + if err == ErrFormat { + return + } + + // bail here if no Files expected to be tested + // (there may actually be files in the zip, but we don't care) + if zt.File == nil { + return + } + + if z.Comment != zt.Comment { + t.Errorf("comment=%q, want %q", z.Comment, zt.Comment) + } + if len(z.File) != len(zt.File) { + t.Fatalf("file count=%d, want %d", len(z.File), len(zt.File)) + } + + // test read of each file + for i, ft := range zt.File { + readTestFile(t, zt, ft, z.File[i]) + } + if t.Failed() { + return + } + + // test simultaneous reads + n := 0 + done := make(chan bool) + for i := 0; i < 5; i++ { + for j, ft := range zt.File { + go func(j int, ft ZipTestFile) { + readTestFile(t, zt, ft, z.File[j]) + done <- true + }(j, ft) + n++ + } + } + for ; n > 0; n-- { + <-done + } +} + +func equalTimeAndZone(t1, t2 time.Time) bool { + name1, offset1 := t1.Zone() + name2, offset2 := t2.Zone() + return t1.Equal(t2) && name1 == name2 && offset1 == offset2 +} + +func readTestFile(t *testing.T, zt ZipTest, ft ZipTestFile, f *File) { + if f.Name != ft.Name { + t.Errorf("name=%q, want %q", f.Name, ft.Name) + } + if !ft.Modified.IsZero() && !equalTimeAndZone(f.Modified, ft.Modified) { + t.Errorf("%s: Modified=%s, want %s", f.Name, f.Modified, ft.Modified) + } + if !ft.ModTime.IsZero() && !equalTimeAndZone(f.ModTime(), ft.ModTime) { + t.Errorf("%s: ModTime=%s, want %s", f.Name, f.ModTime(), ft.ModTime) + } + + testFileMode(t, f, ft.Mode) + + size := uint64(f.UncompressedSize) + if size == uint32max { + size = f.UncompressedSize64 + } else if size != f.UncompressedSize64 { + t.Errorf("%v: UncompressedSize=%#x does not match UncompressedSize64=%#x", f.Name, size, f.UncompressedSize64) + } + + r, err := f.Open() + if err != nil { + t.Errorf("%v", err) + return + } + + // For very large files, just check that the size is correct. + // The content is expected to be all zeros. + // Don't bother uncompressing: too big. + if ft.Content == nil && ft.File == "" && ft.Size > 0 { + if size != ft.Size { + t.Errorf("%v: uncompressed size %#x, want %#x", ft.Name, size, ft.Size) + } + r.Close() + return + } + + var b bytes.Buffer + _, err = io.Copy(&b, r) + if err != ft.ContentErr { + t.Errorf("copying contents: %v (want %v)", err, ft.ContentErr) + } + if err != nil { + return + } + r.Close() + + if g := uint64(b.Len()); g != size { + t.Errorf("%v: read %v bytes but f.UncompressedSize == %v", f.Name, g, size) + } + + var c []byte + if ft.Content != nil { + c = ft.Content + } else if c, err = os.ReadFile("testdata/" + ft.File); err != nil { + t.Error(err) + return + } + + if b.Len() != len(c) { + t.Errorf("%s: len=%d, want %d", f.Name, b.Len(), len(c)) + return + } + + for i, b := range b.Bytes() { + if b != c[i] { + t.Errorf("%s: content[%d]=%q want %q", f.Name, i, b, c[i]) + return + } + } +} + +func testFileMode(t *testing.T, f *File, want fs.FileMode) { + mode := f.Mode() + if want == 0 { + t.Errorf("%s mode: got %v, want none", f.Name, mode) + } else if mode != want { + t.Errorf("%s mode: want %v, got %v", f.Name, want, mode) + } +} + +func TestInvalidFiles(t *testing.T) { + const size = 1024 * 70 // 70kb + b := make([]byte, size) + + // zeroes + _, err := NewReader(bytes.NewReader(b), size) + if err != ErrFormat { + t.Errorf("zeroes: error=%v, want %v", err, ErrFormat) + } + + // repeated directoryEndSignatures + sig := make([]byte, 4) + binary.LittleEndian.PutUint32(sig, directoryEndSignature) + for i := 0; i < size-4; i += 4 { + copy(b[i:i+4], sig) + } + _, err = NewReader(bytes.NewReader(b), size) + if err != ErrFormat { + t.Errorf("sigs: error=%v, want %v", err, ErrFormat) + } + + // negative size + _, err = NewReader(bytes.NewReader([]byte("foobar")), -1) + if err == nil { + t.Errorf("archive/zip.NewReader: expected error when negative size is passed") + } +} + +func messWith(fileName string, corrupter func(b []byte)) (r io.ReaderAt, size int64) { + data, err := os.ReadFile(filepath.Join("testdata", fileName)) + if err != nil { + panic("Error reading " + fileName + ": " + err.Error()) + } + corrupter(data) + return bytes.NewReader(data), int64(len(data)) +} + +func returnCorruptCRC32Zip() (r io.ReaderAt, size int64) { + return messWith("go-with-datadesc-sig.zip", func(b []byte) { + // Corrupt one of the CRC32s in the data descriptor: + b[0x2d]++ + }) +} + +func returnCorruptNotStreamedZip() (r io.ReaderAt, size int64) { + return messWith("crc32-not-streamed.zip", func(b []byte) { + // Corrupt foo.txt's final crc32 byte, in both + // the file header and TOC. (0x7e -> 0x7f) + b[0x11]++ + b[0x9d]++ + + // TODO(bradfitz): add a new test that only corrupts + // one of these values, and verify that that's also an + // error. Currently, the reader code doesn't verify the + // fileheader and TOC's crc32 match if they're both + // non-zero and only the second line above, the TOC, + // is what matters. + }) +} + +// rZipBytes returns the bytes of a recursive zip file, without +// putting it on disk and triggering certain virus scanners. +func rZipBytes() []byte { + s := ` +0000000 50 4b 03 04 14 00 00 00 08 00 08 03 64 3c f9 f4 +0000010 89 64 48 01 00 00 b8 01 00 00 07 00 00 00 72 2f +0000020 72 2e 7a 69 70 00 25 00 da ff 50 4b 03 04 14 00 +0000030 00 00 08 00 08 03 64 3c f9 f4 89 64 48 01 00 00 +0000040 b8 01 00 00 07 00 00 00 72 2f 72 2e 7a 69 70 00 +0000050 2f 00 d0 ff 00 25 00 da ff 50 4b 03 04 14 00 00 +0000060 00 08 00 08 03 64 3c f9 f4 89 64 48 01 00 00 b8 +0000070 01 00 00 07 00 00 00 72 2f 72 2e 7a 69 70 00 2f +0000080 00 d0 ff c2 54 8e 57 39 00 05 00 fa ff c2 54 8e +0000090 57 39 00 05 00 fa ff 00 05 00 fa ff 00 14 00 eb +00000a0 ff c2 54 8e 57 39 00 05 00 fa ff 00 05 00 fa ff +00000b0 00 14 00 eb ff 42 88 21 c4 00 00 14 00 eb ff 42 +00000c0 88 21 c4 00 00 14 00 eb ff 42 88 21 c4 00 00 14 +00000d0 00 eb ff 42 88 21 c4 00 00 14 00 eb ff 42 88 21 +00000e0 c4 00 00 00 00 ff ff 00 00 00 ff ff 00 34 00 cb +00000f0 ff 42 88 21 c4 00 00 00 00 ff ff 00 00 00 ff ff +0000100 00 34 00 cb ff 42 e8 21 5e 0f 00 00 00 ff ff 0a +0000110 f0 66 64 12 61 c0 15 dc e8 a0 48 bf 48 af 2a b3 +0000120 20 c0 9b 95 0d c4 67 04 42 53 06 06 06 40 00 06 +0000130 00 f9 ff 6d 01 00 00 00 00 42 e8 21 5e 0f 00 00 +0000140 00 ff ff 0a f0 66 64 12 61 c0 15 dc e8 a0 48 bf +0000150 48 af 2a b3 20 c0 9b 95 0d c4 67 04 42 53 06 06 +0000160 06 40 00 06 00 f9 ff 6d 01 00 00 00 00 50 4b 01 +0000170 02 14 00 14 00 00 00 08 00 08 03 64 3c f9 f4 89 +0000180 64 48 01 00 00 b8 01 00 00 07 00 00 00 00 00 00 +0000190 00 00 00 00 00 00 00 00 00 00 00 72 2f 72 2e 7a +00001a0 69 70 50 4b 05 06 00 00 00 00 01 00 01 00 35 00 +00001b0 00 00 6d 01 00 00 00 00` + s = regexp.MustCompile(`[0-9a-f]{7}`).ReplaceAllString(s, "") + s = regexp.MustCompile(`\s+`).ReplaceAllString(s, "") + b, err := hex.DecodeString(s) + if err != nil { + panic(err) + } + return b +} + +func returnRecursiveZip() (r io.ReaderAt, size int64) { + b := rZipBytes() + return bytes.NewReader(b), int64(len(b)) +} + +// biggestZipBytes returns the bytes of a zip file biggest.zip +// that contains a zip file bigger.zip that contains a zip file +// big.zip that contains big.file, which contains 2³²-1 zeros. +// The big.zip file is interesting because it has no zip64 header, +// much like the innermost zip files in the well-known 42.zip. +// +// biggest.zip was generated by changing isZip64 to use > uint32max +// instead of >= uint32max and then running this program: +// +// package main +// +// import ( +// "archive/zip" +// "bytes" +// "io" +// "io/ioutil" +// "log" +// ) +// +// type zeros struct{} +// +// func (zeros) Read(b []byte) (int, error) { +// for i := range b { +// b[i] = 0 +// } +// return len(b), nil +// } +// +// func main() { +// bigZip := makeZip("big.file", io.LimitReader(zeros{}, 1<<32-1)) +// if err := os.WriteFile("/tmp/big.zip", bigZip, 0666); err != nil { +// log.Fatal(err) +// } +// +// biggerZip := makeZip("big.zip", bytes.NewReader(bigZip)) +// if err := os.WriteFile("/tmp/bigger.zip", biggerZip, 0666); err != nil { +// log.Fatal(err) +// } +// +// biggestZip := makeZip("bigger.zip", bytes.NewReader(biggerZip)) +// if err := os.WriteFile("/tmp/biggest.zip", biggestZip, 0666); err != nil { +// log.Fatal(err) +// } +// } +// +// func makeZip(name string, r io.Reader) []byte { +// var buf bytes.Buffer +// w := zip.NewWriter(&buf) +// wf, err := w.Create(name) +// if err != nil { +// log.Fatal(err) +// } +// if _, err = io.Copy(wf, r); err != nil { +// log.Fatal(err) +// } +// if err := w.Close(); err != nil { +// log.Fatal(err) +// } +// return buf.Bytes() +// } +// +// The 4 GB of zeros compresses to 4 MB, which compresses to 20 kB, +// which compresses to 1252 bytes (in the hex dump below). +// +// It's here in hex for the same reason as rZipBytes above: to avoid +// problems with on-disk virus scanners or other zip processors. +// +func biggestZipBytes() []byte { + s := ` +0000000 50 4b 03 04 14 00 08 00 08 00 00 00 00 00 00 00 +0000010 00 00 00 00 00 00 00 00 00 00 0a 00 00 00 62 69 +0000020 67 67 65 72 2e 7a 69 70 ec dc 6b 4c 53 67 18 07 +0000030 f0 16 c5 ca 65 2e cb b8 94 20 61 1f 44 33 c7 cd +0000040 c0 86 4a b5 c0 62 8a 61 05 c6 cd 91 b2 54 8c 1b +0000050 63 8b 03 9c 1b 95 52 5a e3 a0 19 6c b2 05 59 44 +0000060 64 9d 73 83 71 11 46 61 14 b9 1d 14 09 4a c3 60 +0000070 2e 4c 6e a5 60 45 02 62 81 95 b6 94 9e 9e 77 e7 +0000080 d0 43 b6 f8 71 df 96 3c e7 a4 69 ce bf cf e9 79 +0000090 ce ef 79 3f bf f1 31 db b6 bb 31 76 92 e7 f3 07 +00000a0 8b fc 9c ca cc 08 cc cb cc 5e d2 1c 88 d9 7e bb +00000b0 4f bb 3a 3f 75 f1 5d 7f 8f c2 68 67 77 8f 25 ff +00000c0 84 e2 93 2d ef a4 95 3d 71 4e 2c b9 b0 87 c3 be +00000d0 3d f8 a7 60 24 61 c5 ef ae 9e c8 6c 6d 4e 69 c8 +00000e0 67 65 34 f8 37 76 2d 76 5c 54 f3 95 65 49 c7 0f +00000f0 18 71 4b 7e 5b 6a d1 79 47 61 41 b0 4e 2a 74 45 +0000100 43 58 12 b2 5a a5 c6 7d 68 55 88 d4 98 75 18 6d +0000110 08 d1 1f 8f 5a 9e 96 ee 45 cf a4 84 4e 4b e8 50 +0000120 a7 13 d9 06 de 52 81 97 36 b2 d7 b8 fc 2b 5f 55 +0000130 23 1f 32 59 cf 30 27 fb e2 8a b9 de 45 dd 63 9c +0000140 4b b5 8b 96 4c 7a 62 62 cc a1 a7 cf fa f1 fe dd +0000150 54 62 11 bf 36 78 b3 c7 b1 b5 f2 61 4d 4e dd 66 +0000160 32 2e e6 70 34 5f f4 c9 e6 6c 43 6f da 6b c6 c3 +0000170 09 2c ce 09 57 7f d2 7e b4 23 ba 7c 1b 99 bc 22 +0000180 3e f1 de 91 2f e3 9c 1b 82 cc c2 84 39 aa e6 de +0000190 b4 69 fc cc cb 72 a6 61 45 f0 d3 1d 26 19 7c 8d +00001a0 29 c8 66 02 be 77 6a f9 3d 34 79 17 19 c8 96 24 +00001b0 a3 ac e4 dd 3b 1a 8e c6 fe 96 38 6b bf 67 5a 23 +00001c0 f4 16 f4 e6 8a b4 fc c2 cd bf 95 66 1d bb 35 aa +00001d0 92 7d 66 d8 08 8d a5 1f 54 2a af 09 cf 61 ff d2 +00001e0 85 9d 8f b6 d7 88 07 4a 86 03 db 64 f3 d9 92 73 +00001f0 df ec a7 fc 23 4c 8d 83 79 63 2a d9 fd 8d b3 c8 +0000200 8f 7e d4 19 85 e6 8d 1c 76 f0 8b 58 32 fd 9a d6 +0000210 85 e2 48 ad c3 d5 60 6f 7e 22 dd ef 09 49 7c 7f +0000220 3a 45 c3 71 b7 df f3 4c 63 fb b5 d9 31 5f 6e d6 +0000230 24 1d a4 4a fe 32 a7 5c 16 48 5c 3e 08 6b 8a d3 +0000240 25 1d a2 12 a5 59 24 ea 20 5f 52 6d ad 94 db 6b +0000250 94 b9 5d eb 4b a7 5c 44 bb 1e f2 3c 6b cf 52 c9 +0000260 e9 e5 ba 06 b9 c4 e5 0a d0 00 0d d0 00 0d d0 00 +0000270 0d d0 00 0d d0 00 0d d0 00 0d d0 00 0d d0 00 0d +0000280 d0 00 0d d0 00 0d d0 00 0d d0 00 0d d0 00 0d d0 +0000290 00 0d d0 00 0d d0 00 0d d0 00 0d d0 00 0d d0 00 +00002a0 0d d0 00 cd ff 9e 46 86 fa a7 7d 3a 43 d7 8e 10 +00002b0 52 e9 be e6 6e cf eb 9e 85 4d 65 ce cc 30 c1 44 +00002c0 c0 4e af bc 9c 6c 4b a0 d7 54 ff 1d d5 5c 89 fb +00002d0 b5 34 7e c4 c2 9e f5 a0 f6 5b 7e 6e ca 73 c7 ef +00002e0 5d be de f9 e8 81 eb a5 0a a5 63 54 2c d7 1c d1 +00002f0 89 17 85 f8 16 94 f2 8a b2 a3 f5 b6 6d df 75 cd +0000300 90 dd 64 bd 5d 55 4e f2 55 19 1b b7 cc ef 1b ea +0000310 2e 05 9c f4 aa 1e a8 cd a6 82 c7 59 0f 5e 9d e0 +0000320 bb fc 6c d6 99 23 eb 36 ad c6 c5 e1 d8 e1 e2 3e +0000330 d9 90 5a f7 91 5d 6f bc 33 6d 98 47 d2 7c 2e 2f +0000340 99 a4 25 72 85 49 2c be 0b 5b af 8f e5 6e 81 a6 +0000350 a3 5a 6f 39 53 3a ab 7a 8b 1e 26 f7 46 6c 7d 26 +0000360 53 b3 22 31 94 d3 83 f2 18 4d f5 92 33 27 53 97 +0000370 0f d3 e6 55 9c a6 c5 31 87 6f d3 f3 ae 39 6f 56 +0000380 10 7b ab 7e d0 b4 ca f2 b8 05 be 3f 0e 6e 5a 75 +0000390 ab 0c f5 37 0e ba 8e 75 71 7a aa ed 7a dd 6a 63 +00003a0 be 9b a0 97 27 6a 6f e7 d3 8b c4 7c ec d3 91 56 +00003b0 d9 ac 5e bf 16 42 2f 00 1f 93 a2 23 87 bd e2 59 +00003c0 a0 de 1a 66 c8 62 eb 55 8f 91 17 b4 61 42 7a 50 +00003d0 40 03 34 40 03 34 40 03 34 40 03 34 40 03 34 40 +00003e0 03 34 40 03 34 40 03 34 40 03 34 40 03 34 40 03 +00003f0 34 40 03 34 40 03 34 ff 85 86 90 8b ea 67 90 0d +0000400 e1 42 1b d2 61 d6 79 ec fd 3e 44 28 a4 51 6c 5c +0000410 fc d2 72 ca ba 82 18 46 16 61 cd 93 a9 0f d1 24 +0000420 17 99 e2 2c 71 16 84 0c c8 7a 13 0f 9a 5e c5 f0 +0000430 79 64 e2 12 4d c8 82 a1 81 19 2d aa 44 6d 87 54 +0000440 84 71 c1 f6 d4 ca 25 8c 77 b9 08 c7 c8 5e 10 8a +0000450 8f 61 ed 8c ba 30 1f 79 9a c7 60 34 2b b9 8c f8 +0000460 18 a6 83 1b e3 9f ad 79 fe fd 1b 8b f1 fc 41 6f +0000470 d4 13 1f e3 b8 83 ba 64 92 e7 eb e4 77 05 8f ba +0000480 fa 3b 00 00 ff ff 50 4b 07 08 a6 18 b1 91 5e 04 +0000490 00 00 e4 47 00 00 50 4b 01 02 14 00 14 00 08 00 +00004a0 08 00 00 00 00 00 a6 18 b1 91 5e 04 00 00 e4 47 +00004b0 00 00 0a 00 00 00 00 00 00 00 00 00 00 00 00 00 +00004c0 00 00 00 00 62 69 67 67 65 72 2e 7a 69 70 50 4b +00004d0 05 06 00 00 00 00 01 00 01 00 38 00 00 00 96 04 +00004e0 00 00 00 00` + s = regexp.MustCompile(`[0-9a-f]{7}`).ReplaceAllString(s, "") + s = regexp.MustCompile(`\s+`).ReplaceAllString(s, "") + b, err := hex.DecodeString(s) + if err != nil { + panic(err) + } + return b +} + +func returnBigZipBytes() (r io.ReaderAt, size int64) { + b := biggestZipBytes() + for i := 0; i < 2; i++ { + r, err := NewReader(bytes.NewReader(b), int64(len(b))) + if err != nil { + panic(err) + } + f, err := r.File[0].Open() + if err != nil { + panic(err) + } + b, err = io.ReadAll(f) + if err != nil { + panic(err) + } + } + return bytes.NewReader(b), int64(len(b)) +} + +func TestIssue8186(t *testing.T) { + // Directory headers & data found in the TOC of a JAR file. + dirEnts := []string{ + "PK\x01\x02\n\x00\n\x00\x00\b\x00\x004\x9d3?\xaa\x1b\x06\xf0\x81\x02\x00\x00\x81\x02\x00\x00-\x00\x05\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00res/drawable-xhdpi-v4/ic_actionbar_accept.png\xfe\xca\x00\x00\x00", + "PK\x01\x02\n\x00\n\x00\x00\b\x00\x004\x9d3?\x90K\x89\xc7t\n\x00\x00t\n\x00\x00\x0e\x00\x03\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xd1\x02\x00\x00resources.arsc\x00\x00\x00", + "PK\x01\x02\x14\x00\x14\x00\b\b\b\x004\x9d3?\xff$\x18\xed3\x03\x00\x00\xb4\b\x00\x00\x13\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00t\r\x00\x00AndroidManifest.xml", + "PK\x01\x02\x14\x00\x14\x00\b\b\b\x004\x9d3?\x14\xc5K\xab\x192\x02\x00\xc8\xcd\x04\x00\v\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xe8\x10\x00\x00classes.dex", + "PK\x01\x02\x14\x00\x14\x00\b\b\b\x004\x9d3?E\x96\nD\xac\x01\x00\x00P\x03\x00\x00&\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00:C\x02\x00res/layout/actionbar_set_wallpaper.xml", + "PK\x01\x02\x14\x00\x14\x00\b\b\b\x004\x9d3?Ļ\x14\xe3\xd8\x01\x00\x00\xd8\x03\x00\x00 \x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00:E\x02\x00res/layout/wallpaper_cropper.xml", + "PK\x01\x02\x14\x00\x14\x00\b\b\b\x004\x9d3?}\xc1\x15\x9eZ\x01\x00\x00!\x02\x00\x00\x14\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00`G\x02\x00META-INF/MANIFEST.MF", + "PK\x01\x02\x14\x00\x14\x00\b\b\b\x004\x9d3?\xe6\x98Ьo\x01\x00\x00\x84\x02\x00\x00\x10\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xfcH\x02\x00META-INF/CERT.SF", + "PK\x01\x02\x14\x00\x14\x00\b\b\b\x004\x9d3?\xbfP\x96b\x86\x04\x00\x00\xb2\x06\x00\x00\x11\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xa9J\x02\x00META-INF/CERT.RSA", + } + for i, s := range dirEnts { + var f File + err := readDirectoryHeader(&f, strings.NewReader(s)) + if err != nil { + t.Errorf("error reading #%d: %v", i, err) + } + } +} + +// Verify we return ErrUnexpectedEOF when length is short. +func TestIssue10957(t *testing.T) { + data := []byte("PK\x03\x040000000PK\x01\x0200000" + + "0000000000000000000\x00" + + "\x00\x00\x00\x00\x00000000000000PK\x01" + + "\x020000000000000000000" + + "00000\v\x00\x00\x00\x00\x00000000000" + + "00000000000000PK\x01\x0200" + + "00000000000000000000" + + "00\v\x00\x00\x00\x00\x00000000000000" + + "00000000000PK\x01\x020000<" + + "0\x00\x0000000000000000\v\x00\v" + + "\x00\x00\x00\x00\x0000000000\x00\x00\x00\x00000" + + "00000000PK\x01\x0200000000" + + "0000000000000000\v\x00\x00\x00" + + "\x00\x0000PK\x05\x06000000\x05\x000000" + + "\v\x00\x00\x00\x00\x00") + z, err := NewReader(bytes.NewReader(data), int64(len(data))) + if err != nil { + t.Fatal(err) + } + for i, f := range z.File { + r, err := f.Open() + if err != nil { + continue + } + if f.UncompressedSize64 < 1e6 { + n, err := io.Copy(io.Discard, r) + if i == 3 && err != io.ErrUnexpectedEOF { + t.Errorf("File[3] error = %v; want io.ErrUnexpectedEOF", err) + } + if err == nil && uint64(n) != f.UncompressedSize64 { + t.Errorf("file %d: bad size: copied=%d; want=%d", i, n, f.UncompressedSize64) + } + } + r.Close() + } +} + +// Verify that this particular malformed zip file is rejected. +func TestIssue10956(t *testing.T) { + data := []byte("PK\x06\x06PK\x06\a0000\x00\x00\x00\x00\x00\x00\x00\x00" + + "0000PK\x05\x06000000000000" + + "0000\v\x00000\x00\x00\x00\x00\x00\x00\x000") + r, err := NewReader(bytes.NewReader(data), int64(len(data))) + if err == nil { + t.Errorf("got nil error, want ErrFormat") + } + if r != nil { + t.Errorf("got non-nil Reader, want nil") + } +} + +// Verify we return ErrUnexpectedEOF when reading truncated data descriptor. +func TestIssue11146(t *testing.T) { + data := []byte("PK\x03\x040000000000000000" + + "000000\x01\x00\x00\x000\x01\x00\x00\xff\xff0000" + + "0000000000000000PK\x01\x02" + + "0000\b0\b\x00000000000000" + + "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x000000PK\x05\x06\x00\x00" + + "\x00\x0000\x01\x0000008\x00\x00\x00\x00\x00") + z, err := NewReader(bytes.NewReader(data), int64(len(data))) + if err != nil { + t.Fatal(err) + } + r, err := z.File[0].Open() + if err != nil { + t.Fatal(err) + } + _, err = io.ReadAll(r) + if err != io.ErrUnexpectedEOF { + t.Errorf("File[0] error = %v; want io.ErrUnexpectedEOF", err) + } + r.Close() +} + +// Verify we do not treat non-zip64 archives as zip64 +func TestIssue12449(t *testing.T) { + data := []byte{ + 0x50, 0x4b, 0x03, 0x04, 0x14, 0x00, 0x08, 0x00, + 0x00, 0x00, 0x6b, 0xb4, 0xba, 0x46, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x03, 0x00, 0x18, 0x00, 0xca, 0x64, + 0x55, 0x75, 0x78, 0x0b, 0x00, 0x50, 0x4b, 0x05, + 0x06, 0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x01, + 0x00, 0x49, 0x00, 0x00, 0x00, 0x44, 0x00, 0x00, + 0x00, 0x31, 0x31, 0x31, 0x32, 0x32, 0x32, 0x0a, + 0x50, 0x4b, 0x07, 0x08, 0x1d, 0x88, 0x77, 0xb0, + 0x07, 0x00, 0x00, 0x00, 0x07, 0x00, 0x00, 0x00, + 0x50, 0x4b, 0x01, 0x02, 0x14, 0x03, 0x14, 0x00, + 0x08, 0x00, 0x00, 0x00, 0x6b, 0xb4, 0xba, 0x46, + 0x1d, 0x88, 0x77, 0xb0, 0x07, 0x00, 0x00, 0x00, + 0x07, 0x00, 0x00, 0x00, 0x03, 0x00, 0x18, 0x00, + 0x20, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0xa0, 0x81, 0x00, 0x00, 0x00, 0x00, 0xca, 0x64, + 0x55, 0x75, 0x78, 0x0b, 0x00, 0x50, 0x4b, 0x05, + 0x06, 0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x01, + 0x00, 0x49, 0x00, 0x00, 0x00, 0x44, 0x00, 0x00, + 0x00, 0x97, 0x2b, 0x49, 0x23, 0x05, 0xc5, 0x0b, + 0xa7, 0xd1, 0x52, 0xa2, 0x9c, 0x50, 0x4b, 0x06, + 0x07, 0xc8, 0x19, 0xc1, 0xaf, 0x94, 0x9c, 0x61, + 0x44, 0xbe, 0x94, 0x19, 0x42, 0x58, 0x12, 0xc6, + 0x5b, 0x50, 0x4b, 0x05, 0x06, 0x00, 0x00, 0x00, + 0x00, 0x01, 0x00, 0x01, 0x00, 0x69, 0x00, 0x00, + 0x00, 0x50, 0x00, 0x00, 0x00, 0x00, 0x00, + } + // Read in the archive. + _, err := NewReader(bytes.NewReader([]byte(data)), int64(len(data))) + if err != nil { + t.Errorf("Error reading the archive: %v", err) + } +} + +func TestFS(t *testing.T) { + for _, test := range []struct { + file string + want []string + }{ + { + "testdata/unix.zip", + []string{"hello", "dir/bar", "readonly"}, + }, + { + "testdata/subdir.zip", + []string{"a/b/c"}, + }, + } { + t.Run(test.file, func(t *testing.T) { + t.Parallel() + z, err := OpenReader(test.file) + if err != nil { + t.Fatal(err) + } + defer z.Close() + if err := fstest.TestFS(z, test.want...); err != nil { + t.Error(err) + } + }) + } +} + +func TestFSModTime(t *testing.T) { + t.Parallel() + z, err := OpenReader("testdata/subdir.zip") + if err != nil { + t.Fatal(err) + } + defer z.Close() + + for _, test := range []struct { + name string + want time.Time + }{ + { + "a", + time.Date(2021, 4, 19, 12, 29, 56, 0, timeZone(-7*time.Hour)).UTC(), + }, + { + "a/b/c", + time.Date(2021, 4, 19, 12, 29, 59, 0, timeZone(-7*time.Hour)).UTC(), + }, + } { + fi, err := fs.Stat(z, test.name) + if err != nil { + t.Errorf("%s: %v", test.name, err) + continue + } + if got := fi.ModTime(); !got.Equal(test.want) { + t.Errorf("%s: got modtime %v, want %v", test.name, got, test.want) + } + } +} + +func TestCVE202127919(t *testing.T) { + // Archive containing only the file "../test.txt" + data := []byte{ + 0x50, 0x4b, 0x03, 0x04, 0x14, 0x00, 0x08, 0x00, + 0x08, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x0b, 0x00, 0x00, 0x00, 0x2e, 0x2e, + 0x2f, 0x74, 0x65, 0x73, 0x74, 0x2e, 0x74, 0x78, + 0x74, 0x0a, 0xc9, 0xc8, 0x2c, 0x56, 0xc8, 0x2c, + 0x56, 0x48, 0x54, 0x28, 0x49, 0x2d, 0x2e, 0x51, + 0x28, 0x49, 0xad, 0x28, 0x51, 0x48, 0xcb, 0xcc, + 0x49, 0xd5, 0xe3, 0x02, 0x04, 0x00, 0x00, 0xff, + 0xff, 0x50, 0x4b, 0x07, 0x08, 0xc0, 0xd7, 0xed, + 0xc3, 0x20, 0x00, 0x00, 0x00, 0x1a, 0x00, 0x00, + 0x00, 0x50, 0x4b, 0x01, 0x02, 0x14, 0x00, 0x14, + 0x00, 0x08, 0x00, 0x08, 0x00, 0x00, 0x00, 0x00, + 0x00, 0xc0, 0xd7, 0xed, 0xc3, 0x20, 0x00, 0x00, + 0x00, 0x1a, 0x00, 0x00, 0x00, 0x0b, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x2e, + 0x2e, 0x2f, 0x74, 0x65, 0x73, 0x74, 0x2e, 0x74, + 0x78, 0x74, 0x50, 0x4b, 0x05, 0x06, 0x00, 0x00, + 0x00, 0x00, 0x01, 0x00, 0x01, 0x00, 0x39, 0x00, + 0x00, 0x00, 0x59, 0x00, 0x00, 0x00, 0x00, 0x00, + } + r, err := NewReader(bytes.NewReader([]byte(data)), int64(len(data))) + if err != nil { + t.Fatalf("Error reading the archive: %v", err) + } + _, err = r.Open("test.txt") + if err != nil { + t.Errorf("Error reading file: %v", err) + } + if len(r.File) != 1 { + t.Fatalf("No entries in the file list") + } + if r.File[0].Name != "../test.txt" { + t.Errorf("Unexpected entry name: %s", r.File[0].Name) + } + if _, err := r.File[0].Open(); err != nil { + t.Errorf("Error opening file: %v", err) + } +} + +func TestCVE202133196(t *testing.T) { + // Archive that indicates it has 1 << 128 -1 files, + // this would previously cause a panic due to attempting + // to allocate a slice with 1 << 128 -1 elements. + data := []byte{ + 0x50, 0x4b, 0x03, 0x04, 0x14, 0x00, 0x08, 0x08, + 0x08, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, 0x01, 0x02, + 0x03, 0x62, 0x61, 0x65, 0x03, 0x04, 0x00, 0x00, + 0xff, 0xff, 0x50, 0x4b, 0x07, 0x08, 0xbe, 0x20, + 0x5c, 0x6c, 0x09, 0x00, 0x00, 0x00, 0x03, 0x00, + 0x00, 0x00, 0x50, 0x4b, 0x01, 0x02, 0x14, 0x00, + 0x14, 0x00, 0x08, 0x08, 0x08, 0x00, 0x00, 0x00, + 0x00, 0x00, 0xbe, 0x20, 0x5c, 0x6c, 0x09, 0x00, + 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, 0x03, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x01, 0x02, 0x03, 0x50, 0x4b, 0x06, 0x06, 0x2c, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x2d, + 0x00, 0x2d, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0x31, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x3a, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x50, 0x4b, 0x06, 0x07, 0x00, + 0x00, 0x00, 0x00, 0x6b, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x50, + 0x4b, 0x05, 0x06, 0x00, 0x00, 0x00, 0x00, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0x00, 0x00, + } + _, err := NewReader(bytes.NewReader(data), int64(len(data))) + if err != ErrFormat { + t.Fatalf("unexpected error, got: %v, want: %v", err, ErrFormat) + } + + // Also check that an archive containing a handful of empty + // files doesn't cause an issue + b := bytes.NewBuffer(nil) + w := NewWriter(b) + for i := 0; i < 5; i++ { + _, err := w.Create("") + if err != nil { + t.Fatalf("Writer.Create failed: %s", err) + } + } + if err := w.Close(); err != nil { + t.Fatalf("Writer.Close failed: %s", err) + } + r, err := NewReader(bytes.NewReader(b.Bytes()), int64(b.Len())) + if err != nil { + t.Fatalf("NewReader failed: %s", err) + } + if len(r.File) != 5 { + t.Errorf("Archive has unexpected number of files, got %d, want 5", len(r.File)) + } +} + +func TestCVE202139293(t *testing.T) { + // directory size is so large, that the check in Reader.init + // overflows when subtracting from the archive size, causing + // the pre-allocation check to be bypassed. + data := []byte{ + 0x50, 0x4b, 0x06, 0x06, 0x05, 0x06, 0x31, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x50, 0x4b, + 0x06, 0x07, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01, + 0x00, 0x00, 0x50, 0x4b, 0x05, 0x06, 0x00, 0x1a, 0x00, 0x00, 0x00, 0x00, 0x00, 0x50, 0x4b, + 0x06, 0x07, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01, + 0x00, 0x00, 0x00, 0x50, 0x4b, 0x05, 0x06, 0x00, 0x31, 0x00, 0x00, 0x00, 0x00, 0xff, 0xff, + 0xff, 0x50, 0xfe, 0x00, 0xff, 0x00, 0x3a, 0x00, 0x00, 0x00, 0xff, + } + _, err := NewReader(bytes.NewReader(data), int64(len(data))) + if err != ErrFormat { + t.Fatalf("unexpected error, got: %v, want: %v", err, ErrFormat) + } +} + +func TestCVE202141772(t *testing.T) { + // Archive contains a file whose name is exclusively made up of '/', '\' + // characters, or "../", "..\" paths, which would previously cause a panic. + // + // Length Method Size Cmpr Date Time CRC-32 Name + // -------- ------ ------- ---- ---------- ----- -------- ---- + // 0 Stored 0 0% 08-05-2021 18:32 00000000 / + // 0 Stored 0 0% 09-14-2021 12:59 00000000 // + // 0 Stored 0 0% 09-14-2021 12:59 00000000 \ + // 11 Stored 11 0% 09-14-2021 13:04 0d4a1185 /test.txt + // -------- ------- --- ------- + // 11 11 0% 4 files + data := []byte{ + 0x50, 0x4b, 0x03, 0x04, 0x0a, 0x00, 0x00, 0x08, + 0x00, 0x00, 0x06, 0x94, 0x05, 0x53, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x2f, 0x50, + 0x4b, 0x03, 0x04, 0x0a, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x78, 0x67, 0x2e, 0x53, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x02, 0x00, 0x00, 0x00, 0x2f, 0x2f, 0x50, + 0x4b, 0x03, 0x04, 0x0a, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x78, 0x67, 0x2e, 0x53, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x01, 0x00, 0x00, 0x00, 0x5c, 0x50, 0x4b, + 0x03, 0x04, 0x0a, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x91, 0x68, 0x2e, 0x53, 0x85, 0x11, 0x4a, 0x0d, + 0x0b, 0x00, 0x00, 0x00, 0x0b, 0x00, 0x00, 0x00, + 0x09, 0x00, 0x00, 0x00, 0x2f, 0x74, 0x65, 0x73, + 0x74, 0x2e, 0x74, 0x78, 0x74, 0x68, 0x65, 0x6c, + 0x6c, 0x6f, 0x20, 0x77, 0x6f, 0x72, 0x6c, 0x64, + 0x50, 0x4b, 0x01, 0x02, 0x14, 0x03, 0x0a, 0x00, + 0x00, 0x08, 0x00, 0x00, 0x06, 0x94, 0x05, 0x53, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x10, 0x00, + 0xed, 0x41, 0x00, 0x00, 0x00, 0x00, 0x2f, 0x50, + 0x4b, 0x01, 0x02, 0x3f, 0x00, 0x0a, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x78, 0x67, 0x2e, 0x53, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x02, 0x00, 0x24, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x20, 0x00, 0x00, + 0x00, 0x1f, 0x00, 0x00, 0x00, 0x2f, 0x2f, 0x0a, + 0x00, 0x20, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01, + 0x00, 0x18, 0x00, 0x93, 0x98, 0x25, 0x57, 0x25, + 0xa9, 0xd7, 0x01, 0x93, 0x98, 0x25, 0x57, 0x25, + 0xa9, 0xd7, 0x01, 0x93, 0x98, 0x25, 0x57, 0x25, + 0xa9, 0xd7, 0x01, 0x50, 0x4b, 0x01, 0x02, 0x3f, + 0x00, 0x0a, 0x00, 0x00, 0x00, 0x00, 0x00, 0x78, + 0x67, 0x2e, 0x53, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01, + 0x00, 0x24, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x20, 0x00, 0x00, 0x00, 0x3f, 0x00, 0x00, + 0x00, 0x5c, 0x0a, 0x00, 0x20, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x01, 0x00, 0x18, 0x00, 0x93, 0x98, + 0x25, 0x57, 0x25, 0xa9, 0xd7, 0x01, 0x93, 0x98, + 0x25, 0x57, 0x25, 0xa9, 0xd7, 0x01, 0x93, 0x98, + 0x25, 0x57, 0x25, 0xa9, 0xd7, 0x01, 0x50, 0x4b, + 0x01, 0x02, 0x3f, 0x00, 0x0a, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x91, 0x68, 0x2e, 0x53, 0x85, 0x11, + 0x4a, 0x0d, 0x0b, 0x00, 0x00, 0x00, 0x0b, 0x00, + 0x00, 0x00, 0x09, 0x00, 0x24, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x20, 0x00, 0x00, 0x00, + 0x5e, 0x00, 0x00, 0x00, 0x2f, 0x74, 0x65, 0x73, + 0x74, 0x2e, 0x74, 0x78, 0x74, 0x0a, 0x00, 0x20, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x18, + 0x00, 0xa9, 0x80, 0x51, 0x01, 0x26, 0xa9, 0xd7, + 0x01, 0x31, 0xd1, 0x57, 0x01, 0x26, 0xa9, 0xd7, + 0x01, 0xdf, 0x48, 0x85, 0xf9, 0x25, 0xa9, 0xd7, + 0x01, 0x50, 0x4b, 0x05, 0x06, 0x00, 0x00, 0x00, + 0x00, 0x04, 0x00, 0x04, 0x00, 0x31, 0x01, 0x00, + 0x00, 0x90, 0x00, 0x00, 0x00, 0x00, 0x00, + } + r, err := NewReader(bytes.NewReader([]byte(data)), int64(len(data))) + if err != nil { + t.Fatalf("Error reading the archive: %v", err) + } + entryNames := []string{`/`, `//`, `\`, `/test.txt`} + var names []string + for _, f := range r.File { + names = append(names, f.Name) + if _, err := f.Open(); err != nil { + t.Errorf("Error opening %q: %v", f.Name, err) + } + if _, err := r.Open(f.Name); err == nil { + t.Errorf("Opening %q with fs.FS API succeeded", f.Name) + } + } + if !reflect.DeepEqual(names, entryNames) { + t.Errorf("Unexpected file entries: %q", names) + } + if _, err := r.Open(""); err == nil { + t.Errorf("Opening %q with fs.FS API succeeded", "") + } + if _, err := r.Open("test.txt"); err != nil { + t.Errorf("Error opening %q with fs.FS API: %v", "test.txt", err) + } + dirEntries, err := fs.ReadDir(r, ".") + if err != nil { + t.Fatalf("Error reading the root directory: %v", err) + } + if len(dirEntries) != 1 || dirEntries[0].Name() != "test.txt" { + t.Errorf("Unexpected directory entries") + for _, dirEntry := range dirEntries { + _, err := r.Open(dirEntry.Name()) + t.Logf("%q (Open error: %v)", dirEntry.Name(), err) + } + t.FailNow() + } + info, err := dirEntries[0].Info() + if err != nil { + t.Fatalf("Error reading info entry: %v", err) + } + if name := info.Name(); name != "test.txt" { + t.Errorf("Inconsistent name in info entry: %v", name) + } +} diff --git a/src/archive/zip/register.go b/src/archive/zip/register.go new file mode 100644 index 0000000..4389246 --- /dev/null +++ b/src/archive/zip/register.go @@ -0,0 +1,147 @@ +// Copyright 2010 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package zip + +import ( + "compress/flate" + "errors" + "io" + "sync" +) + +// A Compressor returns a new compressing writer, writing to w. +// The WriteCloser's Close method must be used to flush pending data to w. +// The Compressor itself must be safe to invoke from multiple goroutines +// simultaneously, but each returned writer will be used only by +// one goroutine at a time. +type Compressor func(w io.Writer) (io.WriteCloser, error) + +// A Decompressor returns a new decompressing reader, reading from r. +// The ReadCloser's Close method must be used to release associated resources. +// The Decompressor itself must be safe to invoke from multiple goroutines +// simultaneously, but each returned reader will be used only by +// one goroutine at a time. +type Decompressor func(r io.Reader) io.ReadCloser + +var flateWriterPool sync.Pool + +func newFlateWriter(w io.Writer) io.WriteCloser { + fw, ok := flateWriterPool.Get().(*flate.Writer) + if ok { + fw.Reset(w) + } else { + fw, _ = flate.NewWriter(w, 5) + } + return &pooledFlateWriter{fw: fw} +} + +type pooledFlateWriter struct { + mu sync.Mutex // guards Close and Write + fw *flate.Writer +} + +func (w *pooledFlateWriter) Write(p []byte) (n int, err error) { + w.mu.Lock() + defer w.mu.Unlock() + if w.fw == nil { + return 0, errors.New("Write after Close") + } + return w.fw.Write(p) +} + +func (w *pooledFlateWriter) Close() error { + w.mu.Lock() + defer w.mu.Unlock() + var err error + if w.fw != nil { + err = w.fw.Close() + flateWriterPool.Put(w.fw) + w.fw = nil + } + return err +} + +var flateReaderPool sync.Pool + +func newFlateReader(r io.Reader) io.ReadCloser { + fr, ok := flateReaderPool.Get().(io.ReadCloser) + if ok { + fr.(flate.Resetter).Reset(r, nil) + } else { + fr = flate.NewReader(r) + } + return &pooledFlateReader{fr: fr} +} + +type pooledFlateReader struct { + mu sync.Mutex // guards Close and Read + fr io.ReadCloser +} + +func (r *pooledFlateReader) Read(p []byte) (n int, err error) { + r.mu.Lock() + defer r.mu.Unlock() + if r.fr == nil { + return 0, errors.New("Read after Close") + } + return r.fr.Read(p) +} + +func (r *pooledFlateReader) Close() error { + r.mu.Lock() + defer r.mu.Unlock() + var err error + if r.fr != nil { + err = r.fr.Close() + flateReaderPool.Put(r.fr) + r.fr = nil + } + return err +} + +var ( + compressors sync.Map // map[uint16]Compressor + decompressors sync.Map // map[uint16]Decompressor +) + +func init() { + compressors.Store(Store, Compressor(func(w io.Writer) (io.WriteCloser, error) { return &nopCloser{w}, nil })) + compressors.Store(Deflate, Compressor(func(w io.Writer) (io.WriteCloser, error) { return newFlateWriter(w), nil })) + + decompressors.Store(Store, Decompressor(io.NopCloser)) + decompressors.Store(Deflate, Decompressor(newFlateReader)) +} + +// RegisterDecompressor allows custom decompressors for a specified method ID. +// The common methods Store and Deflate are built in. +func RegisterDecompressor(method uint16, dcomp Decompressor) { + if _, dup := decompressors.LoadOrStore(method, dcomp); dup { + panic("decompressor already registered") + } +} + +// RegisterCompressor registers custom compressors for a specified method ID. +// The common methods Store and Deflate are built in. +func RegisterCompressor(method uint16, comp Compressor) { + if _, dup := compressors.LoadOrStore(method, comp); dup { + panic("compressor already registered") + } +} + +func compressor(method uint16) Compressor { + ci, ok := compressors.Load(method) + if !ok { + return nil + } + return ci.(Compressor) +} + +func decompressor(method uint16) Decompressor { + di, ok := decompressors.Load(method) + if !ok { + return nil + } + return di.(Decompressor) +} diff --git a/src/archive/zip/struct.go b/src/archive/zip/struct.go new file mode 100644 index 0000000..4dd29f3 --- /dev/null +++ b/src/archive/zip/struct.go @@ -0,0 +1,390 @@ +// Copyright 2010 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +/* +Package zip provides support for reading and writing ZIP archives. + +See: https://www.pkware.com/appnote + +This package does not support disk spanning. + +A note about ZIP64: + +To be backwards compatible the FileHeader has both 32 and 64 bit Size +fields. The 64 bit fields will always contain the correct value and +for normal archives both fields will be the same. For files requiring +the ZIP64 format the 32 bit fields will be 0xffffffff and the 64 bit +fields must be used instead. +*/ +package zip + +import ( + "io/fs" + "path" + "time" +) + +// Compression methods. +const ( + Store uint16 = 0 // no compression + Deflate uint16 = 8 // DEFLATE compressed +) + +const ( + fileHeaderSignature = 0x04034b50 + directoryHeaderSignature = 0x02014b50 + directoryEndSignature = 0x06054b50 + directory64LocSignature = 0x07064b50 + directory64EndSignature = 0x06064b50 + dataDescriptorSignature = 0x08074b50 // de-facto standard; required by OS X Finder + fileHeaderLen = 30 // + filename + extra + directoryHeaderLen = 46 // + filename + extra + comment + directoryEndLen = 22 // + comment + dataDescriptorLen = 16 // four uint32: descriptor signature, crc32, compressed size, size + dataDescriptor64Len = 24 // descriptor with 8 byte sizes + directory64LocLen = 20 // + directory64EndLen = 56 // + extra + + // Constants for the first byte in CreatorVersion. + creatorFAT = 0 + creatorUnix = 3 + creatorNTFS = 11 + creatorVFAT = 14 + creatorMacOSX = 19 + + // Version numbers. + zipVersion20 = 20 // 2.0 + zipVersion45 = 45 // 4.5 (reads and writes zip64 archives) + + // Limits for non zip64 files. + uint16max = (1 << 16) - 1 + uint32max = (1 << 32) - 1 + + // Extra header IDs. + // + // IDs 0..31 are reserved for official use by PKWARE. + // IDs above that range are defined by third-party vendors. + // Since ZIP lacked high precision timestamps (nor a official specification + // of the timezone used for the date fields), many competing extra fields + // have been invented. Pervasive use effectively makes them "official". + // + // See http://mdfs.net/Docs/Comp/Archiving/Zip/ExtraField + zip64ExtraID = 0x0001 // Zip64 extended information + ntfsExtraID = 0x000a // NTFS + unixExtraID = 0x000d // UNIX + extTimeExtraID = 0x5455 // Extended timestamp + infoZipUnixExtraID = 0x5855 // Info-ZIP Unix extension +) + +// FileHeader describes a file within a zip file. +// See the zip spec for details. +type FileHeader struct { + // Name is the name of the file. + // + // It must be a relative path, not start with a drive letter (such as "C:"), + // and must use forward slashes instead of back slashes. A trailing slash + // indicates that this file is a directory and should have no data. + // + // When reading zip files, the Name field is populated from + // the zip file directly and is not validated for correctness. + // It is the caller's responsibility to sanitize it as + // appropriate, including canonicalizing slash directions, + // validating that paths are relative, and preventing path + // traversal through filenames ("../../../"). + Name string + + // Comment is any arbitrary user-defined string shorter than 64KiB. + Comment string + + // NonUTF8 indicates that Name and Comment are not encoded in UTF-8. + // + // By specification, the only other encoding permitted should be CP-437, + // but historically many ZIP readers interpret Name and Comment as whatever + // the system's local character encoding happens to be. + // + // This flag should only be set if the user intends to encode a non-portable + // ZIP file for a specific localized region. Otherwise, the Writer + // automatically sets the ZIP format's UTF-8 flag for valid UTF-8 strings. + NonUTF8 bool + + CreatorVersion uint16 + ReaderVersion uint16 + Flags uint16 + + // Method is the compression method. If zero, Store is used. + Method uint16 + + // Modified is the modified time of the file. + // + // When reading, an extended timestamp is preferred over the legacy MS-DOS + // date field, and the offset between the times is used as the timezone. + // If only the MS-DOS date is present, the timezone is assumed to be UTC. + // + // When writing, an extended timestamp (which is timezone-agnostic) is + // always emitted. The legacy MS-DOS date field is encoded according to the + // location of the Modified time. + Modified time.Time + ModifiedTime uint16 // Deprecated: Legacy MS-DOS date; use Modified instead. + ModifiedDate uint16 // Deprecated: Legacy MS-DOS time; use Modified instead. + + CRC32 uint32 + CompressedSize uint32 // Deprecated: Use CompressedSize64 instead. + UncompressedSize uint32 // Deprecated: Use UncompressedSize64 instead. + CompressedSize64 uint64 + UncompressedSize64 uint64 + Extra []byte + ExternalAttrs uint32 // Meaning depends on CreatorVersion +} + +// FileInfo returns an fs.FileInfo for the FileHeader. +func (h *FileHeader) FileInfo() fs.FileInfo { + return headerFileInfo{h} +} + +// headerFileInfo implements fs.FileInfo. +type headerFileInfo struct { + fh *FileHeader +} + +func (fi headerFileInfo) Name() string { return path.Base(fi.fh.Name) } +func (fi headerFileInfo) Size() int64 { + if fi.fh.UncompressedSize64 > 0 { + return int64(fi.fh.UncompressedSize64) + } + return int64(fi.fh.UncompressedSize) +} +func (fi headerFileInfo) IsDir() bool { return fi.Mode().IsDir() } +func (fi headerFileInfo) ModTime() time.Time { + if fi.fh.Modified.IsZero() { + return fi.fh.ModTime() + } + return fi.fh.Modified.UTC() +} +func (fi headerFileInfo) Mode() fs.FileMode { return fi.fh.Mode() } +func (fi headerFileInfo) Type() fs.FileMode { return fi.fh.Mode().Type() } +func (fi headerFileInfo) Sys() interface{} { return fi.fh } + +func (fi headerFileInfo) Info() (fs.FileInfo, error) { return fi, nil } + +// FileInfoHeader creates a partially-populated FileHeader from an +// fs.FileInfo. +// Because fs.FileInfo's Name method returns only the base name of +// the file it describes, it may be necessary to modify the Name field +// of the returned header to provide the full path name of the file. +// If compression is desired, callers should set the FileHeader.Method +// field; it is unset by default. +func FileInfoHeader(fi fs.FileInfo) (*FileHeader, error) { + size := fi.Size() + fh := &FileHeader{ + Name: fi.Name(), + UncompressedSize64: uint64(size), + } + fh.SetModTime(fi.ModTime()) + fh.SetMode(fi.Mode()) + if fh.UncompressedSize64 > uint32max { + fh.UncompressedSize = uint32max + } else { + fh.UncompressedSize = uint32(fh.UncompressedSize64) + } + return fh, nil +} + +type directoryEnd struct { + diskNbr uint32 // unused + dirDiskNbr uint32 // unused + dirRecordsThisDisk uint64 // unused + directoryRecords uint64 + directorySize uint64 + directoryOffset uint64 // relative to file + commentLen uint16 + comment string +} + +// timeZone returns a *time.Location based on the provided offset. +// If the offset is non-sensible, then this uses an offset of zero. +func timeZone(offset time.Duration) *time.Location { + const ( + minOffset = -12 * time.Hour // E.g., Baker island at -12:00 + maxOffset = +14 * time.Hour // E.g., Line island at +14:00 + offsetAlias = 15 * time.Minute // E.g., Nepal at +5:45 + ) + offset = offset.Round(offsetAlias) + if offset < minOffset || maxOffset < offset { + offset = 0 + } + return time.FixedZone("", int(offset/time.Second)) +} + +// msDosTimeToTime converts an MS-DOS date and time into a time.Time. +// The resolution is 2s. +// See: https://msdn.microsoft.com/en-us/library/ms724247(v=VS.85).aspx +func msDosTimeToTime(dosDate, dosTime uint16) time.Time { + return time.Date( + // date bits 0-4: day of month; 5-8: month; 9-15: years since 1980 + int(dosDate>>9+1980), + time.Month(dosDate>>5&0xf), + int(dosDate&0x1f), + + // time bits 0-4: second/2; 5-10: minute; 11-15: hour + int(dosTime>>11), + int(dosTime>>5&0x3f), + int(dosTime&0x1f*2), + 0, // nanoseconds + + time.UTC, + ) +} + +// timeToMsDosTime converts a time.Time to an MS-DOS date and time. +// The resolution is 2s. +// See: https://msdn.microsoft.com/en-us/library/ms724274(v=VS.85).aspx +func timeToMsDosTime(t time.Time) (fDate uint16, fTime uint16) { + fDate = uint16(t.Day() + int(t.Month())<<5 + (t.Year()-1980)<<9) + fTime = uint16(t.Second()/2 + t.Minute()<<5 + t.Hour()<<11) + return +} + +// ModTime returns the modification time in UTC using the legacy +// ModifiedDate and ModifiedTime fields. +// +// Deprecated: Use Modified instead. +func (h *FileHeader) ModTime() time.Time { + return msDosTimeToTime(h.ModifiedDate, h.ModifiedTime) +} + +// SetModTime sets the Modified, ModifiedTime, and ModifiedDate fields +// to the given time in UTC. +// +// Deprecated: Use Modified instead. +func (h *FileHeader) SetModTime(t time.Time) { + t = t.UTC() // Convert to UTC for compatibility + h.Modified = t + h.ModifiedDate, h.ModifiedTime = timeToMsDosTime(t) +} + +const ( + // Unix constants. The specification doesn't mention them, + // but these seem to be the values agreed on by tools. + s_IFMT = 0xf000 + s_IFSOCK = 0xc000 + s_IFLNK = 0xa000 + s_IFREG = 0x8000 + s_IFBLK = 0x6000 + s_IFDIR = 0x4000 + s_IFCHR = 0x2000 + s_IFIFO = 0x1000 + s_ISUID = 0x800 + s_ISGID = 0x400 + s_ISVTX = 0x200 + + msdosDir = 0x10 + msdosReadOnly = 0x01 +) + +// Mode returns the permission and mode bits for the FileHeader. +func (h *FileHeader) Mode() (mode fs.FileMode) { + switch h.CreatorVersion >> 8 { + case creatorUnix, creatorMacOSX: + mode = unixModeToFileMode(h.ExternalAttrs >> 16) + case creatorNTFS, creatorVFAT, creatorFAT: + mode = msdosModeToFileMode(h.ExternalAttrs) + } + if len(h.Name) > 0 && h.Name[len(h.Name)-1] == '/' { + mode |= fs.ModeDir + } + return mode +} + +// SetMode changes the permission and mode bits for the FileHeader. +func (h *FileHeader) SetMode(mode fs.FileMode) { + h.CreatorVersion = h.CreatorVersion&0xff | creatorUnix<<8 + h.ExternalAttrs = fileModeToUnixMode(mode) << 16 + + // set MSDOS attributes too, as the original zip does. + if mode&fs.ModeDir != 0 { + h.ExternalAttrs |= msdosDir + } + if mode&0200 == 0 { + h.ExternalAttrs |= msdosReadOnly + } +} + +// isZip64 reports whether the file size exceeds the 32 bit limit +func (h *FileHeader) isZip64() bool { + return h.CompressedSize64 >= uint32max || h.UncompressedSize64 >= uint32max +} + +func msdosModeToFileMode(m uint32) (mode fs.FileMode) { + if m&msdosDir != 0 { + mode = fs.ModeDir | 0777 + } else { + mode = 0666 + } + if m&msdosReadOnly != 0 { + mode &^= 0222 + } + return mode +} + +func fileModeToUnixMode(mode fs.FileMode) uint32 { + var m uint32 + switch mode & fs.ModeType { + default: + m = s_IFREG + case fs.ModeDir: + m = s_IFDIR + case fs.ModeSymlink: + m = s_IFLNK + case fs.ModeNamedPipe: + m = s_IFIFO + case fs.ModeSocket: + m = s_IFSOCK + case fs.ModeDevice: + if mode&fs.ModeCharDevice != 0 { + m = s_IFCHR + } else { + m = s_IFBLK + } + } + if mode&fs.ModeSetuid != 0 { + m |= s_ISUID + } + if mode&fs.ModeSetgid != 0 { + m |= s_ISGID + } + if mode&fs.ModeSticky != 0 { + m |= s_ISVTX + } + return m | uint32(mode&0777) +} + +func unixModeToFileMode(m uint32) fs.FileMode { + mode := fs.FileMode(m & 0777) + switch m & s_IFMT { + case s_IFBLK: + mode |= fs.ModeDevice + case s_IFCHR: + mode |= fs.ModeDevice | fs.ModeCharDevice + case s_IFDIR: + mode |= fs.ModeDir + case s_IFIFO: + mode |= fs.ModeNamedPipe + case s_IFLNK: + mode |= fs.ModeSymlink + case s_IFREG: + // nothing to do + case s_IFSOCK: + mode |= fs.ModeSocket + } + if m&s_ISGID != 0 { + mode |= fs.ModeSetgid + } + if m&s_ISUID != 0 { + mode |= fs.ModeSetuid + } + if m&s_ISVTX != 0 { + mode |= fs.ModeSticky + } + return mode +} diff --git a/src/archive/zip/testdata/crc32-not-streamed.zip b/src/archive/zip/testdata/crc32-not-streamed.zip Binary files differnew file mode 100644 index 0000000..f268d88 --- /dev/null +++ b/src/archive/zip/testdata/crc32-not-streamed.zip diff --git a/src/archive/zip/testdata/dd.zip b/src/archive/zip/testdata/dd.zip Binary files differnew file mode 100644 index 0000000..e53378b --- /dev/null +++ b/src/archive/zip/testdata/dd.zip diff --git a/src/archive/zip/testdata/go-no-datadesc-sig.zip.base64 b/src/archive/zip/testdata/go-no-datadesc-sig.zip.base64 new file mode 100644 index 0000000..1c2c071 --- /dev/null +++ b/src/archive/zip/testdata/go-no-datadesc-sig.zip.base64 @@ -0,0 +1 @@ +UEsDBBQACAAAAGWHaECoZTJ+BAAAAAQAAAAHABgAZm9vLnR4dFVUBQAD3lVZT3V4CwABBPUBAAAEFAAAAGZvbwqoZTJ+BAAAAAQAAABQSwMEFAAIAAAAZodoQOmzogQEAAAABAAAAAcAGABiYXIudHh0VVQFAAPgVVlPdXgLAAEE9QEAAAQUAAAAYmFyCumzogQEAAAABAAAAFBLAQIUAxQACAAAAGWHaECoZTJ+BAAAAAQAAAAHABgAAAAAAAAAAACkgQAAAABmb28udHh0VVQFAAPeVVlPdXgLAAEE9QEAAAQUAAAAUEsBAhQDFAAIAAAAZodoQOmzogQEAAAABAAAAAcAGAAAAAAAAAAAAKSBTQAAAGJhci50eHRVVAUAA+BVWU91eAsAAQT1AQAABBQAAABQSwUGAAAAAAIAAgCaAAAAmgAAAAAA diff --git a/src/archive/zip/testdata/go-with-datadesc-sig.zip b/src/archive/zip/testdata/go-with-datadesc-sig.zip Binary files differnew file mode 100644 index 0000000..bcfe121 --- /dev/null +++ b/src/archive/zip/testdata/go-with-datadesc-sig.zip diff --git a/src/archive/zip/testdata/gophercolor16x16.png b/src/archive/zip/testdata/gophercolor16x16.png Binary files differnew file mode 100644 index 0000000..48854ff --- /dev/null +++ b/src/archive/zip/testdata/gophercolor16x16.png diff --git a/src/archive/zip/testdata/readme.notzip b/src/archive/zip/testdata/readme.notzip Binary files differnew file mode 100644 index 0000000..8173727 --- /dev/null +++ b/src/archive/zip/testdata/readme.notzip diff --git a/src/archive/zip/testdata/readme.zip b/src/archive/zip/testdata/readme.zip Binary files differnew file mode 100644 index 0000000..5642a67 --- /dev/null +++ b/src/archive/zip/testdata/readme.zip diff --git a/src/archive/zip/testdata/subdir.zip b/src/archive/zip/testdata/subdir.zip Binary files differnew file mode 100644 index 0000000..324d06b --- /dev/null +++ b/src/archive/zip/testdata/subdir.zip diff --git a/src/archive/zip/testdata/symlink.zip b/src/archive/zip/testdata/symlink.zip Binary files differnew file mode 100644 index 0000000..af84693 --- /dev/null +++ b/src/archive/zip/testdata/symlink.zip diff --git a/src/archive/zip/testdata/test-trailing-junk.zip b/src/archive/zip/testdata/test-trailing-junk.zip Binary files differnew file mode 100644 index 0000000..42281b4 --- /dev/null +++ b/src/archive/zip/testdata/test-trailing-junk.zip diff --git a/src/archive/zip/testdata/test.zip b/src/archive/zip/testdata/test.zip Binary files differnew file mode 100644 index 0000000..03890c0 --- /dev/null +++ b/src/archive/zip/testdata/test.zip diff --git a/src/archive/zip/testdata/time-22738.zip b/src/archive/zip/testdata/time-22738.zip Binary files differnew file mode 100644 index 0000000..eb85b57 --- /dev/null +++ b/src/archive/zip/testdata/time-22738.zip diff --git a/src/archive/zip/testdata/time-7zip.zip b/src/archive/zip/testdata/time-7zip.zip Binary files differnew file mode 100644 index 0000000..4f74819 --- /dev/null +++ b/src/archive/zip/testdata/time-7zip.zip diff --git a/src/archive/zip/testdata/time-go.zip b/src/archive/zip/testdata/time-go.zip Binary files differnew file mode 100644 index 0000000..f008805 --- /dev/null +++ b/src/archive/zip/testdata/time-go.zip diff --git a/src/archive/zip/testdata/time-infozip.zip b/src/archive/zip/testdata/time-infozip.zip Binary files differnew file mode 100644 index 0000000..8e63948 --- /dev/null +++ b/src/archive/zip/testdata/time-infozip.zip diff --git a/src/archive/zip/testdata/time-osx.zip b/src/archive/zip/testdata/time-osx.zip Binary files differnew file mode 100644 index 0000000..e82c5c2 --- /dev/null +++ b/src/archive/zip/testdata/time-osx.zip diff --git a/src/archive/zip/testdata/time-win7.zip b/src/archive/zip/testdata/time-win7.zip Binary files differnew file mode 100644 index 0000000..8ba222b --- /dev/null +++ b/src/archive/zip/testdata/time-win7.zip diff --git a/src/archive/zip/testdata/time-winrar.zip b/src/archive/zip/testdata/time-winrar.zip Binary files differnew file mode 100644 index 0000000..a8a19b0 --- /dev/null +++ b/src/archive/zip/testdata/time-winrar.zip diff --git a/src/archive/zip/testdata/time-winzip.zip b/src/archive/zip/testdata/time-winzip.zip Binary files differnew file mode 100644 index 0000000..f6e8f8b --- /dev/null +++ b/src/archive/zip/testdata/time-winzip.zip diff --git a/src/archive/zip/testdata/unix.zip b/src/archive/zip/testdata/unix.zip Binary files differnew file mode 100644 index 0000000..ce1a981 --- /dev/null +++ b/src/archive/zip/testdata/unix.zip diff --git a/src/archive/zip/testdata/utf8-7zip.zip b/src/archive/zip/testdata/utf8-7zip.zip Binary files differnew file mode 100644 index 0000000..0e97884 --- /dev/null +++ b/src/archive/zip/testdata/utf8-7zip.zip diff --git a/src/archive/zip/testdata/utf8-infozip.zip b/src/archive/zip/testdata/utf8-infozip.zip Binary files differnew file mode 100644 index 0000000..25a8926 --- /dev/null +++ b/src/archive/zip/testdata/utf8-infozip.zip diff --git a/src/archive/zip/testdata/utf8-osx.zip b/src/archive/zip/testdata/utf8-osx.zip Binary files differnew file mode 100644 index 0000000..9b0c058 --- /dev/null +++ b/src/archive/zip/testdata/utf8-osx.zip diff --git a/src/archive/zip/testdata/utf8-winrar.zip b/src/archive/zip/testdata/utf8-winrar.zip Binary files differnew file mode 100644 index 0000000..4bad6c3 --- /dev/null +++ b/src/archive/zip/testdata/utf8-winrar.zip diff --git a/src/archive/zip/testdata/utf8-winzip.zip b/src/archive/zip/testdata/utf8-winzip.zip Binary files differnew file mode 100644 index 0000000..909d52e --- /dev/null +++ b/src/archive/zip/testdata/utf8-winzip.zip diff --git a/src/archive/zip/testdata/winxp.zip b/src/archive/zip/testdata/winxp.zip Binary files differnew file mode 100644 index 0000000..3919322 --- /dev/null +++ b/src/archive/zip/testdata/winxp.zip diff --git a/src/archive/zip/testdata/zip64-2.zip b/src/archive/zip/testdata/zip64-2.zip Binary files differnew file mode 100644 index 0000000..f844e35 --- /dev/null +++ b/src/archive/zip/testdata/zip64-2.zip diff --git a/src/archive/zip/testdata/zip64.zip b/src/archive/zip/testdata/zip64.zip Binary files differnew file mode 100644 index 0000000..a2ee1fa --- /dev/null +++ b/src/archive/zip/testdata/zip64.zip diff --git a/src/archive/zip/writer.go b/src/archive/zip/writer.go new file mode 100644 index 0000000..cdc534e --- /dev/null +++ b/src/archive/zip/writer.go @@ -0,0 +1,541 @@ +// Copyright 2011 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package zip + +import ( + "bufio" + "encoding/binary" + "errors" + "hash" + "hash/crc32" + "io" + "strings" + "unicode/utf8" +) + +var ( + errLongName = errors.New("zip: FileHeader.Name too long") + errLongExtra = errors.New("zip: FileHeader.Extra too long") +) + +// Writer implements a zip file writer. +type Writer struct { + cw *countWriter + dir []*header + last *fileWriter + closed bool + compressors map[uint16]Compressor + comment string + + // testHookCloseSizeOffset if non-nil is called with the size + // of offset of the central directory at Close. + testHookCloseSizeOffset func(size, offset uint64) +} + +type header struct { + *FileHeader + offset uint64 +} + +// NewWriter returns a new Writer writing a zip file to w. +func NewWriter(w io.Writer) *Writer { + return &Writer{cw: &countWriter{w: bufio.NewWriter(w)}} +} + +// SetOffset sets the offset of the beginning of the zip data within the +// underlying writer. It should be used when the zip data is appended to an +// existing file, such as a binary executable. +// It must be called before any data is written. +func (w *Writer) SetOffset(n int64) { + if w.cw.count != 0 { + panic("zip: SetOffset called after data was written") + } + w.cw.count = n +} + +// Flush flushes any buffered data to the underlying writer. +// Calling Flush is not normally necessary; calling Close is sufficient. +func (w *Writer) Flush() error { + return w.cw.w.(*bufio.Writer).Flush() +} + +// SetComment sets the end-of-central-directory comment field. +// It can only be called before Close. +func (w *Writer) SetComment(comment string) error { + if len(comment) > uint16max { + return errors.New("zip: Writer.Comment too long") + } + w.comment = comment + return nil +} + +// Close finishes writing the zip file by writing the central directory. +// It does not close the underlying writer. +func (w *Writer) Close() error { + if w.last != nil && !w.last.closed { + if err := w.last.close(); err != nil { + return err + } + w.last = nil + } + if w.closed { + return errors.New("zip: writer closed twice") + } + w.closed = true + + // write central directory + start := w.cw.count + for _, h := range w.dir { + var buf [directoryHeaderLen]byte + b := writeBuf(buf[:]) + b.uint32(uint32(directoryHeaderSignature)) + b.uint16(h.CreatorVersion) + b.uint16(h.ReaderVersion) + b.uint16(h.Flags) + b.uint16(h.Method) + b.uint16(h.ModifiedTime) + b.uint16(h.ModifiedDate) + b.uint32(h.CRC32) + if h.isZip64() || h.offset >= uint32max { + // the file needs a zip64 header. store maxint in both + // 32 bit size fields (and offset later) to signal that the + // zip64 extra header should be used. + b.uint32(uint32max) // compressed size + b.uint32(uint32max) // uncompressed size + + // append a zip64 extra block to Extra + var buf [28]byte // 2x uint16 + 3x uint64 + eb := writeBuf(buf[:]) + eb.uint16(zip64ExtraID) + eb.uint16(24) // size = 3x uint64 + eb.uint64(h.UncompressedSize64) + eb.uint64(h.CompressedSize64) + eb.uint64(h.offset) + h.Extra = append(h.Extra, buf[:]...) + } else { + b.uint32(h.CompressedSize) + b.uint32(h.UncompressedSize) + } + + b.uint16(uint16(len(h.Name))) + b.uint16(uint16(len(h.Extra))) + b.uint16(uint16(len(h.Comment))) + b = b[4:] // skip disk number start and internal file attr (2x uint16) + b.uint32(h.ExternalAttrs) + if h.offset > uint32max { + b.uint32(uint32max) + } else { + b.uint32(uint32(h.offset)) + } + if _, err := w.cw.Write(buf[:]); err != nil { + return err + } + if _, err := io.WriteString(w.cw, h.Name); err != nil { + return err + } + if _, err := w.cw.Write(h.Extra); err != nil { + return err + } + if _, err := io.WriteString(w.cw, h.Comment); err != nil { + return err + } + } + end := w.cw.count + + records := uint64(len(w.dir)) + size := uint64(end - start) + offset := uint64(start) + + if f := w.testHookCloseSizeOffset; f != nil { + f(size, offset) + } + + if records >= uint16max || size >= uint32max || offset >= uint32max { + var buf [directory64EndLen + directory64LocLen]byte + b := writeBuf(buf[:]) + + // zip64 end of central directory record + b.uint32(directory64EndSignature) + b.uint64(directory64EndLen - 12) // length minus signature (uint32) and length fields (uint64) + b.uint16(zipVersion45) // version made by + b.uint16(zipVersion45) // version needed to extract + b.uint32(0) // number of this disk + b.uint32(0) // number of the disk with the start of the central directory + b.uint64(records) // total number of entries in the central directory on this disk + b.uint64(records) // total number of entries in the central directory + b.uint64(size) // size of the central directory + b.uint64(offset) // offset of start of central directory with respect to the starting disk number + + // zip64 end of central directory locator + b.uint32(directory64LocSignature) + b.uint32(0) // number of the disk with the start of the zip64 end of central directory + b.uint64(uint64(end)) // relative offset of the zip64 end of central directory record + b.uint32(1) // total number of disks + + if _, err := w.cw.Write(buf[:]); err != nil { + return err + } + + // store max values in the regular end record to signal + // that the zip64 values should be used instead + records = uint16max + size = uint32max + offset = uint32max + } + + // write end record + var buf [directoryEndLen]byte + b := writeBuf(buf[:]) + b.uint32(uint32(directoryEndSignature)) + b = b[4:] // skip over disk number and first disk number (2x uint16) + b.uint16(uint16(records)) // number of entries this disk + b.uint16(uint16(records)) // number of entries total + b.uint32(uint32(size)) // size of directory + b.uint32(uint32(offset)) // start of directory + b.uint16(uint16(len(w.comment))) // byte size of EOCD comment + if _, err := w.cw.Write(buf[:]); err != nil { + return err + } + if _, err := io.WriteString(w.cw, w.comment); err != nil { + return err + } + + return w.cw.w.(*bufio.Writer).Flush() +} + +// Create adds a file to the zip file using the provided name. +// It returns a Writer to which the file contents should be written. +// The file contents will be compressed using the Deflate method. +// The name must be a relative path: it must not start with a drive +// letter (e.g. C:) or leading slash, and only forward slashes are +// allowed. To create a directory instead of a file, add a trailing +// slash to the name. +// The file's contents must be written to the io.Writer before the next +// call to Create, CreateHeader, or Close. +func (w *Writer) Create(name string) (io.Writer, error) { + header := &FileHeader{ + Name: name, + Method: Deflate, + } + return w.CreateHeader(header) +} + +// detectUTF8 reports whether s is a valid UTF-8 string, and whether the string +// must be considered UTF-8 encoding (i.e., not compatible with CP-437, ASCII, +// or any other common encoding). +func detectUTF8(s string) (valid, require bool) { + for i := 0; i < len(s); { + r, size := utf8.DecodeRuneInString(s[i:]) + i += size + // Officially, ZIP uses CP-437, but many readers use the system's + // local character encoding. Most encoding are compatible with a large + // subset of CP-437, which itself is ASCII-like. + // + // Forbid 0x7e and 0x5c since EUC-KR and Shift-JIS replace those + // characters with localized currency and overline characters. + if r < 0x20 || r > 0x7d || r == 0x5c { + if !utf8.ValidRune(r) || (r == utf8.RuneError && size == 1) { + return false, false + } + require = true + } + } + return true, require +} + +// CreateHeader adds a file to the zip archive using the provided FileHeader +// for the file metadata. Writer takes ownership of fh and may mutate +// its fields. The caller must not modify fh after calling CreateHeader. +// +// This returns a Writer to which the file contents should be written. +// The file's contents must be written to the io.Writer before the next +// call to Create, CreateHeader, or Close. +func (w *Writer) CreateHeader(fh *FileHeader) (io.Writer, error) { + if w.last != nil && !w.last.closed { + if err := w.last.close(); err != nil { + return nil, err + } + } + if len(w.dir) > 0 && w.dir[len(w.dir)-1].FileHeader == fh { + // See https://golang.org/issue/11144 confusion. + return nil, errors.New("archive/zip: invalid duplicate FileHeader") + } + + // The ZIP format has a sad state of affairs regarding character encoding. + // Officially, the name and comment fields are supposed to be encoded + // in CP-437 (which is mostly compatible with ASCII), unless the UTF-8 + // flag bit is set. However, there are several problems: + // + // * Many ZIP readers still do not support UTF-8. + // * If the UTF-8 flag is cleared, several readers simply interpret the + // name and comment fields as whatever the local system encoding is. + // + // In order to avoid breaking readers without UTF-8 support, + // we avoid setting the UTF-8 flag if the strings are CP-437 compatible. + // However, if the strings require multibyte UTF-8 encoding and is a + // valid UTF-8 string, then we set the UTF-8 bit. + // + // For the case, where the user explicitly wants to specify the encoding + // as UTF-8, they will need to set the flag bit themselves. + utf8Valid1, utf8Require1 := detectUTF8(fh.Name) + utf8Valid2, utf8Require2 := detectUTF8(fh.Comment) + switch { + case fh.NonUTF8: + fh.Flags &^= 0x800 + case (utf8Require1 || utf8Require2) && (utf8Valid1 && utf8Valid2): + fh.Flags |= 0x800 + } + + fh.CreatorVersion = fh.CreatorVersion&0xff00 | zipVersion20 // preserve compatibility byte + fh.ReaderVersion = zipVersion20 + + // If Modified is set, this takes precedence over MS-DOS timestamp fields. + if !fh.Modified.IsZero() { + // Contrary to the FileHeader.SetModTime method, we intentionally + // do not convert to UTC, because we assume the user intends to encode + // the date using the specified timezone. A user may want this control + // because many legacy ZIP readers interpret the timestamp according + // to the local timezone. + // + // The timezone is only non-UTC if a user directly sets the Modified + // field directly themselves. All other approaches sets UTC. + fh.ModifiedDate, fh.ModifiedTime = timeToMsDosTime(fh.Modified) + + // Use "extended timestamp" format since this is what Info-ZIP uses. + // Nearly every major ZIP implementation uses a different format, + // but at least most seem to be able to understand the other formats. + // + // This format happens to be identical for both local and central header + // if modification time is the only timestamp being encoded. + var mbuf [9]byte // 2*SizeOf(uint16) + SizeOf(uint8) + SizeOf(uint32) + mt := uint32(fh.Modified.Unix()) + eb := writeBuf(mbuf[:]) + eb.uint16(extTimeExtraID) + eb.uint16(5) // Size: SizeOf(uint8) + SizeOf(uint32) + eb.uint8(1) // Flags: ModTime + eb.uint32(mt) // ModTime + fh.Extra = append(fh.Extra, mbuf[:]...) + } + + var ( + ow io.Writer + fw *fileWriter + ) + h := &header{ + FileHeader: fh, + offset: uint64(w.cw.count), + } + + if strings.HasSuffix(fh.Name, "/") { + // Set the compression method to Store to ensure data length is truly zero, + // which the writeHeader method always encodes for the size fields. + // This is necessary as most compression formats have non-zero lengths + // even when compressing an empty string. + fh.Method = Store + fh.Flags &^= 0x8 // we will not write a data descriptor + + // Explicitly clear sizes as they have no meaning for directories. + fh.CompressedSize = 0 + fh.CompressedSize64 = 0 + fh.UncompressedSize = 0 + fh.UncompressedSize64 = 0 + + ow = dirWriter{} + } else { + fh.Flags |= 0x8 // we will write a data descriptor + + fw = &fileWriter{ + zipw: w.cw, + compCount: &countWriter{w: w.cw}, + crc32: crc32.NewIEEE(), + } + comp := w.compressor(fh.Method) + if comp == nil { + return nil, ErrAlgorithm + } + var err error + fw.comp, err = comp(fw.compCount) + if err != nil { + return nil, err + } + fw.rawCount = &countWriter{w: fw.comp} + fw.header = h + ow = fw + } + w.dir = append(w.dir, h) + if err := writeHeader(w.cw, fh); err != nil { + return nil, err + } + // If we're creating a directory, fw is nil. + w.last = fw + return ow, nil +} + +func writeHeader(w io.Writer, h *FileHeader) error { + const maxUint16 = 1<<16 - 1 + if len(h.Name) > maxUint16 { + return errLongName + } + if len(h.Extra) > maxUint16 { + return errLongExtra + } + + var buf [fileHeaderLen]byte + b := writeBuf(buf[:]) + b.uint32(uint32(fileHeaderSignature)) + b.uint16(h.ReaderVersion) + b.uint16(h.Flags) + b.uint16(h.Method) + b.uint16(h.ModifiedTime) + b.uint16(h.ModifiedDate) + b.uint32(0) // since we are writing a data descriptor crc32, + b.uint32(0) // compressed size, + b.uint32(0) // and uncompressed size should be zero + b.uint16(uint16(len(h.Name))) + b.uint16(uint16(len(h.Extra))) + if _, err := w.Write(buf[:]); err != nil { + return err + } + if _, err := io.WriteString(w, h.Name); err != nil { + return err + } + _, err := w.Write(h.Extra) + return err +} + +// RegisterCompressor registers or overrides a custom compressor for a specific +// method ID. If a compressor for a given method is not found, Writer will +// default to looking up the compressor at the package level. +func (w *Writer) RegisterCompressor(method uint16, comp Compressor) { + if w.compressors == nil { + w.compressors = make(map[uint16]Compressor) + } + w.compressors[method] = comp +} + +func (w *Writer) compressor(method uint16) Compressor { + comp := w.compressors[method] + if comp == nil { + comp = compressor(method) + } + return comp +} + +type dirWriter struct{} + +func (dirWriter) Write(b []byte) (int, error) { + if len(b) == 0 { + return 0, nil + } + return 0, errors.New("zip: write to directory") +} + +type fileWriter struct { + *header + zipw io.Writer + rawCount *countWriter + comp io.WriteCloser + compCount *countWriter + crc32 hash.Hash32 + closed bool +} + +func (w *fileWriter) Write(p []byte) (int, error) { + if w.closed { + return 0, errors.New("zip: write to closed file") + } + w.crc32.Write(p) + return w.rawCount.Write(p) +} + +func (w *fileWriter) close() error { + if w.closed { + return errors.New("zip: file closed twice") + } + w.closed = true + if err := w.comp.Close(); err != nil { + return err + } + + // update FileHeader + fh := w.header.FileHeader + fh.CRC32 = w.crc32.Sum32() + fh.CompressedSize64 = uint64(w.compCount.count) + fh.UncompressedSize64 = uint64(w.rawCount.count) + + if fh.isZip64() { + fh.CompressedSize = uint32max + fh.UncompressedSize = uint32max + fh.ReaderVersion = zipVersion45 // requires 4.5 - File uses ZIP64 format extensions + } else { + fh.CompressedSize = uint32(fh.CompressedSize64) + fh.UncompressedSize = uint32(fh.UncompressedSize64) + } + + // Write data descriptor. This is more complicated than one would + // think, see e.g. comments in zipfile.c:putextended() and + // http://bugs.sun.com/bugdatabase/view_bug.do?bug_id=7073588. + // The approach here is to write 8 byte sizes if needed without + // adding a zip64 extra in the local header (too late anyway). + var buf []byte + if fh.isZip64() { + buf = make([]byte, dataDescriptor64Len) + } else { + buf = make([]byte, dataDescriptorLen) + } + b := writeBuf(buf) + b.uint32(dataDescriptorSignature) // de-facto standard, required by OS X + b.uint32(fh.CRC32) + if fh.isZip64() { + b.uint64(fh.CompressedSize64) + b.uint64(fh.UncompressedSize64) + } else { + b.uint32(fh.CompressedSize) + b.uint32(fh.UncompressedSize) + } + _, err := w.zipw.Write(buf) + return err +} + +type countWriter struct { + w io.Writer + count int64 +} + +func (w *countWriter) Write(p []byte) (int, error) { + n, err := w.w.Write(p) + w.count += int64(n) + return n, err +} + +type nopCloser struct { + io.Writer +} + +func (w nopCloser) Close() error { + return nil +} + +type writeBuf []byte + +func (b *writeBuf) uint8(v uint8) { + (*b)[0] = v + *b = (*b)[1:] +} + +func (b *writeBuf) uint16(v uint16) { + binary.LittleEndian.PutUint16(*b, v) + *b = (*b)[2:] +} + +func (b *writeBuf) uint32(v uint32) { + binary.LittleEndian.PutUint32(*b, v) + *b = (*b)[4:] +} + +func (b *writeBuf) uint64(v uint64) { + binary.LittleEndian.PutUint64(*b, v) + *b = (*b)[8:] +} diff --git a/src/archive/zip/writer_test.go b/src/archive/zip/writer_test.go new file mode 100644 index 0000000..5985144 --- /dev/null +++ b/src/archive/zip/writer_test.go @@ -0,0 +1,425 @@ +// Copyright 2011 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package zip + +import ( + "bytes" + "encoding/binary" + "fmt" + "io" + "io/fs" + "math/rand" + "os" + "strings" + "testing" + "time" +) + +// TODO(adg): a more sophisticated test suite + +type WriteTest struct { + Name string + Data []byte + Method uint16 + Mode fs.FileMode +} + +var writeTests = []WriteTest{ + { + Name: "foo", + Data: []byte("Rabbits, guinea pigs, gophers, marsupial rats, and quolls."), + Method: Store, + Mode: 0666, + }, + { + Name: "bar", + Data: nil, // large data set in the test + Method: Deflate, + Mode: 0644, + }, + { + Name: "setuid", + Data: []byte("setuid file"), + Method: Deflate, + Mode: 0755 | fs.ModeSetuid, + }, + { + Name: "setgid", + Data: []byte("setgid file"), + Method: Deflate, + Mode: 0755 | fs.ModeSetgid, + }, + { + Name: "symlink", + Data: []byte("../link/target"), + Method: Deflate, + Mode: 0755 | fs.ModeSymlink, + }, +} + +func TestWriter(t *testing.T) { + largeData := make([]byte, 1<<17) + if _, err := rand.Read(largeData); err != nil { + t.Fatal("rand.Read failed:", err) + } + writeTests[1].Data = largeData + defer func() { + writeTests[1].Data = nil + }() + + // write a zip file + buf := new(bytes.Buffer) + w := NewWriter(buf) + + for _, wt := range writeTests { + testCreate(t, w, &wt) + } + + if err := w.Close(); err != nil { + t.Fatal(err) + } + + // read it back + r, err := NewReader(bytes.NewReader(buf.Bytes()), int64(buf.Len())) + if err != nil { + t.Fatal(err) + } + for i, wt := range writeTests { + testReadFile(t, r.File[i], &wt) + } +} + +// TestWriterComment is test for EOCD comment read/write. +func TestWriterComment(t *testing.T) { + var tests = []struct { + comment string + ok bool + }{ + {"hi, hello", true}, + {"hi, こんにちわ", true}, + {strings.Repeat("a", uint16max), true}, + {strings.Repeat("a", uint16max+1), false}, + } + + for _, test := range tests { + // write a zip file + buf := new(bytes.Buffer) + w := NewWriter(buf) + if err := w.SetComment(test.comment); err != nil { + if test.ok { + t.Fatalf("SetComment: unexpected error %v", err) + } + continue + } else { + if !test.ok { + t.Fatalf("SetComment: unexpected success, want error") + } + } + + if err := w.Close(); test.ok == (err != nil) { + t.Fatal(err) + } + + if w.closed != test.ok { + t.Fatalf("Writer.closed: got %v, want %v", w.closed, test.ok) + } + + // skip read test in failure cases + if !test.ok { + continue + } + + // read it back + r, err := NewReader(bytes.NewReader(buf.Bytes()), int64(buf.Len())) + if err != nil { + t.Fatal(err) + } + if r.Comment != test.comment { + t.Fatalf("Reader.Comment: got %v, want %v", r.Comment, test.comment) + } + } +} + +func TestWriterUTF8(t *testing.T) { + var utf8Tests = []struct { + name string + comment string + nonUTF8 bool + flags uint16 + }{ + { + name: "hi, hello", + comment: "in the world", + flags: 0x8, + }, + { + name: "hi, こんにちわ", + comment: "in the world", + flags: 0x808, + }, + { + name: "hi, こんにちわ", + comment: "in the world", + nonUTF8: true, + flags: 0x8, + }, + { + name: "hi, hello", + comment: "in the 世界", + flags: 0x808, + }, + { + name: "hi, こんにちわ", + comment: "in the 世界", + flags: 0x808, + }, + { + name: "the replacement rune is �", + comment: "the replacement rune is �", + flags: 0x808, + }, + { + // Name is Japanese encoded in Shift JIS. + name: "\x93\xfa\x96{\x8c\xea.txt", + comment: "in the 世界", + flags: 0x008, // UTF-8 must not be set + }, + } + + // write a zip file + buf := new(bytes.Buffer) + w := NewWriter(buf) + + for _, test := range utf8Tests { + h := &FileHeader{ + Name: test.name, + Comment: test.comment, + NonUTF8: test.nonUTF8, + Method: Deflate, + } + w, err := w.CreateHeader(h) + if err != nil { + t.Fatal(err) + } + w.Write([]byte{}) + } + + if err := w.Close(); err != nil { + t.Fatal(err) + } + + // read it back + r, err := NewReader(bytes.NewReader(buf.Bytes()), int64(buf.Len())) + if err != nil { + t.Fatal(err) + } + for i, test := range utf8Tests { + flags := r.File[i].Flags + if flags != test.flags { + t.Errorf("CreateHeader(name=%q comment=%q nonUTF8=%v): flags=%#x, want %#x", test.name, test.comment, test.nonUTF8, flags, test.flags) + } + } +} + +func TestWriterTime(t *testing.T) { + var buf bytes.Buffer + h := &FileHeader{ + Name: "test.txt", + Modified: time.Date(2017, 10, 31, 21, 11, 57, 0, timeZone(-7*time.Hour)), + } + w := NewWriter(&buf) + if _, err := w.CreateHeader(h); err != nil { + t.Fatalf("unexpected CreateHeader error: %v", err) + } + if err := w.Close(); err != nil { + t.Fatalf("unexpected Close error: %v", err) + } + + want, err := os.ReadFile("testdata/time-go.zip") + if err != nil { + t.Fatalf("unexpected ReadFile error: %v", err) + } + if got := buf.Bytes(); !bytes.Equal(got, want) { + fmt.Printf("%x\n%x\n", got, want) + t.Error("contents of time-go.zip differ") + } +} + +func TestWriterOffset(t *testing.T) { + largeData := make([]byte, 1<<17) + if _, err := rand.Read(largeData); err != nil { + t.Fatal("rand.Read failed:", err) + } + writeTests[1].Data = largeData + defer func() { + writeTests[1].Data = nil + }() + + // write a zip file + buf := new(bytes.Buffer) + existingData := []byte{1, 2, 3, 1, 2, 3, 1, 2, 3} + n, _ := buf.Write(existingData) + w := NewWriter(buf) + w.SetOffset(int64(n)) + + for _, wt := range writeTests { + testCreate(t, w, &wt) + } + + if err := w.Close(); err != nil { + t.Fatal(err) + } + + // read it back + r, err := NewReader(bytes.NewReader(buf.Bytes()), int64(buf.Len())) + if err != nil { + t.Fatal(err) + } + for i, wt := range writeTests { + testReadFile(t, r.File[i], &wt) + } +} + +func TestWriterFlush(t *testing.T) { + var buf bytes.Buffer + w := NewWriter(struct{ io.Writer }{&buf}) + _, err := w.Create("foo") + if err != nil { + t.Fatal(err) + } + if buf.Len() > 0 { + t.Fatalf("Unexpected %d bytes already in buffer", buf.Len()) + } + if err := w.Flush(); err != nil { + t.Fatal(err) + } + if buf.Len() == 0 { + t.Fatal("No bytes written after Flush") + } +} + +func TestWriterDir(t *testing.T) { + w := NewWriter(io.Discard) + dw, err := w.Create("dir/") + if err != nil { + t.Fatal(err) + } + if _, err := dw.Write(nil); err != nil { + t.Errorf("Write(nil) to directory: got %v, want nil", err) + } + if _, err := dw.Write([]byte("hello")); err == nil { + t.Error(`Write("hello") to directory: got nil error, want non-nil`) + } +} + +func TestWriterDirAttributes(t *testing.T) { + var buf bytes.Buffer + w := NewWriter(&buf) + if _, err := w.CreateHeader(&FileHeader{ + Name: "dir/", + Method: Deflate, + CompressedSize64: 1234, + UncompressedSize64: 5678, + }); err != nil { + t.Fatal(err) + } + if err := w.Close(); err != nil { + t.Fatal(err) + } + b := buf.Bytes() + + var sig [4]byte + binary.LittleEndian.PutUint32(sig[:], uint32(fileHeaderSignature)) + + idx := bytes.Index(b, sig[:]) + if idx == -1 { + t.Fatal("file header not found") + } + b = b[idx:] + + if !bytes.Equal(b[6:10], []byte{0, 0, 0, 0}) { // FileHeader.Flags: 0, FileHeader.Method: 0 + t.Errorf("unexpected method and flags: %v", b[6:10]) + } + + if !bytes.Equal(b[14:26], make([]byte, 12)) { // FileHeader.{CRC32,CompressSize,UncompressedSize} all zero. + t.Errorf("unexpected crc, compress and uncompressed size to be 0 was: %v", b[14:26]) + } + + binary.LittleEndian.PutUint32(sig[:], uint32(dataDescriptorSignature)) + if bytes.Index(b, sig[:]) != -1 { + t.Error("there should be no data descriptor") + } +} + +func testCreate(t *testing.T, w *Writer, wt *WriteTest) { + header := &FileHeader{ + Name: wt.Name, + Method: wt.Method, + } + if wt.Mode != 0 { + header.SetMode(wt.Mode) + } + f, err := w.CreateHeader(header) + if err != nil { + t.Fatal(err) + } + _, err = f.Write(wt.Data) + if err != nil { + t.Fatal(err) + } +} + +func testReadFile(t *testing.T, f *File, wt *WriteTest) { + if f.Name != wt.Name { + t.Fatalf("File name: got %q, want %q", f.Name, wt.Name) + } + testFileMode(t, f, wt.Mode) + rc, err := f.Open() + if err != nil { + t.Fatal("opening:", err) + } + b, err := io.ReadAll(rc) + if err != nil { + t.Fatal("reading:", err) + } + err = rc.Close() + if err != nil { + t.Fatal("closing:", err) + } + if !bytes.Equal(b, wt.Data) { + t.Errorf("File contents %q, want %q", b, wt.Data) + } +} + +func BenchmarkCompressedZipGarbage(b *testing.B) { + bigBuf := bytes.Repeat([]byte("a"), 1<<20) + + runOnce := func(buf *bytes.Buffer) { + buf.Reset() + zw := NewWriter(buf) + for j := 0; j < 3; j++ { + w, _ := zw.CreateHeader(&FileHeader{ + Name: "foo", + Method: Deflate, + }) + w.Write(bigBuf) + } + zw.Close() + } + + b.ReportAllocs() + // Run once and then reset the timer. + // This effectively discards the very large initial flate setup cost, + // as well as the initialization of bigBuf. + runOnce(&bytes.Buffer{}) + b.ResetTimer() + + b.RunParallel(func(pb *testing.PB) { + var buf bytes.Buffer + for pb.Next() { + runOnce(&buf) + } + }) +} diff --git a/src/archive/zip/zip_test.go b/src/archive/zip/zip_test.go new file mode 100644 index 0000000..ead9cd3 --- /dev/null +++ b/src/archive/zip/zip_test.go @@ -0,0 +1,828 @@ +// Copyright 2011 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +// Tests that involve both reading and writing. + +package zip + +import ( + "bytes" + "errors" + "fmt" + "hash" + "internal/testenv" + "io" + "runtime" + "sort" + "strings" + "testing" + "time" +) + +func TestOver65kFiles(t *testing.T) { + if testing.Short() && testenv.Builder() == "" { + t.Skip("skipping in short mode") + } + buf := new(bytes.Buffer) + w := NewWriter(buf) + const nFiles = (1 << 16) + 42 + for i := 0; i < nFiles; i++ { + _, err := w.CreateHeader(&FileHeader{ + Name: fmt.Sprintf("%d.dat", i), + Method: Store, // avoid Issue 6136 and Issue 6138 + }) + if err != nil { + t.Fatalf("creating file %d: %v", i, err) + } + } + if err := w.Close(); err != nil { + t.Fatalf("Writer.Close: %v", err) + } + s := buf.String() + zr, err := NewReader(strings.NewReader(s), int64(len(s))) + if err != nil { + t.Fatalf("NewReader: %v", err) + } + if got := len(zr.File); got != nFiles { + t.Fatalf("File contains %d files, want %d", got, nFiles) + } + for i := 0; i < nFiles; i++ { + want := fmt.Sprintf("%d.dat", i) + if zr.File[i].Name != want { + t.Fatalf("File(%d) = %q, want %q", i, zr.File[i].Name, want) + } + } +} + +func TestModTime(t *testing.T) { + var testTime = time.Date(2009, time.November, 10, 23, 45, 58, 0, time.UTC) + fh := new(FileHeader) + fh.SetModTime(testTime) + outTime := fh.ModTime() + if !outTime.Equal(testTime) { + t.Errorf("times don't match: got %s, want %s", outTime, testTime) + } +} + +func testHeaderRoundTrip(fh *FileHeader, wantUncompressedSize uint32, wantUncompressedSize64 uint64, t *testing.T) { + fi := fh.FileInfo() + fh2, err := FileInfoHeader(fi) + if err != nil { + t.Fatal(err) + } + if got, want := fh2.Name, fh.Name; got != want { + t.Errorf("Name: got %s, want %s\n", got, want) + } + if got, want := fh2.UncompressedSize, wantUncompressedSize; got != want { + t.Errorf("UncompressedSize: got %d, want %d\n", got, want) + } + if got, want := fh2.UncompressedSize64, wantUncompressedSize64; got != want { + t.Errorf("UncompressedSize64: got %d, want %d\n", got, want) + } + if got, want := fh2.ModifiedTime, fh.ModifiedTime; got != want { + t.Errorf("ModifiedTime: got %d, want %d\n", got, want) + } + if got, want := fh2.ModifiedDate, fh.ModifiedDate; got != want { + t.Errorf("ModifiedDate: got %d, want %d\n", got, want) + } + + if sysfh, ok := fi.Sys().(*FileHeader); !ok && sysfh != fh { + t.Errorf("Sys didn't return original *FileHeader") + } +} + +func TestFileHeaderRoundTrip(t *testing.T) { + fh := &FileHeader{ + Name: "foo.txt", + UncompressedSize: 987654321, + ModifiedTime: 1234, + ModifiedDate: 5678, + } + testHeaderRoundTrip(fh, fh.UncompressedSize, uint64(fh.UncompressedSize), t) +} + +func TestFileHeaderRoundTrip64(t *testing.T) { + fh := &FileHeader{ + Name: "foo.txt", + UncompressedSize64: 9876543210, + ModifiedTime: 1234, + ModifiedDate: 5678, + } + testHeaderRoundTrip(fh, uint32max, fh.UncompressedSize64, t) +} + +func TestFileHeaderRoundTripModified(t *testing.T) { + fh := &FileHeader{ + Name: "foo.txt", + UncompressedSize: 987654321, + Modified: time.Now().Local(), + ModifiedTime: 1234, + ModifiedDate: 5678, + } + fi := fh.FileInfo() + fh2, err := FileInfoHeader(fi) + if err != nil { + t.Fatal(err) + } + if got, want := fh2.Modified, fh.Modified.UTC(); got != want { + t.Errorf("Modified: got %s, want %s\n", got, want) + } + if got, want := fi.ModTime(), fh.Modified.UTC(); got != want { + t.Errorf("Modified: got %s, want %s\n", got, want) + } +} + +func TestFileHeaderRoundTripWithoutModified(t *testing.T) { + fh := &FileHeader{ + Name: "foo.txt", + UncompressedSize: 987654321, + ModifiedTime: 1234, + ModifiedDate: 5678, + } + fi := fh.FileInfo() + fh2, err := FileInfoHeader(fi) + if err != nil { + t.Fatal(err) + } + if got, want := fh2.ModTime(), fh.ModTime(); got != want { + t.Errorf("Modified: got %s, want %s\n", got, want) + } + if got, want := fi.ModTime(), fh.ModTime(); got != want { + t.Errorf("Modified: got %s, want %s\n", got, want) + } +} + +type repeatedByte struct { + off int64 + b byte + n int64 +} + +// rleBuffer is a run-length-encoded byte buffer. +// It's an io.Writer (like a bytes.Buffer) and also an io.ReaderAt, +// allowing random-access reads. +type rleBuffer struct { + buf []repeatedByte +} + +func (r *rleBuffer) Size() int64 { + if len(r.buf) == 0 { + return 0 + } + last := &r.buf[len(r.buf)-1] + return last.off + last.n +} + +func (r *rleBuffer) Write(p []byte) (n int, err error) { + var rp *repeatedByte + if len(r.buf) > 0 { + rp = &r.buf[len(r.buf)-1] + // Fast path, if p is entirely the same byte repeated. + if lastByte := rp.b; len(p) > 0 && p[0] == lastByte { + if bytes.Count(p, []byte{lastByte}) == len(p) { + rp.n += int64(len(p)) + return len(p), nil + } + } + } + + for _, b := range p { + if rp == nil || rp.b != b { + r.buf = append(r.buf, repeatedByte{r.Size(), b, 1}) + rp = &r.buf[len(r.buf)-1] + } else { + rp.n++ + } + } + return len(p), nil +} + +func min(x, y int64) int64 { + if x < y { + return x + } + return y +} + +func memset(a []byte, b byte) { + if len(a) == 0 { + return + } + // Double, until we reach power of 2 >= len(a), same as bytes.Repeat, + // but without allocation. + a[0] = b + for i, l := 1, len(a); i < l; i *= 2 { + copy(a[i:], a[:i]) + } +} + +func (r *rleBuffer) ReadAt(p []byte, off int64) (n int, err error) { + if len(p) == 0 { + return + } + skipParts := sort.Search(len(r.buf), func(i int) bool { + part := &r.buf[i] + return part.off+part.n > off + }) + parts := r.buf[skipParts:] + if len(parts) > 0 { + skipBytes := off - parts[0].off + for _, part := range parts { + repeat := int(min(part.n-skipBytes, int64(len(p)-n))) + memset(p[n:n+repeat], part.b) + n += repeat + if n == len(p) { + return + } + skipBytes = 0 + } + } + if n != len(p) { + err = io.ErrUnexpectedEOF + } + return +} + +// Just testing the rleBuffer used in the Zip64 test above. Not used by the zip code. +func TestRLEBuffer(t *testing.T) { + b := new(rleBuffer) + var all []byte + writes := []string{"abcdeee", "eeeeeee", "eeeefghaaiii"} + for _, w := range writes { + b.Write([]byte(w)) + all = append(all, w...) + } + if len(b.buf) != 10 { + t.Fatalf("len(b.buf) = %d; want 10", len(b.buf)) + } + + for i := 0; i < len(all); i++ { + for j := 0; j < len(all)-i; j++ { + buf := make([]byte, j) + n, err := b.ReadAt(buf, int64(i)) + if err != nil || n != len(buf) { + t.Errorf("ReadAt(%d, %d) = %d, %v; want %d, nil", i, j, n, err, len(buf)) + } + if !bytes.Equal(buf, all[i:i+j]) { + t.Errorf("ReadAt(%d, %d) = %q; want %q", i, j, buf, all[i:i+j]) + } + } + } +} + +// fakeHash32 is a dummy Hash32 that always returns 0. +type fakeHash32 struct { + hash.Hash32 +} + +func (fakeHash32) Write(p []byte) (int, error) { return len(p), nil } +func (fakeHash32) Sum32() uint32 { return 0 } + +func TestZip64(t *testing.T) { + if testing.Short() { + t.Skip("slow test; skipping") + } + t.Parallel() + const size = 1 << 32 // before the "END\n" part + buf := testZip64(t, size) + testZip64DirectoryRecordLength(buf, t) +} + +func TestZip64EdgeCase(t *testing.T) { + if testing.Short() { + t.Skip("slow test; skipping") + } + t.Parallel() + // Test a zip file with uncompressed size 0xFFFFFFFF. + // That's the magic marker for a 64-bit file, so even though + // it fits in a 32-bit field we must use the 64-bit field. + // Go 1.5 and earlier got this wrong, + // writing an invalid zip file. + const size = 1<<32 - 1 - int64(len("END\n")) // before the "END\n" part + buf := testZip64(t, size) + testZip64DirectoryRecordLength(buf, t) +} + +// Tests that we generate a zip64 file if the directory at offset +// 0xFFFFFFFF, but not before. +func TestZip64DirectoryOffset(t *testing.T) { + if testing.Short() { + t.Skip("skipping in short mode") + } + t.Parallel() + const filename = "huge.txt" + gen := func(wantOff uint64) func(*Writer) { + return func(w *Writer) { + w.testHookCloseSizeOffset = func(size, off uint64) { + if off != wantOff { + t.Errorf("central directory offset = %d (%x); want %d", off, off, wantOff) + } + } + f, err := w.CreateHeader(&FileHeader{ + Name: filename, + Method: Store, + }) + if err != nil { + t.Fatal(err) + } + f.(*fileWriter).crc32 = fakeHash32{} + size := wantOff - fileHeaderLen - uint64(len(filename)) - dataDescriptorLen + if _, err := io.CopyN(f, zeros{}, int64(size)); err != nil { + t.Fatal(err) + } + if err := w.Close(); err != nil { + t.Fatal(err) + } + } + } + t.Run("uint32max-2_NoZip64", func(t *testing.T) { + t.Parallel() + if generatesZip64(t, gen(0xfffffffe)) { + t.Error("unexpected zip64") + } + }) + t.Run("uint32max-1_Zip64", func(t *testing.T) { + t.Parallel() + if !generatesZip64(t, gen(0xffffffff)) { + t.Error("expected zip64") + } + }) +} + +// At 16k records, we need to generate a zip64 file. +func TestZip64ManyRecords(t *testing.T) { + if testing.Short() { + t.Skip("skipping in short mode") + } + t.Parallel() + gen := func(numRec int) func(*Writer) { + return func(w *Writer) { + for i := 0; i < numRec; i++ { + _, err := w.CreateHeader(&FileHeader{ + Name: "a.txt", + Method: Store, + }) + if err != nil { + t.Fatal(err) + } + } + if err := w.Close(); err != nil { + t.Fatal(err) + } + } + } + // 16k-1 records shouldn't make a zip64: + t.Run("uint16max-1_NoZip64", func(t *testing.T) { + t.Parallel() + if generatesZip64(t, gen(0xfffe)) { + t.Error("unexpected zip64") + } + }) + // 16k records should make a zip64: + t.Run("uint16max_Zip64", func(t *testing.T) { + t.Parallel() + if !generatesZip64(t, gen(0xffff)) { + t.Error("expected zip64") + } + }) +} + +// suffixSaver is an io.Writer & io.ReaderAt that remembers the last 0 +// to 'keep' bytes of data written to it. Call Suffix to get the +// suffix bytes. +type suffixSaver struct { + keep int + buf []byte + start int + size int64 +} + +func (ss *suffixSaver) Size() int64 { return ss.size } + +var errDiscardedBytes = errors.New("ReadAt of discarded bytes") + +func (ss *suffixSaver) ReadAt(p []byte, off int64) (n int, err error) { + back := ss.size - off + if back > int64(ss.keep) { + return 0, errDiscardedBytes + } + suf := ss.Suffix() + n = copy(p, suf[len(suf)-int(back):]) + if n != len(p) { + err = io.EOF + } + return +} + +func (ss *suffixSaver) Suffix() []byte { + if len(ss.buf) < ss.keep { + return ss.buf + } + buf := make([]byte, ss.keep) + n := copy(buf, ss.buf[ss.start:]) + copy(buf[n:], ss.buf[:]) + return buf +} + +func (ss *suffixSaver) Write(p []byte) (n int, err error) { + n = len(p) + ss.size += int64(len(p)) + if len(ss.buf) < ss.keep { + space := ss.keep - len(ss.buf) + add := len(p) + if add > space { + add = space + } + ss.buf = append(ss.buf, p[:add]...) + p = p[add:] + } + for len(p) > 0 { + n := copy(ss.buf[ss.start:], p) + p = p[n:] + ss.start += n + if ss.start == ss.keep { + ss.start = 0 + } + } + return +} + +// generatesZip64 reports whether f wrote a zip64 file. +// f is also responsible for closing w. +func generatesZip64(t *testing.T, f func(w *Writer)) bool { + ss := &suffixSaver{keep: 10 << 20} + w := NewWriter(ss) + f(w) + return suffixIsZip64(t, ss) +} + +type sizedReaderAt interface { + io.ReaderAt + Size() int64 +} + +func suffixIsZip64(t *testing.T, zip sizedReaderAt) bool { + d := make([]byte, 1024) + if _, err := zip.ReadAt(d, zip.Size()-int64(len(d))); err != nil { + t.Fatalf("ReadAt: %v", err) + } + + sigOff := findSignatureInBlock(d) + if sigOff == -1 { + t.Errorf("failed to find signature in block") + return false + } + + dirOff, err := findDirectory64End(zip, zip.Size()-int64(len(d))+int64(sigOff)) + if err != nil { + t.Fatalf("findDirectory64End: %v", err) + } + if dirOff == -1 { + return false + } + + d = make([]byte, directory64EndLen) + if _, err := zip.ReadAt(d, dirOff); err != nil { + t.Fatalf("ReadAt(off=%d): %v", dirOff, err) + } + + b := readBuf(d) + if sig := b.uint32(); sig != directory64EndSignature { + return false + } + + size := b.uint64() + if size != directory64EndLen-12 { + t.Errorf("expected length of %d, got %d", directory64EndLen-12, size) + } + return true +} + +// Zip64 is required if the total size of the records is uint32max. +func TestZip64LargeDirectory(t *testing.T) { + if runtime.GOARCH == "wasm" { + t.Skip("too slow on wasm") + } + if testing.Short() { + t.Skip("skipping in short mode") + } + t.Parallel() + // gen returns a func that writes a zip with a wantLen bytes + // of central directory. + gen := func(wantLen int64) func(*Writer) { + return func(w *Writer) { + w.testHookCloseSizeOffset = func(size, off uint64) { + if size != uint64(wantLen) { + t.Errorf("Close central directory size = %d; want %d", size, wantLen) + } + } + + uint16string := strings.Repeat(".", uint16max) + remain := wantLen + for remain > 0 { + commentLen := int(uint16max) - directoryHeaderLen - 1 + thisRecLen := directoryHeaderLen + int(uint16max) + commentLen + if int64(thisRecLen) > remain { + remove := thisRecLen - int(remain) + commentLen -= remove + thisRecLen -= remove + } + remain -= int64(thisRecLen) + f, err := w.CreateHeader(&FileHeader{ + Name: uint16string, + Comment: uint16string[:commentLen], + }) + if err != nil { + t.Fatalf("CreateHeader: %v", err) + } + f.(*fileWriter).crc32 = fakeHash32{} + } + if err := w.Close(); err != nil { + t.Fatalf("Close: %v", err) + } + } + } + t.Run("uint32max-1_NoZip64", func(t *testing.T) { + t.Parallel() + if generatesZip64(t, gen(uint32max-1)) { + t.Error("unexpected zip64") + } + }) + t.Run("uint32max_HasZip64", func(t *testing.T) { + t.Parallel() + if !generatesZip64(t, gen(uint32max)) { + t.Error("expected zip64") + } + }) +} + +func testZip64(t testing.TB, size int64) *rleBuffer { + const chunkSize = 1024 + chunks := int(size / chunkSize) + // write size bytes plus "END\n" to a zip file + buf := new(rleBuffer) + w := NewWriter(buf) + f, err := w.CreateHeader(&FileHeader{ + Name: "huge.txt", + Method: Store, + }) + if err != nil { + t.Fatal(err) + } + f.(*fileWriter).crc32 = fakeHash32{} + chunk := make([]byte, chunkSize) + for i := range chunk { + chunk[i] = '.' + } + for i := 0; i < chunks; i++ { + _, err := f.Write(chunk) + if err != nil { + t.Fatal("write chunk:", err) + } + } + if frag := int(size % chunkSize); frag > 0 { + _, err := f.Write(chunk[:frag]) + if err != nil { + t.Fatal("write chunk:", err) + } + } + end := []byte("END\n") + _, err = f.Write(end) + if err != nil { + t.Fatal("write end:", err) + } + if err := w.Close(); err != nil { + t.Fatal(err) + } + + // read back zip file and check that we get to the end of it + r, err := NewReader(buf, int64(buf.Size())) + if err != nil { + t.Fatal("reader:", err) + } + f0 := r.File[0] + rc, err := f0.Open() + if err != nil { + t.Fatal("opening:", err) + } + rc.(*checksumReader).hash = fakeHash32{} + for i := 0; i < chunks; i++ { + _, err := io.ReadFull(rc, chunk) + if err != nil { + t.Fatal("read:", err) + } + } + if frag := int(size % chunkSize); frag > 0 { + _, err := io.ReadFull(rc, chunk[:frag]) + if err != nil { + t.Fatal("read:", err) + } + } + gotEnd, err := io.ReadAll(rc) + if err != nil { + t.Fatal("read end:", err) + } + if !bytes.Equal(gotEnd, end) { + t.Errorf("End of zip64 archive %q, want %q", gotEnd, end) + } + err = rc.Close() + if err != nil { + t.Fatal("closing:", err) + } + if size+int64(len("END\n")) >= 1<<32-1 { + if got, want := f0.UncompressedSize, uint32(uint32max); got != want { + t.Errorf("UncompressedSize %#x, want %#x", got, want) + } + } + + if got, want := f0.UncompressedSize64, uint64(size)+uint64(len(end)); got != want { + t.Errorf("UncompressedSize64 %#x, want %#x", got, want) + } + + return buf +} + +// Issue 9857 +func testZip64DirectoryRecordLength(buf *rleBuffer, t *testing.T) { + if !suffixIsZip64(t, buf) { + t.Fatal("not a zip64") + } +} + +func testValidHeader(h *FileHeader, t *testing.T) { + var buf bytes.Buffer + z := NewWriter(&buf) + + f, err := z.CreateHeader(h) + if err != nil { + t.Fatalf("error creating header: %v", err) + } + if _, err := f.Write([]byte("hi")); err != nil { + t.Fatalf("error writing content: %v", err) + } + if err := z.Close(); err != nil { + t.Fatalf("error closing zip writer: %v", err) + } + + b := buf.Bytes() + zf, err := NewReader(bytes.NewReader(b), int64(len(b))) + if err != nil { + t.Fatalf("got %v, expected nil", err) + } + zh := zf.File[0].FileHeader + if zh.Name != h.Name || zh.Method != h.Method || zh.UncompressedSize64 != uint64(len("hi")) { + t.Fatalf("got %q/%d/%d expected %q/%d/%d", zh.Name, zh.Method, zh.UncompressedSize64, h.Name, h.Method, len("hi")) + } +} + +// Issue 4302. +func TestHeaderInvalidTagAndSize(t *testing.T) { + const timeFormat = "20060102T150405.000.txt" + + ts := time.Now() + filename := ts.Format(timeFormat) + + h := FileHeader{ + Name: filename, + Method: Deflate, + Extra: []byte(ts.Format(time.RFC3339Nano)), // missing tag and len, but Extra is best-effort parsing + } + h.SetModTime(ts) + + testValidHeader(&h, t) +} + +func TestHeaderTooShort(t *testing.T) { + h := FileHeader{ + Name: "foo.txt", + Method: Deflate, + Extra: []byte{zip64ExtraID}, // missing size and second half of tag, but Extra is best-effort parsing + } + testValidHeader(&h, t) +} + +func TestHeaderTooLongErr(t *testing.T) { + var headerTests = []struct { + name string + extra []byte + wanterr error + }{ + { + name: strings.Repeat("x", 1<<16), + extra: []byte{}, + wanterr: errLongName, + }, + { + name: "long_extra", + extra: bytes.Repeat([]byte{0xff}, 1<<16), + wanterr: errLongExtra, + }, + } + + // write a zip file + buf := new(bytes.Buffer) + w := NewWriter(buf) + + for _, test := range headerTests { + h := &FileHeader{ + Name: test.name, + Extra: test.extra, + } + _, err := w.CreateHeader(h) + if err != test.wanterr { + t.Errorf("error=%v, want %v", err, test.wanterr) + } + } + + if err := w.Close(); err != nil { + t.Fatal(err) + } +} + +func TestHeaderIgnoredSize(t *testing.T) { + h := FileHeader{ + Name: "foo.txt", + Method: Deflate, + Extra: []byte{zip64ExtraID & 0xFF, zip64ExtraID >> 8, 24, 0, 1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8}, // bad size but shouldn't be consulted + } + testValidHeader(&h, t) +} + +// Issue 4393. It is valid to have an extra data header +// which contains no body. +func TestZeroLengthHeader(t *testing.T) { + h := FileHeader{ + Name: "extadata.txt", + Method: Deflate, + Extra: []byte{ + 85, 84, 5, 0, 3, 154, 144, 195, 77, // tag 21589 size 5 + 85, 120, 0, 0, // tag 30805 size 0 + }, + } + testValidHeader(&h, t) +} + +// Just benchmarking how fast the Zip64 test above is. Not related to +// our zip performance, since the test above disabled CRC32 and flate. +func BenchmarkZip64Test(b *testing.B) { + for i := 0; i < b.N; i++ { + testZip64(b, 1<<26) + } +} + +func BenchmarkZip64TestSizes(b *testing.B) { + for _, size := range []int64{1 << 12, 1 << 20, 1 << 26} { + b.Run(fmt.Sprint(size), func(b *testing.B) { + b.RunParallel(func(pb *testing.PB) { + for pb.Next() { + testZip64(b, size) + } + }) + }) + } +} + +func TestSuffixSaver(t *testing.T) { + const keep = 10 + ss := &suffixSaver{keep: keep} + ss.Write([]byte("abc")) + if got := string(ss.Suffix()); got != "abc" { + t.Errorf("got = %q; want abc", got) + } + ss.Write([]byte("defghijklmno")) + if got := string(ss.Suffix()); got != "fghijklmno" { + t.Errorf("got = %q; want fghijklmno", got) + } + if got, want := ss.Size(), int64(len("abc")+len("defghijklmno")); got != want { + t.Errorf("Size = %d; want %d", got, want) + } + buf := make([]byte, ss.Size()) + for off := int64(0); off < ss.Size(); off++ { + for size := 1; size <= int(ss.Size()-off); size++ { + readBuf := buf[:size] + n, err := ss.ReadAt(readBuf, off) + if off < ss.Size()-keep { + if err != errDiscardedBytes { + t.Errorf("off %d, size %d = %v, %v (%q); want errDiscardedBytes", off, size, n, err, readBuf[:n]) + } + continue + } + want := "abcdefghijklmno"[off : off+int64(size)] + got := string(readBuf[:n]) + if err != nil || got != want { + t.Errorf("off %d, size %d = %v, %v (%q); want %q", off, size, n, err, got, want) + } + } + } + +} + +type zeros struct{} + +func (zeros) Read(p []byte) (int, error) { + for i := range p { + p[i] = 0 + } + return len(p), nil +} |