package storage import ( "bytes" "errors" "fmt" "io" "os" "path" "path/filepath" "reflect" "sort" "strings" "sync" "time" drivers "github.com/containers/storage/drivers" "github.com/containers/storage/pkg/archive" "github.com/containers/storage/pkg/idtools" "github.com/containers/storage/pkg/ioutils" "github.com/containers/storage/pkg/lockfile" "github.com/containers/storage/pkg/mount" "github.com/containers/storage/pkg/stringid" "github.com/containers/storage/pkg/system" "github.com/containers/storage/pkg/tarlog" "github.com/containers/storage/pkg/truncindex" multierror "github.com/hashicorp/go-multierror" "github.com/klauspost/pgzip" digest "github.com/opencontainers/go-digest" "github.com/opencontainers/selinux/go-selinux" "github.com/sirupsen/logrus" "github.com/vbatts/tar-split/archive/tar" "github.com/vbatts/tar-split/tar/asm" "github.com/vbatts/tar-split/tar/storage" ) const ( tarSplitSuffix = ".tar-split.gz" incompleteFlag = "incomplete" // maxLayerStoreCleanupIterations is the number of times we try to clean up inconsistent layer store state // in readers (which, for implementation reasons, gives other writers the opportunity to create more inconsistent state) // until we just give up. maxLayerStoreCleanupIterations = 3 ) type layerLocations uint8 // The backing store is split in two json files, one (the volatile) // that is written without fsync() meaning it isn't as robust to // unclean shutdown const ( stableLayerLocation layerLocations = 1 << iota volatileLayerLocation numLayerLocationIndex = iota ) func layerLocationFromIndex(index int) layerLocations { return 1 << index } // A Layer is a record of a copy-on-write layer that's stored by the lower // level graph driver. type Layer struct { // ID is either one which was specified at create-time, or a random // value which was generated by the library. ID string `json:"id"` // Names is an optional set of user-defined convenience values. The // layer can be referred to by its ID or any of its names. Names are // unique among layers. Names []string `json:"names,omitempty"` // Parent is the ID of a layer from which this layer inherits data. Parent string `json:"parent,omitempty"` // Metadata is data we keep for the convenience of the caller. It is not // expected to be large, since it is kept in memory. Metadata string `json:"metadata,omitempty"` // MountLabel is an SELinux label which should be used when attempting to mount // the layer. MountLabel string `json:"mountlabel,omitempty"` // MountPoint is the path where the layer is mounted, or where it was most // recently mounted. // // WARNING: This field is a snapshot in time: (except for users inside c/storage that // hold the mount lock) the true value can change between subsequent // calls to c/storage API. // // Users that need to handle concurrent mount/unmount attempts should not access this // field at all, and should only use the path returned by .Mount() (and that’s only // assuming no other user will concurrently decide to unmount that mount point). MountPoint string `json:"-"` // MountCount is used as a reference count for the container's layer being // mounted at the mount point. // // WARNING: This field is a snapshot in time; (except for users inside c/storage that // hold the mount lock) the true value can change between subsequent // calls to c/storage API. // // In situations where concurrent mount/unmount attempts can happen, this field // should not be used for any decisions, maybe apart from heuristic user warnings. 
MountCount int `json:"-"` // Created is the datestamp for when this layer was created. Older // versions of the library did not track this information, so callers // will likely want to use the IsZero() method to verify that a value // is set before using it. Created time.Time `json:"created,omitempty"` // CompressedDigest is the digest of the blob that was last passed to // ApplyDiff() or create(), as it was presented to us. CompressedDigest digest.Digest `json:"compressed-diff-digest,omitempty"` // CompressedSize is the length of the blob that was last passed to // ApplyDiff() or create(), as it was presented to us. If // CompressedDigest is not set, this should be treated as if it were an // uninitialized value. CompressedSize int64 `json:"compressed-size,omitempty"` // UncompressedDigest is the digest of the blob that was last passed to // ApplyDiff() or create(), after we decompressed it. Often referred to // as a DiffID. UncompressedDigest digest.Digest `json:"diff-digest,omitempty"` // TOCDigest represents the digest of the Table of Contents (TOC) of the blob. // This digest is utilized when the UncompressedDigest is not // validated during the partial image pull process, but the // TOC itself is validated. // It serves as an alternative reference under these specific conditions. TOCDigest digest.Digest `json:"toc-digest,omitempty"` // UncompressedSize is the length of the blob that was last passed to // ApplyDiff() or create(), after we decompressed it. If // UncompressedDigest is not set, this should be treated as if it were // an uninitialized value. UncompressedSize int64 `json:"diff-size,omitempty"` // CompressionType is the type of compression which we detected on the blob // that was last passed to ApplyDiff() or create(). CompressionType archive.Compression `json:"compression,omitempty"` // UIDs and GIDs are lists of UIDs and GIDs used in the layer. This // field is only populated (i.e., will only contain one or more // entries) if the layer was created using ApplyDiff() or create(). UIDs []uint32 `json:"uidset,omitempty"` GIDs []uint32 `json:"gidset,omitempty"` // Flags is arbitrary data about the layer. Flags map[string]interface{} `json:"flags,omitempty"` // UIDMap and GIDMap are used for setting up a layer's contents // for use inside of a user namespace where UID mapping is being used. UIDMap []idtools.IDMap `json:"uidmap,omitempty"` GIDMap []idtools.IDMap `json:"gidmap,omitempty"` // ReadOnly is true if this layer resides in a read-only layer store. ReadOnly bool `json:"-"` // volatileStore is true if the container is from the volatile json file volatileStore bool `json:"-"` // BigDataNames is a list of names of data items that we keep for the // convenience of the caller. They can be large, and are only in // memory when being read from or written to disk. BigDataNames []string `json:"big-data-names,omitempty"` } type layerMountPoint struct { ID string `json:"id"` MountPoint string `json:"path"` MountCount int `json:"count"` } // DiffOptions override the default behavior of Diff() methods. type DiffOptions struct { // Compression, if set overrides the default compressor when generating a diff. 
Compression *archive.Compression } // stagedLayerOptions are the options passed to .create to populate a staged // layer type stagedLayerOptions struct { DiffOutput *drivers.DriverWithDifferOutput DiffOptions *drivers.ApplyDiffWithDifferOpts } // roLayerStore wraps a graph driver, adding the ability to refer to layers by // name, and keeping track of parent-child relationships, along with a list of // all known layers. type roLayerStore interface { roMetadataStore roLayerBigDataStore // startReading makes sure the store is fresh, and locks it for reading. // If this succeeds, the caller MUST call stopReading(). startReading() error // stopReading releases locks obtained by startReading. stopReading() // Exists checks if a layer with the specified name or ID is known. Exists(id string) bool // Get retrieves information about a layer given an ID or name. Get(id string) (*Layer, error) // Status returns a slice of key-value pairs, suitable for human consumption, // relaying whatever status information the underlying driver can share. Status() ([][2]string, error) // Changes returns a slice of Change structures, which contain a pathname // (Path) and a description of what sort of change (Kind) was made by the // layer (either ChangeModify, ChangeAdd, or ChangeDelete), relative to a // specified layer. By default, the layer's parent is used as a reference. Changes(from, to string) ([]archive.Change, error) // Diff produces a tarstream which can be applied to a layer with the contents // of the first layer to produce a layer with the contents of the second layer. // By default, the parent of the second layer is used as the first // layer, so it need not be specified. Options can be used to override // default behavior, but are also not required. Diff(from, to string, options *DiffOptions) (io.ReadCloser, error) // DiffSize produces an estimate of the length of the tarstream which would be // produced by Diff. DiffSize(from, to string) (int64, error) // Size produces a cached value for the uncompressed size of the layer, // if one is known, or -1 if it is not known. If the layer cannot be // found, it returns an error. Size(name string) (int64, error) // LayersByCompressedDigest returns a slice of the layers with the // specified compressed digest value recorded for them. LayersByCompressedDigest(d digest.Digest) ([]Layer, error) // LayersByUncompressedDigest returns a slice of the layers with the // specified uncompressed digest value recorded for them. LayersByUncompressedDigest(d digest.Digest) ([]Layer, error) // LayersByTOCDigest returns a slice of the layers with the // specified TOC digest value recorded for them. LayersByTOCDigest(d digest.Digest) ([]Layer, error) // Layers returns a slice of the known layers. Layers() ([]Layer, error) } // rwLayerStore wraps a graph driver, adding the ability to refer to layers by // name, and keeping track of parent-child relationships, along with a list of // all known layers. type rwLayerStore interface { roLayerStore rwMetadataStore flaggableStore rwLayerBigDataStore // startWriting makes sure the store is fresh, and locks it for writing. // If this succeeds, the caller MUST call stopWriting(). startWriting() error // stopWriting releases locks obtained by startWriting. stopWriting() // create creates a new layer, optionally giving it a specified ID rather than // a randomly-generated one, either inheriting data from another specified // layer or the empty base layer.
The new layer can optionally be given names // and have an SELinux label specified for use when mounting it. Some // underlying drivers can accept a "size" option. At this time, most // underlying drivers do not themselves distinguish between writeable // and read-only layers. Returns the new layer structure and the size of the // diff which was applied to its parent to initialize its contents. create(id string, parent *Layer, names []string, mountLabel string, options map[string]string, moreOptions *LayerOptions, writeable bool, diff io.Reader, slo *stagedLayerOptions) (*Layer, int64, error) // updateNames modifies names associated with a layer based on (op, names). updateNames(id string, names []string, op updateNameOperation) error // Delete deletes a layer with the specified name or ID. Delete(id string) error // Wipe deletes all layers. Wipe() error // Mount mounts a layer for use. If the specified layer is the parent of other // layers, it should not be written to. An SELinux label to be applied to the // mount can be specified to override the one configured for the layer. // The mappings used by the container can be specified. Mount(id string, options drivers.MountOpts) (string, error) // unmount unmounts a layer when it is no longer in use. // If conditional is set, it will fail with ErrLayerNotMounted if the layer is not mounted (without conditional, the caller is // making a promise that the layer is actually mounted). // If force is set, it will physically try to unmount it even if it is mounted multiple times, or even if (!conditional and) // there are no records of it being mounted in the first place. // It returns whether the layer was still mounted at the time this function returned. // WARNING: The return value may already be obsolete by the time it is available // to the caller, so it can be used for heuristic sanity checks at best. It should almost always be ignored. unmount(id string, force bool, conditional bool) (bool, error) // Mounted returns number of times the layer has been mounted. Mounted(id string) (int, error) // ParentOwners returns the UIDs and GIDs of parents of the layer's mountpoint // for which the layer's UID and GID maps don't contain corresponding entries. ParentOwners(id string) (uids, gids []int, err error) // ApplyDiff reads a tarstream which was created by a previous call to Diff and // applies its changes to a specified layer. ApplyDiff(to string, diff io.Reader) (int64, error) // ApplyDiffWithDiffer applies the changes through the differ callback function. // If to is the empty string, then a staging directory is created by the driver. ApplyDiffWithDiffer(to string, options *drivers.ApplyDiffWithDifferOpts, differ drivers.Differ) (*drivers.DriverWithDifferOutput, error) // CleanupStagingDirectory cleanups the staging directory. It can be used to cleanup the staging directory on errors CleanupStagingDirectory(stagingDirectory string) error // applyDiffFromStagingDirectory uses diffOutput.Target to create the diff. applyDiffFromStagingDirectory(id string, diffOutput *drivers.DriverWithDifferOutput, options *drivers.ApplyDiffWithDifferOpts) error // DifferTarget gets the location where files are stored for the layer. DifferTarget(id string) (string, error) // PutAdditionalLayer creates a layer using the diff contained in the additional layer // store. // This API is experimental and can be changed without bumping the major version number. 
PutAdditionalLayer(id string, parentLayer *Layer, names []string, aLayer drivers.AdditionalLayer) (layer *Layer, err error) // Clean up unreferenced layers GarbageCollect() error } type multipleLockFile struct { lockfiles []*lockfile.LockFile } func (l multipleLockFile) Lock() { for _, lock := range l.lockfiles { lock.Lock() } } func (l multipleLockFile) RLock() { for _, lock := range l.lockfiles { lock.RLock() } } func (l multipleLockFile) Unlock() { for _, lock := range l.lockfiles { lock.Unlock() } } func (l multipleLockFile) ModifiedSince(lastWrite lockfile.LastWrite) (lockfile.LastWrite, bool, error) { // Look up only the first lockfile, since this is the value returned by RecordWrite(). return l.lockfiles[0].ModifiedSince(lastWrite) } func (l multipleLockFile) AssertLockedForWriting() { for _, lock := range l.lockfiles { lock.AssertLockedForWriting() } } func (l multipleLockFile) GetLastWrite() (lockfile.LastWrite, error) { return l.lockfiles[0].GetLastWrite() } func (l multipleLockFile) RecordWrite() (lockfile.LastWrite, error) { var lastWrite *lockfile.LastWrite for _, lock := range l.lockfiles { lw, err := lock.RecordWrite() if err != nil { return lw, err } // Return the first value we get so we know that // all the locks have a write time >= to this one. if lastWrite == nil { lastWrite = &lw } } return *lastWrite, nil } func (l multipleLockFile) IsReadWrite() bool { return l.lockfiles[0].IsReadWrite() } func newMultipleLockFile(l ...*lockfile.LockFile) *multipleLockFile { return &multipleLockFile{lockfiles: l} } type layerStore struct { // The following fields are only set when constructing layerStore, and must never be modified afterwards. // They are safe to access without any other locking. lockfile *multipleLockFile // lockfile.IsReadWrite can be used to distinguish between read-write and read-only layer stores. mountsLockfile *lockfile.LockFile // Can _only_ be obtained with inProcessLock held. rundir string jsonPath [numLayerLocationIndex]string layerdir string inProcessLock sync.RWMutex // Can _only_ be obtained with lockfile held. // The following fields can only be read/written with read/write ownership of inProcessLock, respectively. // Almost all users should use startReading() or startWriting(). lastWrite lockfile.LastWrite mountsLastWrite lockfile.LastWrite // Only valid if lockfile.IsReadWrite() layers []*Layer idindex *truncindex.TruncIndex byid map[string]*Layer byname map[string]*Layer bymount map[string]*Layer bycompressedsum map[digest.Digest][]string byuncompressedsum map[digest.Digest][]string bytocsum map[digest.Digest][]string layerspathsModified [numLayerLocationIndex]time.Time // FIXME: This field is only set when constructing layerStore, but locking rules of the driver // interface itself are not documented here. driver drivers.Driver } // The caller must hold r.inProcessLock for reading. 
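// Illustrative note (summarizing the behavior of the code below, not adding new
// behavior): a layer parsed from volatile-layers.json has volatileStore set, so
// layerLocation returns volatileLayerLocation for it and saveLayers() writes it
// back to the volatile file without fsync(); every other layer maps to
// stableLayerLocation and layers.json.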
func layerLocation(l *Layer) layerLocations { if l.volatileStore { return volatileLayerLocation } return stableLayerLocation } func copyLayer(l *Layer) *Layer { return &Layer{ ID: l.ID, Names: copyStringSlice(l.Names), Parent: l.Parent, Metadata: l.Metadata, MountLabel: l.MountLabel, MountPoint: l.MountPoint, MountCount: l.MountCount, Created: l.Created, CompressedDigest: l.CompressedDigest, CompressedSize: l.CompressedSize, UncompressedDigest: l.UncompressedDigest, UncompressedSize: l.UncompressedSize, TOCDigest: l.TOCDigest, CompressionType: l.CompressionType, ReadOnly: l.ReadOnly, volatileStore: l.volatileStore, BigDataNames: copyStringSlice(l.BigDataNames), Flags: copyStringInterfaceMap(l.Flags), UIDMap: copyIDMap(l.UIDMap), GIDMap: copyIDMap(l.GIDMap), UIDs: copyUint32Slice(l.UIDs), GIDs: copyUint32Slice(l.GIDs), } } // startWritingWithReload makes sure the store is fresh if canReload, and locks it for writing. // If this succeeds, the caller MUST call stopWriting(). // // This is an internal implementation detail of layerStore construction, every other caller // should use startWriting() instead. func (r *layerStore) startWritingWithReload(canReload bool) error { r.lockfile.Lock() r.inProcessLock.Lock() succeeded := false defer func() { if !succeeded { r.inProcessLock.Unlock() r.lockfile.Unlock() } }() if canReload { if _, err := r.reloadIfChanged(true); err != nil { return err } } succeeded = true return nil } // startWriting makes sure the store is fresh, and locks it for writing. // If this succeeds, the caller MUST call stopWriting(). func (r *layerStore) startWriting() error { return r.startWritingWithReload(true) } // stopWriting releases locks obtained by startWriting. func (r *layerStore) stopWriting() { r.inProcessLock.Unlock() r.lockfile.Unlock() } // startReadingWithReload makes sure the store is fresh if canReload, and locks it for reading. // If this succeeds, the caller MUST call stopReading(). // // This is an internal implementation detail of layerStore construction, every other caller // should use startReading() instead. func (r *layerStore) startReadingWithReload(canReload bool) error { // inProcessLocked calls the nested function with r.inProcessLock held for writing. inProcessLocked := func(fn func() error) error { r.inProcessLock.Lock() defer r.inProcessLock.Unlock() return fn() } r.lockfile.RLock() unlockFn := r.lockfile.Unlock // A function to call to clean up, or nil defer func() { if unlockFn != nil { unlockFn() } }() r.inProcessLock.RLock() unlockFn = r.stopReading if canReload { // If we are lucky, we can just hold the read locks, check that we are fresh, and continue. modified, err := r.modified() if err != nil { return err } if modified { // We are unlucky, and need to reload. // NOTE: Multiple goroutines can get to this place approximately simultaneously. r.inProcessLock.RUnlock() unlockFn = r.lockfile.Unlock cleanupsDone := 0 for { // First try reloading with r.lockfile held for reading. // r.inProcessLock will serialize all goroutines that got here; // each will re-check on-disk state vs. r.lastWrite, and the first one will actually reload the data. var tryLockedForWriting bool err := inProcessLocked(func() error { var err error tryLockedForWriting, err = r.reloadIfChanged(false) return err }) if err == nil { break } if !tryLockedForWriting { return err } if cleanupsDone >= maxLayerStoreCleanupIterations { return fmt.Errorf("(even after %d cleanup attempts:) %w", cleanupsDone, err) } // Not good enough, we need r.lockfile held for writing. 
So, let’s do that. unlockFn() unlockFn = nil r.lockfile.Lock() unlockFn = r.lockfile.Unlock if err := inProcessLocked(func() error { _, err := r.reloadIfChanged(true) return err }); err != nil { return err } unlockFn() unlockFn = nil r.lockfile.RLock() unlockFn = r.lockfile.Unlock // We need to check for a reload again because the on-disk state could have been modified // after we released the lock. cleanupsDone++ } // NOTE that we hold neither a read nor write inProcessLock at this point. That’s fine in ordinary operation, because // the on-filesystem r.lockfile should protect us against (cooperating) writers, and any use of r.inProcessLock // protects us against in-process writers modifying data. // In the presence of non-cooperating writers, we just ensure that 1) the in-memory data is not clearly out-of-date // and 2) access to the in-memory data is not racy; // but we can’t protect against those out-of-process writers modifying _files_ while we are assuming they are in a consistent state. r.inProcessLock.RLock() } } unlockFn = nil return nil } // startReading makes sure the store is fresh, and locks it for reading. // If this succeeds, the caller MUST call stopReading(). func (r *layerStore) startReading() error { return r.startReadingWithReload(true) } // stopReading releases locks obtained by startReading. func (r *layerStore) stopReading() { r.inProcessLock.RUnlock() r.lockfile.Unlock() } // modified returns true if the on-disk state (of layers or mounts) has changed (i.e. if reloadIfChanged may need to modify the store) // // Note that unlike containerStore.modified and imageStore.modified, this function is not directly used in layerStore.reloadIfChanged(); // it exists only to help the reader ensure it has fresh enough state. // // The caller must hold r.lockfile for reading _or_ writing. // The caller must hold r.inProcessLock for reading or writing. func (r *layerStore) modified() (bool, error) { _, m, err := r.layersModified() if err != nil { return false, err } if m { return true, nil } if r.lockfile.IsReadWrite() { // This means we get, release, and re-obtain r.mountsLockfile if we actually need to do any kind of reload. // That’s a bit expensive, but hopefully most callers will be read-only and see no changes. // We can’t eliminate these mountsLockfile accesses given the current assumption that Layer objects have _some_ not-very-obsolete // mount data. Maybe we can segregate the mount-dependent and mount-independent operations better... r.mountsLockfile.RLock() defer r.mountsLockfile.Unlock() _, m, err := r.mountsModified() if err != nil { return false, err } if m { return true, nil } } return false, nil } // layersModified() checks if the most recent writer to r.jsonPath[] was a party other than the // last recorded writer. If so, it returns a lockfile.LastWrite value to record on a successful // reload. // It should only be called with the lock held. // The caller must hold r.inProcessLock for reading. func (r *layerStore) layersModified() (lockfile.LastWrite, bool, error) { lastWrite, modified, err := r.lockfile.ModifiedSince(r.lastWrite) if err != nil { return lockfile.LastWrite{}, modified, err } if modified { return lastWrite, true, nil } // If the layers.json file or container-layers.json has been // modified manually, then we have to reload the storage in // any case.
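// For example, if layers.json is edited directly with a text editor, the lock
// file's LastWrite token does not change, but the file's modification time no
// longer matches r.layerspathsModified, so the loop below still reports the
// store as modified.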
for locationIndex := 0; locationIndex < numLayerLocationIndex; locationIndex++ { info, err := os.Stat(r.jsonPath[locationIndex]) if err != nil && !os.IsNotExist(err) { return lockfile.LastWrite{}, false, fmt.Errorf("stat layers file: %w", err) } if info != nil && info.ModTime() != r.layerspathsModified[locationIndex] { // In this case the LastWrite value is equal to r.lastWrite; writing it back doesn’t hurt. return lastWrite, true, nil } } return lockfile.LastWrite{}, false, nil } // reloadIfChanged reloads the contents of the store from disk if it is changed. // // The caller must hold r.lockfile for reading _or_ writing; lockedForWriting is true // if it is held for writing. // // The caller must hold r.inProcessLock for WRITING. // // If !lockedForWriting and this function fails, the return value indicates whether // reloadIfChanged() with lockedForWriting could succeed. func (r *layerStore) reloadIfChanged(lockedForWriting bool) (bool, error) { lastWrite, layersModified, err := r.layersModified() if err != nil { return false, err } if layersModified { // r.load also reloads mounts data; so, on this path, we don’t need to call reloadMountsIfChanged. if tryLockedForWriting, err := r.load(lockedForWriting); err != nil { return tryLockedForWriting, err // r.lastWrite is unchanged, so we will load the next time again. } r.lastWrite = lastWrite return false, nil } if r.lockfile.IsReadWrite() { r.mountsLockfile.RLock() defer r.mountsLockfile.Unlock() if err := r.reloadMountsIfChanged(); err != nil { return false, err } } return false, nil } // mountsModified returns true if the on-disk mount state has changed (i.e. if reloadMountsIfChanged may need to modify the store), // and a lockfile.LastWrite value for that update. // // The caller must hold r.mountsLockfile for reading _or_ writing. // The caller must hold r.inProcessLock for reading or writing. func (r *layerStore) mountsModified() (lockfile.LastWrite, bool, error) { return r.mountsLockfile.ModifiedSince(r.mountsLastWrite) } // reloadMountsIfChanged reloads the contents of mountsPath from disk if it is changed. // // The caller must hold r.mountsLockFile for reading or writing. func (r *layerStore) reloadMountsIfChanged() error { lastWrite, modified, err := r.mountsModified() if err != nil { return err } if modified { if err = r.loadMounts(); err != nil { return err } r.mountsLastWrite = lastWrite } return nil } // Requires startReading or startWriting. func (r *layerStore) Layers() ([]Layer, error) { layers := make([]Layer, len(r.layers)) for i := range r.layers { layers[i] = *copyLayer(r.layers[i]) } return layers, nil } // Requires startWriting. func (r *layerStore) GarbageCollect() error { layers, err := r.driver.ListLayers() if err != nil { if errors.Is(err, drivers.ErrNotSupported) { return nil } return err } for _, id := range layers { // Is the id still referenced if r.byid[id] != nil { continue } // Remove layer and any related data of unreferenced id if err := r.driver.Remove(id); err != nil { logrus.Debugf("removing driver layer %q", id) return err } logrus.Debugf("removing %q", r.tspath(id)) os.Remove(r.tspath(id)) logrus.Debugf("removing %q", r.datadir(id)) os.RemoveAll(r.datadir(id)) } return nil } func (r *layerStore) mountspath() string { return filepath.Join(r.rundir, "mountpoints.json") } // load reloads the contents of the store from disk. // // Most callers should call reloadIfChanged() instead, to avoid overhead and to correctly // manage r.lastWrite. // // As a side effect, this sets r.mountsLastWrite. 
// // The caller must hold r.lockfile for reading _or_ writing; lockedForWriting is true // if it is held for writing. // The caller must hold r.inProcessLock for WRITING. // // If !lockedForWriting and this function fails, the return value indicates whether // retrying with lockedForWriting could succeed. func (r *layerStore) load(lockedForWriting bool) (bool, error) { var modifiedLocations layerLocations layers := []*Layer{} ids := make(map[string]*Layer) for locationIndex := 0; locationIndex < numLayerLocationIndex; locationIndex++ { location := layerLocationFromIndex(locationIndex) rpath := r.jsonPath[locationIndex] info, err := os.Stat(rpath) if err != nil { if !os.IsNotExist(err) { return false, err } } else { r.layerspathsModified[locationIndex] = info.ModTime() } data, err := os.ReadFile(rpath) if err != nil && !os.IsNotExist(err) { return false, err } locationLayers := []*Layer{} if len(data) != 0 { if err := json.Unmarshal(data, &locationLayers); err != nil { return false, fmt.Errorf("loading %q: %w", rpath, err) } } for _, layer := range locationLayers { // There should be no duplicated ids between json files, but lets check to be sure if ids[layer.ID] != nil { continue // skip invalid duplicated layer } // Remember where the layer came from if location == volatileLayerLocation { layer.volatileStore = true } layers = append(layers, layer) ids[layer.ID] = layer } } idlist := make([]string, 0, len(layers)) names := make(map[string]*Layer) compressedsums := make(map[digest.Digest][]string) uncompressedsums := make(map[digest.Digest][]string) tocsums := make(map[digest.Digest][]string) var errorToResolveBySaving error // == nil; if there are multiple errors, this is one of them. if r.lockfile.IsReadWrite() { selinux.ClearLabels() } for n, layer := range layers { idlist = append(idlist, layer.ID) for _, name := range layer.Names { if conflict, ok := names[name]; ok { r.removeName(conflict, name) errorToResolveBySaving = ErrDuplicateLayerNames modifiedLocations |= layerLocation(conflict) } names[name] = layers[n] } if layer.CompressedDigest != "" { compressedsums[layer.CompressedDigest] = append(compressedsums[layer.CompressedDigest], layer.ID) } if layer.UncompressedDigest != "" { uncompressedsums[layer.UncompressedDigest] = append(uncompressedsums[layer.UncompressedDigest], layer.ID) } if layer.TOCDigest != "" { tocsums[layer.TOCDigest] = append(tocsums[layer.TOCDigest], layer.ID) } if layer.MountLabel != "" { selinux.ReserveLabel(layer.MountLabel) } layer.ReadOnly = !r.lockfile.IsReadWrite() // The r.lockfile.IsReadWrite() condition maintains past practice: // Incomplete layers in a read-only store are not treated as a reason to refuse to use other layers from that store // (OTOH creating child layers on top would probably lead to problems?). // We do remove incomplete layers in read-write stores so that we don’t build on top of them. if layerHasIncompleteFlag(layer) && r.lockfile.IsReadWrite() { errorToResolveBySaving = errors.New("an incomplete layer exists and can't be cleaned up") } } if errorToResolveBySaving != nil { if !r.lockfile.IsReadWrite() { return false, errorToResolveBySaving } if !lockedForWriting { return true, errorToResolveBySaving } } r.layers = layers r.idindex = truncindex.NewTruncIndex(idlist) // Invalid values in idlist are ignored: they are not a reason to refuse processing the whole store. 
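	// Note that the lookup maps below are rebuilt from scratch on every load;
	// entries derived from the previously loaded state are discarded rather
	// than merged.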
r.byid = ids r.byname = names r.bycompressedsum = compressedsums r.byuncompressedsum = uncompressedsums r.bytocsum = tocsums // Load and merge information about which layers are mounted, and where. if r.lockfile.IsReadWrite() { r.mountsLockfile.RLock() defer r.mountsLockfile.Unlock() // We need to reload mounts unconditionally, becuause by creating r.layers from scratch, we have discarded the previous // information, if any. So, obtain a fresh mountsLastWrite value so that we don’t unnecessarily reload the data // afterwards. mountsLastWrite, err := r.mountsLockfile.GetLastWrite() if err != nil { return false, err } if err := r.loadMounts(); err != nil { return false, err } r.mountsLastWrite = mountsLastWrite // NOTE: We will release mountsLockfile when this function returns, so unlike most of the layer data, the // r.layers[].MountPoint, r.layers[].MountCount, and r.bymount values might not reflect // true on-filesystem state already by the time this function returns. // Code that needs the state to be accurate must lock r.mountsLockfile again, // and possibly loadMounts() again. } if errorToResolveBySaving != nil { if !r.lockfile.IsReadWrite() { return false, fmt.Errorf("internal error: layerStore.load has shouldSave but !r.lockfile.IsReadWrite") } // Last step: try to remove anything that a previous // user of this storage area marked for deletion but didn't manage to // actually delete. var incompleteDeletionErrors error // = nil for _, layer := range r.layers { if layer.Flags == nil { layer.Flags = make(map[string]interface{}) } if layerHasIncompleteFlag(layer) { logrus.Warnf("Found incomplete layer %#v, deleting it", layer.ID) err := r.deleteInternal(layer.ID) if err != nil { // Don't return the error immediately, because deleteInternal does not saveLayers(); // Even if deleting one incomplete layer fails, call saveLayers() so that other possible successfully // deleted incomplete layers have their metadata correctly removed. incompleteDeletionErrors = multierror.Append(incompleteDeletionErrors, fmt.Errorf("deleting layer %#v: %w", layer.ID, err)) } modifiedLocations |= layerLocation(layer) } } if err := r.saveLayers(modifiedLocations); err != nil { return false, err } if incompleteDeletionErrors != nil { return false, incompleteDeletionErrors } } return false, nil } // The caller must hold r.mountsLockfile for reading or writing. // The caller must hold r.inProcessLock for WRITING. func (r *layerStore) loadMounts() error { mounts := make(map[string]*Layer) mpath := r.mountspath() data, err := os.ReadFile(mpath) if err != nil && !os.IsNotExist(err) { return err } layerMounts := []layerMountPoint{} if len(data) != 0 { if err := json.Unmarshal(data, &layerMounts); err != nil { return err } } // Clear all of our mount information. If another process // unmounted something, it (along with its zero count) won't // have been encoded into the version of mountpoints.json that // we're loading, so our count could fall out of sync with it // if we don't, and if we subsequently change something else, // we'd pass that error along to other process that reloaded // the data after we saved it. for _, layer := range r.layers { layer.MountPoint = "" layer.MountCount = 0 } // All of the non-zero count values will have been encoded, so // we reset the still-mounted ones based on the contents. 
for _, mount := range layerMounts { if mount.MountPoint != "" { if layer, ok := r.lookup(mount.ID); ok { mounts[mount.MountPoint] = layer layer.MountPoint = mount.MountPoint layer.MountCount = mount.MountCount } } } r.bymount = mounts return nil } // save saves the contents of the store to disk. // The caller must hold r.lockfile locked for writing. // The caller must hold r.inProcessLock for WRITING. func (r *layerStore) save(saveLocations layerLocations) error { r.mountsLockfile.Lock() defer r.mountsLockfile.Unlock() if err := r.saveLayers(saveLocations); err != nil { return err } return r.saveMounts() } // saveFor saves the contents of the store relevant for modifiedLayer to disk. // The caller must hold r.lockfile locked for writing. // The caller must hold r.inProcessLock for WRITING. func (r *layerStore) saveFor(modifiedLayer *Layer) error { return r.save(layerLocation(modifiedLayer)) } // The caller must hold r.lockfile locked for writing. // The caller must hold r.inProcessLock for WRITING. func (r *layerStore) saveLayers(saveLocations layerLocations) error { if !r.lockfile.IsReadWrite() { return fmt.Errorf("not allowed to modify the layer store at %q: %w", r.layerdir, ErrStoreIsReadOnly) } r.lockfile.AssertLockedForWriting() // This must be done before we write the file, because the process could be terminated // after the file is written but before the lock file is updated. lw, err := r.lockfile.RecordWrite() if err != nil { return err } r.lastWrite = lw for locationIndex := 0; locationIndex < numLayerLocationIndex; locationIndex++ { location := layerLocationFromIndex(locationIndex) if location&saveLocations == 0 { continue } rpath := r.jsonPath[locationIndex] if err := os.MkdirAll(filepath.Dir(rpath), 0o700); err != nil { return err } subsetLayers := make([]*Layer, 0, len(r.layers)) for _, layer := range r.layers { if layerLocation(layer) == location { subsetLayers = append(subsetLayers, layer) } } jldata, err := json.Marshal(&subsetLayers) if err != nil { return err } opts := ioutils.AtomicFileWriterOptions{} if location == volatileLayerLocation { opts.NoSync = true } if err := ioutils.AtomicWriteFileWithOpts(rpath, jldata, 0o600, &opts); err != nil { return err } r.layerspathsModified[locationIndex] = opts.ModTime } return nil } // The caller must hold r.mountsLockfile for writing. // The caller must hold r.inProcessLock for WRITING. func (r *layerStore) saveMounts() error { if !r.lockfile.IsReadWrite() { return fmt.Errorf("not allowed to modify the layer store at %q: %w", r.layerdir, ErrStoreIsReadOnly) } r.mountsLockfile.AssertLockedForWriting() mpath := r.mountspath() if err := os.MkdirAll(filepath.Dir(mpath), 0o700); err != nil { return err } mounts := make([]layerMountPoint, 0, len(r.layers)) for _, layer := range r.layers { if layer.MountPoint != "" && layer.MountCount > 0 { mounts = append(mounts, layerMountPoint{ ID: layer.ID, MountPoint: layer.MountPoint, MountCount: layer.MountCount, }) } } jmdata, err := json.Marshal(&mounts) if err != nil { return err } // This must be done before we write the file, because the process could be terminated // after the file is written but before the lock file is updated. 
lw, err := r.mountsLockfile.RecordWrite() if err != nil { return err } r.mountsLastWrite = lw if err = ioutils.AtomicWriteFile(mpath, jmdata, 0o600); err != nil { return err } return r.loadMounts() } func (s *store) newLayerStore(rundir, layerdir, imagedir string, driver drivers.Driver, transient bool) (rwLayerStore, error) { if err := os.MkdirAll(rundir, 0o700); err != nil { return nil, err } if err := os.MkdirAll(layerdir, 0o700); err != nil { return nil, err } if imagedir != "" { if err := os.MkdirAll(imagedir, 0o700); err != nil { return nil, err } } // Note: While the containers.lock file is in rundir for transient stores // we don't want to do this here, because the non-transient layers in // layers.json might be used externally as a read-only layer (using e.g. // additionalimagestores), and that would look for the lockfile in the // same directory var lockFiles []*lockfile.LockFile lockFile, err := lockfile.GetLockFile(filepath.Join(layerdir, "layers.lock")) if err != nil { return nil, err } lockFiles = append(lockFiles, lockFile) if imagedir != "" { lockFile, err := lockfile.GetLockFile(filepath.Join(imagedir, "layers.lock")) if err != nil { return nil, err } lockFiles = append(lockFiles, lockFile) } mountsLockfile, err := lockfile.GetLockFile(filepath.Join(rundir, "mountpoints.lock")) if err != nil { return nil, err } volatileDir := layerdir if transient { volatileDir = rundir } rlstore := layerStore{ lockfile: newMultipleLockFile(lockFiles...), mountsLockfile: mountsLockfile, rundir: rundir, jsonPath: [numLayerLocationIndex]string{ filepath.Join(layerdir, "layers.json"), filepath.Join(volatileDir, "volatile-layers.json"), }, layerdir: layerdir, byid: make(map[string]*Layer), byname: make(map[string]*Layer), bymount: make(map[string]*Layer), driver: driver, } if err := rlstore.startWritingWithReload(false); err != nil { return nil, err } defer rlstore.stopWriting() lw, err := rlstore.lockfile.GetLastWrite() if err != nil { return nil, err } rlstore.lastWrite = lw // rlstore.mountsLastWrite is initialized inside rlstore.load(). if _, err := rlstore.load(true); err != nil { return nil, err } return &rlstore, nil } func newROLayerStore(rundir string, layerdir string, driver drivers.Driver) (roLayerStore, error) { lockfile, err := lockfile.GetROLockFile(filepath.Join(layerdir, "layers.lock")) if err != nil { return nil, err } rlstore := layerStore{ lockfile: newMultipleLockFile(lockfile), mountsLockfile: nil, rundir: rundir, jsonPath: [numLayerLocationIndex]string{ filepath.Join(layerdir, "layers.json"), filepath.Join(layerdir, "volatile-layers.json"), }, layerdir: layerdir, byid: make(map[string]*Layer), byname: make(map[string]*Layer), bymount: make(map[string]*Layer), driver: driver, } if err := rlstore.startReadingWithReload(false); err != nil { return nil, err } defer rlstore.stopReading() lw, err := rlstore.lockfile.GetLastWrite() if err != nil { return nil, err } rlstore.lastWrite = lw if _, err := rlstore.load(false); err != nil { return nil, err } return &rlstore, nil } // Requires startReading or startWriting. func (r *layerStore) lookup(id string) (*Layer, bool) { if layer, ok := r.byid[id]; ok { return layer, ok } else if layer, ok := r.byname[id]; ok { return layer, ok } else if longid, err := r.idindex.Get(id); err == nil { layer, ok := r.byid[longid] return layer, ok } return nil, false } // Requires startReading or startWriting. 
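//
// A minimal usage sketch (hypothetical caller, error handling elided):
//
//	if err := r.startReading(); err != nil {
//		return -1, err
//	}
//	defer r.stopReading()
//	size, err := r.Size("layer-name-or-id")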
func (r *layerStore) Size(name string) (int64, error) { layer, ok := r.lookup(name) if !ok { return -1, ErrLayerUnknown } // We use the presence of a non-empty digest as an indicator that the size value was intentionally set, and that // a zero value is not just present because it was never set to anything else (which can happen if the layer was // created by a version of this library that didn't keep track of digest and size information). if layer.TOCDigest != "" || layer.UncompressedDigest != "" { return layer.UncompressedSize, nil } return -1, nil } // Requires startWriting. func (r *layerStore) ClearFlag(id string, flag string) error { if !r.lockfile.IsReadWrite() { return fmt.Errorf("not allowed to clear flags on layers at %q: %w", r.layerdir, ErrStoreIsReadOnly) } layer, ok := r.lookup(id) if !ok { return ErrLayerUnknown } delete(layer.Flags, flag) return r.saveFor(layer) } // Requires startWriting. func (r *layerStore) SetFlag(id string, flag string, value interface{}) error { if !r.lockfile.IsReadWrite() { return fmt.Errorf("not allowed to set flags on layers at %q: %w", r.layerdir, ErrStoreIsReadOnly) } layer, ok := r.lookup(id) if !ok { return ErrLayerUnknown } if layer.Flags == nil { layer.Flags = make(map[string]interface{}) } layer.Flags[flag] = value return r.saveFor(layer) } func (r *layerStore) Status() ([][2]string, error) { return r.driver.Status(), nil } // Requires startWriting. func (r *layerStore) PutAdditionalLayer(id string, parentLayer *Layer, names []string, aLayer drivers.AdditionalLayer) (layer *Layer, err error) { if duplicateLayer, idInUse := r.byid[id]; idInUse { return duplicateLayer, ErrDuplicateID } for _, name := range names { if _, nameInUse := r.byname[name]; nameInUse { return nil, ErrDuplicateName } } parent := "" if parentLayer != nil { parent = parentLayer.ID } info, err := aLayer.Info() if err != nil { return nil, err } defer info.Close() layer = &Layer{} if err := json.NewDecoder(info).Decode(layer); err != nil { return nil, err } layer.ID = id layer.Parent = parent layer.Created = time.Now().UTC() if err := aLayer.CreateAs(id, parent); err != nil { return nil, err } // TODO: check if necessary fields are filled r.layers = append(r.layers, layer) // This can only fail on duplicate IDs, which shouldn’t happen — and in // that case the index is already in the desired state anyway. // Implementing recovery from an unlikely and unimportant failure here // would be too risky. _ = r.idindex.Add(id) r.byid[id] = layer for _, name := range names { // names got from the additional layer store won't be used r.byname[name] = layer } if layer.CompressedDigest != "" { r.bycompressedsum[layer.CompressedDigest] = append(r.bycompressedsum[layer.CompressedDigest], layer.ID) } if layer.UncompressedDigest != "" { r.byuncompressedsum[layer.UncompressedDigest] = append(r.byuncompressedsum[layer.UncompressedDigest], layer.ID) } if layer.TOCDigest != "" { r.bytocsum[layer.TOCDigest] = append(r.bytocsum[layer.TOCDigest], layer.ID) } if err := r.saveFor(layer); err != nil { if e := r.Delete(layer.ID); e != nil { logrus.Errorf("While recovering from a failure to save layers, error deleting layer %#v: %v", id, e) } return nil, err } return copyLayer(layer), nil } // Requires startWriting. 
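//
// Informal summary of the flow implemented below:
//  1. record the layer with the incomplete flag set and save its metadata,
//  2. ask the graph driver to create the layer (optionally from a template),
//  3. apply a diff or staged-layer contents if one was supplied,
//  4. clear the incomplete flag and save again; on any error along the way,
//     Delete() is used to remove the partially created layer.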
func (r *layerStore) create(id string, parentLayer *Layer, names []string, mountLabel string, options map[string]string, moreOptions *LayerOptions, writeable bool, diff io.Reader, slo *stagedLayerOptions) (layer *Layer, size int64, err error) { if moreOptions == nil { moreOptions = &LayerOptions{} } if !r.lockfile.IsReadWrite() { return nil, -1, fmt.Errorf("not allowed to create new layers at %q: %w", r.layerdir, ErrStoreIsReadOnly) } if err := os.MkdirAll(r.rundir, 0o700); err != nil { return nil, -1, err } if err := os.MkdirAll(r.layerdir, 0o700); err != nil { return nil, -1, err } if id == "" { id = stringid.GenerateRandomID() _, idInUse := r.byid[id] for idInUse { id = stringid.GenerateRandomID() _, idInUse = r.byid[id] } } if duplicateLayer, idInUse := r.byid[id]; idInUse { return duplicateLayer, -1, ErrDuplicateID } names = dedupeStrings(names) for _, name := range names { if _, nameInUse := r.byname[name]; nameInUse { return nil, -1, ErrDuplicateName } } parent := "" if parentLayer != nil { parent = parentLayer.ID } var ( templateIDMappings *idtools.IDMappings templateMetadata string templateCompressedDigest digest.Digest templateCompressedSize int64 templateUncompressedDigest digest.Digest templateTOCDigest digest.Digest templateUncompressedSize int64 templateCompressionType archive.Compression templateUIDs, templateGIDs []uint32 templateTSdata []byte ) if moreOptions.TemplateLayer != "" { templateLayer, ok := r.lookup(moreOptions.TemplateLayer) if !ok { return nil, -1, ErrLayerUnknown } templateMetadata = templateLayer.Metadata templateIDMappings = idtools.NewIDMappingsFromMaps(templateLayer.UIDMap, templateLayer.GIDMap) templateTOCDigest = templateLayer.TOCDigest templateCompressedDigest, templateCompressedSize = templateLayer.CompressedDigest, templateLayer.CompressedSize templateUncompressedDigest, templateUncompressedSize = templateLayer.UncompressedDigest, templateLayer.UncompressedSize templateCompressionType = templateLayer.CompressionType templateUIDs, templateGIDs = append([]uint32{}, templateLayer.UIDs...), append([]uint32{}, templateLayer.GIDs...) templateTSdata, err = os.ReadFile(r.tspath(templateLayer.ID)) if err != nil && !errors.Is(err, os.ErrNotExist) { return nil, -1, err } } else { templateIDMappings = &idtools.IDMappings{} } if mountLabel != "" { selinux.ReserveLabel(mountLabel) } // Before actually creating the layer, make a persistent record of it // with the incomplete flag set, so that future processes have a chance // to clean up after it. layer = &Layer{ ID: id, Parent: parent, Names: names, MountLabel: mountLabel, Metadata: templateMetadata, Created: time.Now().UTC(), CompressedDigest: templateCompressedDigest, CompressedSize: templateCompressedSize, UncompressedDigest: templateUncompressedDigest, TOCDigest: templateTOCDigest, UncompressedSize: templateUncompressedSize, CompressionType: templateCompressionType, UIDs: templateUIDs, GIDs: templateGIDs, Flags: copyStringInterfaceMap(moreOptions.Flags), UIDMap: copyIDMap(moreOptions.UIDMap), GIDMap: copyIDMap(moreOptions.GIDMap), BigDataNames: []string{}, volatileStore: moreOptions.Volatile, } layer.Flags[incompleteFlag] = true r.layers = append(r.layers, layer) // This can only fail on duplicate IDs, which shouldn’t happen — and in // that case the index is already in the desired state anyway. // Implementing recovery from an unlikely and unimportant failure here // would be too risky.
_ = r.idindex.Add(id) r.byid[id] = layer for _, name := range names { r.byname[name] = layer } cleanupFailureContext := "" defer func() { if err != nil { // now that the in-memory structures know about the new // record, we can use regular Delete() to clean up if // anything breaks from here on out if cleanupFailureContext == "" { cleanupFailureContext = "unknown: cleanupFailureContext not set at the failure site" } if e := r.Delete(id); e != nil { logrus.Errorf("While recovering from a failure (%s), error deleting layer %#v: %v", cleanupFailureContext, id, e) } } }() if err = r.saveFor(layer); err != nil { cleanupFailureContext = "saving incomplete layer metadata" return nil, -1, err } for _, item := range moreOptions.BigData { if err = r.setBigData(layer, item.Key, item.Data); err != nil { cleanupFailureContext = fmt.Sprintf("saving big data item %q", item.Key) return nil, -1, err } } idMappings := idtools.NewIDMappingsFromMaps(moreOptions.UIDMap, moreOptions.GIDMap) opts := drivers.CreateOpts{ MountLabel: mountLabel, StorageOpt: options, IDMappings: idMappings, } var parentMappings, oldMappings *idtools.IDMappings if parentLayer != nil { parentMappings = idtools.NewIDMappingsFromMaps(parentLayer.UIDMap, parentLayer.GIDMap) } else { parentMappings = &idtools.IDMappings{} } if moreOptions.TemplateLayer != "" { if err = r.driver.CreateFromTemplate(id, moreOptions.TemplateLayer, templateIDMappings, parent, parentMappings, &opts, writeable); err != nil { cleanupFailureContext = fmt.Sprintf("creating a layer from template layer %q", moreOptions.TemplateLayer) return nil, -1, fmt.Errorf("creating copy of template layer %q with ID %q: %w", moreOptions.TemplateLayer, id, err) } oldMappings = templateIDMappings } else { if writeable { if err = r.driver.CreateReadWrite(id, parent, &opts); err != nil { cleanupFailureContext = "creating a read-write layer" return nil, -1, fmt.Errorf("creating read-write layer with ID %q: %w", id, err) } } else { if err = r.driver.Create(id, parent, &opts); err != nil { cleanupFailureContext = "creating a read-only layer" return nil, -1, fmt.Errorf("creating read-only layer with ID %q: %w", id, err) } } if parentLayer != nil { oldMappings = parentMappings } } if oldMappings != nil && (!reflect.DeepEqual(oldMappings.UIDs(), idMappings.UIDs()) || !reflect.DeepEqual(oldMappings.GIDs(), idMappings.GIDs())) { if err = r.driver.UpdateLayerIDMap(id, oldMappings, idMappings, mountLabel); err != nil { cleanupFailureContext = "in UpdateLayerIDMap" return nil, -1, err } } if len(templateTSdata) > 0 { if err = os.MkdirAll(filepath.Dir(r.tspath(id)), 0o700); err != nil { cleanupFailureContext = "creating tar-split parent directory for a copy from template" return nil, -1, err } if err = ioutils.AtomicWriteFile(r.tspath(id), templateTSdata, 0o600); err != nil { cleanupFailureContext = "creating a tar-split copy from template" return nil, -1, err } } size = -1 if diff != nil { if size, err = r.applyDiffWithOptions(layer.ID, moreOptions, diff); err != nil { cleanupFailureContext = "applying layer diff" return nil, -1, err } } else if slo != nil { if err := r.applyDiffFromStagingDirectory(layer.ID, slo.DiffOutput, slo.DiffOptions); err != nil { cleanupFailureContext = "applying staged directory diff" return nil, -1, err } } else { // applyDiffWithOptions() would have updated r.bycompressedsum // and r.byuncompressedsum for us, but if we used a template // layer, we didn't call it, so add the new layer as candidates // for searches for layers by checksum if layer.CompressedDigest != 
"" { r.bycompressedsum[layer.CompressedDigest] = append(r.bycompressedsum[layer.CompressedDigest], layer.ID) } if layer.UncompressedDigest != "" { r.byuncompressedsum[layer.UncompressedDigest] = append(r.byuncompressedsum[layer.UncompressedDigest], layer.ID) } if layer.TOCDigest != "" { r.bytocsum[layer.TOCDigest] = append(r.bytocsum[layer.TOCDigest], layer.ID) } } delete(layer.Flags, incompleteFlag) if err = r.saveFor(layer); err != nil { cleanupFailureContext = "saving finished layer metadata" return nil, -1, err } layer = copyLayer(layer) return layer, size, err } // Requires startReading or startWriting. func (r *layerStore) Mounted(id string) (int, error) { if !r.lockfile.IsReadWrite() { return 0, fmt.Errorf("no mount information for layers at %q: %w", r.mountspath(), ErrStoreIsReadOnly) } layer, ok := r.lookup(id) if !ok { return 0, ErrLayerUnknown } // NOTE: The caller of this function is not holding (currently cannot hold) r.mountsLockfile, // so the data is necessarily obsolete by the time this function returns. So, we don’t even // try to reload it in this function, we just rely on r.load() that happened during // r.startReading() or r.startWriting(). return layer.MountCount, nil } // Requires startWriting. func (r *layerStore) Mount(id string, options drivers.MountOpts) (string, error) { // LOCKING BUG: This is reachable via store.Diff → layerStore.Diff → layerStore.newFileGetter // (with btrfs and zfs graph drivers) holding layerStore only locked for reading, while it modifies // - r.layers[].MountCount (directly and via loadMounts / saveMounts) // - r.layers[].MountPoint (directly and via loadMounts / saveMounts) // - r.bymount (via loadMounts / saveMounts) // check whether options include ro option hasReadOnlyOpt := func(opts []string) bool { for _, item := range opts { if item == "ro" { return true } } return false } // You are not allowed to mount layers from readonly stores if they // are not mounted read/only. if !r.lockfile.IsReadWrite() && !hasReadOnlyOpt(options.Options) { return "", fmt.Errorf("not allowed to update mount locations for layers at %q: %w", r.mountspath(), ErrStoreIsReadOnly) } r.mountsLockfile.Lock() defer r.mountsLockfile.Unlock() if err := r.reloadMountsIfChanged(); err != nil { return "", err } layer, ok := r.lookup(id) if !ok { return "", ErrLayerUnknown } if layer.MountCount > 0 { mounted, err := mount.Mounted(layer.MountPoint) if err != nil { return "", err } // If the container is not mounted then we have a condition // where the kernel umounted the mount point. This means // that the mount count never got decremented. if mounted { layer.MountCount++ return layer.MountPoint, r.saveMounts() } } if options.MountLabel == "" { options.MountLabel = layer.MountLabel } if (options.UidMaps != nil || options.GidMaps != nil) && !r.driver.SupportsShifting() { if !reflect.DeepEqual(options.UidMaps, layer.UIDMap) || !reflect.DeepEqual(options.GidMaps, layer.GIDMap) { return "", fmt.Errorf("cannot mount layer %v: shifting not enabled", layer.ID) } } mountpoint, err := r.driver.Get(id, options) if mountpoint != "" && err == nil { if layer.MountPoint != "" { delete(r.bymount, layer.MountPoint) } layer.MountPoint = filepath.Clean(mountpoint) layer.MountCount++ r.bymount[layer.MountPoint] = layer err = r.saveMounts() } return mountpoint, err } // Requires startWriting. 
func (r *layerStore) unmount(id string, force bool, conditional bool) (bool, error) { // LOCKING BUG: This is reachable via store.Diff → layerStore.Diff → layerStore.newFileGetter → simpleGetCloser.Close() // (with btrfs and zfs graph drivers) holding layerStore only locked for reading, while it modifies // - r.layers[].MountCount (directly and via loadMounts / saveMounts) // - r.layers[].MountPoint (directly and via loadMounts / saveMounts) // - r.bymount (via loadMounts / saveMounts) if !r.lockfile.IsReadWrite() { return false, fmt.Errorf("not allowed to update mount locations for layers at %q: %w", r.mountspath(), ErrStoreIsReadOnly) } r.mountsLockfile.Lock() defer r.mountsLockfile.Unlock() if err := r.reloadMountsIfChanged(); err != nil { return false, err } layer, ok := r.lookup(id) if !ok { layerByMount, ok := r.bymount[filepath.Clean(id)] if !ok { return false, ErrLayerUnknown } layer = layerByMount } if conditional && layer.MountCount == 0 { return false, ErrLayerNotMounted } if force { layer.MountCount = 1 } if layer.MountCount > 1 { layer.MountCount-- return true, r.saveMounts() } err := r.driver.Put(id) if err == nil || os.IsNotExist(err) { if layer.MountPoint != "" { delete(r.bymount, layer.MountPoint) } layer.MountCount-- layer.MountPoint = "" return false, r.saveMounts() } return true, err } // Requires startReading or startWriting. func (r *layerStore) ParentOwners(id string) (uids, gids []int, err error) { if !r.lockfile.IsReadWrite() { return nil, nil, fmt.Errorf("no mount information for layers at %q: %w", r.mountspath(), ErrStoreIsReadOnly) } r.mountsLockfile.RLock() defer r.mountsLockfile.Unlock() // We are not checking r.mountsLockfile.Modified() and calling r.loadMounts here because the store // is only locked for reading = we are not allowed to modify layer data. // Holding r.mountsLockfile protects us against concurrent mount/unmount operations. layer, ok := r.lookup(id) if !ok { return nil, nil, ErrLayerUnknown } if len(layer.UIDMap) == 0 && len(layer.GIDMap) == 0 { // We're not using any mappings, so there aren't any unmapped IDs on parent directories. return nil, nil, nil } if layer.MountPoint == "" { // We don't know which directories to examine. return nil, nil, ErrLayerNotMounted } // Holding r.mountsLockfile protects us against concurrent mount/unmount operations, but we didn’t // hold it continuously since the time we loaded the mount data; so it’s possible the layer // was unmounted in the meantime, or mounted elsewhere. Treat that as if we were run after the unmount, // = a missing mount, not a filesystem error. 
if _, err := system.Lstat(layer.MountPoint); errors.Is(err, os.ErrNotExist) { return nil, nil, ErrLayerNotMounted } rootuid, rootgid, err := idtools.GetRootUIDGID(layer.UIDMap, layer.GIDMap) if err != nil { return nil, nil, fmt.Errorf("reading root ID values for layer %q: %w", layer.ID, err) } m := idtools.NewIDMappingsFromMaps(layer.UIDMap, layer.GIDMap) fsuids := make(map[int]struct{}) fsgids := make(map[int]struct{}) for dir := filepath.Dir(layer.MountPoint); dir != "" && dir != string(os.PathSeparator); dir = filepath.Dir(dir) { st, err := system.Stat(dir) if err != nil { return nil, nil, fmt.Errorf("read directory ownership: %w", err) } lst, err := system.Lstat(dir) if err != nil { return nil, nil, err } fsuid := int(st.UID()) fsgid := int(st.GID()) if _, _, err := m.ToContainer(idtools.IDPair{UID: fsuid, GID: rootgid}); err != nil { fsuids[fsuid] = struct{}{} } if _, _, err := m.ToContainer(idtools.IDPair{UID: rootuid, GID: fsgid}); err != nil { fsgids[fsgid] = struct{}{} } fsuid = int(lst.UID()) fsgid = int(lst.GID()) if _, _, err := m.ToContainer(idtools.IDPair{UID: fsuid, GID: rootgid}); err != nil { fsuids[fsuid] = struct{}{} } if _, _, err := m.ToContainer(idtools.IDPair{UID: rootuid, GID: fsgid}); err != nil { fsgids[fsgid] = struct{}{} } } for uid := range fsuids { uids = append(uids, uid) } for gid := range fsgids { gids = append(gids, gid) } if len(uids) > 1 { sort.Ints(uids) } if len(gids) > 1 { sort.Ints(gids) } return uids, gids, nil } // Requires startWriting. func (r *layerStore) removeName(layer *Layer, name string) { layer.Names = stringSliceWithoutValue(layer.Names, name) } // Requires startWriting. func (r *layerStore) updateNames(id string, names []string, op updateNameOperation) error { if !r.lockfile.IsReadWrite() { return fmt.Errorf("not allowed to change layer name assignments at %q: %w", r.layerdir, ErrStoreIsReadOnly) } layer, ok := r.lookup(id) if !ok { return ErrLayerUnknown } oldNames := layer.Names names, err := applyNameOperation(oldNames, names, op) if err != nil { return err } for _, name := range oldNames { delete(r.byname, name) } for _, name := range names { if otherLayer, ok := r.byname[name]; ok { r.removeName(otherLayer, name) } r.byname[name] = layer } layer.Names = names return r.saveFor(layer) } func (r *layerStore) datadir(id string) string { return filepath.Join(r.layerdir, id) } func (r *layerStore) datapath(id, key string) string { return filepath.Join(r.datadir(id), makeBigDataBaseName(key)) } // Requires startReading or startWriting. func (r *layerStore) BigData(id, key string) (io.ReadCloser, error) { if key == "" { return nil, fmt.Errorf("can't retrieve layer big data value for empty name: %w", ErrInvalidBigDataName) } layer, ok := r.lookup(id) if !ok { return nil, fmt.Errorf("locating layer with ID %q: %w", id, ErrLayerUnknown) } return os.Open(r.datapath(layer.ID, key)) } // Requires startWriting. 
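//
// A minimal usage sketch (hypothetical names, error handling elided):
//
//	if err := r.startWriting(); err != nil {
//		return err
//	}
//	defer r.stopWriting()
//	err = r.SetBigData(layerID, "my-key", bytes.NewReader(payload))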
func (r *layerStore) SetBigData(id, key string, data io.Reader) error {
	if !r.lockfile.IsReadWrite() {
		return fmt.Errorf("not allowed to save data items associated with layers at %q: %w", r.layerdir, ErrStoreIsReadOnly)
	}
	layer, ok := r.lookup(id)
	if !ok {
		return fmt.Errorf("locating layer with ID %q to write bigdata: %w", id, ErrLayerUnknown)
	}
	return r.setBigData(layer, key, data)
}

func (r *layerStore) setBigData(layer *Layer, key string, data io.Reader) error {
	if key == "" {
		return fmt.Errorf("can't set empty name for layer big data item: %w", ErrInvalidBigDataName)
	}
	err := os.MkdirAll(r.datadir(layer.ID), 0o700)
	if err != nil {
		return err
	}

	// NewAtomicFileWriter doesn't overwrite/truncate the existing inode.
	// BigData() relies on this behaviour when opening the file for read
	// so that it is either accessing the old data or the new one.
	writer, err := ioutils.NewAtomicFileWriter(r.datapath(layer.ID, key), 0o600)
	if err != nil {
		return fmt.Errorf("opening bigdata file: %w", err)
	}

	if _, err := io.Copy(writer, data); err != nil {
		writer.Close()
		return fmt.Errorf("copying bigdata for the layer: %w", err)
	}
	if err := writer.Close(); err != nil {
		return fmt.Errorf("closing bigdata file for the layer: %w", err)
	}

	addName := true
	for _, name := range layer.BigDataNames {
		if name == key {
			addName = false
			break
		}
	}
	if addName {
		layer.BigDataNames = append(layer.BigDataNames, key)
		return r.saveFor(layer)
	}
	return nil
}

// Requires startReading or startWriting.
func (r *layerStore) BigDataNames(id string) ([]string, error) {
	layer, ok := r.lookup(id)
	if !ok {
		return nil, fmt.Errorf("locating layer with ID %q to retrieve bigdata names: %w", id, ErrLayerUnknown)
	}
	return copyStringSlice(layer.BigDataNames), nil
}

// Requires startReading or startWriting.
func (r *layerStore) Metadata(id string) (string, error) {
	if layer, ok := r.lookup(id); ok {
		return layer.Metadata, nil
	}
	return "", ErrLayerUnknown
}

// Requires startWriting.
func (r *layerStore) SetMetadata(id, metadata string) error {
	if !r.lockfile.IsReadWrite() {
		return fmt.Errorf("not allowed to modify layer metadata at %q: %w", r.layerdir, ErrStoreIsReadOnly)
	}
	if layer, ok := r.lookup(id); ok {
		layer.Metadata = metadata
		return r.saveFor(layer)
	}
	return ErrLayerUnknown
}

func (r *layerStore) tspath(id string) string {
	return filepath.Join(r.layerdir, id+tarSplitSuffix)
}

// layerHasIncompleteFlag returns true if layer.Flags contains an incompleteFlag set to true.
// The caller must hold r.inProcessLock for reading.
func layerHasIncompleteFlag(layer *Layer) bool {
	if layer.Flags == nil {
		return false
	}
	if flagValue, ok := layer.Flags[incompleteFlag]; ok {
		if b, ok := flagValue.(bool); ok && b {
			return true
		}
	}
	return false
}

// Requires startWriting.
func (r *layerStore) deleteInternal(id string) error {
	if !r.lockfile.IsReadWrite() {
		return fmt.Errorf("not allowed to delete layers at %q: %w", r.layerdir, ErrStoreIsReadOnly)
	}
	layer, ok := r.lookup(id)
	if !ok {
		return ErrLayerUnknown
	}
	// Ensure that if we are interrupted, the layer will be cleaned up.
	if !layerHasIncompleteFlag(layer) {
		if layer.Flags == nil {
			layer.Flags = make(map[string]interface{})
		}
		layer.Flags[incompleteFlag] = true
		if err := r.saveFor(layer); err != nil {
			return err
		}
	}
	// We never unset incompleteFlag; below, we remove the entire object from r.layers.
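	// If we are interrupted anywhere below, the layer keeps incompleteFlag;
	// a later load of the store is expected to notice the flag and retry this
	// deletion (see maxLayerStoreCleanupIterations above).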
id = layer.ID if err := r.driver.Remove(id); err != nil && !errors.Is(err, os.ErrNotExist) { return err } os.Remove(r.tspath(id)) os.RemoveAll(r.datadir(id)) delete(r.byid, id) for _, name := range layer.Names { delete(r.byname, name) } // This can only fail if the ID is already missing, which shouldn’t // happen — and in that case the index is already in the desired state // anyway. The store’s Delete method is used on various paths to // recover from failures, so this should be robust against partially // missing data. _ = r.idindex.Delete(id) mountLabel := layer.MountLabel if layer.MountPoint != "" { delete(r.bymount, layer.MountPoint) } r.deleteInDigestMap(id) toDeleteIndex := -1 for i, candidate := range r.layers { if candidate.ID == id { toDeleteIndex = i break } } if toDeleteIndex != -1 { // delete the layer at toDeleteIndex if toDeleteIndex == len(r.layers)-1 { r.layers = r.layers[:len(r.layers)-1] } else { r.layers = append(r.layers[:toDeleteIndex], r.layers[toDeleteIndex+1:]...) } } if mountLabel != "" { var found bool for _, candidate := range r.layers { if candidate.MountLabel == mountLabel { found = true break } } if !found { selinux.ReleaseLabel(mountLabel) } } return nil } // Requires startWriting. func (r *layerStore) deleteInDigestMap(id string) { for digest, layers := range r.bycompressedsum { for i, layerID := range layers { if layerID == id { layers = append(layers[:i], layers[i+1:]...) r.bycompressedsum[digest] = layers break } } } for digest, layers := range r.byuncompressedsum { for i, layerID := range layers { if layerID == id { layers = append(layers[:i], layers[i+1:]...) r.byuncompressedsum[digest] = layers break } } } } // Requires startWriting. func (r *layerStore) Delete(id string) error { layer, ok := r.lookup(id) if !ok { return ErrLayerUnknown } id = layer.ID // The layer may already have been explicitly unmounted, but if not, we // should try to clean that up before we start deleting anything at the // driver level. for { _, err := r.unmount(id, false, true) if err == ErrLayerNotMounted { break } if err != nil { return err } } if err := r.deleteInternal(id); err != nil { return err } return r.saveFor(layer) } // Requires startReading or startWriting. func (r *layerStore) Exists(id string) bool { _, ok := r.lookup(id) return ok } // Requires startReading or startWriting. func (r *layerStore) Get(id string) (*Layer, error) { if layer, ok := r.lookup(id); ok { return copyLayer(layer), nil } return nil, ErrLayerUnknown } // Requires startWriting. func (r *layerStore) Wipe() error { if !r.lockfile.IsReadWrite() { return fmt.Errorf("not allowed to delete layers at %q: %w", r.layerdir, ErrStoreIsReadOnly) } ids := make([]string, 0, len(r.byid)) for id := range r.byid { ids = append(ids, id) } sort.Slice(ids, func(i, j int) bool { return r.byid[ids[i]].Created.After(r.byid[ids[j]].Created) }) for _, id := range ids { if err := r.Delete(id); err != nil { return err } } ids, err := r.driver.ListLayers() if err != nil { if !errors.Is(err, drivers.ErrNotSupported) { return err } ids = nil } for _, id := range ids { if err := r.driver.Remove(id); err != nil { return err } } return nil } // Requires startReading or startWriting. 
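//
// findParentAndLayer resolves "to" to an existing layer and, if "from" is
// empty, defaults it to that layer's parent; it returns the canonical IDs
// together with the in-memory records. fromLayer may be nil when the parent
// layer is not present in this store.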
func (r *layerStore) findParentAndLayer(from, to string) (fromID string, toID string, fromLayer, toLayer *Layer, err error) { var ok bool toLayer, ok = r.lookup(to) if !ok { return "", "", nil, nil, ErrLayerUnknown } to = toLayer.ID if from == "" { from = toLayer.Parent } if from != "" { fromLayer, ok = r.lookup(from) if ok { from = fromLayer.ID } else { fromLayer, ok = r.lookup(toLayer.Parent) if ok { from = fromLayer.ID } } } return from, to, fromLayer, toLayer, nil } // The caller must hold r.inProcessLock for reading. func (r *layerStore) layerMappings(layer *Layer) *idtools.IDMappings { if layer == nil { return &idtools.IDMappings{} } return idtools.NewIDMappingsFromMaps(layer.UIDMap, layer.GIDMap) } // Requires startReading or startWriting. func (r *layerStore) Changes(from, to string) ([]archive.Change, error) { from, to, fromLayer, toLayer, err := r.findParentAndLayer(from, to) if err != nil { return nil, ErrLayerUnknown } return r.driver.Changes(to, r.layerMappings(toLayer), from, r.layerMappings(fromLayer), toLayer.MountLabel) } type simpleGetCloser struct { r *layerStore path string id string } func (s *simpleGetCloser) Get(path string) (io.ReadCloser, error) { return os.Open(filepath.Join(s.path, path)) } // LOCKING BUG: See the comments in layerStore.Diff func (s *simpleGetCloser) Close() error { _, err := s.r.unmount(s.id, false, false) return err } // LOCKING BUG: See the comments in layerStore.Diff func (r *layerStore) newFileGetter(id string) (drivers.FileGetCloser, error) { if getter, ok := r.driver.(drivers.DiffGetterDriver); ok { fgc, err := getter.DiffGetter(id) if err != nil { return nil, err } if fgc != nil { return fgc, nil } } path, err := r.Mount(id, drivers.MountOpts{Options: []string{"ro"}}) if err != nil { return nil, err } return &simpleGetCloser{ r: r, path: path, id: id, }, nil } // writeCompressedData copies data from source to compressor, which is on top of pwriter. func writeCompressedData(compressor io.WriteCloser, source io.ReadCloser) error { defer compressor.Close() defer source.Close() _, err := io.Copy(compressor, source) return err } // writeCompressedDataGoroutine copies data from source to compressor, which is on top of pwriter. // All error must be reported by updating pwriter. func writeCompressedDataGoroutine(pwriter *io.PipeWriter, compressor io.WriteCloser, source io.ReadCloser) { err := errors.New("internal error: unexpected panic in writeCompressedDataGoroutine") defer func() { // Note that this is not the same as {defer dest.CloseWithError(err)}; we need err to be evaluated lazily. _ = pwriter.CloseWithError(err) // CloseWithError(nil) is equivalent to Close(), always returns nil }() err = writeCompressedData(compressor, source) } // Requires startReading or startWriting. func (r *layerStore) Diff(from, to string, options *DiffOptions) (io.ReadCloser, error) { var metadata storage.Unpacker from, to, fromLayer, toLayer, err := r.findParentAndLayer(from, to) if err != nil { return nil, ErrLayerUnknown } // Default to applying the type of compression that we noted was used // for the layerdiff when it was applied. compression := toLayer.CompressionType // If a particular compression type (or no compression) was selected, // use that instead. 
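	// For example, a caller that wants the diff re-emitted uncompressed,
	// regardless of how it was originally applied, can do (sketch; the
	// variable names are illustrative):
	//
	//	c := archive.Uncompressed
	//	rc, err := layers.Diff("", layerID, &DiffOptions{Compression: &c})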
if options != nil && options.Compression != nil { compression = *options.Compression } maybeCompressReadCloser := func(rc io.ReadCloser) (io.ReadCloser, error) { // Depending on whether or not compression is desired, return either the // passed-in ReadCloser, or a new one that provides its readers with a // compressed version of the data that the original would have provided // to its readers. if compression == archive.Uncompressed { return rc, nil } preader, pwriter := io.Pipe() compressor, err := archive.CompressStream(pwriter, compression) if err != nil { rc.Close() pwriter.Close() preader.Close() return nil, err } go writeCompressedDataGoroutine(pwriter, compressor, rc) return preader, nil } if from != toLayer.Parent { diff, err := r.driver.Diff(to, r.layerMappings(toLayer), from, r.layerMappings(fromLayer), toLayer.MountLabel) if err != nil { return nil, err } return maybeCompressReadCloser(diff) } if ad, ok := r.driver.(drivers.AdditionalLayerStoreDriver); ok { if aLayer, err := ad.LookupAdditionalLayerByID(to); err == nil { // This is an additional layer. We leverage blob API for acquiring the reproduced raw blob. info, err := aLayer.Info() if err != nil { aLayer.Release() return nil, err } defer info.Close() layer := &Layer{} if err := json.NewDecoder(info).Decode(layer); err != nil { aLayer.Release() return nil, err } blob, err := aLayer.Blob() if err != nil { aLayer.Release() return nil, err } // If layer compression type is different from the expected one, decompress and convert it. if compression != layer.CompressionType { diff, err := archive.DecompressStream(blob) if err != nil { if err2 := blob.Close(); err2 != nil { err = fmt.Errorf("failed to close blob file: %v: %w", err2, err) } aLayer.Release() return nil, err } rc, err := maybeCompressReadCloser(diff) if err != nil { if err2 := closeAll(blob.Close, diff.Close); err2 != nil { err = fmt.Errorf("failed to cleanup: %v: %w", err2, err) } aLayer.Release() return nil, err } return ioutils.NewReadCloserWrapper(rc, func() error { defer aLayer.Release() return closeAll(blob.Close, rc.Close) }), nil } return ioutils.NewReadCloserWrapper(blob, func() error { defer aLayer.Release(); return blob.Close() }), nil } } tsfile, err := os.Open(r.tspath(to)) if err != nil { if !os.IsNotExist(err) { return nil, err } diff, err := r.driver.Diff(to, r.layerMappings(toLayer), from, r.layerMappings(fromLayer), toLayer.MountLabel) if err != nil { return nil, err } return maybeCompressReadCloser(diff) } decompressor, err := pgzip.NewReader(tsfile) if err != nil { if e := tsfile.Close(); e != nil { logrus.Debug(e) } return nil, err } metadata = storage.NewJSONUnpacker(decompressor) // LOCKING BUG: With btrfs and zfs graph drivers), this uses r.Mount() and r.unmount() holding layerStore only locked for reading // but they modify in-memory state. 
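	// From here on the diff is reconstructed rather than regenerated: the
	// tar-split metadata read from r.tspath(to) supplies the exact tar
	// headers and their ordering, the file getter below supplies file
	// contents (from the graph driver's DiffGetter if it offers one,
	// otherwise from a read-only mount of the layer), and
	// asm.NewOutputTarStream stitches the two back into the original tar
	// stream.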
fgetter, err := r.newFileGetter(to) if err != nil { errs := multierror.Append(nil, fmt.Errorf("creating file-getter: %w", err)) if err := decompressor.Close(); err != nil { errs = multierror.Append(errs, fmt.Errorf("closing decompressor: %w", err)) } if err := tsfile.Close(); err != nil { errs = multierror.Append(errs, fmt.Errorf("closing tarstream headers: %w", err)) } return nil, errs.ErrorOrNil() } tarstream := asm.NewOutputTarStream(fgetter, metadata) rc := ioutils.NewReadCloserWrapper(tarstream, func() error { var errs *multierror.Error if err := decompressor.Close(); err != nil { errs = multierror.Append(errs, fmt.Errorf("closing decompressor: %w", err)) } if err := tsfile.Close(); err != nil { errs = multierror.Append(errs, fmt.Errorf("closing tarstream headers: %w", err)) } if err := tarstream.Close(); err != nil { errs = multierror.Append(errs, fmt.Errorf("closing reconstructed tarstream: %w", err)) } if err := fgetter.Close(); err != nil { errs = multierror.Append(errs, fmt.Errorf("closing file-getter: %w", err)) } if errs != nil { return errs.ErrorOrNil() } return nil }) return maybeCompressReadCloser(rc) } // Requires startReading or startWriting. func (r *layerStore) DiffSize(from, to string) (size int64, err error) { var fromLayer, toLayer *Layer from, to, fromLayer, toLayer, err = r.findParentAndLayer(from, to) if err != nil { return -1, ErrLayerUnknown } return r.driver.DiffSize(to, r.layerMappings(toLayer), from, r.layerMappings(fromLayer), toLayer.MountLabel) } func updateDigestMap(m *map[digest.Digest][]string, oldvalue, newvalue digest.Digest, id string) { var newList []string if oldvalue != "" { for _, value := range (*m)[oldvalue] { if value != id { newList = append(newList, value) } } if len(newList) > 0 { (*m)[oldvalue] = newList } else { delete(*m, oldvalue) } } if newvalue != "" { (*m)[newvalue] = append((*m)[newvalue], id) } } // Requires startWriting. func (r *layerStore) ApplyDiff(to string, diff io.Reader) (size int64, err error) { return r.applyDiffWithOptions(to, nil, diff) } // Requires startWriting. 
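//
// applyDiffWithOptions streams the (possibly compressed) diff into the graph
// driver while, in a single pass, recording the tar-split metadata next to
// the layer, computing whichever compressed/uncompressed digests and sizes
// were not already supplied in layerOptions, and collecting the UIDs and
// GIDs seen in the archive; the results are written back into the layer
// record.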
func (r *layerStore) applyDiffWithOptions(to string, layerOptions *LayerOptions, diff io.Reader) (size int64, err error) { if !r.lockfile.IsReadWrite() { return -1, fmt.Errorf("not allowed to modify layer contents at %q: %w", r.layerdir, ErrStoreIsReadOnly) } layer, ok := r.lookup(to) if !ok { return -1, ErrLayerUnknown } header := make([]byte, 10240) n, err := diff.Read(header) if err != nil && err != io.EOF { return -1, err } compression := archive.DetectCompression(header[:n]) defragmented := io.MultiReader(bytes.NewReader(header[:n]), diff) // Decide if we need to compute digests var compressedDigest, uncompressedDigest digest.Digest // = "" var compressedDigester, uncompressedDigester digest.Digester // = nil if layerOptions != nil && layerOptions.OriginalDigest != "" && layerOptions.OriginalDigest.Algorithm() == digest.Canonical { compressedDigest = layerOptions.OriginalDigest } else { compressedDigester = digest.Canonical.Digester() } if layerOptions != nil && layerOptions.UncompressedDigest != "" && layerOptions.UncompressedDigest.Algorithm() == digest.Canonical { uncompressedDigest = layerOptions.UncompressedDigest } else if compression != archive.Uncompressed { uncompressedDigester = digest.Canonical.Digester() } var compressedWriter io.Writer if compressedDigester != nil { compressedWriter = compressedDigester.Hash() } else { compressedWriter = io.Discard } compressedCounter := ioutils.NewWriteCounter(compressedWriter) defragmented = io.TeeReader(defragmented, compressedCounter) tsdata := bytes.Buffer{} uidLog := make(map[uint32]struct{}) gidLog := make(map[uint32]struct{}) var uncompressedCounter *ioutils.WriteCounter size, err = func() (int64, error) { // A scope for defer compressor, err := pgzip.NewWriterLevel(&tsdata, pgzip.BestSpeed) if err != nil { return -1, err } defer compressor.Close() // This must happen before tsdata is consumed. if err := compressor.SetConcurrency(1024*1024, 1); err != nil { // 1024*1024 is the hard-coded default; we're not changing that logrus.Infof("setting compression concurrency threads to 1: %v; ignoring", err) } metadata := storage.NewJSONPacker(compressor) uncompressed, err := archive.DecompressStream(defragmented) if err != nil { return -1, err } defer uncompressed.Close() idLogger, err := tarlog.NewLogger(func(h *tar.Header) { if !strings.HasPrefix(path.Base(h.Name), archive.WhiteoutPrefix) { uidLog[uint32(h.Uid)] = struct{}{} gidLog[uint32(h.Gid)] = struct{}{} } }) if err != nil { return -1, err } defer idLogger.Close() // This must happen before uidLog and gidLog is consumed. 
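	// Plumbing overview: the compressed bytes are tee'd through
	// compressedCounter (and compressedDigester, when set) before
	// decompression; the decompressed stream is counted by
	// uncompressedCounter, optionally hashed by uncompressedDigester, fed to
	// idLogger to collect UIDs/GIDs, and packed into tar-split metadata with
	// file contents discarded (storage.NewDiscardFilePutter), since the
	// contents themselves reach the driver via the payload stream.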
uncompressedCounter = ioutils.NewWriteCounter(idLogger) uncompressedWriter := (io.Writer)(uncompressedCounter) if uncompressedDigester != nil { uncompressedWriter = io.MultiWriter(uncompressedWriter, uncompressedDigester.Hash()) } payload, err := asm.NewInputTarStream(io.TeeReader(uncompressed, uncompressedWriter), metadata, storage.NewDiscardFilePutter()) if err != nil { return -1, err } options := drivers.ApplyDiffOpts{ Diff: payload, Mappings: r.layerMappings(layer), MountLabel: layer.MountLabel, } size, err := r.driver.ApplyDiff(layer.ID, layer.Parent, options) if err != nil { return -1, err } return size, err }() if err != nil { return -1, err } if err := os.MkdirAll(filepath.Dir(r.tspath(layer.ID)), 0o700); err != nil { return -1, err } if err := ioutils.AtomicWriteFile(r.tspath(layer.ID), tsdata.Bytes(), 0o600); err != nil { return -1, err } if compressedDigester != nil { compressedDigest = compressedDigester.Digest() } if uncompressedDigester != nil { uncompressedDigest = uncompressedDigester.Digest() } if uncompressedDigest == "" && compression == archive.Uncompressed { uncompressedDigest = compressedDigest } updateDigestMap(&r.bycompressedsum, layer.CompressedDigest, compressedDigest, layer.ID) layer.CompressedDigest = compressedDigest if layerOptions != nil && layerOptions.OriginalDigest != "" && layerOptions.OriginalSize != nil { layer.CompressedSize = *layerOptions.OriginalSize } else { layer.CompressedSize = compressedCounter.Count } updateDigestMap(&r.byuncompressedsum, layer.UncompressedDigest, uncompressedDigest, layer.ID) layer.UncompressedDigest = uncompressedDigest layer.UncompressedSize = uncompressedCounter.Count layer.CompressionType = compression layer.UIDs = make([]uint32, 0, len(uidLog)) for uid := range uidLog { layer.UIDs = append(layer.UIDs, uid) } sort.Slice(layer.UIDs, func(i, j int) bool { return layer.UIDs[i] < layer.UIDs[j] }) layer.GIDs = make([]uint32, 0, len(gidLog)) for gid := range gidLog { layer.GIDs = append(layer.GIDs, gid) } sort.Slice(layer.GIDs, func(i, j int) bool { return layer.GIDs[i] < layer.GIDs[j] }) err = r.saveFor(layer) return size, err } // Requires (startReading or?) startWriting. func (r *layerStore) DifferTarget(id string) (string, error) { ddriver, ok := r.driver.(drivers.DriverWithDiffer) if !ok { return "", ErrNotSupported } layer, ok := r.lookup(id) if !ok { return "", ErrLayerUnknown } return ddriver.DifferTarget(layer.ID) } // Requires startWriting. 
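//
// applyDiffFromStagingDirectory finalizes a diff that a differ has already
// extracted (typically via ApplyDiffWithDiffer into the location reported by
// DifferTarget): it asks the driver to move the staged content into place
// and then copies the differ's digests, sizes, ID lists, tar-split data and
// big-data items into the layer record.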
func (r *layerStore) applyDiffFromStagingDirectory(id string, diffOutput *drivers.DriverWithDifferOutput, options *drivers.ApplyDiffWithDifferOpts) error {
	ddriver, ok := r.driver.(drivers.DriverWithDiffer)
	if !ok {
		return ErrNotSupported
	}
	layer, ok := r.lookup(id)
	if !ok {
		return ErrLayerUnknown
	}
	if options == nil {
		options = &drivers.ApplyDiffWithDifferOpts{
			ApplyDiffOpts: drivers.ApplyDiffOpts{
				Mappings:   r.layerMappings(layer),
				MountLabel: layer.MountLabel,
			},
			Flags: nil,
		}
	}

	err := ddriver.ApplyDiffFromStagingDirectory(layer.ID, layer.Parent, diffOutput, options)
	if err != nil {
		return err
	}
	layer.UIDs = diffOutput.UIDs
	layer.GIDs = diffOutput.GIDs
	updateDigestMap(&r.byuncompressedsum, layer.UncompressedDigest, diffOutput.UncompressedDigest, layer.ID)
	layer.UncompressedDigest = diffOutput.UncompressedDigest
	updateDigestMap(&r.bytocsum, layer.TOCDigest, diffOutput.TOCDigest, layer.ID)
	layer.TOCDigest = diffOutput.TOCDigest
	layer.UncompressedSize = diffOutput.Size
	layer.Metadata = diffOutput.Metadata
	if options != nil && options.Flags != nil {
		if layer.Flags == nil {
			layer.Flags = make(map[string]interface{})
		}
		for k, v := range options.Flags {
			layer.Flags[k] = v
		}
	}
	if err = r.saveFor(layer); err != nil {
		return err
	}

	if len(diffOutput.TarSplit) != 0 {
		tsdata := bytes.Buffer{}
		compressor, err := pgzip.NewWriterLevel(&tsdata, pgzip.BestSpeed)
		if err != nil { // should not happen with a valid constant level; fall back to the default writer
			compressor = pgzip.NewWriter(&tsdata)
		}
		if err := compressor.SetConcurrency(1024*1024, 1); err != nil { // 1024*1024 is the hard-coded default; we're not changing that
			logrus.Infof("setting compression concurrency threads to 1: %v; ignoring", err)
		}
		if _, err := compressor.Write(diffOutput.TarSplit); err != nil {
			compressor.Close()
			return err
		}
		compressor.Close()
		if err := os.MkdirAll(filepath.Dir(r.tspath(layer.ID)), 0o700); err != nil {
			return err
		}
		if err := ioutils.AtomicWriteFile(r.tspath(layer.ID), tsdata.Bytes(), 0o600); err != nil {
			return err
		}
	}
	for k, v := range diffOutput.BigData {
		if err := r.SetBigData(id, k, bytes.NewReader(v)); err != nil {
			if err2 := r.Delete(id); err2 != nil {
				logrus.Errorf("While recovering from a failure to set big data, error deleting layer %#v: %v", id, err2)
			}
			return err
		}
	}
	return nil
}

// Requires startWriting.
func (r *layerStore) ApplyDiffWithDiffer(to string, options *drivers.ApplyDiffWithDifferOpts, differ drivers.Differ) (*drivers.DriverWithDifferOutput, error) {
	ddriver, ok := r.driver.(drivers.DriverWithDiffer)
	if !ok {
		return nil, ErrNotSupported
	}

	if to == "" {
		output, err := ddriver.ApplyDiffWithDiffer("", "", options, differ)
		return &output, err
	}

	layer, ok := r.lookup(to)
	if !ok {
		return nil, ErrLayerUnknown
	}
	if options == nil {
		options = &drivers.ApplyDiffWithDifferOpts{
			ApplyDiffOpts: drivers.ApplyDiffOpts{
				Mappings:   r.layerMappings(layer),
				MountLabel: layer.MountLabel,
			},
		}
	}
	output, err := ddriver.ApplyDiffWithDiffer(layer.ID, layer.Parent, options, differ)
	if err != nil {
		return nil, err
	}
	layer.UIDs = output.UIDs
	layer.GIDs = output.GIDs
	err = r.saveFor(layer)
	return &output, err
}

func (r *layerStore) CleanupStagingDirectory(stagingDirectory string) error {
	ddriver, ok := r.driver.(drivers.DriverWithDiffer)
	if !ok {
		return ErrNotSupported
	}
	return ddriver.CleanupStagingDirectory(stagingDirectory)
}

// Requires startReading or startWriting.
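//
// layersByDigestMap is the shared lookup behind LayersByCompressedDigest,
// LayersByUncompressedDigest and LayersByTOCDigest. A usage sketch (the
// diffID variable is illustrative):
//
//	layers, err := r.LayersByUncompressedDigest(diffID) // diffID is a digest.Digest, e.g. "sha256:..."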
func (r *layerStore) layersByDigestMap(m map[digest.Digest][]string, d digest.Digest) ([]Layer, error) { var layers []Layer for _, layerID := range m[d] { layer, ok := r.lookup(layerID) if !ok { return nil, ErrLayerUnknown } layers = append(layers, *copyLayer(layer)) } return layers, nil } // Requires startReading or startWriting. func (r *layerStore) LayersByCompressedDigest(d digest.Digest) ([]Layer, error) { return r.layersByDigestMap(r.bycompressedsum, d) } // Requires startReading or startWriting. func (r *layerStore) LayersByUncompressedDigest(d digest.Digest) ([]Layer, error) { return r.layersByDigestMap(r.byuncompressedsum, d) } // Requires startReading or startWriting. func (r *layerStore) LayersByTOCDigest(d digest.Digest) ([]Layer, error) { return r.layersByDigestMap(r.bytocsum, d) } func closeAll(closes ...func() error) (rErr error) { for _, f := range closes { if err := f(); err != nil { if rErr == nil { rErr = fmt.Errorf("close error: %w", err) continue } rErr = fmt.Errorf("%v: %w", err, rErr) } } return }
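// closeAll runs every closer even after one of them fails: the first failure
// is wrapped as "close error: ...", and each later failure is layered on top
// of it, so the returned error mentions every Close call that went wrong.
// That is why call sites such as Diff above can pass several closers at once
// (for example closeAll(blob.Close, rc.Close)) and still see all failures.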