diff options
Diffstat (limited to 'pkg/v1/mutate/mutate.go')
-rw-r--r-- | pkg/v1/mutate/mutate.go | 553 |
1 files changed, 553 insertions, 0 deletions
diff --git a/pkg/v1/mutate/mutate.go b/pkg/v1/mutate/mutate.go new file mode 100644 index 0000000..e4a0e52 --- /dev/null +++ b/pkg/v1/mutate/mutate.go @@ -0,0 +1,553 @@ +// Copyright 2018 Google LLC All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package mutate + +import ( + "archive/tar" + "bytes" + "encoding/json" + "errors" + "fmt" + "io" + "path/filepath" + "strings" + "time" + + "github.com/google/go-containerregistry/internal/gzip" + v1 "github.com/google/go-containerregistry/pkg/v1" + "github.com/google/go-containerregistry/pkg/v1/empty" + "github.com/google/go-containerregistry/pkg/v1/match" + "github.com/google/go-containerregistry/pkg/v1/partial" + "github.com/google/go-containerregistry/pkg/v1/tarball" + "github.com/google/go-containerregistry/pkg/v1/types" +) + +const whiteoutPrefix = ".wh." + +// Addendum contains layers and history to be appended +// to a base image +type Addendum struct { + Layer v1.Layer + History v1.History + URLs []string + Annotations map[string]string + MediaType types.MediaType +} + +// AppendLayers applies layers to a base image. +func AppendLayers(base v1.Image, layers ...v1.Layer) (v1.Image, error) { + additions := make([]Addendum, 0, len(layers)) + for _, layer := range layers { + additions = append(additions, Addendum{Layer: layer}) + } + + return Append(base, additions...) +} + +// Append will apply the list of addendums to the base image +func Append(base v1.Image, adds ...Addendum) (v1.Image, error) { + if len(adds) == 0 { + return base, nil + } + if err := validate(adds); err != nil { + return nil, err + } + + return &image{ + base: base, + adds: adds, + }, nil +} + +// Appendable is an interface that represents something that can be appended +// to an ImageIndex. We need to be able to construct a v1.Descriptor in order +// to append something, and this is the minimum required information for that. +type Appendable interface { + MediaType() (types.MediaType, error) + Digest() (v1.Hash, error) + Size() (int64, error) +} + +// IndexAddendum represents an appendable thing and all the properties that +// we may want to override in the resulting v1.Descriptor. +type IndexAddendum struct { + Add Appendable + v1.Descriptor +} + +// AppendManifests appends a manifest to the ImageIndex. +func AppendManifests(base v1.ImageIndex, adds ...IndexAddendum) v1.ImageIndex { + return &index{ + base: base, + adds: adds, + } +} + +// RemoveManifests removes any descriptors that match the match.Matcher. +func RemoveManifests(base v1.ImageIndex, matcher match.Matcher) v1.ImageIndex { + return &index{ + base: base, + remove: matcher, + } +} + +// Config mutates the provided v1.Image to have the provided v1.Config +func Config(base v1.Image, cfg v1.Config) (v1.Image, error) { + cf, err := base.ConfigFile() + if err != nil { + return nil, err + } + + cf.Config = cfg + + return ConfigFile(base, cf) +} + +// Subject mutates the subject on an image or index manifest. +// +// The input is expected to be a v1.Image or v1.ImageIndex, and +// returns the same type. You can type-assert the result like so: +// +// img := Subject(empty.Image, subj).(v1.Image) +// +// Or for an index: +// +// idx := Subject(empty.Index, subj).(v1.ImageIndex) +// +// If the input is not an Image or ImageIndex, the result will +// attempt to lazily annotate the raw manifest. +func Subject(f partial.WithRawManifest, subject v1.Descriptor) partial.WithRawManifest { + if img, ok := f.(v1.Image); ok { + return &image{ + base: img, + subject: &subject, + } + } + if idx, ok := f.(v1.ImageIndex); ok { + return &index{ + base: idx, + subject: &subject, + } + } + return arbitraryRawManifest{a: f, subject: &subject} +} + +// Annotations mutates the annotations on an annotatable image or index manifest. +// +// The annotatable input is expected to be a v1.Image or v1.ImageIndex, and +// returns the same type. You can type-assert the result like so: +// +// img := Annotations(empty.Image, map[string]string{ +// "foo": "bar", +// }).(v1.Image) +// +// Or for an index: +// +// idx := Annotations(empty.Index, map[string]string{ +// "foo": "bar", +// }).(v1.ImageIndex) +// +// If the input Annotatable is not an Image or ImageIndex, the result will +// attempt to lazily annotate the raw manifest. +func Annotations(f partial.WithRawManifest, anns map[string]string) partial.WithRawManifest { + if img, ok := f.(v1.Image); ok { + return &image{ + base: img, + annotations: anns, + } + } + if idx, ok := f.(v1.ImageIndex); ok { + return &index{ + base: idx, + annotations: anns, + } + } + return arbitraryRawManifest{a: f, anns: anns} +} + +type arbitraryRawManifest struct { + a partial.WithRawManifest + anns map[string]string + subject *v1.Descriptor +} + +func (a arbitraryRawManifest) RawManifest() ([]byte, error) { + b, err := a.a.RawManifest() + if err != nil { + return nil, err + } + var m map[string]any + if err := json.Unmarshal(b, &m); err != nil { + return nil, err + } + if ann, ok := m["annotations"]; ok { + if annm, ok := ann.(map[string]string); ok { + for k, v := range a.anns { + annm[k] = v + } + } else { + return nil, fmt.Errorf(".annotations is not a map: %T", ann) + } + } else { + m["annotations"] = a.anns + } + if a.subject != nil { + m["subject"] = a.subject + } + return json.Marshal(m) +} + +// ConfigFile mutates the provided v1.Image to have the provided v1.ConfigFile +func ConfigFile(base v1.Image, cfg *v1.ConfigFile) (v1.Image, error) { + m, err := base.Manifest() + if err != nil { + return nil, err + } + + image := &image{ + base: base, + manifest: m.DeepCopy(), + configFile: cfg, + } + + return image, nil +} + +// CreatedAt mutates the provided v1.Image to have the provided v1.Time +func CreatedAt(base v1.Image, created v1.Time) (v1.Image, error) { + cf, err := base.ConfigFile() + if err != nil { + return nil, err + } + + cfg := cf.DeepCopy() + cfg.Created = created + + return ConfigFile(base, cfg) +} + +// Extract takes an image and returns an io.ReadCloser containing the image's +// flattened filesystem. +// +// Callers can read the filesystem contents by passing the reader to +// tar.NewReader, or io.Copy it directly to some output. +// +// If a caller doesn't read the full contents, they should Close it to free up +// resources used during extraction. +func Extract(img v1.Image) io.ReadCloser { + pr, pw := io.Pipe() + + go func() { + // Close the writer with any errors encountered during + // extraction. These errors will be returned by the reader end + // on subsequent reads. If err == nil, the reader will return + // EOF. + pw.CloseWithError(extract(img, pw)) + }() + + return pr +} + +// Adapted from https://github.com/google/containerregistry/blob/da03b395ccdc4e149e34fbb540483efce962dc64/client/v2_2/docker_image_.py#L816 +func extract(img v1.Image, w io.Writer) error { + tarWriter := tar.NewWriter(w) + defer tarWriter.Close() + + fileMap := map[string]bool{} + + layers, err := img.Layers() + if err != nil { + return fmt.Errorf("retrieving image layers: %w", err) + } + + // we iterate through the layers in reverse order because it makes handling + // whiteout layers more efficient, since we can just keep track of the removed + // files as we see .wh. layers and ignore those in previous layers. + for i := len(layers) - 1; i >= 0; i-- { + layer := layers[i] + layerReader, err := layer.Uncompressed() + if err != nil { + return fmt.Errorf("reading layer contents: %w", err) + } + defer layerReader.Close() + tarReader := tar.NewReader(layerReader) + for { + header, err := tarReader.Next() + if errors.Is(err, io.EOF) { + break + } + if err != nil { + return fmt.Errorf("reading tar: %w", err) + } + + // Some tools prepend everything with "./", so if we don't Clean the + // name, we may have duplicate entries, which angers tar-split. + header.Name = filepath.Clean(header.Name) + // force PAX format to remove Name/Linkname length limit of 100 characters + // required by USTAR and to not depend on internal tar package guess which + // prefers USTAR over PAX + header.Format = tar.FormatPAX + + basename := filepath.Base(header.Name) + dirname := filepath.Dir(header.Name) + tombstone := strings.HasPrefix(basename, whiteoutPrefix) + if tombstone { + basename = basename[len(whiteoutPrefix):] + } + + // check if we have seen value before + // if we're checking a directory, don't filepath.Join names + var name string + if header.Typeflag == tar.TypeDir { + name = header.Name + } else { + name = filepath.Join(dirname, basename) + } + + if _, ok := fileMap[name]; ok { + continue + } + + // check for a whited out parent directory + if inWhiteoutDir(fileMap, name) { + continue + } + + // mark file as handled. non-directory implicitly tombstones + // any entries with a matching (or child) name + fileMap[name] = tombstone || !(header.Typeflag == tar.TypeDir) + if !tombstone { + if err := tarWriter.WriteHeader(header); err != nil { + return err + } + if header.Size > 0 { + if _, err := io.CopyN(tarWriter, tarReader, header.Size); err != nil { + return err + } + } + } + } + } + return nil +} + +func inWhiteoutDir(fileMap map[string]bool, file string) bool { + for { + if file == "" { + break + } + dirname := filepath.Dir(file) + if file == dirname { + break + } + if val, ok := fileMap[dirname]; ok && val { + return true + } + file = dirname + } + return false +} + +func max(a, b int) int { + if a > b { + return a + } + return b +} + +// Time sets all timestamps in an image to the given timestamp. +func Time(img v1.Image, t time.Time) (v1.Image, error) { + newImage := empty.Image + + layers, err := img.Layers() + if err != nil { + return nil, fmt.Errorf("getting image layers: %w", err) + } + + ocf, err := img.ConfigFile() + if err != nil { + return nil, fmt.Errorf("getting original config file: %w", err) + } + + addendums := make([]Addendum, max(len(ocf.History), len(layers))) + var historyIdx, addendumIdx int + for layerIdx := 0; layerIdx < len(layers); addendumIdx, layerIdx = addendumIdx+1, layerIdx+1 { + newLayer, err := layerTime(layers[layerIdx], t) + if err != nil { + return nil, fmt.Errorf("setting layer times: %w", err) + } + + // try to search for the history entry that corresponds to this layer + for ; historyIdx < len(ocf.History); historyIdx++ { + addendums[addendumIdx].History = ocf.History[historyIdx] + // if it's an EmptyLayer, do not set the Layer and have the Addendum with just the History + // and move on to the next History entry + if ocf.History[historyIdx].EmptyLayer { + addendumIdx++ + continue + } + // otherwise, we can exit from the cycle + historyIdx++ + break + } + addendums[addendumIdx].Layer = newLayer + } + + // add all leftover History entries + for ; historyIdx < len(ocf.History); historyIdx, addendumIdx = historyIdx+1, addendumIdx+1 { + addendums[addendumIdx].History = ocf.History[historyIdx] + } + + newImage, err = Append(newImage, addendums...) + if err != nil { + return nil, fmt.Errorf("appending layers: %w", err) + } + + cf, err := newImage.ConfigFile() + if err != nil { + return nil, fmt.Errorf("setting config file: %w", err) + } + + cfg := cf.DeepCopy() + + // Copy basic config over + cfg.Architecture = ocf.Architecture + cfg.OS = ocf.OS + cfg.OSVersion = ocf.OSVersion + cfg.Config = ocf.Config + + // Strip away timestamps from the config file + cfg.Created = v1.Time{Time: t} + + for i, h := range cfg.History { + h.Created = v1.Time{Time: t} + h.CreatedBy = ocf.History[i].CreatedBy + h.Comment = ocf.History[i].Comment + h.EmptyLayer = ocf.History[i].EmptyLayer + // Explicitly ignore Author field; which hinders reproducibility + h.Author = "" + cfg.History[i] = h + } + + return ConfigFile(newImage, cfg) +} + +func layerTime(layer v1.Layer, t time.Time) (v1.Layer, error) { + layerReader, err := layer.Uncompressed() + if err != nil { + return nil, fmt.Errorf("getting layer: %w", err) + } + defer layerReader.Close() + w := new(bytes.Buffer) + tarWriter := tar.NewWriter(w) + defer tarWriter.Close() + + tarReader := tar.NewReader(layerReader) + for { + header, err := tarReader.Next() + if errors.Is(err, io.EOF) { + break + } + if err != nil { + return nil, fmt.Errorf("reading layer: %w", err) + } + + header.ModTime = t + + //PAX and GNU Format support additional timestamps in the header + if header.Format == tar.FormatPAX || header.Format == tar.FormatGNU { + header.AccessTime = t + header.ChangeTime = t + } + + if err := tarWriter.WriteHeader(header); err != nil { + return nil, fmt.Errorf("writing tar header: %w", err) + } + + if header.Typeflag == tar.TypeReg { + // TODO(#1168): This should be lazy, and not buffer the entire layer contents. + if _, err = io.CopyN(tarWriter, tarReader, header.Size); err != nil { + return nil, fmt.Errorf("writing layer file: %w", err) + } + } + } + + if err := tarWriter.Close(); err != nil { + return nil, err + } + + b := w.Bytes() + // gzip the contents, then create the layer + opener := func() (io.ReadCloser, error) { + return gzip.ReadCloser(io.NopCloser(bytes.NewReader(b))), nil + } + layer, err = tarball.LayerFromOpener(opener) + if err != nil { + return nil, fmt.Errorf("creating layer: %w", err) + } + + return layer, nil +} + +// Canonical is a helper function to combine Time and configFile +// to remove any randomness during a docker build. +func Canonical(img v1.Image) (v1.Image, error) { + // Set all timestamps to 0 + created := time.Time{} + img, err := Time(img, created) + if err != nil { + return nil, err + } + + cf, err := img.ConfigFile() + if err != nil { + return nil, err + } + + // Get rid of host-dependent random config + cfg := cf.DeepCopy() + + cfg.Container = "" + cfg.Config.Hostname = "" + cfg.DockerVersion = "" + + return ConfigFile(img, cfg) +} + +// MediaType modifies the MediaType() of the given image. +func MediaType(img v1.Image, mt types.MediaType) v1.Image { + return &image{ + base: img, + mediaType: &mt, + } +} + +// ConfigMediaType modifies the MediaType() of the given image's Config. +// +// If !mt.IsConfig(), this will be the image's artifactType in any indexes it's a part of. +func ConfigMediaType(img v1.Image, mt types.MediaType) v1.Image { + return &image{ + base: img, + configMediaType: &mt, + } +} + +// IndexMediaType modifies the MediaType() of the given index. +func IndexMediaType(idx v1.ImageIndex, mt types.MediaType) v1.ImageIndex { + return &index{ + base: idx, + mediaType: &mt, + } +} |