diff options
Diffstat (limited to 'pkg/v1/tarball/write.go')
-rw-r--r-- | pkg/v1/tarball/write.go | 457 |
1 files changed, 457 insertions, 0 deletions
diff --git a/pkg/v1/tarball/write.go b/pkg/v1/tarball/write.go new file mode 100644 index 0000000..e607df1 --- /dev/null +++ b/pkg/v1/tarball/write.go @@ -0,0 +1,457 @@ +// Copyright 2018 Google LLC All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package tarball + +import ( + "archive/tar" + "bytes" + "encoding/json" + "errors" + "fmt" + "io" + "os" + "sort" + "strings" + + "github.com/google/go-containerregistry/pkg/name" + v1 "github.com/google/go-containerregistry/pkg/v1" + "github.com/google/go-containerregistry/pkg/v1/partial" +) + +// WriteToFile writes in the compressed format to a tarball, on disk. +// This is just syntactic sugar wrapping tarball.Write with a new file. +func WriteToFile(p string, ref name.Reference, img v1.Image, opts ...WriteOption) error { + w, err := os.Create(p) + if err != nil { + return err + } + defer w.Close() + + return Write(ref, img, w, opts...) +} + +// MultiWriteToFile writes in the compressed format to a tarball, on disk. +// This is just syntactic sugar wrapping tarball.MultiWrite with a new file. +func MultiWriteToFile(p string, tagToImage map[name.Tag]v1.Image, opts ...WriteOption) error { + refToImage := make(map[name.Reference]v1.Image, len(tagToImage)) + for i, d := range tagToImage { + refToImage[i] = d + } + return MultiRefWriteToFile(p, refToImage, opts...) +} + +// MultiRefWriteToFile writes in the compressed format to a tarball, on disk. +// This is just syntactic sugar wrapping tarball.MultiRefWrite with a new file. +func MultiRefWriteToFile(p string, refToImage map[name.Reference]v1.Image, opts ...WriteOption) error { + w, err := os.Create(p) + if err != nil { + return err + } + defer w.Close() + + return MultiRefWrite(refToImage, w, opts...) +} + +// Write is a wrapper to write a single image and tag to a tarball. +func Write(ref name.Reference, img v1.Image, w io.Writer, opts ...WriteOption) error { + return MultiRefWrite(map[name.Reference]v1.Image{ref: img}, w, opts...) +} + +// MultiWrite writes the contents of each image to the provided writer, in the compressed format. +// The contents are written in the following format: +// One manifest.json file at the top level containing information about several images. +// One file for each layer, named after the layer's SHA. +// One file for the config blob, named after its SHA. +func MultiWrite(tagToImage map[name.Tag]v1.Image, w io.Writer, opts ...WriteOption) error { + refToImage := make(map[name.Reference]v1.Image, len(tagToImage)) + for i, d := range tagToImage { + refToImage[i] = d + } + return MultiRefWrite(refToImage, w, opts...) +} + +// MultiRefWrite writes the contents of each image to the provided writer, in the compressed format. +// The contents are written in the following format: +// One manifest.json file at the top level containing information about several images. +// One file for each layer, named after the layer's SHA. +// One file for the config blob, named after its SHA. +func MultiRefWrite(refToImage map[name.Reference]v1.Image, w io.Writer, opts ...WriteOption) error { + // process options + o := &writeOptions{ + updates: nil, + } + for _, option := range opts { + if err := option(o); err != nil { + return err + } + } + + imageToTags := dedupRefToImage(refToImage) + size, mBytes, err := getSizeAndManifest(imageToTags) + if err != nil { + return sendUpdateReturn(o, err) + } + + return writeImagesToTar(imageToTags, mBytes, size, w, o) +} + +// sendUpdateReturn return the passed in error message, also sending on update channel, if it exists +func sendUpdateReturn(o *writeOptions, err error) error { + if o != nil && o.updates != nil { + o.updates <- v1.Update{ + Error: err, + } + } + return err +} + +// sendProgressWriterReturn return the passed in error message, also sending on update channel, if it exists, along with downloaded information +func sendProgressWriterReturn(pw *progressWriter, err error) error { + if pw != nil { + return pw.Error(err) + } + return err +} + +// writeImagesToTar writes the images to the tarball +func writeImagesToTar(imageToTags map[v1.Image][]string, m []byte, size int64, w io.Writer, o *writeOptions) (err error) { + if w == nil { + return sendUpdateReturn(o, errors.New("must pass valid writer")) + } + + tw := w + var pw *progressWriter + + // we only calculate the sizes and use a progressWriter if we were provided + // an option with a progress channel + if o != nil && o.updates != nil { + pw = &progressWriter{ + w: w, + updates: o.updates, + size: size, + } + tw = pw + } + + tf := tar.NewWriter(tw) + defer tf.Close() + + seenLayerDigests := make(map[string]struct{}) + + for img := range imageToTags { + // Write the config. + cfgName, err := img.ConfigName() + if err != nil { + return sendProgressWriterReturn(pw, err) + } + cfgBlob, err := img.RawConfigFile() + if err != nil { + return sendProgressWriterReturn(pw, err) + } + if err := writeTarEntry(tf, cfgName.String(), bytes.NewReader(cfgBlob), int64(len(cfgBlob))); err != nil { + return sendProgressWriterReturn(pw, err) + } + + // Write the layers. + layers, err := img.Layers() + if err != nil { + return sendProgressWriterReturn(pw, err) + } + layerFiles := make([]string, len(layers)) + for i, l := range layers { + d, err := l.Digest() + if err != nil { + return sendProgressWriterReturn(pw, err) + } + // Munge the file name to appease ancient technology. + // + // tar assumes anything with a colon is a remote tape drive: + // https://www.gnu.org/software/tar/manual/html_section/tar_45.html + // Drop the algorithm prefix, e.g. "sha256:" + hex := d.Hex + + // gunzip expects certain file extensions: + // https://www.gnu.org/software/gzip/manual/html_node/Overview.html + layerFiles[i] = fmt.Sprintf("%s.tar.gz", hex) + + if _, ok := seenLayerDigests[hex]; ok { + continue + } + seenLayerDigests[hex] = struct{}{} + + r, err := l.Compressed() + if err != nil { + return sendProgressWriterReturn(pw, err) + } + blobSize, err := l.Size() + if err != nil { + return sendProgressWriterReturn(pw, err) + } + + if err := writeTarEntry(tf, layerFiles[i], r, blobSize); err != nil { + return sendProgressWriterReturn(pw, err) + } + } + } + if err := writeTarEntry(tf, "manifest.json", bytes.NewReader(m), int64(len(m))); err != nil { + return sendProgressWriterReturn(pw, err) + } + + // be sure to close the tar writer so everything is flushed out before we send our EOF + if err := tf.Close(); err != nil { + return sendProgressWriterReturn(pw, err) + } + // send an EOF to indicate finished on the channel, but nil as our return error + _ = sendProgressWriterReturn(pw, io.EOF) + return nil +} + +// calculateManifest calculates the manifest and optionally the size of the tar file +func calculateManifest(imageToTags map[v1.Image][]string) (m Manifest, err error) { + if len(imageToTags) == 0 { + return nil, errors.New("set of images is empty") + } + + for img, tags := range imageToTags { + cfgName, err := img.ConfigName() + if err != nil { + return nil, err + } + + // Store foreign layer info. + layerSources := make(map[v1.Hash]v1.Descriptor) + + // Write the layers. + layers, err := img.Layers() + if err != nil { + return nil, err + } + layerFiles := make([]string, len(layers)) + for i, l := range layers { + d, err := l.Digest() + if err != nil { + return nil, err + } + // Munge the file name to appease ancient technology. + // + // tar assumes anything with a colon is a remote tape drive: + // https://www.gnu.org/software/tar/manual/html_section/tar_45.html + // Drop the algorithm prefix, e.g. "sha256:" + hex := d.Hex + + // gunzip expects certain file extensions: + // https://www.gnu.org/software/gzip/manual/html_node/Overview.html + layerFiles[i] = fmt.Sprintf("%s.tar.gz", hex) + + // Add to LayerSources if it's a foreign layer. + desc, err := partial.BlobDescriptor(img, d) + if err != nil { + return nil, err + } + if !desc.MediaType.IsDistributable() { + diffid, err := partial.BlobToDiffID(img, d) + if err != nil { + return nil, err + } + layerSources[diffid] = *desc + } + } + + // Generate the tar descriptor and write it. + m = append(m, Descriptor{ + Config: cfgName.String(), + RepoTags: tags, + Layers: layerFiles, + LayerSources: layerSources, + }) + } + // sort by name of the repotags so it is consistent. Alternatively, we could sort by hash of the + // descriptor, but that would make it hard for humans to process + sort.Slice(m, func(i, j int) bool { + return strings.Join(m[i].RepoTags, ",") < strings.Join(m[j].RepoTags, ",") + }) + + return m, nil +} + +// CalculateSize calculates the expected complete size of the output tar file +func CalculateSize(refToImage map[name.Reference]v1.Image) (size int64, err error) { + imageToTags := dedupRefToImage(refToImage) + size, _, err = getSizeAndManifest(imageToTags) + return size, err +} + +func getSizeAndManifest(imageToTags map[v1.Image][]string) (int64, []byte, error) { + m, err := calculateManifest(imageToTags) + if err != nil { + return 0, nil, fmt.Errorf("unable to calculate manifest: %w", err) + } + mBytes, err := json.Marshal(m) + if err != nil { + return 0, nil, fmt.Errorf("could not marshall manifest to bytes: %w", err) + } + + size, err := calculateTarballSize(imageToTags, mBytes) + if err != nil { + return 0, nil, fmt.Errorf("error calculating tarball size: %w", err) + } + return size, mBytes, nil +} + +// calculateTarballSize calculates the size of the tar file +func calculateTarballSize(imageToTags map[v1.Image][]string, mBytes []byte) (size int64, err error) { + seenLayerDigests := make(map[string]struct{}) + for img, name := range imageToTags { + manifest, err := img.Manifest() + if err != nil { + return size, fmt.Errorf("unable to get manifest for img %s: %w", name, err) + } + size += calculateSingleFileInTarSize(manifest.Config.Size) + for _, l := range manifest.Layers { + hex := l.Digest.Hex + if _, ok := seenLayerDigests[hex]; ok { + continue + } + seenLayerDigests[hex] = struct{}{} + size += calculateSingleFileInTarSize(l.Size) + } + } + // add the manifest + size += calculateSingleFileInTarSize(int64(len(mBytes))) + + // add the two padding blocks that indicate end of a tar file + size += 1024 + return size, nil +} + +func dedupRefToImage(refToImage map[name.Reference]v1.Image) map[v1.Image][]string { + imageToTags := make(map[v1.Image][]string) + + for ref, img := range refToImage { + if tag, ok := ref.(name.Tag); ok { + if tags, ok := imageToTags[img]; !ok || tags == nil { + imageToTags[img] = []string{} + } + // Docker cannot load tarballs without an explicit tag: + // https://github.com/google/go-containerregistry/issues/890 + // + // We can't use the fully qualified tag.Name() because of rules_docker: + // https://github.com/google/go-containerregistry/issues/527 + // + // If the tag is "latest", but tag.String() doesn't end in ":latest", + // just append it. Kind of gross, but should work for now. + ts := tag.String() + if tag.Identifier() == name.DefaultTag && !strings.HasSuffix(ts, ":"+name.DefaultTag) { + ts = fmt.Sprintf("%s:%s", ts, name.DefaultTag) + } + imageToTags[img] = append(imageToTags[img], ts) + } else if _, ok := imageToTags[img]; !ok { + imageToTags[img] = nil + } + } + + return imageToTags +} + +// writeTarEntry writes a file to the provided writer with a corresponding tar header +func writeTarEntry(tf *tar.Writer, path string, r io.Reader, size int64) error { + hdr := &tar.Header{ + Mode: 0644, + Typeflag: tar.TypeReg, + Size: size, + Name: path, + } + if err := tf.WriteHeader(hdr); err != nil { + return err + } + _, err := io.Copy(tf, r) + return err +} + +// ComputeManifest get the manifest.json that will be written to the tarball +// for multiple references +func ComputeManifest(refToImage map[name.Reference]v1.Image) (Manifest, error) { + imageToTags := dedupRefToImage(refToImage) + return calculateManifest(imageToTags) +} + +// WriteOption a function option to pass to Write() +type WriteOption func(*writeOptions) error +type writeOptions struct { + updates chan<- v1.Update +} + +// WithProgress create a WriteOption for passing to Write() that enables +// a channel to receive updates as they are downloaded and written to disk. +func WithProgress(updates chan<- v1.Update) WriteOption { + return func(o *writeOptions) error { + o.updates = updates + return nil + } +} + +// progressWriter is a writer which will send the download progress +type progressWriter struct { + w io.Writer + updates chan<- v1.Update + size, complete int64 +} + +func (pw *progressWriter) Write(p []byte) (int, error) { + n, err := pw.w.Write(p) + if err != nil { + return n, err + } + + pw.complete += int64(n) + + pw.updates <- v1.Update{ + Total: pw.size, + Complete: pw.complete, + } + + return n, err +} + +func (pw *progressWriter) Error(err error) error { + pw.updates <- v1.Update{ + Total: pw.size, + Complete: pw.complete, + Error: err, + } + return err +} + +func (pw *progressWriter) Close() error { + pw.updates <- v1.Update{ + Total: pw.size, + Complete: pw.complete, + Error: io.EOF, + } + return io.EOF +} + +// calculateSingleFileInTarSize calculate the size a file will take up in a tar archive, +// given the input data. Provided by rounding up to nearest whole block (512) +// and adding header 512 +func calculateSingleFileInTarSize(in int64) (out int64) { + // doing this manually, because math.Round() works with float64 + out += in + if remainder := out % 512; remainder != 0 { + out += (512 - remainder) + } + out += 512 + return out +} |