When applying a chunked layer with a tar-split, compute its uncompressed digest

This will allow c/image to validate the uncompressed digest against the config's
RootFS.DiffID value (ensuring that the layer's contents are the same whether it is
pulled via the TOC or via the traditional full tar stream); and the uncompressed
digest will be used as a layer ID, ensuring users see the traditional layer and
image IDs they are used to.

This doesn't work for chunked layers without a tar-split.

Signed-off-by: Miloslav Trmač <[email protected]>
mtrmac committed Nov 15, 2024
1 parent 787ec97 commit 043e2cd
Show file tree
Hide file tree
Showing 2 changed files with 65 additions and 4 deletions.
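
For illustration, a minimal sketch of the DiffID check this commit enables on the c/image side. This is not c/image's actual code: the helper name validateLayerDiffID and its signature are invented here, and only the go-digest and OCI image-spec types are real. The idea is that a consumer compares the uncompressed digest computed while applying layer i against the image config's RootFS.DiffIDs entry.

package example

import (
	"fmt"

	digest "github.com/opencontainers/go-digest"
	ocispec "github.com/opencontainers/image-spec/specs-go/v1"
)

// validateLayerDiffID (hypothetical helper) checks that the uncompressed digest
// computed while applying layer i matches the DiffID the image config commits to.
func validateLayerDiffID(config ocispec.Image, i int, uncompressed digest.Digest) error {
	if i >= len(config.RootFS.DiffIDs) {
		return fmt.Errorf("image config has no DiffID for layer %d", i)
	}
	if expected := config.RootFS.DiffIDs[i]; expected != uncompressed {
		return fmt.Errorf("layer %d: uncompressed digest %s does not match config DiffID %s",
			i, uncompressed, expected)
	}
	return nil
}

Because the manifest references the config by digest, agreeing with RootFS.DiffIDs pins a single uncompressed representation per layer, which is what lets TOC-based and traditional pulls converge on the same layer and image IDs.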
2 changes: 1 addition & 1 deletion pkg/chunked/cache_linux.go
@@ -710,7 +710,7 @@ func prepareCacheFile(manifest []byte, format graphdriver.DifferOutputFormat) ([
 	switch format {
 	case graphdriver.DifferOutputFormatDir:
 	case graphdriver.DifferOutputFormatFlat:
-		entries, err = makeEntriesFlat(entries)
+		entries, err = makeEntriesFlat(entries, nil)
 		if err != nil {
 			return nil, err
 		}
67 changes: 64 additions & 3 deletions pkg/chunked/storage_linux.go
@@ -2,6 +2,7 @@ package chunked
 
 import (
 	archivetar "archive/tar"
+	"bytes"
 	"context"
 	"encoding/base64"
 	"errors"
@@ -28,12 +29,15 @@ import (
 	"github.com/containers/storage/pkg/fsverity"
 	"github.com/containers/storage/pkg/idtools"
 	"github.com/containers/storage/pkg/system"
+	securejoin "github.com/cyphar/filepath-securejoin"
 	jsoniter "github.com/json-iterator/go"
 	"github.com/klauspost/compress/zstd"
 	"github.com/klauspost/pgzip"
 	digest "github.com/opencontainers/go-digest"
 	"github.com/sirupsen/logrus"
 	"github.com/vbatts/tar-split/archive/tar"
+	"github.com/vbatts/tar-split/tar/asm"
+	tsStorage "github.com/vbatts/tar-split/tar/storage"
 	"golang.org/x/sys/unix"
 )
 
@@ -1112,7 +1116,10 @@ func (c *chunkedDiffer) findAndCopyFile(dirfd int, r *fileMetadata, copyOptions
 	return false, nil
 }
 
-func makeEntriesFlat(mergedEntries []fileMetadata) ([]fileMetadata, error) {
+// makeEntriesFlat collects regular-file entries from mergedEntries, and produces a new list
+// where each file content is represented only once, and uses composefs.RegularFilePathForValidatedDigest for its name.
+// If flatPathNameMap is not nil, this function writes to it a mapping from filepath.Clean(originalName) to the composefs name.
+func makeEntriesFlat(mergedEntries []fileMetadata, flatPathNameMap map[string]string) ([]fileMetadata, error) {
 	var new []fileMetadata
 
 	knownFlatPaths := make(map[string]struct{})
@@ -1131,6 +1138,9 @@ func makeEntriesFlat(mergedEntries []fileMetadata) ([]fileMetadata, error) {
 		if err != nil {
 			return nil, fmt.Errorf("determining physical file path for %q: %w", mergedEntries[i].Name, err)
 		}
+		if flatPathNameMap != nil {
+			flatPathNameMap[filepath.Clean(mergedEntries[i].Name)] = path
+		}
 
 		if _, known := knownFlatPaths[path]; known {
 			continue
@@ -1419,10 +1429,13 @@ func (c *chunkedDiffer) ApplyDiff(dest string, options *archive.TarOptions, diff
 	if err != nil {
 		return output, &fs.PathError{Op: "open", Path: dest, Err: err}
 	}
-	defer unix.Close(dirfd)
+	dirFile := os.NewFile(uintptr(dirfd), dest)
+	defer dirFile.Close()
 
+	var flatPathNameMap map[string]string // = nil
 	if differOpts != nil && differOpts.Format == graphdriver.DifferOutputFormatFlat {
-		mergedEntries, err = makeEntriesFlat(mergedEntries)
+		flatPathNameMap = map[string]string{}
+		mergedEntries, err = makeEntriesFlat(mergedEntries, flatPathNameMap)
 		if err != nil {
 			return output, err
 		}
@@ -1692,6 +1705,24 @@ func (c *chunkedDiffer) ApplyDiff(dest string, options *archive.TarOptions, diff
 		}
 	}
 
+	// To ensure that consumers of the layer who decompress and read the full tar stream,
+	// and consumers who consume the data via the TOC, both see exactly the same data and metadata,
+	// compute the UncompressedDigest.
+	// The caller will then ensure that this value matches the value in the image config’s RootFS.DiffID, i.e. the image must commit
+	// to one UncompressedDigest value for each layer, and that will avoid the ambiguity (in consumers who validate layers against DiffID).
+	//
+	// This is, sadly, quite costly: Up to now we might have only had to write, and digest, the new/modified files.
+	// Here we need to read, and digest, the whole layer, even if almost all of it was already present locally.
+	if output.UncompressedDigest == "" && output.TarSplit != nil {
+		metadata := tsStorage.NewJSONUnpacker(bytes.NewReader(output.TarSplit))
+		fg := newStagedFileGetter(dirFile, flatPathNameMap)
+		digester := digest.Canonical.Digester()
+		if err := asm.WriteOutputTarStream(fg, metadata, digester.Hash()); err != nil {
+			return output, fmt.Errorf("digesting staged uncompressed stream: %w", err)
+		}
+		output.UncompressedDigest = digester.Digest()
+	}
+
 	if totalChunksSize > 0 {
 		logrus.Debugf("Missing %d bytes out of %d (%.2f %%)", missingPartsSize, totalChunksSize, float32(missingPartsSize*100.0)/float32(totalChunksSize))
 	}
@@ -1817,3 +1848,33 @@ func validateChunkChecksum(chunk *internal.FileMetadata, root, path string, offs
 
 	return digester.Digest() == digest
 }
+
+// newStagedFileGetter returns an object usable as storage.FileGetter for rootDir.
+// If flatPathNameMap is not nil, it must be used to map logical file names into the backing file paths.
+func newStagedFileGetter(rootDir *os.File, flatPathNameMap map[string]string) *stagedFileGetter {
+	return &stagedFileGetter{
+		rootDir:         rootDir,
+		flatPathNameMap: flatPathNameMap,
+	}
+}
+
+type stagedFileGetter struct {
+	rootDir         *os.File
+	flatPathNameMap map[string]string // nil, or a map from filepath.Clean()ed tar file names to expected on-filesystem names
+}
+
+func (fg *stagedFileGetter) Get(filename string) (io.ReadCloser, error) {
+	if fg.flatPathNameMap != nil {
+		path, ok := fg.flatPathNameMap[filepath.Clean(filename)]
+		if !ok {
+			return nil, fmt.Errorf("no path mapping exists for tar entry %q", filename)
+		}
+		filename = path
+	}
+	pathFD, err := securejoin.OpenatInRoot(fg.rootDir, filename)
+	if err != nil {
+		return nil, err
+	}
+	defer pathFD.Close()
+	return securejoin.Reopen(pathFD, unix.O_RDONLY)
+}
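
To show the reassembly mechanism in isolation, here is a standalone sketch under stated assumptions: tarSplitJSON holds a layer's raw tar-split records, rootDir is a plain directory containing the staged files, the function name uncompressedDigest is invented, and tar-split's path-based getter stands in for the commit's securejoin-backed stagedFileGetter. tar-split replays the exact tar framing, the FileGetter supplies file contents, and the canonical digester consumes the reconstructed uncompressed stream.

package example

import (
	"bytes"
	"fmt"

	digest "github.com/opencontainers/go-digest"
	"github.com/vbatts/tar-split/tar/asm"
	tsStorage "github.com/vbatts/tar-split/tar/storage"
)

// uncompressedDigest (illustrative only) rebuilds the exact uncompressed tar stream
// from tar-split metadata plus on-disk file contents, and digests it without
// materializing the stream anywhere.
func uncompressedDigest(tarSplitJSON []byte, rootDir string) (digest.Digest, error) {
	unpacker := tsStorage.NewJSONUnpacker(bytes.NewReader(tarSplitJSON))
	fileGetter := tsStorage.NewPathFileGetter(rootDir) // simple path-based getter, unlike the commit's stagedFileGetter
	digester := digest.Canonical.Digester()
	if err := asm.WriteOutputTarStream(fileGetter, unpacker, digester.Hash()); err != nil {
		return "", fmt.Errorf("reassembling uncompressed stream: %w", err)
	}
	return digester.Digest(), nil
}

The trade-off matches the comment in the diff above: because the tar-split data preserves every non-content byte of the original tar stream, the digest computed this way should equal the digest of the layer's original uncompressed tar, at the cost of re-reading the whole layer from disk.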
