diff --git a/docs/containers-storage.conf.5.md b/docs/containers-storage.conf.5.md
index 2f8e29b01f..e19c39ad38 100644
--- a/docs/containers-storage.conf.5.md
+++ b/docs/containers-storage.conf.5.md
@@ -124,6 +124,27 @@ The `storage.options.pull_options` table supports the following keys:
   It is an expensive operation so it is not enabled by default.
   This is a "string bool": "false"|"true" (cannot be native TOML boolean)
 
+**insecure_allow_unpredictable_image_contents="false"|"true"**
+  This should _almost never_ be set.
+  It allows partial pulls of images without guaranteeing that "partial
+  pulls" and non-partial pulls both result in consistent image contents.
+  This allows pulling estargz images and early versions of zstd:chunked images;
+  otherwise, these layers always use the traditional non-partial pull path.
+
+  This option should be enabled _extremely_ rarely, only if _all_ images that could
+  EVER be conceivably pulled on this system are _guaranteed_ (e.g. using a signature policy)
+  to come from a build system trusted to never attack image integrity.
+
+  If this consistency enforcement were disabled, malicious images could be built
+  in a way designed to evade other audit mechanisms, so the presence of most other audit
+  mechanisms is not a replacement for the above-mentioned need for all images to come
+  from a trusted build system.
+
+  As a side effect, enabling this option will also make image IDs unpredictable
+  (usually not equal to the traditional value matching the config digest).
+
+  This is a "string bool": "false"|"true" (cannot be native TOML boolean)
+
 ### STORAGE OPTIONS FOR AUFS TABLE
 
 The `storage.options.aufs` table supports the following options:
diff --git a/drivers/driver.go b/drivers/driver.go
index ed0e628883..1f7ac5ff02 100644
--- a/drivers/driver.go
+++ b/drivers/driver.go
@@ -231,8 +231,8 @@ const (
 	// DifferOutputFormatDir means the output is a directory and it will
 	// keep the original layout.
 	DifferOutputFormatDir = iota
-	// DifferOutputFormatFlat will store the files by their checksum, in the form
-	// checksum[0:2]/checksum[2:]
+	// DifferOutputFormatFlat will store the files by their checksum, per
+	// pkg/chunked/internal/path.RegularFilePathForValidatedDigest.
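+	// For example, a file whose content digest is
+	// sha256:0123456789abcdef1123456789abcdef2123456789abcdef3123456789abcdef
+	// is stored at 01/23456789abcdef1123456789abcdef2123456789abcdef3123456789abcdef.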
 	DifferOutputFormatFlat
 )
diff --git a/pkg/chunked/cache_linux.go b/pkg/chunked/cache_linux.go
index c9cde23b7f..0e49ddd888 100644
--- a/pkg/chunked/cache_linux.go
+++ b/pkg/chunked/cache_linux.go
@@ -710,7 +710,7 @@ func prepareCacheFile(manifest []byte, format graphdriver.DifferOutputFormat) ([
 	switch format {
 	case graphdriver.DifferOutputFormatDir:
 	case graphdriver.DifferOutputFormatFlat:
-		entries, err = makeEntriesFlat(entries)
+		entries, err = makeEntriesFlat(entries, nil)
 		if err != nil {
 			return nil, err
 		}
diff --git a/pkg/chunked/dump/dump.go b/pkg/chunked/dump/dump.go
index fde42d75fc..0e673f3f2d 100644
--- a/pkg/chunked/dump/dump.go
+++ b/pkg/chunked/dump/dump.go
@@ -9,11 +9,11 @@ import (
 	"io"
 	"path/filepath"
 	"reflect"
-	"strings"
 	"time"
 
 	"github.com/containers/storage/pkg/chunked/internal/minimal"
 	storagePath "github.com/containers/storage/pkg/chunked/internal/path"
+	"github.com/opencontainers/go-digest"
 	"golang.org/x/sys/unix"
 )
 
@@ -165,11 +165,16 @@ func dumpNode(out io.Writer, added map[string]*minimal.FileMetadata, links map[s
 		} else {
 			payload = storagePath.CleanAbsPath(entry.Linkname)
 		}
-	} else {
-		if len(entry.Digest) > 10 {
-			d := strings.Replace(entry.Digest, "sha256:", "", 1)
-			payload = d[:2] + "/" + d[2:]
+	} else if entry.Digest != "" {
+		d, err := digest.Parse(entry.Digest)
+		if err != nil {
+			return fmt.Errorf("invalid digest %q for %q: %w", entry.Digest, entry.Name, err)
+		}
+		path, err := storagePath.RegularFilePathForValidatedDigest(d)
+		if err != nil {
+			return fmt.Errorf("determining physical file path for %q: %w", entry.Name, err)
 		}
+		payload = path
 	}
 
 	if _, err := fmt.Fprint(out, escapedOptional([]byte(payload), ESCAPE_LONE_DASH)); err != nil {
diff --git a/pkg/chunked/dump/dump_test.go b/pkg/chunked/dump/dump_test.go
index 88763a4fcc..aa022f2250 100644
--- a/pkg/chunked/dump/dump_test.go
+++ b/pkg/chunked/dump/dump_test.go
@@ -59,7 +59,7 @@ func TestDumpNode(t *testing.T) {
 		Devminor: 0,
 		ModTime:  &modTime,
 		Linkname: "",
-		Digest:   "sha256:abcdef1234567890",
+		Digest:   "sha256:0123456789abcdef1123456789abcdef2123456789abcdef3123456789abcdef",
 		Xattrs: map[string]string{
 			"user.key1": base64.StdEncoding.EncodeToString([]byte("value1")),
 		},
@@ -150,7 +150,7 @@ func TestDumpNode(t *testing.T) {
 			entries: []*minimal.FileMetadata{
 				regularFileEntry,
 			},
-			expected: "/example.txt 100 100000 1 1000 1000 0 1672531200.0 ab/cdef1234567890 - - user.key1=value1\n",
+			expected: "/example.txt 100 100000 1 1000 1000 0 1672531200.0 01/23456789abcdef1123456789abcdef2123456789abcdef3123456789abcdef - - user.key1=value1\n",
 		},
 		{
 			name: "root entry with file",
@@ -158,7 +158,7 @@ func TestDumpNode(t *testing.T) {
 				rootEntry,
 				regularFileEntry,
 			},
-			expected: "/ 0 40000 1 0 0 0 1672531200.0 - - -\n/example.txt 100 100000 1 1000 1000 0 1672531200.0 ab/cdef1234567890 - - user.key1=value1\n",
+			expected: "/ 0 40000 1 0 0 0 1672531200.0 - - -\n/example.txt 100 100000 1 1000 1000 0 1672531200.0 01/23456789abcdef1123456789abcdef2123456789abcdef3123456789abcdef - - user.key1=value1\n",
 			skipAddingRootEntry: true,
 		},
 		{
@@ -196,7 +196,7 @@ func TestDumpNode(t *testing.T) {
 				regularFileEntry,
 				directoryEntry,
 			},
-			expected: "/ 0 40000 1 0 0 0 1672531200.0 - - -\n/example.txt 100 100000 1 1000 1000 0 1672531200.0 ab/cdef1234567890 - - user.key1=value1\n/mydir 0 40000 1 1000 1000 0 1672531200.0 - - - user.key2=value2\n",
+			expected: "/ 0 40000 1 0 0 0 1672531200.0 - - -\n/example.txt 100 100000 1 1000 1000 0 1672531200.0 01/23456789abcdef1123456789abcdef2123456789abcdef3123456789abcdef - - user.key1=value1\n/mydir 0 40000 1 1000 1000 0 1672531200.0 - - - user.key2=value2\n",
 			skipAddingRootEntry: true,
 		},
 	}
 }
diff --git a/pkg/chunked/internal/path/path.go b/pkg/chunked/internal/path/path.go
index 50ca1def87..55ba74550e 100644
--- a/pkg/chunked/internal/path/path.go
+++ b/pkg/chunked/internal/path/path.go
@@ -1,7 +1,10 @@
 package path
 
 import (
+	"fmt"
 	"path/filepath"
+
+	"github.com/opencontainers/go-digest"
 )
 
 // CleanAbsPath removes any ".." and "." from the path
@@ -10,3 +13,15 @@ import (
 func CleanAbsPath(path string) string {
 	return filepath.Clean("/" + path)
 }
+
+// RegularFilePathForValidatedDigest returns the path used in the composefs backing store for a
+// regular file with the provided content digest.
+//
+// The caller MUST ensure d is a valid digest (in particular, that it contains no path separators or .. entries)
+func RegularFilePathForValidatedDigest(d digest.Digest) (string, error) {
+	if algo := d.Algorithm(); algo != digest.SHA256 {
+		return "", fmt.Errorf("unexpected digest algorithm %q", algo)
+	}
+	e := d.Encoded()
+	return e[0:2] + "/" + e[2:], nil
+}
diff --git a/pkg/chunked/internal/path/path_test.go b/pkg/chunked/internal/path/path_test.go
index a410483e2b..cec939897d 100644
--- a/pkg/chunked/internal/path/path_test.go
+++ b/pkg/chunked/internal/path/path_test.go
@@ -4,7 +4,9 @@ import (
 	"fmt"
 	"testing"
 
+	"github.com/opencontainers/go-digest"
 	"github.com/stretchr/testify/assert"
+	"github.com/stretchr/testify/require"
 )
 
 func TestCleanAbsPath(t *testing.T) {
@@ -46,3 +48,16 @@ func TestCleanAbsPath(t *testing.T) {
 		assert.Equal(t, test.expected, CleanAbsPath(test.path), fmt.Sprintf("path %q failed", test.path))
 	}
 }
+
+func TestRegularFilePathForValidatedDigest(t *testing.T) {
+	d, err := digest.Parse("sha256:0123456789abcdef1123456789abcdef2123456789abcdef3123456789abcdef")
+	require.NoError(t, err)
+	res, err := RegularFilePathForValidatedDigest(d)
+	require.NoError(t, err)
+	assert.Equal(t, "01/23456789abcdef1123456789abcdef2123456789abcdef3123456789abcdef", res)
+
+	d, err = digest.Parse("sha512:0123456789abcdef1123456789abcdef2123456789abcdef3123456789abcdef0123456789abcdef1123456789abcdef2123456789abcdef3123456789abcdef")
+	require.NoError(t, err)
+	_, err = RegularFilePathForValidatedDigest(d)
+	assert.Error(t, err)
+}
diff --git a/pkg/chunked/storage_linux.go b/pkg/chunked/storage_linux.go
index cbf8229ac2..828a42919d 100644
--- a/pkg/chunked/storage_linux.go
+++ b/pkg/chunked/storage_linux.go
@@ -2,6 +2,7 @@ package chunked
 
 import (
 	archivetar "archive/tar"
+	"bytes"
 	"context"
 	"encoding/base64"
 	"errors"
@@ -28,12 +29,15 @@ import (
 	"github.com/containers/storage/pkg/fsverity"
 	"github.com/containers/storage/pkg/idtools"
 	"github.com/containers/storage/pkg/system"
+	securejoin "github.com/cyphar/filepath-securejoin"
 	jsoniter "github.com/json-iterator/go"
 	"github.com/klauspost/compress/zstd"
 	"github.com/klauspost/pgzip"
 	digest "github.com/opencontainers/go-digest"
 	"github.com/sirupsen/logrus"
 	"github.com/vbatts/tar-split/archive/tar"
+	"github.com/vbatts/tar-split/tar/asm"
+	tsStorage "github.com/vbatts/tar-split/tar/storage"
 	"golang.org/x/sys/unix"
 )
 
@@ -93,7 +97,7 @@ type chunkedDiffer struct {
 	blobSize            int64
 	uncompressedTarSize int64 // -1 if unknown
 
-	pullOptions map[string]string
+	pullOptions pullOptions
 
 	useFsVerity     graphdriver.DifferFsVerity
 	fsVerityDigests map[string]string
@@ -109,6 +113,42 @@ type chunkedLayerData struct {
 	Format graphdriver.DifferOutputFormat `json:"format"`
 }
 
+// pullOptions contains parsed data from storage.Store.PullOptions.
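+// For example, a storage.conf pull_options table such as
+//   pull_options = {enable_partial_images = "true", use_hard_links = "false", ostree_repos = ""}
+// results in enablePartialImages == true and useHardLinks == false; unset keys default to false.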
+// TODO: ideally this should be parsed along with the rest of the config file into StoreOptions directly
+// (and then storage.Store.PullOptions would need to be somehow simulated).
+type pullOptions struct {
+	enablePartialImages                     bool     // enable_partial_images
+	convertImages                           bool     // convert_images
+	useHardLinks                            bool     // use_hard_links
+	insecureAllowUnpredictableImageContents bool     // insecure_allow_unpredictable_image_contents
+	ostreeRepos                             []string // ostree_repos
+}
+
+func parsePullOptions(store storage.Store) pullOptions {
+	options := store.PullOptions()
+
+	res := pullOptions{}
+	for _, e := range []struct {
+		dest         *bool
+		name         string
+		defaultValue bool
+	}{
+		{&res.enablePartialImages, "enable_partial_images", false},
+		{&res.convertImages, "convert_images", false},
+		{&res.useHardLinks, "use_hard_links", false},
+		{&res.insecureAllowUnpredictableImageContents, "insecure_allow_unpredictable_image_contents", false},
+	} {
+		if value, ok := options[e.name]; ok {
+			*e.dest = strings.ToLower(value) == "true"
+		} else {
+			*e.dest = e.defaultValue
+		}
+	}
+	res.ostreeRepos = strings.Split(options["ostree_repos"], ":")
+
+	return res
+}
+
 func (c *chunkedDiffer) convertTarToZstdChunked(destDirectory string, payload *os.File) (int64, *seekableFile, digest.Digest, map[string]string, error) {
 	diff, err := archive.DecompressStream(payload)
 	if err != nil {
@@ -148,22 +188,21 @@ func (c *chunkedDiffer) convertTarToZstdChunked(destDirectory string, payload *o
 // If it returns an error that implements IsErrFallbackToOrdinaryLayerDownload, the caller can
 // retry the operation with a different method.
 func GetDiffer(ctx context.Context, store storage.Store, blobDigest digest.Digest, blobSize int64, annotations map[string]string, iss ImageSourceSeekable) (graphdriver.Differ, error) {
-	pullOptions := store.PullOptions()
+	pullOptions := parsePullOptions(store)
 
-	if !parseBooleanPullOption(pullOptions, "enable_partial_images", false) {
-		// If convertImages is set, the two options disagree whether fallback is permissible.
+	if !pullOptions.enablePartialImages {
+		// If pullOptions.convertImages is set, the two options disagree whether fallback is permissible.
 		// Right now, we enable it, but that’s not a promise; rather, such a configuration should ideally be rejected.
 		return nil, newErrFallbackToOrdinaryLayerDownload(errors.New("partial images are disabled"))
 	}
-	// convertImages also serves as a “must not fallback to non-partial pull” option (?!)
-	convertImages := parseBooleanPullOption(pullOptions, "convert_images", false)
+	// pullOptions.convertImages also serves as a “must not fallback to non-partial pull” option (?!)
 
 	graphDriver, err := store.GraphDriver()
 	if err != nil {
 		return nil, err
 	}
 	if _, partialSupported := graphDriver.(graphdriver.DriverWithDiffer); !partialSupported {
-		if convertImages {
+		if pullOptions.convertImages {
 			return nil, fmt.Errorf("graph driver %s does not support partial pull but convert_images requires that", graphDriver.String())
 		}
 		return nil, newErrFallbackToOrdinaryLayerDownload(fmt.Errorf("graph driver %s does not support partial pull", graphDriver.String()))
@@ -175,7 +214,7 @@ func GetDiffer(ctx context.Context, store storage.Store, blobDigest digest.Diges
 		return nil, err
 	}
 	// If convert_images is enabled, always attempt to convert it instead of returning an error or falling back to a different method.
-	if convertImages {
+	if pullOptions.convertImages {
 		logrus.Debugf("Created differ to convert blob %q", blobDigest)
 		return makeConvertFromRawDiffer(store, blobDigest, blobSize, iss, pullOptions)
 	}
@@ -187,9 +226,9 @@ func GetDiffer(ctx context.Context, store storage.Store, blobDigest digest.Diges
 
 // getProperDiffer is an implementation detail of GetDiffer.
 // It returns a “proper” differ (not a convert_images one) if possible.
-// On error, the second parameter is true if a fallback to an alternative (either the makeConverToRaw differ, or a non-partial pull)
+// On error, the second return value is true if a fallback to an alternative (either the differ from makeConvertFromRawDiffer, or a non-partial pull)
 // is permissible.
-func getProperDiffer(store storage.Store, blobDigest digest.Digest, blobSize int64, annotations map[string]string, iss ImageSourceSeekable, pullOptions map[string]string) (graphdriver.Differ, bool, error) {
+func getProperDiffer(store storage.Store, blobDigest digest.Digest, blobSize int64, annotations map[string]string, iss ImageSourceSeekable, pullOptions pullOptions) (graphdriver.Differ, bool, error) {
 	zstdChunkedTOCDigestString, hasZstdChunkedTOC := annotations[minimal.ManifestChecksumKey]
 	estargzTOCDigestString, hasEstargzTOC := annotations[estargz.TOCJSONDigestAnnotation]
 
@@ -202,12 +241,10 @@ func getProperDiffer(store storage.Store, blobDigest digest.Digest, blobSize int
 		if err != nil {
 			return nil, false, err
 		}
-		differ, err := makeZstdChunkedDiffer(store, blobSize, zstdChunkedTOCDigest, annotations, iss, pullOptions)
+		differ, canFallback, err := makeZstdChunkedDiffer(store, blobSize, zstdChunkedTOCDigest, annotations, iss, pullOptions)
 		if err != nil {
 			logrus.Debugf("Could not create zstd:chunked differ for blob %q: %v", blobDigest, err)
-			// If the error is a bad request to the server, then signal to the caller that it can try a different method.
-			var badRequestErr ErrBadRequest
-			return nil, errors.As(err, &badRequestErr), err
+			return nil, canFallback, err
 		}
 		logrus.Debugf("Created zstd:chunked differ for blob %q", blobDigest)
 		return differ, false, nil
@@ -217,26 +254,23 @@
 		if err != nil {
 			return nil, false, err
 		}
-		differ, err := makeEstargzChunkedDiffer(store, blobSize, estargzTOCDigest, iss, pullOptions)
+		differ, canFallback, err := makeEstargzChunkedDiffer(store, blobSize, estargzTOCDigest, iss, pullOptions)
 		if err != nil {
 			logrus.Debugf("Could not create estargz differ for blob %q: %v", blobDigest, err)
-			// If the error is a bad request to the server, then signal to the caller that it can try a different method.
-			var badRequestErr ErrBadRequest
-			return nil, errors.As(err, &badRequestErr), err
+			return nil, canFallback, err
 		}
 		logrus.Debugf("Created eStargz differ for blob %q", blobDigest)
 		return differ, false, nil
 
 	default: // no TOC
-		convertImages := parseBooleanPullOption(pullOptions, "convert_images", false)
-		if !convertImages {
+		if !pullOptions.convertImages {
 			return nil, true, errors.New("no TOC found and convert_images is not configured")
 		}
 		return nil, true, errors.New("no TOC found")
 	}
 }
 
-func makeConvertFromRawDiffer(store storage.Store, blobDigest digest.Digest, blobSize int64, iss ImageSourceSeekable, pullOptions map[string]string) (*chunkedDiffer, error) {
+func makeConvertFromRawDiffer(store storage.Store, blobDigest digest.Digest, blobSize int64, iss ImageSourceSeekable, pullOptions pullOptions) (*chunkedDiffer, error) {
 	layersCache, err := getLayersCache(store)
 	if err != nil {
 		return nil, err
@@ -255,22 +289,31 @@ func makeConvertFromRawDiffer(store storage.Store, blobDigest digest.Digest, blo
 	}, nil
 }
 
-func makeZstdChunkedDiffer(store storage.Store, blobSize int64, tocDigest digest.Digest, annotations map[string]string, iss ImageSourceSeekable, pullOptions map[string]string) (*chunkedDiffer, error) {
+// makeZstdChunkedDiffer sets up a chunkedDiffer for a zstd:chunked layer.
+//
+// On error, the second return value is true if a fallback to an alternative (either the differ from makeConvertFromRawDiffer, or a non-partial pull)
+// is permissible.
+func makeZstdChunkedDiffer(store storage.Store, blobSize int64, tocDigest digest.Digest, annotations map[string]string, iss ImageSourceSeekable, pullOptions pullOptions) (*chunkedDiffer, bool, error) {
 	manifest, toc, tarSplit, tocOffset, err := readZstdChunkedManifest(iss, tocDigest, annotations)
 	if err != nil {
-		return nil, fmt.Errorf("read zstd:chunked manifest: %w", err)
+		// If the error is a bad request to the server, then signal to the caller that it can try a different method.
+		var badRequestErr ErrBadRequest
+		return nil, errors.As(err, &badRequestErr), fmt.Errorf("read zstd:chunked manifest: %w", err)
 	}
+
 	var uncompressedTarSize int64 = -1
 	if tarSplit != nil {
 		uncompressedTarSize, err = tarSizeFromTarSplit(tarSplit)
 		if err != nil {
-			return nil, fmt.Errorf("computing size from tar-split: %w", err)
+			return nil, false, fmt.Errorf("computing size from tar-split: %w", err)
 		}
+	} else if !pullOptions.insecureAllowUnpredictableImageContents { // With no tar-split, we can't compute the traditional UncompressedDigest.
+		return nil, true, fmt.Errorf("zstd:chunked layers without tar-split data don't support partial pulls with guaranteed consistency with non-partial pulls")
 	}
 
 	layersCache, err := getLayersCache(store)
 	if err != nil {
-		return nil, err
+		return nil, false, err
 	}
 
 	return &chunkedDiffer{
@@ -287,17 +330,27 @@ func makeZstdChunkedDiffer(store storage.Store, blobSize int64, tocDigest digest
 		stream:    iss,
 		tarSplit:  tarSplit,
 		tocOffset: tocOffset,
-	}, nil
+	}, false, nil
 }
 
-func makeEstargzChunkedDiffer(store storage.Store, blobSize int64, tocDigest digest.Digest, iss ImageSourceSeekable, pullOptions map[string]string) (*chunkedDiffer, error) {
+// makeEstargzChunkedDiffer sets up a chunkedDiffer for an estargz layer.
+//
+// On error, the second return value is true if a fallback to an alternative (either the differ from makeConvertFromRawDiffer, or a non-partial pull)
+// is permissible.
+func makeEstargzChunkedDiffer(store storage.Store, blobSize int64, tocDigest digest.Digest, iss ImageSourceSeekable, pullOptions pullOptions) (*chunkedDiffer, bool, error) {
+	if !pullOptions.insecureAllowUnpredictableImageContents { // With no tar-split, we can't compute the traditional UncompressedDigest.
+		return nil, true, fmt.Errorf("estargz layers don't support partial pulls with guaranteed consistency with non-partial pulls")
+	}
+
 	manifest, tocOffset, err := readEstargzChunkedManifest(iss, blobSize, tocDigest)
 	if err != nil {
-		return nil, fmt.Errorf("read zstd:chunked manifest: %w", err)
+		// If the error is a bad request to the server, then signal to the caller that it can try a different method.
+		var badRequestErr ErrBadRequest
+		return nil, errors.As(err, &badRequestErr), fmt.Errorf("read estargz manifest: %w", err)
 	}
 
 	layersCache, err := getLayersCache(store)
 	if err != nil {
-		return nil, err
+		return nil, false, err
 	}
 
 	return &chunkedDiffer{
@@ -312,7 +365,7 @@ func makeEstargzChunkedDiffer(store storage.Store, blobSize int64, tocDigest dig
 		pullOptions: pullOptions,
 		stream:      iss,
 		tocOffset:   tocOffset,
-	}, nil
+	}, false, nil
 }
 
 func makeCopyBuffer() []byte {
@@ -1044,13 +1097,6 @@ type hardLinkToCreate struct {
 	metadata *fileMetadata
 }
 
-func parseBooleanPullOption(pullOptions map[string]string, name string, def bool) bool {
-	if value, ok := pullOptions[name]; ok {
-		return strings.ToLower(value) == "true"
-	}
-	return def
-}
-
 type findAndCopyFileOptions struct {
 	useHardLinks bool
 	ostreeRepos  []string
@@ -1117,10 +1163,13 @@ func (c *chunkedDiffer) findAndCopyFile(dirfd int, r *fileMetadata, copyOptions
 	return false, nil
 }
 
-func makeEntriesFlat(mergedEntries []fileMetadata) ([]fileMetadata, error) {
+// makeEntriesFlat collects regular-file entries from mergedEntries and produces a new list
+// where each file content is represented only once, named per path.RegularFilePathForValidatedDigest.
+// If flatPathNameMap is not nil, this function writes to it a mapping from filepath.Clean(originalName) to the name used in the composefs backing store.
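+// For example, if /a/hello and /b/hello have identical contents, the returned list contains a
+// single entry for that content, and flatPathNameMap maps both original names to its flat path.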
+func makeEntriesFlat(mergedEntries []fileMetadata, flatPathNameMap map[string]string) ([]fileMetadata, error) {
 	var new []fileMetadata
 
-	hashes := make(map[string]string)
+	knownFlatPaths := make(map[string]struct{})
 	for i := range mergedEntries {
 		if mergedEntries[i].Type != TypeReg {
 			continue
@@ -1130,16 +1179,22 @@
 		}
 		digest, err := digest.Parse(mergedEntries[i].Digest)
 		if err != nil {
-			return nil, err
+			return nil, fmt.Errorf("invalid digest %q for %q: %w", mergedEntries[i].Digest, mergedEntries[i].Name, err)
+		}
+		path, err := path.RegularFilePathForValidatedDigest(digest)
+		if err != nil {
+			return nil, fmt.Errorf("determining physical file path for %q: %w", mergedEntries[i].Name, err)
+		}
+		if flatPathNameMap != nil {
+			flatPathNameMap[filepath.Clean(mergedEntries[i].Name)] = path
 		}
-		d := digest.Encoded()
-		if hashes[d] != "" {
+		if _, known := knownFlatPaths[path]; known {
 			continue
 		}
-		hashes[d] = d
+		knownFlatPaths[path] = struct{}{}
 
-		mergedEntries[i].Name = fmt.Sprintf("%s/%s", d[0:2], d[2:])
+		mergedEntries[i].Name = path
 		mergedEntries[i].skipSetAttrs = true
 
 		new = append(new, mergedEntries[i])
@@ -1400,13 +1455,6 @@ func (c *chunkedDiffer) ApplyDiff(dest string, options *archive.TarOptions, diff
 		Size: c.uncompressedTarSize,
 	}
 
-	// When the hard links deduplication is used, file attributes are ignored because setting them
-	// modifies the source file as well.
-	useHardLinks := parseBooleanPullOption(c.pullOptions, "use_hard_links", false)
-
-	// List of OSTree repositories to use for deduplication
-	ostreeRepos := strings.Split(c.pullOptions["ostree_repos"], ":")
-
 	whiteoutConverter := archive.GetWhiteoutConverter(options.WhiteoutFormat, options.WhiteoutData)
 
 	var missingParts []missingPart
@@ -1439,16 +1487,20 @@ func (c *chunkedDiffer) ApplyDiff(dest string, options *archive.TarOptions, diff
 	if err != nil {
 		return output, &fs.PathError{Op: "open", Path: dest, Err: err}
 	}
-	defer unix.Close(dirfd)
+	dirFile := os.NewFile(uintptr(dirfd), dest)
+	defer dirFile.Close()
+
+	var flatPathNameMap map[string]string // = nil
 	if differOpts != nil && differOpts.Format == graphdriver.DifferOutputFormatFlat {
-		mergedEntries, err = makeEntriesFlat(mergedEntries)
+		flatPathNameMap = map[string]string{}
+		mergedEntries, err = makeEntriesFlat(mergedEntries, flatPathNameMap)
 		if err != nil {
 			return output, err
 		}
 		createdDirs := make(map[string]struct{})
 		for _, e := range mergedEntries {
-			d := e.Name[0:2]
+			// This hard-codes an assumption that RegularFilePathForValidatedDigest creates paths with exactly one directory component.
+			d := filepath.Dir(e.Name)
 			if _, found := createdDirs[d]; !found {
 				if err := unix.Mkdirat(dirfd, d, 0o755); err != nil {
 					return output, &fs.PathError{Op: "mkdirat", Path: d, Err: err}
@@ -1465,8 +1517,10 @@ func (c *chunkedDiffer) ApplyDiff(dest string, options *archive.TarOptions, diff
 	missingPartsSize, totalChunksSize := int64(0), int64(0)
 
 	copyOptions := findAndCopyFileOptions{
-		useHardLinks: useHardLinks,
-		ostreeRepos:  ostreeRepos,
+		// When the hard links deduplication is used, file attributes are ignored because setting them
+		// modifies the source file as well.
+		useHardLinks: c.pullOptions.useHardLinks,
+		ostreeRepos:  c.pullOptions.ostreeRepos, // List of OSTree repositories to use for deduplication
 		options:      options,
 	}
 
@@ -1731,6 +1785,39 @@ func (c *chunkedDiffer) ApplyDiff(dest string, options *archive.TarOptions, diff
 		}
 	}
 
+	// To ensure that consumers of the layer who decompress and read the full tar stream,
+	// and consumers who consume the data via the TOC, both see exactly the same data and metadata,
+	// compute the UncompressedDigest.
+	// c/image will then ensure that this value matches the value in the image config’s RootFS.DiffID, i.e. the image must commit
+	// to one UncompressedDigest value for each layer, and that will avoid the ambiguity (in consumers who validate layers against DiffID).
+	//
+	// c/image also uses the UncompressedDigest as a layer ID, allowing it to use the traditional layer and image IDs.
+	//
+	// This is, sadly, quite costly: Up to now we might have had to write, and digest, only the new/modified files.
+	// Here we need to read, and digest, the whole layer, even if almost all of it was already present locally previously.
+	// So, really specialized (EXTREMELY RARE) users can opt out of this check using insecureAllowUnpredictableImageContents.
+	//
+	// Layers without a tar-split (estargz layers and old zstd:chunked layers) can't produce an UncompressedDigest that
+	// matches the expected RootFS.DiffID; we always fall back to full pulls, again unless the user opts out
+	// via insecureAllowUnpredictableImageContents.
+	if output.UncompressedDigest == "" {
+		switch {
+		case c.pullOptions.insecureAllowUnpredictableImageContents:
+			// Oh well. Skip the costly digest computation.
+		case output.TarSplit != nil:
+			metadata := tsStorage.NewJSONUnpacker(bytes.NewReader(output.TarSplit))
+			fg := newStagedFileGetter(dirFile, flatPathNameMap)
+			digester := digest.Canonical.Digester()
+			if err := asm.WriteOutputTarStream(fg, metadata, digester.Hash()); err != nil {
+				return output, fmt.Errorf("digesting staged uncompressed stream: %w", err)
+			}
+			output.UncompressedDigest = digester.Digest()
+		default:
+			// We are checking for this earlier in GetDiffer, so this should not be reachable.
+			return output, fmt.Errorf(`internal error: layer's UncompressedDigest is unknown and "insecure_allow_unpredictable_image_contents" is not set`)
+		}
+	}
+
 	if totalChunksSize > 0 {
 		logrus.Debugf("Missing %d bytes out of %d (%.2f %%)", missingPartsSize, totalChunksSize, float32(missingPartsSize*100.0)/float32(totalChunksSize))
 	}
@@ -1856,3 +1943,33 @@ func validateChunkChecksum(chunk *minimal.FileMetadata, root, path string, offse
 	return digester.Digest() == digest
 }
+
+// newStagedFileGetter returns an object usable as storage.FileGetter for rootDir.
+// If flatPathNameMap is not nil, it must be used to map logical file names into the backing file paths.
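+// The returned getter is used in ApplyDiff above, with tar-split's asm.WriteOutputTarStream, to
+// re-read the staged files and compute the layer's UncompressedDigest.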
+func newStagedFileGetter(rootDir *os.File, flatPathNameMap map[string]string) *stagedFileGetter {
+	return &stagedFileGetter{
+		rootDir:         rootDir,
+		flatPathNameMap: flatPathNameMap,
+	}
+}
+
+type stagedFileGetter struct {
+	rootDir         *os.File
+	flatPathNameMap map[string]string // nil, or a map from filepath.Clean()ed tar file names to expected on-filesystem names
+}
+
+func (fg *stagedFileGetter) Get(filename string) (io.ReadCloser, error) {
+	if fg.flatPathNameMap != nil {
+		path, ok := fg.flatPathNameMap[filepath.Clean(filename)]
+		if !ok {
+			return nil, fmt.Errorf("no path mapping exists for tar entry %q", filename)
+		}
+		filename = path
+	}
+	pathFD, err := securejoin.OpenatInRoot(fg.rootDir, filename)
+	if err != nil {
+		return nil, err
+	}
+	defer pathFD.Close()
+	return securejoin.Reopen(pathFD, unix.O_RDONLY)
+}
diff --git a/storage.conf b/storage.conf
index 962325233e..909e47b39b 100644
--- a/storage.conf
+++ b/storage.conf
@@ -80,6 +80,25 @@ additionalimagestores = [
 # This is a "string bool": "false" | "true" (cannot be native TOML boolean)
 # convert_images = "false"
 
+# This should ALMOST NEVER be set.
+# It allows partial pulls of images without guaranteeing that "partial
+# pulls" and non-partial pulls both result in consistent image contents.
+# This allows pulling estargz images and early versions of zstd:chunked images;
+# otherwise, these layers always use the traditional non-partial pull path.
+#
+# This option should be enabled EXTREMELY rarely, only if ALL images that could
+# EVER be conceivably pulled on this system are GUARANTEED (e.g. using a signature policy)
+# to come from a build system trusted to never attack image integrity.
+#
+# If this consistency enforcement were disabled, malicious images could be built
+# in a way designed to evade other audit mechanisms, so the presence of most other audit
+# mechanisms is not a replacement for the above-mentioned need for all images to come
+# from a trusted build system.
+#
+# As a side effect, enabling this option will also make image IDs unpredictable
+# (usually not equal to the traditional value matching the config digest).
+# insecure_allow_unpredictable_image_contents = "false"
+
 # Root-auto-userns-user is a user name which can be used to look up one or more UID/GID
 # ranges in the /etc/subuid and /etc/subgid file. These ranges will be partitioned
 # to containers configured to create automatically a user namespace. Containers