Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

release-5.32 Zstd backports #2507

Closed
wants to merge 27 commits into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
27 commits
Select commit Hold shift + click to select a range
5af61e0
Bump to 5.32.1-dev
mtrmac Aug 7, 2024
dca0610
Compute the schema1 DiffID list based on m.LayerInfos()
mtrmac Jul 19, 2024
437b7fb
Improve the documentation of layer identification
mtrmac Apr 13, 2024
59f1890
Allow returning (and reporting) unexpected errors from computeID
mtrmac Jul 19, 2024
e405b01
Centralize collecting _consistent_ layer data into trustedLayerIdenti…
mtrmac Jul 19, 2024
3788220
Add TOC digest <-> uncompressed digest mapping to BIC
mtrmac Feb 29, 2024
ad6d40d
Set up storage destination to support TOC-based layers identified in …
mtrmac Feb 29, 2024
2c22da0
Split reusedBlobFromLayerLookup from tryReusingBlobAsPending
mtrmac Feb 29, 2024
cc25f17
Use BlobInfoCache to share more layers if (TOC->Uncompressed) mapping…
mtrmac Feb 29, 2024
0e79045
Record the (TOC digest, uncompressed digest) data when we compress la…
mtrmac Apr 9, 2024
874ad0e
Use the uncompressed digest we got from a BlobInfoCache for chunked l…
mtrmac Apr 30, 2024
072a576
HACK: Don't compress with zstd:chunked when encrypting
mtrmac Jul 23, 2024
7292f9a
Fix data returned when returning uncompressed data on a c/storage lay…
mtrmac Mar 15, 2024
6e33bdb
Turn CandidateCompression into CandidateTemplateWithCompression
mtrmac Mar 25, 2024
d816552
Make the fields of CandidateWithTime private
mtrmac Jul 30, 2024
b36f716
Reformat CandidateTemplateWithCompression a bit
mtrmac Mar 25, 2024
1ff3519
Introduce blobinfocache.DigestCompressorData
mtrmac Mar 13, 2024
577b535
Always record only the base variant information about consumed source…
mtrmac Mar 13, 2024
4df6647
Unrelated: Fix a bug in SQLite BlobInfoCache
mtrmac Aug 6, 2024
f4b5e90
Fix a comment
mtrmac Aug 6, 2024
08100b7
Add digest -> specific variant, annotation data to BIC
mtrmac Mar 13, 2024
c2d35c0
Record the specific variant, and TOC annotations, for blobs we compress
mtrmac Mar 13, 2024
cced793
Extend private.ReusedBlob to allow zstd:chunked reuses
mtrmac Mar 15, 2024
0800e84
Allow dockerImageDestination to reuse zstd:chunked blobs
mtrmac Mar 15, 2024
2fff234
Detect zstd:chunked format in source blobs
mtrmac Mar 15, 2024
a8aa8c4
Release 5.32.1
mtrmac Aug 7, 2024
dda9562
Bump to 5.32.2-dev
mtrmac Aug 7, 2024
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
161 changes: 98 additions & 63 deletions copy/compression.go
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@ import (
"github.com/containers/image/v5/pkg/compression"
compressiontypes "github.com/containers/image/v5/pkg/compression/types"
"github.com/containers/image/v5/types"
chunkedToc "github.com/containers/storage/pkg/chunked/toc"
imgspecv1 "github.com/opencontainers/image-spec/specs-go/v1"
"github.com/sirupsen/logrus"
)
Expand All @@ -34,10 +35,10 @@ var (

// bpDetectCompressionStepData contains data that the copy pipeline needs about the “detect compression” step.
type bpDetectCompressionStepData struct {
isCompressed bool
format compressiontypes.Algorithm // Valid if isCompressed
decompressor compressiontypes.DecompressorFunc // Valid if isCompressed
srcCompressorName string // Compressor name to possibly record in the blob info cache for the source blob.
isCompressed bool
format compressiontypes.Algorithm // Valid if isCompressed
decompressor compressiontypes.DecompressorFunc // Valid if isCompressed
srcCompressorBaseVariantName string // Compressor name to possibly record in the blob info cache for the source blob.
}

// blobPipelineDetectCompressionStep updates *stream to detect its current compression format.
Expand All @@ -51,15 +52,25 @@ func blobPipelineDetectCompressionStep(stream *sourceStream, srcInfo types.BlobI
}
stream.reader = reader

if decompressor != nil && format.Name() == compressiontypes.ZstdAlgorithmName {
tocDigest, err := chunkedToc.GetTOCDigest(srcInfo.Annotations)
if err != nil {
return bpDetectCompressionStepData{}, err
}
if tocDigest != nil {
format = compression.ZstdChunked
}

}
res := bpDetectCompressionStepData{
isCompressed: decompressor != nil,
format: format,
decompressor: decompressor,
}
if res.isCompressed {
res.srcCompressorName = format.Name()
res.srcCompressorBaseVariantName = format.BaseVariantName()
} else {
res.srcCompressorName = internalblobinfocache.Uncompressed
res.srcCompressorBaseVariantName = internalblobinfocache.Uncompressed
}

if expectedBaseFormat, known := expectedBaseCompressionFormats[stream.info.MediaType]; known && res.isCompressed && format.BaseVariantName() != expectedBaseFormat.Name() {
Expand All @@ -70,13 +81,14 @@ func blobPipelineDetectCompressionStep(stream *sourceStream, srcInfo types.BlobI

// bpCompressionStepData contains data that the copy pipeline needs about the compression step.
type bpCompressionStepData struct {
operation bpcOperation // What we are actually doing
uploadedOperation types.LayerCompression // Operation to use for updating the blob metadata (matching the end state, not necessarily what we do)
uploadedAlgorithm *compressiontypes.Algorithm // An algorithm parameter for the compressionOperation edits.
uploadedAnnotations map[string]string // Compression-related annotations that should be set on the uploaded blob. WARNING: This is only set after the srcStream.reader is fully consumed.
srcCompressorName string // Compressor name to record in the blob info cache for the source blob.
uploadedCompressorName string // Compressor name to record in the blob info cache for the uploaded blob.
closers []io.Closer // Objects to close after the upload is done, if any.
operation bpcOperation // What we are actually doing
uploadedOperation types.LayerCompression // Operation to use for updating the blob metadata (matching the end state, not necessarily what we do)
uploadedAlgorithm *compressiontypes.Algorithm // An algorithm parameter for the compressionOperation edits.
uploadedAnnotations map[string]string // Compression-related annotations that should be set on the uploaded blob. WARNING: This is only set after the srcStream.reader is fully consumed.
srcCompressorBaseVariantName string // Compressor base variant name to record in the blob info cache for the source blob.
uploadedCompressorBaseVariantName string // Compressor base variant name to record in the blob info cache for the uploaded blob.
uploadedCompressorSpecificVariantName string // Compressor specific variant name to record in the blob info cache for the uploaded blob.
closers []io.Closer // Objects to close after the upload is done, if any.
}

type bpcOperation int
Expand Down Expand Up @@ -128,11 +140,12 @@ func (ic *imageCopier) bpcPreserveEncrypted(stream *sourceStream, _ bpDetectComp
// We can’t do anything with an encrypted blob unless decrypted.
logrus.Debugf("Using original blob without modification for encrypted blob")
return &bpCompressionStepData{
operation: bpcOpPreserveOpaque,
uploadedOperation: types.PreserveOriginal,
uploadedAlgorithm: nil,
srcCompressorName: internalblobinfocache.UnknownCompression,
uploadedCompressorName: internalblobinfocache.UnknownCompression,
operation: bpcOpPreserveOpaque,
uploadedOperation: types.PreserveOriginal,
uploadedAlgorithm: nil,
srcCompressorBaseVariantName: internalblobinfocache.UnknownCompression,
uploadedCompressorBaseVariantName: internalblobinfocache.UnknownCompression,
uploadedCompressorSpecificVariantName: internalblobinfocache.UnknownCompression,
}, nil
}
return nil, nil
Expand All @@ -156,14 +169,19 @@ func (ic *imageCopier) bpcCompressUncompressed(stream *sourceStream, detected bp
Digest: "",
Size: -1,
}
specificVariantName := uploadedAlgorithm.Name()
if specificVariantName == uploadedAlgorithm.BaseVariantName() {
specificVariantName = internalblobinfocache.UnknownCompression
}
return &bpCompressionStepData{
operation: bpcOpCompressUncompressed,
uploadedOperation: types.Compress,
uploadedAlgorithm: uploadedAlgorithm,
uploadedAnnotations: annotations,
srcCompressorName: detected.srcCompressorName,
uploadedCompressorName: uploadedAlgorithm.Name(),
closers: []io.Closer{reader},
operation: bpcOpCompressUncompressed,
uploadedOperation: types.Compress,
uploadedAlgorithm: uploadedAlgorithm,
uploadedAnnotations: annotations,
srcCompressorBaseVariantName: detected.srcCompressorBaseVariantName,
uploadedCompressorBaseVariantName: uploadedAlgorithm.BaseVariantName(),
uploadedCompressorSpecificVariantName: specificVariantName,
closers: []io.Closer{reader},
}, nil
}
return nil, nil
Expand Down Expand Up @@ -196,15 +214,20 @@ func (ic *imageCopier) bpcRecompressCompressed(stream *sourceStream, detected bp
Digest: "",
Size: -1,
}
specificVariantName := ic.compressionFormat.Name()
if specificVariantName == ic.compressionFormat.BaseVariantName() {
specificVariantName = internalblobinfocache.UnknownCompression
}
succeeded = true
return &bpCompressionStepData{
operation: bpcOpRecompressCompressed,
uploadedOperation: types.PreserveOriginal,
uploadedAlgorithm: ic.compressionFormat,
uploadedAnnotations: annotations,
srcCompressorName: detected.srcCompressorName,
uploadedCompressorName: ic.compressionFormat.Name(),
closers: []io.Closer{decompressed, recompressed},
operation: bpcOpRecompressCompressed,
uploadedOperation: types.PreserveOriginal,
uploadedAlgorithm: ic.compressionFormat,
uploadedAnnotations: annotations,
srcCompressorBaseVariantName: detected.srcCompressorBaseVariantName,
uploadedCompressorBaseVariantName: ic.compressionFormat.BaseVariantName(),
uploadedCompressorSpecificVariantName: specificVariantName,
closers: []io.Closer{decompressed, recompressed},
}, nil
}
return nil, nil
Expand All @@ -225,12 +248,13 @@ func (ic *imageCopier) bpcDecompressCompressed(stream *sourceStream, detected bp
Size: -1,
}
return &bpCompressionStepData{
operation: bpcOpDecompressCompressed,
uploadedOperation: types.Decompress,
uploadedAlgorithm: nil,
srcCompressorName: detected.srcCompressorName,
uploadedCompressorName: internalblobinfocache.Uncompressed,
closers: []io.Closer{s},
operation: bpcOpDecompressCompressed,
uploadedOperation: types.Decompress,
uploadedAlgorithm: nil,
srcCompressorBaseVariantName: detected.srcCompressorBaseVariantName,
uploadedCompressorBaseVariantName: internalblobinfocache.Uncompressed,
uploadedCompressorSpecificVariantName: internalblobinfocache.UnknownCompression,
closers: []io.Closer{s},
}, nil
}
return nil, nil
Expand Down Expand Up @@ -268,11 +292,15 @@ func (ic *imageCopier) bpcPreserveOriginal(_ *sourceStream, detected bpDetectCom
algorithm = nil
}
return &bpCompressionStepData{
operation: bpcOp,
uploadedOperation: uploadedOp,
uploadedAlgorithm: algorithm,
srcCompressorName: detected.srcCompressorName,
uploadedCompressorName: detected.srcCompressorName,
operation: bpcOp,
uploadedOperation: uploadedOp,
uploadedAlgorithm: algorithm,
srcCompressorBaseVariantName: detected.srcCompressorBaseVariantName,
// We only record the base variant of the format on upload; we didn’t do anything with
// the TOC, we don’t know whether it matches the blob digest, so we don’t want to trigger
// reuse of any kind between the blob digest and the TOC digest.
uploadedCompressorBaseVariantName: detected.srcCompressorBaseVariantName,
uploadedCompressorSpecificVariantName: internalblobinfocache.UnknownCompression,
}
}

Expand Down Expand Up @@ -308,6 +336,15 @@ func (d *bpCompressionStepData) recordValidatedDigestData(c *copier, uploadedInf
// No useful information
case bpcOpCompressUncompressed:
c.blobInfoCache.RecordDigestUncompressedPair(uploadedInfo.Digest, srcInfo.Digest)
if d.uploadedAnnotations != nil {
tocDigest, err := chunkedToc.GetTOCDigest(d.uploadedAnnotations)
if err != nil {
return fmt.Errorf("parsing just-created compression annotations: %w", err)
}
if tocDigest != nil {
c.blobInfoCache.RecordTOCUncompressedPair(*tocDigest, srcInfo.Digest)
}
}
case bpcOpDecompressCompressed:
c.blobInfoCache.RecordDigestUncompressedPair(srcInfo.Digest, uploadedInfo.Digest)
case bpcOpRecompressCompressed, bpcOpPreserveCompressed:
Expand All @@ -323,29 +360,27 @@ func (d *bpCompressionStepData) recordValidatedDigestData(c *copier, uploadedInf
return fmt.Errorf("Internal error: Unexpected d.operation value %#v", d.operation)
}
}
if d.srcCompressorName == "" || d.uploadedCompressorName == "" {
return fmt.Errorf("internal error: missing compressor names (src: %q, uploaded: %q)",
d.srcCompressorName, d.uploadedCompressorName)
if d.srcCompressorBaseVariantName == "" || d.uploadedCompressorBaseVariantName == "" || d.uploadedCompressorSpecificVariantName == "" {
return fmt.Errorf("internal error: missing compressor names (src base: %q, uploaded base: %q, uploaded specific: %q)",
d.srcCompressorBaseVariantName, d.uploadedCompressorBaseVariantName, d.uploadedCompressorSpecificVariantName)
}
if d.uploadedCompressorName != internalblobinfocache.UnknownCompression {
if d.uploadedCompressorName != compressiontypes.ZstdChunkedAlgorithmName {
// HACK: Don’t record zstd:chunked algorithms.
// There is already a similar hack in internal/imagedestination/impl/helpers.CandidateMatchesTryReusingBlobOptions,
// and that one prevents reusing zstd:chunked blobs, so recording the algorithm here would be mostly harmless.
//
// We skip that here anyway to work around the inability of blobPipelineDetectCompressionStep to differentiate
// between zstd and zstd:chunked; so we could, in varying situations over time, call RecordDigestCompressorName
// with the same digest and both ZstdAlgorithmName and ZstdChunkedAlgorithmName , which causes warnings about
// inconsistent data to be logged.
c.blobInfoCache.RecordDigestCompressorName(uploadedInfo.Digest, d.uploadedCompressorName)
}
if d.uploadedCompressorBaseVariantName != internalblobinfocache.UnknownCompression {
c.blobInfoCache.RecordDigestCompressorData(uploadedInfo.Digest, internalblobinfocache.DigestCompressorData{
BaseVariantCompressor: d.uploadedCompressorBaseVariantName,
SpecificVariantCompressor: d.uploadedCompressorSpecificVariantName,
SpecificVariantAnnotations: d.uploadedAnnotations,
})
}
if srcInfo.Digest != "" && srcInfo.Digest != uploadedInfo.Digest &&
d.srcCompressorName != internalblobinfocache.UnknownCompression {
if d.srcCompressorName != compressiontypes.ZstdChunkedAlgorithmName {
// HACK: Don’t record zstd:chunked algorithms, see above.
c.blobInfoCache.RecordDigestCompressorName(srcInfo.Digest, d.srcCompressorName)
}
d.srcCompressorBaseVariantName != internalblobinfocache.UnknownCompression {
// If the source is already using some TOC-dependent variant, we either copied the
// blob as is, or perhaps decompressed it; either way we don’t trust the TOC digest,
// so record neither the variant name, nor the TOC digest.
c.blobInfoCache.RecordDigestCompressorData(srcInfo.Digest, internalblobinfocache.DigestCompressorData{
BaseVariantCompressor: d.srcCompressorBaseVariantName,
SpecificVariantCompressor: internalblobinfocache.UnknownCompression,
SpecificVariantAnnotations: nil,
})
}
return nil
}
Expand Down
51 changes: 43 additions & 8 deletions copy/single.go
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@ import (
"errors"
"fmt"
"io"
"maps"
"reflect"
"slices"
"strings"
Expand Down Expand Up @@ -149,6 +150,28 @@ func (c *copier) copySingleImage(ctx context.Context, unparsedImage *image.Unpar
ic.compressionFormat = c.options.DestinationCtx.CompressionFormat
ic.compressionLevel = c.options.DestinationCtx.CompressionLevel
}
// HACK: Don’t combine zstd:chunked and encryption.
// zstd:chunked can only usefully be consumed using range requests of parts of the layer, which would require the encryption
// to support decrypting arbitrary subsets of the stream. That’s plausible but not supported using the encryption API we have.
// Also, the chunked metadata is exposed in annotations unencrypted, which reveals the TOC digest = layer identity without
// encryption. (That can be determined from the unencrypted config anyway, but, still...)
//
// Ideally this should query a well-defined property of the compression algorithm (and $somehow determine the right fallback) instead of
// hard-coding zstd:chunked / zstd.
if ic.c.options.OciEncryptLayers != nil {
format := ic.compressionFormat
if format == nil {
format = defaultCompressionFormat
}
if format.Name() == compressiontypes.ZstdChunkedAlgorithmName {
if ic.requireCompressionFormatMatch {
return copySingleImageResult{}, errors.New("explicitly requested to combine zstd:chunked with encryption, which is not beneficial; use plain zstd instead")
}
logrus.Warnf("Compression using zstd:chunked is not beneficial for encrypted layers, using plain zstd instead")
ic.compressionFormat = &compression.Zstd
}
}

// Decide whether we can substitute blobs with semantic equivalents:
// - Don’t do that if we can’t modify the manifest at all
// - Ensure _this_ copy sees exactly the intended data when either processing a signed image or signing it.
Expand Down Expand Up @@ -866,21 +889,33 @@ func updatedBlobInfoFromReuse(inputInfo types.BlobInfo, reusedBlob private.Reuse
// Handling of compression, encryption, and the related MIME types and the like are all the responsibility
// of the generic code in this package.
res := types.BlobInfo{
Digest: reusedBlob.Digest,
Size: reusedBlob.Size,
URLs: nil, // This _must_ be cleared if Digest changes; clear it in other cases as well, to preserve previous behavior.
Annotations: inputInfo.Annotations, // FIXME: This should remove zstd:chunked annotations (but those annotations being left with incorrect values should not break pulls)
MediaType: inputInfo.MediaType, // Mostly irrelevant, MediaType is updated based on Compression*/CryptoOperation.
Digest: reusedBlob.Digest,
Size: reusedBlob.Size,
URLs: nil, // This _must_ be cleared if Digest changes; clear it in other cases as well, to preserve previous behavior.
// FIXME: This should remove zstd:chunked annotations IF the original was chunked and the new one isn’t
// (but those annotations being left with incorrect values should not break pulls).
Annotations: maps.Clone(inputInfo.Annotations),
MediaType: inputInfo.MediaType, // Mostly irrelevant, MediaType is updated based on Compression*/CryptoOperation.
CompressionOperation: reusedBlob.CompressionOperation,
CompressionAlgorithm: reusedBlob.CompressionAlgorithm,
CryptoOperation: inputInfo.CryptoOperation, // Expected to be unset anyway.
}
// The transport is only expected to fill CompressionOperation and CompressionAlgorithm
// if the blob was substituted; otherwise, fill it in based
// if the blob was substituted; otherwise, it is optional, and if not set, fill it in based
// on what we know from the srcInfos we were given.
if reusedBlob.Digest == inputInfo.Digest {
res.CompressionOperation = inputInfo.CompressionOperation
res.CompressionAlgorithm = inputInfo.CompressionAlgorithm
if res.CompressionOperation == types.PreserveOriginal {
res.CompressionOperation = inputInfo.CompressionOperation
}
if res.CompressionAlgorithm == nil {
res.CompressionAlgorithm = inputInfo.CompressionAlgorithm
}
}
if len(reusedBlob.CompressionAnnotations) != 0 {
if res.Annotations == nil {
res.Annotations = map[string]string{}
}
maps.Copy(res.Annotations, reusedBlob.CompressionAnnotations)
}
return res
}
Expand Down
Loading