Skip to content

Commit

Permalink
docker, BlobInfoCache: try to reuse compressed blobs when pushing acr…
Browse files Browse the repository at this point in the history
…oss registries

It seems we try to reuse blobs only for the specified registry, however
we can have valid known compressed digests across registry as well
following pr attempts to use that by doing following steps.

* `CandidateLocations2` now processes all known blobs and appends them
  to returned candidates at the lowest priority. As a result when
`TryReusingBlob` tries to process these candidates and if the blobs
filtered by the `Opaque` set by the `transport` fail to match then
attempt is made against all known blobs (ones which do not belong to the
current registry).

* Increase the sample set of potential blob reuse to all known
  compressed digests , also involving the one which do not belong to
current registry.

* If a blob is found match it against the registry where we are
  attempting to push. If blob is already there consider it a `CACHE
HIT!` and reply skipping blob, since its already there.

How to verify this ?

* Remove all images `buildah rmi --all` // needed so all new blobs can
  be tagged again in common bucket
* Remove any previous `blob-info-cache` by

```console
rm /home/<user>/.local/share/containers/cache/blob-info-cache-v1.boltdb
```

```console
$ skopeo copy docker://registry.fedoraproject.org/fedora-minimal docker://quay.io/fl/test:some-tag
$ buildah pull registry.fedoraproject.org/fedora-minimal
$ buildah tag registry.fedoraproject.org/fedora-minimal quay.io/fl/test
$ buildah push quay.io/fl/test
```

```console
Getting image source signatures
Copying blob a3497ca15bbf skipped: already exists
Copying config f7e02de757 done
Writing manifest to image destination
Storing signatures
```

Signed-off-by: Aditya R <[email protected]>
  • Loading branch information
flouthoc committed Sep 12, 2022
1 parent 2673cd8 commit 6c2de11
Show file tree
Hide file tree
Showing 5 changed files with 154 additions and 82 deletions.
40 changes: 26 additions & 14 deletions docker/docker_image_dest.go
Original file line number Diff line number Diff line change
Expand Up @@ -332,21 +332,33 @@ func (d *dockerImageDestination) TryReusingBlobWithOptions(ctx context.Context,
// Then try reusing blobs from other locations.
candidates := options.Cache.CandidateLocations2(d.ref.Transport(), bicTransportScope(d.ref), info.Digest, options.CanSubstitute)
for _, candidate := range candidates {
candidateRepo, err := parseBICLocationReference(candidate.Location)
if err != nil {
logrus.Debugf("Error parsing BlobInfoCache location reference: %s", err)
continue
}
if candidate.CompressorName != blobinfocache.Uncompressed {
logrus.Debugf("Trying to reuse cached location %s compressed with %s in %s", candidate.Digest.String(), candidate.CompressorName, candidateRepo.Name())
var candidateRepo reference.Named
if !candidate.UnknownLocation {
candidateRepo, err = parseBICLocationReference(candidate.Location)
if err != nil {
logrus.Debugf("Error parsing BlobInfoCache location reference: %s", err)
continue
}
if candidate.CompressorName != blobinfocache.Uncompressed {
logrus.Debugf("Trying to reuse cached location %s compressed with %s in %s", candidate.Digest.String(), candidate.CompressorName, candidateRepo.Name())
} else {
logrus.Debugf("Trying to reuse cached location %s with no compression in %s", candidate.Digest.String(), candidateRepo.Name())
}
// Sanity checks:
if reference.Domain(candidateRepo) != reference.Domain(d.ref.ref) {
logrus.Debugf("... Internal error: domain %s does not match destination %s", reference.Domain(candidateRepo), reference.Domain(d.ref.ref))
continue
}
} else {
logrus.Debugf("Trying to reuse cached location %s with no compression in %s", candidate.Digest.String(), candidateRepo.Name())
}

// Sanity checks:
if reference.Domain(candidateRepo) != reference.Domain(d.ref.ref) {
logrus.Debugf("... Internal error: domain %s does not match destination %s", reference.Domain(candidateRepo), reference.Domain(d.ref.ref))
continue
if candidate.CompressorName != blobinfocache.Uncompressed {
logrus.Debugf("Trying to reuse cached location %s compressed with %s", candidate.Digest.String(), candidate.CompressorName)
} else {
logrus.Debugf("Trying to reuse cached location %s with no compression", candidate.Digest.String())
}
// This digest is a known variant of this blob but we don’t
// have a recorded location in this registry, let’s try looking
// for it in the current repo.
candidateRepo = reference.TrimNamed(d.ref.ref)
}
if candidateRepo.Name() == d.ref.ref.Name() && candidate.Digest == info.Digest {
logrus.Debug("... Already tried the primary destination")
Expand Down
7 changes: 4 additions & 3 deletions internal/blobinfocache/types.go
Original file line number Diff line number Diff line change
Expand Up @@ -39,7 +39,8 @@ type BlobInfoCache2 interface {

// BICReplacementCandidate2 is an item returned by BlobInfoCache2.CandidateLocations2.
type BICReplacementCandidate2 struct {
Digest digest.Digest
CompressorName string // either the Name() of a known pkg/compression.Algorithm, or Uncompressed or UnknownCompression
Location types.BICLocationReference
Digest digest.Digest
CompressorName string // either the Name() of a known pkg/compression.Algorithm, or Uncompressed or UnknownCompression
UnknownLocation bool // is true when `Location` for this blob is not set
Location types.BICLocationReference // not set if UnknownLocation is set to `true`
}
80 changes: 46 additions & 34 deletions pkg/blobinfocache/boltdb/boltdb.go
Original file line number Diff line number Diff line change
Expand Up @@ -282,13 +282,12 @@ func (bdc *cache) RecordKnownLocation(transport types.ImageTransport, scope type
}) // FIXME? Log error (but throttle the log volume on repeated accesses)?
}

// appendReplacementCandidates creates prioritize.CandidateWithTime values for digest in scopeBucket with corresponding compression info from compressionBucket (if compressionBucket is not nil), and returns the result of appending them to candidates.
func (bdc *cache) appendReplacementCandidates(candidates []prioritize.CandidateWithTime, scopeBucket, compressionBucket *bolt.Bucket, digest digest.Digest, requireCompressionInfo bool) []prioritize.CandidateWithTime {
// appendReplacementCandidates creates prioritize.CandidateWithTime values for digest in scopeBucket
// (or with an unknown location if scope is nil and v2Output is set) with corresponding compression
// info from compressionBucket (if compressionBucket is not nil), and returns the result of appending them
// to candidates.
func (bdc *cache) appendReplacementCandidates(candidates []prioritize.CandidateWithTime, scopeBucket, compressionBucket *bolt.Bucket, digest digest.Digest, v2Output bool) []prioritize.CandidateWithTime {
digestKey := []byte(digest.String())
b := scopeBucket.Bucket(digestKey)
if b == nil {
return candidates
}
compressorName := blobinfocache.UnknownCompression
if compressionBucket != nil {
// the bucket won't exist if the cache was created by a v1 implementation and
Expand All @@ -297,24 +296,42 @@ func (bdc *cache) appendReplacementCandidates(candidates []prioritize.CandidateW
compressorName = string(compressorNameValue)
}
}
if compressorName == blobinfocache.UnknownCompression && requireCompressionInfo {
if compressorName == blobinfocache.UnknownCompression && v2Output {
return candidates
}
_ = b.ForEach(func(k, v []byte) error {
t := time.Time{}
if err := t.UnmarshalBinary(v); err != nil {
return err
var b *bolt.Bucket
if scopeBucket != nil {
b = scopeBucket.Bucket(digestKey)
}
if b != nil {
_ = b.ForEach(func(k, v []byte) error {
t := time.Time{}
if err := t.UnmarshalBinary(v); err != nil {
return err
}
candidates = append(candidates, prioritize.CandidateWithTime{
Candidate: blobinfocache.BICReplacementCandidate2{
Digest: digest,
CompressorName: compressorName,
Location: types.BICLocationReference{Opaque: string(k)},
},
LastSeen: t,
})
return nil
}) // FIXME? Log error (but throttle the log volume on repeated accesses)?
} else {
if v2Output {
candidates = append(candidates, prioritize.CandidateWithTime{
Candidate: blobinfocache.BICReplacementCandidate2{
Digest: digest,
CompressorName: compressorName,
UnknownLocation: true,
Location: types.BICLocationReference{Opaque: ""},
},
LastSeen: time.Time{},
})
}
candidates = append(candidates, prioritize.CandidateWithTime{
Candidate: blobinfocache.BICReplacementCandidate2{
Digest: digest,
CompressorName: compressorName,
Location: types.BICLocationReference{Opaque: string(k)},
},
LastSeen: t,
})
return nil
}) // FIXME? Log error (but throttle the log volume on repeated accesses)?
}
return candidates
}

Expand All @@ -328,27 +345,22 @@ func (bdc *cache) CandidateLocations2(transport types.ImageTransport, scope type
return bdc.candidateLocations(transport, scope, primaryDigest, canSubstitute, true)
}

func (bdc *cache) candidateLocations(transport types.ImageTransport, scope types.BICTransportScope, primaryDigest digest.Digest, canSubstitute, requireCompressionInfo bool) []blobinfocache.BICReplacementCandidate2 {
func (bdc *cache) candidateLocations(transport types.ImageTransport, scope types.BICTransportScope, primaryDigest digest.Digest, canSubstitute, v2Output bool) []blobinfocache.BICReplacementCandidate2 {
res := []prioritize.CandidateWithTime{}
var uncompressedDigestValue digest.Digest // = ""
if err := bdc.view(func(tx *bolt.Tx) error {
scopeBucket := tx.Bucket(knownLocationsBucket)
if scopeBucket == nil {
return nil
if scopeBucket != nil {
scopeBucket = scopeBucket.Bucket([]byte(transport.Name()))
}
scopeBucket = scopeBucket.Bucket([]byte(transport.Name()))
if scopeBucket == nil {
return nil
}
scopeBucket = scopeBucket.Bucket([]byte(scope.Opaque))
if scopeBucket == nil {
return nil
if scopeBucket != nil {
scopeBucket = scopeBucket.Bucket([]byte(scope.Opaque))
}
// compressionBucket won't have been created if previous writers never recorded info about compression,
// and we don't want to fail just because of that
compressionBucket := tx.Bucket(digestCompressorBucket)

res = bdc.appendReplacementCandidates(res, scopeBucket, compressionBucket, primaryDigest, requireCompressionInfo)
res = bdc.appendReplacementCandidates(res, scopeBucket, compressionBucket, primaryDigest, v2Output)
if canSubstitute {
if uncompressedDigestValue = bdc.uncompressedDigest(tx, primaryDigest); uncompressedDigestValue != "" {
b := tx.Bucket(digestByUncompressedBucket)
Expand All @@ -361,7 +373,7 @@ func (bdc *cache) candidateLocations(transport types.ImageTransport, scope types
return err
}
if d != primaryDigest && d != uncompressedDigestValue {
res = bdc.appendReplacementCandidates(res, scopeBucket, compressionBucket, d, requireCompressionInfo)
res = bdc.appendReplacementCandidates(res, scopeBucket, compressionBucket, d, v2Output)
}
return nil
}); err != nil {
Expand All @@ -370,7 +382,7 @@ func (bdc *cache) candidateLocations(transport types.ImageTransport, scope types
}
}
if uncompressedDigestValue != primaryDigest {
res = bdc.appendReplacementCandidates(res, scopeBucket, compressionBucket, uncompressedDigestValue, requireCompressionInfo)
res = bdc.appendReplacementCandidates(res, scopeBucket, compressionBucket, uncompressedDigestValue, v2Output)
}
}
}
Expand Down
49 changes: 39 additions & 10 deletions pkg/blobinfocache/internal/prioritize/prioritize.go
Original file line number Diff line number Diff line change
Expand Up @@ -10,11 +10,16 @@ import (
"github.com/opencontainers/go-digest"
)

// replacementAttempts is the number of blob replacement candidates returned by destructivelyPrioritizeReplacementCandidates,
// replacementAttempts is the number of blob replacement candidates with known location returned by destructivelyPrioritizeReplacementCandidates,
// and therefore ultimately by types.BlobInfoCache.CandidateLocations.
// This is a heuristic/guess, and could well use a different value.
const replacementAttempts = 5

// replacementUnknownLocationAttempts is the number of blob replacement candidates with unknown Location returned by destructivelyPrioritizeReplacementCandidates,
// and therefore ultimately by types.BlobInfoCache.CandidateLocations.
// This is a heuristic/guess, and could well use a different value.
const replacementUnknownLocationAttempts = 3

// CandidateWithTime is the input to types.BICReplacementCandidate prioritization.
type CandidateWithTime struct {
Candidate blobinfocache.BICReplacementCandidate2 // The replacement candidate
Expand Down Expand Up @@ -77,34 +82,58 @@ func (css *candidateSortState) Swap(i, j int) {
css.cs[i], css.cs[j] = css.cs[j], css.cs[i]
}

// destructivelyPrioritizeReplacementCandidatesWithMax is destructivelyPrioritizeReplacementCandidates with a parameter for the
// number of entries to limit, only to make testing simpler.
func destructivelyPrioritizeReplacementCandidatesWithMax(cs []CandidateWithTime, primaryDigest, uncompressedDigest digest.Digest, maxCandidates int) []blobinfocache.BICReplacementCandidate2 {
// destructivelyPrioritizeReplacementCandidatesWithMax is destructivelyPrioritizeReplacementCandidates with parameters for the
// number of entries to limit for known and unknown location separately, only to make testing simpler.
func destructivelyPrioritizeReplacementCandidatesWithMax(cs []CandidateWithTime, primaryDigest, uncompressedDigest digest.Digest, maxCandidates int, unknownLocationLimit int) []blobinfocache.BICReplacementCandidate2 {
// split unknown candidates and known candidates
// and limit them seperately.
var knownLocationCandidates []CandidateWithTime
var unknownLocationCandidates []CandidateWithTime

for _, candidate := range cs {
if candidate.Candidate.UnknownLocation {
unknownLocationCandidates = append(unknownLocationCandidates, candidate)
} else {
knownLocationCandidates = append(knownLocationCandidates, candidate)
}
}

// We don't need to use sort.Stable() because nanosecond timestamps are (presumably?) unique, so no two elements should
// compare equal.
sort.Sort(&candidateSortState{
cs: cs,
cs: knownLocationCandidates,
primaryDigest: primaryDigest,
uncompressedDigest: uncompressedDigest,
})

resLength := len(cs)
resLength := len(knownLocationCandidates)
if resLength > maxCandidates {
resLength = maxCandidates
}
res := make([]blobinfocache.BICReplacementCandidate2, resLength)
for i := range res {
res[i] = cs[i].Candidate
res[i] = knownLocationCandidates[i].Candidate
}
// If candidates with unknown location are found, lets add them to final list
if len(unknownLocationCandidates) > 0 {
resUnknownLength := len(unknownLocationCandidates)
if resUnknownLength > unknownLocationLimit {
resUnknownLength = unknownLocationLimit
}
for i := 0; i < resUnknownLength; i++ {
res = append(res, unknownLocationCandidates[i].Candidate)
}
}
return res
}

// DestructivelyPrioritizeReplacementCandidates consumes AND DESTROYS an array of possible replacement candidates with their last known existence times,
// the primary digest the user actually asked for, and the corresponding uncompressed digest (if known, possibly equal to the primary digest),
// and returns an appropriately prioritized and/or trimmed result suitable for a return value from types.BlobInfoCache.CandidateLocations.
// the primary digest the user actually asked for, the corresponding uncompressed digest (if known, possibly equal to the primary digest) and Location
// (where unknown location is always given lower priority as compared to candidates with known locations) and returns an appropriately prioritized and/or
// trimmed result suitable for a return value from types.BlobInfoCache.CandidateLocations.
//
// WARNING: The array of candidates is destructively modified. (The implementation of this function could of course
// make a copy, but all CandidateLocations implementations build the slice of candidates only for the single purpose of calling this function anyway.)
func DestructivelyPrioritizeReplacementCandidates(cs []CandidateWithTime, primaryDigest, uncompressedDigest digest.Digest) []blobinfocache.BICReplacementCandidate2 {
return destructivelyPrioritizeReplacementCandidatesWithMax(cs, primaryDigest, uncompressedDigest, replacementAttempts)
return destructivelyPrioritizeReplacementCandidatesWithMax(cs, primaryDigest, uncompressedDigest, replacementAttempts, replacementUnknownLocationAttempts)
}
60 changes: 39 additions & 21 deletions pkg/blobinfocache/memory/memory.go
Original file line number Diff line number Diff line change
Expand Up @@ -120,25 +120,43 @@ func (mem *cache) RecordDigestCompressorName(blobDigest digest.Digest, compresso
mem.compressors[blobDigest] = compressorName
}

// appendReplacementCandidates creates prioritize.CandidateWithTime values for (transport, scope, digest), and returns the result of appending them to candidates.
func (mem *cache) appendReplacementCandidates(candidates []prioritize.CandidateWithTime, transport types.ImageTransport, scope types.BICTransportScope, digest digest.Digest, requireCompressionInfo bool) []prioritize.CandidateWithTime {
locations := mem.knownLocations[locationKey{transport: transport.Name(), scope: scope, blobDigest: digest}] // nil if not present
for l, t := range locations {
compressorName, compressorKnown := mem.compressors[digest]
if !compressorKnown {
if requireCompressionInfo {
continue
}
compressorName = blobinfocache.UnknownCompression
// appendReplacementCandidates creates prioritize.CandidateWithTime values for digest in scopeBucket
// (or with an unknown location if scope is nil and v2Output is set) with corresponding compression
// info from compressionBucket (if compressionBucket is not nil), and returns the result of appending them
// to candidates.
func (mem *cache) appendReplacementCandidates(candidates []prioritize.CandidateWithTime, transport types.ImageTransport, scope *types.BICTransportScope, digest digest.Digest, v2Output bool) []prioritize.CandidateWithTime {
compressorName := blobinfocache.UnknownCompression
compressorNameValue, compressorKnown := mem.compressors[digest]
if compressorKnown {
compressorName = compressorNameValue
}
if compressorName == blobinfocache.UnknownCompression && v2Output {
return candidates
}
if scope != nil {
locations := mem.knownLocations[locationKey{transport: transport.Name(), scope: *scope, blobDigest: digest}] // nil if not present
for l, t := range locations {
candidates = append(candidates, prioritize.CandidateWithTime{
Candidate: blobinfocache.BICReplacementCandidate2{
Digest: digest,
CompressorName: compressorName,
Location: l,
},
LastSeen: t,
})
}
} else {
if v2Output {
candidates = append(candidates, prioritize.CandidateWithTime{
Candidate: blobinfocache.BICReplacementCandidate2{
Digest: digest,
CompressorName: compressorName,
UnknownLocation: true,
Location: types.BICLocationReference{Opaque: ""},
},
LastSeen: time.Time{},
})
}
candidates = append(candidates, prioritize.CandidateWithTime{
Candidate: blobinfocache.BICReplacementCandidate2{
Digest: digest,
CompressorName: compressorName,
Location: l,
},
LastSeen: t,
})
}
return candidates
}
Expand Down Expand Up @@ -167,18 +185,18 @@ func (mem *cache) candidateLocations(transport types.ImageTransport, scope types
mem.mutex.Lock()
defer mem.mutex.Unlock()
res := []prioritize.CandidateWithTime{}
res = mem.appendReplacementCandidates(res, transport, scope, primaryDigest, requireCompressionInfo)
res = mem.appendReplacementCandidates(res, transport, &scope, primaryDigest, requireCompressionInfo)
var uncompressedDigest digest.Digest // = ""
if canSubstitute {
if uncompressedDigest = mem.uncompressedDigestLocked(primaryDigest); uncompressedDigest != "" {
otherDigests := mem.digestsByUncompressed[uncompressedDigest] // nil if not present in the map
for d := range otherDigests {
if d != primaryDigest && d != uncompressedDigest {
res = mem.appendReplacementCandidates(res, transport, scope, d, requireCompressionInfo)
res = mem.appendReplacementCandidates(res, transport, &scope, d, requireCompressionInfo)
}
}
if uncompressedDigest != primaryDigest {
res = mem.appendReplacementCandidates(res, transport, scope, uncompressedDigest, requireCompressionInfo)
res = mem.appendReplacementCandidates(res, transport, &scope, uncompressedDigest, requireCompressionInfo)
}
}
}
Expand Down

0 comments on commit 6c2de11

Please sign in to comment.