Skip to content

Commit

Permalink
Recover corrupted cache (#1381)
Browse files Browse the repository at this point in the history
* add failing test to restorer

Signed-off-by: Joey Brown <[email protected]>

* restorer and exporter working as expected

Signed-off-by: Joey Brown <[email protected]>

* lint

Signed-off-by: Joey Brown <[email protected]>

* Update phase/restorer.go

Co-authored-by: Natalie Arellano <[email protected]>
Signed-off-by: Joey Brown <[email protected]>

* Update phase/cache.go

Co-authored-by: Natalie Arellano <[email protected]>
Signed-off-by: Joey Brown <[email protected]>

* Update cache/image_cache.go

Co-authored-by: Natalie Arellano <[email protected]>
Signed-off-by: Joey Brown <[email protected]>

* Update cache/volume_cache.go

Co-authored-by: Natalie Arellano <[email protected]>
Signed-off-by: Joey Brown <[email protected]>

* Update cache/volume_cache.go

Co-authored-by: Natalie Arellano <[email protected]>
Signed-off-by: Joey Brown <[email protected]>

* Update cache/volume_cache.go

Co-authored-by: Natalie Arellano <[email protected]>
Signed-off-by: Joey Brown <[email protected]>

* Update cache/volume_cache.go

Co-authored-by: Natalie Arellano <[email protected]>
Signed-off-by: Joey Brown <[email protected]>

* Update cache/volume_cache.go

Co-authored-by: Natalie Arellano <[email protected]>
Signed-off-by: Joey Brown <[email protected]>

* update based on feedback

Signed-off-by: Joey Brown <[email protected]>

* fix log

* temp fix

* this does not work as is. I think we need to modify img utils.

Image utils should fail with a Layer Not found in both ReuseLayer & GetLayer.

For GetLayer, when there is a missing blob, it's return an unexpected EOF error.

For ReuseLayer, when there is a missing blob, it's not returning an error but it should.

* add eof check

* add not exist check

* reuse layer test

* fix test regression

---------

Signed-off-by: Joey Brown <[email protected]>
Co-authored-by: Joey Brown <[email protected]>
  • Loading branch information
natalieparellano and joeybrown-sf authored Jul 17, 2024
1 parent a87e12e commit df6be88
Show file tree
Hide file tree
Showing 18 changed files with 269 additions and 97 deletions.
4 changes: 3 additions & 1 deletion cache/caching_image_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,8 @@ import (
"github.com/sclevine/spec"
"github.com/sclevine/spec/report"

"github.com/buildpacks/lifecycle/cmd"

"github.com/buildpacks/lifecycle/cache"
h "github.com/buildpacks/lifecycle/testhelpers"
)
Expand All @@ -37,7 +39,7 @@ func testCachingImage(t *testing.T, when spec.G, it spec.S) {
fakeImage = fakes.NewImage("some-image", "", nil)
tmpDir, err = os.MkdirTemp("", "")
h.AssertNil(t, err)
volumeCache, err = cache.NewVolumeCache(tmpDir)
volumeCache, err = cache.NewVolumeCache(tmpDir, cmd.DefaultLogger)
h.AssertNil(t, err)
subject = cache.NewCachingImage(fakeImage, volumeCache)
layerPath, layerSHA, layerData = h.RandomLayer(t, tmpDir)
Expand Down
22 changes: 22 additions & 0 deletions cache/common.go
Original file line number Diff line number Diff line change
Expand Up @@ -5,3 +5,25 @@ import (
)

var errCacheCommitted = errors.New("cache cannot be modified after commit")

// ReadErr is an error type for filesystem read errors.
type ReadErr struct {
msg string
}

// NewReadErr creates a new ReadErr.
func NewReadErr(msg string) ReadErr {
return ReadErr{msg: msg}
}

// Error returns the error message.
func (e ReadErr) Error() string {
return e.msg
}

// IsReadErr checks if an error is a ReadErr.
func IsReadErr(err error) (bool, *ReadErr) {
var e ReadErr
isReadErr := errors.As(err, &e)
return isReadErr, &e
}
33 changes: 31 additions & 2 deletions cache/image_cache.go
Original file line number Diff line number Diff line change
Expand Up @@ -99,15 +99,44 @@ func (c *ImageCache) AddLayerFile(tarPath string, diffID string) error {
return c.newImage.AddLayerWithDiffID(tarPath, diffID)
}

// isLayerNotFound checks if the error is a layer not found error
//
// FIXME: we should not have to rely on trapping ErrUnexpectedEOF.
// If a blob is not present in the registry, we should get imgutil.ErrLayerNotFound,
// but we do not and instead get io.ErrUnexpectedEOF
func isLayerNotFound(err error) bool {
var e imgutil.ErrLayerNotFound
return errors.As(err, &e) || errors.Is(err, io.ErrUnexpectedEOF)
}

func (c *ImageCache) ReuseLayer(diffID string) error {
if c.committed {
return errCacheCommitted
}
return c.newImage.ReuseLayer(diffID)
err := c.newImage.ReuseLayer(diffID)
if err != nil {
// FIXME: this path is not currently executed.
// If a blob is not present in the registry, we should get imgutil.ErrLayerNotFound.
// We should then skip attempting to reuse the layer.
// However, we do not get imgutil.ErrLayerNotFound when the blob is not present.
if isLayerNotFound(err) {
return NewReadErr(fmt.Sprintf("failed to find cache layer with SHA '%s'", diffID))
}
return fmt.Errorf("failed to reuse cache layer with SHA '%s'", diffID)
}
return nil
}

// RetrieveLayer retrieves a layer from the cache
func (c *ImageCache) RetrieveLayer(diffID string) (io.ReadCloser, error) {
return c.origImage.GetLayer(diffID)
closer, err := c.origImage.GetLayer(diffID)
if err != nil {
if isLayerNotFound(err) {
return nil, NewReadErr(fmt.Sprintf("failed to find cache layer with SHA '%s'", diffID))
}
return nil, fmt.Errorf("failed to get cache layer with SHA '%s'", diffID)
}
return closer, nil
}

func (c *ImageCache) Commit() error {
Expand Down
4 changes: 2 additions & 2 deletions cache/image_cache_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -146,7 +146,7 @@ func testImageCache(t *testing.T, when spec.G, it spec.S) {
when("layer does not exist", func() {
it("returns an error", func() {
_, err := subject.RetrieveLayer("some_nonexistent_sha")
h.AssertError(t, err, "failed to get layer with sha 'some_nonexistent_sha'")
h.AssertError(t, err, "failed to get cache layer with SHA 'some_nonexistent_sha'")
})
})
})
Expand Down Expand Up @@ -236,7 +236,7 @@ func testImageCache(t *testing.T, when spec.G, it spec.S) {
h.AssertNil(t, subject.AddLayerFile(testLayerTarPath, testLayerSHA))

_, err := subject.RetrieveLayer(testLayerSHA)
h.AssertError(t, err, fmt.Sprintf("failed to get layer with sha '%s'", testLayerSHA))
h.AssertError(t, err, fmt.Sprintf("failed to get cache layer with SHA '%s'", testLayerSHA))
})
})
})
Expand Down
7 changes: 4 additions & 3 deletions cache/image_deleter.go
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@ import (
// ImageDeleter defines the methods available to delete and compare cached images
type ImageDeleter interface {
DeleteOrigImageIfDifferentFromNewImage(origImage, newImage imgutil.Image)
DeleteImage(image imgutil.Image)
}

// ImageDeleterImpl is a component to manage cache image deletion
Expand All @@ -35,13 +36,13 @@ func (c *ImageDeleterImpl) DeleteOrigImageIfDifferentFromNewImage(origImage, new
}

if !same {
c.deleteImage(origImage)
c.DeleteImage(origImage)
}
}
}

// deleteImage deletes an image
func (c *ImageDeleterImpl) deleteImage(image imgutil.Image) {
// DeleteImage deletes an image
func (c *ImageDeleterImpl) DeleteImage(image imgutil.Image) {
if c.deletionEnabled {
if err := image.Delete(); err != nil {
c.logger.Warnf("Unable to delete cache image: %v", err.Error())
Expand Down
33 changes: 29 additions & 4 deletions cache/volume_cache.go
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@ package cache

import (
"encoding/json"
"fmt"
"io"
"os"
"path/filepath"
Expand All @@ -10,6 +11,8 @@ import (

"github.com/pkg/errors"

"github.com/buildpacks/lifecycle/log"

"github.com/buildpacks/lifecycle/internal/fsutil"
"github.com/buildpacks/lifecycle/platform"
)
Expand All @@ -20,9 +23,11 @@ type VolumeCache struct {
backupDir string
stagingDir string
committedDir string
logger log.Logger
}

func NewVolumeCache(dir string) (*VolumeCache, error) {
// NewVolumeCache creates a new VolumeCache
func NewVolumeCache(dir string, logger log.Logger) (*VolumeCache, error) {
if _, err := os.Stat(dir); err != nil {
return nil, err
}
Expand All @@ -32,6 +37,7 @@ func NewVolumeCache(dir string) (*VolumeCache, error) {
backupDir: filepath.Join(dir, "committed-backup"),
stagingDir: filepath.Join(dir, "staging"),
committedDir: filepath.Join(dir, "committed"),
logger: logger,
}

if err := c.setupStagingDir(); err != nil {
Expand Down Expand Up @@ -133,7 +139,20 @@ func (c *VolumeCache) ReuseLayer(diffID string) error {
if c.committed {
return errCacheCommitted
}
if err := os.Link(diffIDPath(c.committedDir, diffID), diffIDPath(c.stagingDir, diffID)); err != nil && !os.IsExist(err) {
committedPath := diffIDPath(c.committedDir, diffID)
stagingPath := diffIDPath(c.stagingDir, diffID)

if _, err := os.Stat(committedPath); err != nil {
if os.IsNotExist(err) {
return NewReadErr(fmt.Sprintf("failed to find cache layer with SHA '%s'", diffID))
}
if os.IsPermission(err) {
return NewReadErr(fmt.Sprintf("failed to read cache layer with SHA '%s' due to insufficient permissions", diffID))
}
return fmt.Errorf("failed to re-use cache layer with SHA '%s': %w", diffID, err)
}

if err := os.Link(committedPath, stagingPath); err != nil && !os.IsExist(err) {
return errors.Wrapf(err, "reusing layer (%s)", diffID)
}
return nil
Expand All @@ -146,7 +165,13 @@ func (c *VolumeCache) RetrieveLayer(diffID string) (io.ReadCloser, error) {
}
file, err := os.Open(path)
if err != nil {
return nil, errors.Wrapf(err, "opening layer with SHA '%s'", diffID)
if os.IsPermission(err) {
return nil, NewReadErr(fmt.Sprintf("failed to read cache layer with SHA '%s' due to insufficient permissions", diffID))
}
if os.IsNotExist(err) {
return nil, NewReadErr(fmt.Sprintf("failed to find cache layer with SHA '%s'", diffID))
}
return nil, fmt.Errorf("failed to get cache layer with SHA '%s'", diffID)
}
return file, nil
}
Expand All @@ -165,7 +190,7 @@ func (c *VolumeCache) RetrieveLayerFile(diffID string) (string, error) {
path := diffIDPath(c.committedDir, diffID)
if _, err := os.Stat(path); err != nil {
if os.IsNotExist(err) {
return "", errors.Wrapf(err, "layer with SHA '%s' not found", diffID)
return "", NewReadErr(fmt.Sprintf("failed to find cache layer with SHA '%s'", diffID))
}
return "", errors.Wrapf(err, "retrieving layer with SHA '%s'", diffID)
}
Expand Down
40 changes: 30 additions & 10 deletions cache/volume_cache_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,9 @@ import (
"github.com/sclevine/spec"
"github.com/sclevine/spec/report"

"github.com/buildpacks/lifecycle/cmd"
"github.com/buildpacks/lifecycle/log"

"github.com/buildpacks/lifecycle/buildpack"
"github.com/buildpacks/lifecycle/cache"
"github.com/buildpacks/lifecycle/platform"
Expand All @@ -28,6 +31,7 @@ func testVolumeCache(t *testing.T, when spec.G, it spec.S) {
backupDir string
stagingDir string
committedDir string
testLogger log.Logger
)

it.Before(func() {
Expand All @@ -42,6 +46,7 @@ func testVolumeCache(t *testing.T, when spec.G, it spec.S) {
backupDir = filepath.Join(volumeDir, "committed-backup")
stagingDir = filepath.Join(volumeDir, "staging")
committedDir = filepath.Join(volumeDir, "committed")
testLogger = cmd.DefaultLogger
})

it.After(func() {
Expand All @@ -50,7 +55,7 @@ func testVolumeCache(t *testing.T, when spec.G, it spec.S) {

when("#NewVolumeCache", func() {
it("returns an error when the volume path does not exist", func() {
_, err := cache.NewVolumeCache(filepath.Join(tmpDir, "does_not_exist"))
_, err := cache.NewVolumeCache(filepath.Join(tmpDir, "does_not_exist"), testLogger)
if err == nil {
t.Fatal("expected NewVolumeCache to fail because volume path does not exist")
}
Expand All @@ -66,7 +71,7 @@ func testVolumeCache(t *testing.T, when spec.G, it spec.S) {
it("clears staging", func() {
var err error

subject, err = cache.NewVolumeCache(volumeDir)
subject, err = cache.NewVolumeCache(volumeDir, testLogger)
h.AssertNil(t, err)

_, err = os.Stat(filepath.Join(stagingDir, "some-layer.tar"))
Expand All @@ -80,7 +85,7 @@ func testVolumeCache(t *testing.T, when spec.G, it spec.S) {
it("creates staging dir", func() {
var err error

subject, err = cache.NewVolumeCache(volumeDir)
subject, err = cache.NewVolumeCache(volumeDir, testLogger)
h.AssertNil(t, err)

_, err = os.Stat(stagingDir)
Expand All @@ -92,7 +97,7 @@ func testVolumeCache(t *testing.T, when spec.G, it spec.S) {
it("creates committed dir", func() {
var err error

subject, err = cache.NewVolumeCache(volumeDir)
subject, err = cache.NewVolumeCache(volumeDir, testLogger)
h.AssertNil(t, err)

_, err = os.Stat(committedDir)
Expand All @@ -109,7 +114,7 @@ func testVolumeCache(t *testing.T, when spec.G, it spec.S) {
it("clears the backup dir", func() {
var err error

subject, err = cache.NewVolumeCache(volumeDir)
subject, err = cache.NewVolumeCache(volumeDir, testLogger)
h.AssertNil(t, err)

_, err = os.Stat(filepath.Join(backupDir, "some-layer.tar"))
Expand All @@ -124,7 +129,7 @@ func testVolumeCache(t *testing.T, when spec.G, it spec.S) {
it.Before(func() {
var err error

subject, err = cache.NewVolumeCache(volumeDir)
subject, err = cache.NewVolumeCache(volumeDir, testLogger)
h.AssertNil(t, err)
})

Expand Down Expand Up @@ -206,7 +211,7 @@ func testVolumeCache(t *testing.T, when spec.G, it spec.S) {
when("layer does not exist", func() {
it("returns an error", func() {
_, err := subject.RetrieveLayer("some_nonexistent_sha")
h.AssertError(t, err, "layer with SHA 'some_nonexistent_sha' not found")
h.AssertError(t, err, "failed to find cache layer with SHA 'some_nonexistent_sha'")
})
})
})
Expand All @@ -230,7 +235,7 @@ func testVolumeCache(t *testing.T, when spec.G, it spec.S) {
when("layer does not exist", func() {
it("returns an error", func() {
_, err := subject.RetrieveLayerFile("some_nonexistent_sha")
h.AssertError(t, err, "layer with SHA 'some_nonexistent_sha' not found")
h.AssertError(t, err, "failed to find cache layer with SHA 'some_nonexistent_sha'")
})
})
})
Expand Down Expand Up @@ -340,7 +345,7 @@ func testVolumeCache(t *testing.T, when spec.G, it spec.S) {
h.AssertNil(t, subject.AddLayerFile(tarPath, "some_sha"))

_, err := subject.RetrieveLayer("some_sha")
h.AssertError(t, err, "layer with SHA 'some_sha' not found")
h.AssertError(t, err, "failed to find cache layer with SHA 'some_sha'")
})
})

Expand Down Expand Up @@ -415,7 +420,7 @@ func testVolumeCache(t *testing.T, when spec.G, it spec.S) {
h.AssertNil(t, subject.AddLayer(layerReader, layerSha))

_, err := subject.RetrieveLayer(layerSha)
h.AssertError(t, err, fmt.Sprintf("layer with SHA '%s' not found", layerSha))
h.AssertError(t, err, fmt.Sprintf("failed to find cache layer with SHA '%s'", layerSha))
})
})

Expand Down Expand Up @@ -507,6 +512,21 @@ func testVolumeCache(t *testing.T, when spec.G, it spec.S) {
h.AssertEq(t, string(bytes), "existing data")
})
})

when("the layer does not exist", func() {
it("fails with a read error", func() {
err := subject.ReuseLayer("some_nonexistent_sha")
isReadErr, _ := cache.IsReadErr(err)
h.AssertEq(t, isReadErr, true)

err = subject.Commit()
h.AssertNil(t, err)

_, err = subject.RetrieveLayer("some_sha")
isReadErr, _ = cache.IsReadErr(err)
h.AssertEq(t, isReadErr, true)
})
})
})

when("attempting to commit more than once", func() {
Expand Down
8 changes: 5 additions & 3 deletions cmd/lifecycle/exporter.go
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,8 @@ import (
"github.com/pkg/errors"
"golang.org/x/sync/errgroup"

"github.com/buildpacks/lifecycle/log"

"github.com/buildpacks/lifecycle/auth"
"github.com/buildpacks/lifecycle/buildpack"
"github.com/buildpacks/lifecycle/cache"
Expand Down Expand Up @@ -200,7 +202,7 @@ func (e *exportCmd) export(group buildpack.Group, cacheStore phase.Cache, analyz
case e.UseLayout:
appImage, runImageID, err = e.initLayoutAppImage(analyzedMD)
case e.UseDaemon:
appImage, runImageID, err = e.initDaemonAppImage(analyzedMD)
appImage, runImageID, err = e.initDaemonAppImage(analyzedMD, cmd.DefaultLogger)
default:
appImage, runImageID, err = e.initRemoteAppImage(analyzedMD)
}
Expand Down Expand Up @@ -258,7 +260,7 @@ func (e *exportCmd) export(group buildpack.Group, cacheStore phase.Cache, analyz
return nil
}

func (e *exportCmd) initDaemonAppImage(analyzedMD files.Analyzed) (imgutil.Image, string, error) {
func (e *exportCmd) initDaemonAppImage(analyzedMD files.Analyzed, logger log.Logger) (imgutil.Image, string, error) {
var opts = []imgutil.ImageOption{
local.FromBaseImage(e.RunImageRef),
}
Expand Down Expand Up @@ -301,7 +303,7 @@ func (e *exportCmd) initDaemonAppImage(analyzedMD files.Analyzed) (imgutil.Image
}

if e.LaunchCacheDir != "" {
volumeCache, err := cache.NewVolumeCache(e.LaunchCacheDir)
volumeCache, err := cache.NewVolumeCache(e.LaunchCacheDir, logger)
if err != nil {
return nil, "", cmd.FailErr(err, "create launch cache")
}
Expand Down
Loading

0 comments on commit df6be88

Please sign in to comment.