From 0fc2bedc7f4c057c3a28b5a27016ea76c4100651 Mon Sep 17 00:00:00 2001 From: Florian Forster Date: Thu, 19 Sep 2024 11:14:33 +0200 Subject: [PATCH] tar: Add the `WithIgnore` option. This option allows to exclude certain files from extraction. This is going to be used by `flux diff artifact` to only extract "interesting" files from an archive for comparison with another source. See also: https://github.com/fluxcd/flux2/pull/4916 Signed-off-by: Florian Forster --- tar/go.mod | 13 +++++++++++- tar/go.sum | 13 ++++++++++++ tar/tar.go | 8 ++++++++ tar/tar_opts.go | 25 +++++++++++++++++++++++ tar/tar_test.go | 54 ++++++++++++++++++++++++++++++++++++++++++++++++- 5 files changed, 111 insertions(+), 2 deletions(-) diff --git a/tar/go.mod b/tar/go.mod index 3565ea00..decf60bb 100644 --- a/tar/go.mod +++ b/tar/go.mod @@ -2,4 +2,15 @@ module github.com/fluxcd/pkg/tar go 1.22.0 -require github.com/cyphar/filepath-securejoin v0.2.4 +require ( + github.com/cyphar/filepath-securejoin v0.2.4 + github.com/go-git/go-git/v5 v5.12.0 +) + +require ( + github.com/go-git/gcfg v1.5.1-0.20230307220236-3a3c6141e376 // indirect + github.com/go-git/go-billy/v5 v5.5.0 // indirect + github.com/jbenet/go-context v0.0.0-20150711004518-d14ea06fba99 // indirect + golang.org/x/net v0.22.0 // indirect + gopkg.in/warnings.v0 v0.1.2 // indirect +) diff --git a/tar/go.sum b/tar/go.sum index de447c23..d922f0c9 100644 --- a/tar/go.sum +++ b/tar/go.sum @@ -1,2 +1,15 @@ github.com/cyphar/filepath-securejoin v0.2.4 h1:Ugdm7cg7i6ZK6x3xDF1oEu1nfkyfH53EtKeQYTC3kyg= github.com/cyphar/filepath-securejoin v0.2.4/go.mod h1:aPGpWjXOXUn2NCNjFvBE6aRxGGx79pTxQpKOJNYHHl4= +github.com/go-git/gcfg v1.5.1-0.20230307220236-3a3c6141e376 h1:+zs/tPmkDkHx3U66DAb0lQFJrpS6731Oaa12ikc+DiI= +github.com/go-git/gcfg v1.5.1-0.20230307220236-3a3c6141e376/go.mod h1:an3vInlBmSxCcxctByoQdvwPiA7DTK7jaaFDBTtu0ic= +github.com/go-git/go-billy/v5 v5.5.0 h1:yEY4yhzCDuMGSv83oGxiBotRzhwhNr8VZyphhiu+mTU= +github.com/go-git/go-billy/v5 
v5.5.0/go.mod h1:hmexnoNsr2SJU1Ju67OaNz5ASJY3+sHgFRpCtpDCKow= +github.com/go-git/go-git/v5 v5.12.0 h1:7Md+ndsjrzZxbddRDZjF14qK+NN56sy6wkqaVrjZtys= +github.com/go-git/go-git/v5 v5.12.0/go.mod h1:FTM9VKtnI2m65hNI/TenDDDnUf2Q9FHnXYjuz9i5OEY= +github.com/jbenet/go-context v0.0.0-20150711004518-d14ea06fba99 h1:BQSFePA1RWJOlocH6Fxy8MmwDt+yVQYULKfN0RoTN8A= +github.com/jbenet/go-context v0.0.0-20150711004518-d14ea06fba99/go.mod h1:1lJo3i6rXxKeerYnT8Nvf0QmHCRC1n8sfWVwXF2Frvo= +github.com/pkg/errors v0.9.1/go.mod h1:bwawxfHBFNV+L2hUp1rHADufV3IMtnDRdf1r5NINEl0= +golang.org/x/net v0.22.0 h1:9sGLhx7iRIHEiX0oAJ3MRZMUCElJgy7Br1nO+AMN3Tc= +golang.org/x/net v0.22.0/go.mod h1:JKghWKKOSdJwpW2GEx0Ja7fmaKnMsbu+MWVZTokSYmg= +gopkg.in/warnings.v0 v0.1.2 h1:wFXVbFY8DY5/xOe1ECiWdKCzZlxgshcYVNkBHstARME= +gopkg.in/warnings.v0 v0.1.2/go.mod h1:jksf8JmL6Qr/oQM2OXTHunEvvTAsrWBLb6OOjuVWRNI= diff --git a/tar/tar.go b/tar/tar.go index 3adc3860..81070c8f 100644 --- a/tar/tar.go +++ b/tar/tar.go @@ -22,6 +22,7 @@ import ( "time" securejoin "github.com/cyphar/filepath-securejoin" + "github.com/go-git/go-git/v5/plumbing/format/gitignore" ) const ( @@ -45,6 +46,9 @@ type tarOpts struct { // skipGzip skip gzip reader an un-tar a plain tar file. skipGzip bool + + // ignoreMatcher allows to exclude specific files from extraction. + ignoreMatcher gitignore.Matcher } // Untar reads the gzip-compressed tar file from r and writes it into dir. @@ -121,6 +125,10 @@ func Untar(r io.Reader, dir string, inOpts ...TarOption) (err error) { fi := f.FileInfo() mode := fi.Mode() + if opts.ignore(f.Name, mode.IsDir()) { + continue + } + switch { case mode.IsRegular(): // Make the directory. This is redundant because it should diff --git a/tar/tar_opts.go b/tar/tar_opts.go index 1d433c28..9161e2d9 100644 --- a/tar/tar_opts.go +++ b/tar/tar_opts.go @@ -16,6 +16,12 @@ limitations under the License. 
package tar +import ( + "strings" + + "github.com/go-git/go-git/v5/plumbing/format/gitignore" +) + // TarOption represents options to be applied to Tar. type TarOption func(*tarOpts) @@ -41,8 +47,27 @@ func WithSkipGzip() TarOption { } } +// WithIgnore allows excluding certain files from being extracted. +func WithIgnore(m gitignore.Matcher) TarOption { + return func(t *tarOpts) { + t.ignoreMatcher = m + } +} + func (t *tarOpts) applyOpts(tarOpts ...TarOption) { for _, clientOpt := range tarOpts { clientOpt(t) } } + +// ignore is a convenience function around t.ignoreMatcher.Match(). It handles +// the absence of a matcher gracefully and takes care of splitting the path into +// its components. The `path` argument must be a slash-delimited path, i.e. the +// file name from the tar archive *before* it gets converted to a filepath. +func (t *tarOpts) ignore(path string, isDir bool) bool { + if t.ignoreMatcher == nil { + return false + } + + return t.ignoreMatcher.Match(strings.Split(path, "/"), isDir) +} diff --git a/tar/tar_test.go b/tar/tar_test.go index bad41563..745d3fe6 100644 --- a/tar/tar_test.go +++ b/tar/tar_test.go @@ -21,10 +21,14 @@ import ( "bytes" "compress/gzip" "crypto/rand" + "errors" "fmt" + "io/fs" "os" "path/filepath" "testing" + + "github.com/go-git/go-git/v5/plumbing/format/gitignore" ) type untarTestCase struct { @@ -35,6 +39,8 @@ type untarTestCase struct { content []byte wantErr string maxUntarSize int + ignore gitignore.Matcher + wantNotExist bool } func TestUntar(t *testing.T) { @@ -128,6 +134,39 @@ func TestUntar(t *testing.T) { targetDir: symlink, wantErr: fmt.Sprintf(`dir '%s' must be a directory`, symlink), }, + { + name: "ignore", + fileName: "file1", + content: geRandomContent(256), + targetDir: targetDirOutput, + secureTargetDir: targetDirOutput, + ignore: gitignore.NewMatcher([]gitignore.Pattern{ + gitignore.ParsePattern("file1", nil), + }), + wantNotExist: true, + }, + { + name: "ignore does not match", + fileName: "file1", 
content: geRandomContent(256), + targetDir: targetDirOutput, + secureTargetDir: targetDirOutput, + ignore: gitignore.NewMatcher([]gitignore.Pattern{ + gitignore.ParsePattern("no_match", nil), + }), + wantNotExist: false, + }, + { + name: "ignore with glob", + fileName: "path/to/file.ignored", + content: geRandomContent(256), + targetDir: targetDirOutput, + secureTargetDir: targetDirOutput, + ignore: gitignore.NewMatcher([]gitignore.Pattern{ + gitignore.ParsePattern("*.ignored", nil), + }), + wantNotExist: true, + }, } for _, tt := range cases { @@ -143,6 +182,9 @@ func TestUntar(t *testing.T) { if tt.maxUntarSize != 0 { opts = append(opts, WithMaxUntarSize(tt.maxUntarSize)) } + if tt.ignore != nil { + opts = append(opts, WithIgnore(tt.ignore)) + } err = Untar(f, tt.targetDir, opts...) var got string @@ -161,11 +203,21 @@ func TestUntar(t *testing.T) { if tt.wantErr == "" { abs := filepath.Join(tt.secureTargetDir, tt.fileName) fi, err := os.Stat(abs) - if err != nil { + + gotNotExist := errors.Is(err, fs.ErrNotExist) + if err != nil && gotNotExist != tt.wantNotExist { t.Errorf("stat %q: %v", abs, err) return } + if !gotNotExist && tt.wantNotExist { + t.Errorf("os.Stat(%q) = (%v, nil), want %v", abs, fi, fs.ErrNotExist) + } + + if tt.wantNotExist { + return + } + if fi.Size() != int64(len(tt.content)) { t.Errorf("file size wanted: %d got: %d", len(tt.content), fi.Size()) }