From 0d266eec08bc271f2e9be4f15f4abd3101f660a6 Mon Sep 17 00:00:00 2001 From: Alex Goodman Date: Thu, 21 Nov 2024 10:45:30 -0500 Subject: [PATCH] enable packaging v6 DBs Signed-off-by: Alex Goodman --- cmd/grype-db/application/application.go | 2 + go.mod | 2 + go.sum | 4 + internal/tarutil/writer.go | 82 ++++++++++-- pkg/process/package.go | 165 +++++++++++++++--------- pkg/process/package_legacy.go | 116 +++++++++++++++++ 6 files changed, 304 insertions(+), 67 deletions(-) create mode 100644 pkg/process/package_legacy.go diff --git a/cmd/grype-db/application/application.go b/cmd/grype-db/application/application.go index 7d17a78c..61de0cfa 100644 --- a/cmd/grype-db/application/application.go +++ b/cmd/grype-db/application/application.go @@ -22,6 +22,7 @@ import ( "github.com/anchore/grype-db/internal/log" "github.com/anchore/grype-db/internal/ui" "github.com/anchore/grype-db/internal/utils" + "github.com/anchore/grype/grype" ) const Name = internal.ApplicationName @@ -169,6 +170,7 @@ func setupLogger(app *Config) error { } log.Set(l) + grype.SetLogger(l) return nil } diff --git a/go.mod b/go.mod index e0570058..51b013f0 100644 --- a/go.mod +++ b/go.mod @@ -177,6 +177,8 @@ require ( github.com/opencontainers/image-spec v1.1.0 // indirect github.com/opencontainers/runtime-spec v1.1.0 // indirect github.com/opencontainers/selinux v1.11.0 // indirect + github.com/openvex/go-vex v0.2.5 // indirect + github.com/package-url/packageurl-go v0.1.1 // indirect github.com/pborman/indent v1.2.1 // indirect github.com/pelletier/go-toml v1.9.5 // indirect github.com/pelletier/go-toml/v2 v2.2.2 // indirect diff --git a/go.sum b/go.sum index ee740202..993eb27d 100644 --- a/go.sum +++ b/go.sum @@ -810,6 +810,10 @@ github.com/opencontainers/runtime-spec v1.1.0 h1:HHUyrt9mwHUjtasSbXSMvs4cyFxh+Bl github.com/opencontainers/runtime-spec v1.1.0/go.mod h1:jwyrGlmzljRJv/Fgzds9SsS/C5hL+LL3ko9hs6T5lQ0= github.com/opencontainers/selinux v1.11.0 h1:+5Zbo97w3Lbmb3PeqQtpmTkMwsW5nRI3YaLpt7tQ7oU= github.com/opencontainers/selinux v1.11.0/go.mod h1:E5dMC3VPuVvVHDYmi78qvhJp8+M586T4DlDRYpFkyec= +github.com/openvex/go-vex v0.2.5 h1:41utdp2rHgAGCsG+UbjmfMG5CWQxs15nGqir1eRgSrQ= +github.com/openvex/go-vex v0.2.5/go.mod h1:j+oadBxSUELkrKh4NfNb+BPo77U3q7gdKME88IO/0Wo= +github.com/package-url/packageurl-go v0.1.1 h1:KTRE0bK3sKbFKAk3yy63DpeskU7Cvs/x/Da5l+RtzyU= +github.com/package-url/packageurl-go v0.1.1/go.mod h1:uQd4a7Rh3ZsVg5j0lNyAfyxIeGde9yrlhjF78GzeW0c= github.com/pascaldekloe/goe v0.0.0-20180627143212-57f6aae5913c/go.mod h1:lzWF7FIEvWOWxwDKqyGYQf6ZUaNfKdP144TG7ZOy1lc= github.com/pascaldekloe/goe v0.1.0/go.mod h1:lzWF7FIEvWOWxwDKqyGYQf6ZUaNfKdP144TG7ZOy1lc= github.com/pborman/indent v1.2.1 h1:lFiviAbISHv3Rf0jcuh489bi06hj98JsVMtIDZQb9yM= diff --git a/internal/tarutil/writer.go b/internal/tarutil/writer.go index a22984fe..3fd6fbf7 100644 --- a/internal/tarutil/writer.go +++ b/internal/tarutil/writer.go @@ -2,13 +2,17 @@ package tarutil import ( "archive/tar" + "bufio" "compress/gzip" "fmt" "io" "os" + "os/exec" "strings" - "github.com/klauspost/compress/zstd" + "github.com/google/shlex" + + "github.com/anchore/grype-db/internal/log" ) var ErrUnsupportedArchiveSuffix = fmt.Errorf("archive name has an unsupported suffix") @@ -20,7 +24,7 @@ type writer struct { writer *tar.Writer } -// NewWriter creates a new tar writer that writes to the specified archive path. Supports .tar.gz and .tar.zst file extensions. +// NewWriter creates a new tar writer that writes to the specified archive path. 
Supports .tar.gz, .tar.zst, .tar.xz, and .tar file extensions.
 func NewWriter(archivePath string) (Writer, error) {
 	w, err := newCompressor(archivePath)
 	if err != nil {
@@ -45,19 +49,79 @@ func newCompressor(archivePath string) (io.WriteCloser, error) {
 	case strings.HasSuffix(archivePath, ".tar.gz"):
 		return gzip.NewWriter(archive), nil
 	case strings.HasSuffix(archivePath, ".tar.zst"):
-		// adding zstd.WithWindowSize(zstd.MaxWindowSize), zstd.WithAllLitEntropyCompression(true)
-		// will have slightly better results, but use a lot more memory
-		w, err := zstd.NewWriter(archive, zstd.WithEncoderLevel(zstd.SpeedBestCompression))
-		if err != nil {
-			return nil, fmt.Errorf("unable to get zst compression stream: %w", err)
-		}
-		return w, nil
+		// note: since we're using --ultra, decompression requires extra memory:
+		// a ~700 MB payload that compresses down to ~60 MB needs ~130 MB of memory (--ultra -22),
+		// while the same payload compressed down to ~65 MB needs ~70 MB of memory (--ultra -21)
+		return newShellCompressor("zstd -T0 -22 --ultra -c -vv", archive)
+	case strings.HasSuffix(archivePath, ".tar.xz"):
+		return newShellCompressor("xz -9 --threads=0 -c -vv", archive)
 	case strings.HasSuffix(archivePath, ".tar"):
 		return archive, nil
 	}
 	return nil, ErrUnsupportedArchiveSuffix
 }
 
+// shellCompressor wraps the stdin pipe of an external compression process and ensures proper cleanup.
+type shellCompressor struct {
+	cmd  *exec.Cmd
+	pipe io.WriteCloser
+}
+
+func newShellCompressor(c string, archive io.Writer) (*shellCompressor, error) {
+	args, err := shlex.Split(c)
+	if err != nil {
+		return nil, fmt.Errorf("unable to parse command: %w", err)
+	}
+	binary := args[0]
+	args = args[1:]
+	cmd := exec.Command(binary, args...)
+	log.Debug(strings.Join(cmd.Args, " "))
+	cmd.Stdout = archive
+
+	stderrPipe, err := cmd.StderrPipe()
+	if err != nil {
+		return nil, fmt.Errorf("unable to create stderr pipe: %w", err)
+	}
+
+	pipe, err := cmd.StdinPipe()
+	if err != nil {
+		return nil, fmt.Errorf("unable to create stdin pipe: %w", err)
+	}
+
+	if err := cmd.Start(); err != nil {
+		return nil, fmt.Errorf("unable to start process: %w", err)
+	}
+
+	go func() {
+		scanner := bufio.NewScanner(stderrPipe)
+		for scanner.Scan() {
+			log.Debugf("[%s] %s", binary, scanner.Text())
+		}
+		if err := scanner.Err(); err != nil {
+			log.Errorf("[%s] error reading stderr: %v", binary, err)
+		}
+	}()
+
+	return &shellCompressor{
+		cmd:  cmd,
+		pipe: pipe,
+	}, nil
+}
+
+func (sc *shellCompressor) Write(p []byte) (int, error) {
+	return sc.pipe.Write(p)
+}
+
+func (sc *shellCompressor) Close() error {
+	if err := sc.pipe.Close(); err != nil {
+		return fmt.Errorf("unable to close compression stdin pipe: %w", err)
+	}
+	if err := sc.cmd.Wait(); err != nil {
+		return fmt.Errorf("compression process error: %w", err)
+	}
+	return nil
+}
+
 func (w *writer) WriteEntry(entry Entry) error {
 	return entry.writeEntry(w.writer)
 }
diff --git a/pkg/process/package.go b/pkg/process/package.go
index 2d010082..89eff025 100644
--- a/pkg/process/package.go
+++ b/pkg/process/package.go
@@ -1,62 +1,59 @@
 package process
 
 import (
+	"errors"
 	"fmt"
-	"net/url"
 	"os"
 	"path"
+	"path/filepath"
 	"strings"
 	"time"
 
-	"github.com/spf13/afero"
-
 	"github.com/anchore/grype-db/internal/log"
 	"github.com/anchore/grype-db/internal/tarutil"
-	"github.com/anchore/grype/grype/db/legacy/distribution"
+	grypeDBLegacyDistribution "github.com/anchore/grype/grype/db/legacy/distribution"
+	v6 "github.com/anchore/grype/grype/db/v6"
+	v6Distribution "github.com/anchore/grype/grype/db/v6/distribution"
 )
 
-func secondsSinceEpoch() int64 {
-	return time.Now().UTC().Unix()
-}
-
 func Package(dbDir, publishBaseURL, overrideArchiveExtension string) error {
-	log.WithFields("from", dbDir, "url", publishBaseURL, "extension-override", overrideArchiveExtension).Info("packaging database")
+	// the legacy metadata file only exists for pre-v6 DBs; if it's absent, package this as a v6+ DB
+	if _, err := os.Stat(filepath.Join(dbDir, grypeDBLegacyDistribution.MetadataFileName)); os.IsNotExist(err) {
+		return packageDB(dbDir, overrideArchiveExtension)
+	}
+	return packageLegacyDB(dbDir, publishBaseURL, overrideArchiveExtension)
+}
 
-	fs := afero.NewOsFs()
-	metadata, err := distribution.NewMetadataFromDir(fs, dbDir)
+func packageDB(dbDir, overrideArchiveExtension string) error {
+	extension, err := resolveExtension(overrideArchiveExtension)
 	if err != nil {
 		return err
 	}
+	log.WithFields("from", dbDir, "extension", extension).Info("packaging database")
 
-	if metadata == nil {
-		return fmt.Errorf("no metadata found in %q", dbDir)
+	tarPath, err := calculateTarPath(dbDir, extension)
+	if err != nil {
+		return err
 	}
 
-	u, err := url.Parse(publishBaseURL)
-	if err != nil {
+	if err := populateTar(tarPath); err != nil {
 		return err
 	}
 
-	// we need a well-ordered string to append to the archive name to ensure uniqueness (to avoid overwriting
-	// existing archives in the CDN) as well as to ensure that multiple archives created in the same day are
-	// put in the correct order in the listing file. The DB timestamp represents the age of the data in the DB
-	// not when the DB was created. The trailer represents the time the DB was packaged.
-	trailer := fmt.Sprintf("%d", secondsSinceEpoch())
+	log.WithFields("path", tarPath).Info("created database archive")
+
+	return writeLatestDocument(tarPath)
+}
 
-	// TODO (alex): supporting tar.zst
-	// var extension = "tar.zst"
-	var extension = "tar.gz"
+func resolveExtension(overrideArchiveExtension string) (string, error) {
+	var extension = "tar.zst"
 
 	if overrideArchiveExtension != "" {
 		extension = strings.TrimLeft(overrideArchiveExtension, ".")
 	}
-	// TODO (alex): supporting tar.zst
-	// else if metadata.Version < 5 {
-	//	extension = "tar.gz"
-	// }
 
 	var found bool
-	for _, valid := range []string{"tar.zst", "tar.gz"} {
+	for _, valid := range []string{"tar.zst", "tar.xz", "tar.gz"} {
 		if valid == extension {
 			found = true
 			break
@@ -64,57 +61,83 @@ func Package(dbDir, publishBaseURL, overrideArchiveExtension string) error {
 	}
 
 	if !found {
-		return fmt.Errorf("invalid archive extension %q", extension)
+		return "", fmt.Errorf("unsupported archive extension %q", extension)
 	}
+	return extension, nil
+}
 
-	// we attach a random value at the end of the file name to prevent from overwriting DBs in S3 that are already
-	// cached in the CDN. Ideally this would be based off of the archive checksum but a random string is simpler.
-	tarName := fmt.Sprintf(
-		"vulnerability-db_v%d_%s_%s.%s",
-		metadata.Version,
-		metadata.Built.Format(time.RFC3339),
-		trailer,
-		extension,
-	)
-	tarPath := path.Join(dbDir, tarName)
+func calculateTarPath(dbDir string, extension string) (string, error) {
+	s, err := v6.NewReader(v6.Config{DBDirPath: dbDir})
+	if err != nil {
+		return "", fmt.Errorf("unable to open vulnerability store: %w", err)
+	}
 
-	if err := populate(tarName, dbDir); err != nil {
-		return err
+	metadata, err := s.GetDBMetadata()
+	if err != nil {
+		return "", fmt.Errorf("unable to get vulnerability store metadata: %w", err)
 	}
 
-	log.WithFields("path", tarPath).Info("created database archive")
+	if metadata.Model != v6.ModelVersion {
+		return "", fmt.Errorf("unsupported database model %d (expected model %d)", metadata.Model, v6.ModelVersion)
+	}
 
-	entry, err := distribution.NewListingEntryFromArchive(fs, *metadata, tarPath, u)
+	providers, err := s.AllProviders()
 	if err != nil {
-		return fmt.Errorf("unable to create listing entry from archive: %w", err)
+		return "", fmt.Errorf("unable to get all providers: %w", err)
 	}
 
-	listing := distribution.NewListing(entry)
-	listingPath := path.Join(dbDir, distribution.ListingFileName)
-	if err = listing.Write(listingPath); err != nil {
-		return err
+	if len(providers) == 0 {
+		return "", fmt.Errorf("no providers found in the vulnerability store")
 	}
 
-	log.WithFields("path", listingPath).Debug("created initial listing file")
+	eldest := eldestProviderTimestamp(providers)
+	if eldest == nil {
+		return "", errors.New("could not resolve eldest provider timestamp")
+	}
+	// the output archive is named vulnerability-db_VERSION_OLDESTDATADATE_BUILTEPOCH.EXTENSION, where:
+	// - VERSION: the schema version in the form v#.#.#
+	// - OLDESTDATADATE: the RFC3339-formatted value of the oldest data capture date found across all contained providers
+	// - BUILTEPOCH: the Unix epoch value of the database metadata built field
+	tarName := fmt.Sprintf(
+		"vulnerability-db_v%d.%d.%d_%s_%d.%s",
+		metadata.Model, metadata.Revision, metadata.Addition,
+		eldest.UTC().Format(time.RFC3339),
+		metadata.BuildTimestamp.Unix(),
+		extension,
+	)
 
-	return nil
+	return filepath.Join(dbDir, tarName), nil
+}
+
+func eldestProviderTimestamp(providers []v6.Provider) *time.Time {
+	var eldest *time.Time
+	for _, p := range providers {
+		if eldest == nil || p.DateCaptured.Before(*eldest) {
+			eldest = p.DateCaptured
+		}
+	}
+	return eldest
 }
 
-func populate(tarName, dbDir string) error {
+func populateTar(tarPath string) error {
 	originalDir, err := os.Getwd()
 	if err != nil {
 		return fmt.Errorf("unable to get CWD: %w", err)
 	}
 
-	if err = os.Chdir(dbDir); err != nil {
-		return fmt.Errorf("unable to cd to build dir: %w", err)
-	}
+	dbDir, tarName := filepath.Split(tarPath)
 
-	defer func() {
-		if err = os.Chdir(originalDir); err != nil {
-			log.Errorf("unable to cd to original dir: %v", err)
+	if dbDir != "" {
+		if err = os.Chdir(dbDir); err != nil {
+			return fmt.Errorf("unable to cd to build dir: %w", err)
 		}
-	}()
+
+		defer func() {
+			if err = os.Chdir(originalDir); err != nil {
+				log.Errorf("unable to cd to original dir: %v", err)
+			}
+		}()
+	}
 
 	fileInfos, err := os.ReadDir("./")
 	if err != nil {
@@ -134,3 +157,33 @@
 
 	return nil
 }
+
+func writeLatestDocument(tarPath string) error {
+	archive, err := v6Distribution.NewArchive(tarPath)
+	if err != nil {
+		return fmt.Errorf("unable to create archive: %w", err)
+	}
+	if archive == nil {
+		return errors.New("unable to create archive: no archive produced")
+	}
+
+	doc := v6Distribution.NewLatestDocument(*archive)
+	if doc == nil {
+		return errors.New("unable to create latest document")
+	}
+
+	dbDir := filepath.Dir(tarPath)
+
+	latestPath := path.Join(dbDir, v6Distribution.LatestFileName)
+
+	fh, err := os.OpenFile(latestPath, os.O_CREATE|os.O_TRUNC|os.O_WRONLY, 0644)
+	if err != nil {
+		return fmt.Errorf("unable to create latest file: %w", err)
+	}
+	defer fh.Close()
+
+	if err = doc.Write(fh); err != nil {
+		return fmt.Errorf("unable to write latest document: %w", err)
+	}
+	return nil
+}
diff --git a/pkg/process/package_legacy.go b/pkg/process/package_legacy.go
new file mode 100644
index 00000000..6672fe19
--- /dev/null
+++ b/pkg/process/package_legacy.go
@@ -0,0 +1,116 @@
+package process
+
+import (
+	"fmt"
+	"net/url"
+	"path"
+	"path/filepath"
+	"strings"
+	"time"
+
+	"github.com/spf13/afero"
+
+	"github.com/anchore/grype-db/internal/log"
+	"github.com/anchore/grype/grype/db/legacy/distribution"
+	grypeDBLegacy "github.com/anchore/grype/grype/db/v5"
+	grypeDBLegacyStore "github.com/anchore/grype/grype/db/v5/store"
+)
+
+func packageLegacyDB(dbDir, publishBaseURL, overrideArchiveExtension string) error { //nolint:funlen
+	log.WithFields("from", dbDir, "url", publishBaseURL, "extension-override", overrideArchiveExtension).Info("packaging database")
+
+	fs := afero.NewOsFs()
+	metadata, err := distribution.NewMetadataFromDir(fs, dbDir)
+	if err != nil {
+		return err
+	}
+
+	if metadata == nil {
+		return fmt.Errorf("no metadata found in %q", dbDir)
+	}
+
+	s, err := grypeDBLegacyStore.New(filepath.Join(dbDir, grypeDBLegacy.VulnerabilityStoreFileName), false)
+	if err != nil {
+		return fmt.Errorf("unable to open vulnerability store: %w", err)
+	}
+
+	id, err := s.GetID()
+	if err != nil {
+		return fmt.Errorf("unable to get vulnerability store ID: %w", err)
+	}
+
+	if id.SchemaVersion != metadata.Version {
+		return fmt.Errorf("metadata version %d does not match vulnerability store version %d", metadata.Version, id.SchemaVersion)
+	}
+
+	u, err := url.Parse(publishBaseURL)
+	if err != nil {
+		return err
+	}
+
+	// we need a well-ordered string to append to the archive name to ensure uniqueness (to avoid overwriting
+	// existing archives in the CDN) as well as to ensure that multiple archives created in the same day are
+	// put in the correct order in the listing file. The DB timestamp represents the age of the data in the DB,
+	// not when the DB was created. The trailer represents the time the DB was packaged.
+	trailer := fmt.Sprintf("%d", secondsSinceEpoch())
+
+	// TODO (alex): support tar.zst
+	// var extension = "tar.zst"
+	var extension = "tar.gz"
+
+	if overrideArchiveExtension != "" {
+		extension = strings.TrimLeft(overrideArchiveExtension, ".")
+	}
+	// TODO (alex): support tar.zst
+	// else if metadata.Version < 5 {
+	//	extension = "tar.gz"
+	// }
+
+	var found bool
+	for _, valid := range []string{"tar.zst", "tar.gz"} {
+		if valid == extension {
+			found = true
+			break
+		}
+	}
+
+	if !found {
+		return fmt.Errorf("invalid archive extension %q", extension)
+	}
+
+	// we attach the packaging timestamp to the end of the file name to prevent overwriting DBs in S3 that are
+	// already cached in the CDN. Ideally this would be based on the archive checksum, but a timestamp is simpler.
+	tarName := fmt.Sprintf(
+		"vulnerability-db_v%d_%s_%s.%s",
+		metadata.Version,
+		metadata.Built.Format(time.RFC3339),
+		trailer,
+		extension,
+	)
+	tarPath := path.Join(dbDir, tarName)
+
+	if err := populateTar(tarPath); err != nil {
+		return err
+	}
+
+	log.WithFields("path", tarPath).Info("created database archive")
+
+	entry, err := distribution.NewListingEntryFromArchive(fs, *metadata, tarPath, u)
+	if err != nil {
+		return fmt.Errorf("unable to create listing entry from archive: %w", err)
+	}
+
+	listing := distribution.NewListing(entry)
+	listingPath := path.Join(dbDir, distribution.ListingFileName)
+	if err = listing.Write(listingPath); err != nil {
+		return err
+	}
+
+	log.WithFields("path", listingPath).Debug("created initial listing file")
+
+	return nil
+}
+
+func secondsSinceEpoch() int64 {
+	return time.Now().UTC().Unix()
+}
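
Note (illustration, not part of the patch): newShellCompressor replaces the in-process zstd encoder with an external compressor process, keeping encoder memory out of the Go heap and letting the tar stream flow straight through stdin/stdout. The same pattern reduced to a self-contained, stdlib-only sketch; it assumes a zstd binary on PATH, and compressTo is a hypothetical helper name:

package main

import (
	"fmt"
	"io"
	"os"
	"os/exec"
)

// compressTo pipes everything from r through an external zstd process into w,
// mirroring the patch's shellCompressor: stdin carries the uncompressed stream,
// stdout is wired to the destination, and Wait surfaces compressor failures.
func compressTo(w io.Writer, r io.Reader) error {
	cmd := exec.Command("zstd", "-T0", "-19", "-c") // assumes a zstd binary on PATH
	cmd.Stdout = w
	cmd.Stderr = os.Stderr

	stdin, err := cmd.StdinPipe()
	if err != nil {
		return fmt.Errorf("unable to create stdin pipe: %w", err)
	}
	if err := cmd.Start(); err != nil {
		return fmt.Errorf("unable to start zstd: %w", err)
	}
	if _, err := io.Copy(stdin, r); err != nil {
		return fmt.Errorf("unable to stream payload: %w", err)
	}
	// closing stdin signals EOF; Wait lets the compressor flush and exit
	if err := stdin.Close(); err != nil {
		return fmt.Errorf("unable to close stdin pipe: %w", err)
	}
	return cmd.Wait()
}

func main() {
	out, err := os.Create("payload.zst")
	if err != nil {
		panic(err)
	}
	defer out.Close()

	if err := compressTo(out, os.Stdin); err != nil {
		panic(err)
	}
}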
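Note (illustration, not part of the patch): the v6 archive naming produced by calculateTarPath can be reproduced with the standard library alone. A sketch with stand-in values; the version numbers and dates below are examples, not read from a real DB:

package main

import (
	"fmt"
	"time"
)

func main() {
	// stand-in values; the real ones come from the v6 store's metadata and provider records
	model, revision, addition := 6, 0, 2
	eldest := time.Date(2024, 11, 20, 8, 15, 44, 0, time.UTC) // oldest provider data capture date
	built := time.Date(2024, 11, 21, 15, 30, 0, 0, time.UTC)  // database build timestamp
	extension := "tar.zst"

	// vulnerability-db_VERSION_OLDESTDATADATE_BUILTEPOCH.EXTENSION
	name := fmt.Sprintf(
		"vulnerability-db_v%d.%d.%d_%s_%d.%s",
		model, revision, addition,
		eldest.UTC().Format(time.RFC3339),
		built.Unix(),
		extension,
	)

	fmt.Println(name)
	// prints: vulnerability-db_v6.0.2_2024-11-20T08:15:44Z_1732203000.tar.zst
}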