Skip to content

Commit

Permalink
enable packaging v6 DBs
Browse files Browse the repository at this point in the history
Signed-off-by: Alex Goodman <[email protected]>
  • Loading branch information
wagoodman committed Nov 22, 2024
1 parent 429c31c commit 5443220
Show file tree
Hide file tree
Showing 6 changed files with 304 additions and 67 deletions.
2 changes: 2 additions & 0 deletions cmd/grype-db/application/application.go
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@ import (
"github.com/anchore/grype-db/internal/log"
"github.com/anchore/grype-db/internal/ui"
"github.com/anchore/grype-db/internal/utils"
"github.com/anchore/grype/grype"
)

const Name = internal.ApplicationName
Expand Down Expand Up @@ -169,6 +170,7 @@ func setupLogger(app *Config) error {
}

log.Set(l)
grype.SetLogger(l)

return nil
}
Expand Down
2 changes: 2 additions & 0 deletions go.mod
Original file line number Diff line number Diff line change
Expand Up @@ -177,6 +177,8 @@ require (
github.com/opencontainers/image-spec v1.1.0 // indirect
github.com/opencontainers/runtime-spec v1.1.0 // indirect
github.com/opencontainers/selinux v1.11.0 // indirect
github.com/openvex/go-vex v0.2.5 // indirect
github.com/package-url/packageurl-go v0.1.1 // indirect
github.com/pborman/indent v1.2.1 // indirect
github.com/pelletier/go-toml v1.9.5 // indirect
github.com/pelletier/go-toml/v2 v2.2.2 // indirect
Expand Down
4 changes: 4 additions & 0 deletions go.sum
Original file line number Diff line number Diff line change
Expand Up @@ -810,6 +810,10 @@ github.com/opencontainers/runtime-spec v1.1.0 h1:HHUyrt9mwHUjtasSbXSMvs4cyFxh+Bl
github.com/opencontainers/runtime-spec v1.1.0/go.mod h1:jwyrGlmzljRJv/Fgzds9SsS/C5hL+LL3ko9hs6T5lQ0=
github.com/opencontainers/selinux v1.11.0 h1:+5Zbo97w3Lbmb3PeqQtpmTkMwsW5nRI3YaLpt7tQ7oU=
github.com/opencontainers/selinux v1.11.0/go.mod h1:E5dMC3VPuVvVHDYmi78qvhJp8+M586T4DlDRYpFkyec=
github.com/openvex/go-vex v0.2.5 h1:41utdp2rHgAGCsG+UbjmfMG5CWQxs15nGqir1eRgSrQ=
github.com/openvex/go-vex v0.2.5/go.mod h1:j+oadBxSUELkrKh4NfNb+BPo77U3q7gdKME88IO/0Wo=
github.com/package-url/packageurl-go v0.1.1 h1:KTRE0bK3sKbFKAk3yy63DpeskU7Cvs/x/Da5l+RtzyU=
github.com/package-url/packageurl-go v0.1.1/go.mod h1:uQd4a7Rh3ZsVg5j0lNyAfyxIeGde9yrlhjF78GzeW0c=
github.com/pascaldekloe/goe v0.0.0-20180627143212-57f6aae5913c/go.mod h1:lzWF7FIEvWOWxwDKqyGYQf6ZUaNfKdP144TG7ZOy1lc=
github.com/pascaldekloe/goe v0.1.0/go.mod h1:lzWF7FIEvWOWxwDKqyGYQf6ZUaNfKdP144TG7ZOy1lc=
github.com/pborman/indent v1.2.1 h1:lFiviAbISHv3Rf0jcuh489bi06hj98JsVMtIDZQb9yM=
Expand Down
82 changes: 73 additions & 9 deletions internal/tarutil/writer.go
Original file line number Diff line number Diff line change
Expand Up @@ -2,13 +2,17 @@ package tarutil

import (
"archive/tar"
"bufio"
"compress/gzip"
"fmt"
"io"
"os"
"os/exec"
"strings"

"github.com/klauspost/compress/zstd"
"github.com/google/shlex"

"github.com/anchore/grype-db/internal/log"
)

var ErrUnsupportedArchiveSuffix = fmt.Errorf("archive name has an unsupported suffix")
Expand All @@ -20,7 +24,7 @@ type writer struct {
writer *tar.Writer
}

// NewWriter creates a new tar writer that writes to the specified archive path. Supports .tar.gz and .tar.zst file extensions.
// NewWriter creates a new tar writer that writes to the specified archive path. Supports .tar.gz, .tar.zst, .tar.xz, and .tar file extensions.
func NewWriter(archivePath string) (Writer, error) {
w, err := newCompressor(archivePath)
if err != nil {
Expand All @@ -45,19 +49,79 @@ func newCompressor(archivePath string) (io.WriteCloser, error) {
case strings.HasSuffix(archivePath, ".tar.gz"):
return gzip.NewWriter(archive), nil
case strings.HasSuffix(archivePath, ".tar.zst"):
// adding zstd.WithWindowSize(zstd.MaxWindowSize), zstd.WithAllLitEntropyCompression(true)
// will have slightly better results, but use a lot more memory
w, err := zstd.NewWriter(archive, zstd.WithEncoderLevel(zstd.SpeedBestCompression))
if err != nil {
return nil, fmt.Errorf("unable to get zst compression stream: %w", err)
}
return w, nil
// note: since we're using --ultra this tends to have a high memory usage at decompression time
// For ~700 MB payload that is compressing down to ~60 MB, that would need ~130 MB of memory (--ultra -22)
// for the same payload compressing down to ~65MB, that would need ~70MB of memory (--ultra -21)
return newShellCompressor("zstd -T0 -22 --ultra -c -vv", archive)
case strings.HasSuffix(archivePath, ".tar.xz"):
return newShellCompressor("xz -9 --threads=0 -c -vv", archive)
case strings.HasSuffix(archivePath, ".tar"):
return archive, nil
}
return nil, ErrUnsupportedArchiveSuffix
}

// shellCompressor wraps the stdin pipe of an external compression process and ensures proper cleanup.
type shellCompressor struct {
cmd *exec.Cmd
pipe io.WriteCloser
}

func newShellCompressor(c string, archive io.Writer) (*shellCompressor, error) {
args, err := shlex.Split(c)
if err != nil {
return nil, fmt.Errorf("unable to parse command: %w", err)
}
binary := args[0]
args = args[1:]
cmd := exec.Command(binary, args...)
log.Debug(strings.Join(cmd.Args, " "))
cmd.Stdout = archive

stderrPipe, err := cmd.StderrPipe()
if err != nil {
return nil, fmt.Errorf("unable to create stderr pipe: %w", err)
}

pipe, err := cmd.StdinPipe()
if err != nil {
return nil, fmt.Errorf("unable to create stdin pipe: %w", err)
}

if err := cmd.Start(); err != nil {
return nil, fmt.Errorf("unable to start process: %w", err)
}

go func() {
scanner := bufio.NewScanner(stderrPipe)
for scanner.Scan() {
log.Debugf("[%s] %s", binary, scanner.Text())
}
if err := scanner.Err(); err != nil {
log.Errorf("[%s] error reading stderr: %v", binary, err)
}
}()

return &shellCompressor{
cmd: cmd,
pipe: pipe,
}, nil
}

func (sc *shellCompressor) Write(p []byte) (int, error) {
return sc.pipe.Write(p)
}

func (sc *shellCompressor) Close() error {
if err := sc.pipe.Close(); err != nil {
return fmt.Errorf("unable to close compression stdin pipe: %w", err)
}
if err := sc.cmd.Wait(); err != nil {
return fmt.Errorf("compression process error: %w", err)
}
return nil
}

func (w *writer) WriteEntry(entry Entry) error {
return entry.writeEntry(w.writer)
}
Expand Down
165 changes: 107 additions & 58 deletions pkg/process/package.go
Original file line number Diff line number Diff line change
@@ -1,120 +1,143 @@
package process

import (
"errors"
"fmt"
"net/url"
"os"
"path"
"path/filepath"
"strings"
"time"

"github.com/spf13/afero"

"github.com/anchore/grype-db/internal/log"
"github.com/anchore/grype-db/internal/tarutil"
"github.com/anchore/grype/grype/db/legacy/distribution"
grypeDBLegacyDistribution "github.com/anchore/grype/grype/db/legacy/distribution"
v6 "github.com/anchore/grype/grype/db/v6"
v6Distribution "github.com/anchore/grype/grype/db/v6/distribution"
)

func secondsSinceEpoch() int64 {
return time.Now().UTC().Unix()
}

func Package(dbDir, publishBaseURL, overrideArchiveExtension string) error {
log.WithFields("from", dbDir, "url", publishBaseURL, "extension-override", overrideArchiveExtension).Info("packaging database")
// check if metadata file exists, if so, then this
if _, err := os.Stat(filepath.Join(dbDir, grypeDBLegacyDistribution.MetadataFileName)); os.IsNotExist(err) {
return packageDB(dbDir, overrideArchiveExtension)
}
return packageLegacyDB(dbDir, publishBaseURL, overrideArchiveExtension)
}

fs := afero.NewOsFs()
metadata, err := distribution.NewMetadataFromDir(fs, dbDir)
func packageDB(dbDir, overrideArchiveExtension string) error {
extension, err := resolveExtension(overrideArchiveExtension)
if err != nil {
return err
}
log.WithFields("from", dbDir, "extension", extension).Info("packaging database")

if metadata == nil {
return fmt.Errorf("no metadata found in %q", dbDir)
tarPath, err := calculateTarPath(dbDir, extension)
if err != nil {
return err
}

u, err := url.Parse(publishBaseURL)
if err != nil {
if err := populateTar(tarPath); err != nil {
return err
}

// we need a well-ordered string to append to the archive name to ensure uniqueness (to avoid overwriting
// existing archives in the CDN) as well as to ensure that multiple archives created in the same day are
// put in the correct order in the listing file. The DB timestamp represents the age of the data in the DB
// not when the DB was created. The trailer represents the time the DB was packaged.
trailer := fmt.Sprintf("%d", secondsSinceEpoch())
log.WithFields("path", tarPath).Info("created database archive")

return writeLatestDocument(tarPath)
}

// TODO (alex): supporting tar.zst
// var extension = "tar.zst"
var extension = "tar.gz"
func resolveExtension(overrideArchiveExtension string) (string, error) {
var extension = "tar.zst"

if overrideArchiveExtension != "" {
extension = strings.TrimLeft(overrideArchiveExtension, ".")
}
// TODO (alex): supporting tar.zst
// else if metadata.Version < 5 {
// extension = "tar.gz"
// }

var found bool
for _, valid := range []string{"tar.zst", "tar.gz"} {
for _, valid := range []string{"tar.zst", "tar.xz", "tar.gz"} {
if valid == extension {
found = true
break
}
}

if !found {
return fmt.Errorf("invalid archive extension %q", extension)
return "", fmt.Errorf("unsupported archive extension %q", extension)
}
return extension, nil
}

// we attach a random value at the end of the file name to prevent from overwriting DBs in S3 that are already
// cached in the CDN. Ideally this would be based off of the archive checksum but a random string is simpler.
tarName := fmt.Sprintf(
"vulnerability-db_v%d_%s_%s.%s",
metadata.Version,
metadata.Built.Format(time.RFC3339),
trailer,
extension,
)
tarPath := path.Join(dbDir, tarName)
func calculateTarPath(dbDir string, extension string) (string, error) {
s, err := v6.NewReader(v6.Config{DBDirPath: dbDir})
if err != nil {
return "", fmt.Errorf("unable to open vulnerability store: %w", err)
}

if err := populate(tarName, dbDir); err != nil {
return err
metadata, err := s.GetDBMetadata()
if err != nil {
return "", fmt.Errorf("unable to get vulnerability store metadata: %w", err)
}

log.WithFields("path", tarPath).Info("created database archive")
if metadata.Model != v6.ModelVersion {
return "", fmt.Errorf("metadata model %d does not match vulnerability store model %d", v6.ModelVersion, metadata.Model)
}

entry, err := distribution.NewListingEntryFromArchive(fs, *metadata, tarPath, u)
providers, err := s.AllProviders()
if err != nil {
return fmt.Errorf("unable to create listing entry from archive: %w", err)
return "", fmt.Errorf("unable to get all providers: %w", err)
}

listing := distribution.NewListing(entry)
listingPath := path.Join(dbDir, distribution.ListingFileName)
if err = listing.Write(listingPath); err != nil {
return err
if len(providers) == 0 {
return "", fmt.Errorf("no providers found in the vulnerability store")
}

log.WithFields("path", listingPath).Debug("created initial listing file")
eldest := eldestProviderTimestamp(providers)
if eldest == nil {
return "", errors.New("could not resolve eldest provider timestamp")
}
// output archive vulnerability-db_VERSION_OLDESTDATADATE_BUILTEPOCH.tar.gz, where:
// - VERSION: schema version in the form of v#.#.#
// - OLDESTDATADATE: RFC3338 formatted value of the oldest date capture date found for all contained providers
// - BUILTEPOCH: linux epoch formatted value of the database metadata built field
tarName := fmt.Sprintf(
"vulnerability-db_v%s_%s_%d.%s",
fmt.Sprintf("%d.%d.%d", metadata.Model, metadata.Revision, metadata.Addition),
eldest.UTC().Format(time.RFC3339),
metadata.BuildTimestamp.Unix(),
extension,
)

return nil
return filepath.Join(dbDir, tarName), err
}

func eldestProviderTimestamp(providers []v6.Provider) *time.Time {
var eldest *time.Time
for _, p := range providers {
if eldest == nil || p.DateCaptured.Before(*eldest) {
eldest = p.DateCaptured
}
}
return eldest
}

func populate(tarName, dbDir string) error {
func populateTar(tarPath string) error {
originalDir, err := os.Getwd()
if err != nil {
return fmt.Errorf("unable to get CWD: %w", err)
}

if err = os.Chdir(dbDir); err != nil {
return fmt.Errorf("unable to cd to build dir: %w", err)
}
dbDir, tarName := filepath.Split(tarPath)

defer func() {
if err = os.Chdir(originalDir); err != nil {
log.Errorf("unable to cd to original dir: %v", err)
if dbDir != "" {
if err = os.Chdir(dbDir); err != nil {
return fmt.Errorf("unable to cd to build dir: %w", err)
}
}()

defer func() {
if err = os.Chdir(originalDir); err != nil {
log.Errorf("unable to cd to original dir: %v", err)
}
}()
}

fileInfos, err := os.ReadDir("./")
if err != nil {
Expand All @@ -134,3 +157,29 @@ func populate(tarName, dbDir string) error {

return nil
}

func writeLatestDocument(tarPath string) error {
archive, err := v6Distribution.NewArchive(tarPath)
if err != nil || archive == nil {
return fmt.Errorf("unable to create archive: %w", err)
}

doc := v6Distribution.NewLatestDocument(*archive)
if doc == nil {
return errors.New("unable to create latest document")
}

dbDir := filepath.Dir(tarPath)

latestPath := path.Join(dbDir, v6Distribution.LatestFileName)

fh, err := os.OpenFile(latestPath, os.O_CREATE|os.O_TRUNC|os.O_WRONLY, 0644)
if err != nil {
return fmt.Errorf("unable to create latest file: %w", err)
}

if err = doc.Write(fh); err != nil {
return fmt.Errorf("unable to write latest document: %w", err)
}
return nil
}
Loading

0 comments on commit 5443220

Please sign in to comment.