diff --git a/internal/staticanalysis/analyze.go b/internal/staticanalysis/analyze.go
index a39aacf1..965f5f62 100644
--- a/internal/staticanalysis/analyze.go
+++ b/internal/staticanalysis/analyze.go
@@ -47,7 +47,7 @@ If staticanalysis.Parsing is not in the list of analysisTasks, jsParserConfig ma
 If an error occurs while traversing the extracted package directory tree, or an invalid task is
 requested, a nil result is returned along with the corresponding error object.
 */
-func AnalyzePackageFiles(ctx context.Context, extractDir string, jsParserConfig parsing.ParserConfig, analysisTasks []Task) (*Result, error) {
+func AnalyzePackageFiles(ctx context.Context, extractDir string, jsParserConfig parsing.ParserConfig, analysisTasks []Task) ([]SingleResult, error) {
 	runTask := map[Task]bool{}
 
 	for _, task := range analysisTasks {
@@ -89,7 +89,7 @@ func AnalyzePackageFiles(ctx context.Context, extractDir string, jsParserConfig
 
 	if runTask[Basic] {
 		slog.InfoContext(ctx, "run basic analysis")
-		basicData, err := basicdata.Analyze(ctx, paths, getPathInArchive)
+		basicData, err := basicdata.Analyze(ctx, paths, basicdata.FormatPaths(getPathInArchive))
 		if err != nil {
 			slog.ErrorContext(ctx, "static analysis basic data error", "error", err)
 		} else if len(basicData) != len(fileResults) {
@@ -133,5 +133,5 @@ func AnalyzePackageFiles(ctx context.Context, extractDir string, jsParserConfig
 		}
 	}
 
-	return &Result{Files: fileResults}, nil
+	return fileResults, nil
 }
diff --git a/internal/staticanalysis/analyze_test.go b/internal/staticanalysis/analyze_test.go
index 8585c87a..d26f7151 100644
--- a/internal/staticanalysis/analyze_test.go
+++ b/internal/staticanalysis/analyze_test.go
@@ -34,12 +34,10 @@ var helloWorldJs = testFile{
 	lineLengths: valuecounts.Count([]int{18}),
 }
 
-func makeDesiredResult(files ...testFile) *Result {
-	result := Result{
-		Files: []SingleResult{},
-	}
-	for _, file := range files {
-		result.Files = append(result.Files, SingleResult{
+func makeDesiredResult(files ...testFile) []SingleResult {
+	result := make([]SingleResult, len(files))
+	for index, file := range files {
+		result[index] = SingleResult{
 			Filename: file.filename,
 			Basic: &basicdata.FileData{
 				DetectedType: file.fileType,
@@ -67,17 +65,17 @@ func makeDesiredResult(files ...testFile) *Result {
 				IPAddresses: []string{},
 				URLs:        []string{},
 			},
-		})
+		}
 	}
-	return &result
+	return result
 }
 
 func TestAnalyzePackageFiles(t *testing.T) {
 	tests := []struct {
 		name    string
 		files   []testFile
-		want    *Result
+		want    []SingleResult
 		wantErr bool
 	}{
 		{
diff --git a/internal/staticanalysis/basicdata/basic_data.go b/internal/staticanalysis/basicdata/basic_data.go
index 3e8b8af5..26375492 100644
--- a/internal/staticanalysis/basicdata/basic_data.go
+++ b/internal/staticanalysis/basicdata/basic_data.go
@@ -39,32 +39,79 @@ func (bd FileData) String() string {
 	return strings.Join(parts, "\n")
 }
 
+// Option allows controlling the behaviour of Analyze with non-required arguments.
+type Option interface{ set(*analyzeConfig) }
+
+// option implements Option.
+type option func(*analyzeConfig)
+
+func (o option) set(config *analyzeConfig) { o(config) }
+
+// analyzeConfig stores all behaviour configuration for Analyze which is adjustable by Option.
+type analyzeConfig struct {
+	// withLineLengths enables line length analysis
+	withLineLengths bool
+	// formatPathFunc allows providing a custom transformation for file paths
+	// when logging errors. For example, removing a common path prefix.
+	formatPathFunc func(absPath string) string
+}
+
+func getDefaultAnalyzeConfig() analyzeConfig {
+	return analyzeConfig{
+		withLineLengths: true,
+		formatPathFunc:  func(absPath string) string { return absPath },
+	}
+}
+
+// SkipLineLengths disables collecting line length information during analysis, which is
+// useful when the input files are known not to be text files (e.g. a package tarball).
+func SkipLineLengths() Option {
+	return option(func(config *analyzeConfig) {
+		config.withLineLengths = false
+	})
+}
+
+// FormatPaths uses the given function to transform absolute file paths
+// before they are passed to logging.
+func FormatPaths(formatPathFunc func(absPath string) string) Option {
+	return option(func(config *analyzeConfig) {
+		config.formatPathFunc = formatPathFunc
+	})
+}
+
 /*
 Analyze collects basic file information for the specified files.
 Errors are logged rather than returned where possible, to maximise the amount of data collected.
 
-pathInArchive should return the relative path in the package archive, given an absolute
-path to a file in the package. The relative path is used for the result data.
+Pass instances of Option to control which information is collected.
 */
-func Analyze(ctx context.Context, paths []string, pathInArchive func(absolutePath string) string) ([]FileData, error) {
+func Analyze(ctx context.Context, paths []string, options ...Option) ([]FileData, error) {
 	if len(paths) == 0 {
 		return []FileData{}, nil
 	}
 
-	detectedTypes, err := detectFileTypes(ctx, paths)
-	haveDetectedTypes := true
+	config := getDefaultAnalyzeConfig()
+	for _, o := range options {
+		o.set(&config)
+	}
+
+	var detectedTypes []string
+	var haveDetectedTypes bool
+	types, err := detectFileTypes(ctx, paths)
+	detectedTypes = types
+	haveDetectedTypes = true
 	if err != nil {
 		slog.ErrorContext(ctx, "failed to run file type detection", "error", err)
 		haveDetectedTypes = false
 	}
-	if len(detectedTypes) != len(paths) {
+	if len(types) != len(paths) {
 		slog.ErrorContext(ctx, fmt.Sprintf("detectFileTypes() returned %d results, expecting %d", len(detectedTypes), len(paths)))
 		haveDetectedTypes = false
 	}
 
-	var result []FileData
+	result := make([]FileData, len(paths))
 	for index, filePath := range paths {
-		archivePath := pathInArchive(filePath)
+		formattedPath := config.formatPathFunc(filePath)
 		detectedType := ""
 		if haveDetectedTypes {
 			detectedType = detectedTypes[index]
@@ -73,31 +120,33 @@ func Analyze(ctx context.Context, paths []string, pathInArchive func(absolutePat
 		var fileSize int64
 		if fileInfo, err := os.Stat(filePath); err != nil {
 			fileSize = -1 // error value
-			slog.ErrorContext(ctx, "Error during stat file", "path", archivePath, "error", err)
+			slog.ErrorContext(ctx, "Error during stat file", "file", formattedPath, "error", err)
 		} else {
 			fileSize = fileInfo.Size()
 		}
 
 		var sha265Sum string
 		if hash, err := utils.SHA256Hash(filePath); err != nil {
-			slog.ErrorContext(ctx, "Error hashing file", "path", archivePath, "error", err)
+			slog.ErrorContext(ctx, "Error hashing file", "file", formattedPath, "error", err)
 		} else {
 			sha265Sum = hash
 		}
 
 		var lineLengths valuecounts.ValueCounts
-		if ll, err := linelengths.GetLineLengths(filePath, ""); err != nil {
-			slog.ErrorContext(ctx, "Error counting line lengths", "path", archivePath, "error", err)
-		} else {
-			lineLengths = valuecounts.Count(ll)
+		if config.withLineLengths {
+			if ll, err := linelengths.GetLineLengths(filePath, ""); err != nil {
+				slog.ErrorContext(ctx, "Error counting line lengths", "file", formattedPath, "error", err)
+			} else {
+				lineLengths = valuecounts.Count(ll)
+			}
 		}
 
-		result = append(result, FileData{
+		result[index] = FileData{
 			DetectedType: detectedType,
 			Size:         fileSize,
 			SHA256:       sha265Sum,
 			LineLengths:  lineLengths,
-		})
+		}
 	}
 
 	return result, nil
diff --git a/internal/staticanalysis/basicdata/basic_data_test.go b/internal/staticanalysis/basicdata/basic_data_test.go
index be86e368..38873963 100644
--- a/internal/staticanalysis/basicdata/basic_data_test.go
+++ b/internal/staticanalysis/basicdata/basic_data_test.go
@@ -5,7 +5,6 @@ import (
 	"os"
 	"path/filepath"
 	"reflect"
-	"strings"
 	"testing"
 
 	"github.com/ossf/package-analysis/internal/utils"
@@ -72,11 +71,7 @@ func TestGetBasicData(t *testing.T) {
 		}
 	}
 
-	getArchivePath := func(absolutePath string) string {
-		return strings.TrimPrefix(absolutePath, testDir+string(os.PathSeparator))
-	}
-
-	got, err := Analyze(context.Background(), paths, getArchivePath)
+	got, err := Analyze(context.Background(), paths)
 	if (err != nil) != tt.wantErr {
 		t.Errorf("detectFileTypes() error = %v, wantErr %v", err, tt.wantErr)
 		return
diff --git a/internal/staticanalysis/result.go b/internal/staticanalysis/result.go
index 35f283f5..8bcac4c1 100644
--- a/internal/staticanalysis/result.go
+++ b/internal/staticanalysis/result.go
@@ -13,8 +13,19 @@ import (
 // Result (staticanalysis.Result) is the top-level internal data structure
 // that stores all data produced by static analysis performed on a package artifact.
 type Result struct {
-	ArchiveSHA256 string
-	Files         []SingleResult
+	Archive ArchiveResult
+	Files   []SingleResult
+}
+
+type ArchiveResult struct {
+	// DetectedType records the output of the `file` command run on the archive.
+	DetectedType string
+
+	// Size records the (compressed) size of the archive (as reported by the filesystem).
+	Size int64
+
+	// SHA256 records the SHA256 hashsum of the archive.
+ SHA256 string } /* diff --git a/sandboxes/staticanalysis/staticanalyze.go b/sandboxes/staticanalysis/staticanalyze.go index 7759c47a..354c8334 100644 --- a/sandboxes/staticanalysis/staticanalyze.go +++ b/sandboxes/staticanalysis/staticanalyze.go @@ -13,6 +13,7 @@ import ( "github.com/ossf/package-analysis/internal/log" "github.com/ossf/package-analysis/internal/pkgmanager" "github.com/ossf/package-analysis/internal/staticanalysis" + "github.com/ossf/package-analysis/internal/staticanalysis/basicdata" "github.com/ossf/package-analysis/internal/staticanalysis/parsing" "github.com/ossf/package-analysis/internal/utils" "github.com/ossf/package-analysis/internal/worker" @@ -151,7 +152,8 @@ func run() (err error) { } defer workDirs.cleanup(ctx) - startExtractionTime := time.Now() + startDownloadTime := time.Now() + var archivePath string if *localFile != "" { archivePath = *localFile @@ -162,6 +164,32 @@ func run() (err error) { } } + downloadTime := time.Since(startDownloadTime) + + results := staticanalysis.Result{} + + startArchiveAnalysisTime := time.Now() + archiveResult, err := basicdata.Analyze(ctx, []string{archivePath}, + basicdata.SkipLineLengths(), + basicdata.FormatPaths(func(absPath string) string { return "/" }), + ) + if err != nil { + slog.WarnContext(ctx, "failed to analyze archive file", "error", err) + } else if len(archiveResult) != 1 { + slog.WarnContext(ctx, "archive file analysis: unexpected number of results", "len", len(archiveResult)) + } else { + archiveInfo := archiveResult[0] + results.Archive = staticanalysis.ArchiveResult{ + DetectedType: archiveInfo.DetectedType, + Size: archiveInfo.Size, + SHA256: archiveInfo.SHA256, + } + } + + archiveAnalysisTime := time.Since(startArchiveAnalysisTime) + + startExtractionTime := time.Now() + if err := manager.ExtractArchive(archivePath, workDirs.extractDir); err != nil { return fmt.Errorf("archive extraction failed: %w", err) } @@ -174,20 +202,13 @@ func run() (err error) { } startAnalysisTime := time.Now() - results, err := staticanalysis.AnalyzePackageFiles(ctx, workDirs.extractDir, jsParserConfig, analysisTasks) - analysisTime := time.Since(startAnalysisTime) + fileResults, err := staticanalysis.AnalyzePackageFiles(ctx, workDirs.extractDir, jsParserConfig, analysisTasks) if err != nil { return fmt.Errorf("static analysis error: %w", err) } + results.Files = fileResults - startHashTime := time.Now() - archiveHash, err := utils.SHA256Hash(archivePath) - if err != nil { - slog.WarnContext(ctx, "failed to calculate archive checksum", "error", err) - } - results.ArchiveSHA256 = archiveHash - hashTime := time.Since(startHashTime) - + analysisTime := time.Since(startAnalysisTime) startWritingResultsTime := time.Now() jsonResult, err := json.Marshal(results) @@ -217,12 +238,13 @@ func run() (err error) { writingResultsTime := time.Since(startWritingResultsTime) totalTime := time.Since(startTime) - otherTime := totalTime - writingResultsTime - analysisTime - extractionTime - hashTime + otherTime := totalTime - writingResultsTime - analysisTime - extractionTime - archiveAnalysisTime - downloadTime slog.InfoContext(ctx, "Execution times", - "download and extraction", extractionTime, - "analysis", analysisTime, - "sha256Hash calculation", hashTime, + "download", downloadTime, + "archive analysis", archiveAnalysisTime, + "archive extraction", extractionTime, + "file analysis", analysisTime, "writing results", writingResultsTime, "other", otherTime, "total", totalTime)
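
Usage sketch for the new basicdata.Analyze options API (illustrative only: Analyze, Option, FormatPaths, SkipLineLengths and the FileData fields are taken from the patch above, while the package name, helper function names and output formatting below are invented for the example; basicdata is an internal package, so this only builds from inside the package-analysis module).

package example

import (
	"context"
	"fmt"
	"log"
	"os"
	"strings"

	"github.com/ossf/package-analysis/internal/staticanalysis/basicdata"
)

// analyzeExtractedFiles mirrors the updated call in AnalyzePackageFiles: line-length
// counting stays enabled (the default), and logged paths are made relative to the
// extraction directory via FormatPaths.
func analyzeExtractedFiles(ctx context.Context, extractDir string, paths []string) error {
	fileData, err := basicdata.Analyze(ctx, paths,
		basicdata.FormatPaths(func(absPath string) string {
			return strings.TrimPrefix(absPath, extractDir+string(os.PathSeparator))
		}),
	)
	if err != nil {
		return err
	}
	for i, fd := range fileData {
		fmt.Printf("%s: type=%q size=%d sha256=%s\n", paths[i], fd.DetectedType, fd.Size, fd.SHA256)
	}
	return nil
}

// analyzeArchive mirrors the new call in sandboxes/staticanalysis/staticanalyze.go:
// a package tarball is not a text file, so line-length counting is skipped.
func analyzeArchive(ctx context.Context, archivePath string) {
	info, err := basicdata.Analyze(ctx, []string{archivePath}, basicdata.SkipLineLengths())
	if err != nil || len(info) != 1 {
		log.Printf("archive analysis failed: %v (results: %d)", err, len(info))
		return
	}
	fmt.Printf("archive: type=%q size=%d sha256=%s\n", info[0].DetectedType, info[0].Size, info[0].SHA256)
}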