
Commit

Merge branch 'main' into add_archive_hash
maxfisher-g authored Jan 16, 2024
2 parents 7c32985 + 5c77643 commit baee708
Showing 8 changed files with 56 additions and 36 deletions.
2 changes: 1 addition & 1 deletion cmd/analyze/main.go
@@ -152,7 +152,7 @@ func dynamicAnalysis(ctx context.Context, pkg *pkgmanager.Pkg, resultStores *wor
"status", string(result.LastStatus))
}

if err := worker.SaveDynamicAnalysisData(ctx, pkg, resultStores, result.AnalysisData); err != nil {
if err := worker.SaveDynamicAnalysisData(ctx, pkg, resultStores, result.Data); err != nil {
slog.ErrorContext(ctx, "Upload error", "error", err)
}
}
2 changes: 1 addition & 1 deletion cmd/worker/main.go
@@ -168,7 +168,7 @@ func handleMessage(ctx context.Context, msg *pubsub.Message, packagesBucket *blo

result, dynamicAnalysisErr := worker.RunDynamicAnalysis(ctx, pkg, dynamicSandboxOpts, "")
if dynamicAnalysisErr == nil {
dynamicAnalysisErr = worker.SaveDynamicAnalysisData(ctx, pkg, resultStores, result.AnalysisData)
dynamicAnalysisErr = worker.SaveDynamicAnalysisData(ctx, pkg, resultStores, result.Data)
}

resultStores.AnalyzedPackageSaved = false
3 changes: 2 additions & 1 deletion internal/dynamicanalysis/analysis.go
@@ -21,7 +21,8 @@ const (
type Result struct {
StraceSummary analysisrun.StraceSummary
FileWritesSummary analysisrun.FileWritesSummary
// Ids that correlate to the name of the file that saves the actual write buffer contents. We save this separately so we don't need to dig through the FileWritesSummary later on.
// IDs that correlate to the name of the file that saves the actual write buffer contents.
// We save this separately so that we don't need to dig through the FileWritesSummary later on.
FileWriteBufferIds []string
}

25 changes: 11 additions & 14 deletions internal/worker/rundynamic.go
@@ -17,6 +17,8 @@ import (
"strings"
"time"

"golang.org/x/crypto/ssh"

"github.com/ossf/package-analysis/internal/analysis"
"github.com/ossf/package-analysis/internal/dynamicanalysis"
"github.com/ossf/package-analysis/internal/featureflags"
@@ -25,32 +27,27 @@ import (
"github.com/ossf/package-analysis/internal/sandbox"
"github.com/ossf/package-analysis/pkg/api/analysisrun"
"github.com/ossf/package-analysis/pkg/api/pkgecosystem"
"golang.org/x/crypto/ssh"
)

// defaultDynamicAnalysisImage is container image name of the default dynamic analysis sandbox
const defaultDynamicAnalysisImage = "gcr.io/ossf-malware-analysis/dynamic-analysis"

/*
DynamicAnalysisResult holds all the results from RunDynamicAnalysis
AnalysisData: Map of each successfully run phase to a summary of
the corresponding dynamic analysis result. This summary has two parts:
1. StraceSummary: information about system calls performed by the process
2. FileWrites: list of files which were written to and counts of bytes written
DynamicAnalysisResult holds all data and status from RunDynamicAnalysis.
Data: analysisrun.DynamicAnalysisData for the package under analysis.
Note, if error is not nil, then results[lastRunPhase] is nil.
LastRunPhase: the last phase that was run. If error is non-nil, this phase did not
successfully complete, and the results for this phase are not recorded.
Otherwise, the results contain data for this phase, even in cases where the
sandboxed process terminated abnormally.
Status: the status of the last run phase if it completed without error, else empty
LastStatus: the status of the last run phase if it completed without error, else empty
*/

type DynamicAnalysisResult struct {
AnalysisData analysisrun.DynamicAnalysisResults
Data analysisrun.DynamicAnalysisData
LastRunPhase analysisrun.DynamicPhase
LastStatus analysis.Status
}
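
As an aside, here is a minimal sketch of how a caller might consume the renamed struct, mirroring the call sites updated in cmd/analyze/main.go and cmd/worker/main.go above. The helper name, the import list, and the `[]sandbox.Option` parameter type are assumptions for illustration and are not part of this change.

```go
package example

import (
	"context"
	"log/slog"

	"github.com/ossf/package-analysis/internal/pkgmanager"
	"github.com/ossf/package-analysis/internal/sandbox"
	"github.com/ossf/package-analysis/internal/worker"
)

// analyzeAndSave is a hypothetical caller, shown only to illustrate the
// DynamicAnalysisResult fields (Data, LastRunPhase, LastStatus).
func analyzeAndSave(ctx context.Context, pkg *pkgmanager.Pkg, sbOpts []sandbox.Option, stores *worker.ResultStores) error {
	result, err := worker.RunDynamicAnalysis(ctx, pkg, sbOpts, "")
	if err != nil {
		return err
	}
	slog.InfoContext(ctx, "dynamic analysis finished",
		"last_phase", string(result.LastRunPhase),
		"status", string(result.LastStatus))
	// Data replaces the old AnalysisData field.
	return worker.SaveDynamicAnalysisData(ctx, pkg, stores, result.Data)
}
```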
@@ -185,7 +182,7 @@ func RunDynamicAnalysis(ctx context.Context, pkg *pkgmanager.Pkg, sbOpts []sandb
}

result := DynamicAnalysisResult{
AnalysisData: analysisrun.DynamicAnalysisResults{
Data: analysisrun.DynamicAnalysisData{
StraceSummary: make(analysisrun.DynamicAnalysisStraceSummary),
FileWritesSummary: make(analysisrun.DynamicAnalysisFileWritesSummary),
FileWriteBufferIds: make(analysisrun.DynamicAnalysisFileWriteBufferIds),
@@ -291,9 +288,9 @@ func runDynamicAnalysisPhase(ctx context.Context, pkg *pkgmanager.Pkg, sb sandbo
return err
}

result.AnalysisData.StraceSummary[phase] = &phaseResult.StraceSummary
result.AnalysisData.FileWritesSummary[phase] = &phaseResult.FileWritesSummary
result.AnalysisData.FileWriteBufferIds[phase] = phaseResult.FileWriteBufferIds
result.Data.StraceSummary[phase] = &phaseResult.StraceSummary
result.Data.FileWritesSummary[phase] = &phaseResult.FileWritesSummary
result.Data.FileWriteBufferIds[phase] = phaseResult.FileWriteBufferIds
result.LastStatus = phaseResult.StraceSummary.Status

if phase == analysisrun.DynamicPhaseExecute {
@@ -302,7 +299,7 @@ func runDynamicAnalysisPhase(ctx context.Context, pkg *pkgmanager.Pkg, sb sandbo
// don't return this error, just log it
slog.ErrorContext(ctx, "Error retrieving execution log", "error", err)
} else {
result.AnalysisData.ExecutionLog = analysisrun.DynamicAnalysisExecutionLog(executionLog)
result.Data.ExecutionLog = analysisrun.DynamicAnalysisExecutionLog(executionLog)
}
}

6 changes: 3 additions & 3 deletions internal/worker/save_data.go
@@ -29,7 +29,7 @@ type ResultStores {
// SaveDynamicAnalysisData saves the data from dynamic analysis to the corresponding bucket in the ResultStores.
// This includes strace data, execution log, and file writes (in that order).
// If any operation fails, the rest are aborted
func SaveDynamicAnalysisData(ctx context.Context, pkg *pkgmanager.Pkg, dest *ResultStores, data analysisrun.DynamicAnalysisResults) error {
func SaveDynamicAnalysisData(ctx context.Context, pkg *pkgmanager.Pkg, dest *ResultStores, data analysisrun.DynamicAnalysisData) error {
if dest.DynamicAnalysis == nil {
// nothing to do
return nil
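
To make the "in that order" and abort-on-failure behaviour described in the comment above concrete, here is a rough sketch of the control flow. It is not the actual function body from this commit: saveStraceSummary is an invented placeholder name, while saveExecutionLog and SaveFileWritesData do appear later in this file.

```go
// saveAll is a hypothetical stand-in for the body of SaveDynamicAnalysisData,
// shown only to make the documented ordering explicit: strace data first,
// then the execution log, then file writes, stopping at the first failure.
func saveAll(ctx context.Context, pkg *pkgmanager.Pkg, dest *ResultStores, data analysisrun.DynamicAnalysisData) error {
	if err := saveStraceSummary(ctx, pkg, dest, data); err != nil { // hypothetical helper
		return fmt.Errorf("failed to save strace data: %w", err)
	}
	if err := saveExecutionLog(ctx, pkg, dest, data); err != nil {
		return fmt.Errorf("failed to save execution log: %w", err)
	}
	if err := SaveFileWritesData(ctx, pkg, dest, data); err != nil {
		return fmt.Errorf("failed to save file writes data: %w", err)
	}
	return nil
}
```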
@@ -64,7 +64,7 @@ func SaveDynamicAnalysisData(ctx context.Context, pkg *pkgmanager.Pkg, dest *Res
}

// saveExecutionLog saves the execution log to the dynamic analysis resultstore, only if it is nonempty
func saveExecutionLog(ctx context.Context, pkg *pkgmanager.Pkg, dest *ResultStores, data analysisrun.DynamicAnalysisResults) error {
func saveExecutionLog(ctx context.Context, pkg *pkgmanager.Pkg, dest *ResultStores, data analysisrun.DynamicAnalysisData) error {
if dest.ExecutionLog == nil || len(data.ExecutionLog) == 0 {
// nothing to do
return nil
@@ -125,7 +125,7 @@ func SaveStaticAnalysisData(ctx context.Context, pkg *pkgmanager.Pkg, dest *Resu
}

// SaveFileWritesData saves file writes data from dynamic analysis to the file writes bucket in the ResultStores
func SaveFileWritesData(ctx context.Context, pkg *pkgmanager.Pkg, dest *ResultStores, data analysisrun.DynamicAnalysisResults) error {
func SaveFileWritesData(ctx context.Context, pkg *pkgmanager.Pkg, dest *ResultStores, data analysisrun.DynamicAnalysisData) error {
if dest.FileWrites == nil {
return nil
}
6 changes: 3 additions & 3 deletions internal/worker/savefilewriteresults.go
@@ -11,17 +11,17 @@ import (
"github.com/ossf/package-analysis/pkg/api/analysisrun"
)

func saveFileWriteResults(rs *resultstore.ResultStore, ctx context.Context, pkg *pkgmanager.Pkg, dynamicResults analysisrun.DynamicAnalysisResults) error {
func saveFileWriteResults(rs *resultstore.ResultStore, ctx context.Context, pkg *pkgmanager.Pkg, data analysisrun.DynamicAnalysisData) error {
if rs == nil {
// TODO this should become a method on resultstore.ResultStore?
return errors.New("resultstore is nil")
}

if err := rs.SaveDynamicAnalysis(ctx, pkg, dynamicResults.FileWritesSummary, ""); err != nil {
if err := rs.SaveDynamicAnalysis(ctx, pkg, data.FileWritesSummary, ""); err != nil {
return fmt.Errorf("failed to upload file write analysis to blobstore = %w", err)
}
var allPhasesWriteBufferIdsArray []string
for _, writeBufferIds := range dynamicResults.FileWriteBufferIds {
for _, writeBufferIds := range data.FileWriteBufferIds {
allPhasesWriteBufferIdsArray = append(allPhasesWriteBufferIdsArray, writeBufferIds...)
}

41 changes: 32 additions & 9 deletions pkg/api/analysisrun/result.go
@@ -5,24 +5,47 @@ import (
)

type (
DynamicAnalysisStraceSummary map[DynamicPhase]*StraceSummary
DynamicAnalysisFileWritesSummary map[DynamicPhase]*FileWritesSummary
// DynamicAnalysisStraceSummary holds system calls made during each analysis phase,
// obtained by strace monitoring.
DynamicAnalysisStraceSummary map[DynamicPhase]*StraceSummary

// DynamicAnalysisFileWritesSummary holds a summary of files written by all processes
// under analysis, during each analysis phase. This includes a list of paths written to,
// and counts of bytes written each time. Write data is obtained via strace monitoring.
DynamicAnalysisFileWritesSummary map[DynamicPhase]*FileWritesSummary

// DynamicAnalysisFileWriteBufferIds holds IDs (names) for each recorded write operation
// during each analysis phase. These names correspond to files in a zip archive that contain
// the actual write buffer contents.
DynamicAnalysisFileWriteBufferIds map[DynamicPhase][]string
DynamicAnalysisExecutionLog string

// DynamicAnalysisExecutionLog contains a record of which package symbols (e.g. modules,
// functions, classes) were discovered during the 'execute' analysis phase, and the results
// of attempts to call or instantiate them.
DynamicAnalysisExecutionLog string
)

// DynamicAnalysisRecord is the top-level struct which is serialised to produce JSON results files
// for dynamic analysis.
// DynamicAnalysisRecord is a generic top-level struct which is used to produce JSON results
// files for dynamic analysis in the current schema format. This format is used for
// strace data, file write summary data and execution log data.
type DynamicAnalysisRecord struct {
Package Key `json:"Package"`
CreatedTimestamp int64 `json:"CreatedTimestamp"`
Analysis any `json:"Analysis"`
}

type DynamicAnalysisResults struct {
StraceSummary DynamicAnalysisStraceSummary
FileWritesSummary DynamicAnalysisFileWritesSummary
// Ids that correlate to the name of the file that saves the actual write buffer contents.
// DynamicAnalysisStraceRecord is a specialisation of DynamicAnalysisRecord that can be used for
// deserializing JSON files from the original strace-only dynamic analysis results.
type DynamicAnalysisStraceRecord struct {
Package Key `json:"Package"`
CreatedTimestamp int64 `json:"CreatedTimestamp"`
Analysis DynamicAnalysisStraceSummary `json:"Analysis"`
}
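
A hedged sketch of how these two record types might be used together: the generic DynamicAnalysisRecord when writing a results file, and DynamicAnalysisStraceRecord when reading back an older strace-only file. The function names and the use of encoding/json here are illustrative assumptions, not code from this commit.

```go
package example

import (
	"encoding/json"
	"time"

	"github.com/ossf/package-analysis/pkg/api/analysisrun"
)

// writeRecord wraps an arbitrary analysis payload in the generic JSON schema.
func writeRecord(key analysisrun.Key, payload any) ([]byte, error) {
	record := analysisrun.DynamicAnalysisRecord{
		Package:          key,
		CreatedTimestamp: time.Now().UTC().Unix(),
		Analysis:         payload,
	}
	return json.Marshal(record)
}

// readStraceRecord parses an older, strace-only results file.
func readStraceRecord(raw []byte) (*analysisrun.DynamicAnalysisStraceRecord, error) {
	var record analysisrun.DynamicAnalysisStraceRecord
	if err := json.Unmarshal(raw, &record); err != nil {
		return nil, err
	}
	return &record, nil
}
```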

// DynamicAnalysisData holds all data obtained from running dynamic analysis.
type DynamicAnalysisData struct {
StraceSummary DynamicAnalysisStraceSummary
FileWritesSummary DynamicAnalysisFileWritesSummary
FileWriteBufferIds DynamicAnalysisFileWriteBufferIds
ExecutionLog DynamicAnalysisExecutionLog
}
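
And a small sketch of reading a populated DynamicAnalysisData value from caller code: the field and type names come from this diff, while the helper function and the printed summary are illustrative only.

```go
package example

import (
	"fmt"

	"github.com/ossf/package-analysis/pkg/api/analysisrun"
)

// summarize prints one line per analysis phase recorded in a DynamicAnalysisData value.
func summarize(data analysisrun.DynamicAnalysisData) {
	for phase, strace := range data.StraceSummary {
		if strace == nil {
			continue
		}
		fmt.Printf("phase %s: status %s, %d file write buffer ids\n",
			phase, strace.Status, len(data.FileWriteBufferIds[phase]))
	}
}
```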
7 changes: 3 additions & 4 deletions sandboxes/README.md
@@ -2,7 +2,7 @@

## Sandbox Image Testing

By default the analysis command will update the sandbox images from the grc.io
By default, the analysis command will update the sandbox images from the grc.io
repository. To test local changes to sandboxes, they need to be built locally,
and the analysis needs to redirect its container source to the local filesystem.

@@ -34,7 +34,7 @@ These commands will (re-)build both sandboxes and copy them to the correct locat
### Running the analysis

The `scripts/run_analysis.sh` script automates much of the setup for running
local analysis, but it the default setting will pull the sandbox images from
local analysis, but the default setting will pull the sandbox images from
the remote container registry rather than using locally built ones. To change
this, add the `-nopull` option to the script.

@@ -108,8 +108,7 @@ $ pip install django==9.3.4

##### import

Iterates through the installed package's modules, etc and attempts to import
them.
Iterates through the installed package's modules and attempts to import them.

This is relevant to languages that execute code at import time.

