Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Improve error handling when cannot connect to target(s) and when target doesn't meet requirements for metrics command #95

Merged
merged 5 commits into from
Nov 18, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
11 changes: 10 additions & 1 deletion cmd/config/config.go
Original file line number Diff line number Diff line change
Expand Up @@ -207,13 +207,22 @@ func runCmd(cmd *cobra.Command, args []string) error {
appContext := cmd.Context().Value(common.AppContext{}).(common.AppContext)
localTempDir := appContext.TempDir
// get the targets
myTargets, err := common.GetTargets(cmd, true, true, localTempDir)
myTargets, targetErrs, err := common.GetTargets(cmd, true, true, localTempDir)
if err != nil {
fmt.Fprintf(os.Stderr, "Error: %v\n", err)
slog.Error(err.Error())
cmd.SilenceUsage = true
return err
}
// check for errors in target connections
for _, err := range targetErrs {
if err != nil {
fmt.Fprintf(os.Stderr, "Error: %v\n", err)
slog.Error(err.Error())
cmd.SilenceUsage = true
return err
}
}
if len(myTargets) == 0 {
err := fmt.Errorf("no targets specified")
fmt.Fprintf(os.Stderr, "Error: %v\n", err)
Expand Down
15 changes: 6 additions & 9 deletions cmd/metrics/metadata.go
Original file line number Diff line number Diff line change
Expand Up @@ -140,10 +140,10 @@ func LoadMetadata(myTarget target.Target, noRoot bool, perfPath string, localTem
var err error
var output string
if metadata.SupportsFixedCycles, output, err = getSupportsFixedEvent(myTarget, "cpu-cycles", cpu.MicroArchitecture, noRoot, perfPath, localTempDir); err != nil {
err = fmt.Errorf("failed to determine if fixed-counter cpu-cycles is supported: %v", err)
err = fmt.Errorf("failed to determine if fixed-counter 'cpu-cycles' is supported: %v", err)
} else {
if !metadata.SupportsFixedCycles {
slog.Warn("Fixed-counter cpu-cycles events not supported", slog.String("output", output))
slog.Warn("Fixed-counter 'cpu-cycles' events not supported", slog.String("output", output))
}
}
slowFuncChannel <- err
Expand All @@ -153,10 +153,10 @@ func LoadMetadata(myTarget target.Target, noRoot bool, perfPath string, localTem
var err error
var output string
if metadata.SupportsFixedInstructions, output, err = getSupportsFixedEvent(myTarget, "instructions", cpu.MicroArchitecture, noRoot, perfPath, localTempDir); err != nil {
err = fmt.Errorf("failed to determine if fixed-counter instructions is supported: %v", err)
err = fmt.Errorf("failed to determine if fixed-counter 'instructions' is supported: %v", err)
} else {
if !metadata.SupportsFixedInstructions {
slog.Warn("Fixed-counter instructions events not supported", slog.String("output", output))
slog.Warn("Fixed-counter 'instructions' events not supported", slog.String("output", output))
}
}
slowFuncChannel <- err
Expand All @@ -170,11 +170,8 @@ func LoadMetadata(myTarget target.Target, noRoot bool, perfPath string, localTem
errs = append(errs, <-slowFuncChannel)
for _, errInside := range errs {
if errInside != nil {
if err == nil {
err = errInside
} else {
err = fmt.Errorf("%v : %v", err, errInside)
}
slog.Error("error loading metadata", slog.String("error", errInside.Error()), slog.String("target", myTarget.GetName()))
err = fmt.Errorf("target not supported, see log for details")
}
}
}()
Expand Down
121 changes: 83 additions & 38 deletions cmd/metrics/metrics.go
Original file line number Diff line number Diff line change
Expand Up @@ -472,16 +472,17 @@ func validateFlags(cmd *cobra.Command, args []string) error {
}

type targetContext struct {
target target.Target
err error
perfPath string
tempDir string
metadata Metadata
nmiDisabled bool
perfMuxIntervals map[string]int
groupDefinitions []GroupDefinition
metricDefinitions []MetricDefinition
printedFiles []string
target target.Target
err error
perfPath string
tempDir string
metadata Metadata
nmiDisabled bool
perfMuxIntervalsSet bool
perfMuxIntervals map[string]int
groupDefinitions []GroupDefinition
metricDefinitions []MetricDefinition
printedFiles []string
}

type targetError struct {
Expand All @@ -494,7 +495,6 @@ func runCmd(cmd *cobra.Command, args []string) error {
appContext := cmd.Context().Value(common.AppContext{}).(common.AppContext)
localTempDir := appContext.TempDir
localOutputDir := appContext.OutputDir

// handle signals
// child processes will exit when the signals are received which will
// allow this app to exit normally
Expand All @@ -505,7 +505,6 @@ func runCmd(cmd *cobra.Command, args []string) error {
setSignalReceived()
slog.Info("received signal", slog.String("signal", sig.String()))
}()

// round up to next perfPrintInterval second (the collection interval used by perf stat)
if flagDuration != 0 {
qf := float64(flagDuration) / float64(flagPerfPrintInterval)
Expand All @@ -514,32 +513,22 @@ func runCmd(cmd *cobra.Command, args []string) error {
flagDuration = (qi + 1) * flagPerfPrintInterval
}
}

// get the targets
myTargets, err := common.GetTargets(cmd, !flagNoRoot, !flagNoRoot, localTempDir)
myTargets, targetErrs, err := common.GetTargets(cmd, !flagNoRoot, !flagNoRoot, localTempDir)
if err != nil {
fmt.Fprintf(os.Stderr, "Error: %v\n", err)
slog.Error(err.Error())
cmd.SilenceUsage = true
return err
}
if len(myTargets) == 0 {
err := fmt.Errorf("no targets specified")
fmt.Fprintf(os.Stderr, "Error: %v\n", err)
slog.Error(err.Error())
cmd.SilenceUsage = true
return err
}
targetTempRoot, _ := cmd.Flags().GetString(common.FlagTargetTempDirName)

// check for live mode with multiple targets
if flagLive && len(myTargets) > 1 {
err := fmt.Errorf("live mode is only supported for a single target")
fmt.Fprintf(os.Stderr, "Error: %v\n", err)
slog.Error(err.Error())
cmd.SilenceUsage = true
return err
}

// create progress spinner
multiSpinner := progress.NewMultiSpinner()
for _, myTarget := range myTargets {
Expand All @@ -553,7 +542,48 @@ func runCmd(cmd *cobra.Command, args []string) error {
}
multiSpinner.Start()
defer multiSpinner.Finish()

// check for errors in target creation
for i := range targetErrs {
if targetErrs[i] != nil {
_ = multiSpinner.Status(myTargets[i].GetName(), fmt.Sprintf("Error: %v", targetErrs[i]))
// remove target from targets list
myTargets = append(myTargets[:i], myTargets[i+1:]...)
}
}
// check if any targets remain
if len(myTargets) == 0 {
err := fmt.Errorf("no targets specified")
fmt.Fprintf(os.Stderr, "Error: %v\n", err)
slog.Error(err.Error())
cmd.SilenceUsage = true
return err
}
// check if all targets have the same architecture
for _, target := range myTargets {
tArch, err := target.GetArchitecture()
if err != nil {
err = fmt.Errorf("failed to get architecture: %w", err)
fmt.Fprintf(os.Stderr, "Error: %v\n", err)
slog.Error(err.Error())
cmd.SilenceUsage = true
return err
}
tArch0, err := myTargets[0].GetArchitecture()
if err != nil {
err = fmt.Errorf("failed to get architecture: %w", err)
fmt.Fprintf(os.Stderr, "Error: %v\n", err)
slog.Error(err.Error())
cmd.SilenceUsage = true
return err
}
if tArch != tArch0 {
err := fmt.Errorf("all targets must have the same architecture")
fmt.Fprintf(os.Stderr, "Error: %v\n", err)
slog.Error(err.Error())
cmd.SilenceUsage = true
return err
}
}
// extract perf into local temp directory (assumes all targets have the same architecture)
localPerfPath, err := extractPerf(myTargets[0], localTempDir)
if err != nil {
Expand All @@ -562,23 +592,26 @@ func runCmd(cmd *cobra.Command, args []string) error {
cmd.SilenceUsage = true
return err
}

// prepare the targets
channelTargetError := make(chan targetError)
var targetContexts []targetContext
for _, myTarget := range myTargets {
targetContexts = append(targetContexts, targetContext{target: myTarget})
}
targetTempRoot, _ := cmd.Flags().GetString(common.FlagTargetTempDirName)
for i := range targetContexts {
go prepareTarget(&targetContexts[i], targetTempRoot, localTempDir, localPerfPath, channelTargetError, multiSpinner.Status)
}
// wait for all targets to be prepared
numPreparedTargets := 0
for range targetContexts {
targetError := <-channelTargetError
if targetError.err != nil {
slog.Error("failed to prepare target", slog.String("target", targetError.target.GetName()), slog.String("error", targetError.err.Error()))
} else {
numPreparedTargets++
}
}

// schedule temporary directory cleanup
if cmd.Parent().PersistentFlags().Lookup("debug").Value.String() != "true" { // don't remove the directory if we're debugging
defer func() {
Expand All @@ -592,7 +625,6 @@ func runCmd(cmd *cobra.Command, args []string) error {
}
}()
}

// schedule NMI watchdog reset
defer func() {
for _, targetContext := range targetContexts {
Expand All @@ -604,28 +636,44 @@ func runCmd(cmd *cobra.Command, args []string) error {
}
}
}()

// schedule mux interval reset
defer func() {
for _, targetContext := range targetContexts {
err := SetMuxIntervals(targetContext.target, targetContext.perfMuxIntervals, localTempDir)
if err != nil {
slog.Error("failed to reset perf mux intervals", slog.String("target", targetContext.target.GetName()), slog.String("error", err.Error()))
if targetContext.perfMuxIntervalsSet {
err := SetMuxIntervals(targetContext.target, targetContext.perfMuxIntervals, localTempDir)
if err != nil {
slog.Error("failed to reset perf mux intervals", slog.String("target", targetContext.target.GetName()), slog.String("error", err.Error()))
}
}
}
}()

// check if any targets were successfully prepared
if numPreparedTargets == 0 {
err := fmt.Errorf("no targets were successfully prepared")
slog.Error(err.Error())
cmd.SilenceUsage = true
return err
}
// prepare the metrics for each target
for i := range targetContexts {
go prepareMetrics(&targetContexts[i], localTempDir, channelTargetError, multiSpinner.Status)
}
// wait for all metrics to be prepared
numTargetsWithPreparedMetrics := 0
for range targetContexts {
targetError := <-channelTargetError
if targetError.err != nil {
slog.Error("failed to prepare metrics", slog.String("target", targetError.target.GetName()), slog.String("error", targetError.err.Error()))
} else {
numTargetsWithPreparedMetrics++
}
}

if numTargetsWithPreparedMetrics == 0 {
err := fmt.Errorf("no targets had metrics prepared")
slog.Error(err.Error())
cmd.SilenceUsage = true
return err
}
// show metric names and exit, if requested
if flagShowMetricNames {
// stop the multiSpinner
Expand All @@ -638,7 +686,6 @@ func runCmd(cmd *cobra.Command, args []string) error {
}
return nil
}

// create the local output directory
if !flagLive {
err = common.CreateOutputDir(localOutputDir)
Expand All @@ -649,7 +696,6 @@ func runCmd(cmd *cobra.Command, args []string) error {
return err
}
}

// write metadata to file
if flagWriteEventsToFile {
for _, targetContext := range targetContexts {
Expand All @@ -661,7 +707,6 @@ func runCmd(cmd *cobra.Command, args []string) error {
}
}
}

// start the metric collection
for i := range targetContexts {
if targetContexts[i].err == nil {
Expand Down Expand Up @@ -796,6 +841,7 @@ func prepareTarget(targetContext *targetContext, targetTempRoot string, localTem
channelError <- targetError{target: myTarget, err: err}
return
}
targetContext.perfMuxIntervalsSet = true
}
// get the full path to the perf binary
if targetContext.perfPath, err = getPerfPath(myTarget, localPerfPath); err != nil {
Expand All @@ -819,7 +865,6 @@ func prepareMetrics(targetContext *targetContext, localTempDir string, channelEr
_ = statusUpdate(myTarget.GetName(), "collecting metadata")
var err error
if targetContext.metadata, err = LoadMetadata(myTarget, flagNoRoot, targetContext.perfPath, localTempDir); err != nil {
err = fmt.Errorf("failed to load metadata: %w", err)
_ = statusUpdate(myTarget.GetName(), fmt.Sprintf("Error: %s", err.Error()))
targetContext.err = err
channelError <- targetError{target: myTarget, err: err}
Expand Down
25 changes: 17 additions & 8 deletions internal/common/common.go
Original file line number Diff line number Diff line change
Expand Up @@ -148,20 +148,13 @@ func (rc *ReportingCommand) Run() error {
}
}
// get the targets
myTargets, err := GetTargets(rc.Cmd, elevated, false, localTempDir)
myTargets, targetErrs, err := GetTargets(rc.Cmd, elevated, false, localTempDir)
if err != nil {
fmt.Fprintf(os.Stderr, "Error: %v\n", err)
slog.Error(err.Error())
rc.Cmd.SilenceUsage = true
return err
}
if len(myTargets) == 0 {
err := fmt.Errorf("no targets specified")
fmt.Fprintf(os.Stderr, "Error: %v\n", err)
slog.Error(err.Error())
rc.Cmd.SilenceUsage = true
return err
}
// setup and start the progress indicator
multiSpinner := progress.NewMultiSpinner()
for _, target := range myTargets {
Expand All @@ -174,6 +167,22 @@ func (rc *ReportingCommand) Run() error {
}
}
multiSpinner.Start()
// check for errors in target creation
for i := range targetErrs {
if targetErrs[i] != nil {
_ = multiSpinner.Status(myTargets[i].GetName(), fmt.Sprintf("Error: %v", targetErrs[i]))
// remove target from targets list
myTargets = append(myTargets[:i], myTargets[i+1:]...)
}
}
// check if we have any targets to run the scripts on
if len(myTargets) == 0 {
err := fmt.Errorf("no targets specified")
fmt.Fprintf(os.Stderr, "Error: %v\n", err)
slog.Error(err.Error())
rc.Cmd.SilenceUsage = true
return err
}
// run the scripts on the targets
channelTargetScriptOutputs := make(chan TargetScriptOutputs)
channelError := make(chan error)
Expand Down
Loading