Skip to content

Commit

Permalink
Eval (#87)
Browse files Browse the repository at this point in the history
See tn003_learning_eval.md for a more detailed description of how we are doing
evaluation.

This is a first pass at evaluation.
* Implement a distance metric based on edit distance
* Implement the infrastructure to compute it.

Add an apply command and use it to run different experiments

* Requires starting to move some of the Agent config into the API
package because we want to reuse it in the experiment type.

Start an initial evaluation dataset.

### API Updates
- Added `EvalResult` structure to represent the evaluation outcome,
including the actual commands generated, the expected commands, and the
evaluation distance.
- Introduced `EvalResultStatus` to indicate the status of an evaluation,
such as `DONE` or `ERROR`.

### Agent Updates
- Updated the `Agent` service to support evaluation mode, allowing it to
operate without impacting the learning process.

### Executor Updates
- Enhanced the `Executor` service to handle execution in evaluation
mode, ensuring that execution traces are marked accordingly.

### Evaluator Implementation
- Implemented the `Evaluator` component responsible for orchestrating
the evaluation process, loading evaluation examples, generating
predictions with the Agent, calculating distances, and updating results.

### Google Sheets Integration
- Added functionality to export evaluation results to Google Sheets,
enabling easy review and analysis of Foyle's performance.

### CLI Tool Enhancements
- Extended the CLI tool with commands for running evaluations.

### Miscellaneous
- Added necessary protobuf definitions for new data structures related
to evaluations.
- Updated server setup to handle evaluation logic and integrate with the
learning mechanism.
- Provided sample evaluation datasets for initial testing and validation
of the evaluation process.
  • Loading branch information
jlewi authored May 3, 2024
1 parent 4a5e3b5 commit 1be7110
Show file tree
Hide file tree
Showing 32 changed files with 2,555 additions and 32 deletions.
21 changes: 21 additions & 0 deletions app/api/config.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
package api

// AgentConfig holds the settings for the Agent.
type AgentConfig struct {
	// Model names the model used to generate completions.
	Model string `json:"model" yaml:"model"`

	// RAG holds the settings for the RAG model. It is optional and is left
	// out of the serialized output when nil.
	RAG *RAGConfig `json:"rag,omitempty" yaml:"rag,omitempty"`

	// EvalMode turns evaluation mode on or off.
	// While it is on, logs are specially marked so that the requests won't be
	// used for training.
	EvalMode bool `json:"evalMode" yaml:"evalMode"`
}

// RAGConfig holds the settings for the RAG model.
type RAGConfig struct {
	// Enabled switches the RAG model on or off.
	Enabled bool `json:"enabled" yaml:"enabled"`

	// MaxResults caps the number of results to return.
	MaxResults int `json:"maxResults" yaml:"maxResults"`
}
30 changes: 30 additions & 0 deletions app/api/experiment.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,30 @@
package api

import "k8s.io/apimachinery/pkg/runtime/schema"

// ExperimentGVK is the group/version/kind that identifies Experiment
// resources. It is assembled from the package's Group and Version constants
// and the kind "Experiment".
var ExperimentGVK = schema.GroupVersionKind{
	Group:   Group,
	Version: Version,
	Kind:    "Experiment",
}

// Experiment is the resource that represents an experiment. It pairs the
// resource metadata with the experiment's specification.
type Experiment struct {
	// Metadata is the metadata attached to the experiment.
	Metadata Metadata `json:"metadata" yaml:"metadata"`

	// Spec is the specification of the experiment.
	Spec ExperimentSpec `json:"spec" yaml:"spec"`
}

// ExperimentSpec is the specification of an Experiment: where its input
// lives, where its results are stored and published, and how the agent is
// configured.
type ExperimentSpec struct {
	// EvalDir is the directory containing the evaluation input.
	EvalDir string `json:"evalDir" yaml:"evalDir"`

	// DBDir is the directory for the pebble database that will store the
	// results.
	DBDir string `json:"dbDir" yaml:"dbDir"`

	// SheetID is the ID of the Google Sheet to update with the results.
	SheetID string `json:"sheetID" yaml:"sheetID"`

	// SheetName is the name of the sheet to update.
	SheetName string `json:"sheetName" yaml:"sheetName"`

	// Agent is the configuration for the agent. It is optional and is left
	// out of the serialized output when nil.
	Agent *AgentConfig `json:"agent,omitempty" yaml:"agent,omitempty"`
}
21 changes: 21 additions & 0 deletions app/api/meta.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
package api

// Group is the API group of the resources declared in this package.
const Group = "foyle.io"

// Version is the API version of the resources declared in this package.
const Version = "v1alpha1"

// N.B. We need to redefine Metadata and not reuse the version in the K8s libraries
// because we want it to have yaml tags so we can serialize with the YAML library.

// Metadata is the metadata attached to a resource: its name, namespace,
// labels, annotations and resource version. Only YAML tags are declared.
type Metadata struct {
	// Name is the name of the resource; omitted from YAML when empty.
	Name string `yaml:"name,omitempty"`

	// Namespace is the namespace of the resource; omitted from YAML when empty.
	Namespace string `yaml:"namespace,omitempty"`

	// Labels are the labels attached to the resource.
	// NOTE(review): unlike the other fields this tag has no omitempty, so the
	// key is emitted even when no labels are set; confirm this is intentional.
	Labels map[string]string `yaml:"labels"`

	// Annotations are the annotations attached to the resource; omitted from
	// YAML when empty.
	Annotations map[string]string `yaml:"annotations,omitempty"`

	// ResourceVersion is used for optimistic concurrency.
	// Ref: https://github.com/kubernetes/community/blob/master/contributors/devel/sig-architecture/api-conventions.md#metadata
	// This should be treated as an opaque value by clients.
	ResourceVersion string `yaml:"resourceVersion,omitempty"`
}
52 changes: 52 additions & 0 deletions app/cmd/apply.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,52 @@
package cmd

import (
"context"
"fmt"
"os"

"github.com/go-logr/zapr"
"github.com/jlewi/foyle/app/pkg/application"
"github.com/pkg/errors"
"github.com/spf13/cobra"
"go.uber.org/zap"
)

// NewApplyCmd creates the apply command. The command takes one or more
// arguments, each a YAML file or a directory, and hands them to the
// application's ApplyPaths.
//
// If any step fails, the error is printed to stderr and the process exits
// with status 1.
func NewApplyCmd() *cobra.Command {
	// missingArgsMsg is used both for the log entry and for the returned error.
	const missingArgsMsg = "apply takes at least one argument which should be the file or directory YAML to apply."

	// TODO(jeremy): We should update apply to support the image resource.
	applyCmd := &cobra.Command{
		Use:   "apply <resource.yaml> <resourceDir> <resource.yaml> ...",
		Short: "Apply the specified resource.",
		Run: func(cmd *cobra.Command, args []string) {
			// The work runs inside a closure so that every failure funnels
			// through the single reporting path below.
			err := func() error {
				log := zapr.NewLogger(zap.L())
				if len(args) == 0 {
					log.Info(missingArgsMsg)
					return errors.New(missingArgsMsg)
				}
				logVersion()

				app := application.NewApp()
				if err := app.LoadConfig(cmd); err != nil {
					return err
				}
				if err := app.SetupLogging(false); err != nil {
					return err
				}

				if err := app.SetupRegistry(); err != nil {
					return err
				}

				return app.ApplyPaths(context.Background(), args)
			}()
			if err != nil {
				// Diagnostics belong on stderr so they are not mixed into
				// output that a caller may capture from stdout.
				fmt.Fprintf(os.Stderr, "Error running apply;\n %+v\n", err)
				os.Exit(1)
			}
		},
	}

	return applyCmd
}
1 change: 1 addition & 0 deletions app/cmd/root.go
Original file line number Diff line number Diff line change
Expand Up @@ -30,5 +30,6 @@ func NewRootCmd() *cobra.Command {
rootCmd.AddCommand(NewLearnCmd())
rootCmd.AddCommand(NewConfigCmd())
rootCmd.AddCommand(NewLogsCmd())
rootCmd.AddCommand(NewApplyCmd())
return rootCmd
}
23 changes: 21 additions & 2 deletions app/go.mod
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,8 @@ replace github.com/jlewi/foyle/protos/go => ../protos/go

require (
github.com/Kunde21/markdownfmt/v3 v3.1.0
github.com/agnivade/levenshtein v1.1.1
github.com/cockroachdb/pebble v1.1.0
github.com/gin-contrib/cors v1.7.1
github.com/gin-gonic/gin v1.9.1
github.com/go-cmd/cmd v1.4.1
Expand All @@ -19,7 +21,7 @@ require (
github.com/honeycombio/honeycomb-opentelemetry-go v0.10.0
github.com/honeycombio/otel-config-go v1.15.0
github.com/jlewi/foyle/protos/go v0.0.0-00010101000000-000000000000
github.com/jlewi/hydros v0.0.6
github.com/jlewi/hydros v0.0.7-0.20240503183011-8f99ead373fb
github.com/jlewi/monogo v0.0.0-20240123191147-401afe194d74
github.com/maxence-charriere/go-app/v9 v9.8.0
github.com/pkg/errors v0.9.1
Expand Down Expand Up @@ -50,17 +52,24 @@ require (
cloud.google.com/go/longrunning v0.5.5 // indirect
cloud.google.com/go/secretmanager v1.11.5 // indirect
cloud.google.com/go/storage v1.36.0 // indirect
github.com/DataDog/zstd v1.4.5 // indirect
github.com/Microsoft/go-winio v0.6.1 // indirect
github.com/ProtonMail/go-crypto v0.0.0-20230217124315-7d5c6f04bbb8 // indirect
github.com/acomagu/bufpipe v1.0.4 // indirect
github.com/beorn7/perks v1.0.1 // indirect
github.com/bmatcuk/doublestar/v4 v4.6.1 // indirect
github.com/bytedance/sonic v1.11.3 // indirect
github.com/cenkalti/backoff/v4 v4.3.0 // indirect
github.com/cespare/xxhash/v2 v2.2.0 // indirect
github.com/chenzhuoyu/base64x v0.0.0-20230717121745-296ad89f973d // indirect
github.com/chenzhuoyu/iasm v0.9.1 // indirect
github.com/cli/go-gh v0.1.3-0.20221102170023-e3ec45fb1d1b // indirect
github.com/cli/safeexec v1.0.0 // indirect
github.com/cloudflare/circl v1.1.0 // indirect
github.com/cockroachdb/errors v1.11.1 // indirect
github.com/cockroachdb/logtags v0.0.0-20230118201751-21c54148d20b // indirect
github.com/cockroachdb/redact v1.1.5 // indirect
github.com/cockroachdb/tokenbucket v0.0.0-20230807174530-cc333fc44b06 // indirect
github.com/containerd/stargz-snapshotter/estargz v0.14.3 // indirect
github.com/davecgh/go-spew v1.1.2-0.20180830191138-d8f796af33cc // indirect
github.com/docker/cli v24.0.0+incompatible // indirect
Expand All @@ -71,9 +80,10 @@ require (
github.com/felixge/httpsnoop v1.0.4 // indirect
github.com/fsnotify/fsnotify v1.7.0 // indirect
github.com/gabriel-vasile/mimetype v1.4.3 // indirect
github.com/getsentry/sentry-go v0.18.0 // indirect
github.com/ghodss/yaml v1.0.0 // indirect
github.com/gin-contrib/sse v0.1.0 // indirect
github.com/go-errors/errors v1.0.1 // indirect
github.com/go-errors/errors v1.4.2 // indirect
github.com/go-git/gcfg v1.5.0 // indirect
github.com/go-git/go-billy/v5 v5.4.1 // indirect
github.com/go-git/go-git/v5 v5.6.1 // indirect
Expand All @@ -89,6 +99,7 @@ require (
github.com/gogo/protobuf v1.3.2 // indirect
github.com/golang/groupcache v0.0.0-20210331224755-41bb18bfe9da // indirect
github.com/golang/protobuf v1.5.4 // indirect
github.com/golang/snappy v0.0.4 // indirect
github.com/google/gnostic v0.6.9 // indirect
github.com/google/gofuzz v1.2.0 // indirect
github.com/google/s2a-go v0.1.7 // indirect
Expand All @@ -104,13 +115,16 @@ require (
github.com/kevinburke/ssh_config v1.2.0 // indirect
github.com/klauspost/compress v1.17.0 // indirect
github.com/klauspost/cpuid/v2 v2.2.7 // indirect
github.com/kr/pretty v0.3.1 // indirect
github.com/kr/text v0.2.0 // indirect
github.com/leodido/go-urn v1.4.0 // indirect
github.com/lufia/plan9stats v0.0.0-20240408141607-282e7b5d6b74 // indirect
github.com/magiconair/properties v1.8.7 // indirect
github.com/mailru/easyjson v0.7.7 // indirect
github.com/matryer/try v0.0.0-20161228173917-9ac251b645a2 // indirect
github.com/mattn/go-isatty v0.0.20 // indirect
github.com/mattn/go-runewidth v0.0.13 // indirect
github.com/matttproud/golang_protobuf_extensions v1.0.2-0.20181231171920-c182affec369 // indirect
github.com/mitchellh/go-homedir v1.1.0 // indirect
github.com/mitchellh/mapstructure v1.5.0 // indirect
github.com/modern-go/concurrent v0.0.0-20180306012644-bacd9c7ef1dd // indirect
Expand All @@ -121,7 +135,12 @@ require (
github.com/pelletier/go-toml/v2 v2.2.0 // indirect
github.com/pjbgf/sha1cd v0.3.0 // indirect
github.com/power-devops/perfstat v0.0.0-20240221224432-82ca36839d55 // indirect
github.com/prometheus/client_golang v1.12.0 // indirect
github.com/prometheus/client_model v0.2.1-0.20210607210712-147c58e9608a // indirect
github.com/prometheus/common v0.32.1 // indirect
github.com/prometheus/procfs v0.7.3 // indirect
github.com/rivo/uniseg v0.4.2 // indirect
github.com/rogpeppe/go-internal v1.11.0 // indirect
github.com/sagikazarmark/locafero v0.4.0 // indirect
github.com/sagikazarmark/slog-shim v0.1.0 // indirect
github.com/sergi/go-diff v1.2.0 // indirect
Expand Down
Loading

0 comments on commit 1be7110

Please sign in to comment.