From 8ce6e494f657c0bed37083f91eecb6b0b5560f9f Mon Sep 17 00:00:00 2001 From: Jeremy Lewi Date: Sat, 14 Sep 2024 10:31:13 -0700 Subject: [PATCH] Improve GetLLMLogs (#237) * GetLLMLogs is critical for being able to view the actual inputs/outputs of the LLM * Don't require the user to specify the LogFile * Add a playbook to describe how to use it to view the actual LLM request and response * Support OpenAI and Anthropic --- app/api/config.go | 1 + app/pkg/analyze/crud.go | 32 ++++-- app/pkg/analyze/logs.go | 90 ++++++++++------ app/pkg/analyze/logs_test.go | 27 +++-- app/pkg/analyze/render.go | 19 ++++ app/pkg/analyze/render_test.go | 45 ++++---- .../analyze/test_data/anthropic_request.json | 17 +++ app/pkg/logs/matchers/names.go | 7 ++ app/pkg/logs/matchers/names_test.go | 29 +++++ docs/content/en/docs/observability/ai.md | 72 +++++++++++++ protos/foyle/logs/traces.proto | 7 +- protos/go/foyle/logs/traces.pb.go | 101 +++++++++++------- protos/go/foyle/logs/traces.zap.go | 10 +- protos/go/foyle/v1alpha1/agent.zap.go | 2 +- tools/stripmetadata/main.go | 9 +- 15 files changed, 352 insertions(+), 116 deletions(-) create mode 100644 app/pkg/analyze/test_data/anthropic_request.json create mode 100644 docs/content/en/docs/observability/ai.md diff --git a/app/api/config.go b/app/api/config.go index 22b76458..6fe8da01 100644 --- a/app/api/config.go +++ b/app/api/config.go @@ -7,6 +7,7 @@ const ( ModelProviderAnthropic ModelProvider = "anthropic" ModelProviderOpenAI ModelProvider = "openai" ModelProviderDefault ModelProvider = "openai" + ModelProviderUnknown ModelProvider = "unknown" ) type AgentConfig struct { diff --git a/app/pkg/analyze/crud.go b/app/pkg/analyze/crud.go index 1feeaf35..dc85cc9e 100644 --- a/app/pkg/analyze/crud.go +++ b/app/pkg/analyze/crud.go @@ -2,6 +2,7 @@ package analyze import ( "context" + "sort" "connectrpc.com/connect" "github.com/jlewi/foyle/app/pkg/logs" @@ -54,26 +55,35 @@ func (h *CrudHandler) GetTrace(ctx context.Context, request 
*connect.Request[log } func (h *CrudHandler) GetLLMLogs(ctx context.Context, request *connect.Request[logspb.GetLLMLogsRequest]) (*connect.Response[logspb.GetLLMLogsResponse], error) { + log := logs.FromContext(ctx) getReq := request.Msg if getReq.GetTraceId() == "" { return nil, connect.NewError(connect.CodeInvalidArgument, errors.New("No traceID provided")) } - if getReq.GetLogFile() == "" { - return nil, connect.NewError(connect.CodeInvalidArgument, errors.New("No LogFile provided")) - } - - log, err := readAnthropicLog(ctx, getReq.GetTraceId(), getReq.GetLogFile()) + logFiles, err := findLogFiles(ctx, h.cfg.GetLogDir()) if err != nil { - // Assume its a not found error. - return nil, connect.NewError(connect.CodeInternal, errors.Wrapf(err, "Failed to get prompt for trace id %s; logFile: %s", getReq.GetTraceId(), getReq.GetLogFile())) + log.Error(err, "Failed to find log files") + return nil, connect.NewError(connect.CodeInternal, errors.Wrap(err, "Failed to find log files")) } - resp := &logspb.GetLLMLogsResponse{} - resp.RequestHtml = renderAnthropicRequest(log.Request) - resp.ResponseHtml = renderAnthropicResponse(log.Response) + // Sort the slice in descending order + sort.Slice(logFiles, func(i, j int) bool { + return logFiles[i] > logFiles[j] + }) + + // We loop over all the logFiles until we find it which is not efficient. 
+ for _, logFile := range logFiles { + resp, err := readLLMLog(ctx, getReq.GetTraceId(), logFile) + if err != nil { + return nil, connect.NewError(connect.CodeInternal, errors.Wrapf(err, "Failed to get LLM call log for trace id %s; logFile: %s", getReq.GetTraceId(), getReq.GetLogFile())) + } + if resp != nil { + return connect.NewResponse(resp), nil + } + } - return connect.NewResponse(resp), nil + return nil, connect.NewError(connect.CodeNotFound, errors.Errorf("No log file found for traceID %v", getReq.GetTraceId())) } func (h *CrudHandler) GetBlockLog(ctx context.Context, request *connect.Request[logspb.GetBlockLogRequest]) (*connect.Response[logspb.GetBlockLogResponse], error) { diff --git a/app/pkg/analyze/logs.go b/app/pkg/analyze/logs.go index fc1ab2f6..c16133e5 100644 --- a/app/pkg/analyze/logs.go +++ b/app/pkg/analyze/logs.go @@ -7,6 +7,9 @@ import ( "os" "strings" + "github.com/jlewi/foyle/app/pkg/logs/matchers" + logspb "github.com/jlewi/foyle/protos/go/foyle/logs" + "connectrpc.com/connect" "github.com/jlewi/foyle/app/api" "github.com/jlewi/foyle/app/pkg/logs" @@ -24,61 +27,82 @@ type AnthropicLog struct { Response *anthropic.MessagesResponse } -// readAnthropicRequest reads an Anthropic request from a log file -// -// N.B. If there are multiple requests as part of the same trace then only the last request will be returned. -// TODO(jeremy): Ideally we'd join the request with its response and return the one that succeeded. The reason -// There might be multiple is because context exceeded length; in which case only one request which has been -// sufficiently shortened will have an actual response. 
-func readAnthropicLog(ctx context.Context, traceId string, logFile string) (*AnthropicLog, error) { +// readLLMLog tries to fetch the raw LLM request/response from the log +func readLLMLog(ctx context.Context, traceId string, logFile string) (*logspb.GetLLMLogsResponse, error) { log := logs.FromContext(ctx) file, err := os.Open(logFile) + if err != nil { return nil, connect.NewError(connect.CodeNotFound, errors.Wrapf(err, "Failed to open file %s", logFile)) } d := json.NewDecoder(file) - aLog := &AnthropicLog{ - TraceID: traceId, - LogFile: logFile, - } - req := &anthropic.MessagesRequest{} - resp := &anthropic.MessagesResponse{} + resp := &logspb.GetLLMLogsResponse{} + + provider := api.ModelProviderUnknown for { entry := &api.LogEntry{} if err := d.Decode(entry); err != nil { if err == io.EOF { - return aLog, nil + return nil, nil } log.Error(err, "Failed to decode log entry") } if entry.TraceID() != traceId { continue } - if !strings.HasSuffix(entry.Function(), "anthropic.(*Completer).Complete") { + isMatch := false + if strings.HasSuffix(entry.Function(), "anthropic.(*Completer).Complete") { + provider = api.ModelProviderAnthropic + isMatch = true + } + + if matchers.IsOAIComplete(entry.Function()) { + provider = api.ModelProviderOpenAI + isMatch = true + } + + if strings.HasSuffix(entry.Function(), "anthropic.(*Completer).Complete") { + provider = api.ModelProviderAnthropic + isMatch = true + } + + // If it's not a matching request, ignore it. + if !isMatch { continue } + if reqBytes := entry.Request(); reqBytes != nil { + resp.RequestJson = string(reqBytes) + } - reqBytes := entry.Request() - if reqBytes != nil { - if err := json.Unmarshal(reqBytes, req); err != nil { - // TODO(jeremy): Should we include the error in the response? 
- log.Error(err, "Failed to unmarshal request") - } else { - aLog.Request = req - req = &anthropic.MessagesRequest{} - } + if resBytes := entry.Response(); resBytes != nil { + resp.ResponseJson = string(resBytes) } - respBytes := entry.Response() - if respBytes != nil { - if err := json.Unmarshal(respBytes, resp); err != nil { - // TODO(jeremy): Should we include the error in the response? - log.Error(err, "Failed to unmarshal response") - } else { - aLog.Response = resp - resp = &anthropic.MessagesResponse{} - } + // Once both the request and the response have been read we can stop scanning. + // This isn't a great implementation because we will end up reading all the logs if, for some reason, + // the logs don't have the entries. + if resp.RequestJson != "" && resp.ResponseJson != "" { + break + } + } + + if provider == api.ModelProviderAnthropic && resp.ResponseJson != "" { + html, err := renderAnthropicRequestJson(resp.RequestJson) + if err != nil { + log.Error(err, "Failed to render request") + + } else { + resp.RequestHtml = html + } + + htmlResp, err := renderAnthropicResponseJson(resp.ResponseJson) + if err != nil { + log.Error(err, "Failed to render response") + + } else { + resp.ResponseHtml = htmlResp } } + return resp, nil } diff --git a/app/pkg/analyze/logs_test.go b/app/pkg/analyze/logs_test.go index 169d09fa..26d3a53d 100644 --- a/app/pkg/analyze/logs_test.go +++ b/app/pkg/analyze/logs_test.go @@ -2,9 +2,12 @@ package analyze import ( "context" + "encoding/json" "os" "path/filepath" "testing" + + "github.com/liushuangls/go-anthropic/v2" ) func TestReadAnthropicLog(t *testing.T) { @@ -28,22 +31,32 @@ func TestReadAnthropicLog(t *testing.T) { for _, c := range cases { t.Run(c.name, func(t *testing.T) { fullPath := filepath.Join(cwd, "test_data", c.logFile) - result, err := readAnthropicLog(context.Background(), c.traceId, fullPath) + result, err := readLLMLog(context.Background(), c.traceId, fullPath) if err != nil { t.Errorf("Failed to read Anthropic request: %v", err) } if 
result == nil { t.Fatalf("Request should not be nil") } - if result.Request == nil { + if result.RequestHtml == "" { + t.Errorf("Request should not be nil") + } + if result.ResponseHtml == "" { + t.Errorf("Response should not be nil") + } + if result.RequestJson == "" { t.Errorf("Request should not be nil") } - if result.Response == nil { + if result.ResponseJson == "" { t.Errorf("Response should not be nil") - } else { - if result.Response.Model == "" { - t.Errorf("Model should not be empty") - } + } + resp := &anthropic.MessagesResponse{} + if err := json.Unmarshal([]byte(result.ResponseJson), resp); err != nil { + t.Fatalf("Failed to unmarshal response: %v", err) + } + + if resp.Model == "" { + t.Errorf("Model should be set") } }) } diff --git a/app/pkg/analyze/render.go b/app/pkg/analyze/render.go index 7ae729a9..1585c2df 100644 --- a/app/pkg/analyze/render.go +++ b/app/pkg/analyze/render.go @@ -3,6 +3,7 @@ package analyze import ( "bytes" _ "embed" + "encoding/json" "fmt" "html/template" @@ -34,6 +35,24 @@ type Message struct { Content template.HTML } +func renderAnthropicRequestJson(jsonValue string) (string, error) { + req := &anthropic.MessagesRequest{} + if err := json.Unmarshal([]byte(jsonValue), req); err != nil { + return "", nil + } + + return renderAnthropicRequest(req), nil +} + +func renderAnthropicResponseJson(jsonValue string) (string, error) { + res := &anthropic.MessagesResponse{} + if err := json.Unmarshal([]byte(jsonValue), res); err != nil { + return "", nil + } + + return renderAnthropicResponse(res), nil +} + // renderAnthropicRequest returns a string containing the HTML representation of the request func renderAnthropicRequest(request *anthropic.MessagesRequest) string { log := zapr.NewLogger(zap.L()) diff --git a/app/pkg/analyze/render_test.go b/app/pkg/analyze/render_test.go index cd32e7bc..35fbb1bd 100644 --- a/app/pkg/analyze/render_test.go +++ b/app/pkg/analyze/render_test.go @@ -1,8 +1,10 @@ package analyze import ( + "encoding/json" 
"fmt" "os" + "path/filepath" "testing" "github.com/liushuangls/go-anthropic/v2" @@ -12,35 +14,38 @@ import ( func TestRenderAnthropicRequest(t *testing.T) { type testCase struct { - name string - request *anthropic.MessagesRequest + name string + fname string } tests := []testCase{ { - name: "basic", - request: &anthropic.MessagesRequest{ - Model: "test", - MaxTokens: 10, - Temperature: proto.Float32(0.5), - System: "This is the system message", - Messages: []anthropic.Message{ - { - Role: "User", - Content: []anthropic.MessageContent{ - { - Text: proto.String("# md heading\n * item 1 \n * item 2"), - }, - }, - }, - }, - }, + name: "basic", + fname: "anthropic_request.json", }, } + cwd, err := os.Getwd() + if err != nil { + t.Fatalf("Failed to get current working directory: %v", err) + } + + testDataDir := filepath.Join(cwd, "test_data") for _, test := range tests { t.Run(test.name, func(t *testing.T) { - result := renderAnthropicRequest(test.request) + + fname := filepath.Join(testDataDir, test.fname) + data, err := os.ReadFile(fname) + if err != nil { + t.Fatalf("Failed to read file %s: %v", fname, err) + } + + req := &anthropic.MessagesRequest{} + if err := json.Unmarshal(data, req); err != nil { + t.Fatalf("Failed to unmarshal request: %v", err) + } + + result := renderAnthropicRequest(req) if result == "" { t.Errorf("Request should not be empty") } diff --git a/app/pkg/analyze/test_data/anthropic_request.json b/app/pkg/analyze/test_data/anthropic_request.json new file mode 100644 index 00000000..d7aef510 --- /dev/null +++ b/app/pkg/analyze/test_data/anthropic_request.json @@ -0,0 +1,17 @@ +{ + "max_tokens": 2000, + "messages": [ + { + "content": [ + { + "text": "Continue writing the markdown document by adding a code block with the commands a user should execute.\nFollow these rules\n\n* Set the language inside the code block to bash\n* Use the text at the end of the document to determine what commands to execute next\n* Use the existing text and code blocks 
in the document to learn phrases that are predictive of specific commands\n* Only respond with a single code block\n* You can put multiple commands into a code block\n* If the text at the end of the document doesn't clearly describe a command to execute simply respond with the tag\n\n\nHere are a bunch of examples of input documents along with the expected output.\n\n\n\n# Troubleshoot Learning\n\nTroubleshoot Learning\n## Check Examples\n\nIf Foyle is learning there should be example files in ${HOME}/.foyle/training\n```bash\nls -la ~/.foyle/training\n```\n```output\n{\n\t\"type\": \"stateful.runme/terminal\",\n\t\"output\": {\n\t\t\"runme.dev/id\": \"01J6DFZ9V3R29TCDEY2AYA8XTV\",\n\t\t\"content\": \"\",\n\t\t\"initialRows\": 10,\n\t\t\"enableShareButton\": true,\n\t\t\"isAutoSaveEnabled\": false,\n\t\t\"isPlatformAuthEnabled\": false,\n\t\t\"isSessionOutputsEnabled\": true,\n\t\t\"backgroundTask\": true,\n\t\t\"nonInteractive\": false,\n\t\t\"interactive\": true,\n\t\t\"fontSize\": 16,\n\t\t\"fontFamily\": \"Menlo, Monaco, 'Courier New', monospace\",\n\t\t\"rows\": 10,\n\t\t\"cursorStyle\": \"bar\",\n\t\t\"cursorBlink\": true,\n\t\t\"cursorWidth\": 1,\n\t\t\"smoothScrollDuration\": 0,\n\t\t\"scrollback\": 1000,\n\t\t\"closeOnSuccess\": true\n\t}\n}\n```\n```output\n\n```\nThe output should include `example.binpb` files as illustrated below. 
\n\n```\n-rw-r--r-- 1 jlewi staff 9895 Aug 28 07:46 01J6CQ6N02T7J16RFEYCT8KYWP.example.binpb\n```\n\nIf there aren't any then no examples have been learned.\n## Did Block Logs Get Created \n\n* Get the block logs for the cell\n* Change the cell ID to the ULID of the cell (you can view this in the markdown)\n* The cell should be one that was generated by the AI and you think learning should have occurred on\n```bash\nCELLID=01J6DFZ9V3R29TCDEY2AYA8XTV\ncurl http://localhost:8080/api/blocklogs/${CELLID} | jq .\n```\n```output\n{\n\t\"type\": \"stateful.runme/terminal\",\n\t\"output\": {\n\t\t\"runme.dev/id\": \"01J6DG428ER427GJNTKC15G6JM\",\n\t\t\"content\": \"\",\n\t\t\"initialRows\": 10,\n\t\t\"enableShareButton\": true,\n\t\t\"isAutoSaveEnabled\": false,\n\t\t\"isPlatformAuthEnabled\": false,\n\t\t\"isSessionOutputsEnabled\": true,\n\t\t\"backgroundTask\": true,\n\t\t\"nonInteractive\": false,\n\t\t\"interactive\": true,\n\t\t\"fontSize\": 16,\n\t\t\"fontFamily\": \"Menlo, Monaco, 'Courier New', monospace\",\n\t\t\"rows\": 10,\n\t\t\"cursorStyle\": \"bar\",\n\t\t\"cursorBlink\": true,\n\t\t\"cursorWidth\": 1,\n\t\t\"smoothScrollDuration\": 0,\n\t\t\"scrollback\": 1000,\n\t\t\"closeOnSuccess\": true\n\t}\n}\n```\n```output\n\n```\n* If this returns not found then no log was created for this cell\n\n## Check the logs associated with that cell\n* We can search for all logs associated with that cell\n```bash\nexport LASTLOG=~/.foyle/logs/raw/$(ls -t ~/.foyle/logs/raw | head -n 1 )\necho \"Last log file: ${LASTLOG}\"\njq -c \"select(.blockId == \\\"${CELLID}\\\")\" ${LASTLOG}\n```\n```output\n{\n\t\"type\": \"stateful.runme/terminal\",\n\t\"output\": {\n\t\t\"runme.dev/id\": \"01J6DG8266XWR99MWK7WZHQAJM\",\n\t\t\"content\": \"\",\n\t\t\"initialRows\": 10,\n\t\t\"enableShareButton\": true,\n\t\t\"isAutoSaveEnabled\": false,\n\t\t\"isPlatformAuthEnabled\": false,\n\t\t\"isSessionOutputsEnabled\": true,\n\t\t\"backgroundTask\": true,\n\t\t\"nonInteractive\": 
false,\n\t\t\"interactive\": true,\n\t\t\"fontSize\": 16,\n\t\t\"fontFamily\": \"Menlo, Monaco, 'Courier New', monospace\",\n\t\t\"rows\": 10,\n\t\t\"cursorStyle\": \"bar\",\n\t\t\"cursorBlink\": true,\n\t\t\"cursorWidth\": 1,\n\t\t\"smoothScrollDuration\": 0,\n\t\t\"scrollback\": 1000,\n\t\t\"closeOnSuccess\": true\n\t}\n}\n```\n```output\n\n```\n* Check for any errors processing the block\n* Note that the above command will only process the most recent log file\n* Each time Foyle is restarted it will create a new log file. \n### Did we try to create an example from the cell\n\n* If Foyle tries to learn from a cell it logs a message [here](https://github.com/jlewi/foyle/blob/4288e91ac805b46103d94230b32dd1bc2f957095/app/pkg/learn/learner.go#L155)\n\n* We can query for that log \n## Ensure Block Logs are being created\n* The query below checks that block logs are being created. \n* If no logs are being processed than there is a problem with the block log processing. \n\n\n```bash\njq -c 'select(.message == \"Building block log\")' ${LASTLOG}\n```\n```output\n{\n\t\"type\": \"stateful.runme/terminal\",\n\t\"output\": {\n\t\t\"runme.dev/id\": \"01J6DGNE5HPMA77YBCTXMA3K7Y\",\n\t\t\"content\": \"\",\n\t\t\"initialRows\": 10,\n\t\t\"enableShareButton\": true,\n\t\t\"isAutoSaveEnabled\": false,\n\t\t\"isPlatformAuthEnabled\": false,\n\t\t\"isSessionOutputsEnabled\": true,\n\t\t\"backgroundTask\": true,\n\t\t\"nonInteractive\": false,\n\t\t\"interactive\": true,\n\t\t\"fontSize\": 16,\n\t\t\"fontFamily\": \"Menlo, Monaco, 'Courier New', monospace\",\n\t\t\"rows\": 10,\n\t\t\"cursorStyle\": \"bar\",\n\t\t\"cursorBlink\": true,\n\t\t\"cursorWidth\": 1,\n\t\t\"smoothScrollDuration\": 0,\n\t\t\"scrollback\": 1000,\n\t\t\"closeOnSuccess\": true\n\t}\n}\n```\n```output\n\n```\n```bash\necho $LASTLOG\n```\n```output\n{\n\t\"type\": \"stateful.runme/terminal\",\n\t\"output\": {\n\t\t\"runme.dev/id\": \"01J6DGQX67N5HTB7QJ73YADA0D\",\n\t\t\"content\": \"\",\n\t\t\"initialRows\": 
10,\n\t\t\"enableShareButton\": true,\n\t\t\"isAutoSaveEnabled\": false,\n\t\t\"isPlatformAuthEnabled\": false,\n\t\t\"isSessionOutputsEnabled\": true,\n\t\t\"backgroundTask\": true,\n\t\t\"nonInteractive\": false,\n\t\t\"interactive\": true,\n\t\t\"fontSize\": 16,\n\t\t\"fontFamily\": \"Menlo, Monaco, 'Courier New', monospace\",\n\t\t\"rows\": 10,\n\t\t\"cursorStyle\": \"bar\",\n\t\t\"cursorBlink\": true,\n\t\t\"cursorWidth\": 1,\n\t\t\"smoothScrollDuration\": 0,\n\t\t\"scrollback\": 1000,\n\t\t\"closeOnSuccess\": true\n\t}\n}\n```\n```output\n\n```\n```bash\ncat ${LASTLOG} | grep -i \"blockId\" | head -n 5\n```\n\n\n\n```bash\njq -c 'select(.message == \"Found new training example\")' ${LASTLOG} \n```\n\n\n\n\n\n# Troubleshoot Learning\n\nTroubleshoot Learning\n## Check Examples\n\nIf Foyle is learning there should be example files in ${HOME}/.foyle/training\n```bash\nls -la ~/.foyle/training\n```\n```output\n{\n\t\"type\": \"stateful.runme/terminal\",\n\t\"output\": {\n\t\t\"runme.dev/id\": \"01J6DFZ9V3R29TCDEY2AYA8XTV\",\n\t\t\"content\": \"\",\n\t\t\"initialRows\": 10,\n\t\t\"enableShareButton\": true,\n\t\t\"isAutoSaveEnabled\": false,\n\t\t\"isPlatformAuthEnabled\": false,\n\t\t\"isSessionOutputsEnabled\": true,\n\t\t\"backgroundTask\": true,\n\t\t\"nonInteractive\": false,\n\t\t\"interactive\": true,\n\t\t\"fontSize\": 16,\n\t\t\"fontFamily\": \"Menlo, Monaco, 'Courier New', monospace\",\n\t\t\"rows\": 10,\n\t\t\"cursorStyle\": \"bar\",\n\t\t\"cursorBlink\": true,\n\t\t\"cursorWidth\": 1,\n\t\t\"smoothScrollDuration\": 0,\n\t\t\"scrollback\": 1000,\n\t\t\"closeOnSuccess\": true\n\t}\n}\n```\n```output\n\n```\nThe output should include `example.binpb` files as illustrated below. 
\n\n```\n-rw-r--r-- 1 jlewi staff 9895 Aug 28 07:46 01J6CQ6N02T7J16RFEYCT8KYWP.example.binpb\n```\n\nIf there aren't any then no examples have been learned.\n## Did Block Logs Get Created \n\n* Get the block logs for the cell\n* Change the cell ID to the ULID of the cell (you can view this in the markdown)\n* The cell should be one that was generated by the AI and you think learning should have occurred on\n```bash\nCELLID=01J6DFZ9V3R29TCDEY2AYA8XTV\ncurl http://localhost:8080/api/blocklogs/${CELLID} | jq .\n```\n```output\n{\n\t\"type\": \"stateful.runme/terminal\",\n\t\"output\": {\n\t\t\"runme.dev/id\": \"01J6DG428ER427GJNTKC15G6JM\",\n\t\t\"content\": \"\",\n\t\t\"initialRows\": 10,\n\t\t\"enableShareButton\": true,\n\t\t\"isAutoSaveEnabled\": false,\n\t\t\"isPlatformAuthEnabled\": false,\n\t\t\"isSessionOutputsEnabled\": true,\n\t\t\"backgroundTask\": true,\n\t\t\"nonInteractive\": false,\n\t\t\"interactive\": true,\n\t\t\"fontSize\": 16,\n\t\t\"fontFamily\": \"Menlo, Monaco, 'Courier New', monospace\",\n\t\t\"rows\": 10,\n\t\t\"cursorStyle\": \"bar\",\n\t\t\"cursorBlink\": true,\n\t\t\"cursorWidth\": 1,\n\t\t\"smoothScrollDuration\": 0,\n\t\t\"scrollback\": 1000,\n\t\t\"closeOnSuccess\": true\n\t}\n}\n```\n```output\n\n```\n* If this returns not found then no log was created for this cell\n\n## Check the logs associated with that cell\n* We can search for all logs associated with that cell\n```bash\nexport LASTLOG=~/.foyle/logs/raw/$(ls -t ~/.foyle/logs/raw | head -n 1 )\necho \"Last log file: ${LASTLOG}\"\njq -c \"select(.blockId == \\\"${CELLID}\\\")\" ${LASTLOG}\n```\n```output\n{\n\t\"type\": \"stateful.runme/terminal\",\n\t\"output\": {\n\t\t\"runme.dev/id\": \"01J6DG8266XWR99MWK7WZHQAJM\",\n\t\t\"content\": \"\",\n\t\t\"initialRows\": 10,\n\t\t\"enableShareButton\": true,\n\t\t\"isAutoSaveEnabled\": false,\n\t\t\"isPlatformAuthEnabled\": false,\n\t\t\"isSessionOutputsEnabled\": true,\n\t\t\"backgroundTask\": true,\n\t\t\"nonInteractive\": 
false,\n\t\t\"interactive\": true,\n\t\t\"fontSize\": 16,\n\t\t\"fontFamily\": \"Menlo, Monaco, 'Courier New', monospace\",\n\t\t\"rows\": 10,\n\t\t\"cursorStyle\": \"bar\",\n\t\t\"cursorBlink\": true,\n\t\t\"cursorWidth\": 1,\n\t\t\"smoothScrollDuration\": 0,\n\t\t\"scrollback\": 1000,\n\t\t\"closeOnSuccess\": true\n\t}\n}\n```\n```output\n\n```\n* Check for any errors processing the block\n* Note that the above command will only process the most recent log file\n* Each time Foyle is restarted it will create a new log file. \n### Did we try to create an example from any cells?\n\n* If Foyle tries to learn from a cell it logs a message [here](https://github.com/jlewi/foyle/blob/4288e91ac805b46103d94230b32dd1bc2f957095/app/pkg/learn/learner.go#L155)\n\n* We can query for that log as follows\n```bash\njq -c 'select(.message == \"Found new training example\")' ${LASTLOG}\n```\n```output\n{\n\t\"type\": \"stateful.runme/terminal\",\n\t\"output\": {\n\t\t\"runme.dev/id\": \"01J6DH6PRXKD55RQ6333AQH3V7\",\n\t\t\"content\": \"\",\n\t\t\"initialRows\": 10,\n\t\t\"enableShareButton\": true,\n\t\t\"isAutoSaveEnabled\": false,\n\t\t\"isPlatformAuthEnabled\": false,\n\t\t\"isSessionOutputsEnabled\": true,\n\t\t\"backgroundTask\": true,\n\t\t\"nonInteractive\": false,\n\t\t\"interactive\": true,\n\t\t\"fontSize\": 16,\n\t\t\"fontFamily\": \"Menlo, Monaco, 'Courier New', monospace\",\n\t\t\"rows\": 10,\n\t\t\"cursorStyle\": \"bar\",\n\t\t\"cursorBlink\": true,\n\t\t\"cursorWidth\": 1,\n\t\t\"smoothScrollDuration\": 0,\n\t\t\"scrollback\": 1000,\n\t\t\"closeOnSuccess\": true\n\t}\n}\n```\n```output\n\n```\n* If that returns nothing then we know Foyle never tried to learn from any cells\n* If it returns something then we know Foyle tried to learn from a cell but it may have failed\n* If there is an error processing an example it gets logged [here](https://github.com/jlewi/foyle/blob/4288e91ac805b46103d94230b32dd1bc2f957095/app/pkg/learn/learner.go#L205)\n* So we can search for 
\n```bash\njq -c 'select(.level == \"error\")' ${LASTLOG}\n```\n## Ensure Block Logs are being created\n* The query below checks that block logs are being created. \n* If no logs are being processed than there is a problem with the block log processing. \n\n\n```bash\njq -c 'select(.message == \"Building block log\")' ${LASTLOG}\n```\n```output\n{\n\t\"type\": \"stateful.runme/terminal\",\n\t\"output\": {\n\t\t\"runme.dev/id\": \"01J6DGNE5HPMA77YBCTXMA3K7Y\",\n\t\t\"content\": \"\",\n\t\t\"initialRows\": 10,\n\t\t\"enableShareButton\": true,\n\t\t\"isAutoSaveEnabled\": false,\n\t\t\"isPlatformAuthEnabled\": false,\n\t\t\"isSessionOutputsEnabled\": true,\n\t\t\"backgroundTask\": true,\n\t\t\"nonInteractive\": false,\n\t\t\"interactive\": true,\n\t\t\"fontSize\": 16,\n\t\t\"fontFamily\": \"Menlo, Monaco, 'Courier New', monospace\",\n\t\t\"rows\": 10,\n\t\t\"cursorStyle\": \"bar\",\n\t\t\"cursorBlink\": true,\n\t\t\"cursorWidth\": 1,\n\t\t\"smoothScrollDuration\": 0,\n\t\t\"scrollback\": 1000,\n\t\t\"closeOnSuccess\": true\n\t}\n}\n```\n```output\n\n```\n```bash\necho $LASTLOG\n```\n```output\n{\n\t\"type\": \"stateful.runme/terminal\",\n\t\"output\": {\n\t\t\"runme.dev/id\": \"01J6DGQX67N5HTB7QJ73YADA0D\",\n\t\t\"content\": \"\",\n\t\t\"initialRows\": 10,\n\t\t\"enableShareButton\": true,\n\t\t\"isAutoSaveEnabled\": false,\n\t\t\"isPlatformAuthEnabled\": false,\n\t\t\"isSessionOutputsEnabled\": true,\n\t\t\"backgroundTask\": true,\n\t\t\"nonInteractive\": false,\n\t\t\"interactive\": true,\n\t\t\"fontSize\": 16,\n\t\t\"fontFamily\": \"Menlo, Monaco, 'Courier New', monospace\",\n\t\t\"rows\": 10,\n\t\t\"cursorStyle\": \"bar\",\n\t\t\"cursorBlink\": true,\n\t\t\"cursorWidth\": 1,\n\t\t\"smoothScrollDuration\": 0,\n\t\t\"scrollback\": 1000,\n\t\t\"closeOnSuccess\": true\n\t}\n}\n```\n```output\n\n```\n```bash\ncat ${LASTLOG} | grep -i \"blockId\" | head -n 5\n```\n\n\n\n```bash\njq -c 'select(.level == \"Failed to write example\")' ${LASTLOG} \n```\n\n\n\n\n\n# 
Troubleshoot Learning\n\nTroubleshoot Learning\n\n## Check Examples\n\nIf Foyle is learning there should be example files in ${HOME}/.foyle/training\n```bash\nls -la ~/.foyle/training\n```\n```output\n{\n\t\"type\": \"stateful.runme/terminal\",\n\t\"output\": {\n\t\t\"runme.dev/id\": \"01J6DFZ9V3R29TCDEY2AYA8XTV\",\n\t\t\"content\": \"\",\n\t\t\"initialRows\": 10,\n\t\t\"enableShareButton\": true,\n\t\t\"isAutoSaveEnabled\": false,\n\t\t\"isPlatformAuthEnabled\": false,\n\t\t\"isSessionOutputsEnabled\": true,\n\t\t\"backgroundTask\": true,\n\t\t\"nonInteractive\": false,\n\t\t\"interactive\": true,\n\t\t\"fontSize\": 16,\n\t\t\"fontFamily\": \"Menlo, Monaco, 'Courier New', monospace\",\n\t\t\"rows\": 10,\n\t\t\"cursorStyle\": \"bar\",\n\t\t\"cursorBlink\": true,\n\t\t\"cursorWidth\": 1,\n\t\t\"smoothScrollDuration\": 0,\n\t\t\"scrollback\": 1000,\n\t\t\"closeOnSuccess\": true\n\t}\n}\n```\n```output\n\n```\nThe output should include `example.binpb` files as illustrated below. \n\n```\n-rw-r--r-- 1 jlewi staff 9895 Aug 28 07:46 01J6CQ6N02T7J16RFEYCT8KYWP.example.binpb\n```\n\nIf there aren't any then no examples have been learned.\n## Did Block Logs Get Created \n\n* Get the block logs for the cell\n* Change the cell ID to the ULID of the cell (you can view this in the markdown)\n* The cell should be one that was generated by the AI and you think learning should have occurred on\n```bash\nCELLID=01J6DFZ9V3R29TCDEY2AYA8XTV\ncurl http://localhost:8080/api/blocklogs/${CELLID} | jq .\n```\n```output\n{\n\t\"type\": \"stateful.runme/terminal\",\n\t\"output\": {\n\t\t\"runme.dev/id\": \"01J6DG428ER427GJNTKC15G6JM\",\n\t\t\"content\": \"\",\n\t\t\"initialRows\": 10,\n\t\t\"enableShareButton\": true,\n\t\t\"isAutoSaveEnabled\": false,\n\t\t\"isPlatformAuthEnabled\": false,\n\t\t\"isSessionOutputsEnabled\": true,\n\t\t\"backgroundTask\": true,\n\t\t\"nonInteractive\": false,\n\t\t\"interactive\": true,\n\t\t\"fontSize\": 16,\n\t\t\"fontFamily\": \"Menlo, Monaco, 'Courier 
New', monospace\",\n\t\t\"rows\": 10,\n\t\t\"cursorStyle\": \"bar\",\n\t\t\"cursorBlink\": true,\n\t\t\"cursorWidth\": 1,\n\t\t\"smoothScrollDuration\": 0,\n\t\t\"scrollback\": 1000,\n\t\t\"closeOnSuccess\": true\n\t}\n}\n```\n```output\n\n```\n* If this returns not found then no log was created for this cell\n\n## Check the logs associated with that cell\n* We can search for all logs associated with that cell\n```bash\nexport LASTLOG=~/.foyle/logs/raw/$(ls -t ~/.foyle/logs/raw | head -n 1 )\necho \"Last log file: ${LASTLOG}\"\njq -c \"select(.blockId == \\\"${CELLID}\\\")\" ${LASTLOG}\n```\n```output\n{\n\t\"type\": \"stateful.runme/terminal\",\n\t\"output\": {\n\t\t\"runme.dev/id\": \"01J6DG8266XWR99MWK7WZHQAJM\",\n\t\t\"content\": \"\",\n\t\t\"initialRows\": 10,\n\t\t\"enableShareButton\": true,\n\t\t\"isAutoSaveEnabled\": false,\n\t\t\"isPlatformAuthEnabled\": false,\n\t\t\"isSessionOutputsEnabled\": true,\n\t\t\"backgroundTask\": true,\n\t\t\"nonInteractive\": false,\n\t\t\"interactive\": true,\n\t\t\"fontSize\": 16,\n\t\t\"fontFamily\": \"Menlo, Monaco, 'Courier New', monospace\",\n\t\t\"rows\": 10,\n\t\t\"cursorStyle\": \"bar\",\n\t\t\"cursorBlink\": true,\n\t\t\"cursorWidth\": 1,\n\t\t\"smoothScrollDuration\": 0,\n\t\t\"scrollback\": 1000,\n\t\t\"closeOnSuccess\": true\n\t}\n}\n```\n```output\n\n```\n* Check for any errors processing the block\n* Note that the above command will only process the most recent log file\n* Each time Foyle is restarted it will create a new log file. 
\n### Did we try to create an example from any cells?\n\n* If Foyle tries to learn from a cell it logs a message [here](https://github.com/jlewi/foyle/blob/4288e91ac805b46103d94230b32dd1bc2f957095/app/pkg/learn/learner.go#L155)\n\n* We can query for that log as follows\n```bash\njq -c 'select(.message == \"Found new training example\")' ${LASTLOG}\n```\n```output\n{\n\t\"type\": \"stateful.runme/terminal\",\n\t\"output\": {\n\t\t\"runme.dev/id\": \"01J6DH6PRXKD55RQ6333AQH3V7\",\n\t\t\"content\": \"\",\n\t\t\"initialRows\": 10,\n\t\t\"enableShareButton\": true,\n\t\t\"isAutoSaveEnabled\": false,\n\t\t\"isPlatformAuthEnabled\": false,\n\t\t\"isSessionOutputsEnabled\": true,\n\t\t\"backgroundTask\": true,\n\t\t\"nonInteractive\": false,\n\t\t\"interactive\": true,\n\t\t\"fontSize\": 16,\n\t\t\"fontFamily\": \"Menlo, Monaco, 'Courier New', monospace\",\n\t\t\"rows\": 10,\n\t\t\"cursorStyle\": \"bar\",\n\t\t\"cursorBlink\": true,\n\t\t\"cursorWidth\": 1,\n\t\t\"smoothScrollDuration\": 0,\n\t\t\"scrollback\": 1000,\n\t\t\"closeOnSuccess\": true\n\t}\n}\n```\n```output\n\n```\n* If that returns nothing then we know Foyle never tried to learn from any cells\n* If it returns something then we know Foyle tried to learn from a cell but it may have failed\n* If there is an error processing an example it gets logged [here](https://github.com/jlewi/foyle/blob/4288e91ac805b46103d94230b32dd1bc2f957095/app/pkg/learn/learner.go#L205)\n* So we can search for that error message in the logs\n```bash\njq -c 'select(.level == \"Failed to write example\")' ${LASTLOG}\n```\n```output\n{\n\t\"type\": \"stateful.runme/terminal\",\n\t\"output\": {\n\t\t\"runme.dev/id\": \"01J6DHB6FMBF40E5WP0ZACR3B2\",\n\t\t\"content\": \"\",\n\t\t\"initialRows\": 10,\n\t\t\"enableShareButton\": true,\n\t\t\"isAutoSaveEnabled\": false,\n\t\t\"isPlatformAuthEnabled\": false,\n\t\t\"isSessionOutputsEnabled\": true,\n\t\t\"backgroundTask\": true,\n\t\t\"nonInteractive\": false,\n\t\t\"interactive\": 
true,\n\t\t\"fontSize\": 16,\n\t\t\"fontFamily\": \"Menlo, Monaco, 'Courier New', monospace\",\n\t\t\"rows\": 10,\n\t\t\"cursorStyle\": \"bar\",\n\t\t\"cursorBlink\": true,\n\t\t\"cursorWidth\": 1,\n\t\t\"smoothScrollDuration\": 0,\n\t\t\"scrollback\": 1000,\n\t\t\"closeOnSuccess\": true\n\t}\n}\n```\n```output\n\n```\n```bash\njq -c 'select(.level == \"error\" and .message == \"Failed to write example\")' ${LASTLOG}\n```\n## Ensure Block Logs are being created\n* The query below checks that block logs are being created. \n* If no logs are being processed than there is a problem with the block log processing. \n\n\n```bash\njq -c 'select(.message == \"Building block log\")' ${LASTLOG}\n```\n```output\n{\n\t\"type\": \"stateful.runme/terminal\",\n\t\"output\": {\n\t\t\"runme.dev/id\": \"01J6DGNE5HPMA77YBCTXMA3K7Y\",\n\t\t\"content\": \"\",\n\t\t\"initialRows\": 10,\n\t\t\"enableShareButton\": true,\n\t\t\"isAutoSaveEnabled\": false,\n\t\t\"isPlatformAuthEnabled\": false,\n\t\t\"isSessionOutputsEnabled\": true,\n\t\t\"backgroundTask\": true,\n\t\t\"nonInteractive\": false,\n\t\t\"interactive\": true,\n\t\t\"fontSize\": 16,\n\t\t\"fontFamily\": \"Menlo, Monaco, 'Courier New', monospace\",\n\t\t\"rows\": 10,\n\t\t\"cursorStyle\": \"bar\",\n\t\t\"cursorBlink\": true,\n\t\t\"cursorWidth\": 1,\n\t\t\"smoothScrollDuration\": 0,\n\t\t\"scrollback\": 1000,\n\t\t\"closeOnSuccess\": true\n\t}\n}\n```\n```output\n\n```\n## Are there any errors in the logs\n\n* The query below should show you a\n\n\n\n```bash\njq -c 'select(.severity == \"error\")' ${LASTLOG} \n```\n\n\n\nHere's the actual document containing the problem or task to be solved:\n\n\n# Troubleshoot Learning\n## What You'll Learn\n\n* How to ensure learning is working and monitor learning\n## Check Examples\nIf Foyle is learning there should be example files in ${HOME}/.foyle/training\n```bash\nls -la ~/.foyle/training\n```\nThe output should include `example.binpb` files as illustrated below.\n```bash\n-rw-r--r-- 1 
jlewi staff 9895 Aug 28 07:46 01J6CQ6N02T7J16RFEYCT8KYWP.example.binpb\n```\nIf there aren't any then no examples have been learned.\n## Trigger Learning\n\nFoyle's learning is triggered by the following sequence of actions:\n\n1. Foyle generates a suggested cell which is added to the notebook as a Ghost Cell\n1. You accept the suggested cell by putting the focus on the cell\n1. You edit the cell \n1. You execute the cell\n\nWhen you execute the cell, the execution is logged to Foyle. For each executed cell Foyle checks\n\n1. Was that cell generated by Foyle\n1. If the cell was generated by Foyle did the actual command executed differ from the suggested command\n1. If the cell was changed by the user than Foyle attempts to learn from that execution\n\nCrucially, every cell created by Foyle is assigned an ID. This ID can be used to track how\nthe cell was generated and if learning occurred.\n\nTo get the cell ID for a given cell\n\n1. Open the raw markdown file by right clicking on it in VSCode and selecting `Open With` -> `Text Editor`\n1. Find code block containing your cell\n1. Your cell will contain metadata which contains the ID e.g. 
\n\n````\n```bash {\"id\":\"01J6DG428ER427GJNTKC15G6JM\"}\nCELLID=01J6DFZ9V3R29TCDEY2AYA8XTV\ncurl http://localhost:8080/api/blocklogs/${CELLID} | jq .\n```\n````\n\n\n## Did Block Logs Get Created\n* Get the block logs for the cell\n* Change the cell ID to the ULID of the cell (you can view this in the markdown)\n* The cell should be one that was generated by the AI and you think learning should have occurred on\n\n```bash\nCELLID=01J6DFZ9V3R29TCDEY2AYA8XTV\ncurl http://localhost:8080/api/blocklogs/${CELLID} | jq .\n```\n* If this returns not found then no log was created for this cell\n## Check the logs associated with that cell\n* We can search for all logs associated with that cell\n```bash\nexport LASTLOG=~/.foyle/logs/raw/$(ls -t ~/.foyle/logs/raw | head -n 1 )\necho \"Last log file: ${LASTLOG}\"\njq -c \"select(.blockId == \\\"${CELLID}\\\")\" ${LASTLOG}\n```\n* Check for any errors processing the block\n* Note that the above command will only process the most recent log file\n* Each time Foyle is restarted it will create a new log file.\n### Did we try to create an example from any cells?\n* If Foyle tries to learn from a cell it logs a message [here](https://github.com/jlewi/foyle/blob/4288e91ac805b46103d94230b32dd1bc2f957095/app/pkg/learn/learner.go#L155)\n* We can query for that log as follows\n```bash\njq -c 'select(.message == \"Found new training example\")' ${LASTLOG}\n```\n* If that returns nothing then we know Foyle never tried to learn from any cells\n* If it returns something then we know Foyle tried to learn from a cell but it may have failed\n* If there is an error processing an example it gets logged [here](https://github.com/jlewi/foyle/blob/4288e91ac805b46103d94230b32dd1bc2f957095/app/pkg/learn/learner.go#L205)\n* So we can search for that error message in the logs\n```bash\njq -c 'select(.level == \"Failed to write example\")' ${LASTLOG}\n```\n```bash\njq -c 'select(.level == \"error\" and .message == \"Failed to write example\")' 
${LASTLOG}\n```\n## Ensure Block Logs are being created\n* The query below checks that block logs are being created.\n* If no logs are being processed than there is a problem with the block log processing.\n```bash\njq -c 'select(.message == \"Building block log\")' ${LASTLOG}\n```\n## Are there any errors in the logs\n* The query below should show you any errors in the logs.\n```bash\njq -c 'select(.severity == \"error\")' ${LASTLOG}\n```\n\n\n\n", + "type": "text" + } + ], + "role": "user" + } + ], + "model": "claude-3-5-sonnet-20240620", + "system": "You are a helpful AI assistant for software developers. You are helping software engineers write \nmarkdown documents to deploy and operate software. Your job is to help users with tasks related to building, deploying,\nand operating software. You should interpret any questions or commands in that context. You job is to suggest\ncommands the user can execute to accomplish their goals.", + "temperature": 0.9 +} diff --git a/app/pkg/logs/matchers/names.go b/app/pkg/logs/matchers/names.go index bdffe751..8eafb01e 100644 --- a/app/pkg/logs/matchers/names.go +++ b/app/pkg/logs/matchers/names.go @@ -6,10 +6,17 @@ package matchers import "strings" const ( + OAIComplete = "github.com/jlewi/foyle/app/pkg/oai.(*Completer).Complete" LogEvents = "github.com/jlewi/foyle/app/pkg/agent.(*Agent).LogEvents" StreamGenerate = "github.com/jlewi/foyle/app/pkg/agent.(*Agent).StreamGenerate" ) +type Matcher func(name string) bool + +func IsOAIComplete(name string) bool { + return strings.HasPrefix(name, OAIComplete) +} + func IsLogEvent(fname string) bool { // We need to use HasPrefix because the logging statement is nested inside an anonymous function so there // will be a suffix like "func1" diff --git a/app/pkg/logs/matchers/names_test.go b/app/pkg/logs/matchers/names_test.go index cfa9ab16..10e4c8f9 100644 --- a/app/pkg/logs/matchers/names_test.go +++ b/app/pkg/logs/matchers/names_test.go @@ -6,6 +6,8 @@ import ( "strings" "testing" + 
"github.com/jlewi/foyle/app/pkg/oai" + "github.com/jlewi/foyle/app/pkg/agent" ) @@ -18,6 +20,7 @@ func GetFunctionNameFromFunc(f interface{}) string { return name } +// TODO(jeremy): We should probably migrate this to matchers. func Test_Names(t *testing.T) { type testCases struct { expected string @@ -41,3 +44,29 @@ func Test_Names(t *testing.T) { } } } + +func Test_Matchers(t *testing.T) { + type testCases struct { + name string + Matcher Matcher + input interface{} + expected bool + } + + cases := []testCases{ + { + input: (&oai.Completer{}).Complete, + Matcher: IsOAIComplete, + name: "IsOAIComplete", + expected: true, + }, + } + + for _, c := range cases { + t.Run(c.name, func(t *testing.T) { + if got := c.Matcher(GetFunctionNameFromFunc(c.input)); got != c.expected { + t.Errorf("Expected %v, but got %v", c.expected, got) + } + }) + } +} diff --git a/docs/content/en/docs/observability/ai.md b/docs/content/en/docs/observability/ai.md new file mode 100644 index 00000000..4f5ca3e8 --- /dev/null +++ b/docs/content/en/docs/observability/ai.md @@ -0,0 +1,72 @@ +--- +description: How to monitor the quality of AI outputs +title: Monitoring AI Quality +weight: 1 +--- + +## What You'll Learn + +* How to observe the AI to understand why it generated the answers it did + +## What was the actual prompt and response? + +A good place to start when trying to understand the AI's responses is to look at the actual prompt and response from the LLM that produced the cell. + +You can fetch the request and response as follows + +1. Get the log for a given cell +2. 
From the cell get the traceId of the AI generation request + +```bash +CELLID=01J7KQPBYCT9VM2KFBY48JC7J0 +export TRACEID=$(curl -s -X POST http://localhost:8877/api/foyle.logs.LogsService/GetBlockLog -H "Content-Type: application/json" -d "{\"id\": \"${CELLID}\"}" | jq -r .blockLog.genTraceId) +echo TRACEID=$TRACEID +``` + +* Given the traceId, you can fetch the request and response from the logs + +```bash +curl -s -o /tmp/response.json -X POST http://localhost:8877/api/foyle.logs.LogsService/GetLLMLogs -H "Content-Type: application/json" -d "{\"traceId\": \"${TRACEID}\"}" +CODE="$?" +if [ $CODE -ne 0 ]; then + echo "Error occurred while fetching LLM logs" + exit $CODE +fi + +``` + +* You can view an HTML rendering of the prompt and response +* If you disable interactive mode for the cell then vscode will render the HTML response inline +* **Note** There appears to be a bug right now in the HTML rendering causing a bunch of newlines to be introduced relative to what's in the actual markdown in the JSON request + +```bash +jq -r '.responseHtml' /tmp/response.json > /tmp/response.html +cat /tmp/response.html +``` + +* To view the response + +```bash +jq -r '.responseHtml' /tmp/response.json > /tmp/response.html +cat /tmp/response.html +``` + +* To view the JSON versions of the actual request and response + +```bash +jq -r '.requestJson' /tmp/response.json | jq . +``` + +```bash +jq -r '.responseJson' /tmp/response.json | jq '.messages[0].content[0].text' +``` + +* You can print the raw markdown of the prompt as follows + +```bash +echo $(jq -r '.requestJson' /tmp/response.json | jq '.messages[0].content[0].text') +``` + +```bash +jq -r '.responseJson' /tmp/response.json | jq . 
+``` diff --git a/protos/foyle/logs/traces.proto b/protos/foyle/logs/traces.proto index 6b3b88fe..558465a2 100644 --- a/protos/foyle/logs/traces.proto +++ b/protos/foyle/logs/traces.proto @@ -90,7 +90,7 @@ message GetLLMLogsRequest { // Trace ID is the id of the trace string trace_id = 1; // log_file is the path to the logs to analyze to get the trace - // TODO(jeremy): Should we make this a glob? + // TODO(jeremy): We should deprecate this. string log_file = 2; } @@ -99,4 +99,9 @@ message GetLLMLogsResponse { string request_html = 1; // response_html is the html representation of the response string response_html = 2; + + // The json representation of the request and response. The value will + // change depending on the LLM provider + string request_json = 3; + string response_json = 4; } diff --git a/protos/go/foyle/logs/traces.pb.go b/protos/go/foyle/logs/traces.pb.go index c7ca76da..6c56de7f 100644 --- a/protos/go/foyle/logs/traces.pb.go +++ b/protos/go/foyle/logs/traces.pb.go @@ -570,7 +570,7 @@ type GetLLMLogsRequest struct { // Trace ID is the id of the trace TraceId string `protobuf:"bytes,1,opt,name=trace_id,json=traceId,proto3" json:"trace_id,omitempty"` // log_file is the path to the logs to analyze to get the trace - // TODO(jeremy): Should we make this a glob? + // TODO(jeremy): We should deprecate this. LogFile string `protobuf:"bytes,2,opt,name=log_file,json=logFile,proto3" json:"log_file,omitempty"` } @@ -629,6 +629,10 @@ type GetLLMLogsResponse struct { RequestHtml string `protobuf:"bytes,1,opt,name=request_html,json=requestHtml,proto3" json:"request_html,omitempty"` // response_html is the html representation of the response ResponseHtml string `protobuf:"bytes,2,opt,name=response_html,json=responseHtml,proto3" json:"response_html,omitempty"` + // The json representation of the request and response. 
The value will + // change depending on the LLM provider + RequestJson string `protobuf:"bytes,3,opt,name=request_json,json=requestJson,proto3" json:"request_json,omitempty"` + ResponseJson string `protobuf:"bytes,4,opt,name=response_json,json=responseJson,proto3" json:"response_json,omitempty"` } func (x *GetLLMLogsResponse) Reset() { @@ -677,6 +681,20 @@ func (x *GetLLMLogsResponse) GetResponseHtml() string { return "" } +func (x *GetLLMLogsResponse) GetRequestJson() string { + if x != nil { + return x.RequestJson + } + return "" +} + +func (x *GetLLMLogsResponse) GetResponseJson() string { + if x != nil { + return x.ResponseJson + } + return "" +} + var File_foyle_logs_traces_proto protoreflect.FileDescriptor var file_foyle_logs_traces_proto_rawDesc = []byte{ @@ -752,44 +770,49 @@ var file_foyle_logs_traces_proto_rawDesc = []byte{ 0x73, 0x52, 0x65, 0x71, 0x75, 0x65, 0x73, 0x74, 0x12, 0x19, 0x0a, 0x08, 0x74, 0x72, 0x61, 0x63, 0x65, 0x5f, 0x69, 0x64, 0x18, 0x01, 0x20, 0x01, 0x28, 0x09, 0x52, 0x07, 0x74, 0x72, 0x61, 0x63, 0x65, 0x49, 0x64, 0x12, 0x19, 0x0a, 0x08, 0x6c, 0x6f, 0x67, 0x5f, 0x66, 0x69, 0x6c, 0x65, 0x18, - 0x02, 0x20, 0x01, 0x28, 0x09, 0x52, 0x07, 0x6c, 0x6f, 0x67, 0x46, 0x69, 0x6c, 0x65, 0x22, 0x5c, - 0x0a, 0x12, 0x47, 0x65, 0x74, 0x4c, 0x4c, 0x4d, 0x4c, 0x6f, 0x67, 0x73, 0x52, 0x65, 0x73, 0x70, - 0x6f, 0x6e, 0x73, 0x65, 0x12, 0x21, 0x0a, 0x0c, 0x72, 0x65, 0x71, 0x75, 0x65, 0x73, 0x74, 0x5f, - 0x68, 0x74, 0x6d, 0x6c, 0x18, 0x01, 0x20, 0x01, 0x28, 0x09, 0x52, 0x0b, 0x72, 0x65, 0x71, 0x75, - 0x65, 0x73, 0x74, 0x48, 0x74, 0x6d, 0x6c, 0x12, 0x23, 0x0a, 0x0d, 0x72, 0x65, 0x73, 0x70, 0x6f, - 0x6e, 0x73, 0x65, 0x5f, 0x68, 0x74, 0x6d, 0x6c, 0x18, 0x02, 0x20, 0x01, 0x28, 0x09, 0x52, 0x0c, - 0x72, 0x65, 0x73, 0x70, 0x6f, 0x6e, 0x73, 0x65, 0x48, 0x74, 0x6d, 0x6c, 0x32, 0xc8, 0x02, 0x0a, - 0x0b, 0x4c, 0x6f, 0x67, 0x73, 0x53, 0x65, 0x72, 0x76, 0x69, 0x63, 0x65, 0x12, 0x47, 0x0a, 0x08, - 0x47, 0x65, 0x74, 0x54, 0x72, 0x61, 0x63, 0x65, 0x12, 0x1b, 0x2e, 0x66, 0x6f, 
0x79, 0x6c, 0x65, - 0x2e, 0x6c, 0x6f, 0x67, 0x73, 0x2e, 0x47, 0x65, 0x74, 0x54, 0x72, 0x61, 0x63, 0x65, 0x52, 0x65, - 0x71, 0x75, 0x65, 0x73, 0x74, 0x1a, 0x1c, 0x2e, 0x66, 0x6f, 0x79, 0x6c, 0x65, 0x2e, 0x6c, 0x6f, - 0x67, 0x73, 0x2e, 0x47, 0x65, 0x74, 0x54, 0x72, 0x61, 0x63, 0x65, 0x52, 0x65, 0x73, 0x70, 0x6f, - 0x6e, 0x73, 0x65, 0x22, 0x00, 0x12, 0x50, 0x0a, 0x0b, 0x47, 0x65, 0x74, 0x42, 0x6c, 0x6f, 0x63, - 0x6b, 0x4c, 0x6f, 0x67, 0x12, 0x1e, 0x2e, 0x66, 0x6f, 0x79, 0x6c, 0x65, 0x2e, 0x6c, 0x6f, 0x67, - 0x73, 0x2e, 0x47, 0x65, 0x74, 0x42, 0x6c, 0x6f, 0x63, 0x6b, 0x4c, 0x6f, 0x67, 0x52, 0x65, 0x71, - 0x75, 0x65, 0x73, 0x74, 0x1a, 0x1f, 0x2e, 0x66, 0x6f, 0x79, 0x6c, 0x65, 0x2e, 0x6c, 0x6f, 0x67, - 0x73, 0x2e, 0x47, 0x65, 0x74, 0x42, 0x6c, 0x6f, 0x63, 0x6b, 0x4c, 0x6f, 0x67, 0x52, 0x65, 0x73, - 0x70, 0x6f, 0x6e, 0x73, 0x65, 0x22, 0x00, 0x12, 0x4d, 0x0a, 0x0a, 0x47, 0x65, 0x74, 0x4c, 0x4c, - 0x4d, 0x4c, 0x6f, 0x67, 0x73, 0x12, 0x1d, 0x2e, 0x66, 0x6f, 0x79, 0x6c, 0x65, 0x2e, 0x6c, 0x6f, - 0x67, 0x73, 0x2e, 0x47, 0x65, 0x74, 0x4c, 0x4c, 0x4d, 0x4c, 0x6f, 0x67, 0x73, 0x52, 0x65, 0x71, - 0x75, 0x65, 0x73, 0x74, 0x1a, 0x1e, 0x2e, 0x66, 0x6f, 0x79, 0x6c, 0x65, 0x2e, 0x6c, 0x6f, 0x67, - 0x73, 0x2e, 0x47, 0x65, 0x74, 0x4c, 0x4c, 0x4d, 0x4c, 0x6f, 0x67, 0x73, 0x52, 0x65, 0x73, 0x70, - 0x6f, 0x6e, 0x73, 0x65, 0x22, 0x00, 0x12, 0x4f, 0x0a, 0x06, 0x53, 0x74, 0x61, 0x74, 0x75, 0x73, - 0x12, 0x20, 0x2e, 0x66, 0x6f, 0x79, 0x6c, 0x65, 0x2e, 0x6c, 0x6f, 0x67, 0x73, 0x2e, 0x47, 0x65, - 0x74, 0x4c, 0x6f, 0x67, 0x73, 0x53, 0x74, 0x61, 0x74, 0x75, 0x73, 0x52, 0x65, 0x71, 0x75, 0x65, - 0x73, 0x74, 0x1a, 0x21, 0x2e, 0x66, 0x6f, 0x79, 0x6c, 0x65, 0x2e, 0x6c, 0x6f, 0x67, 0x73, 0x2e, - 0x47, 0x65, 0x74, 0x4c, 0x6f, 0x67, 0x73, 0x53, 0x74, 0x61, 0x74, 0x75, 0x73, 0x52, 0x65, 0x73, - 0x70, 0x6f, 0x6e, 0x73, 0x65, 0x22, 0x00, 0x42, 0x9a, 0x01, 0x0a, 0x0e, 0x63, 0x6f, 0x6d, 0x2e, - 0x66, 0x6f, 0x79, 0x6c, 0x65, 0x2e, 0x6c, 0x6f, 0x67, 0x73, 0x42, 0x0b, 0x54, 0x72, 0x61, 0x63, - 0x65, 0x73, 0x50, 
0x72, 0x6f, 0x74, 0x6f, 0x50, 0x01, 0x5a, 0x32, 0x67, 0x69, 0x74, 0x68, 0x75, - 0x62, 0x2e, 0x63, 0x6f, 0x6d, 0x2f, 0x6a, 0x6c, 0x65, 0x77, 0x69, 0x2f, 0x66, 0x6f, 0x79, 0x6c, - 0x65, 0x2f, 0x70, 0x72, 0x6f, 0x74, 0x6f, 0x73, 0x2f, 0x67, 0x6f, 0x2f, 0x66, 0x6f, 0x79, 0x6c, - 0x65, 0x2f, 0x6c, 0x6f, 0x67, 0x73, 0x3b, 0x6c, 0x6f, 0x67, 0x73, 0x70, 0x62, 0xa2, 0x02, 0x03, - 0x46, 0x4c, 0x58, 0xaa, 0x02, 0x0a, 0x46, 0x6f, 0x79, 0x6c, 0x65, 0x2e, 0x4c, 0x6f, 0x67, 0x73, - 0xca, 0x02, 0x0a, 0x46, 0x6f, 0x79, 0x6c, 0x65, 0x5c, 0x4c, 0x6f, 0x67, 0x73, 0xe2, 0x02, 0x16, - 0x46, 0x6f, 0x79, 0x6c, 0x65, 0x5c, 0x4c, 0x6f, 0x67, 0x73, 0x5c, 0x47, 0x50, 0x42, 0x4d, 0x65, - 0x74, 0x61, 0x64, 0x61, 0x74, 0x61, 0xea, 0x02, 0x0b, 0x46, 0x6f, 0x79, 0x6c, 0x65, 0x3a, 0x3a, - 0x4c, 0x6f, 0x67, 0x73, 0x62, 0x06, 0x70, 0x72, 0x6f, 0x74, 0x6f, 0x33, + 0x02, 0x20, 0x01, 0x28, 0x09, 0x52, 0x07, 0x6c, 0x6f, 0x67, 0x46, 0x69, 0x6c, 0x65, 0x22, 0xa4, + 0x01, 0x0a, 0x12, 0x47, 0x65, 0x74, 0x4c, 0x4c, 0x4d, 0x4c, 0x6f, 0x67, 0x73, 0x52, 0x65, 0x73, + 0x70, 0x6f, 0x6e, 0x73, 0x65, 0x12, 0x21, 0x0a, 0x0c, 0x72, 0x65, 0x71, 0x75, 0x65, 0x73, 0x74, + 0x5f, 0x68, 0x74, 0x6d, 0x6c, 0x18, 0x01, 0x20, 0x01, 0x28, 0x09, 0x52, 0x0b, 0x72, 0x65, 0x71, + 0x75, 0x65, 0x73, 0x74, 0x48, 0x74, 0x6d, 0x6c, 0x12, 0x23, 0x0a, 0x0d, 0x72, 0x65, 0x73, 0x70, + 0x6f, 0x6e, 0x73, 0x65, 0x5f, 0x68, 0x74, 0x6d, 0x6c, 0x18, 0x02, 0x20, 0x01, 0x28, 0x09, 0x52, + 0x0c, 0x72, 0x65, 0x73, 0x70, 0x6f, 0x6e, 0x73, 0x65, 0x48, 0x74, 0x6d, 0x6c, 0x12, 0x21, 0x0a, + 0x0c, 0x72, 0x65, 0x71, 0x75, 0x65, 0x73, 0x74, 0x5f, 0x6a, 0x73, 0x6f, 0x6e, 0x18, 0x03, 0x20, + 0x01, 0x28, 0x09, 0x52, 0x0b, 0x72, 0x65, 0x71, 0x75, 0x65, 0x73, 0x74, 0x4a, 0x73, 0x6f, 0x6e, + 0x12, 0x23, 0x0a, 0x0d, 0x72, 0x65, 0x73, 0x70, 0x6f, 0x6e, 0x73, 0x65, 0x5f, 0x6a, 0x73, 0x6f, + 0x6e, 0x18, 0x04, 0x20, 0x01, 0x28, 0x09, 0x52, 0x0c, 0x72, 0x65, 0x73, 0x70, 0x6f, 0x6e, 0x73, + 0x65, 0x4a, 0x73, 0x6f, 0x6e, 0x32, 0xc8, 0x02, 0x0a, 0x0b, 0x4c, 0x6f, 0x67, 
0x73, 0x53, 0x65, + 0x72, 0x76, 0x69, 0x63, 0x65, 0x12, 0x47, 0x0a, 0x08, 0x47, 0x65, 0x74, 0x54, 0x72, 0x61, 0x63, + 0x65, 0x12, 0x1b, 0x2e, 0x66, 0x6f, 0x79, 0x6c, 0x65, 0x2e, 0x6c, 0x6f, 0x67, 0x73, 0x2e, 0x47, + 0x65, 0x74, 0x54, 0x72, 0x61, 0x63, 0x65, 0x52, 0x65, 0x71, 0x75, 0x65, 0x73, 0x74, 0x1a, 0x1c, + 0x2e, 0x66, 0x6f, 0x79, 0x6c, 0x65, 0x2e, 0x6c, 0x6f, 0x67, 0x73, 0x2e, 0x47, 0x65, 0x74, 0x54, + 0x72, 0x61, 0x63, 0x65, 0x52, 0x65, 0x73, 0x70, 0x6f, 0x6e, 0x73, 0x65, 0x22, 0x00, 0x12, 0x50, + 0x0a, 0x0b, 0x47, 0x65, 0x74, 0x42, 0x6c, 0x6f, 0x63, 0x6b, 0x4c, 0x6f, 0x67, 0x12, 0x1e, 0x2e, + 0x66, 0x6f, 0x79, 0x6c, 0x65, 0x2e, 0x6c, 0x6f, 0x67, 0x73, 0x2e, 0x47, 0x65, 0x74, 0x42, 0x6c, + 0x6f, 0x63, 0x6b, 0x4c, 0x6f, 0x67, 0x52, 0x65, 0x71, 0x75, 0x65, 0x73, 0x74, 0x1a, 0x1f, 0x2e, + 0x66, 0x6f, 0x79, 0x6c, 0x65, 0x2e, 0x6c, 0x6f, 0x67, 0x73, 0x2e, 0x47, 0x65, 0x74, 0x42, 0x6c, + 0x6f, 0x63, 0x6b, 0x4c, 0x6f, 0x67, 0x52, 0x65, 0x73, 0x70, 0x6f, 0x6e, 0x73, 0x65, 0x22, 0x00, + 0x12, 0x4d, 0x0a, 0x0a, 0x47, 0x65, 0x74, 0x4c, 0x4c, 0x4d, 0x4c, 0x6f, 0x67, 0x73, 0x12, 0x1d, + 0x2e, 0x66, 0x6f, 0x79, 0x6c, 0x65, 0x2e, 0x6c, 0x6f, 0x67, 0x73, 0x2e, 0x47, 0x65, 0x74, 0x4c, + 0x4c, 0x4d, 0x4c, 0x6f, 0x67, 0x73, 0x52, 0x65, 0x71, 0x75, 0x65, 0x73, 0x74, 0x1a, 0x1e, 0x2e, + 0x66, 0x6f, 0x79, 0x6c, 0x65, 0x2e, 0x6c, 0x6f, 0x67, 0x73, 0x2e, 0x47, 0x65, 0x74, 0x4c, 0x4c, + 0x4d, 0x4c, 0x6f, 0x67, 0x73, 0x52, 0x65, 0x73, 0x70, 0x6f, 0x6e, 0x73, 0x65, 0x22, 0x00, 0x12, + 0x4f, 0x0a, 0x06, 0x53, 0x74, 0x61, 0x74, 0x75, 0x73, 0x12, 0x20, 0x2e, 0x66, 0x6f, 0x79, 0x6c, + 0x65, 0x2e, 0x6c, 0x6f, 0x67, 0x73, 0x2e, 0x47, 0x65, 0x74, 0x4c, 0x6f, 0x67, 0x73, 0x53, 0x74, + 0x61, 0x74, 0x75, 0x73, 0x52, 0x65, 0x71, 0x75, 0x65, 0x73, 0x74, 0x1a, 0x21, 0x2e, 0x66, 0x6f, + 0x79, 0x6c, 0x65, 0x2e, 0x6c, 0x6f, 0x67, 0x73, 0x2e, 0x47, 0x65, 0x74, 0x4c, 0x6f, 0x67, 0x73, + 0x53, 0x74, 0x61, 0x74, 0x75, 0x73, 0x52, 0x65, 0x73, 0x70, 0x6f, 0x6e, 0x73, 0x65, 0x22, 0x00, + 0x42, 0x9a, 0x01, 
0x0a, 0x0e, 0x63, 0x6f, 0x6d, 0x2e, 0x66, 0x6f, 0x79, 0x6c, 0x65, 0x2e, 0x6c, + 0x6f, 0x67, 0x73, 0x42, 0x0b, 0x54, 0x72, 0x61, 0x63, 0x65, 0x73, 0x50, 0x72, 0x6f, 0x74, 0x6f, + 0x50, 0x01, 0x5a, 0x32, 0x67, 0x69, 0x74, 0x68, 0x75, 0x62, 0x2e, 0x63, 0x6f, 0x6d, 0x2f, 0x6a, + 0x6c, 0x65, 0x77, 0x69, 0x2f, 0x66, 0x6f, 0x79, 0x6c, 0x65, 0x2f, 0x70, 0x72, 0x6f, 0x74, 0x6f, + 0x73, 0x2f, 0x67, 0x6f, 0x2f, 0x66, 0x6f, 0x79, 0x6c, 0x65, 0x2f, 0x6c, 0x6f, 0x67, 0x73, 0x3b, + 0x6c, 0x6f, 0x67, 0x73, 0x70, 0x62, 0xa2, 0x02, 0x03, 0x46, 0x4c, 0x58, 0xaa, 0x02, 0x0a, 0x46, + 0x6f, 0x79, 0x6c, 0x65, 0x2e, 0x4c, 0x6f, 0x67, 0x73, 0xca, 0x02, 0x0a, 0x46, 0x6f, 0x79, 0x6c, + 0x65, 0x5c, 0x4c, 0x6f, 0x67, 0x73, 0xe2, 0x02, 0x16, 0x46, 0x6f, 0x79, 0x6c, 0x65, 0x5c, 0x4c, + 0x6f, 0x67, 0x73, 0x5c, 0x47, 0x50, 0x42, 0x4d, 0x65, 0x74, 0x61, 0x64, 0x61, 0x74, 0x61, 0xea, + 0x02, 0x0b, 0x46, 0x6f, 0x79, 0x6c, 0x65, 0x3a, 0x3a, 0x4c, 0x6f, 0x67, 0x73, 0x62, 0x06, 0x70, + 0x72, 0x6f, 0x74, 0x6f, 0x33, } var ( diff --git a/protos/go/foyle/logs/traces.zap.go b/protos/go/foyle/logs/traces.zap.go index 3931b04d..3085f20b 100644 --- a/protos/go/foyle/logs/traces.zap.go +++ b/protos/go/foyle/logs/traces.zap.go @@ -7,11 +7,11 @@ import ( fmt "fmt" math "math" proto "github.com/golang/protobuf/proto" + _ "github.com/jlewi/foyle/protos/go/foyle/v1alpha1" + _ "github.com/stateful/runme/v3/pkg/api/gen/proto/go/runme/runner/v1" _ "google.golang.org/protobuf/types/known/structpb" _ "google.golang.org/protobuf/types/known/timestamppb" _ "github.com/stateful/runme/v3/pkg/api/gen/proto/go/runme/parser/v1" - _ "github.com/jlewi/foyle/protos/go/foyle/v1alpha1" - _ "github.com/stateful/runme/v3/pkg/api/gen/proto/go/runme/runner/v1" go_uber_org_zap_zapcore "go.uber.org/zap/zapcore" github_com_golang_protobuf_ptypes "github.com/golang/protobuf/ptypes" ) @@ -273,5 +273,11 @@ func (m *GetLLMLogsResponse) MarshalLogObject(enc go_uber_org_zap_zapcore.Object keyName = "response_html" // field response_html = 2 
enc.AddString(keyName, m.ResponseHtml) + keyName = "request_json" // field request_json = 3 + enc.AddString(keyName, m.RequestJson) + + keyName = "response_json" // field response_json = 4 + enc.AddString(keyName, m.ResponseJson) + return nil } diff --git a/protos/go/foyle/v1alpha1/agent.zap.go b/protos/go/foyle/v1alpha1/agent.zap.go index d3145807..c4acd663 100644 --- a/protos/go/foyle/v1alpha1/agent.zap.go +++ b/protos/go/foyle/v1alpha1/agent.zap.go @@ -7,8 +7,8 @@ import ( fmt "fmt" math "math" proto "github.com/golang/protobuf/proto" - _ "github.com/stateful/runme/v3/pkg/api/gen/proto/go/runme/parser/v1" _ "google.golang.org/protobuf/types/known/structpb" + _ "github.com/stateful/runme/v3/pkg/api/gen/proto/go/runme/parser/v1" go_uber_org_zap_zapcore "go.uber.org/zap/zapcore" ) diff --git a/tools/stripmetadata/main.go b/tools/stripmetadata/main.go index a4b678f7..7285fab0 100644 --- a/tools/stripmetadata/main.go +++ b/tools/stripmetadata/main.go @@ -81,7 +81,13 @@ func run() error { return errors.Wrapf(err, "Error getting current working directory") } - mdFiles, err := findMDFiles(context.Background(), cwd) + rootDir, err := filepath.Abs(filepath.Join(cwd, "..", "..")) + if err != nil { + return errors.Wrapf(err, "Error getting root directory") + } + docsDir := filepath.Join(rootDir, "docs", "content") + + mdFiles, err := findMDFiles(context.Background(), docsDir) if err != nil { return errors.Wrapf(err, "Error finding markdown files") } @@ -95,7 +101,6 @@ func run() error { } func main() { - processFile(context.Background(), "/Users/jlewi/git_foyle/docs/content/en/docs/learning/troubleshoot_learning.md") if err := run(); err != nil { fmt.Println("Error processing markdown: %+v", err) os.Exit(1)