-
Notifications
You must be signed in to change notification settings - Fork 10
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
* In 14 of our 424 evaluation examples, the input document sent to the model ends up being the empty string. * This is the result of how our doc tailer works. Our doc tailer imposes a length cap on the tail of the document. There was a bug in the tailer where, if the last cell in the document exceeded the cap (currently 1110 characters), an empty string would be returned. * This PR fixes that. If the last cell exceeds the length cap, we take the tail of that cell. * This PR also checks in the completer that the tail of the document is non-empty; if it is empty, we fail the completion rather than continuing to generate it. * Add a level1 assertion so we can measure how often we include less than one full cell in the prompt. * Fix #305
- Loading branch information
Showing
9 changed files
with
306 additions
and
137 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,105 @@ | ||
package docs | ||
|
||
import ( | ||
"context" | ||
"testing" | ||
|
||
"github.com/google/go-cmp/cmp" | ||
"github.com/jlewi/foyle/protos/go/foyle/v1alpha1" | ||
) | ||
|
||
func Test_Tailer(t *testing.T) { | ||
type testCase struct { | ||
name string | ||
Doc *v1alpha1.Doc | ||
MaxChars int | ||
Expected string | ||
} | ||
|
||
cases := []testCase{ | ||
{ | ||
name: "cell-longer-than-max-chars", | ||
Doc: &v1alpha1.Doc{ | ||
Blocks: []*v1alpha1.Block{ | ||
{ | ||
Kind: v1alpha1.BlockKind_MARKUP, | ||
Contents: "Cell1", | ||
}, | ||
{ | ||
Kind: v1alpha1.BlockKind_MARKUP, | ||
Contents: "Cell2\nCell3", | ||
}, | ||
}, | ||
}, | ||
MaxChars: 5, | ||
Expected: "Cell3\n", | ||
}, | ||
{ | ||
name: "multiple-cells", | ||
Doc: &v1alpha1.Doc{ | ||
Blocks: []*v1alpha1.Block{ | ||
{ | ||
Kind: v1alpha1.BlockKind_MARKUP, | ||
Contents: "Cell1", | ||
}, | ||
{ | ||
Kind: v1alpha1.BlockKind_MARKUP, | ||
Contents: "Cell2", | ||
}, | ||
{ | ||
Kind: v1alpha1.BlockKind_MARKUP, | ||
Contents: "Cell3", | ||
}, | ||
}, | ||
}, | ||
MaxChars: 12, | ||
Expected: "Cell2\nCell3\n", | ||
}, | ||
} | ||
|
||
for _, c := range cases { | ||
tailer := NewTailer(context.Background(), c.Doc.Blocks, c.MaxChars) | ||
actual := tailer.Text() | ||
if d := cmp.Diff(c.Expected, actual); d != "" { | ||
t.Fatalf("Expected text to be %s but got %s; diff:\n%v", c.Expected, tailer.Text(), d) | ||
} | ||
} | ||
} | ||
|
||
func Test_tailLines(t *testing.T) { | ||
type testCase struct { | ||
name string | ||
Contents string | ||
MaxChars int | ||
Expected string | ||
} | ||
|
||
cases := []testCase{ | ||
{ | ||
name: "last-line-exceeds-max-chars", | ||
Contents: "line1\nline2", | ||
MaxChars: 2, | ||
Expected: "line2", | ||
}, | ||
{ | ||
name: "all-lines", | ||
Contents: "line1\nline2", | ||
MaxChars: 30, | ||
Expected: "line1\nline2", | ||
}, | ||
{ | ||
name: "some-lines", | ||
Contents: "line1\nline2\nline3", | ||
MaxChars: 10, | ||
Expected: "line2\nline3", | ||
}, | ||
} | ||
|
||
for _, c := range cases { | ||
t.Run(c.name, func(t *testing.T) { | ||
if tailLines(c.Contents, c.MaxChars) != c.Expected { | ||
t.Fatalf("Expected text to be %s but got %s", c.Expected, tailLines(c.Contents, c.MaxChars)) | ||
} | ||
}) | ||
} | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.
Oops, something went wrong.
Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.
Oops, something went wrong.
Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.
Oops, something went wrong.
Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.
Oops, something went wrong.
Oops, something went wrong.