fix eos handling
mmoskal committed Jun 24, 2024
1 parent 8c2c035 commit 6fa14ca
Showing 3 changed files with 20 additions and 5 deletions.
6 changes: 5 additions & 1 deletion controllers/llguidance_ctrl/run_g.py
@@ -263,7 +263,7 @@ def character_maker2(lm, id, description, valid_weapons):
prompt = ""
grm = guidance.json(schema={"type": "null"})

assert grm.match("null")
# assert grm.match("null")

grm = guidance.json(
"OBJ",
@@ -280,6 +280,10 @@ def character_maker2(lm, id, description, valid_weapons):
# g = zero_or_more("a") + "b"
# assert not g.match("b")

# lm = guidance.models.Mock(b"<s>1234233234<s>")
# grammar = one_or_more(select(["1", "2"]))
# lm += grammar

max_tokens = 250

serialized = grm.ll_serialize()
11 changes: 9 additions & 2 deletions controllers/llguidance_ctrl/src/earley/parser.rs
@@ -894,8 +894,15 @@ impl Parser {

debug!(" flush_lexer() OK");

if lexer_eos {
return true;
if mv == ModelVariable::eos_token() {
if lexer_eos {
return true;
}
// This is really for EOS tokens in the middle of the grammar
// that need to be eaten; so don't check for accepting state here
// if self.is_accepting() {
// return true;
// }
}

self.scratch.new_row(self.curr_row().last_item);
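As a reading aid, here is a minimal, self-contained Rust sketch of the new control flow in scan_model_variable. ToyParser and its lexer_eos field are invented stand-ins for the real Parser internals; only the branching mirrors the change above: the lexer-EOS early return is now taken only when the scanned variable is the EOS token, and an EOS that arrives in the middle of the grammar falls through to a normal scan so it can be consumed as a grammar symbol.

// Toy stand-ins for the real parser types; only the control flow is the point.
#[allow(dead_code)]
#[derive(PartialEq, Clone, Copy)]
enum ModelVariable {
    EosToken,
    Other,
}

impl ModelVariable {
    fn eos_token() -> Self {
        ModelVariable::EosToken
    }
}

struct ToyParser {
    lexer_eos: bool, // in the real parser this would come from flush_lexer()
}

impl ToyParser {
    // Returns true when the EOS is fully handled here; false means
    // "fall through and scan the variable against the grammar".
    fn scan_model_variable(&mut self, mv: ModelVariable) -> bool {
        if mv == ModelVariable::eos_token() {
            if self.lexer_eos {
                // EOS at the end of the input: accept immediately.
                return true;
            }
            // EOS in the middle of the grammar: do not return early;
            // let it be eaten by the normal scan below.
        }
        // ... the real parser scans a new Earley row here ...
        false
    }
}

fn main() {
    let mut p = ToyParser { lexer_eos: false };
    assert!(!p.scan_model_variable(ModelVariable::eos_token())); // mid-grammar EOS
    p.lexer_eos = true;
    assert!(p.scan_model_variable(ModelVariable::eos_token())); // final EOS
    println!("ok");
}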
8 changes: 6 additions & 2 deletions controllers/llguidance_ctrl/src/tokenparser.rs
@@ -202,14 +202,18 @@ impl TokenParser {
trie.tokens_dbg(&arg.tokens)
);

let mut has_eos = false;

if arg.tokens.contains(&trie.eos_token()) {
assert!(arg.tokens.len() == 1);
if self.parser.scan_model_variable(ModelVariable::eos_token()) {
// it got scanned correctly, so we remove it
infoln!(self, "scanned eos_token");
arg.tokens.clear();
} else {
infoln!(self, "didn't scan eos_token; saving");
arg.save_tokens(&mut self.llm_tokens);
has_eos = true;
}
} else {
arg.save_tokens(&mut self.llm_tokens);
@@ -345,10 +349,10 @@
let no_pending_bytes = !self.parser.has_pending_lexeme_bytes();
let is_accepting = no_pending_bytes && row_accepting;
let can_advance = self.parser.can_advance();
let inner_done = empty_token_prefix && is_accepting && !can_advance;
let inner_done = empty_token_prefix && is_accepting && (!can_advance || has_eos);
infoln!(
self,
"inner_done: {inner_done}; can_advance: {can_advance}; \
"inner_done: {inner_done}; can_advance: {can_advance} (eos:{has_eos}); \
accept: {is_accepting} (row:{row_accepting} & lexer:{no_pending_bytes}); \
empty_token_prefix: {empty_token_prefix}"
);
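And a similarly hedged sketch of how the new has_eos flag changes the completion check in the token parser. The names mirror the diff, but the function below is a toy that takes the flags as plain booleans rather than reading real TokenParser state: an EOS token that could not be scanned now finishes the parse even when the grammar could, in principle, still advance.

// Toy version of the completion check; in the real code these flags are
// computed from the parser state, here they are passed in directly.
fn inner_done(
    empty_token_prefix: bool,
    is_accepting: bool, // accepting Earley row and no pending lexeme bytes
    can_advance: bool,  // the grammar could still consume more tokens
    has_eos: bool,      // an EOS token arrived but was not scanned
) -> bool {
    // Before the fix: empty_token_prefix && is_accepting && !can_advance.
    // After the fix: an unconsumed EOS also ends the parse.
    empty_token_prefix && is_accepting && (!can_advance || has_eos)
}

fn main() {
    // Grammar is accepting and could still grow, but the model emitted EOS.
    assert!(inner_done(true, true, true, true));
    // Without the EOS the old behaviour is unchanged.
    assert!(!inner_done(true, true, true, false));
    println!("ok");
}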
