Skip to content

Commit

Permalink
handle lazy-greedy lexeme after greedy lexeme
Browse files Browse the repository at this point in the history
  • Loading branch information
mmoskal committed Jun 24, 2024
1 parent a02ecb1 commit ba9a917
Show file tree
Hide file tree
Showing 3 changed files with 48 additions and 5 deletions.
2 changes: 1 addition & 1 deletion controllers/llguidance_ctrl/run_g.py
Original file line number Diff line number Diff line change
Expand Up @@ -268,7 +268,7 @@ def character_maker2(lm, id, description, valid_weapons):
grm = guidance.json(
schema={"type": "object", "properties": {"a": {"type": "integer"}}}
)
assert grm.match('{"a": 1} ')
assert grm.match('{"a": 1}')

max_tokens = 250

Expand Down
18 changes: 17 additions & 1 deletion controllers/llguidance_ctrl/src/earley/lexer.rs
Original file line number Diff line number Diff line change
Expand Up @@ -95,6 +95,21 @@ impl Lexer {
})
}

/// If the DFA forces end-of-input immediately after `state`, the byte `b`
/// that led here forms a complete single-byte lexeme on its own; return it.
/// Otherwise return `None` and let normal lexing continue.
pub fn check_for_single_byte_lexeme(&mut self, state: StateID, b: u8) -> Option<PreLexeme> {
    // Guard: only applies when the DFA says no further byte can extend the lexeme.
    if self.dfa.next_byte(state) != NextByte::ForcedEOI {
        return None;
    }
    // Pick the lowest-numbered lexeme still possible in this state.
    let lexeme_idx = self
        .state_info(state)
        .possible
        .first_bit_set()
        .expect("no allowed lexemes");
    Some(PreLexeme {
        idx: LexemeIdx::new(lexeme_idx),
        byte: Some(b),
        byte_next_row: false,
        hidden_len: 0,
    })
}

#[inline(always)]
pub fn advance(&mut self, prev: StateID, byte: u8, enable_logging: bool) -> LexerResult {
let state = self.dfa.transition(prev, byte);
Expand Down Expand Up @@ -126,7 +141,8 @@ impl Lexer {
LexerResult::Error
}
} else {
let can_stop_now = !self.spec.greedy || self.dfa.next_byte(state) == NextByte::ForcedEOI;
let can_stop_now =
!self.spec.greedy || self.dfa.next_byte(state) == NextByte::ForcedEOI;
let info = self.state_info(state);
if can_stop_now && info.is_accepting() {
LexerResult::Lexeme(PreLexeme {
Expand Down
33 changes: 30 additions & 3 deletions controllers/llguidance_ctrl/src/earley/parser.rs
Original file line number Diff line number Diff line change
Expand Up @@ -1192,9 +1192,10 @@ impl Parser {
// save lexeme at the last row, before we mess with the stack
self.row_infos[added_row - 1].lexeme = lexeme;
debug!(
"lex: re-start {:?} (via {:?})",
"lex: re-start {:?} (via {:?}); allowed: {}",
no_hidden.lexer_state,
transition_byte.map(|b| b as char)
transition_byte.map(|b| b as char),
self.lexer_spec().dbg_lexeme_set(added_row_lexemes)
);
}
no_hidden
Expand Down Expand Up @@ -1303,14 +1304,40 @@ impl Parser {
};

if scan_res {
let no_hidden = self.lexer_state_for_added_row(lexeme, transition_byte);
let mut no_hidden = self.lexer_state_for_added_row(lexeme, transition_byte);

if pre_lexeme.hidden_len > 0 {
self.handle_hidden_bytes(no_hidden, lexeme_byte, pre_lexeme);
} else {
if pre_lexeme.byte_next_row && no_hidden.lexer_state.is_dead() {
return false;
}
if let Some(b) = transition_byte {
if let Some(second_lexeme) = self
.lexer
.check_for_single_byte_lexeme(no_hidden.lexer_state, b)
{
if self.scratch.definitive {
debug!("single byte lexeme: {:?}", second_lexeme);
}
no_hidden.byte = None;
self.lexer_stack.push(no_hidden);

// disallow recursion depth > 2
assert!(pre_lexeme.byte_next_row);
assert!(!second_lexeme.byte_next_row);

let r = self.advance_parser(second_lexeme);
if r {
let new_top = self.lexer_stack.pop().unwrap();
*self.lexer_stack.last_mut().unwrap() = new_top;
return true;
} else {
self.lexer_stack.pop();
return false;
}
}
}
self.lexer_stack.push(no_hidden);
}
if self.scratch.definitive {
Expand Down

0 comments on commit ba9a917

Please sign in to comment.