Skip to content

Commit

Permalink
avoid EOS turning into bytes
Browse files Browse the repository at this point in the history
  • Loading branch information
mmoskal committed Jun 24, 2024
1 parent 7246614 commit 8c2c035
Showing 1 changed file with 6 additions and 0 deletions.
6 changes: 6 additions & 0 deletions py/llguidance/rust/py.rs
Original file line number Diff line number Diff line change
Expand Up @@ -147,6 +147,12 @@ impl LLTokenizer {
.getattr("bos_token_id")?
.extract::<Option<u32>>()?;

// Clear the byte strings of EOS (and BOS, when one is defined) so that decode_bytes([EOS]) etc. yields no bytes.
tokens[tok_eos as usize] = vec![];
if let Some(t) = tok_bos {
tokens[t as usize] = vec![];
}

let info = TokRxInfo {
vocab_size: tokens.len() as u32,
tok_eos,
Expand Down

0 comments on commit 8c2c035

Please sign in to comment.