From 752a08d85d325eb997b81e20cde69cf6eb617f3e Mon Sep 17 00:00:00 2001
From: Michal Moskal
Date: Fri, 14 Jun 2024 16:50:17 +0000
Subject: [PATCH] allow for ByteTokenizer

---
 py/guidance              | 2 +-
 py/llguidance/rust/py.rs | 7 +++++--
 2 files changed, 6 insertions(+), 3 deletions(-)

diff --git a/py/guidance b/py/guidance
index 1ca0ff2b..4db20be1 160000
--- a/py/guidance
+++ b/py/guidance
@@ -1 +1 @@
-Subproject commit 1ca0ff2b876dfc86dadaa8b68ec55823089af0e8
+Subproject commit 4db20be1c2422be8c46cd499aff265907a2b58ca
diff --git a/py/llguidance/rust/py.rs b/py/llguidance/rust/py.rs
index 66d8742b..ae7af1f6 100644
--- a/py/llguidance/rust/py.rs
+++ b/py/llguidance/rust/py.rs
@@ -118,12 +118,15 @@ impl LLTokenizer {
             ));
         }
 
-        let tok_eos = tokenizer.getattr("eos_token_id")?.extract::<u32>()?;
+        let tokens = tokenizer.getattr("tokens")?.extract::<Vec<Vec<u8>>>()?;
+        let tok_eos = tokenizer
+            .getattr("eos_token_id")?
+            .extract::<Option<u32>>()?
+            .unwrap_or(tokens.len() as u32);
         let tok_bos = tokenizer
             .getattr("bos_token_id")?
             .extract::<u32>()
            .map_or(None, |v| Some(v));
-        let tokens = tokenizer.getattr("tokens")?.extract::<Vec<Vec<u8>>>()?;
         let info = TokRxInfo {
             vocab_size: tokens.len() as u32,
             tok_eos,
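
For context on what the binding now accepts: extract::<Option<u32>>() maps a
Python None eos_token_id to Rust None, and unwrap_or(tokens.len() as u32)
substitutes a synthetic EOS id one past the real vocabulary, which is also why
the "tokens" lookup is hoisted above tok_eos. A minimal Python sketch of the
duck-typed object the constructor reads via getattr(); the class name and the
256-byte vocabulary are illustrative assumptions, not the actual guidance
ByteTokenizer:

    class ByteTokenizerSketch:
        def __init__(self):
            # 256 single-byte tokens; no dedicated EOS/BOS entries.
            self.tokens: list[bytes] = [bytes([b]) for b in range(256)]
            # Before this patch, extract::<u32>() raised on None here;
            # now the binding accepts None and falls back to
            # len(self.tokens) as a synthetic EOS id.
            self.eos_token_id = None
            self.bos_token_id = None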