From 86eccdfbcea0de860bd5ceca634bd8ee37e3d1c1 Mon Sep 17 00:00:00 2001 From: Saibo-creator <53392976+Saibo-creator@users.noreply.github.com> Date: Sun, 9 Jun 2024 18:41:38 +0200 Subject: [PATCH] fix: update func api in debugging_custom_grammars.md (#56) --- docs/debugging_custom_grammars.md | 8 ++++---- transformers_cfg/recognizer.py | 23 ++--------------------- 2 files changed, 6 insertions(+), 25 deletions(-) diff --git a/docs/debugging_custom_grammars.md b/docs/debugging_custom_grammars.md index 4542206..496c0bf 100644 --- a/docs/debugging_custom_grammars.md +++ b/docs/debugging_custom_grammars.md @@ -90,18 +90,18 @@ We provide a simple script to do this: ```python from transformers_cfg.parser import parse_ebnf -from transformers_cfg.recognizer import GrammarRecognizer +from transformers_cfg.recognizer import StringRecognizer with open("examples/grammars/json.ebnf", "r") as file: input_text = file.read() parsed_grammar = parse_ebnf(input_text) start_rule_id = parsed_grammar.symbol_table["root"] -recognizer = GrammarRecognizer(parsed_grammar.grammar_encoding, start_rule_id) +recognizer = StringRecognizer(parsed_grammar.grammar_encoding, start_rule_id) # Test the grammar with a simple input json_input = '{"foo": "bar", "baz": "bat"}' -is_accepted = recognizer._accept_prefix(json_input, recognizer.stacks) +is_accepted = recognizer._accept_prefix(json_input) print(is_accepted) ``` @@ -112,7 +112,7 @@ N.B. 
the recognizer can accept partial input, so you can try the following: ```python json_input = '{"foo": "bar"' -is_accepted = recognizer._accept_prefix(json_input, recognizer.stacks) +is_accepted = recognizer._accept_prefix(json_input) print(is_accepted) ``` diff --git a/transformers_cfg/recognizer.py b/transformers_cfg/recognizer.py index dfbc92e..28db72b 100644 --- a/transformers_cfg/recognizer.py +++ b/transformers_cfg/recognizer.py @@ -444,28 +444,9 @@ def char_acceptance_at_element(self, element_offset): logging.debug(acceptance) return acceptance - # def _consume_code_points_new( - # self, code_points: List[int], stacks: Set[Tuple[int]], verbose=False - # ) -> Set[Tuple[int]]: - # new_stacks: Set[Tuple[int]] = set() - # for stack in stacks: - # new_stacks.update( - # self._consume_code_points_per_stack(tuple(code_points), stack, verbose) - # ) - # return new_stacks - # - # @lru_cache(maxsize=30000) - # def _consume_code_points_per_stack( - # self, code_points: Tuple[int], stack: Tuple[int], verbose=False - # ) -> Set[Tuple[int]]: - # stacks = {stack} - # - # for code_point in code_points: - # # Update the stacks variable by consuming each code point. - # stacks = self._consume_code_point_for_all_stacks(code_point, (stack,)) - # - # return stacks +# Backward compatibility: keep GrammarRecognizer as an alias of StringRecognizer +GrammarRecognizer = StringRecognizer if __name__ == "__main__": # set logging level