diff --git a/llmlingua/prompt_compressor.py b/llmlingua/prompt_compressor.py index 3a6aecd..971c7ca 100644 --- a/llmlingua/prompt_compressor.py +++ b/llmlingua/prompt_compressor.py @@ -26,10 +26,10 @@ TokenClfDataset, get_pure_token, is_begin_of_new_word, - replace_added_token, - seed_everything, process_structured_json_data, remove_consecutive_commas, + replace_added_token, + seed_everything, ) @@ -1279,7 +1279,6 @@ def sync_sentence(sentences, text): sentences = [nltk.sent_tokenize(c) for c in context] sentences = [sync_sentence(s, c) for s, c in zip(sentences, context)] - sentences = [sync_sentence(s, c) for s, c in zip(sentences, context)] dem_g, s2de, idx = defaultdict(set), defaultdict(int), 0 for idx_d, s in enumerate(sentences): for _ in s: diff --git a/llmlingua/utils.py b/llmlingua/utils.py index ddba1cc..7b6681c 100644 --- a/llmlingua/utils.py +++ b/llmlingua/utils.py @@ -1,12 +1,12 @@ +import json import os import random -import string -import json import re -import yaml +import string import numpy as np import torch +import yaml from torch.utils.data import Dataset