tekst_entropi.py
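# Compute the per-token surprisal (negative log-probability) of a text under
# the NorMistral-7B language model, printing one token and its surprisal per line.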
from llama_cpp import Llama
from tqdm import tqdm
llm = Llama.from_pretrained(
    repo_id="norallm/normistral-7b-warm",  # Hugging Face repository containing the GGUF files.
    filename="*Q4_K_M.gguf",  # Suffix of the filename selecting the quantization level.
    n_ctx=768,        # Max sequence length; longer sequences require much more memory.
    n_threads=4,      # Number of CPU threads; tune to your system.
    n_gpu_layers=0,   # Number of layers to offload to GPU, if GPU acceleration is available.
    logits_all=True,  # Keep logits for every position so eval_logits is populated below.
)
# Alternative: load a local GGUF file directly.
# llm = Llama(
#     model_path="/itf-fi-ml/shared/ml-models/NoraLLm/normistral-7b-warm.Q8_0.gguf",
#     n_threads=8,
#     n_gpu_layers=-1,  # Offload all layers to the GPU.
#     # seed=1337,      # Uncomment to set a specific seed.
#     # n_ctx=2048,     # Uncomment to increase the context window.
# )
with open("pg13041.txt") as f:
    text = f.read()
llm.reset()
text = text.encode()  # tokenize() expects bytes
tokens = llm.tokenize(text)

# Read in the first token so there is context for predicting the next one.
llm.eval([tokens[0]])
for tk in tqdm(tokens[1:]):
    # Log-probabilities over the vocabulary, given everything evaluated so far.
    probs = llm.logits_to_logprobs(llm.eval_logits[-1])
    # Print the token and its surprisal (negative log-probability, in nats).
    # errors="replace" guards against tokens that split a multi-byte UTF-8 character.
    print(f"{llm.detokenize([tk]).decode(errors='replace')}\t{-probs[tk]}")
    llm.eval([tk])
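
# A minimal sketch of a possible extension (not in the original script): run the
# same pass again, but accumulate the surprisals and report the mean entropy of
# the text in bits per token. This assumes logits_to_logprobs returns
# natural-log probabilities, so the total is in nats and is divided by log(2).
import math

llm.reset()
llm.eval([tokens[0]])
total_nats = 0.0
for tk in tqdm(tokens[1:]):
    probs = llm.logits_to_logprobs(llm.eval_logits[-1])
    total_nats += -probs[tk]  # surprisal of this token, in nats
    llm.eval([tk])
print(f"Mean entropy: {total_nats / (len(tokens) - 1) / math.log(2):.3f} bits per token")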