forked from turboderp/exllama
-
Notifications
You must be signed in to change notification settings - Fork 0
/
example_alt_generator.py
207 lines (144 loc) · 13 KB
/
example_alt_generator.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
from model import ExLlama, ExLlamaCache, ExLlamaConfig
from lora import ExLlamaLora
from tokenizer import ExLlamaTokenizer
from alt_generator import ExLlamaAltGenerator
import argparse
import torch
import sys
import os
import glob
import model_init
import time
config: ExLlamaConfig # Config for the model, loaded from config.json
model: ExLlama # Model, initialized with ExLlamaConfig
cache: ExLlamaCache # Cache for generation, tied to model
generator: ExLlamaAltGenerator # Generator
tokenizer: ExLlamaTokenizer # Tokenizer
lora: ExLlamaLora = None # (Optional) LoRA, remember to specify in generator settings
# Initialize model
def init_explicit():
global model, cache, config, generator, tokenizer, lora
# Directory containing model, tokenizer
model_directory = "/mnt/str/models/llama-7b-4bit-128g/"
# Locate files we need within that directory
tokenizer_path = os.path.join(model_directory, "tokenizer.model")
model_config_path = os.path.join(model_directory, "config.json")
st_pattern = os.path.join(model_directory, "*.safetensors")
model_path = glob.glob(st_pattern)[0]
# Create config, model, tokenizer and generator
config = ExLlamaConfig(model_config_path) # create config from config.json
config.model_path = model_path # supply path to model weights file
model = ExLlama(config) # create ExLlama instance and load the weights
tokenizer = ExLlamaTokenizer(tokenizer_path) # create tokenizer from tokenizer model file
cache = ExLlamaCache(model) # create cache for inference
generator = ExLlamaAltGenerator(model, tokenizer, cache) # create generator
# Load LoRA
lora_dir = None
if lora_dir is not None:
lora_config = os.path.join(lora_dir, "adapter_config.json")
lora = os.path.join(lora_dir, "adapter_model.bin")
lora = ExLlamaLora(model, lora_config, lora)
# Alternatively, initialize from command line args
def init_args():
global model, cache, config, generator, tokenizer, lora
# Global initialization
torch.set_grad_enabled(False)
torch.cuda._lazy_init()
# Parse arguments
parser = argparse.ArgumentParser(description = "Generator example")
model_init.add_args(parser)
parser.add_argument("-lora", "--lora", type = str, help = "Path to LoRA binary to use during benchmark")
parser.add_argument("-loracfg", "--lora_config", type = str, help = "Path to LoRA config to use during benchmark")
parser.add_argument("-ld", "--lora_dir", type = str, help = "Path to LoRA config and binary. to use during benchmark")
args = parser.parse_args()
model_init.post_parse(args)
model_init.get_model_files(args)
print_opts = []
model_init.print_options(args, print_opts)
# Paths
if args.lora_dir is not None:
args.lora_config = os.path.join(args.lora_dir, "adapter_config.json")
args.lora = os.path.join(args.lora_dir, "adapter_model.bin")
# Model globals
model_init.set_globals(args)
# Instantiate model and generator
config = model_init.make_config(args)
model = ExLlama(config)
cache = ExLlamaCache(model)
tokenizer = ExLlamaTokenizer(args.tokenizer)
model_init.print_stats(model)
# Load LoRA
lora = None
if args.lora:
print(f" -- LoRA config: {args.lora_config}")
print(f" -- Loading LoRA: {args.lora}")
if args.lora_config is None:
print(f" ## Error: please specify lora path to adapter_config.json")
sys.exit()
lora = ExLlamaLora(model, args.lora_config, args.lora)
if lora.bias_ignored:
print(f" !! Warning: LoRA zero bias ignored")
# Generator
generator = ExLlamaAltGenerator(model, tokenizer, cache)
# Intialize
# init_args()
init_explicit()
# Example one-shot generation
settings = ExLlamaAltGenerator.Settings()
settings.temperature = 0.75
settings.top_p = 0.8
prompt = "A bird in the hand is worth"
stop_conditions = [".", "!", "bush", tokenizer.newline_token_id]
output = generator.generate(prompt = prompt,
stop_conditions = stop_conditions,
max_new_tokens = 50,
gen_settings = settings)
print()
print(prompt + output)
print()
# Example of (implicit) cache reuse
context = """Albert Einstein (/ˈaɪnstaɪn/ EYEN-styne;[4] German: [ˈalbɛʁt ˈʔaɪnʃtaɪn] (listen); 14 March 1879 – 18 April 1955) was a German-born theoretical physicist,[5] widely held to be one of the greatest and most influential scientists of all time. Best known for developing the theory of relativity, he also made important contributions to quantum mechanics, and was thus a central figure in the revolutionary reshaping of the scientific understanding of nature that modern physics accomplished in the first decades of the twentieth century.[1][6] His mass–energy equivalence formula E = mc2, which arises from relativity theory, has been called "the world's most famous equation".[7] He received the 1921 Nobel Prize in Physics "for his services to theoretical physics, and especially for his discovery of the law of the photoelectric effect",[8] a pivotal step in the development of quantum theory. His work is also known for its influence on the philosophy of science.[9][10] In a 1999 poll of 130 leading physicists worldwide by the British journal Physics World, Einstein was ranked the greatest physicist of all time.[11] His intellectual achievements and originality have made Einstein synonymous with genius.[12]
In 1905, a year sometimes described as his annus mirabilis (miracle year), Einstein published four groundbreaking papers.[13] These outlined a theory of the photoelectric effect, explained Brownian motion, introduced his special theory of relativity—a theory which addressed the inability of classical mechanics to account satisfactorily for the behavior of the electromagnetic field—and demonstrated that if the special theory is correct, mass and energy are equivalent to each other. In 1915, he proposed a general theory of relativity that extended his system of mechanics to incorporate gravitation. A cosmological paper that he published the following year laid out the implications of general relativity for the modeling of the structure and evolution of the universe as a whole.[14][15] The middle part of his career also saw him making important contributions to statistical mechanics and quantum theory. Especially notable was his work on the quantum physics of radiation, in which light consists of particles, subsequently called photons.
For much of the last phase of his academic life, Einstein worked on two endeavors that proved ultimately unsuccessful. Firstly, he fought a long rearguard action against quantum theory's introduction of fundamental randomness into science's picture of the world, objecting that "God does not play dice".[16] Secondly, he attempted to devise a unified field theory by generalizing his geometric theory of gravitation to include electromagnetism too. As a result, he became increasingly isolated from the mainstream of modern physics.
Born in the German Empire, Einstein moved to Switzerland in 1895, forsaking his German citizenship (as a subject of the Kingdom of Württemberg)[note 1] the following year. In 1897, at the age of seventeen, he enrolled in the mathematics and physics teaching diploma program at the Swiss Federal polytechnic school in Zürich, graduating in 1900. In 1901, he acquired Swiss citizenship, which he kept for the rest of his life. In 1903, he secured a permanent position at the Swiss Patent Office in Bern. In 1905, he submitted a successful PhD dissertation to the University of Zurich. In 1914, he moved to Berlin in order to join the Prussian Academy of Sciences and the Humboldt University of Berlin. In 1917, he became director of the Kaiser Wilhelm Institute for Physics; he also became a German citizen again, this time as a subject of the Kingdom of Prussia.[note 1] In 1933, while he was visiting the United States, Adolf Hitler came to power in Germany. Alienated by the policies of the newly elected Nazi government,[17] Einstein decided to remain in the US, and was granted American citizenship in 1940.[18] On the eve of World War II, he endorsed a letter to President Franklin D. Roosevelt alerting him to the potential German nuclear weapons program and recommending that the US begin similar research. Einstein supported the Allies but generally viewed the idea of nuclear weapons with great dismay.[19]
Albert Einstein was born in Ulm,[5] in the Kingdom of Württemberg in the German Empire, on 14 March 1879.[20][21] His parents, secular Ashkenazi Jews, were Hermann Einstein, a salesman and engineer, and Pauline Koch. In 1880, the family moved to Munich, where Einstein's father and his uncle Jakob founded Elektrotechnische Fabrik J. Einstein & Cie, a company that manufactured electrical equipment based on direct current.[5]
Albert attended a Catholic elementary school in Munich from the age of five. When he was eight, he was transferred to the Luitpold-Gymnasium (now known as the Albert-Einstein-Gymnasium [de]), where he received advanced primary and then secondary school education.[22]
In 1894, Hermann and Jakob's company tendered for a contract to install electric lighting in Munich, but without success—they lacked the capital that would have been required to update their technology from direct current to the more efficient, alternating current alternative.[23] The failure of their bid forced them to sell their Munich factory and search for new opportunities elsewhere. The Einstein family moved to Italy, first to Milan and a few months later to Pavia, where they settled in Palazzo Cornazzani, a medieval building which, at different times, had been the home of Ugo Foscolo, Contardo Ferrini and Ada Negri.[24] Einstein, then fifteen, stayed behind in Munich in order to finish his schooling. His father wanted him to study electrical engineering, but he was a fractious pupil who found the Gymnasium's regimen and teaching methods far from congenial. He later wrote that the school's policy of strict rote learning was harmful to creativity. At the end of December 1894, a letter from a doctor persuaded the Luitpold's authorities to release him from its care, and he joined his family in Pavia.[25] While in Italy as a teenager, he wrote an essay entitled "On the Investigation of the State of the Ether in a Magnetic Field".[26][27]
Einstein excelled at physics and mathematics from an early age, and soon acquired the mathematical expertise normally only found in a child several years his senior. He began teaching himself algebra, calculus and Euclidean geometry when he was twelve; he made such rapid progress that he discovered an original proof of the Pythagorean theorem before his thirteenth birthday.[28][29][30] A family tutor, Max Talmud, said that only a short time after he had given the twelve year old Einstein a geometry textbook, the boy "had worked through the whole book. He thereupon devoted himself to higher mathematics ... Soon the flight of his mathematical genius was so high I could not follow."[31] Einstein recorded that he had "mastered integral and differential calculus" while still just fourteen.[29] His love of algebra and geometry was so great that at twelve, he was already confident that nature could be understood as a "mathematical structure".[31]
At thirteen, when his range of enthusiasms had broadened to include music and philosophy,[32] Einstein was introduced to Kant's Critique of Pure Reason. Kant became his favorite philosopher; according to his tutor, "At the time he was still a child, only thirteen years old, yet Kant's works, incomprehensible to ordinary mortals, seemed to be clear to him."[31]"""
def timer(func):
t = time.time()
ret = func()
t = time.time() - t
return ret, t
settings = ExLlamaAltGenerator.Settings()
settings.temperature = 0.95
settings.top_k = 80
settings.typical = 0.8
questions = ["When was Albert Einstein born?",
"How many groundbreaking papers did Einstein publish in 1905?",
"Where did Einstein move in 1895?",
"When did Einstein graduate?"]
stop_conditions = [tokenizer.newline_token_id, tokenizer.eos_token_id]
for question in questions:
output, t = timer(lambda: generator.generate(context + "\nQ: " + question + "\nA:", stop_conditions, 100, settings))
print(f"Generated in {t:.3f} seconds: {question} -> {output}")
# Streaming example
settings = ExLlamaAltGenerator.Settings()
settings.temperature = 1.00
settings.top_k = 80
settings.top_p = 0.9
settings.lora = lora
prompt = "Our story begins in the town of Auchtermuchty, where once"
print()
print(prompt, end = "")
sys.stdout.flush()
output = generator.begin_stream(prompt = prompt,
stop_conditions = [],
max_new_tokens = 1000,
gen_settings = settings)
while True:
chunk, eos = generator.stream()
print(chunk, end = "")
sys.stdout.flush()
if eos: break