LLMChatWebLLM.ts
import { InitProgressReport, MLCEngine } from "@mlc-ai/web-llm";
import { contextType } from "../../App";
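/**
 * Thin wrapper around @mlc-ai/web-llm's MLCEngine: picks a model that fits
 * the device's GPU limits, loads it in the browser, and exposes a streaming
 * chat interface. A usage sketch follows the class at the bottom of this file.
 */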
export default class LLMChatWebLLM {
  private modelName: string;
  private client: MLCEngine;
  private initStatus: "not start" | "working" | "done" | "error";
  public initProgress: null | string; // latest progress text, or the error message on failure

  constructor(modelName: string) {
    // Callback used by the engine to report model-loading progress.
    const initProgressCallback = (initProgress: InitProgressReport) => {
      console.log(initProgress);
      this.initProgress = initProgress.text;
    };
    this.initStatus = "not start";
    this.initProgress = null;
    this.client = new MLCEngine({
      initProgressCallback,
      logLevel: "DEBUG",
    });
    this.modelName = modelName; // currently unused: init() selects the model itself
  }
  async init() {
    // Loading is asynchronous and can take a long time: the model weights are
    // downloaded and compiled in the browser. Available model IDs are listed
    // in https://github.com/mlc-ai/web-llm/blob/main/src/config.ts
    this.initStatus = "working";
    try {
      // Use the GPU's maximum storage-buffer binding size to decide which model fits.
      const maxStorageBufferBindingSize =
        await this.client.getMaxStorageBufferBindingSize();
      // Small fallback model for low-memory devices.
      let selectedModel = "SmolLM2-135M-Instruct-q0f32-MLC";
      if (maxStorageBufferBindingSize >= 1073741800) {
        // Threshold is just under 1 GiB (2^30 = 1073741824 bytes).
        selectedModel = "Llama-3.2-3B-Instruct-q4f16_1-MLC";
      }
      console.log(`webLLM: select ${selectedModel}`);
      await this.client.reload(selectedModel);
      this.initStatus = "done";
    } catch (e) {
      this.initStatus = "error";
      this.initProgress = (e as Error).message;
      alert("Error: " + (e as Error).message);
    }
  }
  public getInitStatus() {
    return this.initStatus;
  }
  async ask(context: contextType[]) {
    // Passing the request object inline keeps `stream: true` as a literal,
    // so the streaming overload of create() is selected.
    const stream = await this.client.chat.completions.create({
      // model: this.modelName,
      messages: context,
      stream: true,
      max_tokens: 200,
      temperature: 0.75,
    });
    return {
      stream,
      // Wrap in an arrow function so `this` stays bound to the engine when
      // the caller invokes interruptGenerate() later.
      interruptGenerate: () => this.client.interruptGenerate(),
    };
  }
}
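
// Usage sketch (illustrative, not part of the original file). It assumes that
// contextType matches web-llm's OpenAI-style message shape ({ role, content });
// `runExample` and `onToken` are hypothetical names.
export async function runExample(
  history: contextType[],
  onToken: (token: string) => void,
) {
  const chat = new LLMChatWebLLM("unused"); // modelName is currently ignored
  await chat.init(); // long-running: downloads and compiles the model weights
  if (chat.getInitStatus() !== "done") return;
  // interruptGenerate is also returned and could be wired to a "stop" button.
  const { stream } = await chat.ask(history);
  for await (const chunk of stream) {
    onToken(chunk.choices[0]?.delta?.content ?? "");
  }
}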