-
Notifications
You must be signed in to change notification settings - Fork 6
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Merge pull request #269 from fixstars/feature/support-llm
Support LLM
- Loading branch information
Showing
32 changed files
with
42,299 additions
and
31 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,30 @@ | ||
1. Install dependencies | ||
|
||
``` | ||
python -m pip install -r requirements.txt | ||
``` | ||
2. Deploy latest ion-kit binaries with LLM support | ||
Build llama.cpp following the instructions in `ion-kit/src/bb/llm/config.cmake` | ||
Build ion-kit: | ||
``` | ||
cmake -D CMAKE_BUILD_TYPE=Release -DLlama_DIR=<path-to-llama.cpp-install>/lib/cmake/Llama .. && cmake --build . | ||
``` | ||
Replace binaries: | ||
``` | ||
cp ./install/lib/lib* <path-to-site-packages>/ionpy/module/linux/ | ||
``` | ||
3. Download LLaVA models from Hugging Face | ||
- [mmproj-mistral7b-f16-q6_k.gguf](https://huggingface.co/cmp-nct/llava-1.6-gguf/resolve/main/mmproj-mistral7b-f16-q6_k.gguf?download=true) | ||
- [ggml-mistral-q_4_k.gguf](https://huggingface.co/cmp-nct/llava-1.6-gguf/resolve/main/ggml-mistral-q_4_k.gguf?download=true) | ||
4. Connect UVC camera and run | ||
``` | ||
python3 main.py | ||
``` |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,161 @@ | ||
import sys | ||
import argparse | ||
|
||
from ionpy import Node, Builder, Buffer, Port, Param, Type, TypeCode | ||
import numpy as np | ||
|
||
from tkinter import * | ||
from tkinter.ttk import * | ||
import sv_ttk | ||
from PIL import ImageTk, Image | ||
|
||
class App(Frame):
    """Full-screen Tk frame that shows camera frames with an LLM caption.

    The frame owns an ion-kit pipeline (built in ``init_pipeline``) whose
    image output is bound to ``self.img`` and whose ``llm_llava`` text output
    is bound to ``self.response``.  ``update_periodic`` runs the pipeline and
    redraws the canvas on a Tk timer.

    Args:
        window: The Tk root (or toplevel) that hosts this frame.
        args: Parsed command-line namespace; only ``args.resolution``
            (``"<width>x<height>"``) is read.

    Raises:
        ValueError: If ``args.resolution`` is not two positive integers
            separated by ``x``.
    """

    # Marker placed in front of the user prompt unless advanced mode is on.
    IMAGE_MARKER = '<image>'
    # Timer periods, in milliseconds, for redraw and for response polling.
    REFRESH_MS = 30
    POLL_MS = 100

    def __init__(self, window, args):
        super().__init__(window, padding=15)

        self.window = window

        # Model
        self.camera_width, self.camera_height = self._parse_resolution(args.resolution)
        self.screen_width = self.winfo_screenwidth()
        self.screen_height = self.winfo_screenheight()
        self.img = np.zeros((self.camera_height, self.camera_width, 3), dtype=np.uint8)
        self.prompt = np.zeros(1024, dtype=np.int8)
        self.response = np.zeros(1024, dtype=np.int8)

        # View model
        self.prompt_string = StringVar()
        self.prompt_string.set('Explain the image in a single sentence.')

        # Support variables
        self.live_mode = True
        self.advanced_mode = False
        self.analyze_in_progress = False
        self.last_response = np.copy(self.response)
        # No analysis button is created by init_layout; the handlers that
        # touch it treat None as "no button present".
        self.analysis_button = None
        # Canvas item id of the camera image; created on the first redraw.
        self.img_item = None

        self.init_pipeline()
        self.init_layout()

        self.window.protocol("WM_DELETE_WINDOW", self.on_closing)

    @staticmethod
    def _parse_resolution(text):
        """Return ``(width, height)`` parsed from ``"<width>x<height>"``."""
        parts = text.lower().split('x')
        try:
            width, height = (int(part) for part in parts)
        except ValueError:
            raise ValueError(
                f'Invalid resolution {text!r}; expected "<width>x<height>", e.g. 640x480'
            ) from None
        if width <= 0 or height <= 0:
            raise ValueError(f'Invalid resolution {text!r}; dimensions must be positive')
        return width, height

    @staticmethod
    def _encode_text(text, buf):
        """Store ``text`` in ``buf`` as zero-padded UTF-8, truncating to fit.

        The last element of ``buf`` is always left at zero so the content
        stays NUL-terminated.
        """
        buf.fill(0)
        capacity = max(len(buf) - 1, 0)
        data = text.encode('utf-8', errors='replace')[:capacity]
        # Truncation may have cut a multi-byte character; drop the fragment.
        data = data.decode('utf-8', errors='ignore').encode('utf-8')
        if data:
            buf[:len(data)] = np.frombuffer(data, dtype=np.int8)

    @staticmethod
    def _decode_response(buf):
        """Return the first sentence stored in the zero-padded byte buffer.

        Bytes after the first NUL are ignored, invalid UTF-8 is replaced
        instead of raising, and the text is cut at the first ``'.'``.
        """
        raw = np.asarray(buf, dtype=np.int8).tobytes().split(b'\0', 1)[0]
        return raw.decode('utf-8', errors='replace').split('.')[0]

    def _configure_analysis_button(self, **options):
        """Forward ``options`` to the analysis button if one exists."""
        if self.analysis_button is not None:
            self.analysis_button.configure(**options)

    def init_pipeline(self):
        """Build the ion-kit graph: camera -> reorder -> llm_llava."""
        self.b = Builder()
        self.b.set_target("host-cuda")
        self.b.with_bb_module("ion-bb")

        # U3V camera
        # params = [Param("num_devices", 1), Param("realtime_diaplay_mode", True)]
        # n_img_cwh = self.b.add("image_io_u3v_cameraN_u8x3").set_param(params)

        # UVC camera
        params = [Param("width", self.camera_width), Param("height", self.camera_height)]
        n_img_whc = self.b.add("image_io_camera").set_param(params)
        params = [Param("dim0", 2), Param("dim1", 0), Param("dim2", 1)]
        n_img_cwh = self.b.add("base_reorder_buffer_3d_uint8").set_iport([n_img_whc.get_port("output")]).set_param(params)

        self.prompt_buf = Buffer(array=self.prompt)
        prompt_port = Port(name="prompt", type=Type(TypeCode.Int, 8, 1), dim=1)
        prompt_port.bind(self.prompt_buf)

        params = [Param("width", self.camera_width), Param("height", self.camera_height)]
        n_txt = self.b.add("llm_llava").set_iport([n_img_cwh.get_port("output")[0], prompt_port]).set_param(params)

        # Initial test pattern: every pixel of row i is grey level i % 256.
        # Written in place so the array object bound below is unchanged.
        row_levels = (np.arange(self.camera_height) % 256).astype(np.uint8)
        self.img[:] = row_levels[:, np.newaxis, np.newaxis]

        self.img_buf = Buffer(array=self.img)
        n_img_cwh.get_port("output").bind(self.img_buf)

        self.response_buf = Buffer(array=self.response)
        n_txt.get_port("output").bind(self.response_buf)

    def init_layout(self):
        """Create the screen-sized canvas and the response label on top of it."""
        self.img_canvas = Canvas(self, width=self.screen_width, height=self.screen_height)
        self.img_canvas.pack()

        response_frame = Frame(self, padding=15, height=50)
        self.response_label = Label(response_frame, font=('Helvetica', 48), wraplength=self.screen_width-30, padding=15, anchor='nw', justify='left')
        self.response_label.pack()
        self.img_canvas.create_window(40, 40, window=response_frame, anchor='nw')

        self.update_prompt()
        self.update_response()
        self.update_periodic()

    def update_periodic(self):
        """Run the pipeline once, redraw the frame and re-arm the timer."""
        # Running pipeline
        self.b.run()

        img = Image.fromarray(self.img)
        # Crop rows so the frame has the screen's aspect ratio, then scale.
        cutoff = self.camera_height - (self.screen_height / self.screen_width) * self.camera_width
        img = img.crop((0, cutoff/2, self.camera_width, self.camera_height-cutoff/2))
        img = img.resize((self.screen_width, self.screen_height))
        # Keep the PhotoImage referenced from self so it is not collected
        # while the canvas is still displaying it.
        self.photo = ImageTk.PhotoImage(image=img)
        if self.img_item is None:
            self.img_item = self.img_canvas.create_image(0, 0, image=self.photo, anchor='nw')
        else:
            # Reuse the existing item instead of stacking a new one per tick.
            self.img_canvas.itemconfigure(self.img_item, image=self.photo)

        if self.live_mode:
            self.update_response()

        self.window.after(self.REFRESH_MS, self.update_periodic)

    def update_prompt(self):
        """Copy the current prompt text into the pipeline's prompt buffer."""
        # Prepend the image prompt marker unless advanced mode is enabled.
        prefix = '' if self.advanced_mode else self.IMAGE_MARKER
        self._encode_text(prefix + self.prompt_string.get(), self.prompt)

        # Clearing the response makes the look & feel better
        self.response.fill(0)
        self.response_label.configure(text='')

    def update_response(self):
        """Show the first sentence of the response buffer in the label."""
        self.response_label.configure(text=self._decode_response(self.response))

    def toggle_live(self):
        """Flip live mode; the analysis button is only enabled when it is off."""
        self.live_mode = not self.live_mode
        self._configure_analysis_button(state='disabled' if self.live_mode else 'normal')

    def analyze(self):
        """Start a one-shot analysis and poll until the response changes."""
        self.analyze_in_progress = True
        self._configure_analysis_button(text='Analyzing...')
        self.last_response = np.copy(self.response)
        self.window.after(self.POLL_MS, self.wait_response)

    def wait_response(self):
        """Poll the response buffer; publish it once it differs from the snapshot."""
        if np.array_equal(self.last_response, self.response):
            self.window.after(self.POLL_MS, self.wait_response)
        else:
            self.update_response()
            self._configure_analysis_button(text='Analyze')
            self.analyze_in_progress = False

    def on_closing(self):
        """Drop the pipeline builder, then destroy the window."""
        del self.b
        self.window.destroy()
|
||
if __name__ == '__main__':
    # Command line: the camera resolution is the only option.
    cli = argparse.ArgumentParser()
    cli.add_argument(
        '--resolution',
        default='640x480',
        help='Camera resolution in "<width>x<height>" format. e.g. 640x480',
    )
    options = cli.parse_args()

    # Top-level window: apply the window-manager attributes, then the theme.
    main_window = Tk()
    for attribute, value in (('-type', 'splash'), ('-fullscreen', True)):
        main_window.wm_attributes(attribute, value)
    sv_ttk.set_theme("dark")

    # Build the application frame, let it fill the window, and enter the loop.
    app = App(main_window, options)
    app.pack(expand=True, fill='both')
    main_window.mainloop()
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,4 @@ | ||
ion-python >= 1.8.2 | ||
numpy | ||
sv_ttk | ||
pillow |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,57 @@ | ||
#include <ion/ion.h> | ||
#include <iostream> | ||
|
||
#include <opencv2/highgui.hpp> | ||
|
||
using namespace ion; | ||
|
||
int main(int argc, char *argv[]) { | ||
try { | ||
// const int width = 1280; | ||
// const int height = 960; | ||
const int width = 503; | ||
const int height = 337; | ||
|
||
Buffer<int8_t> prompt{1024}; | ||
prompt.fill(0); | ||
std::string prompt_s("<image>Explain the image in one sentence."); | ||
for (auto i = 0; i < prompt_s.size(); ++i) { | ||
prompt(i) = prompt_s[i]; | ||
} | ||
|
||
Builder b; | ||
b.set_target(Halide::get_target_from_environment()); | ||
b.with_bb_module("ion-bb"); | ||
|
||
auto n_img_cwh = b.add("image_io_color_data_loader").set_param(Param{"url", "http://www.onthejob.education/images/4th_level/Road_Worker/Road_Worker_Darwin.jpg"}, Param{"width", width}, Param{"height", height}); | ||
auto n_img_whc = b.add("base_reorder_buffer_3d_uint8")(n_img_cwh["output"]).set_param(Param{"dim0", 2}, Param{"dim1", 0}, Param{"dim2", 1}); | ||
// auto n_img_cwh = b.add("image_io_u3v_cameraN_u8x3").set_param(Param{"num_devices", "1"}, Param{"realtime_diaplay_mode", true}); | ||
// auto n_img_whc = b.add("base_reorder_buffer_3d_uint8")(n_img_cwh["output"]).set_param(Param{"dim0", 1}, Param{"dim1", 2}, Param{"dim2", 0}); | ||
|
||
auto n_disp = b.add("image_io_gui_display")(n_img_whc["output"][0]).set_param(Param{"width", width}, Param{"height", height}); | ||
auto n_txt = b.add("llm_llava")(n_img_cwh["output"][0], prompt).set_param(Param{"width", width}, Param{"height", height}); | ||
|
||
Buffer<int8_t> txt_output{1024}; | ||
n_txt["output"].bind(txt_output); | ||
|
||
Buffer<int32_t> result = Buffer<int32_t>::make_scalar(); | ||
n_disp["output"].bind(result); | ||
|
||
// for (int i=0; i<1; ++i) { | ||
while (true) { | ||
b.run(); | ||
std::cout << reinterpret_cast<const char *>(txt_output.data()) << std::endl; | ||
} | ||
|
||
} catch (const Halide::Error &e) { | ||
std::cerr << e.what() << std::endl; | ||
return 1; | ||
} catch (const std::exception &e) { | ||
std::cerr << e.what() << std::endl; | ||
return 1; | ||
} catch (...) { | ||
return 1; | ||
} | ||
|
||
return 0; | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1 @@ | ||
*.so* |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Oops, something went wrong.