WIP: Called llama.cpp from extern functions
iitaku committed Apr 23, 2024 (1 parent 661f788, commit c9c0b91)
Showing 10 changed files with 252 additions and 83 deletions.
7 changes: 4 additions & 3 deletions example/llm_llava.cc
@@ -13,11 +13,12 @@ int main(int argc, char *argv[]) {
Buffer<int8_t> prompt{1024};

Builder b;
b.set_target(Halide::get_target_from_environment().with_feature(Halide::Target::TracePipeline));
b.set_target(Halide::get_target_from_environment().with_feature(Halide::Target::Debug).with_feature(Halide::Target::TracePipeline));
b.with_bb_module("ion-bb");

auto n_img = b.add("image_io_color_data_loader").set_param(Param("url", "http://www.onthejob.education/images/4th_level/Road_Worker/Road_Worker_Darwin.jpg"), Param("width", width), Param("height", height));
auto n_txt = b.add("llm_llava")(n_img["output"], prompt);
auto n_img = b.add("image_io_color_data_loader").set_param(Param{"url", "http://www.onthejob.education/images/4th_level/Road_Worker/Road_Worker_Darwin.jpg"}, Param{"width", width}, Param{"height", height});
n_img = b.add("base_reorder_buffer_3d_uint8")(n_img["output"]).set_param(Param{"dim0", 2}, Param{"dim1", 0}, Param{"dim2", 1});
auto n_txt = b.add("llm_llava")(n_img["output"], prompt).set_param(Param{"width", width}, Param{"height", height});

Buffer<int8_t> txt_output{1024};
n_txt["output"].bind(txt_output);
9 changes: 7 additions & 2 deletions src/bb/CMakeLists.txt
@@ -3,6 +3,7 @@ cmake_policy(SET CMP0057 NEW)
set(ION_BB_INCLUDE_DIRS)
set(ION_BB_LINK_DIRS)
set(ION_BB_LIBRARIES)
set(ION_BB_SRCS)
file(GLOB childs ${CMAKE_CURRENT_SOURCE_DIR}/*)
set(BB_NAMES base dnn fpga image-io image-processing opencv sgm llm)
foreach(BB_NAME IN LISTS BB_NAMES)
@@ -23,12 +24,16 @@ foreach(BB_NAME IN LISTS BB_NAMES)
set(ION_BB_BUILD_${BB_NAME} FALSE PARENT_SCOPE)
message("Skip building \"${BB_NAME}\"")
endif()

if (EXISTS ${CMAKE_CURRENT_SOURCE_DIR}/${BB_NAME}/bb.cc)
list(APPEND ION_BB_SRCS ${CMAKE_CURRENT_SOURCE_DIR}/${BB_NAME}/bb.cc)
endif()
endif()
endforeach()

add_library(ion-bb SHARED bb.cc)
add_library(ion-bb SHARED bb.cc ${ION_BB_SRCS})
target_include_directories(ion-bb PUBLIC ${PROJECT_SOURCE_DIR}/include ${PROJECT_SOURCE_DIR}/src ${CMAKE_CURRENT_SOURCE_DIR} ${ION_BB_INCLUDE_DIRS})

target_link_directories(ion-bb PUBLIC ${ION_BB_LINK_DIRS})
target_link_libraries(ion-bb PUBLIC ion-core ${ION_BB_LIBRARIES})
if(UNIX)
target_compile_options(ion-bb PUBLIC -fno-rtti) # For Halide::Generator
27 changes: 10 additions & 17 deletions src/bb/image-io/rt_file.h
@@ -25,26 +25,19 @@ extern "C" int ION_EXPORT ion_bb_image_io_color_data_loader(halide_buffer_t *ses
using namespace ion::bb::image_io;

try {

if (out->is_bounds_query()) {
out->dim[0].min = 0;
out->dim[0].extent = width,
out->dim[1].min = 0;
out->dim[1].extent = height;
out->dim[2].min = 0;
out->dim[2].extent = 3;
} else {
const std::string session_id(reinterpret_cast<const char *>(session_id_buf->host));
const std::string url = reinterpret_cast<const char *>(url_buf->host);
static std::unordered_map<std::string, std::unique_ptr<ImageSequence<uint8_t>>> seqs;
if (seqs.count(session_id) == 0) {
seqs[session_id] = std::unique_ptr<ImageSequence<uint8_t>>(new ImageSequence<uint8_t>(session_id, url));
}

Halide::Runtime::Buffer<uint8_t> obuf(*out);
seqs[session_id]->get(width, height, IMREAD_COLOR, obuf);
return 0;
}

const std::string session_id(reinterpret_cast<const char *>(session_id_buf->host));
const std::string url = reinterpret_cast<const char *>(url_buf->host);
static std::unordered_map<std::string, std::unique_ptr<ImageSequence<uint8_t>>> seqs;
if (seqs.count(session_id) == 0) {
seqs[session_id] = std::unique_ptr<ImageSequence<uint8_t>>(new ImageSequence<uint8_t>(session_id, url));
}

Halide::Runtime::Buffer<uint8_t> obuf(*out);
seqs[session_id]->get(width, height, IMREAD_COLOR, obuf);
} catch (const std::exception &e) {
std::cerr << e.what() << std::endl;
return -1;
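A note on the bounds-query pattern above (and in the new bb.cc below): Halide calls a define_extern stage first in bounds-query mode, where the buffers being negotiated have null host pointers and is_bounds_query() returns true, and the stage only fills in dim[i].min/extent before returning; it is then called again with allocated buffers to actually produce data. A minimal standalone sketch of that protocol, assuming the Halide runtime headers are available; the function name is illustrative, not part of this commit:

#include <cstdint>
#include <HalideBuffer.h>
#include <HalideRuntime.h>

// Skeleton of an extern "C" stage following the same bounds-query protocol
// as ion_bb_image_io_color_data_loader above.
extern "C" int example_extern_stage(int32_t width, int32_t height, halide_buffer_t *out) {
    if (out->is_bounds_query()) {
        // Bounds query: out->host is null, so only report the region produced.
        out->dim[0].min = 0; out->dim[0].extent = width;
        out->dim[1].min = 0; out->dim[1].extent = height;
        out->dim[2].min = 0; out->dim[2].extent = 3;
        return 0;
    }
    // Real call: out is allocated; wrap it and fill it with data.
    Halide::Runtime::Buffer<uint8_t> obuf(*out);
    obuf.fill(0);
    return 0;
}
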
81 changes: 81 additions & 0 deletions src/bb/llm/bb.cc
@@ -0,0 +1,81 @@
#include <fstream>

#include <Halide.h>

#include <llama.h>

#include "ion/export.h"

#include "log.h"
#include "json/json.hpp"

#include "clip.h"
#include "llava.h"

namespace ion {
namespace bb {
namespace llm {

std::map<std::string, Halide::ExternCFunction> extern_functions;

class RegisterExtern {
public:
RegisterExtern(std::string key, Halide::ExternCFunction f) {
extern_functions[key] = f;
}
};

} // llm
} // bb
} // ion

#define ION_REGISTER_EXTERN(NAME) static auto ion_register_extern_##NAME = ion::bb::llm::RegisterExtern(#NAME, NAME);

extern "C"
ION_EXPORT int ion_bb_llm_llava(halide_buffer_t *in, halide_buffer_t *prompt, int32_t width, int32_t height, halide_buffer_t *out) {
try {
if (in->is_bounds_query()) {
in->dim[0].min = 0;
in->dim[0].extent = 3;
in->dim[1].min = 0;
in->dim[1].extent = width;
in->dim[2].min = 0;
in->dim[2].extent = height;
return 0;
}

Halide::Runtime::Buffer<int8_t> obuf(*out);

std::ofstream ofs("test.bin");
Halide::Runtime::Buffer<uint8_t> ibuf(*in);
ofs.write(reinterpret_cast<const char*>(ibuf.data()), in->size_in_bytes());

auto verbosity = 1;
auto ctx_clip = clip_model_load("pasu", verbosity);

llama_backend_init();
// llama_numa_init(params->numa);

auto n_threads = 1;
auto embed = llava_image_embed_make_with_bytes(ctx_clip, n_threads, ibuf.data(), ibuf.size_in_bytes());
if (!embed) {
ion::log::error("Could not load image");
return 1;
}

obuf.fill(0);
obuf(0) = 'x';

return 0;

} catch (const std::exception &e) {
std::cerr << e.what() << std::endl;
return -1;
} catch (...) {
std::cerr << "Unknown error" << std::endl;
return -1;
}
}
ION_REGISTER_EXTERN(ion_bb_llm_llava)

#undef ION_REGISTER_EXTERN
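The ION_REGISTER_EXTERN(ion_bb_llm_llava) line above relies on the static-registration idiom: a namespace-scope RegisterExtern object is constructed during static initialization, which inserts the function pointer into ion::bb::llm::extern_functions keyed by its name, so the symbol can later be resolved as a string. A self-contained sketch of the same idiom; the registry, macro, and function here are illustrative stand-ins, not the ion-kit API:

#include <iostream>
#include <map>
#include <string>

// Illustrative registry mirroring ion::bb::llm::extern_functions.
using ExternFn = int (*)();
static std::map<std::string, ExternFn> extern_functions;

struct RegisterExtern {
    RegisterExtern(const std::string &key, ExternFn f) { extern_functions[key] = f; }
};

// Same shape as ION_REGISTER_EXTERN(NAME): a static object whose constructor
// runs before main() and records NAME in the map under the string #NAME.
#define REGISTER_EXTERN(NAME) static RegisterExtern register_extern_##NAME(#NAME, NAME);

int example_extern() { return 42; }
REGISTER_EXTERN(example_extern)

int main() {
    // By the time main() runs the registrar has populated the map,
    // so the function can be looked up by name and invoked.
    std::cout << extern_functions.at("example_extern")() << std::endl;  // prints 42
    return 0;
}
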
8 changes: 6 additions & 2 deletions src/bb/llm/bb.h
@@ -7,11 +7,15 @@ namespace ion {
namespace bb {
namespace llm {

extern std::map<std::string, Halide::ExternCFunction> extern_functions;

class Llava : public BuildingBlock<Llava> {
public:
Input<Halide::Func> input{"input", Halide::type_of<uint8_t>(), 3};
Input<Halide::Func> prompt{"prompt", Halide::type_of<int8_t>(), 1};
Output<Halide::Func> output{"output", Halide::type_of<int8_t>(), 1};
BuildingBlockParam<int32_t> width{"width", 640};
BuildingBlockParam<int32_t> height{"height", 480};

void generate() {
using namespace Halide;
@@ -24,8 +28,8 @@ class Llava : public BuildingBlock<Llava> {
Func prompt_;
prompt_(_) = prompt(_);
prompt_.compute_root();
std::vector<ExternFuncArgument> params = {input_, prompt_};

std::vector<ExternFuncArgument> params = {input_, prompt_, static_cast<int32_t>(width), static_cast<int32_t>(height)};
Func llava("llava");
llava.define_extern("ion_bb_llm_llava", params, type_of<int8_t>(), 1);
llava.compute_root();
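For reference, Halide passes ExternFuncArguments to the extern "C" symbol in the order they are listed, with the output buffer appended last, which is how {input_, prompt_, width, height} above lines up with ion_bb_llm_llava(halide_buffer_t *in, halide_buffer_t *prompt, int32_t width, int32_t height, halide_buffer_t *out) in bb.cc. A minimal sketch of the same define_extern wiring outside the building-block framework; only the pipeline is defined here, nothing is realized, and the Func names are illustrative:

#include <vector>
#include <Halide.h>

using namespace Halide;

int main() {
    // Two Func inputs plus two scalar parameters, mirroring Llava::generate().
    Var x;
    Func input("input"), prompt("prompt");
    input(x) = cast<uint8_t>(x);
    prompt(x) = cast<int8_t>(0);
    input.compute_root();
    prompt.compute_root();

    int32_t width = 640, height = 480;

    // Arguments reach the extern "C" function in this order, with the output
    // halide_buffer_t* appended last by Halide.
    std::vector<ExternFuncArgument> params = {input, prompt, width, height};

    Func llava("llava");
    llava.define_extern("ion_bb_llm_llava", params, type_of<int8_t>(), 1);
    llava.compute_root();
    return 0;
}
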
85 changes: 85 additions & 0 deletions src/bb/llm/clip.h
@@ -0,0 +1,85 @@
#ifndef CLIP_H
#define CLIP_H

#include <stddef.h>
#include <stdint.h>

#ifdef LLAMA_SHARED
# if defined(_WIN32) && !defined(__MINGW32__)
# ifdef LLAMA_BUILD
# define CLIP_API __declspec(dllexport)
# else
# define CLIP_API __declspec(dllimport)
# endif
# else
# define CLIP_API __attribute__ ((visibility ("default")))
# endif
#else
# define CLIP_API
#endif

struct clip_ctx;

#ifdef __cplusplus
extern "C" {
#endif

struct clip_ctx;

struct clip_image_u8_batch {
struct clip_image_u8 * data;
size_t size;
};

struct clip_image_f32_batch {
struct clip_image_f32 * data;
size_t size;
};

CLIP_API struct clip_ctx * clip_model_load (const char * fname, int verbosity);
CLIP_API struct clip_ctx * clip_model_load_cpu(const char * fname, int verbosity);

CLIP_API void clip_free(struct clip_ctx * ctx);

CLIP_API size_t clip_embd_nbytes(const struct clip_ctx * ctx);

CLIP_API int32_t clip_image_size (const struct clip_ctx * ctx);
CLIP_API int32_t clip_patch_size (const struct clip_ctx * ctx);
CLIP_API int32_t clip_hidden_size(const struct clip_ctx * ctx);

// TODO: should be enum, not string
CLIP_API const char * clip_patch_merge_type(const struct clip_ctx * ctx);

CLIP_API const int32_t * clip_image_grid(const struct clip_ctx * ctx);

CLIP_API int clip_n_patches (const struct clip_ctx * ctx);
CLIP_API int clip_n_mmproj_embd(const struct clip_ctx * ctx);

CLIP_API struct clip_image_u8 * clip_image_u8_init ();
CLIP_API struct clip_image_f32 * clip_image_f32_init();

CLIP_API void clip_image_u8_free (struct clip_image_u8 * img);
CLIP_API void clip_image_f32_free(struct clip_image_f32 * img);
CLIP_API void clip_image_u8_batch_free (struct clip_image_u8_batch * batch);
CLIP_API void clip_image_f32_batch_free(struct clip_image_f32_batch * batch);

CLIP_API bool clip_image_load_from_file(const char * fname, struct clip_image_u8 * img);

/** interpret bytes as an image file with length bytes_length, and use the result to populate img */
CLIP_API bool clip_image_load_from_bytes(const unsigned char * bytes, size_t bytes_length, struct clip_image_u8 * img);

/** preprocess img and store the result in res_imgs, pad_to_square may be overriden to false depending on model configuration */
CLIP_API bool clip_image_preprocess(struct clip_ctx * ctx, const struct clip_image_u8 * img, struct clip_image_f32_batch * res_imgs );

CLIP_API struct ggml_tensor * clip_get_newline_tensor(const struct clip_ctx * ctx);

CLIP_API bool clip_image_encode (struct clip_ctx * ctx, int n_threads, struct clip_image_f32 * img, float * vec);
CLIP_API bool clip_image_batch_encode(struct clip_ctx * ctx, int n_threads, const struct clip_image_f32_batch * imgs, float * vec);

CLIP_API bool clip_model_quantize(const char * fname_inp, const char * fname_out, int itype);

#ifdef __cplusplus
}
#endif

#endif // CLIP_H
6 changes: 5 additions & 1 deletion src/bb/llm/config.cmake
@@ -1,7 +1,11 @@
# Build and install llama.cpp with the following command
# cmake -D CMAKE_INSTALL_PREFIX=<prefix> -D BUILD_SHARED_LIBS=on -D LLAMA_STATIC=off ..
find_package(Llama QUIET)
if (${Llama_FOUND})
set(ION_BB_BUILD_llm TRUE)
set(LIBRARIES llama)
set(INCLUDE_DIRS ${LLAMA_INCLUDE_DIR})
set(LINK_DIRS ${LLAMA_LIB_DIR})
set(LIBRARIES llama llava_shared)
else()
set(ION_BB_BUILD_llm FALSE)
endif()
50 changes: 50 additions & 0 deletions src/bb/llm/llava.h
@@ -0,0 +1,50 @@
#ifndef LLAVA_H
#define LLAVA_H

#include "ggml.h"

#ifdef LLAMA_SHARED
# if defined(_WIN32) && !defined(__MINGW32__)
# ifdef LLAMA_BUILD
# define LLAVA_API __declspec(dllexport)
# else
# define LLAVA_API __declspec(dllimport)
# endif
# else
# define LLAVA_API __attribute__ ((visibility ("default")))
# endif
#else
# define LLAVA_API
#endif

struct clip_ctx;

#ifdef __cplusplus
extern "C" {
#endif

struct llava_image_embed {
float * embed;
int n_image_pos;
};

/** sanity check for clip <-> llava embed size match */
LLAVA_API bool llava_validate_embed_size(const struct llama_context * ctx_llama, const struct clip_ctx * ctx_clip);

LLAVA_API bool llava_image_embed_make_with_clip_img(struct clip_ctx * ctx_clip, int n_threads, const struct clip_image_u8 * img, float ** image_embd_out, int * n_img_pos_out);

/** build an image embed from image file bytes */
LLAVA_API struct llava_image_embed * llava_image_embed_make_with_bytes(struct clip_ctx * ctx_clip, int n_threads, const unsigned char * image_bytes, int image_bytes_length);
/** build an image embed from a path to an image filename */
LLAVA_API struct llava_image_embed * llava_image_embed_make_with_filename(struct clip_ctx * ctx_clip, int n_threads, const char * image_path);
LLAVA_API void llava_image_embed_free(struct llava_image_embed * embed);
/** free an embedding made with llava_image_embed_make_* */

/** write the image represented by embed into the llama context with batch size n_batch, starting at context pos n_past. on completion, n_past points to the next position in the context after the image embed. */
LLAVA_API bool llava_eval_image_embed(struct llama_context * ctx_llama, const struct llava_image_embed * embed, int n_batch, int * n_past);

#ifdef __cplusplus
}
#endif

#endif
58 changes: 1 addition & 57 deletions src/bb/llm/rt.h
@@ -1,57 +1 @@
#ifndef ION_BB_LLM_RT_H
#define ION_BB_LLM_RT_H

#include <Halide.h>

#include <llama.h>

#include "ion/export.h"

#include "log.h"
#include "json/json.hpp"

namespace ion {
namespace bb {
namespace llm {

std::map<std::string, Halide::ExternCFunction> extern_functions;

class RegisterExtern {
public:
RegisterExtern(std::string key, Halide::ExternCFunction f) {
extern_functions[key] = f;
}
};

} // llm
} // bb
} // ion

#define ION_REGISTER_EXTERN(NAME) static auto ion_register_extern_##NAME = ion::bb::llm::RegisterExtern(#NAME, NAME);

extern "C"
ION_EXPORT int ion_bb_llm_llava(halide_buffer_t *in, halide_buffer_t *prompt, halide_buffer_t *out) {
try {
ion::log::info("ion_bb_llm_llava");
// if (in->is_bounds_query()) {
// in->dim[0] = out->dim[0];
// in->dim[1] = out->dim[1];
// in->dim[2] = out->dim[2];
// return 0;
// }

return 0;

} catch (const std::exception &e) {
std::cerr << e.what() << std::endl;
return -1;
} catch (...) {
std::cerr << "Unknown error" << std::endl;
return -1;
}
}
ION_REGISTER_EXTERN(ion_bb_llm_llava)

#undef ION_REGISTER_EXTERN

#endif // ION_BB_LLM_BB_H
// NOTE: This file remains only for compatibility with the header-inclusion scheme used by bb.cc