From c9c0b91deba2ed222838e5a4e4fa0509375ac629 Mon Sep 17 00:00:00 2001 From: Takuro Iizuka Date: Mon, 22 Apr 2024 17:16:16 -0700 Subject: [PATCH] WIP: Called llama.cpp from extern functions --- example/llm_llava.cc | 7 +- src/bb/CMakeLists.txt | 9 +- src/bb/image-io/rt_file.h | 27 +++--- src/bb/llm/bb.cc | 81 ++++++++++++++++++ src/bb/llm/bb.h | 8 +- src/bb/llm/clip.h | 85 +++++++++++++++++++ src/bb/llm/config.cmake | 6 +- src/bb/llm/llava.h | 50 +++++++++++ src/bb/llm/rt.h | 58 +------------ src/bb/llm/{LICENSE => thirdparty_notice.txt} | 4 +- 10 files changed, 252 insertions(+), 83 deletions(-) create mode 100644 src/bb/llm/bb.cc create mode 100644 src/bb/llm/clip.h create mode 100644 src/bb/llm/llava.h rename src/bb/llm/{LICENSE => thirdparty_notice.txt} (95%) diff --git a/example/llm_llava.cc b/example/llm_llava.cc index 2ffa40cb..f4a45ac1 100644 --- a/example/llm_llava.cc +++ b/example/llm_llava.cc @@ -13,11 +13,12 @@ int main(int argc, char *argv[]) { Buffer prompt{1024}; Builder b; - b.set_target(Halide::get_target_from_environment().with_feature(Halide::Target::TracePipeline)); + b.set_target(Halide::get_target_from_environment().with_feature(Halide::Target::Debug).with_feature(Halide::Target::TracePipeline)); b.with_bb_module("ion-bb"); - auto n_img = b.add("image_io_color_data_loader").set_param(Param("url", "http://www.onthejob.education/images/4th_level/Road_Worker/Road_Worker_Darwin.jpg"), Param("width", width), Param("height", height)); - auto n_txt = b.add("llm_llava")(n_img["output"], prompt); + auto n_img = b.add("image_io_color_data_loader").set_param(Param{"url", "http://www.onthejob.education/images/4th_level/Road_Worker/Road_Worker_Darwin.jpg"}, Param{"width", width}, Param{"height", height}); + n_img = b.add("base_reorder_buffer_3d_uint8")(n_img["output"]).set_param(Param{"dim0", 2}, Param{"dim1", 0}, Param{"dim2", 1}); + auto n_txt = b.add("llm_llava")(n_img["output"], prompt).set_param(Param{"width", width}, Param{"height", height}); Buffer 
txt_output{1024}; n_txt["output"].bind(txt_output); diff --git a/src/bb/CMakeLists.txt b/src/bb/CMakeLists.txt index c6dd681b..a19253c5 100644 --- a/src/bb/CMakeLists.txt +++ b/src/bb/CMakeLists.txt @@ -3,6 +3,7 @@ cmake_policy(SET CMP0057 NEW) set(ION_BB_INCLUDE_DIRS) set(ION_BB_LINK_DIRS) set(ION_BB_LIBRARIES) +set(ION_BB_SRCS) file(GLOB childs ${CMAKE_CURRENT_SOURCE_DIR}/*) set(BB_NAMES base dnn fpga image-io image-processing opencv sgm llm) foreach(BB_NAME IN LISTS BB_NAMES) @@ -23,12 +24,16 @@ foreach(BB_NAME IN LISTS BB_NAMES) set(ION_BB_BUILD_${BB_NAME} FALSE PARENT_SCOPE) message("Skip building \"${BB_NAME}\"") endif() + + if (EXISTS ${CMAKE_CURRENT_SOURCE_DIR}/${BB_NAME}/bb.cc) + list(APPEND ION_BB_SRCS ${CMAKE_CURRENT_SOURCE_DIR}/${BB_NAME}/bb.cc) + endif() endif() endforeach() -add_library(ion-bb SHARED bb.cc) +add_library(ion-bb SHARED bb.cc ${ION_BB_SRCS}) target_include_directories(ion-bb PUBLIC ${PROJECT_SOURCE_DIR}/include ${PROJECT_SOURCE_DIR}/src ${CMAKE_CURRENT_SOURCE_DIR} ${ION_BB_INCLUDE_DIRS}) - +target_link_directories(ion-bb PUBLIC ${ION_BB_LINK_DIRS}) target_link_libraries(ion-bb PUBLIC ion-core ${ION_BB_LIBRARIES}) if(UNIX) target_compile_options(ion-bb PUBLIC -fno-rtti) # For Halide::Generator diff --git a/src/bb/image-io/rt_file.h b/src/bb/image-io/rt_file.h index cc5fb769..f446a66c 100644 --- a/src/bb/image-io/rt_file.h +++ b/src/bb/image-io/rt_file.h @@ -25,26 +25,19 @@ extern "C" int ION_EXPORT ion_bb_image_io_color_data_loader(halide_buffer_t *ses using namespace ion::bb::image_io; try { - if (out->is_bounds_query()) { - out->dim[0].min = 0; - out->dim[0].extent = width, - out->dim[1].min = 0; - out->dim[1].extent = height; - out->dim[2].min = 0; - out->dim[2].extent = 3; - } else { - const std::string session_id(reinterpret_cast(session_id_buf->host)); - const std::string url = reinterpret_cast(url_buf->host); - static std::unordered_map>> seqs; - if (seqs.count(session_id) == 0) { - seqs[session_id] = std::unique_ptr>(new 
ImageSequence(session_id, url)); - } - - Halide::Runtime::Buffer obuf(*out); - seqs[session_id]->get(width, height, IMREAD_COLOR, obuf); + return 0; + } + const std::string session_id(reinterpret_cast(session_id_buf->host)); + const std::string url = reinterpret_cast(url_buf->host); + static std::unordered_map>> seqs; + if (seqs.count(session_id) == 0) { + seqs[session_id] = std::unique_ptr>(new ImageSequence(session_id, url)); } + + Halide::Runtime::Buffer obuf(*out); + seqs[session_id]->get(width, height, IMREAD_COLOR, obuf); } catch (const std::exception &e) { std::cerr << e.what() << std::endl; return -1; diff --git a/src/bb/llm/bb.cc b/src/bb/llm/bb.cc new file mode 100644 index 00000000..d54df8b6 --- /dev/null +++ b/src/bb/llm/bb.cc @@ -0,0 +1,81 @@ +#include + +#include + +#include + +#include "ion/export.h" + +#include "log.h" +#include "json/json.hpp" + +#include "clip.h" +#include "llava.h" + +namespace ion { +namespace bb { +namespace llm { + +std::map extern_functions; + +class RegisterExtern { + public: + RegisterExtern(std::string key, Halide::ExternCFunction f) { + extern_functions[key] = f; + } +}; + +} // llm +} // bb +} // ion + +#define ION_REGISTER_EXTERN(NAME) static auto ion_register_extern_##NAME = ion::bb::llm::RegisterExtern(#NAME, NAME); + +extern "C" +ION_EXPORT int ion_bb_llm_llava(halide_buffer_t *in, halide_buffer_t *prompt, int32_t width, int32_t height, halide_buffer_t *out) { + try { + if (in->is_bounds_query()) { + in->dim[0].min = 0; + in->dim[0].extent = 3; + in->dim[1].min = 0; + in->dim[1].extent = width; + in->dim[2].min = 0; + in->dim[2].extent = height; + return 0; + } + + Halide::Runtime::Buffer obuf(*out); + + std::ofstream ofs("test.bin"); + Halide::Runtime::Buffer ibuf(*in); + ofs.write(reinterpret_cast(ibuf.data()), in->size_in_bytes()); + + auto verbosity = 1; + auto ctx_clip = clip_model_load("pasu", verbosity); + + llama_backend_init(); + // llama_numa_init(params->numa); + + auto n_threads = 1; + auto embed = 
llava_image_embed_make_with_bytes(ctx_clip, n_threads, ibuf.data(), ibuf.size_in_bytes()); + if (!embed) { + ion::log::error("Could not load image"); + return 1; + } + + obuf.fill(0); + obuf(0) = 'x'; + + return 0; + + } catch (const std::exception &e) { + std::cerr << e.what() << std::endl; + return -1; + } catch (...) { + std::cerr << "Unknown error" << std::endl; + return -1; + } +} +ION_REGISTER_EXTERN(ion_bb_llm_llava) + +#undef ION_REGISTER_EXTERN diff --git a/src/bb/llm/bb.h b/src/bb/llm/bb.h index b902eeee..2d6d1ecc 100644 --- a/src/bb/llm/bb.h +++ b/src/bb/llm/bb.h @@ -7,11 +7,15 @@ namespace ion { namespace bb { namespace llm { +extern std::map extern_functions; + class Llava : public BuildingBlock { public: Input input{"input", Halide::type_of(), 3}; Input prompt{"prompt", Halide::type_of(), 1}; Output output{"output", Halide::type_of(), 1}; + BuildingBlockParam width{"width", 640}; + BuildingBlockParam height{"height", 480}; void generate() { using namespace Halide; @@ -24,8 +28,8 @@ class Llava : public BuildingBlock { Func prompt_; prompt_(_) = prompt(_); prompt_.compute_root(); - - std::vector params = {input_, prompt_}; + + std::vector params = {input_, prompt_, static_cast(width), static_cast(height)}; Func llava("llava"); llava.define_extern("ion_bb_llm_llava", params, type_of(), 1); llava.compute_root(); diff --git a/src/bb/llm/clip.h b/src/bb/llm/clip.h new file mode 100644 index 00000000..45bdad68 --- /dev/null +++ b/src/bb/llm/clip.h @@ -0,0 +1,85 @@ +#ifndef CLIP_H +#define CLIP_H + +#include +#include + +#ifdef LLAMA_SHARED +# if defined(_WIN32) && !defined(__MINGW32__) +# ifdef LLAMA_BUILD +# define CLIP_API __declspec(dllexport) +# else +# define CLIP_API __declspec(dllimport) +# endif +# else +# define CLIP_API __attribute__ ((visibility ("default"))) +# endif +#else +# define CLIP_API +#endif + +struct clip_ctx; + +#ifdef __cplusplus +extern "C" { +#endif + +struct clip_ctx; + +struct clip_image_u8_batch { + struct clip_image_u8 * data; 
+ size_t size; +}; + +struct clip_image_f32_batch { + struct clip_image_f32 * data; + size_t size; +}; + +CLIP_API struct clip_ctx * clip_model_load (const char * fname, int verbosity); +CLIP_API struct clip_ctx * clip_model_load_cpu(const char * fname, int verbosity); + +CLIP_API void clip_free(struct clip_ctx * ctx); + +CLIP_API size_t clip_embd_nbytes(const struct clip_ctx * ctx); + +CLIP_API int32_t clip_image_size (const struct clip_ctx * ctx); +CLIP_API int32_t clip_patch_size (const struct clip_ctx * ctx); +CLIP_API int32_t clip_hidden_size(const struct clip_ctx * ctx); + +// TODO: should be enum, not string +CLIP_API const char * clip_patch_merge_type(const struct clip_ctx * ctx); + +CLIP_API const int32_t * clip_image_grid(const struct clip_ctx * ctx); + +CLIP_API int clip_n_patches (const struct clip_ctx * ctx); +CLIP_API int clip_n_mmproj_embd(const struct clip_ctx * ctx); + +CLIP_API struct clip_image_u8 * clip_image_u8_init (); +CLIP_API struct clip_image_f32 * clip_image_f32_init(); + +CLIP_API void clip_image_u8_free (struct clip_image_u8 * img); +CLIP_API void clip_image_f32_free(struct clip_image_f32 * img); +CLIP_API void clip_image_u8_batch_free (struct clip_image_u8_batch * batch); +CLIP_API void clip_image_f32_batch_free(struct clip_image_f32_batch * batch); + +CLIP_API bool clip_image_load_from_file(const char * fname, struct clip_image_u8 * img); + +/** interpret bytes as an image file with length bytes_length, and use the result to populate img */ +CLIP_API bool clip_image_load_from_bytes(const unsigned char * bytes, size_t bytes_length, struct clip_image_u8 * img); + +/** preprocess img and store the result in res_imgs, pad_to_square may be overriden to false depending on model configuration */ +CLIP_API bool clip_image_preprocess(struct clip_ctx * ctx, const struct clip_image_u8 * img, struct clip_image_f32_batch * res_imgs ); + +CLIP_API struct ggml_tensor * clip_get_newline_tensor(const struct clip_ctx * ctx); + +CLIP_API bool 
clip_image_encode (struct clip_ctx * ctx, int n_threads, struct clip_image_f32 * img, float * vec); +CLIP_API bool clip_image_batch_encode(struct clip_ctx * ctx, int n_threads, const struct clip_image_f32_batch * imgs, float * vec); + +CLIP_API bool clip_model_quantize(const char * fname_inp, const char * fname_out, int itype); + +#ifdef __cplusplus +} +#endif + +#endif // CLIP_H diff --git a/src/bb/llm/config.cmake b/src/bb/llm/config.cmake index 5c0da05f..8e57db04 100644 --- a/src/bb/llm/config.cmake +++ b/src/bb/llm/config.cmake @@ -1,7 +1,11 @@ +# Build and install llama.cpp with following command +# cmake -D CMAKE_INSTALL_PREFIX= -D BUILD_SHARED_LIBS=on -D LLAMA_STATIC=off .. find_package(Llama QUIET) if (${Llama_FOUND}) set(ION_BB_BUILD_llm TRUE) - set(LIBRARIES llama) + set(INCLUDE_DIRS ${LLAMA_INCLUDE_DIR}) + set(LINK_DIRS ${LLAMA_LIB_DIR}) + set(LIBRARIES llama llava_shared) else() set(ION_BB_BUILD_llm FALSE) endif() diff --git a/src/bb/llm/llava.h b/src/bb/llm/llava.h new file mode 100644 index 00000000..19212f6e --- /dev/null +++ b/src/bb/llm/llava.h @@ -0,0 +1,50 @@ +#ifndef LLAVA_H +#define LLAVA_H + +#include "ggml.h" + +#ifdef LLAMA_SHARED +# if defined(_WIN32) && !defined(__MINGW32__) +# ifdef LLAMA_BUILD +# define LLAVA_API __declspec(dllexport) +# else +# define LLAVA_API __declspec(dllimport) +# endif +# else +# define LLAVA_API __attribute__ ((visibility ("default"))) +# endif +#else +# define LLAVA_API +#endif + +struct clip_ctx; + +#ifdef __cplusplus +extern "C" { +#endif + +struct llava_image_embed { + float * embed; + int n_image_pos; +}; + +/** sanity check for clip <-> llava embed size match */ +LLAVA_API bool llava_validate_embed_size(const struct llama_context * ctx_llama, const struct clip_ctx * ctx_clip); + +LLAVA_API bool llava_image_embed_make_with_clip_img(struct clip_ctx * ctx_clip, int n_threads, const struct clip_image_u8 * img, float ** image_embd_out, int * n_img_pos_out); + +/** build an image embed from image file bytes */ 
+LLAVA_API struct llava_image_embed * llava_image_embed_make_with_bytes(struct clip_ctx * ctx_clip, int n_threads, const unsigned char * image_bytes, int image_bytes_length); +/** build an image embed from a path to an image filename */ +LLAVA_API struct llava_image_embed * llava_image_embed_make_with_filename(struct clip_ctx * ctx_clip, int n_threads, const char * image_path); +LLAVA_API void llava_image_embed_free(struct llava_image_embed * embed); +/** free an embedding made with llava_image_embed_make_* */ + +/** write the image represented by embed into the llama context with batch size n_batch, starting at context pos n_past. on completion, n_past points to the next position in the context after the image embed. */ +LLAVA_API bool llava_eval_image_embed(struct llama_context * ctx_llama, const struct llava_image_embed * embed, int n_batch, int * n_past); + +#ifdef __cplusplus +} +#endif + +#endif diff --git a/src/bb/llm/rt.h b/src/bb/llm/rt.h index 948eec6d..f92ea06d 100644 --- a/src/bb/llm/rt.h +++ b/src/bb/llm/rt.h @@ -1,57 +1 @@ -#ifndef ION_BB_LLM_RT_H -#define ION_BB_LLM_RT_H - -#include - -#include - -#include "ion/export.h" - -#include "log.h" -#include "json/json.hpp" - -namespace ion { -namespace bb { -namespace llm { - -std::map extern_functions; - -class RegisterExtern { - public: - RegisterExtern(std::string key, Halide::ExternCFunction f) { - extern_functions[key] = f; - } -}; - -} // llm -} // bb -} // ion - -#define ION_REGISTER_EXTERN(NAME) static auto ion_register_extern_##NAME = ion::bb::llm::RegisterExtern(#NAME, NAME); - -extern "C" -ION_EXPORT int ion_bb_llm_llava(halide_buffer_t *in, halide_buffer_t *prompt, halide_buffer_t *out) { - try { - ion::log::info("ion_bb_llm_llava"); - // if (in->is_bounds_query()) { - // in->dim[0] = out->dim[0]; - // in->dim[1] = out->dim[1]; - // in->dim[2] = out->dim[2]; - // return 0; - // } - - return 0; - - } catch (const std::exception &e) { - std::cerr << e.what() << std::endl; - return -1; - } catch 
(...) { - std::cerr << "Unknown error" << std::endl; - return -1; - } -} -ION_REGISTER_EXTERN(ion_bb_llm_llava) - -#undef ION_REGISTER_EXTERN - -#endif // ION_BB_LLM_BB_H +// NOTE: This file is kept only for compatibility with the header-inclusion scheme used by bb.cc \ No newline at end of file diff --git a/src/bb/llm/LICENSE b/src/bb/llm/thirdparty_notice.txt similarity index 95% rename from src/bb/llm/LICENSE rename to src/bb/llm/thirdparty_notice.txt index ec414831..d2e6f214 100644 --- a/src/bb/llm/LICENSE +++ b/src/bb/llm/thirdparty_notice.txt @@ -1,6 +1,8 @@ +llama.cpp + MIT License -Copyright 2020 Fixstars Corporation. +Copyright (c) 2023-2024 The ggml authors Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal