WIP: Called llama.cpp from extern functions
iitaku committed Apr 23, 2024 (1 parent 661f788, commit c9c0b91)
Showing 10 changed files with 252 additions and 83 deletions.
7 changes: 4 additions & 3 deletions example/llm_llava.cc
@@ -13,11 +13,12 @@ int main(int argc, char *argv[]) {
Buffer<int8_t> prompt{1024};

Builder b;
b.set_target(Halide::get_target_from_environment().with_feature(Halide::Target::TracePipeline));
b.set_target(Halide::get_target_from_environment().with_feature(Halide::Target::Debug).with_feature(Halide::Target::TracePipeline));
b.with_bb_module("ion-bb");

auto n_img = b.add("image_io_color_data_loader").set_param(Param("url", "http://www.onthejob.education/images/4th_level/Road_Worker/Road_Worker_Darwin.jpg"), Param("width", width), Param("height", height));
auto n_txt = b.add("llm_llava")(n_img["output"], prompt);
auto n_img = b.add("image_io_color_data_loader").set_param(Param{"url", "http://www.onthejob.education/images/4th_level/Road_Worker/Road_Worker_Darwin.jpg"}, Param{"width", width}, Param{"height", height});
n_img = b.add("base_reorder_buffer_3d_uint8")(n_img["output"]).set_param(Param{"dim0", 2}, Param{"dim1", 0}, Param{"dim2", 1});
auto n_txt = b.add("llm_llava")(n_img["output"], prompt).set_param(Param{"width", width}, Param{"height", height});

Buffer<int8_t> txt_output{1024};
n_txt["output"].bind(txt_output);
9 changes: 7 additions & 2 deletions src/bb/CMakeLists.txt
@@ -3,6 +3,7 @@ cmake_policy(SET CMP0057 NEW)
set(ION_BB_INCLUDE_DIRS)
set(ION_BB_LINK_DIRS)
set(ION_BB_LIBRARIES)
set(ION_BB_SRCS)
file(GLOB childs ${CMAKE_CURRENT_SOURCE_DIR}/*)
set(BB_NAMES base dnn fpga image-io image-processing opencv sgm llm)
foreach(BB_NAME IN LISTS BB_NAMES)
@@ -23,12 +24,16 @@ foreach(BB_NAME IN LISTS BB_NAMES)
set(ION_BB_BUILD_${BB_NAME} FALSE PARENT_SCOPE)
message("Skip building \"${BB_NAME}\"")
endif()

if (EXISTS ${CMAKE_CURRENT_SOURCE_DIR}/${BB_NAME}/bb.cc)
list(APPEND ION_BB_SRCS ${CMAKE_CURRENT_SOURCE_DIR}/${BB_NAME}/bb.cc)
endif()
endif()
endforeach()

add_library(ion-bb SHARED bb.cc)
add_library(ion-bb SHARED bb.cc ${ION_BB_SRCS})
target_include_directories(ion-bb PUBLIC ${PROJECT_SOURCE_DIR}/include ${PROJECT_SOURCE_DIR}/src ${CMAKE_CURRENT_SOURCE_DIR} ${ION_BB_INCLUDE_DIRS})

target_link_directories(ion-bb PUBLIC ${ION_BB_LINK_DIRS})
target_link_libraries(ion-bb PUBLIC ion-core ${ION_BB_LIBRARIES})
if(UNIX)
target_compile_options(ion-bb PUBLIC -fno-rtti) # For Halide::Generator
27 changes: 10 additions & 17 deletions src/bb/image-io/rt_file.h
@@ -25,26 +25,19 @@ extern "C" int ION_EXPORT ion_bb_image_io_color_data_loader(halide_buffer_t *ses
using namespace ion::bb::image_io;

try {

if (out->is_bounds_query()) {
out->dim[0].min = 0;
out->dim[0].extent = width,
out->dim[1].min = 0;
out->dim[1].extent = height;
out->dim[2].min = 0;
out->dim[2].extent = 3;
} else {
const std::string session_id(reinterpret_cast<const char *>(session_id_buf->host));
const std::string url = reinterpret_cast<const char *>(url_buf->host);
static std::unordered_map<std::string, std::unique_ptr<ImageSequence<uint8_t>>> seqs;
if (seqs.count(session_id) == 0) {
seqs[session_id] = std::unique_ptr<ImageSequence<uint8_t>>(new ImageSequence<uint8_t>(session_id, url));
}

Halide::Runtime::Buffer<uint8_t> obuf(*out);
seqs[session_id]->get(width, height, IMREAD_COLOR, obuf);
return 0;
}

const std::string session_id(reinterpret_cast<const char *>(session_id_buf->host));
const std::string url = reinterpret_cast<const char *>(url_buf->host);
static std::unordered_map<std::string, std::unique_ptr<ImageSequence<uint8_t>>> seqs;
if (seqs.count(session_id) == 0) {
seqs[session_id] = std::unique_ptr<ImageSequence<uint8_t>>(new ImageSequence<uint8_t>(session_id, url));
}

Halide::Runtime::Buffer<uint8_t> obuf(*out);
seqs[session_id]->get(width, height, IMREAD_COLOR, obuf);
} catch (const std::exception &e) {
std::cerr << e.what() << std::endl;
return -1;
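A note on the bounds-query pattern above (and in the new bb.cc below): Halide calls a define_extern stage first in bounds-query mode, where the buffers being negotiated have null host pointers and is_bounds_query() returns true, and the stage only fills in dim[i].min/extent before returning; it is then called again with allocated buffers to actually produce data. A minimal standalone sketch of that protocol, assuming the Halide runtime headers are available; the function name is illustrative, not part of this commit:

#include <cstdint>
#include <HalideBuffer.h>
#include <HalideRuntime.h>

// Skeleton of an extern "C" stage following the same bounds-query protocol
// as ion_bb_image_io_color_data_loader above.
extern "C" int example_extern_stage(int32_t width, int32_t height, halide_buffer_t *out) {
    if (out->is_bounds_query()) {
        // Bounds query: out->host is null, so only report the region produced.
        out->dim[0].min = 0; out->dim[0].extent = width;
        out->dim[1].min = 0; out->dim[1].extent = height;
        out->dim[2].min = 0; out->dim[2].extent = 3;
        return 0;
    }
    // Real call: out is allocated; wrap it and fill it with data.
    Halide::Runtime::Buffer<uint8_t> obuf(*out);
    obuf.fill(0);
    return 0;
}
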
81 changes: 81 additions & 0 deletions src/bb/llm/bb.cc
@@ -0,0 +1,81 @@
#include <fstream>

#include <Halide.h>

#include <llama.h>

#include "ion/export.h"

#include "log.h"
#include "json/json.hpp"

#include "clip.h"
#include "llava.h"

namespace ion {
namespace bb {
namespace llm {

std::map<std::string, Halide::ExternCFunction> extern_functions;

class RegisterExtern {
public:
RegisterExtern(std::string key, Halide::ExternCFunction f) {
extern_functions[key] = f;
}
};

} // llm
} // bb
} // ion

#define ION_REGISTER_EXTERN(NAME) static auto ion_register_extern_##NAME = ion::bb::llm::RegisterExtern(#NAME, NAME);

extern "C"
ION_EXPORT int ion_bb_llm_llava(halide_buffer_t *in, halide_buffer_t *prompt, int32_t width, int32_t height, halide_buffer_t *out) {
try {
if (in->is_bounds_query()) {
in->dim[0].min = 0;
in->dim[0].extent = 3;
in->dim[1].min = 0;
in->dim[1].extent = width;
in->dim[2].min = 0;
in->dim[2].extent = height;
return 0;
}

Halide::Runtime::Buffer<int8_t> obuf(*out);

std::ofstream ofs("test.bin");
Halide::Runtime::Buffer<uint8_t> ibuf(*in);
ofs.write(reinterpret_cast<const char*>(ibuf.data()), in->size_in_bytes());

auto verbosity = 1;
auto ctx_clip = clip_model_load("pasu", verbosity);

llama_backend_init();
// llama_numa_init(params->numa);

auto n_threads = 1;
auto embed = llava_image_embed_make_with_bytes(ctx_clip, n_threads, ibuf.data(), ibuf.size_in_bytes());
if (!embed) {
ion::log::error("Could not load image");
return 1;
}

obuf.fill(0);
obuf(0) = 'x';

return 0;

} catch (const std::exception &e) {
std::cerr << e.what() << std::endl;
return -1;
} catch (...) {
std::cerr << "Unknown error" << std::endl;
return -1;
}
}
ION_REGISTER_EXTERN(ion_bb_llm_llava)

#undef ION_REGISTER_EXTERN
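The ION_REGISTER_EXTERN(ion_bb_llm_llava) line above relies on the static-registration idiom: a namespace-scope RegisterExtern object is constructed during static initialization, which inserts the function pointer into ion::bb::llm::extern_functions keyed by its name, so the symbol can later be resolved as a string. A self-contained sketch of the same idiom; the registry, macro, and function here are illustrative stand-ins, not the ion-kit API:

#include <iostream>
#include <map>
#include <string>

// Illustrative registry mirroring ion::bb::llm::extern_functions.
using ExternFn = int (*)();
static std::map<std::string, ExternFn> extern_functions;

struct RegisterExtern {
    RegisterExtern(const std::string &key, ExternFn f) { extern_functions[key] = f; }
};

// Same shape as ION_REGISTER_EXTERN(NAME): a static object whose constructor
// runs before main() and records NAME in the map under the string #NAME.
#define REGISTER_EXTERN(NAME) static RegisterExtern register_extern_##NAME(#NAME, NAME);

int example_extern() { return 42; }
REGISTER_EXTERN(example_extern)

int main() {
    // By the time main() runs the registrar has populated the map,
    // so the function can be looked up by name and invoked.
    std::cout << extern_functions.at("example_extern")() << std::endl;  // prints 42
    return 0;
}
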
8 changes: 6 additions & 2 deletions src/bb/llm/bb.h
@@ -7,11 +7,15 @@ namespace ion {
namespace bb {
namespace llm {

extern std::map<std::string, Halide::ExternCFunction> extern_functions;

class Llava : public BuildingBlock<Llava> {
public:
Input<Halide::Func> input{"input", Halide::type_of<uint8_t>(), 3};
Input<Halide::Func> prompt{"prompt", Halide::type_of<int8_t>(), 1};
Output<Halide::Func> output{"output", Halide::type_of<int8_t>(), 1};
BuildingBlockParam<int32_t> width{"width", 640};
BuildingBlockParam<int32_t> height{"height", 480};

void generate() {
using namespace Halide;
@@ -24,8 +28,8 @@ class Llava : public BuildingBlock<Llava> {
Func prompt_;
prompt_(_) = prompt(_);
prompt_.compute_root();
std::vector<ExternFuncArgument> params = {input_, prompt_};

std::vector<ExternFuncArgument> params = {input_, prompt_, static_cast<int32_t>(width), static_cast<int32_t>(height)};
Func llava("llava");
llava.define_extern("ion_bb_llm_llava", params, type_of<int8_t>(), 1);
llava.compute_root();
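For reference, Halide passes ExternFuncArguments to the extern "C" symbol in the order they are listed, with the output buffer appended last, which is how {input_, prompt_, width, height} above lines up with ion_bb_llm_llava(halide_buffer_t *in, halide_buffer_t *prompt, int32_t width, int32_t height, halide_buffer_t *out) in bb.cc. A minimal sketch of the same define_extern wiring outside the building-block framework; only the pipeline is defined here, nothing is realized, and the Func names are illustrative:

#include <vector>
#include <Halide.h>

using namespace Halide;

int main() {
    // Two Func inputs plus two scalar parameters, mirroring Llava::generate().
    Var x;
    Func input("input"), prompt("prompt");
    input(x) = cast<uint8_t>(x);
    prompt(x) = cast<int8_t>(0);
    input.compute_root();
    prompt.compute_root();

    int32_t width = 640, height = 480;

    // Arguments reach the extern "C" function in this order, with the output
    // halide_buffer_t* appended last by Halide.
    std::vector<ExternFuncArgument> params = {input, prompt, width, height};

    Func llava("llava");
    llava.define_extern("ion_bb_llm_llava", params, type_of<int8_t>(), 1);
    llava.compute_root();
    return 0;
}
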
85 changes: 85 additions & 0 deletions src/bb/llm/clip.h
@@ -0,0 +1,85 @@
#ifndef CLIP_H
#define CLIP_H

#include <stddef.h>
#include <stdint.h>

#ifdef LLAMA_SHARED
# if defined(_WIN32) && !defined(__MINGW32__)
# ifdef LLAMA_BUILD
# define CLIP_API __declspec(dllexport)
# else
# define CLIP_API __declspec(dllimport)
# endif
# else
# define CLIP_API __attribute__ ((visibility ("default")))
# endif
#else
# define CLIP_API
#endif

struct clip_ctx;

#ifdef __cplusplus
extern "C" {
#endif

struct clip_ctx;

struct clip_image_u8_batch {
struct clip_image_u8 * data;
size_t size;
};

struct clip_image_f32_batch {
struct clip_image_f32 * data;
size_t size;
};

CLIP_API struct clip_ctx * clip_model_load (const char * fname, int verbosity);
CLIP_API struct clip_ctx * clip_model_load_cpu(const char * fname, int verbosity);

CLIP_API void clip_free(struct clip_ctx * ctx);

CLIP_API size_t clip_embd_nbytes(const struct clip_ctx * ctx);

CLIP_API int32_t clip_image_size (const struct clip_ctx * ctx);
CLIP_API int32_t clip_patch_size (const struct clip_ctx * ctx);
CLIP_API int32_t clip_hidden_size(const struct clip_ctx * ctx);

// TODO: should be enum, not string
CLIP_API const char * clip_patch_merge_type(const struct clip_ctx * ctx);

CLIP_API const int32_t * clip_image_grid(const struct clip_ctx * ctx);

CLIP_API int clip_n_patches (const struct clip_ctx * ctx);
CLIP_API int clip_n_mmproj_embd(const struct clip_ctx * ctx);

CLIP_API struct clip_image_u8 * clip_image_u8_init ();
CLIP_API struct clip_image_f32 * clip_image_f32_init();

CLIP_API void clip_image_u8_free (struct clip_image_u8 * img);
CLIP_API void clip_image_f32_free(struct clip_image_f32 * img);
CLIP_API void clip_image_u8_batch_free (struct clip_image_u8_batch * batch);
CLIP_API void clip_image_f32_batch_free(struct clip_image_f32_batch * batch);

CLIP_API bool clip_image_load_from_file(const char * fname, struct clip_image_u8 * img);

/** interpret bytes as an image file with length bytes_length, and use the result to populate img */
CLIP_API bool clip_image_load_from_bytes(const unsigned char * bytes, size_t bytes_length, struct clip_image_u8 * img);

/** preprocess img and store the result in res_imgs, pad_to_square may be overriden to false depending on model configuration */
CLIP_API bool clip_image_preprocess(struct clip_ctx * ctx, const struct clip_image_u8 * img, struct clip_image_f32_batch * res_imgs );

CLIP_API struct ggml_tensor * clip_get_newline_tensor(const struct clip_ctx * ctx);

CLIP_API bool clip_image_encode (struct clip_ctx * ctx, int n_threads, struct clip_image_f32 * img, float * vec);
CLIP_API bool clip_image_batch_encode(struct clip_ctx * ctx, int n_threads, const struct clip_image_f32_batch * imgs, float * vec);

CLIP_API bool clip_model_quantize(const char * fname_inp, const char * fname_out, int itype);

#ifdef __cplusplus
}
#endif

#endif // CLIP_H
6 changes: 5 additions & 1 deletion src/bb/llm/config.cmake
@@ -1,7 +1,11 @@
# Build and install llama.cpp with the following command
# cmake -D CMAKE_INSTALL_PREFIX=<prefix> -D BUILD_SHARED_LIBS=on -D LLAMA_STATIC=off ..
find_package(Llama QUIET)
if (${Llama_FOUND})
set(ION_BB_BUILD_llm TRUE)
set(LIBRARIES llama)
set(INCLUDE_DIRS ${LLAMA_INCLUDE_DIR})
set(LINK_DIRS ${LLAMA_LIB_DIR})
set(LIBRARIES llama llava_shared)
else()
set(ION_BB_BUILD_llm FALSE)
endif()
50 changes: 50 additions & 0 deletions src/bb/llm/llava.h
@@ -0,0 +1,50 @@
#ifndef LLAVA_H
#define LLAVA_H

#include "ggml.h"

#ifdef LLAMA_SHARED
# if defined(_WIN32) && !defined(__MINGW32__)
# ifdef LLAMA_BUILD
# define LLAVA_API __declspec(dllexport)
# else
# define LLAVA_API __declspec(dllimport)
# endif
# else
# define LLAVA_API __attribute__ ((visibility ("default")))
# endif
#else
# define LLAVA_API
#endif

struct clip_ctx;

#ifdef __cplusplus
extern "C" {
#endif

struct llava_image_embed {
float * embed;
int n_image_pos;
};

/** sanity check for clip <-> llava embed size match */
LLAVA_API bool llava_validate_embed_size(const struct llama_context * ctx_llama, const struct clip_ctx * ctx_clip);

LLAVA_API bool llava_image_embed_make_with_clip_img(struct clip_ctx * ctx_clip, int n_threads, const struct clip_image_u8 * img, float ** image_embd_out, int * n_img_pos_out);

/** build an image embed from image file bytes */
LLAVA_API struct llava_image_embed * llava_image_embed_make_with_bytes(struct clip_ctx * ctx_clip, int n_threads, const unsigned char * image_bytes, int image_bytes_length);
/** build an image embed from a path to an image filename */
LLAVA_API struct llava_image_embed * llava_image_embed_make_with_filename(struct clip_ctx * ctx_clip, int n_threads, const char * image_path);
LLAVA_API void llava_image_embed_free(struct llava_image_embed * embed);
/** free an embedding made with llava_image_embed_make_* */

/** write the image represented by embed into the llama context with batch size n_batch, starting at context pos n_past. on completion, n_past points to the next position in the context after the image embed. */
LLAVA_API bool llava_eval_image_embed(struct llama_context * ctx_llama, const struct llava_image_embed * embed, int n_batch, int * n_past);

#ifdef __cplusplus
}
#endif

#endif
58 changes: 1 addition & 57 deletions src/bb/llm/rt.h
@@ -1,57 +1 @@
#ifndef ION_BB_LLM_RT_H
#define ION_BB_LLM_RT_H

#include <Halide.h>

#include <llama.h>

#include "ion/export.h"

#include "log.h"
#include "json/json.hpp"

namespace ion {
namespace bb {
namespace llm {

std::map<std::string, Halide::ExternCFunction> extern_functions;

class RegisterExtern {
public:
RegisterExtern(std::string key, Halide::ExternCFunction f) {
extern_functions[key] = f;
}
};

} // llm
} // bb
} // ion

#define ION_REGISTER_EXTERN(NAME) static auto ion_register_extern_##NAME = ion::bb::llm::RegisterExtern(#NAME, NAME);

extern "C"
ION_EXPORT int ion_bb_llm_llava(halide_buffer_t *in, halide_buffer_t *prompt, halide_buffer_t *out) {
try {
ion::log::info("ion_bb_llm_llava");
// if (in->is_bounds_query()) {
// in->dim[0] = out->dim[0];
// in->dim[1] = out->dim[1];
// in->dim[2] = out->dim[2];
// return 0;
// }

return 0;

} catch (const std::exception &e) {
std::cerr << e.what() << std::endl;
return -1;
} catch (...) {
std::cerr << "Unknown error" << std::endl;
return -1;
}
}
ION_REGISTER_EXTERN(ion_bb_llm_llava)

#undef ION_REGISTER_EXTERN

#endif // ION_BB_LLM_BB_H
// NOTE: This file remains only for compatibility with the header-inclusion scheme used by bb.cc