-
Notifications
You must be signed in to change notification settings - Fork 6
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
WIP: Called llama.cpp from extern functions
- Loading branch information
Showing
10 changed files
with
252 additions
and
83 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,81 @@ | ||
#include <fstream> | ||
|
||
#include <Halide.h> | ||
|
||
#include <llama.h> | ||
|
||
#include "ion/export.h" | ||
|
||
#include "log.h" | ||
#include "json/json.hpp" | ||
|
||
#include "clip.h" | ||
#include "llava.h" | ||
|
||
namespace ion { | ||
namespace bb { | ||
namespace llm { | ||
|
||
std::map<std::string, Halide::ExternCFunction> extern_functions; | ||
|
||
class RegisterExtern { | ||
public: | ||
RegisterExtern(std::string key, Halide::ExternCFunction f) { | ||
extern_functions[key] = f; | ||
} | ||
}; | ||
|
||
} // llm | ||
} // bb | ||
} // ion | ||
|
||
#define ION_REGISTER_EXTERN(NAME) static auto ion_register_extern_##NAME = ion::bb::llm::RegisterExtern(#NAME, NAME); | ||
|
||
extern "C" | ||
ION_EXPORT int ion_bb_llm_llava(halide_buffer_t *in, halide_buffer_t *prompt, int32_t width, int32_t height, halide_buffer_t *out) { | ||
try { | ||
if (in->is_bounds_query()) { | ||
in->dim[0].min = 0; | ||
in->dim[0].extent = 3; | ||
in->dim[1].min = 0; | ||
in->dim[1].extent = width; | ||
in->dim[2].min = 0; | ||
in->dim[2].extent = height; | ||
return 0; | ||
} | ||
|
||
Halide::Runtime::Buffer<int8_t> obuf(*out); | ||
|
||
std::ofstream ofs("test.bin"); | ||
Halide::Runtime::Buffer<uint8_t> ibuf(*in); | ||
ofs.write(reinterpret_cast<const char*>(ibuf.data()), in->size_in_bytes()); | ||
|
||
auto verbosity = 1; | ||
auto ctx_clip = clip_model_load("pasu", verbosity); | ||
|
||
llama_backend_init(); | ||
// llama_numa_init(params->numa); | ||
|
||
auto n_threads = 1; | ||
auto embed = llava_image_embed_make_with_bytes(ctx_clip, n_threads, ibuf.data(), ibuf.size_in_bytes()); | ||
if (!embed) { | ||
ion::log::error("Could not load image"); | ||
return 1; | ||
} | ||
|
||
obuf.fill(0); | ||
obuf(0) = 'x'; | ||
|
||
return 0; | ||
|
||
} catch (const std::exception &e) { | ||
std::cerr << e.what() << std::endl; | ||
return -1; | ||
} catch (...) { | ||
std::cerr << "Unknown error" << std::endl; | ||
return -1; | ||
} | ||
} | ||
ION_REGISTER_EXTERN(ion_bb_llm_llava) | ||
|
||
#undef ION_REGISTER_EXTERN |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,85 @@ | ||
#ifndef CLIP_H | ||
#define CLIP_H | ||
|
||
#include <stddef.h> | ||
#include <stdint.h> | ||
|
||
#ifdef LLAMA_SHARED | ||
# if defined(_WIN32) && !defined(__MINGW32__) | ||
# ifdef LLAMA_BUILD | ||
# define CLIP_API __declspec(dllexport) | ||
# else | ||
# define CLIP_API __declspec(dllimport) | ||
# endif | ||
# else | ||
# define CLIP_API __attribute__ ((visibility ("default"))) | ||
# endif | ||
#else | ||
# define CLIP_API | ||
#endif | ||
|
||
struct clip_ctx; | ||
|
||
#ifdef __cplusplus | ||
extern "C" { | ||
#endif | ||
|
||
struct clip_ctx; | ||
|
||
/** Pointer/size pair describing a batch of clip_image_u8. Presumably `size` is the number of images at `data` - TODO confirm against clip.cpp. */
struct clip_image_u8_batch { | ||
struct clip_image_u8 * data; | ||
size_t size; | ||
}; | ||
|
||
/** Pointer/size pair describing a batch of clip_image_f32. Presumably `size` is the number of images at `data` - TODO confirm against clip.cpp. */
struct clip_image_f32_batch { | ||
struct clip_image_f32 * data; | ||
size_t size; | ||
}; | ||
|
||
CLIP_API struct clip_ctx * clip_model_load (const char * fname, int verbosity); | ||
CLIP_API struct clip_ctx * clip_model_load_cpu(const char * fname, int verbosity); | ||
|
||
CLIP_API void clip_free(struct clip_ctx * ctx); | ||
|
||
CLIP_API size_t clip_embd_nbytes(const struct clip_ctx * ctx); | ||
|
||
CLIP_API int32_t clip_image_size (const struct clip_ctx * ctx); | ||
CLIP_API int32_t clip_patch_size (const struct clip_ctx * ctx); | ||
CLIP_API int32_t clip_hidden_size(const struct clip_ctx * ctx); | ||
|
||
// TODO: should be enum, not string | ||
CLIP_API const char * clip_patch_merge_type(const struct clip_ctx * ctx); | ||
|
||
CLIP_API const int32_t * clip_image_grid(const struct clip_ctx * ctx); | ||
|
||
CLIP_API int clip_n_patches (const struct clip_ctx * ctx); | ||
CLIP_API int clip_n_mmproj_embd(const struct clip_ctx * ctx); | ||
|
||
CLIP_API struct clip_image_u8 * clip_image_u8_init (); | ||
CLIP_API struct clip_image_f32 * clip_image_f32_init(); | ||
|
||
CLIP_API void clip_image_u8_free (struct clip_image_u8 * img); | ||
CLIP_API void clip_image_f32_free(struct clip_image_f32 * img); | ||
CLIP_API void clip_image_u8_batch_free (struct clip_image_u8_batch * batch); | ||
CLIP_API void clip_image_f32_batch_free(struct clip_image_f32_batch * batch); | ||
|
||
CLIP_API bool clip_image_load_from_file(const char * fname, struct clip_image_u8 * img); | ||
|
||
/** interpret bytes as an image file with length bytes_length, and use the result to populate img */ | ||
CLIP_API bool clip_image_load_from_bytes(const unsigned char * bytes, size_t bytes_length, struct clip_image_u8 * img); | ||
|
||
/** preprocess img and store the result in res_imgs, pad_to_square may be overridden to false depending on model configuration */ | ||
CLIP_API bool clip_image_preprocess(struct clip_ctx * ctx, const struct clip_image_u8 * img, struct clip_image_f32_batch * res_imgs ); | ||
|
||
CLIP_API struct ggml_tensor * clip_get_newline_tensor(const struct clip_ctx * ctx); | ||
|
||
CLIP_API bool clip_image_encode (struct clip_ctx * ctx, int n_threads, struct clip_image_f32 * img, float * vec); | ||
CLIP_API bool clip_image_batch_encode(struct clip_ctx * ctx, int n_threads, const struct clip_image_f32_batch * imgs, float * vec); | ||
|
||
CLIP_API bool clip_model_quantize(const char * fname_inp, const char * fname_out, int itype); | ||
|
||
#ifdef __cplusplus | ||
} | ||
#endif | ||
|
||
#endif // CLIP_H |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,7 +1,11 @@ | ||
# Build and install llama.cpp with the following command: | ||
# cmake -D CMAKE_INSTALL_PREFIX=<prefix> -D BUILD_SHARED_LIBS=on -D LLAMA_STATIC=off .. | ||
find_package(Llama QUIET) | ||
if (${Llama_FOUND}) | ||
set(ION_BB_BUILD_llm TRUE) | ||
set(LIBRARIES llama) | ||
set(INCLUDE_DIRS ${LLAMA_INCLUDE_DIR}) | ||
set(LINK_DIRS ${LLAMA_LIB_DIR}) | ||
set(LIBRARIES llama llava_shared) | ||
else() | ||
set(ION_BB_BUILD_llm FALSE) | ||
endif() |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,50 @@ | ||
#ifndef LLAVA_H | ||
#define LLAVA_H | ||
|
||
#include "ggml.h" | ||
|
||
#ifdef LLAMA_SHARED | ||
# if defined(_WIN32) && !defined(__MINGW32__) | ||
# ifdef LLAMA_BUILD | ||
# define LLAVA_API __declspec(dllexport) | ||
# else | ||
# define LLAVA_API __declspec(dllimport) | ||
# endif | ||
# else | ||
# define LLAVA_API __attribute__ ((visibility ("default"))) | ||
# endif | ||
#else | ||
# define LLAVA_API | ||
#endif | ||
|
||
struct clip_ctx; | ||
|
||
#ifdef __cplusplus | ||
extern "C" { | ||
#endif | ||
|
||
/** Image embed as returned by llava_image_embed_make_with_* and released with llava_image_embed_free. */
struct llava_image_embed { | ||
/* embedding values; presumably n_image_pos vectors of the model's embedding width - TODO confirm against llava.cpp */
float * embed; | ||
/* presumably the number of context positions the image occupies - TODO confirm */
int n_image_pos; | ||
}; | ||
|
||
/** sanity check for clip <-> llava embed size match */ | ||
LLAVA_API bool llava_validate_embed_size(const struct llama_context * ctx_llama, const struct clip_ctx * ctx_clip); | ||
|
||
LLAVA_API bool llava_image_embed_make_with_clip_img(struct clip_ctx * ctx_clip, int n_threads, const struct clip_image_u8 * img, float ** image_embd_out, int * n_img_pos_out); | ||
|
||
/** build an image embed from image file bytes */ | ||
LLAVA_API struct llava_image_embed * llava_image_embed_make_with_bytes(struct clip_ctx * ctx_clip, int n_threads, const unsigned char * image_bytes, int image_bytes_length); | ||
/** build an image embed from a path to an image filename */ | ||
LLAVA_API struct llava_image_embed * llava_image_embed_make_with_filename(struct clip_ctx * ctx_clip, int n_threads, const char * image_path); | ||
LLAVA_API void llava_image_embed_free(struct llava_image_embed * embed); | ||
/** free an embedding made with llava_image_embed_make_* */ | ||
|
||
/** write the image represented by embed into the llama context with batch size n_batch, starting at context pos n_past. on completion, n_past points to the next position in the context after the image embed. */ | ||
LLAVA_API bool llava_eval_image_embed(struct llama_context * ctx_llama, const struct llava_image_embed * embed, int n_batch, int * n_past); | ||
|
||
#ifdef __cplusplus | ||
} | ||
#endif | ||
|
||
#endif |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,57 +1 @@ | ||
#ifndef ION_BB_LLM_RT_H | ||
#define ION_BB_LLM_RT_H | ||
|
||
#include <Halide.h> | ||
|
||
#include <llama.h> | ||
|
||
#include "ion/export.h" | ||
|
||
#include "log.h" | ||
#include "json/json.hpp" | ||
|
||
namespace ion { | ||
namespace bb { | ||
namespace llm { | ||
|
||
std::map<std::string, Halide::ExternCFunction> extern_functions; | ||
|
||
class RegisterExtern { | ||
public: | ||
RegisterExtern(std::string key, Halide::ExternCFunction f) { | ||
extern_functions[key] = f; | ||
} | ||
}; | ||
|
||
} // llm | ||
} // bb | ||
} // ion | ||
|
||
#define ION_REGISTER_EXTERN(NAME) static auto ion_register_extern_##NAME = ion::bb::llm::RegisterExtern(#NAME, NAME); | ||
|
||
extern "C" | ||
ION_EXPORT int ion_bb_llm_llava(halide_buffer_t *in, halide_buffer_t *prompt, halide_buffer_t *out) { | ||
try { | ||
ion::log::info("ion_bb_llm_llava"); | ||
// if (in->is_bounds_query()) { | ||
// in->dim[0] = out->dim[0]; | ||
// in->dim[1] = out->dim[1]; | ||
// in->dim[2] = out->dim[2]; | ||
// return 0; | ||
// } | ||
|
||
return 0; | ||
|
||
} catch (const std::exception &e) { | ||
std::cerr << e.what() << std::endl; | ||
return -1; | ||
} catch (...) { | ||
std::cerr << "Unknown error" << std::endl; | ||
return -1; | ||
} | ||
} | ||
ION_REGISTER_EXTERN(ion_bb_llm_llava) | ||
|
||
#undef ION_REGISTER_EXTERN | ||
|
||
#endif // ION_BB_LLM_BB_H | ||
// NOTE: This file remains only for compatibility with the header-inclusion scheme of bb.cc |
Oops, something went wrong.