From 1f1125a745ada5d7e041e4bb74cb1844b284ce59 Mon Sep 17 00:00:00 2001
From: axel7083 <42176370+axel7083@users.noreply.github.com>
Date: Fri, 21 Jun 2024 17:04:38 +0200
Subject: [PATCH 1/2] feat: adding support for hf_pretrained_model option

Signed-off-by: axel7083 <42176370+axel7083@users.noreply.github.com>
---
 model_servers/llamacpp_python/src/requirements.txt |  1 +
 model_servers/llamacpp_python/src/run.sh           | 10 ++++++++--
 2 files changed, 9 insertions(+), 2 deletions(-)

diff --git a/model_servers/llamacpp_python/src/requirements.txt b/model_servers/llamacpp_python/src/requirements.txt
index 8ec01572..9a19eec2 100644
--- a/model_servers/llamacpp_python/src/requirements.txt
+++ b/model_servers/llamacpp_python/src/requirements.txt
@@ -1,2 +1,3 @@
 llama-cpp-python[server]==0.2.78
+transformers==4.41.2
 pip==24.0
diff --git a/model_servers/llamacpp_python/src/run.sh b/model_servers/llamacpp_python/src/run.sh
index 851ef429..ba719e8e 100644
--- a/model_servers/llamacpp_python/src/run.sh
+++ b/model_servers/llamacpp_python/src/run.sh
@@ -5,14 +5,20 @@ if [ ${CONFIG_PATH} ] || [[ ${MODEL_PATH} && ${CONFIG_PATH} ]]; then
 fi
 
 if [ ${MODEL_PATH} ]; then
-    python -m llama_cpp.server \
+    CMD="python -m llama_cpp.server \
         --model ${MODEL_PATH} \
         --host ${HOST:=0.0.0.0} \
         --port ${PORT:=8001} \
         --n_gpu_layers ${GPU_LAYERS:=0} \
         --clip_model_path ${CLIP_MODEL_PATH:=None} \
         --chat_format ${CHAT_FORMAT:=llama-2} \
-        --interrupt_requests ${INTERRUPT_REQUESTS:=False}
+        --interrupt_requests ${INTERRUPT_REQUESTS:=False}"
+
+    if [ ! -z "${HF_PRETRAINED_MODEL}" ] && [ "${HF_PRETRAINED_MODEL}" != "None" ]; then
+        CMD+=" --hf_pretrained_model_name_or_path ${HF_PRETRAINED_MODEL}"
+    fi
+
+    eval $CMD
     exit 0
 fi
 

From 0d4249d732a411ff7b3ee686fb1b25d16db1338b Mon Sep 17 00:00:00 2001
From: greg pereira
Date: Fri, 21 Jun 2024 09:03:13 -0700
Subject: [PATCH 2/2] refactor for consistency

Signed-off-by: greg pereira
---
 model_servers/llamacpp_python/src/run.sh | 16 ++++++++--------
 1 file changed, 8 insertions(+), 8 deletions(-)

diff --git a/model_servers/llamacpp_python/src/run.sh b/model_servers/llamacpp_python/src/run.sh
index ba719e8e..f34fef4b 100644
--- a/model_servers/llamacpp_python/src/run.sh
+++ b/model_servers/llamacpp_python/src/run.sh
@@ -4,21 +4,21 @@ if [ ${CONFIG_PATH} ] || [[ ${MODEL_PATH} && ${CONFIG_PATH} ]]; then
     exit 0
 fi
 
+if [ "${HF_PRETRAINED_MODEL}" == "None" ]; then
+    HF_PRETRAINED_MODEL=""
+fi
+
 if [ ${MODEL_PATH} ]; then
-    CMD="python -m llama_cpp.server \
+    python -m llama_cpp.server \
         --model ${MODEL_PATH} \
         --host ${HOST:=0.0.0.0} \
         --port ${PORT:=8001} \
         --n_gpu_layers ${GPU_LAYERS:=0} \
         --clip_model_path ${CLIP_MODEL_PATH:=None} \
         --chat_format ${CHAT_FORMAT:=llama-2} \
-        --interrupt_requests ${INTERRUPT_REQUESTS:=False}"
-
-    if [ ! -z "${HF_PRETRAINED_MODEL}" ] && [ "${HF_PRETRAINED_MODEL}" != "None" ]; then
-        CMD+=" --hf_pretrained_model_name_or_path ${HF_PRETRAINED_MODEL}"
-    fi
-
-    eval $CMD
+        ${PRETRAINED_MODEL_PATH:=} \
+        ${HF_PRETRAINED_MODEL:+--hf_pretrained_model_name_or_path ${HF_PRETRAINED_MODEL}} \
+        --interrupt_requests ${INTERRUPT_REQUESTS:=False}
     exit 0
 fi
 
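
For reviewers unfamiliar with the ${VAR:+word} expansion the second commit relies on, here is a minimal sketch of how the refactored invocation decides whether to pass the new flag. The model path, the "None" value, and the echo wrapper are illustrative only and are not part of the patch; in the container these variables come from the environment.

#!/bin/bash
# Hypothetical values for illustration; normally injected via the container env.
MODEL_PATH=/models/model.gguf
HF_PRETRAINED_MODEL="None"

# Same normalization the second commit adds: treat the literal string "None" as unset.
if [ "${HF_PRETRAINED_MODEL}" == "None" ]; then
    HF_PRETRAINED_MODEL=""
fi

# ${VAR:+word} expands to word only when VAR is set and non-empty, so the
# --hf_pretrained_model_name_or_path flag is emitted only when a model id is given.
echo python -m llama_cpp.server \
    --model ${MODEL_PATH} \
    ${HF_PRETRAINED_MODEL:+--hf_pretrained_model_name_or_path ${HF_PRETRAINED_MODEL}} \
    --interrupt_requests False
# Prints: python -m llama_cpp.server --model /models/model.gguf --interrupt_requests False

With HF_PRETRAINED_MODEL set to a Hugging Face repo id instead, the same expansion appends --hf_pretrained_model_name_or_path followed by that id, which replaces the CMD string building and eval from the first commit without changing behavior.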