Skip to content

Commit

Permalink
feat: change unload model and model status to POST (#558)
Browse files Browse the repository at this point in the history
  • Loading branch information
vansangpfiev authored May 14, 2024
1 parent 88c7421 commit 48dcae3
Show file tree
Hide file tree
Showing 4 changed files with 9 additions and 9 deletions.
4 changes: 2 additions & 2 deletions .github/scripts/e2e-test-llama-linux-and-mac.sh
Original file line number Diff line number Diff line change
Expand Up @@ -83,10 +83,10 @@ response2=$(
)

# unload model
-response3=$(curl --connect-timeout 60 -o /tmp/unload-model-res.log --request GET -s -w "%{http_code}" --location "http://127.0.0.1:$PORT/inferences/server/unloadModel" \
+response3=$(curl --connect-timeout 60 -o /tmp/unload-model-res.log --request POST -s -w "%{http_code}" --location "http://127.0.0.1:$PORT/inferences/server/unloadModel" \
--header 'Content-Type: application/json' \
--data '{
-"llama_model_path": "/tmp/testllm"
+"model": "testllm"
}')

# load embedding model
Expand Down
8 changes: 4 additions & 4 deletions .github/scripts/e2e-test-llama-windows.bat
Original file line number Diff line number Diff line change
Expand Up @@ -62,9 +62,9 @@ if not exist "%MODEL_EMBEDDING_PATH%" (
rem Define JSON strings for curl data
call set "MODEL_LLM_PATH_STRING=%%MODEL_LLM_PATH:\=\\%%"
call set "MODEL_EMBEDDING_PATH_STRING=%%MODEL_EMBEDDING_PATH:\=\\%%"
-set "curl_data1={\"llama_model_path\":\"%MODEL_LLM_PATH_STRING%\"}"
-set "curl_data2={\"messages\":[{\"content\":\"Hello there\",\"role\":\"assistant\"},{\"content\":\"Write a long and sad story for me\",\"role\":\"user\"}],\"stream\":false,\"model\":\"gpt-3.5-turbo\",\"max_tokens\":50,\"stop\":[\"hello\"],\"frequency_penalty\":0,\"presence_penalty\":0,\"temperature\":0.1}"
-set "curl_data3={\"llama_model_path\":\"%MODEL_LLM_PATH_STRING%\"}"
+set "curl_data1={\"llama_model_path\":\"%MODEL_LLM_PATH_STRING%\", \"model_alias\":\"gpt-3.5-turbo\"}"
+set "curl_data2={\"messages\":[{\"content\":\"Hello there\",\"role\":\"assistant\"},{\"content\":\"Write a long and sad story for me\",\"role\":\"user\"}],\"stream\":true,\"model\":\"gpt-3.5-turbo\",\"max_tokens\":50,\"stop\":[\"hello\"],\"frequency_penalty\":0,\"presence_penalty\":0,\"temperature\":0.1}"
+set "curl_data3={\"model\":\"gpt-3.5-turbo\"}"
set "curl_data4={\"llama_model_path\":\"%MODEL_EMBEDDING_PATH_STRING%\", \"embedding\": true, \"model_type\": \"embedding\"}"
set "curl_data5={\"input\": \"Hello\", \"model\": \"test-embedding\", \"encoding_format\": \"float\"}"

Expand All @@ -82,7 +82,7 @@ curl.exe --connect-timeout 60 -o "%TEMP%\response2.log" -s -w "%%{http_code}" --
--header "Content-Type: application/json" ^
--data "%curl_data2%" > %TEMP%\response2.log 2>&1

-curl.exe --connect-timeout 60 -o "%TEMP%\response3.log" --request GET -s -w "%%{http_code}" --location "http://127.0.0.1:%PORT%/inferences/server/unloadModel" --header "Content-Type: application/json" --data "%curl_data3%" > %TEMP%\response3.log 2>&1
+curl.exe --connect-timeout 60 -o "%TEMP%\response3.log" --request POST -s -w "%%{http_code}" --location "http://127.0.0.1:%PORT%/inferences/server/unloadModel" --header "Content-Type: application/json" --data "%curl_data3%" > %TEMP%\response3.log 2>&1

curl.exe --connect-timeout 60 -o "%TEMP%\response4.log" --request POST -s -w "%%{http_code}" --location "http://127.0.0.1:%PORT%/inferences/server/loadModel" --header "Content-Type: application/json" --data "%curl_data4%" > %TEMP%\response4.log 2>&1

Expand Down
4 changes: 2 additions & 2 deletions cortex-cpp/controllers/server.h
Original file line number Diff line number Diff line change
Expand Up @@ -44,8 +44,8 @@ class server : public drogon::HttpController<server>,
METHOD_ADD(server::ChatCompletion, "chat_completion", Post);
METHOD_ADD(server::Embedding, "embedding", Post);
METHOD_ADD(server::LoadModel, "loadmodel", Post);
-METHOD_ADD(server::UnloadModel, "unloadmodel", Get);
-METHOD_ADD(server::ModelStatus, "modelstatus", Get);
+METHOD_ADD(server::UnloadModel, "unloadmodel", Post);
+METHOD_ADD(server::ModelStatus, "modelstatus", Post);

// Openai compatible path
ADD_METHOD_TO(server::ChatCompletion, "/v1/chat/completions", Post);
Expand Down
2 changes: 1 addition & 1 deletion cortex-cpp/engines/cortex.llamacpp/engine.cmake
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
# cortex.llamacpp release version
-set(VERSION 0.1.1)
+set(VERSION 0.1.2)
set(ENGINE_VERSION v${VERSION})

# MESSAGE("ENGINE_VERSION=" ${ENGINE_VERSION})
Expand Down

0 comments on commit 48dcae3

Please sign in to comment.