Update llama.cpp submodule to latest release b4397 (#352)
* Update submodule to latest release b4397

* fix: build

---------

Co-authored-by: github-actions[bot] <github-actions[bot]@users.noreply.github.com>
Co-authored-by: vansangpfiev <[email protected]>
3 people authored Dec 30, 2024
1 parent eb45b83 commit 5a94d51
Showing 8 changed files with 22 additions and 22 deletions.
2 changes: 1 addition & 1 deletion .github/workflows/build.yml
@@ -65,7 +65,7 @@ jobs:
- os: "linux"
name: "arm64"
runs-on: "ubuntu-2004-arm64"
cmake-flags: "-DCORTEXLLAMA_VERSION=${{needs.create-draft-release.outputs.version}} -DBUILD_SHARED_LIBS=OFF -DLLAMA_BUILD_COMMON=ON -DCMAKE_CXX_COMPILER_LAUNCHER=ccache -DCMAKE_C_COMPILER_LAUNCHER=ccache -DCMAKE_BUILD_TYPE='Release' -GNinja"
cmake-flags: "-DCORTEXLLAMA_VERSION=${{needs.create-draft-release.outputs.version}} -DBUILD_SHARED_LIBS=OFF -DLLAMA_BUILD_COMMON=ON -DGGML_NATIVE=OFF -DCMAKE_CXX_COMPILER_LAUNCHER=ccache -DCMAKE_C_COMPILER_LAUNCHER=ccache -DCMAKE_BUILD_TYPE='Release' -GNinja"
run-e2e: true
vulkan: false
ccache: true
2 changes: 1 addition & 1 deletion .github/workflows/nightly-build.yml
@@ -64,7 +64,7 @@ jobs:
- os: "linux"
name: "arm64"
runs-on: "ubuntu-2004-arm64"
cmake-flags: "-DCORTEXLLAMA_VERSION=${{needs.create-draft-release.outputs.version}} -DBUILD_SHARED_LIBS=OFF -DLLAMA_BUILD_COMMON=ON -DCMAKE_CXX_COMPILER_LAUNCHER=ccache -DCMAKE_C_COMPILER_LAUNCHER=ccache -DCMAKE_BUILD_TYPE='Release' -GNinja"
cmake-flags: "-DCORTEXLLAMA_VERSION=${{needs.create-draft-release.outputs.version}} -DBUILD_SHARED_LIBS=OFF -DLLAMA_BUILD_COMMON=ON -DGGML_NATIVE=OFF -DCMAKE_CXX_COMPILER_LAUNCHER=ccache -DCMAKE_C_COMPILER_LAUNCHER=ccache -DCMAKE_BUILD_TYPE='Release' -GNinja"
run-e2e: true
vulkan: false
ccache: true
2 changes: 1 addition & 1 deletion .github/workflows/template-e2e-weekend-test.yml
@@ -33,7 +33,7 @@ jobs:
- os: "linux"
name: "arm64"
runs-on: "ubuntu-2004-arm64"
cmake-flags: "-DCORTEXLLAMA_VERSION=${{github.event.pull_request.head.sha}} -DBUILD_SHARED_LIBS=OFF -DLLAMA_BUILD_COMMON=ON -DCMAKE_CXX_COMPILER_LAUNCHER=ccache -DCMAKE_C_COMPILER_LAUNCHER=ccache -DCMAKE_BUILD_TYPE='Release' -GNinja"
cmake-flags: "-DCORTEXLLAMA_VERSION=${{github.event.pull_request.head.sha}} -DGGML_NATIVE=OFF -DBUILD_SHARED_LIBS=OFF -DLLAMA_BUILD_COMMON=ON -DCMAKE_CXX_COMPILER_LAUNCHER=ccache -DCMAKE_C_COMPILER_LAUNCHER=ccache -DCMAKE_BUILD_TYPE='Release' -GNinja"
run-e2e: true
vulkan: false
ccache: true
2 changes: 1 addition & 1 deletion .github/workflows/template-quality-gate-pr.yml
@@ -31,7 +31,7 @@ jobs:
- os: "linux"
name: "arm64"
runs-on: "ubuntu-2004-arm64"
cmake-flags: "-DCORTEXLLAMA_VERSION=${{github.event.pull_request.head.sha}} -DBUILD_SHARED_LIBS=OFF -DLLAMA_BUILD_COMMON=ON -DCMAKE_CXX_COMPILER_LAUNCHER=ccache -DCMAKE_C_COMPILER_LAUNCHER=ccache -DCMAKE_BUILD_TYPE='Release' -GNinja"
cmake-flags: "-DCORTEXLLAMA_VERSION=${{github.event.pull_request.head.sha}} -DGGML_NATIVE=OFF -DBUILD_SHARED_LIBS=OFF -DLLAMA_BUILD_COMMON=ON -DCMAKE_CXX_COMPILER_LAUNCHER=ccache -DCMAKE_C_COMPILER_LAUNCHER=ccache -DCMAKE_BUILD_TYPE='Release' -GNinja"
run-e2e: true
vulkan: false
ccache: true
2 changes: 1 addition & 1 deletion .github/workflows/template-quality-gate-submodule.yml
@@ -31,7 +31,7 @@ jobs:
- os: "linux"
name: "arm64"
runs-on: "ubuntu-2004-arm64"
cmake-flags: "-DCORTEXLLAMA_VERSION=${{github.event.pull_request.head.sha}} -DBUILD_SHARED_LIBS=OFF -DLLAMA_BUILD_COMMON=ON -DCMAKE_CXX_COMPILER_LAUNCHER=ccache -DCMAKE_C_COMPILER_LAUNCHER=ccache -DCMAKE_BUILD_TYPE='Release' -GNinja"
cmake-flags: "-DCORTEXLLAMA_VERSION=${{github.event.pull_request.head.sha}} -DBUILD_SHARED_LIBS=OFF -DLLAMA_BUILD_COMMON=ON -DGGML_NATIVE=OFF -DCMAKE_CXX_COMPILER_LAUNCHER=ccache -DCMAKE_C_COMPILER_LAUNCHER=ccache -DCMAKE_BUILD_TYPE='Release' -GNinja"
run-e2e: true
vulkan: false
ccache: true
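Note on the workflow changes above: each of the five Linux arm64 jobs adds -DGGML_NATIVE=OFF to cmake-flags. In llama.cpp's build system this option enables native-CPU tuning (roughly -march=native), so turning it off plausibly keeps the CI artifacts portable across arm64 machines instead of being tied to the instruction set of the particular build runner. A minimal C++ sketch, using only standard predefined compiler macros (nothing from this repository), of how a native-tuned build bakes the build machine's CPU features into the binary:

#include <cstdio>

int main() {
  // These macros are set by the compiler from the target flags. With
  // GGML_NATIVE=ON (-march=native) they mirror the build machine's CPU;
  // with GGML_NATIVE=OFF a fixed baseline is used, so the resulting binary
  // runs on any machine of that architecture.
#if defined(__AVX2__)
  std::puts("built with AVX2 enabled");
#elif defined(__ARM_NEON)
  std::puts("built with NEON enabled");
#else
  std::puts("built for a baseline target");
#endif
  return 0;
}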
2 changes: 1 addition & 1 deletion llama.cpp
Submodule llama.cpp updated 113 files
30 changes: 15 additions & 15 deletions src/llama_client_slot.cc
@@ -76,19 +76,19 @@ json LlamaClientSlot::GetFormatedTimings() {
}

void LlamaClientSlot::PrintTimings() const {
- LOG_DEBUG << __func__ << ": prompt eval time = " << t_prompt_processing
- << "ms / " << num_prompt_tokens_processed << " tokens ("
- << t_prompt_processing / num_prompt_tokens_processed
- << " ms per "
- "token, "
- << 1e3 / t_prompt_processing * num_prompt_tokens_processed
- << " tokens per second)";
- LOG_DEBUG << __func__ << ": eval time = " << t_token_generation
- << " ms / " << n_decoded << " runs ("
- << t_token_generation / n_decoded
- << " ms per "
- "token, "
- << 1e3 / t_token_generation * n_decoded << " tokens per second)\n";
- LOG_DEBUG << __func__ << ": total time = "
- << t_prompt_processing + t_token_generation << " ms";
+ LOG_INFO << __func__ << ": prompt eval time = " << t_prompt_processing
+ << "ms / " << num_prompt_tokens_processed << " tokens ("
+ << t_prompt_processing / num_prompt_tokens_processed
+ << " ms per "
+ "token, "
+ << 1e3 / t_prompt_processing * num_prompt_tokens_processed
+ << " tokens per second)";
+ LOG_INFO << __func__ << ": eval time = " << t_token_generation
+ << " ms / " << n_decoded << " runs ("
+ << t_token_generation / n_decoded
+ << " ms per "
+ "token, "
+ << 1e3 / t_token_generation * n_decoded << " tokens per second)\n";
+ LOG_INFO << __func__ << ": total time = "
+ << t_prompt_processing + t_token_generation << " ms";
}
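The only change in this file is the log level: the per-request timing summary moves from LOG_DEBUG to LOG_INFO, so it appears without enabling debug logging. The figures are computed the same way before and after; as a quick reference, a self-contained sketch of the tokens-per-second arithmetic used above (TokensPerSecond is an illustrative helper, not part of this repository):

#include <cstdio>

// tokens per second = n_tokens / (t_ms / 1000) = 1e3 / t_ms * n_tokens
static double TokensPerSecond(double t_ms, int n_tokens) {
  return t_ms > 0.0 ? 1e3 / t_ms * n_tokens : 0.0;
}

int main() {
  // Example: a 512-token prompt processed in 250 ms -> 2048 tokens per second.
  std::printf("%.1f tokens per second\n", TokensPerSecond(250.0, 512));
  return 0;
}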
2 changes: 1 addition & 1 deletion src/llama_server_context.cc
@@ -1066,7 +1066,7 @@ void LlamaServerContext::SendEmbedding(LlamaClientSlot& slot) {
continue;
}

- common_embd_normalize(embd, embd_res.data(), n_embd);
+ common_embd_normalize(embd, embd_res.data(), n_embd, 2);
}
res.result_json = json{
{"tokens_evaluated", slot.num_prompt_tokens},
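The added fourth argument to common_embd_normalize above tracks an upstream API change: in the updated llama.cpp the helper takes an explicit norm selector, and passing 2 appears to request Euclidean (L2) normalization of the embedding, preserving the previous behaviour. A rough self-contained sketch of that operation under that assumption (L2Normalize is an illustrative stand-in, not the upstream function):

#include <cmath>
#include <vector>

// Scale a vector to unit Euclidean length (the effect of passing 2 above).
static void L2Normalize(const float* inp, float* out, int n) {
  double sum = 0.0;
  for (int i = 0; i < n; ++i) sum += static_cast<double>(inp[i]) * inp[i];
  const double scale = sum > 0.0 ? 1.0 / std::sqrt(sum) : 0.0;
  for (int i = 0; i < n; ++i) out[i] = static_cast<float>(inp[i] * scale);
}

int main() {
  std::vector<float> embd = {3.0f, 4.0f};
  std::vector<float> unit(embd.size());
  L2Normalize(embd.data(), unit.data(), static_cast<int>(embd.size()));
  // unit is now {0.6, 0.8}, a unit-length embedding.
  return 0;
}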
