Skip to content

41 feat batch inference for nitro #86

41 feat batch inference for nitro

41 feat batch inference for nitro #86

Workflow file for this run

name: CI
on:
push:
branches:
- main
tags: ['v*.*.*']
paths: ['.github/workflows/**', '**/CMakeLists.txt', '**/Makefile', '**/*.h', '**/*.hpp', '**/*.c', '**/*.cpp', '**/*.cu']
pull_request:
types: [opened, synchronize, reopened]
paths: ['**/CMakeLists.txt', '**/Makefile', '**/*.h', '**/*.hpp', '**/*.c', '**/*.cpp', '**/*.cu']
env:
BRANCH_NAME: ${{ github.head_ref || github.ref_name }}
jobs:
create-draft-release:
runs-on: ubuntu-latest
if: github.event_name == 'push' && startsWith(github.ref, 'refs/tags/')
outputs:
upload_url: ${{ steps.create_release.outputs.upload_url }}
version: ${{ steps.get_version.outputs.version }}
permissions:
contents: write
steps:
- name: Extract tag name without v prefix
id: get_version
run: echo "VERSION=${GITHUB_REF#refs/tags/v}" >> $GITHUB_ENV && echo "::set-output name=version::${GITHUB_REF#refs/tags/v}"
env:
GITHUB_REF: ${{ github.ref }}
- name: Create Draft Release
id: create_release
uses: actions/create-release@v1
env:
GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
with:
tag_name: ${{ github.ref_name }}
release_name: "${{ env.VERSION }}"
draft: true
prerelease: false
ubuntu-amd64-build:
runs-on: linux-gpu
needs: create-draft-release
if: always() && (needs.create-draft-release.result == 'success' || needs.create-draft-release.result == 'skipped')
permissions:
contents: write
steps:
- name: Clone
id: checkout
uses: actions/checkout@v3
with:
submodules: recursive
# - name: Dependencies
# id: depends
# run: |
# sudo apt-get update
# sudo apt-get install build-essential gcc-8
- name: Build
id: make_build
run: |
./install_deps.sh
mkdir build && cd build
cmake ..
CC=gcc-8 make -j $(nproc)
ls -la
- name: Package
shell: bash
run: |
mkdir -p nitro
cp build/nitro nitro/
zip -r nitro.zip nitro
- uses: actions/[email protected]
if: github.event_name == 'push' && startsWith(github.ref, 'refs/tags/')
env:
GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
with:
upload_url: ${{ needs.create-draft-release.outputs.upload_url }}
asset_path: ./nitro.zip
asset_name: nitro-${{ needs.create-draft-release.outputs.version }}-linux-amd64.zip
asset_content_type: application/zip
ubuntu-amd64-cuda-build:
runs-on: linux-gpu
needs: create-draft-release
if: always() && (needs.create-draft-release.result == 'success' || needs.create-draft-release.result == 'skipped')
permissions:
contents: write
steps:
- name: Clone
id: checkout
uses: actions/checkout@v3
with:
submodules: recursive
# - name: Dependencies
# id: depends
# run: |
# sudo apt-get update
# sudo apt-get install build-essential gcc-8 uuid-dev
- name: Build
id: make_build
run: |
./install_deps.sh
mkdir build && cd build
cmake -DLLAMA_CUBLAS=ON ..
CC=gcc-8 make -j $(nproc)
ls -la
- name: Package
shell: bash
run: |
mkdir -p nitro
cp build/nitro nitro/
zip -r nitro.zip nitro
- uses: actions/[email protected]
if: github.event_name == 'push' && startsWith(github.ref, 'refs/tags/')
env:
GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
with:
upload_url: ${{ needs.create-draft-release.outputs.upload_url }}
asset_path: ./nitro.zip
asset_name: nitro-${{ needs.create-draft-release.outputs.version }}-linux-amd64-cuda.zip
asset_content_type: application/zip
macOS-M-build:
runs-on: mac-silicon
needs: create-draft-release
if: always() && (needs.create-draft-release.result == 'success' || needs.create-draft-release.result == 'skipped')
permissions:
contents: write
steps:
- name: Clone
id: checkout
uses: actions/checkout@v3
with:
submodules: recursive
- name: Dependencies
id: depends
continue-on-error: true
run: |
brew update
brew install cmake gcc@8
- name: Build
id: cmake_build
run: |
./install_deps.sh
mkdir build && cd build
cmake ..
CC=gcc-8 make -j $(nproc)
ls -la
- name: Package
shell: bash
run: |
mkdir -p nitro
cp llama.cpp/ggml-metal.metal nitro/
cp build/nitro nitro/
zip -r nitro.zip nitro
- uses: actions/[email protected]
if: github.event_name == 'push' && startsWith(github.ref, 'refs/tags/')
env:
GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
with:
upload_url: ${{ needs.create-draft-release.outputs.upload_url }}
asset_path: ./nitro.zip
asset_name: nitro-${{ needs.create-draft-release.outputs.version }}-mac-arm64.zip
asset_content_type: application/zip
macOS-Intel-build:
runs-on: macos-latest
needs: create-draft-release
if: always() && (needs.create-draft-release.result == 'success' || needs.create-draft-release.result == 'skipped')
permissions:
contents: write
steps:
- name: Clone
id: checkout
uses: actions/checkout@v3
with:
submodules: recursive
- name: Dependencies
id: depends
continue-on-error: true
run: |
brew update
- name: Build
id: cmake_build
run: |
./install_deps.sh
mkdir build && cd build
cmake -DLLAMA_METAL=OFF ..
CC=gcc-8 make -j $(nproc)
ls -la
- name: Package
shell: bash
run: |
mkdir -p nitro
cp build/nitro nitro/
zip -r nitro.zip nitro
- uses: actions/[email protected]
if: github.event_name == 'push' && startsWith(github.ref, 'refs/tags/')
env:
GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
with:
upload_url: ${{ needs.create-draft-release.outputs.upload_url }}
asset_path: ./nitro.zip
asset_name: nitro-${{ needs.create-draft-release.outputs.version }}-mac-amd64.zip
asset_content_type: application/zip
windows-amd64-build:
runs-on: windows-latest
needs: create-draft-release
if: always() && (needs.create-draft-release.result == 'success' || needs.create-draft-release.result == 'skipped')
permissions:
contents: write
env:
OPENBLAS_VERSION: 0.3.23
OPENCL_VERSION: 2023.04.17
CLBLAST_VERSION: 1.6.0
strategy:
matrix:
include:
- build: 'normal'
defines: '-DLLAMA_NATIVE=OFF -DLLAMA_BUILD_SERVER=ON -DLLAMA_BLAS=ON -DBUILD_SHARED_LIBS=ON -DLLAMA_BLAS_VENDOR=OpenBLAS -DBLAS_INCLUDE_DIRS="$env:RUNNER_TEMP/openblas/include" -DBLAS_LIBRARIES="$env:RUNNER_TEMP/openblas/lib/openblas.lib"'
steps:
- name: Clone
id: checkout
uses: actions/checkout@v3
with:
submodules: recursive
- name: Setup VSWhere.exe
uses: warrenbuckley/Setup-VSWhere@v1
with:
version: latest
silent: true
env:
ACTIONS_ALLOW_UNSECURE_COMMANDS: true
- name: actions-setup-cmake
uses: jwlawson/[email protected]
- name: Build
id: cmake_build
shell: cmd
run: |
cmake -S ./nitro_deps -B ./build_deps/nitro_deps
cmake --build ./build_deps/nitro_deps --config Release
mkdir -p build
cd build
cmake ..
cmake --build . --config Release -j 4
- name: Pack artifacts
id: pack_artifacts
shell: cmd
run: |
robocopy build_deps\_install\bin .\build\Release zlib.dll
robocopy build\bin\Release .\build\Release llama.dll
robocopy ext_libs .\build\Release libcrypto-3-x64.dll
robocopy ext_libs .\build\Release libssl-3-x64.dll
7z a nitro.zip .\build\Release\*
- uses: actions/[email protected]
if: github.event_name == 'push' && startsWith(github.ref, 'refs/tags/')
env:
GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
with:
upload_url: ${{ needs.create-draft-release.outputs.upload_url }}
asset_path: ./nitro.zip
asset_name: nitro-${{ needs.create-draft-release.outputs.version }}-win-amd64-${{ matrix.build }}.zip
asset_content_type: application/zip
windows-amd64-cuda-build:
runs-on: windows-nvidia
needs: create-draft-release
if: always() && (needs.create-draft-release.result == 'success' || needs.create-draft-release.result == 'skipped')
permissions:
contents: write
strategy:
matrix:
cuda: ['12.2.0']
build: ['cublas']
steps:
- name: Clone
id: checkout
uses: actions/checkout@v3
with:
submodules: recursive
- name: Setup VSWhere.exe
uses: warrenbuckley/Setup-VSWhere@v1
with:
version: latest
silent: true
env:
ACTIONS_ALLOW_UNSECURE_COMMANDS: true
- name: Build
id: cmake_build
shell: cmd
run: |
cmake -S ./nitro_deps -B ./build_deps/nitro_deps
cmake --build ./build_deps/nitro_deps --config Release
mkdir -p build
cd build
cmake .. -DLLAMA_NATIVE=OFF -DLLAMA_BUILD_SERVER=ON -DLLAMA_CUBLAS=ON -DBUILD_SHARED_LIBS=ON
cmake --build . --config Release -j 4
- name: Pack artifacts
id: pack_artifacts
shell: cmd
run: |
set PATH=%PATH%;C:\Program Files\7-Zip\
echo %PATH%
robocopy build_deps\_install\bin .\build\Release zlib.dll
robocopy build\bin\Release .\build\Release llama.dll
robocopy ext_libs .\build\Release libcrypto-3-x64.dll
robocopy ext_libs .\build\Release libssl-3-x64.dll
7z a nitro.zip .\build\Release\*
- uses: actions/[email protected]
if: github.event_name == 'push' && startsWith(github.ref, 'refs/tags/')
env:
GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
with:
upload_url: ${{ needs.create-draft-release.outputs.upload_url }}
asset_path: ./nitro.zip
asset_name: nitro-${{ needs.create-draft-release.outputs.version }}-win-amd64-${{ matrix.build }}-cu${{ matrix.cuda }}.zip
asset_content_type: application/zip
update_release_draft:
needs: [ubuntu-amd64-build, ubuntu-amd64-cuda-build, macOS-M-build, macOS-Intel-build, windows-amd64-build, windows-amd64-cuda-build]
permissions:
contents: write
pull-requests: write
runs-on: ubuntu-latest
steps:
- uses: release-drafter/release-drafter@v5
env:
GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}