diff --git a/.github/workflows/convert-model.yml b/.github/workflows/convert-model.yml
new file mode 100644
index 0000000..4e47bbe
--- /dev/null
+++ b/.github/workflows/convert-model.yml
@@ -0,0 +1,114 @@
+name: Convert model to ONNX
+
+on:
+  workflow_dispatch:
+    inputs:
+      source_model_id:
+        description: "Source HuggingFace model ID to pull. For ex: meta-llama/Meta-Llama-3.1-8B-Instruct"
+        required: true
+      source_model_size:
+        description: "The model size. For ex: 8b"
+        required: true
+        type: string
+      target_model_id:
+        description: "Target HuggingFace model ID to push. For ex: llama3.1"
+        required: true
+        type: string
+
+
+env:
+  USER_NAME: cortexso
+  SOURCE_MODEL_ID: ${{ inputs.source_model_id }}
+  SOURCE_MODEL_SIZE: ${{ inputs.source_model_size }}
+  TARGET_MODEL_ID: ${{ inputs.target_model_id }}
+  PRECISION: int4 # Valid values: int4,fp16,fp32
+  EXECUTOR: dml # Valid values: cpu,cuda,dml,web
+
+jobs:
+  converter:
+    runs-on: windows-onnx
+    steps:
+      - name: Checkout
+        uses: actions/checkout@v4 # v4.1.7
+        with:
+          submodules: recursive
+
+      - name: Set up Python
+        uses: actions/setup-python@v5 # v5.1.1
+        with:
+          python-version: '3.10'
+          # architecture: 'x64'
+
+      - name: Cache Python packages
+        uses: actions/cache@0c45773b623bea8c8e75f6c82b208c3cf94ea4f9 # v4.0.2
+        with:
+          path: |
+            ~/.cache/pip
+            ~/.local/share/pip
+            .venv
+          key: ${{ runner.os }}-pip-${{ github.sha }}
+          restore-keys: |
+            ${{ runner.os }}-pip-
+
+      - name: Install dependencies
+        shell: powershell
+        run: |
+          pip3 install -I --user huggingface_hub hf-transfer numpy==1.26.4 torch==2.3.1 transformers==4.43.4 onnx==1.16.1 onnxruntime==1.18.0 sentencepiece==0.2.0
+
+      - name: Extract MODEL_NAME
+        shell: powershell
+        run: |
+          $SOURCE_MODEL_ID = "${{ env.SOURCE_MODEL_ID }}"
+          $ADDR = $SOURCE_MODEL_ID -split '/'
+          $MODEL_NAME = $ADDR[-1]
+          $MODEL_NAME_LOWER = $MODEL_NAME.ToLower()
+          echo "MODEL_NAME=$MODEL_NAME_LOWER" >> $env:GITHUB_ENV
+          echo "MODEL_NAME_LOWER=$MODEL_NAME_LOWER" # For debugging
+
+      - name: Print environment variables
+        run: |
+          echo "SOURCE_MODEL_ID: ${{ env.SOURCE_MODEL_ID }}"
+          echo "PRECISION: ${{ env.PRECISION }}"
+          echo "EXECUTOR: ${{ env.EXECUTOR }}"
+          echo "MODEL_NAME: ${{ env.MODEL_NAME }}"
+      - name: Check file existence
+        id: check_files
+        uses: andstor/file-existence-action@v1
+        with:
+          files: "C:\\models\\${{ env.MODEL_NAME }}/hf"
+
+
+      - name: Prepare folders
+        if: steps.check_files.outputs.files_exists == 'false'
+        run: |
+          mkdir -p C:\\models\\${{ env.MODEL_NAME }}/hf
+          mkdir -p C:\\models\\${{ env.MODEL_NAME }}/onnx
+          mkdir -p C:\\models\\${{ env.MODEL_NAME }}/cache
+
+      - name: Download Hugging Face model
+        id: download_hf
+        if: steps.check_files.outputs.files_exists == 'false'
+        run: |
+          huggingface-cli login --token ${{ secrets.HUGGINGFACE_TOKEN_READ }} --add-to-git-credential
+          huggingface-cli download --repo-type model --local-dir C:\\models\\${{ env.MODEL_NAME }}/hf ${{ env.SOURCE_MODEL_ID }}
+          huggingface-cli logout
+
+      - name: Convert to ONNX - DirectML - INT4
+        shell: powershell
+        run: |
+          mkdir -p ${{ env.MODEL_NAME }}/onnx
+          huggingface-cli login --token ${{ secrets.HUGGINGFACE_TOKEN_READ }} --add-to-git-credential
+          python3 "onnxruntime-genai/src/python/py/models/builder.py" -i "c:/models/${{ env.MODEL_NAME }}/hf" -o "c:/models/${{ env.MODEL_NAME }}/onnx" -p ${{ env.PRECISION }} -e ${{ env.EXECUTOR }} -c "c:/models/${{ env.MODEL_NAME }}/cache"
+          huggingface-cli logout
+
+      - name: Upload to Hugging Face
+        run: |
+          Get-ChildItem -Path "C:\\models\\${{ env.MODEL_NAME }}/onnx" -Force
+          huggingface-cli login --token ${{ secrets.HUGGINGFACE_TOKEN_WRITE }} --add-to-git-credential
+          huggingface-cli upload "${{ env.USER_NAME }}/${{ env.TARGET_MODEL_ID }}" "c:/models/${{ env.MODEL_NAME }}/onnx" . --revision "${{ env.SOURCE_MODEL_SIZE }}-onnx"
+          huggingface-cli logout
+
+      - name: Cleanup
+        if: always()
+        run: |
+          Remove-Item -Recurse -Force -Path "${{ env.MODEL_NAME }}"
diff --git a/Makefile b/Makefile
index 472767d..d3978a2 100644
--- a/Makefile
+++ b/Makefile
@@ -20,7 +20,7 @@ endif
 
 build-onnxruntime:
 ifeq ($(OS),Windows_NT) # Windows
-	@powershell -Command "cmake -S .\onnxruntime-genai\ -B .\onnxruntime-genai\build -DUSE_DML=ON -DUSE_CUDA=OFF -DENABLE_PYTHON=OFF -DORT_HOME=\".\build_deps\ort\";"
+	@powershell -Command "cmake -S .\onnxruntime-genai\ -B .\onnxruntime-genai\build -DUSE_DML=ON -DUSE_CUDA=OFF -DUSE_ROCM=OFF -DENABLE_PYTHON=OFF -DORT_HOME=\".\build_deps\ort\";"
 	@powershell -Command "cmake --build .\onnxruntime-genai\build --config Release -j4;"
 else # Unix-like systems (Linux and MacOS)
 	@echo "Skipping install dependencies"
diff --git a/build_cortex_onnx.bat b/build_cortex_onnx.bat
index 1af5516..50c0959 100644
--- a/build_cortex_onnx.bat
+++ b/build_cortex_onnx.bat
@@ -1,5 +1,5 @@
 cmake -S ./third-party -B ./build_deps/third-party
 cmake --build ./build_deps/third-party --config Release -j4
-cmake -S .\onnxruntime-genai\ -B .\onnxruntime-genai\build -DUSE_DML=ON -DUSE_CUDA=OFF -DORT_HOME="./build_deps/ort" -DENABLE_PYTHON=OFF -DENABLE_TESTS=OFF -DENABLE_MODEL_BENCHMARK=OFF
+cmake -S .\onnxruntime-genai\ -B .\onnxruntime-genai\build -DUSE_DML=ON -DUSE_CUDA=OFF -DUSE_ROCM=OFF -DORT_HOME="./build_deps/ort" -DENABLE_PYTHON=OFF -DENABLE_TESTS=OFF -DENABLE_MODEL_BENCHMARK=OFF
 cmake --build .\onnxruntime-genai\build --config Release -j4
 
diff --git a/onnxruntime-genai b/onnxruntime-genai
index 17f772b..a7ca019 160000
--- a/onnxruntime-genai
+++ b/onnxruntime-genai
@@ -1 +1 @@
-Subproject commit 17f772b5eee9885dd87615c3fb689ac71853d1b6
+Subproject commit a7ca0191803ddceaa2c7ef2de38f8efc6fa1a498