chore: add ci convert model to onnx #21

Closed
wants to merge 63 commits into from
63 commits
f3e8540
chore: add ci convert model to onnx
dungpham91 Jul 31, 2024
0e47754
chore: bump genai to a7ca019 (#22)
vansangpfiev Aug 6, 2024
e900b21
Merge branch 'chore/convert-onnx' of github.com:janhq/cortex.onnx int…
nguyenhoangthuan99 Aug 9, 2024
15b9029
convert model using latest change on upstream instead of release pip …
nguyenhoangthuan99 Aug 9, 2024
26438dc
Merge branch 'chore/convert-onnx' of github.com:janhq/cortex.onnx int…
nguyenhoangthuan99 Aug 9, 2024
c79fcf4
test onpush
nguyenhoangthuan99 Aug 9, 2024
834b99b
test onpush
nguyenhoangthuan99 Aug 9, 2024
ea1f1d7
test onpush
nguyenhoangthuan99 Aug 9, 2024
96e1beb
test onpush
nguyenhoangthuan99 Aug 9, 2024
ce7c242
test onpush
nguyenhoangthuan99 Aug 9, 2024
fc2a73c
test onpush
nguyenhoangthuan99 Aug 9, 2024
0de3779
test onpush
nguyenhoangthuan99 Aug 9, 2024
63a8c70
fix test on push
nguyenhoangthuan99 Aug 9, 2024
0987699
fix test on push
nguyenhoangthuan99 Aug 9, 2024
03d5e54
fix test on push
nguyenhoangthuan99 Aug 9, 2024
b64ae16
fix test on push
nguyenhoangthuan99 Aug 9, 2024
1015a5c
fix test on push
nguyenhoangthuan99 Aug 9, 2024
2991acb
fix test on push
nguyenhoangthuan99 Aug 9, 2024
b58ef48
fix test on push
nguyenhoangthuan99 Aug 9, 2024
1edda70
fix test on push
nguyenhoangthuan99 Aug 9, 2024
920e4d9
fix test on push
nguyenhoangthuan99 Aug 9, 2024
4c885e2
fix test on push
nguyenhoangthuan99 Aug 9, 2024
737d044
fix test on push
nguyenhoangthuan99 Aug 9, 2024
4de0d33
fix test on push
nguyenhoangthuan99 Aug 9, 2024
4272d84
fix test on push
nguyenhoangthuan99 Aug 9, 2024
f161d0f
fix test on push
nguyenhoangthuan99 Aug 9, 2024
85ecee8
fix test on push
nguyenhoangthuan99 Aug 9, 2024
f390224
fix test on push
nguyenhoangthuan99 Aug 9, 2024
84dbed2
fix test on push
nguyenhoangthuan99 Aug 9, 2024
f7f9062
fix test on push
nguyenhoangthuan99 Aug 9, 2024
71eff8b
fix test on push
nguyenhoangthuan99 Aug 9, 2024
bcce433
fix test on push
nguyenhoangthuan99 Aug 9, 2024
51701f2
fix test on push
nguyenhoangthuan99 Aug 9, 2024
4de74c2
fix test on push
nguyenhoangthuan99 Aug 9, 2024
4f9ac62
fix test on push
nguyenhoangthuan99 Aug 9, 2024
e844247
fix test on push
nguyenhoangthuan99 Aug 9, 2024
97a2bcf
fix test on push
nguyenhoangthuan99 Aug 9, 2024
5c28e4f
fix test on push
nguyenhoangthuan99 Aug 9, 2024
b842721
fix test on push
nguyenhoangthuan99 Aug 9, 2024
591d652
fix test on push
nguyenhoangthuan99 Aug 9, 2024
b607bc8
fix test on push
nguyenhoangthuan99 Aug 9, 2024
c19108f
fix test on push
nguyenhoangthuan99 Aug 9, 2024
a28f303
fix test on push
nguyenhoangthuan99 Aug 9, 2024
3137605
fix test on push
nguyenhoangthuan99 Aug 9, 2024
3d3e033
test: CI convert llama3.1
nguyenhoangthuan99 Aug 11, 2024
8c3392b
test: CI convert gemma2
nguyenhoangthuan99 Aug 11, 2024
42ffbeb
test: CI convert llama3.1
nguyenhoangthuan99 Aug 11, 2024
0343da5
test: CI convert llama3.1
nguyenhoangthuan99 Aug 11, 2024
d37afb9
test: CI convert llama3.1
nguyenhoangthuan99 Aug 11, 2024
20ab726
test: CI convert llama3.1
nguyenhoangthuan99 Aug 11, 2024
6ed80bb
test: CI convert llama3.1
nguyenhoangthuan99 Aug 11, 2024
edc23d9
test: CI convert llama3.1
nguyenhoangthuan99 Aug 11, 2024
cd793e7
test: CI convert llama3.1
nguyenhoangthuan99 Aug 11, 2024
a56183a
test: CI convert llama3.1
nguyenhoangthuan99 Aug 11, 2024
6daa67e
test: CI convert llama3.1
nguyenhoangthuan99 Aug 11, 2024
ee59c1a
test: CI convert llama3.1
nguyenhoangthuan99 Aug 11, 2024
337ea9d
test: CI convert llama3.1
nguyenhoangthuan99 Aug 11, 2024
ecbbee5
test: CI convert llama3.1
nguyenhoangthuan99 Aug 11, 2024
4d7551b
test: CI convert llama3.1
nguyenhoangthuan99 Aug 11, 2024
2afd462
test: CI convert llama3.1
nguyenhoangthuan99 Aug 11, 2024
0c1c5cc
test: CI convert llama3.1
nguyenhoangthuan99 Aug 11, 2024
d4c76e4
finalize request
nguyenhoangthuan99 Aug 11, 2024
32db7a0
remove unused env variable
nguyenhoangthuan99 Aug 12, 2024
127 changes: 127 additions & 0 deletions .github/workflows/convert-model.yml
@@ -0,0 +1,127 @@
name: Convert model to ONNX

on:
  # push:
  #   branches:
  #     - 'chore/convert-onnx'
  workflow_dispatch:
    inputs:
      source_model_id:
        description: "Source HuggingFace model ID to pull. For ex: meta-llama/Meta-Llama-3.1-8B-Instruct"
        required: true
      source_model_size:
        description: "The model size. For ex: 8b"
        required: true
        type: string
      target_model_id:
        description: "Target HuggingFace model ID to push. For ex: llama3.1"
        required: true
        type: string

# concurrency:
#   group: ${{ github.workflow }}-${{ github.head_ref || github.run_id }}
#   cancel-in-progress: true

env:
  USER_NAME: cortexso
  SOURCE_MODEL_ID: ${{ inputs.source_model_id }}
  SOURCE_MODEL_SIZE: ${{ inputs.source_model_size }}
  TARGET_MODEL_ID: ${{ inputs.target_model_id }}
  PRECISION: int4 # Valid values: int4,fp16,fp32
  EXECUTOR: dml # Valid values: cpu,cuda,dml,web
  ONNXRUNTIME_GENAI_VERSION: 0.3.0 # Check version from: https://github.com/microsoft/onnxruntime-genai/releases

jobs:
  converter:
    runs-on: windows-onnx
    steps:
      - name: Checkout
        uses: actions/checkout@v4 # v4.1.7
        with:
          submodules: recursive

      - name: Set up Python
        uses: actions/setup-python@v5 # v5.1.1
        with:
          python-version: '3.10'
          # architecture: 'x64'

      - name: Cache Python packages
        uses: actions/cache@0c45773b623bea8c8e75f6c82b208c3cf94ea4f9 # v4.0.2
        with:
          path: |
            ~/.cache/pip
            ~/.local/share/pip
            .venv
          key: ${{ runner.os }}-pip-${{ github.sha }}
          restore-keys: |
            ${{ runner.os }}-pip-

      - name: Install dependencies
        shell: powershell
        run: |
          pip3 install -I --user huggingface_hub hf-transfer numpy==1.26.4 torch==2.3.1 transformers==4.43.4 onnx==1.16.1 onnxruntime==1.18.0 sentencepiece==0.2.0

      - name: Extract MODEL_NAME
        shell: powershell
        run: |
          $SOURCE_MODEL_ID = "${{ env.SOURCE_MODEL_ID }}"
          $ADDR = $SOURCE_MODEL_ID -split '/'
          $MODEL_NAME = $ADDR[-1]
          $MODEL_NAME_LOWER = $MODEL_NAME.ToLower()
          echo "MODEL_NAME=$MODEL_NAME_LOWER" >> $env:GITHUB_ENV
          echo "MODEL_NAME_LOWER=$MODEL_NAME_LOWER" # For debugging

      - name: Print environment variables
        run: |
          echo "SOURCE_MODEL_ID: ${{ env.SOURCE_MODEL_ID }}"
          echo "PRECISION: ${{ env.PRECISION }}"
          echo "EXECUTOR: ${{ env.EXECUTOR }}"
          echo "MODEL_NAME: ${{ env.MODEL_NAME }}"

      - name: Check file existence
        id: check_files
        uses: andstor/file-existence-action@v1
        with:
          files: "C:\\models\\${{ env.MODEL_NAME }}/hf"

      - name: Prepare folders
        if: steps.check_files.outputs.files_exists == 'false'
        run: |
          mkdir -p C:\\models\\${{ env.MODEL_NAME }}/hf
          mkdir -p C:\\models\\${{ env.MODEL_NAME }}/onnx
          mkdir -p C:\\models\\${{ env.MODEL_NAME }}/cache

      - name: Download Hugging Face model
        id: download_hf
        if: steps.check_files.outputs.files_exists == 'false'
        run: |
          huggingface-cli login --token ${{ secrets.HUGGINGFACE_TOKEN_READ }} --add-to-git-credential
          huggingface-cli download --repo-type model --local-dir C:\\models\\${{ env.MODEL_NAME }}/hf ${{ env.SOURCE_MODEL_ID }}
          huggingface-cli logout

      # - name: Remove Failure Download
      #   if: steps.download_hf.outcome == 'failure'
      #   run: |
      #     Remove-Item -Recurse -Force -Path "$C:\\models\\{{ env.MODEL_NAME }}"

      - name: Convert to ONNX - DirectML - INT4
        shell: powershell
        run: |
          mkdir -p ${{ env.MODEL_NAME }}/onnx
          huggingface-cli login --token ${{ secrets.HUGGINGFACE_TOKEN_READ }} --add-to-git-credential
          python3 "onnxruntime-genai/src/python/py/models/builder.py" -i "c:/models/${{ env.MODEL_NAME }}/hf" -o "c:/models/${{ env.MODEL_NAME }}/onnx" -p ${{ env.PRECISION }} -e ${{ env.EXECUTOR }} -c "c:/models/${{ env.MODEL_NAME }}/cache"
          huggingface-cli logout

      - name: Upload to Hugging Face
        run: |
          Get-ChildItem -Path "C:\\models\\${{ env.MODEL_NAME }}/onnx" -Force
          huggingface-cli login --token ${{ secrets.HUGGINGFACE_TOKEN_WRITE }} --add-to-git-credential
          huggingface-cli upload "${{ env.USER_NAME }}/${{ env.TARGET_MODEL_ID }}" "c:/models/${{ env.MODEL_NAME }}/onnx" . --revision "${{ env.SOURCE_MODEL_SIZE }}-onnx"
          huggingface-cli logout

      - name: Cleanup
        if: always()
        run: |
          Remove-Item -Recurse -Force -Path "${{ env.MODEL_NAME }}"
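
The workflow runs only on manual dispatch. As a rough sketch of how it could be triggered once merged, assuming the GitHub CLI (gh) is installed and authenticated for this repository, and using illustrative input values taken from the descriptions above:

    # Illustrative values; requires the workflow file to be visible to gh (i.e. on the default branch)
    gh workflow run convert-model.yml `
      -f source_model_id="meta-llama/Meta-Llama-3.1-8B-Instruct" `
      -f source_model_size="8b" `
      -f target_model_id="llama3.1"

With those inputs, the upload step pushes the converted files to cortexso/llama3.1 under the 8b-onnx revision, which could later be fetched with something like huggingface-cli download cortexso/llama3.1 --revision 8b-onnx.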
2 changes: 1 addition & 1 deletion Makefile
@@ -20,7 +20,7 @@ endif

build-onnxruntime:
ifeq ($(OS),Windows_NT) # Windows
-	@powershell -Command "cmake -S .\onnxruntime-genai\ -B .\onnxruntime-genai\build -DUSE_DML=ON -DUSE_CUDA=OFF -DENABLE_PYTHON=OFF -DORT_HOME=\".\build_deps\ort\";"
+	@powershell -Command "cmake -S .\onnxruntime-genai\ -B .\onnxruntime-genai\build -DUSE_DML=ON -DUSE_CUDA=OFF -DUSE_ROCM=OFF -DENABLE_PYTHON=OFF -DORT_HOME=\".\build_deps\ort\";"
	@powershell -Command "cmake --build .\onnxruntime-genai\build --config Release -j4;"
else # Unix-like systems (Linux and MacOS)
	@echo "Skipping install dependencies"
2 changes: 1 addition & 1 deletion build_cortex_onnx.bat
@@ -1,5 +1,5 @@
cmake -S ./third-party -B ./build_deps/third-party
cmake --build ./build_deps/third-party --config Release -j4

-cmake -S .\onnxruntime-genai\ -B .\onnxruntime-genai\build -DUSE_DML=ON -DUSE_CUDA=OFF -DORT_HOME="./build_deps/ort" -DENABLE_PYTHON=OFF -DENABLE_TESTS=OFF -DENABLE_MODEL_BENCHMARK=OFF
+cmake -S .\onnxruntime-genai\ -B .\onnxruntime-genai\build -DUSE_DML=ON -DUSE_CUDA=OFF -DUSE_ROCM=OFF -DORT_HOME="./build_deps/ort" -DENABLE_PYTHON=OFF -DENABLE_TESTS=OFF -DENABLE_MODEL_BENCHMARK=OFF
cmake --build .\onnxruntime-genai\build --config Release -j4
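
Both the Makefile and build_cortex_onnx.bat now pass -DUSE_ROCM=OFF alongside -DUSE_CUDA=OFF, so only the DirectML backend is configured. A quick sanity check after a local configure, assuming the build directory created by build_cortex_onnx.bat exists, is to search the CMake cache from PowerShell:

    # Assumes a prior configure via build_cortex_onnx.bat; flags passed with -D are recorded in the cache
    Select-String -Path .\onnxruntime-genai\build\CMakeCache.txt -Pattern "USE_DML|USE_CUDA|USE_ROCM"

Each flag should appear with the value given on the command line (DML on, CUDA and ROCm off).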
2 changes: 1 addition & 1 deletion onnxruntime-genai
Submodule onnxruntime-genai updated 169 files