.github/workflows/convert-model.yml

name: Convert model to ONNX

on:
  push:
    # Sequence of patterns matched against refs/heads
    branches:
      - 'chore/convert-onnx'
  workflow_dispatch:
    inputs:
      source_model_id:
        description: "Source HuggingFace model ID to pull. For ex: meta-llama/Meta-Llama-3.1-8B-Instruct"
        required: true
      source_model_size:
        description: "The model size. For ex: 8B"
        required: true
        type: string
      target_model_id:
        description: "Target HuggingFace model ID to push. For ex: llama3.1"
        required: true
        type: string

concurrency:
  group: ${{ github.workflow }}-${{ github.head_ref || github.run_id }}
  cancel-in-progress: true
  
env:
  USER_NAME: cortexso
  SOURCE_MODEL_ID: meta-llama/Meta-Llama-3.1-8B-Instruct #${{ inputs.source_model_id }}
  SOURCE_MODEL_SIZE: 8b #${{ inputs.source_model_size }}
  TARGET_MODEL_ID: llama3.1 #${{ inputs.target_model_id }}
  PRECISION: int4                   # Valid values: int4,fp16,fp3
  EXECUTOR: dml                     # Valid values: cpu,cuda,dml,web
  ONNXRUNTIME_GENAI_VERSION: 0.3.0  # Check version from: https://github.com/microsoft/onnxruntime-genai/releases
  
jobs:
  converter:
    runs-on: windows-amd
    steps:
      - name: Checkout
        uses: actions/checkout@v4 # v4.1.7
        with:
          submodules: recursive

      - name: Set up Python
        uses: actions/setup-python@v4 # v5.1.1
        with:
          python-version: '3.12'
          architecture: 'x64'

      - name: Cache Python packages
        uses: actions/cache@0c45773b623bea8c8e75f6c82b208c3cf94ea4f9 # v4.0.2
        with:
          path: |
            ~/.cache/pip
            ~/.local/share/pip
            .venv
          key: ${{ runner.os }}-pip-${{ github.sha }}
          restore-keys: |
            ${{ runner.os }}-pip-

      - name: Install dependencies
        run: |
          python.exe -m ensurepip
          python.exe -m pip install --upgrade pip
          pip install huggingface_hub==0.23.3 hf-transfer fire numpy==1.26.4 torch==2.3.1 transformers==4.43.4 onnx==1.16.1 onnxruntime==1.18.0 sentencepiece==0.2.0

          # if ($env:EXECUTOR -eq 'cpu') {
          #   pip install --pre onnxruntime-genai=="$env:ONNXRUNTIME_GENAI_VERSION"
          # } elseif ($env:EXECUTOR -eq 'dml') {
          #   pip install --pre onnxruntime-genai-directml=="$env:ONNXRUNTIME_GENAI_VERSION"
          # } elseif ($env:EXECUTOR -eq 'cuda') {
          #   pip install --pre onnxruntime-genai-cuda=="$env:ONNXRUNTIME_GENAI_VERSION" --index-url=https://aiinfra.pkgs.visualstudio.com/PublicPackages/_packaging/onnxruntime-cuda-12/pypi/simple/
          # } else {
          #   Write-Host "Error: Unknown EXECUTOR value: $env:EXECUTOR"
          #   exit 1
          # }

          # python -m onnxruntime_genai.models.builder --help

      - name: Extract MODEL_NAME
        shell: powershell
        run: |
          $SOURCE_MODEL_ID = "${{ env.SOURCE_MODEL_ID }}"
          $ADDR = $SOURCE_MODEL_ID -split '/'
          $MODEL_NAME = $ADDR[-1]
          $MODEL_NAME_LOWER = $MODEL_NAME.ToLower()
          echo "MODEL_NAME=$MODEL_NAME_LOWER" >> $env:GITHUB_ENV
          echo "MODEL_NAME_LOWER=$MODEL_NAME_LOWER" # For debugging

      - name: Print environment variables
        run: |
          echo "SOURCE_MODEL_ID: ${{ env.SOURCE_MODEL_ID }}"
          echo "PRECISION: ${{ env.PRECISION }}"
          echo "EXECUTOR: ${{ env.EXECUTOR }}"
          echo "MODEL_NAME: ${{ env.MODEL_NAME }}"

      - name: Prepare folders
        run: |
          mkdir -p ${{ env.MODEL_NAME }}/hf
          mkdir -p ${{ env.MODEL_NAME }}/onnx
          mkdir -p ${{ env.MODEL_NAME }}/cache

      - name: Cache Hugging Face model
        uses: actions/cache@0c45773b623bea8c8e75f6c82b208c3cf94ea4f9 # v4.0.2
        with:
          path: ${{ env.MODEL_NAME }}/hf
          key: ${{ runner.os }}-hf-model-${{ github.sha }}
          restore-keys: |
            {{ runner.os }}-hf-model-

      - name: Download Hugging Face model
        run: |
          huggingface-cli login --token ${{ secrets.HUGGINGFACE_TOKEN_READ }} --add-to-git-credential
          huggingface-cli download --repo-type model --local-dir ${{ env.MODEL_NAME }}/hf ${{ env.SOURCE_MODEL_ID }}
          huggingface-cli logout

      - name: Convert to ONNX - DirectML - INT4
        run: |
          huggingface-cli login --token ${{ secrets.HUGGINGFACE_TOKEN_READ }} --add-to-git-credential
          cd onnxruntime-genai/src/python/py/models
          python3 builder.py -i "../../../../../${{ env.MODEL_NAME }}/hf" -o "../../../../../${{ env.MODEL_NAME }}/onnx" -p ${{ env.PRECISION }} -e ${{ env.EXECUTOR }}
          cd "../../../../.."
          huggingface-cli logout

      - name: Upload to Hugging Face
        run: |
          Get-ChildItem -Path "${{ env.MODEL_NAME }}/onnx" -Force
          huggingface-cli login --token ${{ secrets.HUGGINGFACE_TOKEN_WRITE }} --add-to-git-credential
          huggingface-cli upload ${{ env.USER_NAME }}/${{ env.TARGET_MODEL_ID }} ${{ env.MODEL_NAME }}/onnx . --revision "${{ env.SOURCE_MODEL_SIZE }}-onnx"
          huggingface-cli logout

      - name: Cleanup
        if: always()
        run: |
          Remove-Item -Recurse -Force -Path "${{ env.MODEL_NAME }}"