# .github/workflows/convert-model.yml

name: Convert model to ONNX

# Triggers: pushes to the conversion branch, or a manual run that supplies the
# source model, its size label and the target repository name.
on:
  push:
    # Branch name patterns, matched against refs/heads.
    branches:
      - 'chore/convert-onnx'
  workflow_dispatch:
    inputs:
      source_model_id:
        description: "Source HuggingFace model ID to pull. For ex: meta-llama/Meta-Llama-3.1-8B-Instruct"
        required: true
        type: string
      source_model_size:
        description: "The model size. For ex: 8B"
        required: true
        type: string
      target_model_id:
        description: "Target HuggingFace model ID to push. For ex: llama3.1"
        required: true
        type: string

# concurrency:
#   group: ${{ github.workflow }}-${{ github.head_ref || github.run_id }}
#   cancel-in-progress: true
  
# Workflow-wide settings shared by every step.
# Manual runs (workflow_dispatch) provide the three model inputs; push-triggered
# runs have no inputs, so each value falls back to the default after `||`.
env:
  USER_NAME: cortexso
  SOURCE_MODEL_ID: ${{ inputs.source_model_id || 'meta-llama/Meta-Llama-3-8B-Instruct' }}
  SOURCE_MODEL_SIZE: ${{ inputs.source_model_size || '8b' }}
  TARGET_MODEL_ID: ${{ inputs.target_model_id || 'llama3.1' }}
  PRECISION: int4                   # Valid values: int4,fp16,fp32
  EXECUTOR: dml                     # Valid values: cpu,cuda,dml,web
  # Quoted so the version is always read as a string.
  ONNXRUNTIME_GENAI_VERSION: '0.3.0'  # Check version from: https://github.com/microsoft/onnxruntime-genai/releases
  
jobs:
  # Single job: obtain the source model, convert it to ONNX and upload the
  # result. Runs on the runner labelled `windows-onnx`.
  converter:
    runs-on: windows-onnx
    steps:
      # Submodules are fetched recursively. The conversion step later runs
      # builder.py from the onnxruntime-genai/ directory — presumably a
      # submodule of this repository; confirm against .gitmodules.
      - name: Checkout
        uses: actions/checkout@v4 # v4.1.7
        with:
          submodules: recursive

      # Installs Python 3.10. The version is quoted on purpose: an unquoted
      # 3.10 would be parsed as the float 3.1.
      - name: Set up Python
        uses: actions/setup-python@v5 # v5.1.1
        with:
          python-version: '3.10'
          # architecture: 'x64'

      # Caches pip downloads between runs. The key is unique per commit, so an
      # exact hit is rare; `restore-keys` falls back to the most recent cache
      # whose key starts with the OS-specific prefix.
      - name: Cache Python packages
        uses: actions/cache@0c45773b623bea8c8e75f6c82b208c3cf94ea4f9 # v4.0.2
        with:
          # On Windows pip keeps its cache under %LOCALAPPDATA%\pip\Cache; the
          # Linux-style locations are kept so the step still works elsewhere.
          path: |
            ~\AppData\Local\pip\Cache
            ~/.cache/pip
            ~/.local/share/pip
            .venv
          key: ${{ runner.os }}-pip-${{ github.sha }}
          restore-keys: |
            ${{ runner.os }}-pip-

      # Installs the pinned Python toolchain used by the later steps.
      - name: Install dependencies
        shell: powershell
        run: |
          # python.exe -m ensurepip
          # python.exe -m pip install --upgrade pip

          # One requirement per line; the array is splatted so pip receives
          # each entry as a separate argument, in this order.
          $requirements = @(
            'huggingface_hub'
            'hf-transfer'
            'numpy==1.26.4'
            'torch==2.3.1'
            'transformers==4.43.4'
            'onnx==1.16.1'
            'onnxruntime==1.18.0'
            'sentencepiece==0.2.0'
          )
          pip3 install -I --user @requirements

          # Disabled: per-executor install of the onnxruntime-genai package.
          # if ($env:EXECUTOR -eq 'cpu') {
          #   pip install --pre onnxruntime-genai=="$env:ONNXRUNTIME_GENAI_VERSION"
          # } elseif ($env:EXECUTOR -eq 'dml') {
          #   pip install --pre onnxruntime-genai-directml=="$env:ONNXRUNTIME_GENAI_VERSION"
          # } elseif ($env:EXECUTOR -eq 'cuda') {
          #   pip install --pre onnxruntime-genai-cuda=="$env:ONNXRUNTIME_GENAI_VERSION" --index-url=https://aiinfra.pkgs.visualstudio.com/PublicPackages/_packaging/onnxruntime-cuda-12/pypi/simple/
          # } else {
          #   Write-Host "Error: Unknown EXECUTOR value: $env:EXECUTOR"
          #   exit 1
          # }

          # python -m onnxruntime_genai.models.builder --help

      # Derives MODEL_NAME (the lower-cased last path segment of
      # SOURCE_MODEL_ID) and exports it to the following steps.
      - name: Extract MODEL_NAME
        shell: powershell
        run: |
          # Read the ID from the environment rather than splicing it into the
          # script text, so its content is never parsed as PowerShell.
          $modelName = ($env:SOURCE_MODEL_ID -split '/')[-1].ToLower()

          # Windows PowerShell 5.1 does not write UTF-8 by default, so the
          # encoding is set explicitly when appending to the GITHUB_ENV file.
          "MODEL_NAME=$modelName" | Out-File -FilePath $env:GITHUB_ENV -Encoding utf8 -Append

          Write-Output "MODEL_NAME_LOWER=$modelName" # For debugging

      # Logs the resolved settings so a run can be diagnosed from its output.
      - name: Print environment variables
        run: |
          Write-Output "SOURCE_MODEL_ID: ${{ env.SOURCE_MODEL_ID }}"
          Write-Output "PRECISION: ${{ env.PRECISION }}"
          Write-Output "EXECUTOR: ${{ env.EXECUTOR }}"
          Write-Output "MODEL_NAME: ${{ env.MODEL_NAME }}"
      # Reports through the `files_exists` output whether the model's hf
      # folder is already on disk; later steps use it to skip the download.
      - name: Check file existence
        id: check_files
        uses: andstor/file-existence-action@v1
        with:
          # Single quotes keep the backslashes literal without escaping.
          files: 'C:\models\${{ env.MODEL_NAME }}/hf'


      # Creates the hf, onnx and cache working folders for the model when no
      # local copy was found.
      - name: Prepare folders
        if: steps.check_files.outputs.files_exists == 'false'
        run: |
          foreach ($subFolder in 'hf', 'onnx', 'cache') {
            mkdir -p C:\\models\\${{ env.MODEL_NAME }}/$subFolder
          }
      
      # Downloads the source model into the hf folder. Runs only when the
      # earlier check found no local copy.
      - name: Download Hugging Face model
        id: download_hf
        if: steps.check_files.outputs.files_exists == 'false'
        env:
          # Handed over via the environment so the secret is not spliced into
          # the script text.
          HF_LOGIN_TOKEN: ${{ secrets.HUGGINGFACE_TOKEN_READ }}
        run: |
          huggingface-cli login --token $env:HF_LOGIN_TOKEN --add-to-git-credential
          huggingface-cli download --repo-type model --local-dir C:\\models\\${{ env.MODEL_NAME }}/hf ${{ env.SOURCE_MODEL_ID }}
          # The step result is the exit code of the last command, so keep the
          # download result before logout overwrites it.
          $downloadExitCode = $LASTEXITCODE
          huggingface-cli logout
          if ($downloadExitCode -ne 0) { exit $downloadExitCode }

      # - name: Remove Failure Download
      #   if: failure() && steps.download_hf.outcome == 'failure'
      #   run: |
      #     Remove-Item -Recurse -Force -Path "C:\\models\\${{ env.MODEL_NAME }}"


      # Runs the onnxruntime-genai model builder on the downloaded model,
      # using the PRECISION and EXECUTOR settings, and writes to the onnx folder.
      - name: Convert to ONNX - DirectML - INT4
        shell: powershell
        env:
          # Handed over via the environment so the secret is not spliced into
          # the script text.
          HF_LOGIN_TOKEN: ${{ secrets.HUGGINGFACE_TOKEN_READ }}
        run: |
          huggingface-cli login --token $env:HF_LOGIN_TOKEN --add-to-git-credential
          python3 "onnxruntime-genai/src/python/py/models/builder.py" -i "C:\\models\\${{ env.MODEL_NAME }}\\hf" -o "C:\\models\\${{ env.MODEL_NAME }}\\onnx" -p ${{ env.PRECISION }} -e ${{ env.EXECUTOR }} -c "C:\\models\\${{ env.MODEL_NAME }}\\cache"
          # The step result is the exit code of the last command, so keep the
          # builder result before logout overwrites it; otherwise a failed
          # conversion would still let the upload step run.
          $convertExitCode = $LASTEXITCODE
          huggingface-cli logout
          if ($convertExitCode -ne 0) { exit $convertExitCode }

      # Lists the converted files, then uploads the onnx folder to the
      # "<SOURCE_MODEL_SIZE>-onnx" revision of USER_NAME/TARGET_MODEL_ID.
      - name: Upload to Hugging Face
        env:
          # Handed over via the environment so the secret is not spliced into
          # the script text.
          HF_LOGIN_TOKEN: ${{ secrets.HUGGINGFACE_TOKEN_WRITE }}
        run: |
          Get-ChildItem -Path "C:\\models\\${{ env.MODEL_NAME }}/onnx" -Force
          huggingface-cli login --token $env:HF_LOGIN_TOKEN --add-to-git-credential
          huggingface-cli upload ${{ env.USER_NAME }}/${{ env.TARGET_MODEL_ID }} "C:\\models\\${{ env.MODEL_NAME }}/onnx" . --revision "${{ env.SOURCE_MODEL_SIZE }}-onnx"
          # The step result is the exit code of the last command, so keep the
          # upload result before logout overwrites it.
          $uploadExitCode = $LASTEXITCODE
          huggingface-cli logout
          if ($uploadExitCode -ne 0) { exit $uploadExitCode }

      # - name: Cleanup
      #   if: always()
      #   run: |
      #     Remove-Item -Recurse -Force -Path "${{ env.MODEL_NAME }}"