test: CI convert llama3.1 #63

Summary
Jobs
- converter
Run details
- Usage
- Workflow file

Workflow file for this run

.github/workflows/convert-model.yml at ecbbee5

	# Workflow that pulls a Hugging Face model, converts it to ONNX and uploads
	# the converted files back to Hugging Face.
	name: Convert model to ONNX

	on:
	  # Runs automatically for pushes to the conversion branch.
	  push:
	    # Sequence of patterns matched against refs/heads
	    branches:
	      - 'chore/convert-onnx'
	  # Manual trigger; all three inputs are mandatory strings.
	  workflow_dispatch:
	    inputs:
	      source_model_id:
	        description: "Source HuggingFace model ID to pull. For ex: meta-llama/Meta-Llama-3.1-8B-Instruct"
	        required: true
	        type: string
	      source_model_size:
	        description: "The model size. For ex: 8B"
	        required: true
	        type: string
	      target_model_id:
	        description: "Target HuggingFace model ID to push. For ex: llama3.1"
	        required: true
	        type: string

	# concurrency:
	#   group: ${{ github.workflow }}-${{ github.head_ref || github.run_id }}
	#   cancel-in-progress: true

	# Workflow-level settings available to every step as env.*.
	# The model IDs and size are hardcoded here; the expression in the trailing
	# comment on each of those lines is the workflow_dispatch input it stands in for.
	env:
	  USER_NAME: cortexso
	  SOURCE_MODEL_ID: meta-llama/Meta-Llama-3-8B-Instruct  # ${{ inputs.source_model_id }}
	  SOURCE_MODEL_SIZE: 8b  # ${{ inputs.source_model_size }}
	  TARGET_MODEL_ID: llama3.1  # ${{ inputs.target_model_id }}
	  PRECISION: int4  # Valid values: int4,fp16,fp32
	  EXECUTOR: dml  # Valid values: cpu,cuda,dml,web
	  # Check version from: https://github.com/microsoft/onnxruntime-genai/releases
	  ONNXRUNTIME_GENAI_VERSION: '0.3.0'

	jobs:
	  # Single job: fetch the source model, build the ONNX variant, publish it.
	  converter:
	    # NOTE(review): presumably a self-hosted Windows runner label - confirm.
	    runs-on: windows-onnx
	    steps:
	      - name: Checkout
	        uses: actions/checkout@v4 # v4.1.7
	        with:
	          submodules: recursive

	      - name: Set up Python
	        uses: actions/setup-python@v5 # v5.1.1
	        with:
	          python-version: '3.10'
	          # architecture: 'x64'

	      - name: Cache Python packages
	        uses: actions/cache@0c45773b623bea8c8e75f6c82b208c3cf94ea4f9 # v4.0.2
	        with:
	          # NOTE(review): these look like Linux-style pip locations; confirm
	          # they match where pip stores its cache on this Windows runner.
	          path: |
	            ~/.cache/pip
	            ~/.local/share/pip
	            .venv
	          # The key embeds the commit SHA, so a new commit never matches it
	          # exactly and restores go through the restore-keys prefix below.
	          key: ${{ runner.os }}-pip-${{ github.sha }}
	          restore-keys: |
	            ${{ runner.os }}-pip-

	      - name: Install dependencies
	        shell: powershell
	        run: |
	          # python.exe -m ensurepip
	          # python.exe -m pip install --upgrade pip
	          pip3 install -I --user huggingface_hub hf-transfer numpy==1.26.4 torch==2.3.1 transformers==4.43.4 onnx==1.16.1 onnxruntime==1.18.0 sentencepiece==0.2.0

	          # if ($env:EXECUTOR -eq 'cpu') {
	          #   pip install --pre onnxruntime-genai=="$env:ONNXRUNTIME_GENAI_VERSION"
	          # } elseif ($env:EXECUTOR -eq 'dml') {
	          #   pip install --pre onnxruntime-genai-directml=="$env:ONNXRUNTIME_GENAI_VERSION"
	          # } elseif ($env:EXECUTOR -eq 'cuda') {
	          #   pip install --pre onnxruntime-genai-cuda=="$env:ONNXRUNTIME_GENAI_VERSION" --index-url=https://aiinfra.pkgs.visualstudio.com/PublicPackages/_packaging/onnxruntime-cuda-12/pypi/simple/
	          # } else {
	          #   Write-Host "Error: Unknown EXECUTOR value: $env:EXECUTOR"
	          #   exit 1
	          # }

	          # python -m onnxruntime_genai.models.builder --help

	      # Derives MODEL_NAME (lower-cased last path segment of SOURCE_MODEL_ID)
	      # and exports it to later steps through GITHUB_ENV.
	      - name: Extract MODEL_NAME
	        shell: powershell
	        run: |
	          $SOURCE_MODEL_ID = "${{ env.SOURCE_MODEL_ID }}"
	          $ADDR = $SOURCE_MODEL_ID -split '/'
	          $MODEL_NAME = $ADDR[-1]
	          $MODEL_NAME_LOWER = $MODEL_NAME.ToLower()
	          # Windows PowerShell's `>>` does not write UTF-8, which GITHUB_ENV
	          # requires, so append with an explicit encoding instead.
	          "MODEL_NAME=$MODEL_NAME_LOWER" | Out-File -FilePath $env:GITHUB_ENV -Encoding utf8 -Append
	          echo "MODEL_NAME_LOWER=$MODEL_NAME_LOWER" # For debugging

	      - name: Print environment variables
	        run: |
	          echo "SOURCE_MODEL_ID: ${{ env.SOURCE_MODEL_ID }}"
	          echo "PRECISION: ${{ env.PRECISION }}"
	          echo "EXECUTOR: ${{ env.EXECUTOR }}"
	          echo "MODEL_NAME: ${{ env.MODEL_NAME }}"

	      # Reports whether the downloaded model folder is already present; the
	      # folder-creation and download steps below are skipped when it is.
	      - name: Check file existence
	        id: check_files
	        uses: andstor/file-existence-action@v1
	        with:
	          files: "C:\\models\\${{ env.MODEL_NAME }}/hf"

	      - name: Prepare folders
	        if: steps.check_files.outputs.files_exists == 'false'
	        run: |
	          mkdir -p C:\\models\\${{ env.MODEL_NAME }}/hf
	          mkdir -p C:\\models\\${{ env.MODEL_NAME }}/onnx
	          mkdir -p C:\\models\\${{ env.MODEL_NAME }}/cache

	      - name: Download Hugging Face model
	        id: download_hf
	        if: steps.check_files.outputs.files_exists == 'false'
	        run: |
	          huggingface-cli login --token ${{ secrets.HUGGINGFACE_TOKEN_READ }} --add-to-git-credential
	          huggingface-cli download --repo-type model --local-dir C:\\models\\${{ env.MODEL_NAME }}/hf ${{ env.SOURCE_MODEL_ID }}
	          huggingface-cli logout

	      # - name: Remove Failure Download
	      #   if: steps.download_hf.outcome == 'failure'
	      #   run: |
	      #     Remove-Item -Recurse -Force -Path "C:\\models\\${{ env.MODEL_NAME }}"

	      # Runs the builder script against the hf folder and writes to the onnx
	      # folder, using the cache folder as the builder's cache directory.
	      # NOTE(review): the script path is presumably supplied by the recursive
	      # submodule checkout above - confirm.
	      - name: Convert to ONNX - DirectML - INT4
	        shell: powershell
	        run: |
	          huggingface-cli login --token ${{ secrets.HUGGINGFACE_TOKEN_READ }} --add-to-git-credential
	          python3 "onnxruntime-genai/src/python/py/models/builder.py" -i "C:\\models\\${{ env.MODEL_NAME }}\\hf" -o "C:\\models\\${{ env.MODEL_NAME }}\\onnx" -p ${{ env.PRECISION }} -e ${{ env.EXECUTOR }} -c "C:\\models\\${{ env.MODEL_NAME }}\\cache"
	          huggingface-cli logout

	      # Lists the converted files, then uploads the onnx folder to
	      # USER_NAME/TARGET_MODEL_ID under the "<SOURCE_MODEL_SIZE>-onnx" revision.
	      - name: Upload to Hugging Face
	        run: |
	          Get-ChildItem -Path "C:\\models\\${{ env.MODEL_NAME }}/onnx" -Force
	          huggingface-cli login --token ${{ secrets.HUGGINGFACE_TOKEN_WRITE }} --add-to-git-credential
	          huggingface-cli upload ${{ env.USER_NAME }}/${{ env.TARGET_MODEL_ID }} "C:\\models\\${{ env.MODEL_NAME }}/onnx" . --revision "${{ env.SOURCE_MODEL_SIZE }}-onnx"
	          huggingface-cli logout

	      # - name: Cleanup
	      #   if: always()
	      #   run: |
	      #     Remove-Item -Recurse -Force -Path "${{ env.MODEL_NAME }}"

Provide feedback

Saved searches

Use saved searches to filter your results more quickly

test: CI convert llama3.1 #63

Workflow file

test: CI convert llama3.1 #63

Jobs

Run details

Workflow file for this run