# convert-model.yml — GitHub Actions workflow: convert a Hugging Face model
# to ONNX and upload the result back to the Hugging Face Hub.
---
name: Convert model to ONNX
on:
  push:
    # Sequence of patterns matched against refs/heads
    branches:
      - 'chore/convert-onnx'
  workflow_dispatch:
    inputs:
      source_model_id:
        description: "Source HuggingFace model ID to pull. For ex: meta-llama/Meta-Llama-3.1-8B-Instruct"
        required: true
      source_model_size:
        description: "The model size. For ex: 8B"
        required: true
        type: string
      target_model_id:
        description: "Target HuggingFace model ID to push. For ex: llama3.1"
        required: true
        type: string
concurrency:
  group: ${{ github.workflow }}-${{ github.head_ref || github.run_id }}
  cancel-in-progress: true
env:
  USER_NAME: cortexso
  # Inputs are only populated on workflow_dispatch; on push they expand to the
  # empty string, so fall back to the hard-coded defaults with `||`.
  # (The original hard-coded the values and commented the inputs out entirely,
  # which silently ignored the declared required dispatch inputs.)
  SOURCE_MODEL_ID: ${{ inputs.source_model_id || 'meta-llama/Meta-Llama-3.1-8B-Instruct' }}
  SOURCE_MODEL_SIZE: ${{ inputs.source_model_size || '8b' }}
  TARGET_MODEL_ID: ${{ inputs.target_model_id || 'llama3.1' }}
  PRECISION: int4 # Valid values: int4,fp16,fp32
  EXECUTOR: dml # Valid values: cpu,cuda,dml,web
  ONNXRUNTIME_GENAI_VERSION: 0.3.0 # Check version from: https://github.com/microsoft/onnxruntime-genai/releases
jobs:
  converter:
    # NOTE(review): "windows-amd" is not a GitHub-hosted runner label —
    # presumably a self-hosted Windows runner; confirm the label exists.
    runs-on: windows-amd
    steps:
      - name: Checkout
        uses: actions/checkout@v4 # v4.1.7
        with:
          # onnxruntime-genai is vendored as a submodule and used by the
          # "Convert to ONNX" step below.
          submodules: recursive
      - name: Set up Python
        # NOTE(review): pinned to the v4 major tag while the original inline
        # comment said "v5.1.1" — confirm which major version is intended.
        uses: actions/setup-python@v4
        with:
          python-version: '3.12'
          architecture: 'x64'
- name: Cache Python packages
uses: actions/cache@0c45773b623bea8c8e75f6c82b208c3cf94ea4f9 # v4.0.2
with:
path: |
~/.cache/pip
~/.local/share/pip
.venv
key: ${{ runner.os }}-pip-${{ github.sha }}
restore-keys: |
${{ runner.os }}-pip-
- name: Install dependencies
run: |
python.exe -m ensurepip
python.exe -m pip install --upgrade pip
pip install huggingface_hub==0.23.3 hf-transfer fire numpy==1.26.4 torch==2.3.1 transformers==4.43.4 onnx==1.16.1 onnxruntime==1.18.0 sentencepiece==0.2.0
# if ($env:EXECUTOR -eq 'cpu') {
# pip install --pre onnxruntime-genai=="$env:ONNXRUNTIME_GENAI_VERSION"
# } elseif ($env:EXECUTOR -eq 'dml') {
# pip install --pre onnxruntime-genai-directml=="$env:ONNXRUNTIME_GENAI_VERSION"
# } elseif ($env:EXECUTOR -eq 'cuda') {
# pip install --pre onnxruntime-genai-cuda=="$env:ONNXRUNTIME_GENAI_VERSION" --index-url=https://aiinfra.pkgs.visualstudio.com/PublicPackages/_packaging/onnxruntime-cuda-12/pypi/simple/
# } else {
# Write-Host "Error: Unknown EXECUTOR value: $env:EXECUTOR"
# exit 1
# }
# python -m onnxruntime_genai.models.builder --help
- name: Extract MODEL_NAME
shell: powershell
run: |
$SOURCE_MODEL_ID = "${{ env.SOURCE_MODEL_ID }}"
$ADDR = $SOURCE_MODEL_ID -split '/'
$MODEL_NAME = $ADDR[-1]
$MODEL_NAME_LOWER = $MODEL_NAME.ToLower()
echo "MODEL_NAME=$MODEL_NAME_LOWER" >> $env:GITHUB_ENV
echo "MODEL_NAME_LOWER=$MODEL_NAME_LOWER" # For debugging
- name: Print environment variables
run: |
echo "SOURCE_MODEL_ID: ${{ env.SOURCE_MODEL_ID }}"
echo "PRECISION: ${{ env.PRECISION }}"
echo "EXECUTOR: ${{ env.EXECUTOR }}"
echo "MODEL_NAME: ${{ env.MODEL_NAME }}"
- name: Prepare folders
run: |
mkdir -p ${{ env.MODEL_NAME }}/hf
mkdir -p ${{ env.MODEL_NAME }}/onnx
mkdir -p ${{ env.MODEL_NAME }}/cache
- name: Cache Hugging Face model
uses: actions/cache@0c45773b623bea8c8e75f6c82b208c3cf94ea4f9 # v4.0.2
with:
path: ${{ env.MODEL_NAME }}/hf
key: ${{ runner.os }}-hf-model-${{ github.sha }}
restore-keys: |
{{ runner.os }}-hf-model-
- name: Download Hugging Face model
run: |
huggingface-cli login --token ${{ secrets.HUGGINGFACE_TOKEN_READ }} --add-to-git-credential
huggingface-cli download --repo-type model --local-dir ${{ env.MODEL_NAME }}/hf ${{ env.SOURCE_MODEL_ID }}
huggingface-cli logout
- name: Convert to ONNX - DirectML - INT4
run: |
huggingface-cli login --token ${{ secrets.HUGGINGFACE_TOKEN_READ }} --add-to-git-credential
cd onnxruntime-genai/src/python/py/models
python3 builder.py -i "../../../../../${{ env.MODEL_NAME }}/hf" -o "../../../../../${{ env.MODEL_NAME }}/onnx" -p ${{ env.PRECISION }} -e ${{ env.EXECUTOR }}
cd "../../../../.."
huggingface-cli logout
- name: Upload to Hugging Face
run: |
Get-ChildItem -Path "${{ env.MODEL_NAME }}/onnx" -Force
huggingface-cli login --token ${{ secrets.HUGGINGFACE_TOKEN_WRITE }} --add-to-git-credential
huggingface-cli upload ${{ env.USER_NAME }}/${{ env.TARGET_MODEL_ID }} ${{ env.MODEL_NAME }}/onnx . --revision "${{ env.SOURCE_MODEL_SIZE }}-onnx"
huggingface-cli logout
- name: Cleanup
if: always()
run: |
Remove-Item -Recurse -Force -Path "${{ env.MODEL_NAME }}"