-
Notifications
You must be signed in to change notification settings - Fork 0
144 lines (126 loc) · 5.64 KB
/
convert-model.yml
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
name: Convert model to ONNX

# Triggers: pushes to the conversion branch, or a manual run with the
# source/target model details supplied as inputs.
on:
  push:
    # Sequence of patterns matched against refs/heads
    branches:
      - 'chore/convert-onnx'
  workflow_dispatch:
    inputs:
      source_model_id:
        description: "Source HuggingFace model ID to pull. For ex: meta-llama/Meta-Llama-3.1-8B-Instruct"
        required: true
        # Declared explicitly so all three inputs share the same shape.
        type: string
      source_model_size:
        description: "The model size. For ex: 8B"
        required: true
        type: string
      target_model_id:
        description: "Target HuggingFace model ID to push. For ex: llama3.1"
        required: true
        type: string

# concurrency:
#   group: ${{ github.workflow }}-${{ github.head_ref || github.run_id }}
#   cancel-in-progress: true
# Workflow-wide settings, exported as environment variables to every step.
# The three model values come from the workflow_dispatch inputs when the run
# is started manually; on push events the inputs context is empty, so each
# expression falls back to the literal default after `||`.
env:
  # Hugging Face account/organisation that owns the target repository.
  USER_NAME: cortexso
  SOURCE_MODEL_ID: ${{ inputs.source_model_id || 'meta-llama/Meta-Llama-3-8B-Instruct' }}
  # Used as the prefix of the upload revision name ("<size>-onnx").
  SOURCE_MODEL_SIZE: ${{ inputs.source_model_size || '8b' }}
  TARGET_MODEL_ID: ${{ inputs.target_model_id || 'llama3.1' }}
  PRECISION: int4  # Valid values: int4,fp16,fp32
  EXECUTOR: dml  # Valid values: cpu,cuda,dml,web
  ONNXRUNTIME_GENAI_VERSION: 0.3.0  # Check version from: https://github.com/microsoft/onnxruntime-genai/releases
jobs:
  # Downloads a Hugging Face model, converts it to ONNX with the
  # onnxruntime-genai builder script, and uploads the result to Hugging Face.
  converter:
    runs-on: windows-onnx
    steps:
      - name: Checkout
        uses: actions/checkout@v4  # v4.1.7
        with:
          # NOTE(review): presumably this is what provides the onnxruntime-genai/
          # directory used by the convert step - confirm it is a submodule.
          submodules: recursive

      - name: Set up Python
        uses: actions/setup-python@v5  # v5.1.1
        with:
          python-version: '3.10'
          # architecture: 'x64'

      - name: Cache Python packages
        uses: actions/cache@0c45773b623bea8c8e75f6c82b208c3cf94ea4f9  # v4.0.2
        with:
          # ~\AppData\Local\pip\Cache is pip's cache location on Windows; the
          # other entries are kept unchanged from the previous configuration.
          path: |
            ~/.cache/pip
            ~/.local/share/pip
            ~\AppData\Local\pip\Cache
            .venv
          key: ${{ runner.os }}-pip-${{ github.sha }}
          restore-keys: |
            ${{ runner.os }}-pip-

      - name: Install dependencies
        shell: powershell
        run: |
          # python.exe -m ensurepip
          # python.exe -m pip install --upgrade pip
          pip3 install -I --user huggingface_hub hf-transfer numpy==1.26.4 torch==2.3.1 transformers==4.43.4 onnx==1.16.1 onnxruntime==1.18.0 sentencepiece==0.2.0
          # if ($env:EXECUTOR -eq 'cpu') {
          #   pip install --pre onnxruntime-genai=="$env:ONNXRUNTIME_GENAI_VERSION"
          # } elseif ($env:EXECUTOR -eq 'dml') {
          #   pip install --pre onnxruntime-genai-directml=="$env:ONNXRUNTIME_GENAI_VERSION"
          # } elseif ($env:EXECUTOR -eq 'cuda') {
          #   pip install --pre onnxruntime-genai-cuda=="$env:ONNXRUNTIME_GENAI_VERSION" --index-url=https://aiinfra.pkgs.visualstudio.com/PublicPackages/_packaging/onnxruntime-cuda-12/pypi/simple/
          # } else {
          #   Write-Host "Error: Unknown EXECUTOR value: $env:EXECUTOR"
          #   exit 1
          # }
          # python -m onnxruntime_genai.models.builder --help

      # Derives MODEL_NAME (lower-cased last path segment of SOURCE_MODEL_ID)
      # and exports it to the following steps through GITHUB_ENV.
      - name: Extract MODEL_NAME
        shell: powershell
        run: |
          $modelName = ($env:SOURCE_MODEL_ID -split '/')[-1].ToLower()
          # Windows PowerShell's ">>" does not write UTF-8, so the encoding
          # is given explicitly when appending to the GITHUB_ENV file.
          "MODEL_NAME=$modelName" | Out-File -FilePath $env:GITHUB_ENV -Encoding utf8 -Append
          Write-Host "MODEL_NAME_LOWER=$modelName"  # For debugging

      - name: Print environment variables
        run: |
          echo "SOURCE_MODEL_ID: ${{ env.SOURCE_MODEL_ID }}"
          echo "PRECISION: ${{ env.PRECISION }}"
          echo "EXECUTOR: ${{ env.EXECUTOR }}"
          echo "MODEL_NAME: ${{ env.MODEL_NAME }}"

      # - name: Prepare folders
      #   run: |
      #     mkdir -p C:\\models\\${{ env.MODEL_NAME }}/hf
      #     mkdir -p C:\\models\\${{ env.MODEL_NAME }}/onnx
      #     mkdir -p C:\\models\\${{ env.MODEL_NAME }}/cache

      # - name: Check file existence
      #   id: check_files
      #   uses: andstor/file-existence-action@v1
      #   with:
      #     files: "C:\\models\\${{ env.MODEL_NAME }}/hf"

      - name: Download Hugging Face model
        id: download_hf
        # if: steps.check_files.outputs.files_exists == 'false'
        env:
          # Passed through the environment instead of being pasted into the
          # script text. Deliberately not named HF_TOKEN so the CLI login
          # below stays the only source of the active token.
          HF_LOGIN_TOKEN: ${{ secrets.HUGGINGFACE_TOKEN_READ }}
        run: |
          $modelRoot = "C:\models\${env:MODEL_NAME}"
          # -Force keeps this from failing when the folders are still present
          # from an earlier run on the same machine.
          foreach ($sub in 'hf', 'onnx', 'cache') {
            New-Item -ItemType Directory -Force -Path (Join-Path $modelRoot $sub) | Out-Null
          }
          huggingface-cli login --token $env:HF_LOGIN_TOKEN --add-to-git-credential
          if ($LASTEXITCODE -ne 0) { throw "huggingface-cli login failed (exit code $LASTEXITCODE)" }
          try {
            huggingface-cli download --repo-type model --local-dir (Join-Path $modelRoot 'hf') $env:SOURCE_MODEL_ID
            Write-Host "huggingface-cli download exit code: $LASTEXITCODE"
            # Only the last command's exit code decides the step result, so
            # a failed download has to be surfaced explicitly.
            if ($LASTEXITCODE -ne 0) { throw "Model download failed (exit code $LASTEXITCODE)" }
          }
          finally {
            # Always drop the stored token, even when the download failed.
            huggingface-cli logout
          }

      # - name: Remove Failure Download
      #   if: steps.download_hf.outcome == 'failure'
      #   run: |
      #     Remove-Item -Recurse -Force -Path "C:\\models\\${{ env.MODEL_NAME }}"

      - name: "Convert to ONNX - ${{ env.EXECUTOR }} - ${{ env.PRECISION }}"
        shell: powershell
        env:
          HF_LOGIN_TOKEN: ${{ secrets.HUGGINGFACE_TOKEN_READ }}
        run: |
          $modelRoot = "C:\models\${env:MODEL_NAME}"
          huggingface-cli login --token $env:HF_LOGIN_TOKEN --add-to-git-credential
          if ($LASTEXITCODE -ne 0) { throw "huggingface-cli login failed (exit code $LASTEXITCODE)" }
          try {
            python3 "onnxruntime-genai/src/python/py/models/builder.py" -i (Join-Path $modelRoot 'hf') -o (Join-Path $modelRoot 'onnx') -p $env:PRECISION -e $env:EXECUTOR
            # Without this check the trailing logout would hide a failed
            # conversion and the upload step would still run.
            if ($LASTEXITCODE -ne 0) { throw "ONNX conversion failed (exit code $LASTEXITCODE)" }
          }
          finally {
            huggingface-cli logout
          }

      - name: Upload to Hugging Face
        env:
          HF_LOGIN_TOKEN: ${{ secrets.HUGGINGFACE_TOKEN_WRITE }}
        run: |
          $onnxDir = "C:\models\${env:MODEL_NAME}\onnx"
          # List the converted files in the log before uploading.
          Get-ChildItem -Path $onnxDir -Force
          huggingface-cli login --token $env:HF_LOGIN_TOKEN --add-to-git-credential
          if ($LASTEXITCODE -ne 0) { throw "huggingface-cli login failed (exit code $LASTEXITCODE)" }
          try {
            huggingface-cli upload "${env:USER_NAME}/${env:TARGET_MODEL_ID}" $onnxDir . --revision "${env:SOURCE_MODEL_SIZE}-onnx"
            if ($LASTEXITCODE -ne 0) { throw "Upload failed (exit code $LASTEXITCODE)" }
          }
          finally {
            huggingface-cli logout
          }

      # - name: Cleanup
      #   if: always()
      #   run: |
      #     Remove-Item -Recurse -Force -Path "${{ env.MODEL_NAME }}"