-
Notifications
You must be signed in to change notification settings - Fork 0
94 lines (82 loc) · 3.55 KB
/
convert-model.yml
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
name: Convert model to ONNX

on:
  # Trigger manually
  workflow_dispatch:
    inputs:
      # Hugging Face repository the model weights are downloaded from.
      source_model_id:
        description: "Source HuggingFace model ID to pull. For ex: meta-llama/Meta-Llama-3.1-8B-Instruct"
        required: true
        # Declared explicitly so all three inputs are typed the same way.
        type: string
      # Size label of the model; exposed to the jobs as SOURCE_MODEL_SIZE.
      source_model_size:
        description: "The model size. For ex: 8B"
        required: true
        type: string
      # Hugging Face repository the converted model is meant to be pushed to.
      target_model_id:
        description: "Target HuggingFace model ID to push. For ex: cortexso/llama3.1"
        required: true
        type: string

# Workflow-level environment shared by every job and step.
env:
  # Hugging Face account name used when composing the upload repository.
  USER_NAME: jan-hq

  # Dispatch inputs, re-exported as environment variables.
  SOURCE_MODEL_ID: ${{ inputs.source_model_id }}
  SOURCE_MODEL_SIZE: ${{ inputs.source_model_size }}
  TARGET_MODEL_ID: ${{ inputs.target_model_id }}

  # Valid precision + execution provider combinations are:
  # FP32 CPU, FP32 CUDA, FP16 CUDA, FP16 DML, INT4 CPU, INT4 CUDA, INT4 DML
  PRECISION: INT4
  EXECUTOR: DML

  # Check latest version from: https://github.com/microsoft/onnxruntime-genai/releases
  # Quoted so the version pin is always read as a string.
  ONNXRUNTIME_GENAI_VERSION: "0.3.0"

jobs:
  # Downloads the source model from Hugging Face and converts it to ONNX with
  # the onnxruntime-genai model builder, using the PRECISION / EXECUTOR pair
  # from the workflow-level env.
  #
  # Scripts read values through $env:NAME at runtime rather than splicing
  # ${{ ... }} expressions into the script text, so a crafted dispatch input
  # cannot inject PowerShell code and secrets never appear in the script body.
  converter:
    runs-on: windows-latest
    steps:
      - name: Checkout
        uses: actions/checkout@692973e3d937129bcbf40652eb9f2f61becf3332 # v4.1.7
        with:
          submodules: recursive

      - name: Set up Python
        uses: actions/setup-python@39cd14951b08e74b54015e9e001cdefcf80e669f # v5.1.1
        with:
          python-version: "3.12.4"
          # NOTE(review): pip caching hashes a requirements/pyproject file in
          # the repository - confirm one exists, otherwise drop this option.
          cache: 'pip' # caching pip dependencies

      - name: Install dependencies
        run: |
          python.exe -m pip install --upgrade pip
          pip install huggingface_hub hf-transfer fire
          pip install torch transformers onnx onnxruntime sentencepiece
          pip install --pre onnxruntime-genai=="$env:ONNXRUNTIME_GENAI_VERSION"

      # Derives MODEL_NAME (lower-cased last path segment of SOURCE_MODEL_ID)
      # and exports it to the following steps through GITHUB_ENV.
      - name: Extract MODEL_NAME
        shell: powershell
        run: |
          $ADDR = $env:SOURCE_MODEL_ID -split '/'
          $MODEL_NAME = $ADDR[-1]
          $MODEL_NAME_LOWER = $MODEL_NAME.ToLower()
          # Windows PowerShell (shell: powershell) does not write UTF-8 with '>>';
          # GITHUB_ENV must be appended to with an explicit UTF-8 encoding.
          "MODEL_NAME=$MODEL_NAME_LOWER" | Out-File -FilePath $env:GITHUB_ENV -Encoding utf8 -Append
          echo "MODEL_NAME_LOWER=$MODEL_NAME_LOWER" # For debugging

      - name: Print environment variables
        run: |
          echo "SOURCE_MODEL_ID: $env:SOURCE_MODEL_ID"
          echo "PRECISION: $env:PRECISION"
          echo "EXECUTOR: $env:EXECUTOR"
          echo "MODEL_NAME: $env:MODEL_NAME"

      # Working tree: <MODEL_NAME>/hf (download), /onnx (output), /cache.
      - name: Prepare folders
        run: |
          mkdir -p "${env:MODEL_NAME}/hf"
          mkdir -p "${env:MODEL_NAME}/onnx"
          mkdir -p "${env:MODEL_NAME}/cache"

      - name: Download Hugging Face model
        env:
          # Passed through the environment so the token is not part of the script text.
          HUGGINGFACE_TOKEN_READ: ${{ secrets.HUGGINGFACE_TOKEN_READ }}
        run: |
          huggingface-cli login --token "$env:HUGGINGFACE_TOKEN_READ" --add-to-git-credential
          huggingface-cli download --repo-type model --local-dir "${env:MODEL_NAME}/hf" "$env:SOURCE_MODEL_ID"
          # Only the last command's exit code decides the step result, so keep the
          # download status, always log out, then fail the step if the download failed.
          $downloadExitCode = $LASTEXITCODE
          huggingface-cli logout
          if ($downloadExitCode -ne 0) { exit $downloadExitCode }

      - name: Convert to ONNX - DirectML - INT4
        run: |
          python -m onnxruntime_genai.models.builder -m "${env:MODEL_NAME}/hf" -o "${env:MODEL_NAME}/onnx" -p "$env:PRECISION" -e "$env:EXECUTOR"

      # Upload is currently disabled. The revision uses SOURCE_MODEL_SIZE, the
      # size variable defined in the workflow env (MODEL_SIZE is not defined).
      # NOTE(review): the target_model_id example already contains an owner
      # ("cortexso/llama3.1"); prefixing USER_NAME would then yield a
      # three-segment repo id - confirm the intended format before enabling.
      # - name: Upload to Hugging Face
      #   env:
      #     HUGGINGFACE_TOKEN_WRITE: ${{ secrets.HUGGINGFACE_TOKEN_WRITE }}
      #   run: |
      #     Get-ChildItem -Path "${env:MODEL_NAME}/onnx" -Force
      #     huggingface-cli login --token "$env:HUGGINGFACE_TOKEN_WRITE" --add-to-git-credential
      #     huggingface-cli upload "${env:USER_NAME}/${env:TARGET_MODEL_ID}" "${env:MODEL_NAME}/onnx" . --revision "${env:SOURCE_MODEL_SIZE}-onnx"
      #     huggingface-cli logout

      - name: Cleanup
        if: always()
        run: |
          # Runs even after earlier failures, so MODEL_NAME may be unset or the
          # folder may never have been created; only delete what actually exists.
          if ($env:MODEL_NAME -and (Test-Path -LiteralPath $env:MODEL_NAME)) {
            Remove-Item -Recurse -Force -LiteralPath $env:MODEL_NAME
          }