Skip to content
This repository has been archived by the owner on Aug 30, 2024. It is now read-only.

[GPTQ Enhance] Support GPTQ & AWQ inference for QWENv1, v1.5 and Mixtral. #452

[GPTQ Enhance] Support GPTQ & AWQ inference for QWENv1, v1.5 and Mixtral.

[GPTQ Enhance] Support GPTQ & AWQ inference for QWENv1, v1.5 and Mixtral. #452

name: CPP Graph Test

# Triggers:
#   * pull requests against `main` that touch this workflow, its inference
#     script, the library sources or the build files (Markdown-only changes
#     are excluded by the trailing negated pattern);
#   * manual dispatch, where compiler version, model list and runner label
#     can be overridden.
on:
  pull_request:
    branches:
      - main
    paths:
      - '.github/workflows/cpp-graph-test.yml'
      - '.github/workflows/scripts/models/cpp_graph_inference.sh'
      - 'neural_speed/**'
      - 'bestla/**'
      - 'CMakeLists.txt'
      - 'setup.py'
      - '!**/*.md'
  workflow_dispatch:
    inputs:
      # Forwarded to cpp_graph_inference.sh through INPUT_COMPILER_VERSION.
      compiler_version:
        description: 'compiler_version'
        type: string
        required: false
        default: '13.1.0'
      # JSON array of model names; expanded into the job matrix via fromJson.
      models:
        description: 'models (in json)'
        type: string
        required: false
        default: '["llama-2-7b-chat", "gptj-6b"]'
      # Runner label used by the benchmark job.
      runner:
        description: 'runner'
        type: string
        required: false
        default: 'spr'
# Runs are grouped per workflow and pull request number (falling back to the
# ref when there is no pull request); a newer run in the same group cancels
# the one that is still in progress.
concurrency:
  cancel-in-progress: true
  group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.ref }}
# Workflow-wide environment, visible to every job and step below.
env:
  # Locations inside the checked-out repository.
  WORKING_DIR: ${{ github.workspace }}
  SCRIPT_PATH: ${{ github.workspace }}/.github/workflows/scripts
  OUT_SCRIPT_PATH: ${{ github.workspace }}/.github/workflows/scripts/models
  # Prefix of the container name looked up by the "Docker Clean Up" step.
  CONTAINER_NAME: 'codeScan'
  # Manual-dispatch input when provided, otherwise the same default as the
  # `compiler_version` input.
  INPUT_COMPILER_VERSION: ${{ inputs.compiler_version || '13.1.0' }}
jobs:
  # Runs the inference benchmark once per model in the matrix and uploads a
  # per-model summary log into the shared `cpp_graph` artifact.
  CPP-Graph-Workflow:
    runs-on: ${{inputs.runner || 'spr'}}
    strategy:
      matrix:
        # Manual-dispatch `models` input (JSON array) or the default pair.
        modelName: ${{fromJson(inputs.models || '["llama-2-7b-chat", "gptj-6b"]')}}
    steps:
      - name: Checkout out Repo
        uses: actions/checkout@v3
        with:
          submodules: "recursive"
          fetch-tags: true

      - name: Env build
        run: |
          bash ${{ github.workspace }}/.github/workflows/scripts/prepare_env_with_conda.sh "cpp-graph-test-neural-speed" "3.8"

      - name: BF16 Benchmark
        run: |
          cd ${{ github.workspace }}/.github/workflows/scripts/models
          bash cpp_graph_inference.sh cpp-graph-test-neural-speed ${{ matrix.modelName }} ${{ env.INPUT_COMPILER_VERSION }}

      # Give each matrix leg a unique file name so the logs do not overwrite
      # each other inside the shared artifact.
      - name: Rename summary
        run: |
          cd ${{ github.workspace }}
          cp cpp_graph_summary.log cpp_graph_summary_${{matrix.modelName}}.log

      - name: Publish pipeline artifact
        uses: actions/upload-artifact@v3
        if: ${{ !cancelled() }}
        with:
          name: cpp_graph
          path: ${{ github.workspace }}/cpp_graph_summary_${{matrix.modelName}}.log
          if-no-files-found: ignore # one of 'warn', 'error' or 'ignore'; defaults to 'warn'
          retention-days: 60 # 1 <= retention-days <= 90

  # Merges the per-model logs, compares them against a reference report and
  # fails the run when a performance regression is flagged.
  # NOTE(review): the job id is misspelled ("Genreate"); it is left unchanged
  # because renaming it would change the reported check name.
  Genreate-Report:
    runs-on: ubuntu-latest
    needs: [CPP-Graph-Workflow]
    steps:
      # If a container named "<CONTAINER_NAME>-<runner name>" exists, start it
      # and wipe /neural-speed inside it (errors from the wipe are ignored).
      - name: Docker Clean Up
        run: |
          docker ps -a
          if [[ $(docker ps -a | grep -i '${{ env.CONTAINER_NAME }}-${{ runner.name }}'$) ]]; then
            docker start ${{ env.CONTAINER_NAME }}-${{ runner.name }}
            echo "remove left files through container ..."
            docker exec ${{ env.CONTAINER_NAME }}-${{ runner.name }} bash -c "ls -a /neural-speed && rm -fr /neural-speed/* && rm -fr /neural-speed/.* || true"
          fi

      - name: Checkout out Repo
        uses: actions/checkout@v3

      # No artifact name is given, so every artifact of this run is fetched,
      # each into its own sub-directory (the logs land in .../log/cpp_graph).
      - name: Download Summary Log
        uses: actions/download-artifact@v3
        with:
          path: ${{ env.OUT_SCRIPT_PATH }}/generated/log

      # Concatenate the per-model logs; the merged file name has no trailing
      # "_<model>" part, so it is not picked up by the find pattern itself.
      - name: Merge CPP Graph Summary Log
        run: |
          cd ${{ env.OUT_SCRIPT_PATH }}/generated/log/cpp_graph
          for summary in $(find . -name "cpp_graph_summary_*.log"); do cat $summary >> cpp_graph_summary.log; done

      # Fetch the FinalReport artifact of the reference run configured in the
      # GRAPH_REF_ID repository variable; a missing artifact only warns.
      - name: Download Reference Artifact
        id: download-artifact
        uses: dawidd6/action-download-artifact@v2
        with:
          workflow: cpp-graph-test.yml
          name: FinalReport
          run_id: ${{ vars.GRAPH_REF_ID }}
          path: ${{ env.OUT_SCRIPT_PATH }}
          name_is_regexp: true
          repo: ${{ github.repository }}
          check_artifacts: false
          search_artifacts: false
          skip_unpack: false
          if_no_artifact_found: warn

      - name: Display structure of downloaded files
        run: cd ${{ env.OUT_SCRIPT_PATH }} && ls -R

      # Build report.html and append its <body> (blank lines and leading
      # spaces stripped) to the job summary.
      - name: Generate report
        run: |
          echo "------ Generating final report.html ------"
          cd ${{ env.OUT_SCRIPT_PATH }}
          /usr/bin/bash generate_report.sh --workflow=deploy
          sed -n '/<body>/,/<\/body>/p' generated/report.html | sed -r '/^$/d' | sed -r 's/^ +//g' >> $GITHUB_STEP_SUMMARY
        env:
          # Built from the run context instead of a hard-coded URL: the
          # previous value lacked the repository owner and did not resolve to
          # this run.
          RUN_DISPLAY_URL: ${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }}
          BUILD_NUMBER: ${{ github.run_id }}
          JOB_STATUS: succeed
          MR_source_branch: ${{ github.head_ref }}
          ghprbActualCommit: ${{ github.event.pull_request.head.sha }}

      - name: Publish Report
        uses: actions/upload-artifact@v3
        if: ${{ !cancelled() }}
        with:
          name: FinalReport
          path: ${{ env.OUT_SCRIPT_PATH }}/generated

      # The previous `$(is_perf_reg)` was shell command substitution, i.e. it
      # tried to execute a command called `is_perf_reg`, so the comparison
      # could never be true. Read the environment variable instead.
      # NOTE(review): assumes generate_report.sh exports `is_perf_reg` to later
      # steps (e.g. through $GITHUB_ENV) -- confirm. When it is unset the
      # default keeps the step passing, as before.
      - name: Specify performance regression
        run: |
          if [ "${is_perf_reg:-false}" == 'true' ]; then
            echo "[Performance Regression] Some model performance regression occurred, please check artifacts and reports."
            exit 1
          fi