This repository has been archived by the owner on Aug 30, 2024. It is now read-only.
[GPTQ Enhance] Add GPTQ int8 weight unpack function #647
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Workflow identity and triggers.
# Runs automatically for pull requests targeting `main` that touch the listed
# paths, and can be started manually with overridable inputs.
name: CPP Graph Test

on:
  pull_request:
    branches: [main]
    paths:
      - '.github/workflows/cpp-graph-test.yml'
      - '.github/workflows/scripts/models/cpp_graph_inference.sh'
      - 'neural_speed/**'
      - 'bestla/**'
      - 'CMakeLists.txt'
      - 'setup.py'
      # Negated pattern: Markdown-only changes do not trigger the workflow.
      - '!**/*.md'
  workflow_dispatch:
    inputs:
      # Forwarded to the benchmark script through env.INPUT_COMPILER_VERSION.
      compiler_version:
        description: 'compiler_version'
        required: false
        type: string
        default: '13.1.0'
      # JSON array of model names; parsed with fromJson() into the job matrix.
      models:
        description: 'models (in json)'
        required: false
        type: string
        default: '["llama-2-7b-chat", "gptj-6b"]'
      # Runner label used for the benchmark job's `runs-on`.
      runner:
        description: 'runner'
        required: false
        type: string
        default: 'spr'
# One active run per workflow and pull request (or per ref when there is no
# pull request): a new commit cancels the run that is still in progress.
concurrency:
  group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.ref }}
  cancel-in-progress: true
# Workflow-level environment shared by every job.
env:
  # Directory holding the model scripts; also used as the report output root.
  OUT_SCRIPT_PATH: ${{ github.workspace }}/.github/workflows/scripts/models
  SCRIPT_PATH: ${{ github.workspace }}/.github/workflows/scripts
  WORKING_DIR: ${{ github.workspace }}
  # Prefix of the container name looked up by the "Docker Clean Up" step.
  CONTAINER_NAME: "codeScan"
  # `inputs` is empty on pull_request events, so fall back to the default here.
  INPUT_COMPILER_VERSION: ${{ inputs.compiler_version || '13.1.0' }}
jobs:
  # Runs the benchmark script once per model in the matrix and uploads that
  # model's summary log into the shared `cpp_graph` artifact.
  CPP-Graph-Workflow:
    runs-on: ${{ inputs.runner || 'spr' }}
    strategy:
      matrix:
        # `inputs.models` is only set for manual runs; otherwise use the default list.
        modelName: ${{ fromJson(inputs.models || '["llama-2-7b-chat", "gptj-6b"]') }}
    steps:
      - name: Checkout out Repo
        uses: actions/checkout@v3
        with:
          submodules: "recursive"
          fetch-tags: true

      - name: Env build
        run: |
          bash ${{ github.workspace }}/.github/workflows/scripts/prepare_env_with_conda.sh "cpp-graph-test-neural-speed" "3.8"

      - name: BF16 Benchmark
        run: |
          cd ${{ github.workspace }}/.github/workflows/scripts/models
          bash cpp_graph_inference.sh cpp-graph-test-neural-speed ${{ matrix.modelName }} ${{ env.INPUT_COMPILER_VERSION }}

      # Give each matrix leg a distinct file name so the logs can share one artifact.
      - name: Rename summary
        run: |
          cd ${{ github.workspace }}
          cp cpp_graph_summary.log cpp_graph_summary_${{matrix.modelName}}.log

      - name: Publish pipeline artifact
        uses: actions/upload-artifact@v3
        if: ${{ !cancelled() }}
        with:
          name: cpp_graph
          path: ${{ github.workspace }}/cpp_graph_summary_${{matrix.modelName}}.log
          if-no-files-found: ignore # 'warn' or 'error' are also available, defaults to `warn`
          retention-days: 60 # 1 <= retention-days <= 90

  # Collects the per-model summary logs, builds the HTML report and fails the
  # run when a performance regression was flagged.
  # NOTE(review): the job id is misspelled ("Genreate"); it is kept as-is because
  # required status checks may reference it.
  Genreate-Report:
    runs-on: ubuntu-latest
    needs: [CPP-Graph-Workflow]
    steps:
      # If a container named <CONTAINER_NAME>-<runner name> exists, start it and
      # wipe /neural-speed inside it.
      - name: Docker Clean Up
        run: |
          docker ps -a
          if [[ $(docker ps -a | grep -i '${{ env.CONTAINER_NAME }}-${{ runner.name }}'$) ]]; then
            docker start ${{ env.CONTAINER_NAME }}-${{ runner.name }}
            echo "remove left files through container ..."
            docker exec ${{ env.CONTAINER_NAME }}-${{ runner.name }} bash -c "ls -a /neural-speed && rm -fr /neural-speed/* && rm -fr /neural-speed/.* || true"
          fi

      - name: Checkout out Repo
        uses: actions/checkout@v3

      # No `name` given: every artifact of this run is downloaded into its own
      # sub-directory, so the logs land in generated/log/cpp_graph.
      - name: Download Summary Log
        uses: actions/download-artifact@v3
        with:
          path: ${{ env.OUT_SCRIPT_PATH }}/generated/log

      # Concatenate the per-model logs into a single cpp_graph_summary.log.
      - name: Merge CPP Graph Summary Log
        run: |
          cd ${{ env.OUT_SCRIPT_PATH }}/generated/log/cpp_graph
          for summary in $(find . -name "cpp_graph_summary_*.log"); do cat $summary >> cpp_graph_summary.log; done

      # Fetch the FinalReport artifact of the reference run (vars.GRAPH_REF_ID).
      - name: Download Reference Artifact
        id: download-artifact
        uses: dawidd6/action-download-artifact@v2
        with:
          workflow: cpp-graph-test.yml
          name: FinalReport
          run_id: ${{ vars.GRAPH_REF_ID }}
          path: ${{ env.OUT_SCRIPT_PATH }}
          name_is_regexp: true
          repo: ${{ github.repository }}
          check_artifacts: false
          search_artifacts: false
          skip_unpack: false
          if_no_artifact_found: warn

      - name: Display structure of downloaded files
        run: cd ${{ env.OUT_SCRIPT_PATH }} && ls -R

      # Build generated/report.html and append its <body> section to the job summary.
      - name: Generate report
        run: |
          echo "------ Generating final report.html ------"
          cd ${{ env.OUT_SCRIPT_PATH }}
          /usr/bin/bash generate_report.sh --workflow=deploy
          sed -n '/<body>/,/<\/body>/p' generated/report.html | sed -r '/^$/d' | sed -r 's/^ +//g' >> $GITHUB_STEP_SUMMARY
        env:
          # Built from the run's own context so the link includes the repository owner.
          RUN_DISPLAY_URL: ${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }}
          BUILD_NUMBER: ${{ github.run_id }}
          JOB_STATUS: succeed
          MR_source_branch: ${{ github.head_ref }}
          ghprbActualCommit: ${{ github.event.pull_request.head.sha }}

      - name: Publish Report
        uses: actions/upload-artifact@v3
        if: ${{ !cancelled() }}
        with:
          name: FinalReport
          path: ${{ env.OUT_SCRIPT_PATH }}/generated

      # Read the flag as an environment variable. `$(is_perf_reg)` would make bash
      # run a command of that name, so the check could never succeed.
      # NOTE(review): assumes generate_report.sh exports `is_perf_reg` through
      # $GITHUB_ENV - TODO confirm. When unset, the flag defaults to false.
      - name: Specify performance regression
        run: |
          if [ "${is_perf_reg:-false}" == 'true' ]; then
            echo "[Performance Regression] Some model performance regression occurred, please check artifacts and reports."
            exit 1
          fi