diff --git a/.github/ISSUE_TEMPLATE/bug_report.md b/.github/ISSUE_TEMPLATE/bug_report.md
new file mode 100644
index 0000000..837bdb7
--- /dev/null
+++ b/.github/ISSUE_TEMPLATE/bug_report.md
@@ -0,0 +1,38 @@
+---
+name: Bug report
+about: Create a report to help us improve
+title: 'bug: [DESCRIPTION]'
+labels: 'type: bug'
+assignees: ''
+
+---
+
+**Describe the bug**
+A clear and concise description of what the bug is.
+
+**To Reproduce**
+Steps to reproduce the behavior:
+1. Go to '...'
+2. Click on '....'
+3. Scroll down to '....'
+4. See error
+
+**Expected behavior**
+A clear and concise description of what you expected to happen.
+
+**Screenshots**
+If applicable, add screenshots to help explain your problem.
+
+**Desktop (please complete the following information):**
+ - OS: [e.g. iOS]
+ - Browser [e.g. chrome, safari]
+ - Version [e.g. 22]
+
+**Smartphone (please complete the following information):**
+ - Device: [e.g. iPhone6]
+ - OS: [e.g. iOS8.1]
+ - Browser [e.g. stock browser, safari]
+ - Version [e.g. 22]
+
+**Additional context**
+Add any other context about the problem here.
diff --git a/.github/ISSUE_TEMPLATE/discussion-thread.md b/.github/ISSUE_TEMPLATE/discussion-thread.md
new file mode 100644
index 0000000..09e52ae
--- /dev/null
+++ b/.github/ISSUE_TEMPLATE/discussion-thread.md
@@ -0,0 +1,14 @@
+---
+name: Discussion thread
+about: Start an open ended discussion
+title: 'Discussion: [TOPIC HERE]'
+labels: ''
+assignees: ''
+
+---
+
+**Motivation**
+
+**Discussion**
+
+**Resources**
diff --git a/.github/ISSUE_TEMPLATE/epic-request.md b/.github/ISSUE_TEMPLATE/epic-request.md
new file mode 100644
index 0000000..bfad8e5
--- /dev/null
+++ b/.github/ISSUE_TEMPLATE/epic-request.md
@@ -0,0 +1,20 @@
+---
+name: Epic request
+about: Suggest an idea for this project
+title: 'epic: [DESCRIPTION]'
+labels: 'type: epic'
+assignees: ''
+
+---
+
+**Problem**
+A clear and concise description of what the problem is. Ex. I'm always frustrated when [...]
+
+**Success Criteria**
+A clear and concise description of what you want to happen.
+
+**Sub Issues**
+- 
+
+**Additional context**
+Add any other context or screenshots about the epic request here.
diff --git a/.github/ISSUE_TEMPLATE/feature_request.md b/.github/ISSUE_TEMPLATE/feature_request.md
new file mode 100644
index 0000000..26f586b
--- /dev/null
+++ b/.github/ISSUE_TEMPLATE/feature_request.md
@@ -0,0 +1,17 @@
+---
+name: Feature request
+about: Suggest an idea for this project
+title: 'feat: [DESCRIPTION]'
+labels: 'type: feature request'
+assignees: ''
+
+---
+
+**Problem**
+A clear and concise description of what the problem is. Ex. I'm always frustrated when [...]
+
+**Success Criteria**
+A clear and concise description of what you want to happen.
+
+**Additional context**
+Add any other context or screenshots about the feature request here.
diff --git a/.github/release-drafter.yml b/.github/release-drafter.yml
new file mode 100644
index 0000000..dd7239d
--- /dev/null
+++ b/.github/release-drafter.yml
@@ -0,0 +1,26 @@
+categories:
+  - title: '🚀 Features'
+    labels:
+      - 'type: enhancement'
+      - 'type: epic'
+      - 'type: feature request'
+  - title: '🐛 Bug Fixes'
+    labels:
+      - 'type: bug'
+  - title: '🧰 Maintenance'
+    labels:
+      - 'type: chore'
+      - 'type: ci'
+  - title: '📖 Documentation'
+    labels:
+      - 'type: documentation'
+change-template: '- $TITLE @$AUTHOR (#$NUMBER)'
+change-title-escapes: '\<*_&' # You can add # and @ to disable mentions, and add ` to disable code blocks.
+template: |
+  ## Changes
+
+  $CHANGES
+
+  ## Contributor
+
+  $CONTRIBUTORS
\ No newline at end of file
diff --git a/.github/scripts/e2e-test-server.py b/.github/scripts/e2e-test-server.py
new file mode 100644
index 0000000..829212b
--- /dev/null
+++ b/.github/scripts/e2e-test-server.py
@@ -0,0 +1,189 @@
+import requests
+import json
+import subprocess
+import os
+import logging
+import sys
+import random
+import platform
+
+n = len(sys.argv)
+print("Total arguments passed:", n)
+if n < 3:
+    print("The number of arguments should >= 3")
+    exit(1)
+
+BINARY_PATH = sys.argv[1]
+if platform.system() == 'Windows':
+    BINARY_PATH += '.exe'
+MODEL_PATH = sys.argv[2]
+
+CONST_CTX_SIZE = 1024
+CONST_USER_ROLE = "user"
+CONST_ASSISTANT_ROLE = "assistant"
+
+
+
+logging.basicConfig(filename='./test.log',
+                    filemode='w',
+                    format='%(asctime)s,%(msecs)d %(name)s %(levelname)s %(message)s',
+                    datefmt='%H:%M:%S',
+                    level=logging.INFO)
+
+chat_data = []
+
+def RequestPost(req_data, url, is_stream = False):
+    try:
+        r = requests.post(url, json=req_data, stream=is_stream)
+        r.raise_for_status()
+        if is_stream:
+            if r.encoding is None:
+                r.encoding = 'utf-8'
+
+            res = ""
+            for line in r.iter_lines(decode_unicode=True):
+                if line and "[DONE]" not in line:
+                    data = json.loads(line[5:])
+                    content = data['choices'][0]['delta']['content']
+                    res += content
+            logging.info('{\'assistant\': \'' + res + '\'}')
+            chat_data.append({
+                "role": CONST_ASSISTANT_ROLE,
+                "content": res
+            })
+            # Can be an error when model generates garbage data
+            res_len = len(res.split())
+            if res_len >= CONST_CTX_SIZE - 50:
+                logging.warning("Maybe generated garbage data: " + str(res_len))
+                # return False
+        else:
+            res_json = r.json()
+            logging.info(res_json)
+
+        if r.status_code == 200:
+            return True
+        else:
+            logging.warning('{\'status_code\': ' + str(r.status_code) + '}')
+            return False
+    except requests.exceptions.RequestException as error:
+        logging.error(error)
+        return False
+
+def RequestGet(url):
+    try:
+        r = requests.get(url)
+        r.raise_for_status()
+        res_json = r.json()
+        logging.info(res_json)
+        if r.status_code == 200:
+            return True
+        else:
+            logging.warning('{\'status_code\': ' + str(r.status_code) + '}')
+            return False
+    except requests.exceptions.RequestException as error:
+        logging.error(error)
+        return False
+
+def StopServer():
+    url = "http://127.0.0.1:"+ str(port) + "/destroy"
+    try:
+        r = requests.delete(url)
+        logging.info(r.status_code)
+    except requests.ConnectionError as error:
+        logging.error(error)
+
+def CleanUp():
+    StopServer()
+    p.communicate()
+    with open('./test.log', 'r') as f:
+        print(f.read())
+
+
+def TestLoadChatModel():
+    new_data = {
+        "model_path": cwd + "/" + MODEL_PATH,
+        "user_prompt": "<|user|>",
+        "ai_prompt": "<|end|><|assistant|>",
+    }
+
+    url_post = "http://127.0.0.1:"+ str(port) + "/loadmodel"
+
+    res = RequestPost(new_data, url_post)
+    if not res:
+        CleanUp()
+        exit(1)
+
+def TestChatCompletion():
+    content = "How are you today?"
+    user_msg = {
+        "role": CONST_USER_ROLE,
+        "content": content
+    }
+    logging.info('{\'user\': \'' + content + '\'}')
+
+    chat_data.append(user_msg)
+    new_data = {
+        "frequency_penalty": 0,
+        "max_tokens": CONST_CTX_SIZE,
+        "messages": chat_data,
+        "presence_penalty": 0,
+        "stop": ["[/INST]", ""],
+        "stream": True,
+        "temperature": 0.7,
+        "top_p": 0.95
+    }
+
+    url_post = "http://127.0.0.1:"+ str(port) + "/v1/chat/completions"
+
+    res = RequestPost(new_data, url_post, True)
+    if not res:
+        CleanUp()
+        exit(1)
+
+    content = "Tell me a short story"
+    user_msg = {
+        "role": CONST_USER_ROLE,
+        "content": content
+    }
+    logging.info('{\'user\': \'' + content + '\'}')
+
+    chat_data.append(user_msg)
+
+    new_data = {
+        "frequency_penalty": 0,
+        "max_tokens": CONST_CTX_SIZE,
+        "messages": chat_data,
+        "presence_penalty": 0,
+        "stop": ["[/INST]", ""],
+        "stream": True,
+        "temperature": 0.7,
+        "top_p": 0.95
+    }
+
+    res = RequestPost(new_data, url_post, True)
+    if not res:
+        CleanUp()
+        exit(1)
+
+def TestUnloadModel():
+    new_data = {}
+
+    url_post = "http://127.0.0.1:"+ str(port) + "/unloadmodel"
+
+    res = RequestPost(new_data, url_post)
+    if not res:
+        CleanUp()
+        exit(1)
+
+port = random.randint(10000, 11000)
+
+cwd = os.getcwd()
+print(cwd)
+p = subprocess.Popen([cwd + '/' + BINARY_PATH, '127.0.0.1', str(port)])
+print("Server started!")
+
+TestLoadChatModel()
+TestChatCompletion()
+TestUnloadModel()
+CleanUp()
+
diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml
new file mode 100644
index 0000000..64d32fd
--- /dev/null
+++ b/.github/workflows/build.yml
@@ -0,0 +1,122 @@
+name: CI
+
+on:
+  push:
+    tags: ["v[0-9]+.[0-9]+.[0-9]+"]
+    paths:
+      [
+        ".github/scripts/**",
+        ".github/workflows/build.yml",
+        "**/CMakeLists.txt",
+        "**/Makefile",
+        "**/*.h",
+        "**/*.hpp",
+        "**/*.c",
+        "**/*.cpp",
+        "**/*.cu",
+        "**/*.cc",
+        "**/*.cxx",
+        "llama.cpp",
+        "!docs/**",
+        "!.gitignore",
+        "!README.md",
+      ]
+  workflow_dispatch:
+
+jobs:
+  create-draft-release:
+    runs-on: ubuntu-latest
+    if: github.event_name == 'push' && startsWith(github.ref, 'refs/tags/')
+    outputs:
+      upload_url: ${{ steps.create_release.outputs.upload_url }}
+      version: ${{ steps.get_version.outputs.version }}
+    permissions:
+      contents: write
+    steps:
+      - name: Extract tag name without v prefix
+        id: get_version
+        run: echo "VERSION=${GITHUB_REF#refs/tags/v}" >> $GITHUB_ENV && echo "version=${GITHUB_REF#refs/tags/v}" >> $GITHUB_OUTPUT
+        env:
+          GITHUB_REF: ${{ github.ref }}
+      - name: Create Draft Release
+        id: create_release
+        uses: actions/create-release@v1
+        env:
+          GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
+        with:
+          tag_name: ${{ github.ref_name }}
+          release_name: "${{ env.VERSION }}"
+          draft: true
+          prerelease: false
+
+  build-and-test:
+    runs-on: ${{ matrix.runs-on }}
+    needs: [create-draft-release]
+    timeout-minutes: 40
+    strategy:
+      matrix:
+        include:
+          - os: "windows"
+            name: "amd64"
+            runs-on: "windows-amd"
+            cmake-flags: ""
+            run-e2e: true
+
+    steps:
+      - name: Clone
+        id: checkout
+        uses: actions/checkout@v3
+        with:
+          submodules: recursive
+
+      - name: Install make on Windows
+        if: runner.os == 'windows'
+        run: |
+          choco install make -y
+
+      - name: Install dependencies
+        run: |
+          make install-dependencies
+
+      - name: Build onnx runtime
+        run: |
+          make build-onnxruntime
+
+      - name: Build engine
+        run: |
+          make build-engine
+
+      - name: Build example server
+        run: |
+          make build-example-server
+
+      - name: Install Python
+        if: ${{matrix.run-e2e}}
+        uses: actions/setup-python@v4
+        with:
+          python-version: '3.10'
+
+      - name: Run e2e test
+        if: ${{matrix.run-e2e}}
+        run: |
+          make run-e2e-test
+
+      - name: Package
+        run: |
+          make package
+
+      - name: Upload Artifact
+        uses: actions/upload-artifact@v2
+        with:
+          name: cortex.onnx-${{ matrix.os }}-${{ matrix.name }}
+          path: ./cortex.onnx
+
+      - uses: actions/upload-release-asset@v1.0.1
+        if: github.event_name == 'push' && startsWith(github.ref, 'refs/tags/')
+        env:
+          GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
+        with:
+          upload_url: ${{ needs.create-draft-release.outputs.upload_url }}
+          asset_path: ./cortex.onnx.tar.gz
+          asset_name: cortex.onnx-${{ needs.create-draft-release.outputs.version }}-${{ matrix.os }}-${{ matrix.name }}.tar.gz
+          asset_content_type: application/gzip
diff --git a/.github/workflows/quality-gate.yml b/.github/workflows/quality-gate.yml
new file mode 100644
index 0000000..7c1a966
--- /dev/null
+++ b/.github/workflows/quality-gate.yml
@@ -0,0 +1,68 @@
+name: CI
+
+on:
+  pull_request:
+    types: [opened, synchronize, reopened]
+  workflow_dispatch:
+
+jobs:
+  build-and-test:
+    runs-on: ${{ matrix.runs-on }}
+    timeout-minutes: 40
+    strategy:
+      matrix:
+        include:
+          - os: "windows"
+            name: "amd64"
+            runs-on: "windows-amd"
+            cmake-flags: ""
+            run-e2e: true
+
+    steps:
+      - name: Clone
+        id: checkout
+        uses: actions/checkout@v3
+        with:
+          submodules: recursive
+
+      - name: Install make on Windows
+        if: runner.os == 'windows'
+        run: |
+          choco install make -y
+
+      - name: Install dependencies
+        run: |
+          make install-dependencies
+
+      - name: Build onnx runtime
+        run: |
+          make build-onnxruntime
+
+      - name: Build engine
+        run: |
+          make build-engine
+
+      - name: Build example server
+        run: |
+          make build-example-server
+
+      - name: Install Python
+        if: ${{matrix.run-e2e}}
+        uses: actions/setup-python@v4
+        with:
+          python-version: '3.10'
+
+      - name: Run e2e test
+        if: ${{matrix.run-e2e}}
+        run: |
+          make run-e2e-test
+
+      - name: Package
+        run: |
+          make package
+
+      - name: Upload Artifact
+        uses: actions/upload-artifact@v2
+        with:
+          name: cortex.onnx-${{ matrix.os }}-${{ matrix.name }}
+          path: ./cortex.onnx
diff --git a/Makefile b/Makefile
new file mode 100644
index 0000000..2bd975a
--- /dev/null
+++ b/Makefile
@@ -0,0 +1,79 @@
+# Makefile for Cortex onnx engine - Build, Lint, Test, and Clean
+
+CMAKE_EXTRA_FLAGS ?= ""
+RUN_TESTS ?= true
+MODEL_PATH ?= ./directml/directml-int4-awq-block-128/
+
+# Default target, does nothing
+all:
+	@echo "Specify a target to run"
+
+# Build the Cortex onnx engine
+install-dependencies:
+ifeq ($(OS),Windows_NT) # Windows
+	@powershell -Command "cmake -S ./third-party -B ./build_deps/third-party;"
+	@powershell -Command "cmake --build ./build_deps/third-party --config Release -j4;"
+else # Unix-like systems (Linux and MacOS)
+	@echo "Skipping install dependencies"
+	@exit 0
+endif
+
+build-onnxruntime:
+ifeq ($(OS),Windows_NT) # Windows
+	@powershell -Command "cmake -S .\onnxruntime-genai\ -B .\onnxruntime-genai\build -DUSE_DML=ON -DUSE_CUDA=OFF -DENABLE_PYTHON=OFF -DORT_HOME=\".\build_deps\ort\";"
+	@powershell -Command "cmake --build .\onnxruntime-genai\build --config Release -j4;"
+else # Unix-like systems (Linux and MacOS)
+	@echo "Skipping install dependencies"
+	@exit 0
+endif
+
+build-engine:
+ifeq ($(OS),Windows_NT)
+	@powershell -Command "mkdir -p build; cd build; cmake .. $(CMAKE_EXTRA_FLAGS); cmake --build . --config Release;"
+else
+	@echo "Skipping build engine"
+	@exit 0
+endif
+
+build-example-server:
+ifeq ($(OS),Windows_NT)
+	@powershell -Command "mkdir -p .\examples\server\build\Release\engines\cortex.onnx; cd .\examples\server\build; cmake .. $(CMAKE_EXTRA_FLAGS); cmake --build . --config Release;"
+	@powershell -Command "cp .\build_deps\ort\lib\*.dll .\examples\server\build\Release\;"
+	@powershell -Command "cp .\onnxruntime-genai\build\Release\onnxruntime-genai.dll .\examples\server\build\Release\;"
+	@powershell -Command "cp .\build\Release\engine.dll .\examples\server\build\Release\engines\cortex.onnx\;"
+else
+	@echo "Skipping build example server"
+	@exit 0
+endif
+
+package:
+ifeq ($(OS),Windows_NT)
+	@powershell -Command "mkdir -p cortex.onnx; cp build\Release\engine.dll cortex.onnx\; cp .\examples\server\build\Release\*.dll cortex.onnx\; 7z a -ttar temp.tar cortex.onnx\*; 7z a -tgzip cortex.onnx.tar.gz temp.tar;"
+else
+	@echo "Skipping package"
+	@exit 0
+endif
+
+run-e2e-test:
+ifeq ($(RUN_TESTS),false)
+	@echo "Skipping tests"
+else
+ifeq ($(OS),Windows_NT)
+	@powershell -Command "python -m pip install --upgrade pip;"
+	@powershell -Command "python -m pip install requests;"
+	@powershell -Command "python -m pip install huggingface-hub[cli];"
+	@powershell -Command "huggingface-cli download microsoft/Phi-3-mini-4k-instruct-onnx --include directml/* --local-dir . ;"
+	@powershell -Command "cd examples\server\build\Release; python ..\..\..\..\.github\scripts\e2e-test-server.py server ..\..\..\..\$(MODEL_PATH);"
+else
+	@echo "Skipping run e2e test"
+	@exit 0
+endif
+endif
+
+clean:
+ifeq ($(OS),Windows_NT)
+	cmd /C "rmdir /S /Q build examples\\server\\build cortex.onnx cortex.onnx.tar.gz cortex.onnx.zip"
+else
+	@echo "Skipping clean"
+	@exit 0
+endif
\ No newline at end of file
diff --git a/build_cortex_onnx.bat b/build_cortex_onnx.bat
index a10464c..bd321d4 100644
--- a/build_cortex_onnx.bat
+++ b/build_cortex_onnx.bat
@@ -1,5 +1,5 @@
 cmake -S ./third-party -B ./build_deps/third-party
 cmake --build ./build_deps/third-party --config Release -j4
-cmake -S .\onnxruntime-genai\ -B .\onnxruntime-genai\build -DUSE_DML=ON -DUSE_CUDA=OFF -DENABLE_PYTHON=OFF -DORT_HOME=./build_deps/ort
+cmake -S .\onnxruntime-genai\ -B .\onnxruntime-genai\build -DUSE_DML=ON -DUSE_CUDA=OFF -DORT_HOME="./build_deps/ort" -DENABLE_PYTHON=OFF
 cmake --build .\onnxruntime-genai\build --config Release -j4