diff --git a/.github/ISSUE_TEMPLATE/bug_report.md b/.github/ISSUE_TEMPLATE/bug_report.md
new file mode 100644
index 0000000..837bdb7
--- /dev/null
+++ b/.github/ISSUE_TEMPLATE/bug_report.md
@@ -0,0 +1,38 @@
+---
+name: Bug report
+about: Create a report to help us improve
+title: 'bug: [DESCRIPTION]'
+labels: 'type: bug'
+assignees: ''
+
+---
+
+**Describe the bug**
+A clear and concise description of what the bug is.
+
+**To Reproduce**
+Steps to reproduce the behavior:
+1. Go to '...'
+2. Click on '....'
+3. Scroll down to '....'
+4. See error
+
+**Expected behavior**
+A clear and concise description of what you expected to happen.
+
+**Screenshots**
+If applicable, add screenshots to help explain your problem.
+
+**Desktop (please complete the following information):**
+ - OS: [e.g. iOS]
+ - Browser [e.g. chrome, safari]
+ - Version [e.g. 22]
+
+**Smartphone (please complete the following information):**
+ - Device: [e.g. iPhone6]
+ - OS: [e.g. iOS8.1]
+ - Browser [e.g. stock browser, safari]
+ - Version [e.g. 22]
+
+**Additional context**
+Add any other context about the problem here.
diff --git a/.github/ISSUE_TEMPLATE/discussion-thread.md b/.github/ISSUE_TEMPLATE/discussion-thread.md
new file mode 100644
index 0000000..09e52ae
--- /dev/null
+++ b/.github/ISSUE_TEMPLATE/discussion-thread.md
@@ -0,0 +1,14 @@
+---
+name: Discussion thread
+about: Start an open ended discussion
+title: 'Discussion: [TOPIC HERE]'
+labels: ''
+assignees: ''
+
+---
+
+**Motivation**
+
+**Discussion**
+
+**Resources**
diff --git a/.github/ISSUE_TEMPLATE/epic-request.md b/.github/ISSUE_TEMPLATE/epic-request.md
new file mode 100644
index 0000000..bfad8e5
--- /dev/null
+++ b/.github/ISSUE_TEMPLATE/epic-request.md
@@ -0,0 +1,20 @@
+---
+name: Epic request
+about: Suggest an idea for this project
+title: 'epic: [DESCRIPTION]'
+labels: 'type: epic'
+assignees: ''
+
+---
+
+**Problem**
+A clear and concise description of what the problem is. Ex. I'm always frustrated when [...]
+
+**Success Criteria**
+A clear and concise description of what you want to happen.
+
+**Sub Issues**
+- 
+
+**Additional context**
+Add any other context or screenshots about the epic request here.
diff --git a/.github/ISSUE_TEMPLATE/feature_request.md b/.github/ISSUE_TEMPLATE/feature_request.md
new file mode 100644
index 0000000..26f586b
--- /dev/null
+++ b/.github/ISSUE_TEMPLATE/feature_request.md
@@ -0,0 +1,17 @@
+---
+name: Feature request
+about: Suggest an idea for this project
+title: 'feat: [DESCRIPTION]'
+labels: 'type: feature request'
+assignees: ''
+
+---
+
+**Problem**
+A clear and concise description of what the problem is. Ex. I'm always frustrated when [...]
+
+**Success Criteria**
+A clear and concise description of what you want to happen.
+
+**Additional context**
+Add any other context or screenshots about the feature request here.
diff --git a/.github/release-drafter.yml b/.github/release-drafter.yml
new file mode 100644
index 0000000..dd7239d
--- /dev/null
+++ b/.github/release-drafter.yml
@@ -0,0 +1,26 @@
+categories:
+  - title: '🚀 Features'
+    labels:
+      - 'type: enhancement'
+      - 'type: epic'
+      - 'type: feature request'
+  - title: '🐛 Bug Fixes'
+    labels:
+      - 'type: bug'
+  - title: '🧰 Maintenance'
+    labels:
+      - 'type: chore'
+      - 'type: ci'
+  - title: '📖 Documentation'
+    labels:
+      - 'type: documentation'
+change-template: '- $TITLE @$AUTHOR (#$NUMBER)'
+change-title-escapes: '\<*_&' # You can add # and @ to disable mentions, and add ` to disable code blocks.
+template: |
+  ## Changes
+
+  $CHANGES
+
+  ## Contributor
+
+  $CONTRIBUTORS
\ No newline at end of file
diff --git a/.github/scripts/e2e-test-server.py b/.github/scripts/e2e-test-server.py
new file mode 100644
index 0000000..829212b
--- /dev/null
+++ b/.github/scripts/e2e-test-server.py
@@ -0,0 +1,189 @@
+import requests
+import json
+import subprocess
+import os
+import logging
+import sys
+import random
+import platform
+
+n = len(sys.argv)
+print("Total arguments passed:", n)
+if n < 3:
+    print("The number of arguments should >= 3")
+    exit(1)
+
+BINARY_PATH = sys.argv[1]
+if platform.system() == 'Windows':
+    BINARY_PATH += '.exe'
+MODEL_PATH = sys.argv[2]
+
+CONST_CTX_SIZE = 1024
+CONST_USER_ROLE = "user"
+CONST_ASSISTANT_ROLE = "assistant"
+
+
+
+logging.basicConfig(filename='./test.log',
+                    filemode='w',
+                    format='%(asctime)s,%(msecs)d %(name)s %(levelname)s %(message)s',
+                    datefmt='%H:%M:%S',
+                    level=logging.INFO)
+
+chat_data = []
+
+def RequestPost(req_data, url, is_stream = False):
+    try:
+        r = requests.post(url, json=req_data, stream=is_stream)
+        r.raise_for_status()
+        if is_stream:
+            if r.encoding is None:
+                r.encoding = 'utf-8'
+
+            res = ""
+            for line in r.iter_lines(decode_unicode=True):
+                if line and "[DONE]" not in line:
+                    data = json.loads(line[5:])
+                    content = data['choices'][0]['delta']['content']
+                    res += content
+            logging.info('{\'assistant\': \'' + res + '\'}')
+            chat_data.append({
+                "role": CONST_ASSISTANT_ROLE,
+                "content": res
+            })
+            # Can be an error when model generates garbage data
+            res_len = len(res.split())
+            if res_len >= CONST_CTX_SIZE - 50:
+                logging.warning("Maybe generated garbage data: " + str(res_len))
+                # return False
+        else:
+            res_json = r.json()
+            logging.info(res_json)
+
+        if r.status_code == 200:
+            return True
+        else:
+            logging.warning('{\'status_code\': ' + str(r.status_code) + '}')
+            return False
+    except requests.exceptions.RequestException as error:
+        logging.error(error)
+        return False
+
+def RequestGet(url):
+    try:
+        r = requests.get(url)
+        r.raise_for_status()
+        res_json = r.json()
+        logging.info(res_json)
+        if r.status_code == 200:
+            return True
+        else:
+            logging.warning('{\'status_code\': ' + str(r.status_code) + '}')
+            return False
+    except requests.exceptions.RequestException as error:
+        logging.error(error)
+        return False
+
+def StopServer():
+    url = "http://127.0.0.1:"+ str(port) + "/destroy"
+    try:
+        r = requests.delete(url)
+        logging.info(r.status_code)
+    except requests.ConnectionError as error:
+        logging.error(error)
+
+def CleanUp():
+    StopServer()
+    p.communicate()
+    with open('./test.log', 'r') as f:
+        print(f.read())
+
+
+def TestLoadChatModel():
+    new_data = {
+        "model_path": cwd + "/" + MODEL_PATH,
+        "user_prompt": "<|user|>",
+        "ai_prompt": "<|end|><|assistant|>",
+    }
+
+    url_post = "http://127.0.0.1:"+ str(port) + "/loadmodel"
+
+    res = RequestPost(new_data, url_post)
+    if not res:
+        CleanUp()
+        exit(1)
+
+def TestChatCompletion():
+    content = "How are you today?"
+    user_msg = {
+        "role": CONST_USER_ROLE,
+        "content": content
+    }
+    logging.info('{\'user\': \'' + content + '\'}')
+
+    chat_data.append(user_msg)
+    new_data = {
+        "frequency_penalty": 0,
+        "max_tokens": CONST_CTX_SIZE,
+        "messages": chat_data,
+        "presence_penalty": 0,
+        "stop": ["[/INST]", ""],
+        "stream": True,
+        "temperature": 0.7,
+        "top_p": 0.95
+    }
+
+    url_post = "http://127.0.0.1:"+ str(port) + "/v1/chat/completions"
+
+    res = RequestPost(new_data, url_post, True)
+    if not res:
+        CleanUp()
+        exit(1)
+
+    content = "Tell me a short story"
+    user_msg = {
+        "role": CONST_USER_ROLE,
+        "content": content
+    }
+    logging.info('{\'user\': \'' + content + '\'}')
+
+    chat_data.append(user_msg)
+
+    new_data = {
+        "frequency_penalty": 0,
+        "max_tokens": CONST_CTX_SIZE,
+        "messages": chat_data,
+        "presence_penalty": 0,
+        "stop": ["[/INST]", ""],
+        "stream": True,
+        "temperature": 0.7,
+        "top_p": 0.95
+    }
+
+    res = RequestPost(new_data, url_post, True)
+    if not res:
+        CleanUp()
+        exit(1)
+
+def TestUnloadModel():
+    new_data = {}
+
+    url_post = "http://127.0.0.1:"+ str(port) + "/unloadmodel"
+
+    res = RequestPost(new_data, url_post)
+    if not res:
+        CleanUp()
+        exit(1)
+
+port = random.randint(10000, 11000)
+
+cwd = os.getcwd()
+print(cwd)
+p = subprocess.Popen([cwd + '/' + BINARY_PATH, '127.0.0.1', str(port)])
+print("Server started!")
+
+TestLoadChatModel()
+TestChatCompletion()
+TestUnloadModel()
+CleanUp()
+
diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml
new file mode 100644
index 0000000..64d32fd
--- /dev/null
+++ b/.github/workflows/build.yml
@@ -0,0 +1,122 @@
+name: CI
+
+on:
+  push:
+    tags: ["v[0-9]+.[0-9]+.[0-9]+"]
+    paths:
+      [
+        ".github/scripts/**",
+        ".github/workflows/build.yml",
+        "**/CMakeLists.txt",
+        "**/Makefile",
+        "**/*.h",
+        "**/*.hpp",
+        "**/*.c",
+        "**/*.cpp",
+        "**/*.cu",
+        "**/*.cc",
+        "**/*.cxx",
+        "llama.cpp",
+        "!docs/**",
+        "!.gitignore",
+        "!README.md",
+      ]
+  workflow_dispatch:
+
+jobs:
+  create-draft-release:
+    runs-on: ubuntu-latest
+    if: github.event_name == 'push' && startsWith(github.ref, 'refs/tags/')
+    outputs:
+      upload_url: ${{ steps.create_release.outputs.upload_url }}
+      version: ${{ steps.get_version.outputs.version }}
+    permissions:
+      contents: write
+    steps:
+      - name: Extract tag name without v prefix
+        id: get_version
+        run: echo "VERSION=${GITHUB_REF#refs/tags/v}" >> $GITHUB_ENV && echo "version=${GITHUB_REF#refs/tags/v}" >> $GITHUB_OUTPUT
+        env:
+          GITHUB_REF: ${{ github.ref }}
+      - name: Create Draft Release
+        id: create_release
+        uses: actions/create-release@v1
+        env:
+          GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
+        with:
+          tag_name: ${{ github.ref_name }}
+          release_name: "${{ env.VERSION }}"
+          draft: true
+          prerelease: false
+
+  build-and-test:
+    runs-on: ${{ matrix.runs-on }}
+    needs: [create-draft-release]
+    timeout-minutes: 40
+    strategy:
+      matrix:
+        include:
+          - os: "windows"
+            name: "amd64"
+            runs-on: "windows-amd"
+            cmake-flags: ""
+            run-e2e: true
+
+    steps:
+      - name: Clone
+        id: checkout
+        uses: actions/checkout@v3
+        with:
+          submodules: recursive
+
+      - name: Install make on Windows
+        if: runner.os == 'windows'
+        run: |
+          choco install make -y
+
+      - name: Install dependencies
+        run: |
+          make install-dependencies
+
+      - name: Build onnx runtime
+        run: |
+          make build-onnxruntime
+
+      - name: Build engine
+        run: |
+          make build-engine
+
+      - name: Build example server
+        run: |
+          make build-example-server
+
+      - name: Install Python
+        if: ${{matrix.run-e2e}}
+        uses: actions/setup-python@v4
+        with:
+          python-version: '3.10'
+
+      - name: Run e2e test
+        if: ${{matrix.run-e2e}}
+        run: |
+          make run-e2e-test
+
+      - name: Package
+        run: |
+          make package
+
+      - name: Upload Artifact
+        uses: actions/upload-artifact@v2
+        with:
+          name: cortex.onnx-${{ matrix.os }}-${{ matrix.name }}
+          path: ./cortex.onnx
+
+      - uses: actions/upload-release-asset@v1.0.1
+        if: github.event_name == 'push' && startsWith(github.ref, 'refs/tags/')
+        env:
+          GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
+        with:
+          upload_url: ${{ needs.create-draft-release.outputs.upload_url }}
+          asset_path: ./cortex.onnx.tar.gz
+          asset_name: cortex.onnx-${{ needs.create-draft-release.outputs.version }}-${{ matrix.os }}-${{ matrix.name }}.tar.gz
+          asset_content_type: application/gzip
diff --git a/.github/workflows/quality-gate.yml b/.github/workflows/quality-gate.yml
new file mode 100644
index 0000000..7c1a966
--- /dev/null
+++ b/.github/workflows/quality-gate.yml
@@ -0,0 +1,68 @@
+name: CI
+
+on:
+  pull_request:
+    types: [opened, synchronize, reopened]
+  workflow_dispatch:
+
+jobs:
+  build-and-test:
+    runs-on: ${{ matrix.runs-on }}
+    timeout-minutes: 40
+    strategy:
+      matrix:
+        include:
+          - os: "windows"
+            name: "amd64"
+            runs-on: "windows-amd"
+            cmake-flags: ""
+            run-e2e: true
+
+    steps:
+      - name: Clone
+        id: checkout
+        uses: actions/checkout@v3
+        with:
+          submodules: recursive
+
+      - name: Install make on Windows
+        if: runner.os == 'windows'
+        run: |
+          choco install make -y
+
+      - name: Install dependencies
+        run: |
+          make install-dependencies
+
+      - name: Build onnx runtime
+        run: |
+          make build-onnxruntime
+
+      - name: Build engine
+        run: |
+          make build-engine
+
+      - name: Build example server
+        run: |
+          make build-example-server
+
+      - name: Install Python
+        if: ${{matrix.run-e2e}}
+        uses: actions/setup-python@v4
+        with:
+          python-version: '3.10'
+
+      - name: Run e2e test
+        if: ${{matrix.run-e2e}}
+        run: |
+          make run-e2e-test
+
+      - name: Package
+        run: |
+          make package
+
+      - name: Upload Artifact
+        uses: actions/upload-artifact@v2
+        with:
+          name: cortex.onnx-${{ matrix.os }}-${{ matrix.name }}
+          path: ./cortex.onnx
diff --git a/Makefile b/Makefile
new file mode 100644
index 0000000..2bd975a
--- /dev/null
+++ b/Makefile
@@ -0,0 +1,79 @@
+# Makefile for Cortex onnx engine - Build, Lint, Test, and Clean
+
+CMAKE_EXTRA_FLAGS ?= ""
+RUN_TESTS ?= true
+MODEL_PATH ?= ./directml/directml-int4-awq-block-128/
+
+# Default target, does nothing
+all:
+	@echo "Specify a target to run"
+
+# Build the Cortex onnx engine
+install-dependencies:
+ifeq ($(OS),Windows_NT) # Windows
+	@powershell -Command "cmake -S ./third-party -B ./build_deps/third-party;"
+	@powershell -Command "cmake --build ./build_deps/third-party --config Release -j4;"
+else # Unix-like systems (Linux and MacOS)
+	@echo "Skipping install dependencies"
+	@exit 0
+endif
+
+build-onnxruntime:
+ifeq ($(OS),Windows_NT) # Windows
+	@powershell -Command "cmake -S .\onnxruntime-genai\ -B .\onnxruntime-genai\build -DUSE_DML=ON -DUSE_CUDA=OFF -DENABLE_PYTHON=OFF -DORT_HOME=\".\build_deps\ort\";"
+	@powershell -Command "cmake --build .\onnxruntime-genai\build --config Release -j4;"
+else # Unix-like systems (Linux and MacOS)
+	@echo "Skipping install dependencies"
+	@exit 0
+endif
+
+build-engine:
+ifeq ($(OS),Windows_NT)
+	@powershell -Command "mkdir -p build; cd build; cmake .. $(CMAKE_EXTRA_FLAGS); cmake --build . --config Release;"
+else
+	@echo "Skipping build engine"
+	@exit 0
+endif
+
+build-example-server:
+ifeq ($(OS),Windows_NT)
+	@powershell -Command "mkdir -p .\examples\server\build\Release\engines\cortex.onnx; cd .\examples\server\build; cmake .. $(CMAKE_EXTRA_FLAGS); cmake --build . --config Release;"
+	@powershell -Command "cp .\build_deps\ort\lib\*.dll .\examples\server\build\Release\;"
+	@powershell -Command "cp .\onnxruntime-genai\build\Release\onnxruntime-genai.dll .\examples\server\build\Release\;"
+	@powershell -Command "cp .\build\Release\engine.dll .\examples\server\build\Release\engines\cortex.onnx\;"
+else
+	@echo "Skipping build example server"
+	@exit 0
+endif
+
+package:
+ifeq ($(OS),Windows_NT)
+	@powershell -Command "mkdir -p cortex.onnx; cp build\Release\engine.dll cortex.onnx\; cp .\examples\server\build\Release\*.dll cortex.onnx\; 7z a -ttar temp.tar cortex.onnx\*; 7z a -tgzip cortex.onnx.tar.gz temp.tar;"
+else
+	@echo "Skipping package"
+	@exit 0
+endif
+
+run-e2e-test:
+ifeq ($(RUN_TESTS),false)
+	@echo "Skipping tests"
+else
+ifeq ($(OS),Windows_NT)
+	@powershell -Command "python -m pip install --upgrade pip;"
+	@powershell -Command "python -m pip install requests;"
+	@powershell -Command "python -m pip install huggingface-hub[cli];"
+	@powershell -Command "huggingface-cli download microsoft/Phi-3-mini-4k-instruct-onnx --include directml/* --local-dir . ;"
+	@powershell -Command "cd examples\server\build\Release; python ..\..\..\..\.github\scripts\e2e-test-server.py server ..\..\..\..\$(MODEL_PATH);"
+else
+	@echo "Skipping run e2e test"
+	@exit 0
+endif
+endif
+
+clean:
+ifeq ($(OS),Windows_NT)
+	cmd /C "rmdir /S /Q build examples\\server\\build cortex.onnx cortex.onnx.tar.gz cortex.onnx.zip"
+else
+	@echo "Skipping clean"
+	@exit 0
+endif
\ No newline at end of file
diff --git a/build_cortex_onnx.bat b/build_cortex_onnx.bat
index a10464c..bd321d4 100644
--- a/build_cortex_onnx.bat
+++ b/build_cortex_onnx.bat
@@ -1,5 +1,5 @@
 cmake -S ./third-party -B ./build_deps/third-party
 cmake --build ./build_deps/third-party --config Release -j4
-cmake -S .\onnxruntime-genai\ -B .\onnxruntime-genai\build -DUSE_DML=ON -DUSE_CUDA=OFF -DENABLE_PYTHON=OFF -DORT_HOME=./build_deps/ort
+cmake -S .\onnxruntime-genai\ -B .\onnxruntime-genai\build -DUSE_DML=ON -DUSE_CUDA=OFF -DORT_HOME="./build_deps/ort" -DENABLE_PYTHON=OFF
 cmake --build .\onnxruntime-genai\build --config Release -j4