Skip to content

Commit

Permalink
Feat e2e test cortexso hub (#1590)
Browse files Browse the repository at this point in the history
* feat: e2e testing cortexso model hub

* chore: schedule to run models test weekly

* chore: resolve warning pytest

* chore: use default branch cortexso hub

---------

Co-authored-by: Hien To <[email protected]>
  • Loading branch information
hiento09 and hientominh authored Nov 1, 2024
1 parent f37ad6b commit 11cd87b
Show file tree
Hide file tree
Showing 4 changed files with 278 additions and 0 deletions.
109 changes: 109 additions & 0 deletions .github/workflows/test-cortexso-model-hub.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,109 @@
name: Test cortexso Model Hub

on:
  schedule:
    # 16:00 UTC = 23:00 UTC+7, every Friday.
    - cron: "0 16 * * 5" # every Friday at 23:00 UTC+7
  workflow_dispatch:

jobs:
  build-and-test:
    runs-on: ${{ matrix.runs-on }}
    # Model downloads and inference runs are slow; allow up to 24 hours.
    timeout-minutes: 1440
    strategy:
      fail-fast: false
      matrix:
        include:
          - os: "linux"
            name: "amd64"
            runs-on: "ubuntu-20-04-e2e-cortexcpp-model-hub"
            cmake-flags: "-DCORTEX_CPP_VERSION=${{github.head_ref}} -DCMAKE_BUILD_TEST=ON -DCMAKE_TOOLCHAIN_FILE=vcpkg/scripts/buildsystems/vcpkg.cmake"
            build-deps-cmake-flags: ""
            ccache-dir: ""
    steps:
      - name: Clone
        id: checkout
        uses: actions/checkout@v3
        with:
          submodules: recursive

      - name: use python
        uses: actions/setup-python@v5
        with:
          python-version: "3.10"

      - name: Install tools on Linux
        run: |
          sudo chown -R runner:runner /home/runner/cortexcpp
          python3 -m pip install awscli

      # Best-effort restore of the vcpkg cache; a miss only slows the build.
      - name: Download vcpkg cache from s3
        continue-on-error: true
        run: |
          aws s3 sync s3://${{ secrets.MINIO_BUCKET_NAME }}/cortex-cpp-vcpkg-linux /home/runner/.cache/vcpkg --endpoint ${{ secrets.MINIO_ENDPOINT }} --cli-read-timeout 0
        env:
          AWS_ACCESS_KEY_ID: "${{ secrets.MINIO_ACCESS_KEY_ID }}"
          AWS_SECRET_ACCESS_KEY: "${{ secrets.MINIO_SECRET_ACCESS_KEY }}"
          AWS_DEFAULT_REGION: "${{ secrets.MINIO_REGION }}"

      - name: Configure vcpkg
        run: |
          cd engine
          make configure-vcpkg

      - name: Build
        run: |
          cd engine
          make build CMAKE_EXTRA_FLAGS="${{ matrix.cmake-flags }}" BUILD_DEPS_CMAKE_EXTRA_FLAGS="${{ matrix.build-deps-cmake-flags }}"

      - name: Run unit tests
        run: |
          cd engine
          make run-unit-tests

      # Inject the HF read token into the cortex config so gated models can be pulled.
      - name: Run setup config for linux
        shell: bash
        run: |
          cd engine
          ./build/cortex --version
          sed -i 's/huggingFaceToken: ""/huggingFaceToken: "${{ secrets.HUGGINGFACE_TOKEN_READ }}"/' ~/.cortexrc

      - name: Run e2e tests
        run: |
          cd engine
          cp build/cortex build/cortex-nightly
          cp build/cortex build/cortex-beta
          python -m pip install --upgrade pip
          python -m pip install -r e2e-test/requirements.txt
          pytest e2e-test/test_api_cortexso_hub_llamacpp_engine.py
          rm build/cortex-nightly
          rm build/cortex-beta
        env:
          GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
          HF_TOKEN: ${{ secrets.HUGGINGFACE_TOKEN_E2E }}

      - name: Pre-package
        run: |
          cd engine
          make pre-package DESTINATION_BINARY_NAME="cortex"

      - name: Package
        run: |
          cd engine
          make package

      - name: Upload Artifact
        uses: actions/upload-artifact@v4
        with:
          name: cortex-${{ matrix.os }}-${{ matrix.name }}
          path: ./engine/cortex

      # Always attempt to refresh the cache, even when an earlier step failed.
      - name: Upload linux vcpkg cache to s3
        continue-on-error: true
        if: always()
        run: |
          aws s3 sync /home/runner/.cache/vcpkg s3://${{ secrets.MINIO_BUCKET_NAME }}/cortex-cpp-vcpkg-linux --endpoint ${{ secrets.MINIO_ENDPOINT }}
        env:
          AWS_ACCESS_KEY_ID: "${{ secrets.MINIO_ACCESS_KEY_ID }}"
          AWS_SECRET_ACCESS_KEY: "${{ secrets.MINIO_SECRET_ACCESS_KEY }}"
          AWS_DEFAULT_REGION: "${{ secrets.MINIO_REGION }}"
2 changes: 2 additions & 0 deletions engine/e2e-test/pytest.ini
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
[pytest]
# Pin the default event-loop scope for async fixtures to one loop per test
# function — set explicitly, presumably to silence the pytest-asyncio
# unset-option warning mentioned in the commit ("resolve warning pytest").
asyncio_default_fixture_loop_scope = function
1 change: 1 addition & 0 deletions engine/e2e-test/requirements.txt
Original file line number Diff line number Diff line change
Expand Up @@ -2,3 +2,4 @@ websockets
pytest
pytest-asyncio
requests
pyyaml
166 changes: 166 additions & 0 deletions engine/e2e-test/test_api_cortexso_hub_llamacpp_engine.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,166 @@
import pytest
import requests
import os
import yaml

from pathlib import Path
from test_runner import (
run,
start_server,
stop_server,
wait_for_websocket_download_success_event,
)

# Hugging Face collection that lists the cortexso models under test.
collection_id = "cortexso/local-models-6683a6e29e8f3018845b16db"
# Read access token for the Hugging Face API; the suite cannot run without it,
# so fail fast at import time.
token = os.getenv("HF_TOKEN")
if not token:
    raise ValueError("HF_TOKEN environment variable not set")

def get_repos_in_collection(collection_id, token):
    """Return the repo IDs contained in a Hugging Face collection.

    Args:
        collection_id: Collection slug, e.g. "cortexso/local-models-...".
        token: Hugging Face token sent as a Bearer credential.

    Returns:
        List of repo ID strings; an empty list when the request fails
        (best-effort, matching the original error-print behaviour).
    """
    url = f"https://huggingface.co/api/collections/{collection_id}"
    headers = {"Authorization": f"Bearer {token}"}
    try:
        # Bounded timeout: the original call could hang indefinitely, and an
        # unhandled network error here would crash the module at import time.
        response = requests.get(url, headers=headers, timeout=30)
    except requests.RequestException as exc:
        print("Error fetching repos:", exc)
        return []

    # Check response and retrieve repo IDs if successful
    if response.status_code == 200:
        return [repo["id"] for repo in response.json()["items"]]
    print("Error fetching repos:", response.status_code, response.json())
    return []

def get_repo_default_branch(repo_id, token):
    """Return the 'default' branch named in a repo's metadata.yml, or None.

    Args:
        repo_id: Full repo ID, e.g. "cortexso/tinyllama".
        token: Hugging Face token sent as a Bearer credential.

    Returns:
        The value of the metadata 'default' field, or None on any failure.
    """
    # Direct link to metadata.yml on the main branch.
    url = f"https://huggingface.co/{repo_id}/resolve/main/metadata.yml"
    headers = {"Authorization": f"Bearer {token}"}
    try:
        # Bounded timeout + exception guard: a network error previously
        # propagated and aborted collection of the whole model list.
        response = requests.get(url, headers=headers, timeout=30)
    except requests.RequestException as exc:
        print(f"Error fetching metadata for {repo_id}:", exc)
        return None

    if response.status_code == 200:
        # safe_load returns None for an empty document; fall back to {} so
        # .get() cannot raise AttributeError.
        metadata = yaml.safe_load(response.text) or {}
        return metadata.get("default")
    print(f"Error fetching metadata for {repo_id}:", response.status_code, response.json())
    return None

def get_all_repos_and_default_branches_from_metadata(collection_id, token):
    """Collect "model-name:branch" identifiers for every GGUF repo in a collection.

    Only repos whose default branch name contains "gguf" are kept; the repo's
    owner prefix is stripped so each entry is "<repo>:<default-branch>".
    """
    entries = []
    for repo_id in get_repos_in_collection(collection_id, token):
        branch = get_repo_default_branch(repo_id, token)
        if not branch or "gguf" not in branch:
            continue
        model_name = repo_id.split("/")[1]
        entries.append(f"{model_name}:{branch}")
    return entries

# Build the "model:branch" list at import time; it parametrizes the test class
# below and drives per-test model cleanup in its fixture.
repo_branches = get_all_repos_and_default_branches_from_metadata(collection_id, token)

class TestCortexsoModels:
    """End-to-end tests covering every GGUF model on the cortexso hub.

    For each "model:branch" entry discovered at import time, the test pulls
    the model, verifies registration, installs the llama-cpp engine, runs one
    chat completion, then stops the model and uninstalls the engine.
    """

    @pytest.fixture(autouse=True)
    def setup_and_teardown(self, request):
        """Start the cortex server and delete all candidate models before and after each test."""
        # Setup
        success = start_server()
        if not success:
            raise Exception("Failed to start server")
        # Delete model if exists (leftovers from a previous run).
        for model_url in repo_branches:
            run(
                "Delete model",
                [
                    "models",
                    "delete",
                    model_url,
                ],
            )
        yield

        # Teardown
        for model_url in repo_branches:
            run(
                "Delete model",
                [
                    "models",
                    "delete",
                    model_url,
                ],
            )
        stop_server()

    @pytest.mark.parametrize("model_url", repo_branches)
    @pytest.mark.asyncio
    async def test_models_on_cortexso_hub(self, model_url):
        """Pull, run, and tear down a single cortexso hub model end to end."""
        # Pull model from cortexso hub
        json_body = {"model": model_url}
        response = requests.post("http://localhost:3928/models/pull", json=json_body)
        assert response.status_code == 200, f"Failed to pull model: {model_url}"

        # timeout=None: large GGUF downloads can take a long time.
        await wait_for_websocket_download_success_event(timeout=None)

        # Check if the model was pulled successfully
        get_model_response = requests.get(
            f"http://127.0.0.1:3928/models/{model_url}"
        )
        assert get_model_response.status_code == 200, f"Failed to fetch model: {model_url}"
        assert (
            get_model_response.json()["model"] == model_url
        ), f"Unexpected model name for: {model_url}"

        # Check if the model is available in the list of models
        response = requests.get("http://localhost:3928/models")
        assert response.status_code == 200
        models = [i["id"] for i in response.json()["data"]]
        assert model_url in models, f"Model not found in list: {model_url}"

        # Install Engine (capture=False so engine output streams to the log).
        exit_code, output, error = run(
            "Install Engine", ["engines", "install", "llama-cpp"], timeout=None, capture=False
        )
        root = Path.home()
        assert os.path.exists(root / "cortexcpp" / "engines" / "cortex.llamacpp" / "version.txt")
        assert exit_code == 0, f"Install engine failed with error: {error}"

        # Start the model
        response = requests.post("http://localhost:3928/models/start", json=json_body)
        assert response.status_code == 200, f"status_code: {response.status_code}"

        # Send an inference request
        inference_json_body = {
            "frequency_penalty": 0.2,
            "max_tokens": 4096,
            "messages": [
                {
                    "content": "",
                    "role": "user"
                }
            ],
            "model": model_url,
            "presence_penalty": 0.6,
            "stop": [
                "End"
            ],
            "stream": False,
            "temperature": 0.8,
            "top_p": 0.95
        }
        response = requests.post(
            "http://localhost:3928/v1/chat/completions",
            json=inference_json_body,
            headers={"Content-Type": "application/json"},
        )
        assert response.status_code == 200, f"status_code: {response.status_code} response: {response.json()}"

        # Stop the model
        response = requests.post("http://localhost:3928/models/stop", json=json_body)
        assert response.status_code == 200, f"status_code: {response.status_code}"

        # Uninstall Engine
        exit_code, output, error = run(
            "Uninstall engine", ["engines", "uninstall", "llama-cpp"]
        )
        assert "Engine llama-cpp uninstalled successfully!" in output
        # Fixed: this message previously read "Install engine failed".
        assert exit_code == 0, f"Uninstall engine failed with error: {error}"

0 comments on commit 11cd87b

Please sign in to comment.