Haystack 1.x Benchmarks #1
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
name: Benchmarks

# Runs weekly on a schedule and can also be started manually.
on:
  workflow_dispatch:
  schedule:
    # At 00:01 on Sunday
    - cron: "1 0 * * 0"

# id-token: write goes with the role-to-assume AWS authentication steps
# used by the deploy and terminate jobs; contents: read is enough for checkout.
permissions:
  id-token: write
  contents: read

# Shared by the AWS authentication steps and the runner launch command.
env:
  AWS_REGION: eu-central-1

jobs:
deploy-runner:
  # Launches an EC2 instance through CML and exposes the runner id so that
  # a later job can find and terminate the instance.
  runs-on: ubuntu-latest
  outputs:
    cml_runner_id: ${{ steps.deploy.outputs.cml_runner_id }}
  steps:
    - uses: actions/checkout@v4

    - uses: iterative/setup-cml@v2

    - name: AWS authentication
      uses: aws-actions/configure-aws-credentials@010d0da01d0b5a38af31e9c3470dbfdabdecca3a
      with:
        aws-region: ${{ env.AWS_REGION }}
        role-to-assume: ${{ secrets.AWS_CI_ROLE_ARN }}

    - name: Launch EC2 instance and deploy runner
      id: deploy
      env:
        repo_token: ${{ secrets.HAYSTACK_BOT_TOKEN }}
      run: |
        # Capture the launch log while still mirroring it to the job log (stderr).
        OUTPUT=$(cml runner launch \
          --cloud aws \
          --cloud-region ${{ env.AWS_REGION }} \
          --cloud-type=p3.2xlarge \
          --cloud-hdd-size=64 \
          --labels=cml 2>&1 | tee /dev/fd/2)
        # Extract 'id' from the log and publish it as a step output
        ID_VALUE=$(echo "$OUTPUT" | jq -r '.message? | fromjson? | select(.id != null) | .id // empty')
        if [ -z "$ID_VALUE" ]; then
          echo "::warning::Could not extract a runner id from the cml output; the instance cannot be looked up for termination."
        fi
        echo "cml_runner_id=$ID_VALUE" >> "$GITHUB_OUTPUT"
run-reader-benchmarks:
  # Runs every reader config on the self-hosted `cml` runner, ships the
  # results to Datadog and archives them as a workflow artifact.
  needs: deploy-runner
  runs-on: [self-hosted, cml]
  container:
    image: docker://iterativeai/cml:0-dvc2-base1-gpu
    options: --gpus all
  timeout-minutes: 2880
  steps:
    - uses: actions/checkout@v4

    - name: Install Haystack + Datadog requirements
      run: |
        # Quoted so the shell never treats the extras list as a glob pattern.
        pip install ".[metrics,benchmarks,inference]"
        pip install -r test/benchmarks/datadog/requirements.txt

    - name: Run benchmarks
      working-directory: test/benchmarks
      run: |
        # -p: create the output directory, and do not fail if it already exists.
        mkdir -p out
        for f in ./configs/reader/*.yml; do
          # Strip the extension, then the directory, to name the result file.
          name="${f%.*}"
          echo "=== Running benchmarks for $name ===";
          config_name="$(basename "$name")"
          python run.py --output "out/$config_name.json" "$f";
          echo "=== Benchmarks done for $name (or failed) ===";
        done

    - name: Send Benchmark results to Datadog
      working-directory: test/benchmarks
      env:
        # Passed through the environment so the secret is not written into the script text.
        CORE_DATADOG_API_KEY: ${{ secrets.CORE_DATADOG_API_KEY }}
      run: |
        python datadog/send_metrics.py out/ "$CORE_DATADOG_API_KEY" https://api.datadoghq.eu

    - name: Archive benchmark results
      uses: actions/upload-artifact@v4
      with:
        name: benchmark-results-reader
        path: test/benchmarks/out/
run-elasticsearch-benchmarks:
  # Runs every *-elasticsearch-* config against an Elasticsearch service
  # container, after the reader benchmarks have finished.
  needs:
    - deploy-runner
    - run-reader-benchmarks
  runs-on: [self-hosted, cml]
  container:
    image: docker://iterativeai/cml:0-dvc2-base1-gpu
    options: --gpus all
  services:
    elasticsearch:
      image: elasticsearch:7.17.6
      env:
        discovery.type: "single-node"
      ports:
        - "9201:9200"
  timeout-minutes: 2880
  steps:
    - uses: actions/checkout@v4

    - name: Install Haystack + Datadog requirements
      run: |
        # Quoted so the shell never treats the extras list as a glob pattern.
        pip install ".[metrics,elasticsearch,benchmarks,inference]"
        pip install -r test/benchmarks/datadog/requirements.txt

    - name: Run benchmarks
      working-directory: test/benchmarks
      run: |
        # -p: create the output directory, and do not fail if it already exists.
        mkdir -p out
        for f in ./configs/**/*-elasticsearch-*.yml; do
          # Strip the extension, then the directory, to name the result file.
          name="${f%.*}"
          echo "=== Running benchmarks for $name ===";
          config_name="$(basename "$name")"
          python run.py --output "out/$config_name.json" "$f";
          echo "=== Benchmarks done for $name (or failed) ===";
        done

    - name: Send Benchmark results to Datadog
      working-directory: test/benchmarks
      env:
        # Passed through the environment so the secret is not written into the script text.
        CORE_DATADOG_API_KEY: ${{ secrets.CORE_DATADOG_API_KEY }}
      run: |
        python datadog/send_metrics.py out/ "$CORE_DATADOG_API_KEY" https://api.datadoghq.eu

    - name: Archive benchmark results
      uses: actions/upload-artifact@v4
      with:
        name: benchmark-results-elasticsearch
        path: test/benchmarks/out/
run-weaviate-benchmarks:
  # Runs every *-weaviate-* config against a Weaviate service container,
  # after the Elasticsearch benchmarks have finished.
  needs:
    - deploy-runner
    - run-elasticsearch-benchmarks
  runs-on: [self-hosted, cml]
  container:
    image: docker://iterativeai/cml:0-dvc2-base1-gpu
    options: --gpus all
  services:
    weaviate:
      image: semitechnologies/weaviate:1.17.2
      env:
        AUTHENTICATION_ANONYMOUS_ACCESS_ENABLED: "true"
        PERSISTENCE_DATA_PATH: "/var/lib/weaviate"
      ports:
        - "8080:8080"
  timeout-minutes: 2880
  steps:
    - uses: actions/checkout@v4

    - name: Install Haystack + Datadog requirements
      run: |
        # Quoted so the shell never treats the extras list as a glob pattern.
        pip install ".[metrics,weaviate,benchmarks,inference]"
        pip install -r test/benchmarks/datadog/requirements.txt

    - name: Run benchmarks
      working-directory: test/benchmarks
      run: |
        # -p: create the output directory, and do not fail if it already exists.
        mkdir -p out
        for f in ./configs/**/*-weaviate-*.yml; do
          # Strip the extension, then the directory, to name the result file.
          name="${f%.*}"
          echo "=== Running benchmarks for $name ===";
          config_name="$(basename "$name")"
          python run.py --output "out/$config_name.json" "$f";
          echo "=== Benchmarks done for $name (or failed) ===";
        done

    - name: Send Benchmark results to Datadog
      working-directory: test/benchmarks
      env:
        # Passed through the environment so the secret is not written into the script text.
        CORE_DATADOG_API_KEY: ${{ secrets.CORE_DATADOG_API_KEY }}
      run: |
        python datadog/send_metrics.py out/ "$CORE_DATADOG_API_KEY" https://api.datadoghq.eu

    - name: Archive benchmark results
      uses: actions/upload-artifact@v4
      with:
        name: benchmark-results-weaviate
        path: test/benchmarks/out/
run-opensearch-benchmarks:
  # Runs every *-opensearch-* config against an OpenSearch service container,
  # after the Weaviate benchmarks have finished.
  needs:
    - deploy-runner
    - run-weaviate-benchmarks
  runs-on: [self-hosted, cml]
  container:
    image: docker://iterativeai/cml:0-dvc2-base1-gpu
    options: --gpus all
  services:
    opensearch:
      image: opensearchproject/opensearch:1.3.5
      env:
        discovery.type: "single-node"
        OPENSEARCH_JAVA_OPTS: "-Xms4096m -Xmx4096m"
      ports:
        - "9200:9200"
  timeout-minutes: 2880
  steps:
    - uses: actions/checkout@v4

    - name: Install Haystack + Datadog requirements
      run: |
        # Quoted so the shell never treats the extras list as a glob pattern.
        pip install ".[metrics,opensearch,benchmarks,inference]"
        pip install -r test/benchmarks/datadog/requirements.txt

    - name: Run benchmarks
      working-directory: test/benchmarks
      run: |
        # -p: create the output directory, and do not fail if it already exists.
        mkdir -p out
        for f in ./configs/**/*-opensearch-*.yml; do
          # Strip the extension, then the directory, to name the result file.
          name="${f%.*}"
          echo "=== Running benchmarks for $name ===";
          config_name="$(basename "$name")"
          python run.py --output "out/$config_name.json" "$f";
          echo "=== Benchmarks done for $name (or failed) ===";
        done

    - name: Send Benchmark results to Datadog
      working-directory: test/benchmarks
      env:
        # Passed through the environment so the secret is not written into the script text.
        CORE_DATADOG_API_KEY: ${{ secrets.CORE_DATADOG_API_KEY }}
      run: |
        python datadog/send_metrics.py out/ "$CORE_DATADOG_API_KEY" https://api.datadoghq.eu

    - name: Archive benchmark results
      uses: actions/upload-artifact@v4
      with:
        name: benchmark-results-opensearch
        path: test/benchmarks/out/
terminate-runner:
  # always(): run the clean-up even when an earlier job failed or was cancelled.
  if: always()
  needs:
    - deploy-runner
    - run-opensearch-benchmarks
  runs-on: ubuntu-latest
  steps:
    - name: AWS authentication
      uses: aws-actions/configure-aws-credentials@010d0da01d0b5a38af31e9c3470dbfdabdecca3a
      with:
        aws-region: ${{ env.AWS_REGION }}
        role-to-assume: ${{ secrets.AWS_CI_ROLE_ARN }}

    - name: Terminate EC2 instance
      env:
        CML_RUNNER_ID: ${{ needs.deploy-runner.outputs.cml_runner_id }}
      run: |
        # Fail with a clear message instead of an opaque AWS CLI error.
        if [ -z "$CML_RUNNER_ID" ]; then
          echo "::error::deploy-runner did not publish a runner id; cannot look up the EC2 instance to terminate."
          exit 1
        fi
        # Get the instance ID using its Name tag and terminate the instance
        INSTANCE_ID=$(aws ec2 describe-instances --filters "Name=tag:Name,Values=$CML_RUNNER_ID" --query "Reservations[*].Instances[*].[InstanceId]" --output text)
        if [ -z "$INSTANCE_ID" ]; then
          echo "::error::No EC2 instance found with Name tag '$CML_RUNNER_ID'."
          exit 1
        fi
        aws ec2 terminate-instances --instance-ids "$INSTANCE_ID"