Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Compute vector store usage bytes #84

Open
wants to merge 1 commit into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
36 changes: 36 additions & 0 deletions client/.github/workflows/run-tests.yml
Original file line number Diff line number Diff line change
Expand Up @@ -321,3 +321,39 @@ jobs:
- name: run tests
run: |
poetry run pytest -s --disable-warnings tests/test_streaming_run.py

# Job: runs a single pytest file (the vector store usage-bytes test) on ubuntu-latest.
# NOTE(review): the test file added with this job is listed as
# client/tests/astra-assistants/test_vector_store_bytes.py, while the final step
# passes tests/test_vector_store_bytes.py to pytest -- confirm the path resolves
# from the working directory this job runs in.
run-astra-assistants-tests-vector-store-bytes:
runs-on: ubuntu-latest
name: run astra-assistants vector store bytes tests
# Every value below is read from repository secrets.
# NOTE(review): base_url is the only lower-case variable name in this list -- confirm that is intended.
env:
ASTRA_DB_APPLICATION_TOKEN: ${{ secrets.ASTRA_DB_APPLICATION_TOKEN }}
AWS_ACCESS_KEY_ID: ${{ secrets.AWS_ACCESS_KEY_ID }}
AWS_REGION_NAME: ${{ secrets.AWS_REGION_NAME }}
AWS_SECRET_ACCESS_KEY: ${{ secrets.AWS_SECRET_ACCESS_KEY }}
base_url: ${{ secrets.BASE_URL }}
COHERE_API_KEY: ${{ secrets.COHERE_API_KEY }}
GEMINI_API_KEY: ${{ secrets.GEMINI_API_KEY }}
OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }}
PERPLEXITYAI_API_KEY: ${{ secrets.PERPLEXITYAI_API_KEY }}
GROQ_API_KEY: ${{ secrets.GROQ_API_KEY }}

# Steps, in order: check out the repository, set up Python 3.10.12, install Poetry
# with the official installer script, print the Poetry version, point Poetry at
# python3.10, install the project dependencies, then run pytest with output
# capture disabled (-s) and warnings suppressed (--disable-warnings).
steps:
- name: Git checkout
uses: actions/checkout@v3
- name: Set up Python 3.10.12
uses: actions/setup-python@v2
with:
python-version: '3.10.12'
- name: Install Poetry
run: |
curl -sSL https://install.python-poetry.org | python3 -
- name: Check Poetry Version
run: poetry --version
- name: Configure Poetry to Use Python 3.10.12
run: poetry env use python3.10
- name: get dependencies
run: |
poetry install
- name: run tests
run: |
poetry run pytest -s --disable-warnings tests/test_vector_store_bytes.py
36 changes: 36 additions & 0 deletions client/tests/astra-assistants/test_vector_store_bytes.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,36 @@
import os
import pytest
from impl.routes_v2.vector_stores import create_vector_store, create_vector_store_file
from openapi_server_v2.models.create_vector_store_request import CreateVectorStoreRequest
from openapi_server_v2.models.create_vector_store_file_request import CreateVectorStoreFileRequest
from openapi_server_v2.models.vector_store_object import VectorStoreObject
from openapi_server_v2.models.vector_store_file_object import VectorStoreFileObject
from impl.astra_vector import CassandraClient

@pytest.fixture(scope="module")
def astradb():
    """Yield one ``CassandraClient`` shared by all tests in this module.

    The client is constructed with no arguments before the first test that
    requests the fixture. Once the module's tests are done, pytest resumes
    the generator and the client's ``close()`` method is called.
    """
    cassandra = CassandraClient()
    yield cassandra
    # Teardown: everything after the yield runs when the module scope ends.
    cassandra.close()

def test_vector_store_usage_bytes(astradb):
    """Check per-file and aggregate ``usage_bytes`` of a vector store.

    ``create_vector_store`` and ``create_vector_store_file`` are coroutine
    functions, so they are awaited inside one coroutine that is driven with
    ``asyncio.run``. Calling them without ``await`` only yields coroutine
    objects, which have no ``id`` or ``usage_bytes`` attribute.

    Args:
        astradb: the module-scoped ``CassandraClient`` fixture.
    """
    # Local import keeps this fix self-contained; the module does not import asyncio.
    import asyncio

    file_paths = ["./tests/fixtures/sample1.txt", "./tests/fixtures/sample2.txt"]

    async def scenario():
        # Create an empty vector store to attach the files to.
        vector_store_request = CreateVectorStoreRequest(name="Test Vector Store", file_ids=[])
        vector_store: VectorStoreObject = await create_vector_store(vector_store_request, astradb)

        total_usage_bytes = 0
        for file_path in file_paths:
            file_size = os.path.getsize(file_path)
            total_usage_bytes += file_size

            # NOTE(review): create_vector_store_file looks the id up in the
            # "files" table and raises a 404 HTTPException when no row is
            # found. A filesystem path is used as the id here, so the fixture
            # files presumably have to be registered under these ids first --
            # confirm against the file upload route.
            file_request = CreateVectorStoreFileRequest(file_id=file_path)
            vector_store_file: VectorStoreFileObject = await create_vector_store_file(
                vector_store_id=vector_store.id,
                create_vector_store_file_request=file_request,
                astradb=astradb,
            )
            assert vector_store_file.usage_bytes == file_size

        # create_vector_store starts usage_bytes at 0 and adds each attached
        # file's usage_bytes, so a store created with file_ids=[] can never
        # report the total. Create the store with the file ids so the
        # aggregate assertion exercises that summation.
        populated_request = CreateVectorStoreRequest(name="Test Vector Store", file_ids=file_paths)
        populated_store: VectorStoreObject = await create_vector_store(populated_request, astradb)
        assert populated_store.usage_bytes == total_usage_bytes

    asyncio.run(scenario())
23 changes: 17 additions & 6 deletions impl/routes_v2/vector_stores.py
Original file line number Diff line number Diff line change
@@ -1,8 +1,9 @@
from datetime import datetime
import logging
import time
import os

from fastapi import APIRouter, Path, Depends, Body, Query
from fastapi import APIRouter, Path, Depends, Body, Query, HTTPException

from impl.astra_vector import CassandraClient
from impl.model_v2.vector_store_object import VectorStoreObject
Expand Down Expand Up @@ -67,12 +68,12 @@ async def create_vector_store(
usage_bytes = 0
for file_id in create_vector_store_request.file_ids:
request = CreateVectorStoreFileRequest(file_id=file_id)
await create_vector_store_file(
vsf = await create_vector_store_file(
vector_store_id=vector_store_id,
create_vector_store_file_request=request,
astradb=astradb
)
#TODO - compute usage_bytes
usage_bytes += vsf.usage_bytes

file_id_count = len(create_vector_store_request.file_ids)
file_counts = VectorStoreObjectFileCounts(
Expand Down Expand Up @@ -118,13 +119,19 @@ async def create_vector_store_file(
) -> VectorStoreFileObject:
created_at = int(time.mktime(datetime.now().timetuple()) * 1000)

file_info = astradb.select_from_table_by_pk(
table="files", partition_keys=["id"], args={"id": create_vector_store_file_request.file_id}
)
if len(file_info) == 0:
raise HTTPException(status_code=404, detail="File not found")
file_size = file_info[0]["bytes"]

extra_fields = {
"id": create_vector_store_file_request.file_id,
"vector_store_id": vector_store_id,
"object": "vector_store.file",
"created_at": created_at,
# TODO - grab from file
"usage_bytes": -1,
"usage_bytes": file_size,
"status": "completed"
}
vector_store_file: VectorStoreFileObject = await store_object(
Expand Down Expand Up @@ -262,4 +269,8 @@ async def delete_vector_store_file(
created_at = vsf.created_at
break
astradb.delete_by_pks(table="vector_store_files", keys=["id", "created_at", "vector_store_id"], values=[file_id, created_at, vector_store_id])

return DeleteVectorStoreFileResponse(
id=file_id,
object="vector_store.file",
deleted=True
)
Loading