Skip to content

Commit

Permalink
trying to add a test for this new functionality
Browse files Browse the repository at this point in the history
  • Loading branch information
RyanMarten committed Jan 3, 2025
1 parent 1c7ab00 commit 8076324
Show file tree
Hide file tree
Showing 5 changed files with 80 additions and 6 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -395,7 +395,7 @@ async def poll_and_process_batches(self) -> None:
await asyncio.gather(*status_tasks)
await self.update_batch_objects_file()

# update progress bari
# update progress bar
self.request_pbar.n = self.tracker.n_finished_or_downloaded_succeeded_requests
self.request_pbar.refresh()

Expand Down
47 changes: 47 additions & 0 deletions tests/batch/batch_objects.jsonl
Original file line number Diff line number Diff line change
@@ -0,0 +1,47 @@
{
"n_total_requests": 1551,
"unsubmitted_request_files": [],
"submitted_batches": {
"msgbatch_01AAs5rns9HhDrWvFxCoivqg": {
"request_file": "tests/batch/test_working_dir/requests_0.jsonl",
"id": "msgbatch_01AAs5rns9HhDrWvFxCoivqg",
"created_at": "2024-12-29T15:19:35.497436Z",
"finished_at": null,
"status": "submitted",
"api_key_suffix": "fwAA",
"request_counts": {
"total": 1551,
"failed": 0,
"succeeded": 0,
"raw_request_counts_object": {
"canceled": 0,
"errored": 0,
"expired": 0,
"processing": 1551,
"succeeded": 0
}
},
"raw_status": "in_progress",
"raw_batch": {
"id": "msgbatch_01AAs5rns9HhDrWvFxCoivqg",
"archived_at": null,
"cancel_initiated_at": null,
"created_at": "2024-12-29T15:19:35.497436Z",
"ended_at": null,
"expires_at": "2024-12-30T15:19:35.497436Z",
"processing_status": "in_progress",
"request_counts": {
"canceled": 0,
"errored": 0,
"expired": 0,
"processing": 1551,
"succeeded": 0
},
"results_url": null,
"type": "message_batch"
}
}
},
"finished_batches": {},
"downloaded_batches": {}
}
File renamed without changes.
File renamed without changes.
Original file line number Diff line number Diff line change
Expand Up @@ -11,9 +11,15 @@
from bespokelabs.curator.types.generic_batch import GenericBatch
from bespokelabs.curator.llm.prompt_formatter import PromptFormatter
import json
import shutil
import os
from bespokelabs.curator import LLM
from datasets import Dataset


def test_generic_response_file_from_responses(responses):
def test_generic_response_file_from_responses():
with open("tests/batch/msgbatch_01AAs5rns9HhDrWvFxCoivqg_results.jsonl", "r") as f:
responses = [json.loads(line) for line in f]

config = BatchRequestProcessorConfig(
model="claude-3-5-haiku-20241022",
Expand Down Expand Up @@ -66,9 +72,30 @@ def test_generic_response_file_from_responses(responses):
abrp.generic_response_file_from_responses(responses, batch)


def test_partial_batch_resubmit():
    """Exercise resubmission of a partially-completed batch run.

    Seeds a fresh working directory with fixture files describing an
    in-progress Anthropic message batch (requests file + batch_objects.jsonl),
    then re-runs the batch LLM pipeline against that directory so the
    resubmission / resume path is executed.

    Side effects: deletes and recreates ``tests/batch/test_working_dir``,
    and performs real batch API calls via ``LLM``.
    """
    # Start from a clean slate so stale state never leaks between runs.
    test_dir = "tests/batch/test_working_dir"
    if os.path.exists(test_dir):
        shutil.rmtree(test_dir)

    # Seed the working dir with the fixture request file and batch-objects
    # file so the processor sees a previously-submitted, unfinished batch.
    os.makedirs(test_dir)
    shutil.copy("tests/batch/requests_0.jsonl", f"{test_dir}/requests_0.jsonl")
    shutil.copy("tests/batch/batch_objects.jsonl", f"{test_dir}/batch_objects.jsonl")

    llm = LLM(
        prompt_func=lambda row: row["instruction"],
        model_name="claude-3-5-haiku-20241022",
        response_format=None,
        batch=True,
        batch_size=50_000,
        batch_check_interval=60,
        base_url=None,
    )

    # NOTE(review): this doesn't currently work because the run creates a
    # hash-named subdirectory inside test_working_dir instead of reusing the
    # seeded files directly -- confirm/fix the working-dir resolution upstream.
    llm(dataset=Dataset.from_dict({"instruction": ["just say 'hi'"] * 1551}), working_dir=test_dir)


if __name__ == "__main__":
    test_partial_batch_resubmit()

0 comments on commit 8076324

Please sign in to comment.