[python] support uploading examples with attachments and running evals on examples with attachments (#1209)
Co-authored-by: Ankush Gola <[email protected]>
Co-authored-by: Bagatur <[email protected]>
Co-authored-by: Jake Rachleff <[email protected]>
Co-authored-by: William Fu-Hinthorn <[email protected]>
Commit 82383fe (1 parent: 6fc9f3e)
Showing 11 changed files with 2,107 additions and 47 deletions.
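
The commit title names two features: upserting dataset examples that carry file attachments, and running evals over such examples. The two benchmark scripts below exercise the upload side through upsert_examples_multipart. As a minimal sketch of that call, built only from the schema and client methods visible in this diff (the dataset name, input text, and file path are illustrative, not from the commit):

from pathlib import Path

from langsmith import Client
from langsmith.schemas import ExampleUpsertWithAttachments

client = Client()  # assumes LangSmith credentials are set in the environment
dataset = client.create_dataset("attachments-demo")  # illustrative name

# Attachments are keyed by name; each value is a (mime_type, path) pair.
example = ExampleUpsertWithAttachments(
    dataset_id=dataset.id,
    inputs={"question": "Summarize the attached report."},
    outputs={"answer": "..."},
    attachments={"report": ("text/plain", Path("/tmp/report.txt"))},
)
client.upsert_examples_multipart(upserts=[example])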
111 changes: 111 additions & 0 deletions
python/bench/upload_example_with_large_file_attachment.py
@@ -0,0 +1,111 @@
import os
import statistics
import time
from pathlib import Path
from typing import Dict

from langsmith import Client
from langsmith.schemas import ExampleUpsertWithAttachments

WRITE_BATCH = 10000


def create_large_file(size: int, directory: str) -> str:
    """Create a large file of repeated characters for benchmarking purposes."""
    filename = f"large_file_{size}.txt"
    filepath = os.path.join(directory, filename)

    # Delete the file if it already exists so each run starts fresh.
    if os.path.exists(filepath):
        print("Deleting existing file...")
        os.remove(filepath)

    print("Creating big file...")
    with open(filepath, "w") as f:
        # Write in WRITE_BATCH-sized chunks rather than building one giant string.
        curr_size = 0
        while curr_size < size:
            chunk = min(WRITE_BATCH, size - curr_size)
            f.write("a" * chunk)
            curr_size += chunk

    print("Done creating big file...")
    return filepath


DATASET_NAME = "upsert_big_file_to_dataset"


def benchmark_big_file_upload(
    size_bytes: int, num_examples: int, samples: int = 1
) -> Dict:
    """
    Benchmark multipart upserts of examples that carry a large file attachment.

    Returns timing statistics.
    """
    multipart_timings = []

    for _ in range(samples):
        client = Client()

        # Start each sample from a clean dataset.
        if client.has_dataset(dataset_name=DATASET_NAME):
            client.delete_dataset(dataset_name=DATASET_NAME)

        dataset = client.create_dataset(
            DATASET_NAME,
            description="Test dataset for big file upload",
        )
        large_file = create_large_file(size_bytes, "/tmp")
        examples = [
            ExampleUpsertWithAttachments(
                dataset_id=dataset.id,
                inputs={"a": 1},
                outputs={"b": 2},
                attachments={
                    "bigfile": ("text/plain", Path(large_file)),
                },
            )
            for _ in range(num_examples)
        ]

        # Time only the upload itself, not the dataset and file setup above.
        multipart_start = time.perf_counter()
        client.upsert_examples_multipart(upserts=examples)
        multipart_elapsed = time.perf_counter() - multipart_start

        multipart_timings.append(multipart_elapsed)

    return {
        "mean": statistics.mean(multipart_timings),
        "median": statistics.median(multipart_timings),
        "stdev": (
            statistics.stdev(multipart_timings) if len(multipart_timings) > 1 else 0
        ),
        "min": min(multipart_timings),
        "max": max(multipart_timings),
    }


size_bytes = 50_000_000
num_examples = 10


def main(size_bytes: int, num_examples: int = 1):
    """
    Run the benchmark and report timing results.
    """
    results = benchmark_big_file_upload(size_bytes, num_examples)

    print(f"\nBenchmark Results for size {size_bytes} and {num_examples} examples:")
    print("-" * 30)
    print(f"{'Metric':<15} {'Result':>20}")
    print("-" * 30)

    metrics = ["mean", "median", "stdev", "min", "max"]
    for metric in metrics:
        print(f"{metric:<15} {results[metric]:>20.4f}")

    print("-" * 30)
    print(f"{'Throughput':<15} {num_examples / results['mean']:>20.2f}")
    print("(examples/second)")


if __name__ == "__main__":
    main(size_bytes, num_examples)
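
Because every example in the run above attaches the same size_bytes file, the timing stats convert directly into payload throughput. A small helper along those lines, assuming the result dict shape returned by benchmark_big_file_upload (the helper name and the decimal-megabyte convention are my own, not from the commit):

def payload_throughput_mb_per_s(
    size_bytes: int, num_examples: int, mean_seconds: float
) -> float:
    """Approximate upload throughput in megabytes per second.

    Assumes each of the num_examples examples carries one attachment of
    size_bytes bytes, as in benchmark_big_file_upload above.
    """
    total_mb = size_bytes * num_examples / 1_000_000
    return total_mb / mean_seconds

# e.g. payload_throughput_mb_per_s(50_000_000, 10, results["mean"])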
143 changes: 143 additions & 0 deletions
@@ -0,0 +1,143 @@
import statistics
import time
from typing import Dict
from uuid import uuid4

from langsmith import Client
from langsmith.schemas import DataType, ExampleUpsertWithAttachments


def create_large_json(length: int) -> Dict:
    """Create a large JSON-serializable object for benchmarking purposes."""
    large_array = [
        {
            "index": i,
            "data": f"This is element number {i}",
            "nested": {"id": i, "value": f"Nested value for element {i}"},
        }
        for i in range(length)
    ]

    return {
        "name": "Huge JSON" + str(uuid4()),
        "description": "This is a very large JSON object for benchmarking purposes.",
        "array": large_array,
        "metadata": {
            "created_at": "2024-10-22T19:00:00Z",
            "author": "Python Program",
            "version": 1.0,
        },
    }


def create_example_data(
    dataset_id: str, json_size: int
) -> ExampleUpsertWithAttachments:
    """Create a single example upsert payload with large inputs and outputs."""
    return ExampleUpsertWithAttachments(
        **{
            "dataset_id": dataset_id,
            "inputs": create_large_json(json_size),
            "outputs": create_large_json(json_size),
        }
    )


DATASET_NAME = "upsert_llm_evaluator_benchmark_dataset"


def benchmark_example_uploading(
    num_examples: int, json_size: int, samples: int = 1
) -> Dict:
    """
    Benchmark example uploading with the specified parameters, comparing the
    old create_examples path against the new multipart upsert path.

    Returns timing statistics for both methods.
    """
    multipart_timings, old_timings = [], []

    for _ in range(samples):
        client = Client()

        # Start each sample from a clean dataset.
        if client.has_dataset(dataset_name=DATASET_NAME):
            client.delete_dataset(dataset_name=DATASET_NAME)

        dataset = client.create_dataset(
            DATASET_NAME,
            description="Test dataset for multipart example upload",
            data_type=DataType.kv,
        )
        examples = [
            create_example_data(dataset.id, json_size) for _ in range(num_examples)
        ]

        # Old method (disabled): the create_examples endpoint fails above
        # 20 MB, so this crashes with json_size > ~100. The timer still runs
        # so the report keeps both columns.
        old_start = time.perf_counter()
        # inputs = [e.inputs for e in examples]
        # outputs = [e.outputs for e in examples]
        # client.create_examples(inputs=inputs, outputs=outputs, dataset_id=dataset.id)
        old_elapsed = time.perf_counter() - old_start

        # New method
        multipart_start = time.perf_counter()
        client.upsert_examples_multipart(upserts=examples)
        multipart_elapsed = time.perf_counter() - multipart_start

        multipart_timings.append(multipart_elapsed)
        old_timings.append(old_elapsed)

    return {
        "old": {
            "mean": statistics.mean(old_timings),
            "median": statistics.median(old_timings),
            "stdev": statistics.stdev(old_timings) if len(old_timings) > 1 else 0,
            "min": min(old_timings),
            "max": max(old_timings),
        },
        "new": {
            "mean": statistics.mean(multipart_timings),
            "median": statistics.median(multipart_timings),
            "stdev": (
                statistics.stdev(multipart_timings)
                if len(multipart_timings) > 1
                else 0
            ),
            "min": min(multipart_timings),
            "max": max(multipart_timings),
        },
    }


json_size = 1000
num_examples = 1000


def main(json_size: int, num_examples: int):
    """
    Run the benchmark and report timing results for both upload methods.
    """
    results = benchmark_example_uploading(
        num_examples=num_examples, json_size=json_size
    )

    print(
        f"\nBenchmark Results for {num_examples} examples with JSON size {json_size}:"
    )
    print("-" * 60)
    print(f"{'Metric':<15} {'Old Method':>20} {'New Method':>20}")
    print("-" * 60)

    metrics = ["mean", "median", "stdev", "min", "max"]
    for metric in metrics:
        print(
            f"{metric:<15} {results['old'][metric]:>20.4f} "
            f"{results['new'][metric]:>20.4f}"
        )

    print("-" * 60)
    print(
        f"{'Throughput':<15} {num_examples / results['old']['mean']:>20.2f} "
        f"{num_examples / results['new']['mean']:>20.2f}"
    )
    print("(examples/second)")


if __name__ == "__main__":
    main(json_size, num_examples)
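
One caveat when reading this script's output: the old-method timer brackets only commented-out code, so the "Old Method" column reports the cost of an empty block, and its derived throughput is not meaningful until that call is restored. A sketch of re-enabling it behind a size guard, for insertion inside the sampling loop in place of the disabled block; it uses only the create_examples call already quoted in the comments, and the 100 threshold echoes the comment's rough estimate rather than a tested limit:

old_start = time.perf_counter()
if json_size <= 100:  # the old endpoint rejects payloads above ~20 MB
    inputs = [e.inputs for e in examples]
    outputs = [e.outputs for e in examples]
    client.create_examples(inputs=inputs, outputs=outputs, dataset_id=dataset.id)
old_elapsed = time.perf_counter() - old_start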