[python] support uploading examples with attachments and running evals on examples with attachments (#1209)

Co-authored-by: Ankush Gola <[email protected]>
Co-authored-by: Bagatur <[email protected]>
Co-authored-by: Jake Rachleff <[email protected]>
Co-authored-by: William Fu-Hinthorn <[email protected]>
6 people authored Dec 10, 2024
1 parent 6fc9f3e commit 82383fe
Showing 11 changed files with 2,107 additions and 47 deletions.
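
For context, here is a minimal usage sketch of the new API, based only on the calls exercised by the benchmarks in this commit; the dataset name, file path, and example contents are placeholders:

from pathlib import Path

from langsmith import Client
from langsmith.schemas import ExampleUpsertWithAttachments

client = Client()
dataset = client.create_dataset("attachments-demo")  # placeholder name
client.upsert_examples_multipart(
    upserts=[
        ExampleUpsertWithAttachments(
            dataset_id=dataset.id,
            inputs={"question": "What is in the report?"},
            outputs={"answer": "Mostly the letter a."},
            # An attachment maps a name to a (mime_type, data) pair; the
            # benchmarks pass a Path, which the client reads from disk.
            attachments={"report": ("text/plain", Path("/tmp/report.txt"))},
        )
    ]
)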
111 changes: 111 additions & 0 deletions python/bench/upload_example_with_large_file_attachment.py
@@ -0,0 +1,111 @@
import os
import statistics
import time
from pathlib import Path
from typing import Dict

from langsmith import Client
from langsmith.schemas import ExampleUpsertWithAttachments

WRITE_BATCH = 10000


def create_large_file(size: int, dir_path: str) -> str:
    """Create a large file of the given size for benchmarking purposes."""
    filename = f"large_file_{size}.txt"
    filepath = os.path.join(dir_path, filename)

    # Delete any leftover file from a previous run.
    if os.path.exists(filepath):
        print("Deleting existing file...")
        os.remove(filepath)

print("Creating big file...")
    with open(filepath, "w") as f:
        # The content is irrelevant; only the total size matters here.
        f.write("a" * size)

print("Done creating big file...")
return filepath


DATASET_NAME = "upsert_big_file_to_dataset"


def benchmark_big_file_upload(
size_bytes: int, num_examples: int, samples: int = 1
) -> Dict:
"""
Benchmark run creation with specified parameters.
Returns timing statistics.
"""
multipart_timings = []

for _ in range(samples):
client = Client()

if client.has_dataset(dataset_name=DATASET_NAME):
client.delete_dataset(dataset_name=DATASET_NAME)

dataset = client.create_dataset(
DATASET_NAME,
description="Test dataset for big file upload",
)
large_file = create_large_file(size_bytes, "/tmp")
examples = [
ExampleUpsertWithAttachments(
dataset_id=dataset.id,
inputs={"a": 1},
outputs={"b": 2},
attachments={
"bigfile": ("text/plain", Path(large_file)),
},
)
for _ in range(num_examples)
]

multipart_start = time.perf_counter()
client.upsert_examples_multipart(upserts=examples)
multipart_elapsed = time.perf_counter() - multipart_start

multipart_timings.append(multipart_elapsed)

return {
"mean": statistics.mean(multipart_timings),
"median": statistics.median(multipart_timings),
"stdev": (
statistics.stdev(multipart_timings) if len(multipart_timings) > 1 else 0
),
"min": min(multipart_timings),
"max": max(multipart_timings),
}


size_bytes = 50_000_000  # 50 MB
num_examples = 10


def main(size_bytes: int, num_examples: int = 1):
"""
Run benchmarks with different combinations of parameters and report results.
"""
results = benchmark_big_file_upload(size_bytes, num_examples)

print(f"\nBenchmark Results for size {size_bytes} and {num_examples} examples:")
print("-" * 30)
print(f"{'Metric':<15} {'Result':>20}")
print("-" * 30)

metrics = ["mean", "median", "stdev", "min", "max"]
for metric in metrics:
print(f"{results[metric]:>20.4f}")

print("-" * 30)
print(f"{'Throughput':<15} {num_examples / results['mean']:>20.2f} ")
print("(examples/second)")


if __name__ == "__main__":
main(size_bytes, num_examples)
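
The commit title also covers running evals on examples with attachments, a path these benchmark files do not exercise. The sketch below is therefore an assumption about the intended shape, not something shown in this diff: a target callable that receives the example's attachments alongside its inputs, where the per-attachment "reader" access is hypothetical:

from langsmith import Client, evaluate


def target(inputs: dict, attachments: dict) -> dict:
    # Hypothetical interface: assumes each attachment exposes a binary reader.
    data = attachments["bigfile"]["reader"].read()
    return {"num_bytes": len(data)}


evaluate(target, data=DATASET_NAME, client=Client())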
143 changes: 143 additions & 0 deletions python/bench/upload_examples_bench.py
@@ -0,0 +1,143 @@
import statistics
import time
from typing import Dict
from uuid import uuid4

from langsmith import Client
from langsmith.schemas import DataType, ExampleUpsertWithAttachments


def create_large_json(length: int) -> Dict:
"""Create a large JSON object for benchmarking purposes."""
large_array = [
{
"index": i,
"data": f"This is element number {i}",
"nested": {"id": i, "value": f"Nested value for element {i}"},
}
for i in range(length)
]

return {
"name": "Huge JSON" + str(uuid4()),
"description": "This is a very large JSON object for benchmarking purposes.",
"array": large_array,
"metadata": {
"created_at": "2024-10-22T19:00:00Z",
"author": "Python Program",
"version": 1.0,
},
}


def create_example_data(
    dataset_id: str, json_size: int
) -> ExampleUpsertWithAttachments:
    """Create a single example upsert object."""
    return ExampleUpsertWithAttachments(
        dataset_id=dataset_id,
        inputs=create_large_json(json_size),
        outputs=create_large_json(json_size),
    )


DATASET_NAME = "upsert_llm_evaluator_benchmark_dataset"


def benchmark_example_uploading(
num_examples: int, json_size: int, samples: int = 1
) -> Dict:
"""
Benchmark run creation with specified parameters.
Returns timing statistics.
"""
multipart_timings, old_timings = [], []

for _ in range(samples):
client = Client()

if client.has_dataset(dataset_name=DATASET_NAME):
client.delete_dataset(dataset_name=DATASET_NAME)

dataset = client.create_dataset(
DATASET_NAME,
description="Test dataset for multipart example upload",
data_type=DataType.kv,
)
        examples = [
            create_example_data(dataset.id, json_size) for _ in range(num_examples)
        ]

        # Old method (disabled: the create_examples endpoint fails above
        # ~20 MB, so it would crash with json_size > ~100; the "old"
        # timings below are therefore effectively zero).
        old_start = time.perf_counter()
        # inputs = [e.inputs for e in examples]
        # outputs = [e.outputs for e in examples]
        # client.create_examples(inputs=inputs, outputs=outputs, dataset_id=dataset.id)
        old_elapsed = time.perf_counter() - old_start

# New method
multipart_start = time.perf_counter()
client.upsert_examples_multipart(upserts=examples)
multipart_elapsed = time.perf_counter() - multipart_start

multipart_timings.append(multipart_elapsed)
old_timings.append(old_elapsed)

return {
"old": {
"mean": statistics.mean(old_timings),
"median": statistics.median(old_timings),
"stdev": statistics.stdev(old_timings) if len(old_timings) > 1 else 0,
"min": min(old_timings),
"max": max(old_timings),
},
"new": {
"mean": statistics.mean(multipart_timings),
"median": statistics.median(multipart_timings),
"stdev": (
statistics.stdev(multipart_timings) if len(multipart_timings) > 1 else 0
),
"min": min(multipart_timings),
"max": max(multipart_timings),
},
}


json_size = 1000
num_examples = 1000


def main(json_size: int, num_examples: int):
"""
Run benchmarks with different combinations of parameters and report results.
"""
results = benchmark_example_uploading(
num_examples=num_examples, json_size=json_size
)

print(
f"\nBenchmark Results for {num_examples} examples with JSON size {json_size}:"
)
print("-" * 60)
print(f"{'Metric':<15} {'Old Method':>20} {'New Method':>20}")
print("-" * 60)

metrics = ["mean", "median", "stdev", "min", "max"]
for metric in metrics:
print(
f"{metric:<15} {results['old'][metric]:>20.4f} "
f"{results['new'][metric]:>20.4f}"
)

print("-" * 60)
print(
f"{'Throughput':<15} {num_examples / results['old']['mean']:>20.2f} "
f"{num_examples / results['new']['mean']:>20.2f}"
)
print("(examples/second)")


if __name__ == "__main__":
main(json_size, num_examples)
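
Both benchmark scripts recreate their dataset at the start of each sample but leave the last one in place. A small cleanup sketch, using the same client calls the benchmarks already rely on:

from langsmith import Client

client = Client()
if client.has_dataset(dataset_name=DATASET_NAME):
    client.delete_dataset(dataset_name=DATASET_NAME)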