Skip to content

Commit

Permalink
update examples multipart (#1310)
Browse files Browse the repository at this point in the history
  • Loading branch information
isahers1 authored Dec 10, 2024
1 parent 70c3f3c commit 8bb0826
Show file tree
Hide file tree
Showing 3 changed files with 407 additions and 5 deletions.
89 changes: 84 additions & 5 deletions python/langsmith/client.py
Original file line number Diff line number Diff line change
Expand Up @@ -3464,6 +3464,7 @@ def _prepate_multipart_data(
examples: Union[
List[ls_schemas.ExampleUploadWithAttachments]
| List[ls_schemas.ExampleUpsertWithAttachments]
| List[ls_schemas.ExampleUpdateWithAttachments],
],
include_dataset_id: bool = False,
) -> Tuple[Any, bytes]:
Expand All @@ -3477,21 +3478,29 @@ def _prepate_multipart_data(
dataset_id = examples[0].dataset_id

for example in examples:
if not isinstance(
example, ls_schemas.ExampleUploadWithAttachments
) and not isinstance(example, ls_schemas.ExampleUpsertWithAttachments):
if (
not isinstance(example, ls_schemas.ExampleUploadWithAttachments)
and not isinstance(example, ls_schemas.ExampleUpsertWithAttachments)
and not isinstance(example, ls_schemas.ExampleUpdateWithAttachments)
):
raise ValueError(
"The examples must be of type ExampleUploadWithAttachments"
" or ExampleUpsertWithAttachments"
" or ExampleUpdateWithAttachments"
)
if example.id is not None:
example_id = str(example.id)
else:
example_id = str(uuid.uuid4())

if isinstance(example, ls_schemas.ExampleUpdateWithAttachments):
created_at = None
else:
created_at = example.created_at

example_body = {
**({"dataset_id": dataset_id} if include_dataset_id else {}),
"created_at": example.created_at,
**({"created_at": created_at} if created_at is not None else {}),
}
if example.metadata is not None:
example_body["metadata"] = example.metadata
Expand Down Expand Up @@ -3582,6 +3591,23 @@ def _prepate_multipart_data(
)
)

if (
isinstance(example, ls_schemas.ExampleUpdateWithAttachments)
and example.attachments_operations
):
attachments_operationsb = _dumps_json(example.attachments_operations)
parts.append(
(
f"{example_id}.attachments_operations",
(
None,
attachments_operationsb,
"application/json",
{},
),
)
)

encoder = rqtb_multipart.MultipartEncoder(parts, boundary=BOUNDARY)
if encoder.len <= 20_000_000: # ~20 MB
data = encoder.to_string()
Expand All @@ -3590,6 +3616,38 @@ def _prepate_multipart_data(

return encoder, data

def update_examples_multipart(
    self,
    *,
    dataset_id: ID_TYPE,
    updates: Optional[List[ls_schemas.ExampleUpdateWithAttachments]] = None,
) -> ls_schemas.UpsertExamplesResponse:
    """Update examples in a dataset through the multipart examples endpoint.

    The updates are encoded with ``self._prepate_multipart_data`` (without a
    dataset id in each example body) and sent as a single ``PATCH`` request
    to ``/v1/platform/datasets/{dataset_id}/examples``.

    Parameters
    ----------
    dataset_id : ID_TYPE
        Identifier of the dataset; interpolated into the request path.
    updates : Optional[List[ls_schemas.ExampleUpdateWithAttachments]]
        The example updates to send. ``None`` is treated as an empty list.

    Returns
    -------
    ls_schemas.UpsertExamplesResponse
        The decoded JSON body of the server response.

    Raises
    ------
    ValueError
        If the instance flags do not report
        ``dataset_examples_multipart_enabled`` as truthy.
    """
    # A missing/None flag mapping is treated the same as "feature disabled".
    instance_flags = self.info.instance_flags or {}
    multipart_enabled = instance_flags.get("dataset_examples_multipart_enabled", False)
    if not multipart_enabled:
        raise ValueError(
            "Your LangSmith version does not allow using the multipart examples endpoint, please update to the latest version."
        )

    pending_updates = [] if updates is None else updates
    encoder, payload = self._prepate_multipart_data(
        pending_updates, include_dataset_id=False
    )

    endpoint = f"/v1/platform/datasets/{dataset_id}/examples"
    # The encoder's content type is placed last so it overrides any
    # "Content-Type" entry already present in the client's default headers.
    request_headers = {**self._headers, "Content-Type": encoder.content_type}

    response = self.request_with_retries(
        "PATCH",
        endpoint,
        request_kwargs={"data": payload, "headers": request_headers},
    )
    ls_utils.raise_for_status_with_text(response)
    return response.json()

def upload_examples_multipart(
self,
*,
Expand Down Expand Up @@ -4072,6 +4130,7 @@ def update_example(
metadata: Optional[Dict] = None,
split: Optional[str | List[str]] = None,
dataset_id: Optional[ID_TYPE] = None,
attachments_operations: Optional[ls_schemas.AttachmentsOperations] = None,
) -> Dict[str, Any]:
"""Update a specific example.
Expand All @@ -4096,12 +4155,20 @@ def update_example(
Dict[str, Any]
The updated example.
"""
if attachments_operations is not None:
if not (self.info.instance_flags or {}).get(
"dataset_examples_multipart_enabled", False
):
raise ValueError(
"Your LangSmith version does not allow using the attachment operations, please update to the latest version."
)
example = dict(
inputs=inputs,
outputs=outputs,
dataset_id=dataset_id,
metadata=metadata,
split=split,
attachments_operations=attachments_operations,
)
response = self.request_with_retries(
"PATCH",
Expand All @@ -4121,6 +4188,9 @@ def update_examples(
metadata: Optional[Sequence[Optional[Dict]]] = None,
splits: Optional[Sequence[Optional[str | List[str]]]] = None,
dataset_ids: Optional[Sequence[Optional[ID_TYPE]]] = None,
attachments_operations: Optional[
Sequence[Optional[ls_schemas.AttachmentsOperations]]
] = None,
) -> Dict[str, Any]:
"""Update multiple examples.
Expand All @@ -4145,12 +4215,20 @@ def update_examples(
Dict[str, Any]
The response from the server (specifies the number of examples updated).
"""
if attachments_operations is not None:
if not (self.info.instance_flags or {}).get(
"dataset_examples_multipart_enabled", False
):
raise ValueError(
"Your LangSmith version does not allow using the attachment operations, please update to the latest version."
)
sequence_args = {
"inputs": inputs,
"outputs": outputs,
"metadata": metadata,
"splits": splits,
"dataset_ids": dataset_ids,
"attachments_operations": attachments_operations,
}
# Since inputs are required, we will check against them
examples_len = len(example_ids)
Expand All @@ -4168,8 +4246,9 @@ def update_examples(
"dataset_id": dataset_id_,
"metadata": metadata_,
"split": split_,
"attachments_operations": attachments_operations_,
}
for id_, in_, out_, metadata_, split_, dataset_id_ in zip(
for id_, in_, out_, metadata_, split_, dataset_id_, attachments_operations_ in zip(
example_ids,
inputs or [None] * len(example_ids),
outputs or [None] * len(example_ids),
Expand Down
17 changes: 17 additions & 0 deletions python/langsmith/schemas.py
Original file line number Diff line number Diff line change
Expand Up @@ -183,12 +183,24 @@ class ExampleSearch(ExampleBase):
id: UUID


class AttachmentsOperations(BaseModel):
    """Rename and retain operations to apply to attachments."""

    # Old attachment name -> new attachment name; defaults to an empty dict.
    rename: Dict[str, str] = Field(
        description="Mapping of old attachment names to new names",
        default_factory=dict,
    )
    # Names of the attachments to keep; defaults to an empty list.
    retain: List[str] = Field(
        description="List of attachment names to keep",
        default_factory=list,
    )


class ExampleUpdate(BaseModel):
"""Update class for Example."""

dataset_id: Optional[UUID] = None
inputs: Optional[Dict[str, Any]] = None
outputs: Optional[Dict[str, Any]] = None
attachments_operations: Optional[AttachmentsOperations] = None
metadata: Optional[Dict[str, Any]] = None
split: Optional[Union[str, List[str]]] = None

Expand All @@ -202,7 +214,12 @@ class ExampleUpdateWithAttachments(ExampleUpdate):
"""Example update with attachments."""

id: UUID
inputs: Dict[str, Any] = Field(default_factory=dict)
outputs: Optional[Dict[str, Any]] = Field(default=None)
metadata: Optional[Dict[str, Any]] = Field(default=None)
split: Optional[Union[str, List[str]]] = None
attachments: Optional[Attachments] = None
attachments_operations: Optional[AttachmentsOperations] = None


class DataType(str, Enum):
Expand Down
Loading

0 comments on commit 8bb0826

Please sign in to comment.