diff --git a/python/langsmith/_testing.py b/python/langsmith/_testing.py index d4a3305f1..8dd72fbcb 100644 --- a/python/langsmith/_testing.py +++ b/python/langsmith/_testing.py @@ -373,8 +373,7 @@ def _end_tests( def _serde_example_values(values: VT) -> VT: if values is None: return values - # Don't try to magically serialize Python objects, just use their REPRs. - bts = ls_client._dumps_json(values, serialize_py=False) + bts = ls_client._dumps_json(values) return orjson.loads(bts) diff --git a/python/langsmith/client.py b/python/langsmith/client.py index 6df0f9004..fcef0b4b6 100644 --- a/python/langsmith/client.py +++ b/python/langsmith/client.py @@ -13,17 +13,21 @@ from __future__ import annotations import atexit +import base64 import collections import concurrent.futures as cf import contextlib import datetime +import decimal import functools import importlib import importlib.metadata import io +import ipaddress import json import logging import os +import pathlib import random import re import sys @@ -68,11 +72,20 @@ from langsmith import utils as ls_utils from langsmith._internal._beta_decorator import warn_beta +try: + from zoneinfo import ZoneInfo # type: ignore[import-not-found] +except ImportError: + + class ZoneInfo: # type: ignore[no-redef] + """Introduced in python 3.9.""" + + if TYPE_CHECKING: import pandas as pd # type: ignore from langsmith.evaluation import evaluator as ls_evaluator + logger = logging.getLogger(__name__) _urllib3_logger = logging.getLogger("urllib3.connectionpool") @@ -162,74 +175,83 @@ def _default_retry_config() -> Retry: return ls_utils.LangSmithRetry(**retry_params) # type: ignore -_MAX_DEPTH = 2 - - -def _simple_default(obj: Any) -> Any: - # Don't traverse into nested objects +def _simple_default(obj): try: + # Only need to handle types that orjson doesn't serialize by default + # https://github.com/ijl/orjson#serialize if isinstance(obj, datetime.datetime): return obj.isoformat() if isinstance(obj, uuid.UUID): return str(obj) - return json.loads(json.dumps(obj)) + if hasattr(obj, "model_dump") and callable(obj.model_dump): + return obj.model_dump() + elif hasattr(obj, "dict") and callable(obj.dict): + return obj.dict() + elif hasattr(obj, "_asdict") and callable(obj._asdict): + return obj._asdict() + elif isinstance(obj, BaseException): + return {"error": type(obj).__name__, "message": str(obj)} + elif isinstance(obj, (set, frozenset, collections.deque)): + return list(obj) + elif isinstance(obj, (datetime.timezone, ZoneInfo)): + return obj.tzname(None) + elif isinstance(obj, datetime.timedelta): + return obj.total_seconds() + elif isinstance(obj, decimal.Decimal): + if obj.as_tuple().exponent >= 0: + return int(obj) + else: + return float(obj) + elif isinstance( + obj, + ( + ipaddress.IPv4Address, + ipaddress.IPv4Interface, + ipaddress.IPv4Network, + ipaddress.IPv6Address, + ipaddress.IPv6Interface, + ipaddress.IPv6Network, + pathlib.Path, + ), + ): + return str(obj) + elif isinstance(obj, re.Pattern): + return obj.pattern + elif isinstance(obj, (bytes, bytearray)): + return base64.b64encode(obj).decode() + return repr(obj) except BaseException as e: logger.debug(f"Failed to serialize {type(obj)} to JSON: {e}") - return repr(obj) + return repr(obj) -def _serialize_json(obj: Any, depth: int = 0, serialize_py: bool = True) -> Any: +def _serialize_json(obj: Any) -> Any: try: - if depth >= _MAX_DEPTH: - try: - return orjson.loads(_dumps_json_single(obj)) - except BaseException: - return repr(obj) - if isinstance(obj, bytes): - return obj.decode("utf-8") if isinstance(obj, (set, tuple)): - return orjson.loads(_dumps_json_single(list(obj))) + if hasattr(obj, "_asdict") and callable(obj._asdict): + # NamedTuple + return obj._asdict() + return list(obj) serialization_methods = [ - ("model_dump_json", True), # Pydantic V2 - ("json", True), # Pydantic V1 - ("to_json", False), # dataclass_json ("model_dump", True), # Pydantic V2 with non-serializable fields - ("dict", False), # Pydantic V1 with non-serializable fields + ("dict", False), # Pydantic V1 with non-serializable field + ("to_dict", False), # dataclasses-json ] for attr, exclude_none in serialization_methods: if hasattr(obj, attr) and callable(getattr(obj, attr)): try: method = getattr(obj, attr) - json_str = ( + return ( method(exclude_none=exclude_none) if exclude_none else method() ) - if isinstance(json_str, str): - return json.loads(json_str) - return orjson.loads( - _dumps_json( - json_str, depth=depth + 1, serialize_py=serialize_py - ) - ) except Exception as e: - logger.debug(f"Failed to serialize {type(obj)} to JSON: {e}") + logger.error( + f"Failed to use {attr} to serialize {type(obj)} to" + f" JSON: {repr(e)}" + ) pass - if serialize_py: - all_attrs = {} - if hasattr(obj, "__slots__"): - all_attrs.update( - {slot: getattr(obj, slot, None) for slot in obj.__slots__} - ) - if hasattr(obj, "__dict__"): - all_attrs.update(vars(obj)) - if all_attrs: - filtered = { - k: v if v is not obj else repr(v) for k, v in all_attrs.items() - } - return orjson.loads( - _dumps_json(filtered, depth=depth + 1, serialize_py=serialize_py) - ) - return repr(obj) + return _simple_default(obj) except BaseException as e: logger.debug(f"Failed to serialize {type(obj)} to JSON: {e}") return repr(obj) @@ -247,7 +269,7 @@ def _dumps_json_single( try: return orjson.dumps( obj, - default=default, + default=default or _simple_default, option=orjson.OPT_SERIALIZE_NUMPY | orjson.OPT_SERIALIZE_DATACLASS | orjson.OPT_SERIALIZE_UUID @@ -270,7 +292,7 @@ def _dumps_json_single( return result -def _dumps_json(obj: Any, depth: int = 0, serialize_py: bool = True) -> bytes: +def _dumps_json(obj: Any, depth: int = 0) -> bytes: """Serialize an object to a JSON formatted string. Parameters @@ -285,9 +307,7 @@ def _dumps_json(obj: Any, depth: int = 0, serialize_py: bool = True) -> bytes: str The JSON formatted string. """ - return _dumps_json_single( - obj, functools.partial(_serialize_json, depth=depth, serialize_py=serialize_py) - ) + return _dumps_json_single(obj, _serialize_json) def close_session(session: requests.Session) -> None: diff --git a/python/tests/unit_tests/test_client.py b/python/tests/unit_tests/test_client.py index 0e648ffc4..2e8b2043a 100644 --- a/python/tests/unit_tests/test_client.py +++ b/python/tests/unit_tests/test_client.py @@ -7,7 +7,6 @@ import json import math import sys -import threading import time import uuid import warnings @@ -15,11 +14,10 @@ from datetime import datetime, timezone from enum import Enum from io import BytesIO -from typing import Any, NamedTuple, Optional, Type, Union +from typing import Dict, NamedTuple, Optional, Type, Union from unittest import mock from unittest.mock import MagicMock, patch -import attr import dataclasses_json import orjson import pytest @@ -690,18 +688,20 @@ def __init__(self, x: int) -> None: self.a_dict = {"foo": "bar"} self.my_bytes = b"foo" + def __repr__(self) -> str: + return "I fell back" + + def __hash__(self) -> int: + return 1 + class ClassWithTee: def __init__(self) -> None: tee_a, tee_b = itertools.tee(range(10)) self.tee_a = tee_a self.tee_b = tee_b - class MyClassWithSlots: - __slots__ = ["x", "y"] - - def __init__(self, x: int) -> None: - self.x = x - self.y = "y" + def __repr__(self): + return "tee_a, tee_b" class MyPydantic(BaseModel): foo: str @@ -719,11 +719,11 @@ class MyEnum(str, Enum): FOO = "foo" BAR = "bar" - class ClassWithFakeJson: - def json(self): + class ClassWithFakeDict: + def dict(self) -> Dict: raise ValueError("This should not be called") - def to_json(self) -> dict: + def to_dict(self) -> Dict: return {"foo": "bar"} @dataclasses_json.dataclass_json @@ -731,40 +731,9 @@ def to_json(self) -> dict: class Person: name: str - @attr.dataclass - class AttrDict: - foo: str = attr.ib() - bar: int - uid = uuid.uuid4() current_time = datetime.now() - class NestedClass: - __slots__ = ["person", "lock"] - - def __init__(self) -> None: - self.person = Person(name="foo") - self.lock = [threading.Lock()] - - class CyclicClass: - def __init__(self) -> None: - self.cyclic = self - - def __repr__(self) -> str: - return "SoCyclic" - - class CyclicClass2: - def __init__(self) -> None: - self.cyclic: Any = None - self.other: Any = None - - def __repr__(self) -> str: - return "SoCyclic2" - - cycle_2 = CyclicClass2() - cycle_2.cyclic = CyclicClass2() - cycle_2.cyclic.other = cycle_2 - class MyNamedTuple(NamedTuple): foo: str bar: int @@ -774,59 +743,39 @@ class MyNamedTuple(NamedTuple): "time": current_time, "my_class": MyClass(1), "class_with_tee": ClassWithTee(), - "my_slotted_class": MyClassWithSlots(1), "my_dataclass": MyDataclass("foo", 1), "my_enum": MyEnum.FOO, "my_pydantic": MyPydantic(foo="foo", bar=1), - "person": Person(name="foo"), + "person": Person(name="foo_person"), "a_bool": True, "a_none": None, "a_str": "foo", "an_int": 1, "a_float": 1.1, - "nested_class": NestedClass(), - "attr_dict": AttrDict(foo="foo", bar=1), "named_tuple": MyNamedTuple(foo="foo", bar=1), - "cyclic": CyclicClass(), - "cyclic2": cycle_2, - "fake_json": ClassWithFakeJson(), + "fake_json": ClassWithFakeDict(), + "some_set": set("a"), + "set_with_class": set([MyClass(1)]), } res = orjson.loads(_dumps_json(to_serialize)) expected = { "uid": str(uid), "time": current_time.isoformat(), - "my_class": { - "x": 1, - "y": "y", - "a_list": [1, 2, 3], - "a_tuple": [1, 2, 3], - "a_set": [1, 2, 3], - "a_dict": {"foo": "bar"}, - "my_bytes": "foo", - }, - "class_with_tee": lambda val: all( - ["_tee object" in val[key] for key in ["tee_a", "tee_b"]] - ), - "my_slotted_class": {"x": 1, "y": "y"}, + "my_class": "I fell back", + "class_with_tee": "tee_a, tee_b", "my_dataclass": {"foo": "foo", "bar": 1}, "my_enum": "foo", "my_pydantic": {"foo": "foo", "bar": 1}, - "person": {"name": "foo"}, + "person": {"name": "foo_person"}, "a_bool": True, "a_none": None, "a_str": "foo", "an_int": 1, "a_float": 1.1, - "nested_class": ( - lambda val: val["person"] == {"name": "foo"} - and "_thread.lock object" in str(val.get("lock")) - ), - "attr_dict": {"foo": "foo", "bar": 1}, - "named_tuple": ["foo", 1], - "cyclic": {"cyclic": "SoCyclic"}, - # We don't really care about this case just want to not err - "cyclic2": lambda _: True, + "named_tuple": {"bar": 1, "foo": "foo"}, "fake_json": {"foo": "bar"}, + "some_set": ["a"], + "set_with_class": ["I fell back"], } assert set(expected) == set(res) for k, v in expected.items(): @@ -838,6 +787,20 @@ class MyNamedTuple(NamedTuple): except AssertionError: raise + @dataclasses.dataclass + class CyclicClass: + other: Optional["CyclicClass"] + + def __repr__(self) -> str: + return "my_cycles..." + + my_cyclic = CyclicClass(other=CyclicClass(other=None)) + my_cyclic.other.other = my_cyclic # type: ignore + + res = orjson.loads(_dumps_json({"cyclic": my_cyclic})) + assert res == {"cyclic": "my_cycles..."} + expected = {"foo": "foo", "bar": 1} + def test__dumps_json(): chars = "".join(chr(cp) for cp in range(0, sys.maxunicode + 1))