perf: [LS-2561] Stream compress multipart runs #1316

Merged · 57 commits · Dec 21, 2024
Changes from 13 commits
Commits
57 commits
a5efbb8
start of compression
angus-langchain Dec 10, 2024
c10a8ad
manually encode
angus-langchain Dec 10, 2024
d456776
set boundary
angus-langchain Dec 10, 2024
0359235
add zstandard
angus-langchain Dec 10, 2024
08bce98
set limits from config
angus-langchain Dec 10, 2024
800472a
add slots
angus-langchain Dec 10, 2024
d4e45e1
lint
angus-langchain Dec 10, 2024
46c8740
fix mypy
angus-langchain Dec 10, 2024
a5cba8d
implement correct timeouts
angus-langchain Dec 10, 2024
c32cb0c
stream instead of read data from buffer
angus-langchain Dec 10, 2024
2aafc20
fix client type
angus-langchain Dec 10, 2024
03e4e02
separate compressed buffer from tracing queue
angus-langchain Dec 10, 2024
c952536
clean up update multipart
angus-langchain Dec 10, 2024
c3fdd36
address comments
angus-langchain Dec 10, 2024
d6b186d
just write directly to compressor instead of streaming
angus-langchain Dec 10, 2024
f061c0b
set tracing queue to none
angus-langchain Dec 10, 2024
8a0a60e
send multipart req
angus-langchain Dec 10, 2024
b2113ba
remove flush
angus-langchain Dec 11, 2024
982429a
remove print
angus-langchain Dec 11, 2024
5b58940
set compression level
angus-langchain Dec 11, 2024
f2e5ed9
remove prints
angus-langchain Dec 11, 2024
e7dc6bc
pass buffer directly to request
angus-langchain Dec 11, 2024
3ab6b39
resolve conflict
angus-langchain Dec 11, 2024
fb8fa96
lint
angus-langchain Dec 11, 2024
9d0a4ec
my fixes
angus-langchain Dec 11, 2024
f3e7971
black reformatting
angus-langchain Dec 11, 2024
066b9b6
multithreaded compression
angus-langchain Dec 11, 2024
ec15660
add parallel workers for sending multipart req
angus-langchain Dec 12, 2024
35e9843
reformatting
angus-langchain Dec 12, 2024
59b5f27
black reformat
angus-langchain Dec 12, 2024
ce44b25
mypy
angus-langchain Dec 12, 2024
181f839
increase payload size to 20mb
angus-langchain Dec 12, 2024
0120dce
use multipart
angus-langchain Dec 12, 2024
e2ada65
use threadpoolexecutor
angus-langchain Dec 12, 2024
25f4f19
fix thread garbage collection
angus-langchain Dec 12, 2024
5b0cca4
add flush method
angus-langchain Dec 12, 2024
f59f7be
return early
angus-langchain Dec 12, 2024
b76e662
lint
angus-langchain Dec 13, 2024
efa4bd6
wait
angus-langchain Dec 13, 2024
0e06bde
signal bg threads data is available instead of sleeping
angus-langchain Dec 13, 2024
f997895
improve buffer checks
angus-langchain Dec 13, 2024
fbc217f
mypy
angus-langchain Dec 13, 2024
7b6c201
Use more threads for backend requests
angus-langchain Dec 19, 2024
35d46ed
fix futures waiting
angus-langchain Dec 19, 2024
dbef2ec
remove unused slot
angus-langchain Dec 19, 2024
6fad596
Flush background threads
angus-langchain Dec 19, 2024
5cc947a
make boundary constant
angus-langchain Dec 19, 2024
d4b2aa4
Remove slot for bool val
angus-langchain Dec 19, 2024
63e55f7
Use a single join() rather than copying the header strings
angus-langchain Dec 19, 2024
874c748
Add zstandard license
angus-langchain Dec 19, 2024
0b3d6b8
lint
angus-langchain Dec 20, 2024
7739939
Create compressed runs object
angus-langchain Dec 20, 2024
3ec9b6e
Make zstd optional
angus-langchain Dec 20, 2024
f9aac67
Make zstandard level configurable
angus-langchain Dec 20, 2024
2ac7a35
mypy ignore optional imports
angus-langchain Dec 20, 2024
3b291c3
lint
angus-langchain Dec 20, 2024
c64fb92
poetry lock
angus-langchain Dec 20, 2024
90 changes: 89 additions & 1 deletion python/langsmith/_internal/_background_thread.py
@@ -1,18 +1,24 @@
from __future__ import annotations

import functools
import io
import logging
import sys
import threading
import time
import weakref
from queue import Empty, Queue
from typing import (
TYPE_CHECKING,
Iterable,
List,
Optional,
Union,
cast,
)

import zstandard as zstd

from langsmith import schemas as ls_schemas
from langsmith._internal._constants import (
_AUTO_SCALE_DOWN_NEMPTY_TRIGGER,
@@ -88,6 +94,42 @@ def _tracing_thread_drain_queue(
return next_batch


def _tracing_thread_drain_compressed_buffer(
client: Client,
size_limit: int = 100,
size_limit_bytes: int = 50 * 1024 * 1024
) -> Optional[Iterable[bytes]]:
with client._buffer_lock:
current_size = client.compressed_runs_buffer.tell()

# Check if we should send now
if not (client._run_count >= size_limit or current_size >= size_limit_bytes):
return None

# Write final boundary and close compression stream
client.compressor_writer.write(f'--{client.boundary}--\r\n'.encode())
client.compressor_writer.flush()
client.compressor_writer.close()

client.compressed_runs_buffer.seek(0)

def data_stream() -> Iterable[bytes]:
chunk_size = 65536
while True:
chunk = client.compressed_runs_buffer.read(chunk_size)
if not chunk:
break
yield chunk

# Reinitialize for next batch
client.compressed_runs_buffer = io.BytesIO()
client.compressor = zstd.ZstdCompressor()
client.compressor_writer = client.compressor.stream_writer(
client.compressed_runs_buffer, closefd=False)
client._run_count = 0

return data_stream()

def _tracing_thread_handle_batch(
client: Client,
tracing_queue: Queue,
@@ -123,7 +165,7 @@ def _ensure_ingest_config(
) -> ls_schemas.BatchIngestConfig:
default_config = ls_schemas.BatchIngestConfig(
use_multipart_endpoint=False,
size_limit_bytes=None, # Note this field is not used here
size_limit_bytes=50 * 1024 * 1024,
size_limit=100,
scale_up_nthreads_limit=_AUTO_SCALE_UP_NTHREADS_LIMIT,
scale_up_qsize_trigger=_AUTO_SCALE_UP_QSIZE_TRIGGER,
@@ -199,6 +241,52 @@ def keep_thread_active() -> bool:
):
_tracing_thread_handle_batch(client, tracing_queue, next_batch, use_multipart)

def tracing_control_thread_func_compress(client_ref: weakref.ref[Client]) -> None:
client = client_ref()
if client is None:
return
batch_ingest_config = _ensure_ingest_config(client.info)
size_limit: int = batch_ingest_config["size_limit"]
size_limit_bytes = batch_ingest_config.get("size_limit_bytes", 50 * 1024 * 1024)
assert size_limit_bytes is not None


def keep_thread_active() -> bool:
# if `client.cleanup()` was called, stop thread
if not client or (
hasattr(client, "_manual_cleanup")
and client._manual_cleanup
):
return False
if not threading.main_thread().is_alive():
# main thread is dead. should not be active
return False
return True

while keep_thread_active():
try:
data_stream = _tracing_thread_drain_compressed_buffer(
client, size_limit, size_limit_bytes)
if data_stream is not None:
for chunk in data_stream:
time.sleep(0.150) # Backend call simulation
else:
time.sleep(0.05)
except Exception:
logger.error("Error in tracing compression thread", exc_info=True)
time.sleep(0.1) # Wait before retrying on error

# Drain the buffer on exit
try:
final_data_stream = _tracing_thread_drain_compressed_buffer(
client, size_limit=1, size_limit_bytes=1) # Force final drain
if final_data_stream is not None:
for chunk in final_data_stream:
time.sleep(0.150) # Final backend calls
except Exception:
logger.error("Error in final buffer drain", exc_info=True)



def _tracing_sub_thread_func(
client_ref: weakref.ref[Client],
35 changes: 35 additions & 0 deletions python/langsmith/_internal/_operations.py
@@ -5,6 +5,8 @@
import uuid
from typing import Literal, Optional, Union, cast

import zstandard

from langsmith import schemas as ls_schemas
from langsmith._internal import _orjson
from langsmith._internal._multipart import MultipartPart, MultipartPartsAndContext
@@ -271,3 +273,36 @@ def serialized_run_operation_to_multipart_parts_and_context(
acc_parts,
f"trace={op.trace_id},id={op.id}",
)


def compress_multipart_parts_and_context(
parts_and_context: MultipartPartsAndContext,
compressor_writer: zstandard.ZstdCompressionWriter,
boundary: str
) -> None:
for part_name, (filename, data, content_type, headers) in parts_and_context.parts:
part_header = f'--{boundary}\r\n'
part_header += f'Content-Disposition: form-data; name="{part_name}"'

if filename:
part_header += f'; filename="{filename}"'

part_header += f'\r\nContent-Type: {content_type}\r\n'

for header_name, header_value in headers.items():
part_header += f'{header_name}: {header_value}\r\n'

part_header += '\r\n'
compressor_writer.write(part_header.encode())

if isinstance(data, (bytes, bytearray)):
with memoryview(data) as view:
chunk_size = 1024 * 1024 # 1MB chunks
for i in range(0, len(view), chunk_size):
chunk = view[i:i + chunk_size]
compressor_writer.write(chunk)
else:
compressor_writer.write(str(data).encode())

# Write part terminator
compressor_writer.write(b'\r\n')
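
A minimal sketch (not part of the diff) of the framing this function writes, using an invented boundary, part name, and payload; it assumes the import paths match the modules touched in this PR:

import io
import zstandard

from langsmith._internal._multipart import MultipartPartsAndContext
from langsmith._internal._operations import compress_multipart_parts_and_context

buffer = io.BytesIO()
writer = zstandard.ZstdCompressor().stream_writer(buffer, closefd=False)
boundary = "example-boundary"  # the real client uses the BOUNDARY constant

parts = MultipartPartsAndContext(
    [("post.run-id", ("run-id", b'{"name": "my-run"}', "application/json", {}))],
    "trace=...,id=...",  # context string; not written to the stream
)
compress_multipart_parts_and_context(parts, writer, boundary)
writer.close()  # closefd=False keeps `buffer` usable afterwards

# Decompressing shows the uncompressed multipart framing written above:
#   --example-boundary\r\n
#   Content-Disposition: form-data; name="post.run-id"; filename="run-id"\r\n
#   Content-Type: application/json\r\n
#   \r\n
#   {"name": "my-run"}\r\n
print(zstandard.ZstdDecompressor().decompressobj().decompress(buffer.getvalue()))
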
55 changes: 51 additions & 4 deletions python/langsmith/client.py
@@ -57,6 +57,7 @@
from urllib import parse as urllib_parse

import requests
import zstandard
from requests import adapters as requests_adapters
from requests_toolbelt import ( # type: ignore[import-untyped]
multipart as rqtb_multipart,
@@ -76,6 +77,9 @@
from langsmith._internal._background_thread import (
tracing_control_thread_func as _tracing_control_thread_func,
)
from langsmith._internal._background_thread import (
tracing_control_thread_func_compress as _tracing_control_thread_func_compress,
)
from langsmith._internal._beta_decorator import warn_beta
from langsmith._internal._constants import (
_AUTO_SCALE_UP_NTHREADS_LIMIT,
@@ -90,6 +94,7 @@
SerializedFeedbackOperation,
SerializedRunOperation,
combine_serialized_queue_operations,
compress_multipart_parts_and_context,
serialize_feedback_dict,
serialize_run_dict,
serialized_feedback_operation_to_multipart_parts_and_context,
@@ -388,6 +393,13 @@ class Client:
"_settings",
"_manual_cleanup",
"_pyo3_client",
"compress_traces",
"boundary",
"compressor",
"compressor_writer",
"_run_count",
"_buffer_lock",
"compressed_runs_buffer",
]

def __init__(
@@ -489,6 +501,16 @@ def __init__(
# Create a session and register a finalizer to close it
session_ = session if session else requests.Session()
self.session = session_
self.compress_traces = os.getenv("LANGSMITH_COMPRESS_TRACES") == "true"
if self.compress_traces:
self.boundary = BOUNDARY
self.compressor: zstandard.ZstdCompressor = zstandard.ZstdCompressor()
self.compressed_runs_buffer: io.BytesIO = io.BytesIO()
self.compressor_writer: zstandard.ZstdCompressionWriter = self.compressor.stream_writer(
self.compressed_runs_buffer, closefd=False)
self._buffer_lock: threading.Lock = threading.Lock()
self._run_count: int = 0

self._info = (
info
if info is None or isinstance(info, ls_schemas.LangSmithInfo)
Expand All @@ -497,7 +519,14 @@ def __init__(
weakref.finalize(self, close_session, self.session)
atexit.register(close_session, session_)
# Initialize auto batching
if auto_batch_tracing:
if auto_batch_tracing and self.compress_traces:
threading.Thread(
target=_tracing_control_thread_func_compress,
# arg must be a weakref to self to avoid the Thread object
# preventing garbage collection of the Client object
args=(weakref.ref(self),),
).start()
elif auto_batch_tracing:
self.tracing_queue: Optional[PriorityQueue] = PriorityQueue()

threading.Thread(
@@ -1289,6 +1318,14 @@ def create_run(
):
if self._pyo3_client is not None:
self._pyo3_client.create_run(run_create)
if self.compressed_runs_buffer is not None:
serialized_op = serialize_run_dict("post", run_create)
multipart_form = serialized_run_operation_to_multipart_parts_and_context(
serialized_op)
with self._buffer_lock:
Review comment (Collaborator):
hm.. I'm not sure what way there is around it given that it seems like it's not thread safe, but this is acquiring a lock in the main thread which feels detrimental to application runtime

Reply (Contributor, author):
I can't see a way around this. If you view the benchmarks in the notion doc the compression minimally affects user perceived time, whereas flush time becomes significantly faster

compress_multipart_parts_and_context(
multipart_form, self.compressor_writer, self.boundary)
self._run_count += 1
elif self.tracing_queue is not None:
serialized_op = serialize_run_dict("post", run_create)
self.tracing_queue.put(
@@ -1730,6 +1767,7 @@ def update_run(
data["attachments"] = attachments
use_multipart = (
self.tracing_queue is not None
or self.compressed_runs_buffer is not None
# batch ingest requires trace_id and dotted_order to be set
and data["trace_id"] is not None
and data["dotted_order"] is not None
@@ -1752,9 +1790,18 @@
data["events"] = events
if data["extra"]:
self._insert_runtime_env([data])
if use_multipart and self.tracing_queue is not None:
# not collecting attachments currently, use empty dict
serialized_op = serialize_run_dict(operation="patch", payload=data)
if not use_multipart:
self._update_run(data)
return

serialized_op = serialize_run_dict(operation="patch", payload=data)
if self.compressed_runs_buffer is not None:
multipart_form = serialized_run_operation_to_multipart_parts_and_context(serialized_op)
with self._buffer_lock:
compress_multipart_parts_and_context(
multipart_form, self.compressor_writer, self.boundary)
self._run_count += 1
elif self.tracing_queue is not None:
self.tracing_queue.put(
TracingQueueItem(data["dotted_order"], serialized_op)
)
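
For completeness, a short sketch of how a user opts in to this path, based on the environment check in __init__ above. The only new knob in this diff is the LANGSMITH_COMPRESS_TRACES flag; auto_batch_tracing is assumed to keep its default of True:

import os

# Must be set before the Client is constructed, since __init__ reads it once.
os.environ["LANGSMITH_COMPRESS_TRACES"] = "true"

from langsmith import Client

client = Client()
# create_run/update_run now serialize each run to multipart parts and write them
# into the shared zstd stream under client._buffer_lock, while the
# tracing_control_thread_func_compress background thread drains and sends batches.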