perf: [LS-2561] Stream compress multipart runs #1316

Merged Dec 21, 2024 - 57 commits - showing changes from 46 commits
All commits by angus-langchain:

a5efbb8  start of compression (Dec 10, 2024)
c10a8ad  manually encode (Dec 10, 2024)
d456776  set boundary (Dec 10, 2024)
0359235  add zstandard (Dec 10, 2024)
08bce98  set limits from config (Dec 10, 2024)
800472a  add slots (Dec 10, 2024)
d4e45e1  lint (Dec 10, 2024)
46c8740  fix mypy (Dec 10, 2024)
a5cba8d  implement correct timeouts (Dec 10, 2024)
c32cb0c  stream instead of read data from buffer (Dec 10, 2024)
2aafc20  fix client type (Dec 10, 2024)
03e4e02  separate compressed buffer from tracing queue (Dec 10, 2024)
c952536  clean up update multipart (Dec 10, 2024)
c3fdd36  address comments (Dec 10, 2024)
d6b186d  just write directly to compressor instead of streaming (Dec 10, 2024)
f061c0b  set trcing queue to none (Dec 10, 2024)
8a0a60e  send multipart req (Dec 10, 2024)
b2113ba  remove flush (Dec 11, 2024)
982429a  remove print (Dec 11, 2024)
5b58940  set compression level (Dec 11, 2024)
f2e5ed9  remove prints (Dec 11, 2024)
e7dc6bc  pass buffer directly to request (Dec 11, 2024)
3ab6b39  resolve conflict (Dec 11, 2024)
fb8fa96  lint (Dec 11, 2024)
9d0a4ec  my fixes (Dec 11, 2024)
f3e7971  black reformatting (Dec 11, 2024)
066b9b6  multithreaded compression (Dec 11, 2024)
ec15660  add parallel works for sending multipart req (Dec 12, 2024)
35e9843  reformatting (Dec 12, 2024)
59b5f27  black reformat (Dec 12, 2024)
ce44b25  mypy (Dec 12, 2024)
181f839  increase payload size to 20mb (Dec 12, 2024)
0120dce  use multipart (Dec 12, 2024)
e2ada65  use threadpoolexecutor (Dec 12, 2024)
25f4f19  fix thread garbage collection (Dec 12, 2024)
5b0cca4  add flush method (Dec 12, 2024)
f59f7be  return early (Dec 12, 2024)
b76e662  lint (Dec 13, 2024)
efa4bd6  wait (Dec 13, 2024)
0e06bde  signal bg threads data is available instead of sleeping (Dec 13, 2024)
f997895  improve buffer checks (Dec 13, 2024)
fbc217f  mypy (Dec 13, 2024)
7b6c201  Use more threads for backend requests (Dec 19, 2024)
35d46ed  fix futures waiting (Dec 19, 2024)
dbef2ec  remove unused slot (Dec 19, 2024)
6fad596  Flush background threads (Dec 19, 2024)
5cc947a  make boundary constant (Dec 19, 2024)
d4b2aa4  Remove slot for bool val (Dec 19, 2024)
63e55f7  Use a single join() rather than copying the header strings (Dec 19, 2024)
874c748  Add zstandard license (Dec 19, 2024)
0b3d6b8  lint (Dec 20, 2024)
7739939  Create compressed runs object (Dec 20, 2024)
3ec9b6e  Make zstd optional (Dec 20, 2024)
f9aac67  Make zstandard level configurable (Dec 20, 2024)
2ac7a35  mypy ignore optional imports (Dec 20, 2024)
3b291c3  lint (Dec 20, 2024)
c64fb92  poetry lock (Dec 20, 2024)
123 changes: 123 additions & 0 deletions python/langsmith/_internal/_background_thread.py
@@ -1,18 +1,24 @@
from __future__ import annotations

Check notice on line 1 in python/langsmith/_internal/_background_thread.py (GitHub Actions / benchmark)

Benchmark results:

    create_5_000_run_trees:                        698 ms +- 77 ms     (unstable: std dev is 11% of mean)
    create_10_000_run_trees:                       1.39 sec +- 0.17 sec (unstable: std dev is 12% of mean)
    create_20_000_run_trees:                       1.36 sec +- 0.17 sec (unstable: std dev is 12% of mean)
    dumps_class_nested_py_branch_and_leaf_200x400: 691 us +- 14 us
    dumps_class_nested_py_leaf_50x100:             24.9 ms +- 0.2 ms
    dumps_class_nested_py_leaf_100x200:            104 ms +- 2 ms
    dumps_dataclass_nested_50x100:                 25.6 ms +- 0.7 ms
    dumps_pydantic_nested_50x100:                  71.4 ms +- 16.3 ms  (unstable: std dev is 23% of mean)
    dumps_pydanticv1_nested_50x100:                196 ms +- 3 ms

For the unstable results, pyperf suggests rerunning with more runs, values, and/or loops, running `python -m pyperf system tune` to reduce system jitter, and using `pyperf stats`, `pyperf dump`, and `pyperf hist` to analyze results.

Check notice on line 1 in python/langsmith/_internal/_background_thread.py (GitHub Actions / benchmark)

Comparison against main:

+-----------------------------------------------+----------+------------------------+
| Benchmark                                     | main     | changes                |
+===============================================+==========+========================+
| dumps_pydanticv1_nested_50x100                | 220 ms   | 196 ms: 1.12x faster   |
| create_5_000_run_trees                        | 731 ms   | 698 ms: 1.05x faster   |
| create_20_000_run_trees                       | 1.39 sec | 1.36 sec: 1.03x faster |
| create_10_000_run_trees                       | 1.41 sec | 1.39 sec: 1.01x faster |
| dumps_class_nested_py_leaf_50x100             | 25.1 ms  | 24.9 ms: 1.01x faster  |
| dumps_class_nested_py_branch_and_leaf_200x400 | 690 us   | 691 us: 1.00x slower   |
| dumps_dataclass_nested_50x100                 | 25.5 ms  | 25.6 ms: 1.00x slower  |
| dumps_class_nested_py_leaf_100x200            | 104 ms   | 104 ms: 1.00x slower   |
| dumps_pydantic_nested_50x100                  | 65.9 ms  | 71.4 ms: 1.08x slower  |
+-----------------------------------------------+----------+------------------------+
| Geometric mean                                | (ref)    | 1.01x faster           |
+-----------------------------------------------+----------+------------------------+

import concurrent.futures as cf
import functools
import io
import logging
import sys
import threading
import weakref
from multiprocessing import cpu_count
from queue import Empty, Queue
from typing import (
    TYPE_CHECKING,
    List,
    Optional,
    Union,
    cast,
)

import zstandard as zstd

from langsmith import schemas as ls_schemas
from langsmith._internal._constants import (
    _AUTO_SCALE_DOWN_NEMPTY_TRIGGER,
@@ -30,6 +36,8 @@

logger = logging.getLogger("langsmith.client")

HTTP_REQUEST_THREAD_POOL = cf.ThreadPoolExecutor(max_workers=cpu_count() * 3)
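The shared executor is deliberately oversized relative to cores (3x `cpu_count()`) because the submitted work is I/O-bound HTTP requests that spend most of their time blocked. A minimal, hypothetical illustration of the submit-and-wait pattern this module uses (names here are not from the PR):

```python
import concurrent.futures as cf
from multiprocessing import cpu_count

# I/O-bound work tolerates many more threads than cores; 3x is this PR's choice.
pool = cf.ThreadPoolExecutor(max_workers=cpu_count() * 3)

# Submit work and keep the futures, mirroring client._futures.add(future).
futures = {pool.submit(lambda x=i: x * x) for i in range(8)}

# cf.wait blocks until all futures complete, as the final-drain path does.
done, pending = cf.wait(futures)
assert not pending
assert sorted(f.result() for f in done) == [0, 1, 4, 9, 16, 25, 36, 49]
pool.shutdown()
```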


@functools.total_ordering
class TracingQueueItem:
@@ -88,6 +96,42 @@
    return next_batch


def _tracing_thread_drain_compressed_buffer(
    client: Client, size_limit: int = 100, size_limit_bytes: int | None = 20_971_520
) -> Optional[io.BytesIO]:
    assert client.compressed_runs_buffer is not None
    assert client.compressor_writer is not None
    with client._buffer_lock:
        current_size = client.compressed_runs_buffer.tell()

        if size_limit is not None and size_limit <= 0:
            raise ValueError(f"size_limit must be positive; got {size_limit}")
        if size_limit_bytes is not None and size_limit_bytes < 0:
            raise ValueError(
                f"size_limit_bytes must be nonnegative; got {size_limit_bytes}"
            )

        if (size_limit_bytes is None or current_size < size_limit_bytes) and (
            size_limit is None or client._run_count < size_limit
[Collaborator] Why 100?
[Author] This is the limit that is currently used. It also performed well in flush time benchmarks. It can also be overridden by the batch ingest config.
        ):
            return None

        # Write final boundary and close compression stream
        client.compressor_writer.write(f"--{client._boundary}--\r\n".encode())
        client.compressor_writer.close()

        filled_buffer = client.compressed_runs_buffer

        client.compressed_runs_buffer = io.BytesIO()
        client.compressor_writer = zstd.ZstdCompressor(
            level=3, threads=-1
        ).stream_writer(client.compressed_runs_buffer, closefd=False)
        client._run_count = 0

        filled_buffer.seek(0)

[Contributor] if we have a buffer we can just pass it directly to requests no? no need for iterator

        return filled_buffer
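The drain step above follows a swap pattern: finalize the current compression stream, hand the filled buffer off for sending, and start a fresh buffer and compressor. A minimal sketch of that pattern with hypothetical names, using stdlib zlib as a stand-in for zstandard and omitting the lock and multipart framing:

```python
import io
import zlib


class CompressedBuffer:
    """Accumulate compressed writes; drain() hands off the filled buffer
    and swaps in a fresh one, mirroring _tracing_thread_drain_compressed_buffer."""

    def __init__(self) -> None:
        self._start()

    def _start(self) -> None:
        self.buffer = io.BytesIO()
        self.compressor = zlib.compressobj(level=3)
        self.count = 0

    def write(self, data: bytes) -> None:
        self.buffer.write(self.compressor.compress(data))
        self.count += 1

    def drain(self, size_limit: int):
        # Below the limit: leave the stream open and return nothing.
        if self.count < size_limit:
            return None
        # Finalize the stream, keep the filled buffer, start a fresh one.
        self.buffer.write(self.compressor.flush())
        filled = self.buffer
        self._start()
        filled.seek(0)  # rewind so the sender reads from the start
        return filled


buf = CompressedBuffer()
buf.write(b"run-1")
assert buf.drain(size_limit=2) is None  # one run, below the limit
buf.write(b"run-2")
filled = buf.drain(size_limit=2)
assert zlib.decompress(filled.read()) == b"run-1run-2"
assert buf.count == 0  # fresh buffer after the swap
```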


def _tracing_thread_handle_batch(
    client: Client,
    tracing_queue: Queue,
@@ -200,6 +244,85 @@
    _tracing_thread_handle_batch(client, tracing_queue, next_batch, use_multipart)


def tracing_control_thread_func_compress_parallel(
    client_ref: weakref.ref[Client],
) -> None:
    client = client_ref()
    if client is None:
        return

    batch_ingest_config = _ensure_ingest_config(client.info)
    size_limit: int = batch_ingest_config["size_limit"]
    size_limit_bytes = batch_ingest_config.get("size_limit_bytes", 20_971_520)
    num_known_refs = 3

    def keep_thread_active() -> bool:
        # if `client.cleanup()` was called, stop thread
        if not client or (
            hasattr(client, "_manual_cleanup") and client._manual_cleanup
        ):
            return False
        if not threading.main_thread().is_alive():
            # main thread is dead. should not be active
            return False
        if hasattr(sys, "getrefcount"):
            # check if client refs count indicates we're the only remaining
            # reference to the client

            # Count active threads
            thread_pool = HTTP_REQUEST_THREAD_POOL._threads
            active_count = sum(
                1 for thread in thread_pool if thread is not None and thread.is_alive()
            )

            return sys.getrefcount(client) > num_known_refs + active_count
        else:
            # in PyPy, there is no sys.getrefcount attribute
            # for now, keep thread alive
            return True

    while True:
        triggered = client._data_available_event.wait(timeout=0.05)
        if not keep_thread_active():
            break
        if not triggered:
            continue
        client._data_available_event.clear()

        data_stream = _tracing_thread_drain_compressed_buffer(
            client, size_limit, size_limit_bytes
        )

        if data_stream is not None:
            try:
                future = HTTP_REQUEST_THREAD_POOL.submit(
                    client._send_compressed_multipart_req, data_stream
                )
                client._futures.add(future)
            except RuntimeError:
                client._send_compressed_multipart_req(data_stream)

    # Drain the buffer on exit
    try:
        final_data_stream = _tracing_thread_drain_compressed_buffer(
            client, size_limit=1, size_limit_bytes=1
        )  # Force final drain
        if final_data_stream is not None:
            try:
                cf.wait(
                    [
                        HTTP_REQUEST_THREAD_POOL.submit(
                            client._send_compressed_multipart_req, final_data_stream
                        )
                    ]
                )
            except RuntimeError:
                client._send_compressed_multipart_req(final_data_stream)

    except Exception:
        logger.error("Error in final cleanup", exc_info=True)
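The `keep_thread_active` check above relies on CPython reference counting: every live reference to the client, including those held by in-flight worker threads, raises `sys.getrefcount(client)`, and the thread exits once only the known references remain. A small illustration of the behavior that check depends on (CPython-specific, which is why the PyPy branch just returns True):

```python
import sys


class Client:
    """Stand-in for the real client; any object works for this demo."""


client = Client()
# getrefcount's own argument adds one temporary reference, so a single
# binding typically reports 2. We only rely on relative changes here.
base = sys.getrefcount(client)

extra = client  # a second binding, e.g. a worker thread holding the client
assert sys.getrefcount(client) == base + 1

del extra  # worker done; the count drops back to the baseline
assert sys.getrefcount(client) == base
```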


def _tracing_sub_thread_func(
    client_ref: weakref.ref[Client],
    use_multipart: bool,
31 changes: 31 additions & 0 deletions python/langsmith/_internal/_operations.py
@@ -5,6 +5,8 @@
import uuid
from typing import Literal, Optional, Union, cast

import zstandard as zstd

from langsmith import schemas as ls_schemas
from langsmith._internal import _orjson
from langsmith._internal._multipart import MultipartPart, MultipartPartsAndContext
@@ -271,3 +273,32 @@ def serialized_run_operation_to_multipart_parts_and_context(
        acc_parts,
        f"trace={op.trace_id},id={op.id}",
    )


def compress_multipart_parts_and_context(
    parts_and_context: MultipartPartsAndContext,
    compressor_writer: zstd.ZstdCompressionWriter,
    boundary: str,
) -> None:
    for part_name, (filename, data, content_type, headers) in parts_and_context.parts:
        part_header = f"--{boundary}\r\n"
        part_header += f'Content-Disposition: form-data; name="{part_name}"'

        if filename:
            part_header += f'; filename="{filename}"'

        part_header += f"\r\nContent-Type: {content_type}\r\n"

        for header_name, header_value in headers.items():
            part_header += f"{header_name}: {header_value}\r\n"

        part_header += "\r\n"
        compressor_writer.write(part_header.encode())

        if isinstance(data, (bytes, bytearray)):
            compressor_writer.write(data)
        else:
            compressor_writer.write(str(data).encode())

        # Write part terminator
        compressor_writer.write(b"\r\n")
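The function above writes standard multipart/form-data framing straight into the compressor. A standalone sketch of the framing for one part, minus the compression, with hypothetical names and values:

```python
def encode_part(boundary: str, name: str, data: bytes,
                content_type: str = "application/json",
                filename: str = "") -> bytes:
    """Frame a single multipart/form-data part the way
    compress_multipart_parts_and_context does, minus the compressor."""
    header = f"--{boundary}\r\n"
    header += f'Content-Disposition: form-data; name="{name}"'
    if filename:
        header += f'; filename="{filename}"'
    header += f"\r\nContent-Type: {content_type}\r\n\r\n"
    # Part body followed by the part terminator.
    return header.encode() + data + b"\r\n"


body = encode_part("BOUNDARY", "post.run-1", b'{"id": 1}')
body += b"--BOUNDARY--\r\n"  # closing boundary, as written in the drain step

assert body.startswith(b"--BOUNDARY\r\n")
assert b'name="post.run-1"' in body
assert body.endswith(b"--BOUNDARY--\r\n")
```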