From bdcb94f9f7a75e798ccdd760c61722ec79d740da Mon Sep 17 00:00:00 2001
From: Jeremiah Lowin <153965+jlowin@users.noreply.github.com>
Date: Fri, 8 Nov 2024 00:17:44 -0500
Subject: [PATCH 1/3] Fix AI Labeler YAML formatting (#1127)

---
 .github/workflows/ai-label.yml | 19 ++++++++++---------
 1 file changed, 10 insertions(+), 9 deletions(-)

diff --git a/.github/workflows/ai-label.yml b/.github/workflows/ai-label.yml
index e8c868090..86651cea1 100644
--- a/.github/workflows/ai-label.yml
+++ b/.github/workflows/ai-label.yml
@@ -1,20 +1,21 @@
 name: AI Labeler
 
 on:
-issues:
+  issues:
     types: [opened, reopened]
-pull_request:
+  pull_request:
    types: [opened, reopened]
 
 jobs:
-ai-labeler:
+  ai-labeler:
    runs-on: ubuntu-latest
    permissions:
-    contents: read
-    issues: write
-    pull-requests: write
+      contents: read
+      issues: write
+      pull-requests: write
    steps:
-    - uses: actions/checkout@v4
-    - uses: jlowin/ai-labeler@v0.2.0
+      - uses: actions/checkout@v4
+      - uses: jlowin/ai-labeler@v0.4.0
        with:
-          openai-api-key: ${{ secrets.OPENAI_API_KEY }}
\ No newline at end of file
+          include-repo-labels: true
+          openai-api-key: ${{ secrets.OPENAI_API_KEY }}

From b45a1fc58e0e442e2ae938852ec5e09be59327ba Mon Sep 17 00:00:00 2001
From: ivan <49297252+ivanbelenky@users.noreply.github.com>
Date: Fri, 8 Nov 2024 02:42:26 -0300
Subject: [PATCH 2/3] fix: OpenAI prompt details and completion tokens details
 missing from total usage (#1105)

Co-authored-by: Ivan Leo
---
 instructor/retry.py | 7 +++++--
 instructor/utils.py | 6 ++++++
 2 files changed, 11 insertions(+), 2 deletions(-)
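Illustration (not part of this patch): a self-contained sketch, with
made-up token counts, of the accumulation the retry.py and utils.py
changes enable. Reasoning and cached token details are now summed into
the running total across retry attempts, where previously they were
missing from the aggregated usage. The types are the same ones the
patch imports in retry.py.

    from openai.types.completion_usage import (
        CompletionTokensDetails,
        CompletionUsage,
        PromptTokensDetails,
    )

    # Running total, initialized the way the patched initialize_usage does.
    total = CompletionUsage(
        completion_tokens=0, prompt_tokens=0, total_tokens=0,
        completion_tokens_details=CompletionTokensDetails(audio_tokens=0, reasoning_tokens=0),
        prompt_tokens_details=PromptTokensDetails(audio_tokens=0, cached_tokens=0),
    )

    # Two hypothetical retry attempts, each reporting usage details.
    attempts = [
        CompletionUsage(
            completion_tokens=10, prompt_tokens=100, total_tokens=110,
            completion_tokens_details=CompletionTokensDetails(reasoning_tokens=7),
            prompt_tokens_details=PromptTokensDetails(cached_tokens=50),
        ),
        CompletionUsage(
            completion_tokens=5, prompt_tokens=100, total_tokens=105,
            completion_tokens_details=CompletionTokensDetails(reasoning_tokens=3),
            prompt_tokens_details=PromptTokensDetails(cached_tokens=100),
        ),
    ]

    for usage in attempts:
        total.completion_tokens += usage.completion_tokens or 0
        total.prompt_tokens += usage.prompt_tokens or 0
        total.total_tokens += usage.total_tokens or 0
        # Same walrus-guarded accumulation as the patched update_total_usage;
        # either side may be None, so both are checked before summing.
        if (rtd := usage.completion_tokens_details) and (ttd := total.completion_tokens_details):
            ttd.reasoning_tokens = (ttd.reasoning_tokens or 0) + (rtd.reasoning_tokens or 0)
        if (rpd := usage.prompt_tokens_details) and (tpd := total.prompt_tokens_details):
            tpd.cached_tokens = (tpd.cached_tokens or 0) + (rpd.cached_tokens or 0)

    assert total.total_tokens == 215
    assert total.completion_tokens_details.reasoning_tokens == 10  # 7 + 3
    assert total.prompt_tokens_details.cached_tokens == 150  # 50 + 100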
""" - total_usage = CompletionUsage(completion_tokens=0, prompt_tokens=0, total_tokens=0) + total_usage = CompletionUsage(completion_tokens=0, prompt_tokens=0, total_tokens=0, + completion_tokens_details = CompletionTokensDetails(audio_tokens=0, reasoning_tokens=0), + prompt_token_details = PromptTokensDetails(audio_tokens=0, cached_tokens=0) + ) if mode in {Mode.ANTHROPIC_TOOLS, Mode.ANTHROPIC_JSON}: from anthropic.types import Usage as AnthropicUsage diff --git a/instructor/utils.py b/instructor/utils.py index 6bee2b1c6..90f769cbc 100644 --- a/instructor/utils.py +++ b/instructor/utils.py @@ -142,6 +142,12 @@ def update_total_usage( total_usage.completion_tokens += response_usage.completion_tokens or 0 total_usage.prompt_tokens += response_usage.prompt_tokens or 0 total_usage.total_tokens += response_usage.total_tokens or 0 + if (rtd := response_usage.completion_tokens_details) and (ttd := total_usage.completion_tokens_details): + ttd.audio_tokens = (ttd.audio_tokens or 0) + (rtd.audio_tokens or 0) + ttd.reasoning_tokens = (ttd.reasoning_tokens or 0) + (rtd.reasoning_tokens or 0) + if (rpd := response_usage.prompt_tokens_details) and (tpd := total_usage.prompt_tokens_details): + tpd.audio_tokens = (tpd.audio_tokens or 0) + (rpd.audio_tokens or 0) + tpd.cached_tokens = (tpd.cached_tokens or 0) + (rpd.cached_tokens or 0) response.usage = total_usage # Replace each response usage with the total usage return response From 4327e14addaf79df51a2bd2d7161da275ac955ea Mon Sep 17 00:00:00 2001 From: Jordy Williams <43758351+jordyjwilliams@users.noreply.github.com> Date: Fri, 8 Nov 2024 17:29:14 +1100 Subject: [PATCH 3/3] fix: `max_retries` typing (#1135) Co-authored-by: Ivan Leo --- instructor/client.py | 40 +++++++++++++++++++++------------------ instructor/dsl/partial.py | 10 +++++++--- instructor/patch.py | 13 +++++++++---- 3 files changed, 38 insertions(+), 25 deletions(-) diff --git a/instructor/client.py b/instructor/client.py index d50659112..87cf1cb8c 100644 --- a/instructor/client.py +++ b/instructor/client.py @@ -13,6 +13,10 @@ Literal, Any, ) +from tenacity import ( + AsyncRetrying, + Retrying, +) from collections.abc import Generator, Iterable, Awaitable, AsyncGenerator from typing_extensions import Self from pydantic import BaseModel @@ -114,7 +118,7 @@ def create( self: AsyncInstructor, response_model: type[T], messages: list[ChatCompletionMessageParam], - max_retries: int = 3, + max_retries: int | AsyncRetrying = 3, validation_context: dict[str, Any] | None = None, context: dict[str, Any] | None = None, # {{ edit_1 }} strict: bool = True, @@ -126,7 +130,7 @@ def create( self: Self, response_model: type[T], messages: list[ChatCompletionMessageParam], - max_retries: int = 3, + max_retries: int | Retrying = 3, validation_context: dict[str, Any] | None = None, context: dict[str, Any] | None = None, # {{ edit_1 }} strict: bool = True, @@ -138,7 +142,7 @@ def create( self: AsyncInstructor, response_model: None, messages: list[ChatCompletionMessageParam], - max_retries: int = 3, + max_retries: int | AsyncRetrying = 3, validation_context: dict[str, Any] | None = None, context: dict[str, Any] | None = None, # {{ edit_1 }} strict: bool = True, @@ -150,7 +154,7 @@ def create( self: Self, response_model: None, messages: list[ChatCompletionMessageParam], - max_retries: int = 3, + max_retries: int | Retrying = 3, validation_context: dict[str, Any] | None = None, context: dict[str, Any] | None = None, # {{ edit_1 }} strict: bool = True, @@ -161,7 +165,7 @@ def create( self, response_model: type[T] 
diff --git a/instructor/client.py b/instructor/client.py
index d50659112..87cf1cb8c 100644
--- a/instructor/client.py
+++ b/instructor/client.py
@@ -13,6 +13,10 @@
     Literal,
     Any,
 )
+from tenacity import (
+    AsyncRetrying,
+    Retrying,
+)
 from collections.abc import Generator, Iterable, Awaitable, AsyncGenerator
 from typing_extensions import Self
 from pydantic import BaseModel
@@ -114,7 +118,7 @@ def create(
         self: AsyncInstructor,
         response_model: type[T],
         messages: list[ChatCompletionMessageParam],
-        max_retries: int = 3,
+        max_retries: int | AsyncRetrying = 3,
         validation_context: dict[str, Any] | None = None,
         context: dict[str, Any] | None = None,  # {{ edit_1 }}
         strict: bool = True,
@@ -126,7 +130,7 @@ def create(
         self: Self,
         response_model: type[T],
         messages: list[ChatCompletionMessageParam],
-        max_retries: int = 3,
+        max_retries: int | Retrying = 3,
         validation_context: dict[str, Any] | None = None,
         context: dict[str, Any] | None = None,  # {{ edit_1 }}
         strict: bool = True,
@@ -138,7 +142,7 @@ def create(
         self: AsyncInstructor,
         response_model: None,
         messages: list[ChatCompletionMessageParam],
-        max_retries: int = 3,
+        max_retries: int | AsyncRetrying = 3,
         validation_context: dict[str, Any] | None = None,
         context: dict[str, Any] | None = None,  # {{ edit_1 }}
         strict: bool = True,
@@ -150,7 +154,7 @@ def create(
         self: Self,
         response_model: None,
         messages: list[ChatCompletionMessageParam],
-        max_retries: int = 3,
+        max_retries: int | Retrying = 3,
         validation_context: dict[str, Any] | None = None,
         context: dict[str, Any] | None = None,  # {{ edit_1 }}
         strict: bool = True,
@@ -161,7 +165,7 @@ def create(
         self,
         response_model: type[T] | None,
         messages: list[ChatCompletionMessageParam],
-        max_retries: int = 3,
+        max_retries: int | Retrying | AsyncRetrying = 3,
         validation_context: dict[str, Any] | None = None,
         context: dict[str, Any] | None = None,
         strict: bool = True,
@@ -185,7 +189,7 @@ def create_partial(
         self: AsyncInstructor,
         response_model: type[T],
         messages: list[ChatCompletionMessageParam],
-        max_retries: int = 3,
+        max_retries: int | AsyncRetrying = 3,
         validation_context: dict[str, Any] | None = None,
         context: dict[str, Any] | None = None,  # {{ edit_1 }}
         strict: bool = True,
@@ -197,7 +201,7 @@ def create_partial(
         self: Self,
         response_model: type[T],
         messages: list[ChatCompletionMessageParam],
-        max_retries: int = 3,
+        max_retries: int | Retrying = 3,
         validation_context: dict[str, Any] | None = None,  # Deprecate in 2.0
         context: dict[str, Any] | None = None,
         strict: bool = True,
@@ -208,7 +212,7 @@ def create_partial(
         self,
         response_model: type[T],
         messages: list[ChatCompletionMessageParam],
-        max_retries: int = 3,
+        max_retries: int | Retrying | AsyncRetrying = 3,
         validation_context: dict[str, Any] | None = None,  # Deprecate in 2.0
         context: dict[str, Any] | None = None,
         strict: bool = True,
@@ -235,7 +239,7 @@ def create_iterable(
         self: AsyncInstructor,
         messages: list[ChatCompletionMessageParam],
         response_model: type[T],
-        max_retries: int = 3,
+        max_retries: int | AsyncRetrying = 3,
         validation_context: dict[str, Any] | None = None,  # Deprecate in 2.0
         context: dict[str, Any] | None = None,
         strict: bool = True,
@@ -247,7 +251,7 @@ def create_iterable(
         self: Self,
         messages: list[ChatCompletionMessageParam],
         response_model: type[T],
-        max_retries: int = 3,
+        max_retries: int | Retrying = 3,
         validation_context: dict[str, Any] | None = None,  # Deprecate in 2.0
         context: dict[str, Any] | None = None,
         strict: bool = True,
@@ -258,7 +262,7 @@ def create_iterable(
         self,
         messages: list[ChatCompletionMessageParam],
         response_model: type[T],
-        max_retries: int = 3,
+        max_retries: int | Retrying | AsyncRetrying = 3,
         validation_context: dict[str, Any] | None = None,  # Deprecate in 2.0
         context: dict[str, Any] | None = None,
         strict: bool = True,
@@ -284,7 +288,7 @@ def create_with_completion(
         self: AsyncInstructor,
         messages: list[ChatCompletionMessageParam],
         response_model: type[T],
-        max_retries: int = 3,
+        max_retries: int | AsyncRetrying = 3,
         validation_context: dict[str, Any] | None = None,  # Deprecate in 2.0
         context: dict[str, Any] | None = None,
         strict: bool = True,
@@ -296,7 +300,7 @@ def create_with_completion(
         self: Self,
         messages: list[ChatCompletionMessageParam],
         response_model: type[T],
-        max_retries: int = 3,
+        max_retries: int | Retrying = 3,
         validation_context: dict[str, Any] | None = None,  # Deprecate in 2.0
         context: dict[str, Any] | None = None,
         strict: bool = True,
@@ -307,7 +311,7 @@ def create_with_completion(
         self,
         messages: list[ChatCompletionMessageParam],
         response_model: type[T],
-        max_retries: int = 3,
+        max_retries: int | Retrying | AsyncRetrying = 3,
         validation_context: dict[str, Any] | None = None,  # Deprecate in 2.0
         context: dict[str, Any] | None = None,
         strict: bool = True,
@@ -373,7 +377,7 @@ async def create(
         self,
         response_model: type[T] | None,
         messages: list[ChatCompletionMessageParam],
-        max_retries: int = 3,
+        max_retries: int | AsyncRetrying = 3,
         validation_context: dict[str, Any] | None = None,  # Deprecate in 2.0
         context: dict[str, Any] | None = None,
         strict: bool = True,
@@ -395,7 +399,7 @@ async def create_partial(
         self,
         response_model: type[T],
         messages: list[ChatCompletionMessageParam],
-        max_retries: int = 3,
+        max_retries: int | AsyncRetrying = 3,
         validation_context: dict[str, Any] | None = None,  # Deprecate in 2.0
         context: dict[str, Any] | None = None,
         strict: bool = True,
@@ -419,7 +423,7 @@ async def create_iterable(
         self,
         messages: list[ChatCompletionMessageParam],
         response_model: type[T],
-        max_retries: int = 3,
+        max_retries: int | AsyncRetrying = 3,
         validation_context: dict[str, Any] | None = None,  # Deprecate in 2.0
         context: dict[str, Any] | None = None,
         strict: bool = True,
@@ -443,7 +447,7 @@ async def create_with_completion(
         self,
         messages: list[ChatCompletionMessageParam],
         response_model: type[T],
-        max_retries: int = 3,
+        max_retries: int | AsyncRetrying = 3,
         validation_context: dict[str, Any] | None = None,  # Deprecate in 2.0
         context: dict[str, Any] | None = None,
         strict: bool = True,
diff --git a/instructor/dsl/partial.py b/instructor/dsl/partial.py
index e869dbdac..41b5dc35e 100644
--- a/instructor/dsl/partial.py
+++ b/instructor/dsl/partial.py
@@ -129,7 +129,9 @@ def model_from_chunks(
         partial_model = cls.get_partial_model()
         for chunk in json_chunks:
             potential_object += chunk
-            obj = from_json((potential_object.strip() or "{}").encode(), partial_mode="on")
+            obj = from_json(
+                (potential_object.strip() or "{}").encode(), partial_mode="on"
+            )
             obj = partial_model.model_validate(obj, strict=None, **kwargs)
             yield obj
 
@@ -141,7 +143,9 @@ async def model_from_chunks_async(
         partial_model = cls.get_partial_model()
         async for chunk in json_chunks:
             potential_object += chunk
-            obj = from_json((potential_object.strip() or "{}").encode(), partial_mode="on")
+            obj = from_json(
+                (potential_object.strip() or "{}").encode(), partial_mode="on"
+            )
             obj = partial_model.model_validate(obj, strict=None, **kwargs)
             yield obj
 
@@ -163,7 +167,7 @@ def extract_json(
                 import json
 
                 resp = chunk.candidates[0].content.parts[0].function_call
-                resp_dict = type(resp).to_dict(resp) # type:ignore
+                resp_dict = type(resp).to_dict(resp)  # type:ignore
                 if "args" in resp_dict:
                     yield json.dumps(resp_dict["args"])
             elif chunk.choices:
diff --git a/instructor/patch.py b/instructor/patch.py
index 2d1f340e9..b64ad32d5 100644
--- a/instructor/patch.py
+++ b/instructor/patch.py
@@ -22,6 +22,11 @@
 from instructor.mode import Mode
 import logging
 
+from tenacity import (
+    AsyncRetrying,
+    Retrying,
+)
+
 logger = logging.getLogger("instructor")
 
 T_Model = TypeVar("T_Model", bound=BaseModel)
@@ -35,7 +40,7 @@ def __call__(
         response_model: type[T_Model] | None = None,
         validation_context: dict[str, Any] | None = None,  # Deprecate in 2.0
         context: dict[str, Any] | None = None,
-        max_retries: int = 1,
+        max_retries: int | Retrying = 1,
         *args: Any,
         **kwargs: Any,
     ) -> T_Model: ...
@@ -47,7 +52,7 @@ async def __call__(
         response_model: type[T_Model] | None = None,
         validation_context: dict[str, Any] | None = None,  # Deprecate in 2.0
         context: dict[str, Any] | None = None,
-        max_retries: int = 1,
+        max_retries: int | AsyncRetrying = 1,
         *args: Any,
         **kwargs: Any,
     ) -> T_Model: ...
@@ -140,7 +145,7 @@ async def new_create_async(
         response_model: type[T_Model] | None = None,
         validation_context: dict[str, Any] | None = None,
         context: dict[str, Any] | None = None,
-        max_retries: int = 1,
+        max_retries: int | AsyncRetrying = 1,
         strict: bool = True,
         hooks: Hooks | None = None,
         *args: T_ParamSpec.args,
@@ -171,7 +176,7 @@ def new_create_sync(
         response_model: type[T_Model] | None = None,
         validation_context: dict[str, Any] | None = None,
         context: dict[str, Any] | None = None,
-        max_retries: int = 1,
+        max_retries: int | Retrying = 1,
         strict: bool = True,
         hooks: Hooks | None = None,
         *args: T_ParamSpec.args,
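Illustration (not part of this commit): the async counterpart of the
sketch above, exercising the int | AsyncRetrying annotations added to
AsyncInstructor and to the async paths in patch.py. As before, the
entry point, model name, and schema are assumptions for the example.

    import asyncio

    import instructor
    from openai import AsyncOpenAI
    from pydantic import BaseModel
    from tenacity import AsyncRetrying, stop_after_attempt

    class UserInfo(BaseModel):
        name: str
        age: int

    async def main() -> None:
        client = instructor.from_openai(AsyncOpenAI())
        user = await client.chat.completions.create(
            model="gpt-4o-mini",
            response_model=UserInfo,
            messages=[{"role": "user", "content": "John is 25 years old."}],
            # An async tenacity controller now satisfies the type checker too.
            max_retries=AsyncRetrying(stop=stop_after_attempt(2)),
        )
        print(user)

    asyncio.run(main())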