diff --git a/instructor/client.py b/instructor/client.py
index d50659112..87cf1cb8c 100644
--- a/instructor/client.py
+++ b/instructor/client.py
@@ -13,6 +13,10 @@
     Literal,
     Any,
 )
+from tenacity import (
+    AsyncRetrying,
+    Retrying,
+)
 from collections.abc import Generator, Iterable, Awaitable, AsyncGenerator
 from typing_extensions import Self
 from pydantic import BaseModel
@@ -114,7 +118,7 @@ def create(
         self: AsyncInstructor,
         response_model: type[T],
         messages: list[ChatCompletionMessageParam],
-        max_retries: int = 3,
+        max_retries: int | AsyncRetrying = 3,
         validation_context: dict[str, Any] | None = None,
         context: dict[str, Any] | None = None,  # {{ edit_1 }}
         strict: bool = True,
@@ -126,7 +130,7 @@ def create(
         self: Self,
         response_model: type[T],
         messages: list[ChatCompletionMessageParam],
-        max_retries: int = 3,
+        max_retries: int | Retrying = 3,
         validation_context: dict[str, Any] | None = None,
         context: dict[str, Any] | None = None,  # {{ edit_1 }}
         strict: bool = True,
@@ -138,7 +142,7 @@ def create(
         self: AsyncInstructor,
         response_model: None,
         messages: list[ChatCompletionMessageParam],
-        max_retries: int = 3,
+        max_retries: int | AsyncRetrying = 3,
         validation_context: dict[str, Any] | None = None,
         context: dict[str, Any] | None = None,  # {{ edit_1 }}
         strict: bool = True,
@@ -150,7 +154,7 @@ def create(
         self: Self,
         response_model: None,
         messages: list[ChatCompletionMessageParam],
-        max_retries: int = 3,
+        max_retries: int | Retrying = 3,
         validation_context: dict[str, Any] | None = None,
         context: dict[str, Any] | None = None,  # {{ edit_1 }}
         strict: bool = True,
@@ -161,7 +165,7 @@ def create(
         self,
         response_model: type[T] | None,
         messages: list[ChatCompletionMessageParam],
-        max_retries: int = 3,
+        max_retries: int | Retrying | AsyncRetrying = 3,
         validation_context: dict[str, Any] | None = None,
         context: dict[str, Any] | None = None,
         strict: bool = True,
@@ -185,7 +189,7 @@ def create_partial(
         self: AsyncInstructor,
         response_model: type[T],
         messages: list[ChatCompletionMessageParam],
-        max_retries: int = 3,
+        max_retries: int | AsyncRetrying = 3,
         validation_context: dict[str, Any] | None = None,
         context: dict[str, Any] | None = None,  # {{ edit_1 }}
         strict: bool = True,
@@ -197,7 +201,7 @@ def create_partial(
         self: Self,
         response_model: type[T],
         messages: list[ChatCompletionMessageParam],
-        max_retries: int = 3,
+        max_retries: int | Retrying = 3,
         validation_context: dict[str, Any] | None = None,  # Deprecate in 2.0
         context: dict[str, Any] | None = None,
         strict: bool = True,
@@ -208,7 +212,7 @@ def create_partial(
         self,
         response_model: type[T],
         messages: list[ChatCompletionMessageParam],
-        max_retries: int = 3,
+        max_retries: int | Retrying | AsyncRetrying = 3,
         validation_context: dict[str, Any] | None = None,  # Deprecate in 2.0
         context: dict[str, Any] | None = None,
         strict: bool = True,
@@ -235,7 +239,7 @@ def create_iterable(
         self: AsyncInstructor,
         messages: list[ChatCompletionMessageParam],
         response_model: type[T],
-        max_retries: int = 3,
+        max_retries: int | AsyncRetrying = 3,
         validation_context: dict[str, Any] | None = None,  # Deprecate in 2.0
         context: dict[str, Any] | None = None,
         strict: bool = True,
@@ -247,7 +251,7 @@ def create_iterable(
         self: Self,
         messages: list[ChatCompletionMessageParam],
         response_model: type[T],
-        max_retries: int = 3,
+        max_retries: int | Retrying = 3,
         validation_context: dict[str, Any] | None = None,  # Deprecate in 2.0
         context: dict[str, Any] | None = None,
         strict: bool = True,
@@ -258,7 +262,7 @@ def create_iterable(
         self,
         messages: list[ChatCompletionMessageParam],
         response_model: type[T],
-        max_retries: int = 3,
+        max_retries: int | Retrying | AsyncRetrying = 3,
         validation_context: dict[str, Any] | None = None,  # Deprecate in 2.0
         context: dict[str, Any] | None = None,
         strict: bool = True,
@@ -284,7 +288,7 @@ def create_with_completion(
         self: AsyncInstructor,
         messages: list[ChatCompletionMessageParam],
         response_model: type[T],
-        max_retries: int = 3,
+        max_retries: int | AsyncRetrying = 3,
         validation_context: dict[str, Any] | None = None,  # Deprecate in 2.0
         context: dict[str, Any] | None = None,
         strict: bool = True,
@@ -296,7 +300,7 @@ def create_with_completion(
         self: Self,
         messages: list[ChatCompletionMessageParam],
         response_model: type[T],
-        max_retries: int = 3,
+        max_retries: int | Retrying = 3,
         validation_context: dict[str, Any] | None = None,  # Deprecate in 2.0
         context: dict[str, Any] | None = None,
         strict: bool = True,
@@ -307,7 +311,7 @@ def create_with_completion(
         self,
         messages: list[ChatCompletionMessageParam],
         response_model: type[T],
-        max_retries: int = 3,
+        max_retries: int | Retrying | AsyncRetrying = 3,
         validation_context: dict[str, Any] | None = None,  # Deprecate in 2.0
         context: dict[str, Any] | None = None,
         strict: bool = True,
@@ -373,7 +377,7 @@ async def create(
         self,
         response_model: type[T] | None,
         messages: list[ChatCompletionMessageParam],
-        max_retries: int = 3,
+        max_retries: int | AsyncRetrying = 3,
         validation_context: dict[str, Any] | None = None,  # Deprecate in 2.0
         context: dict[str, Any] | None = None,
         strict: bool = True,
@@ -395,7 +399,7 @@ async def create_partial(
         self,
         response_model: type[T],
         messages: list[ChatCompletionMessageParam],
-        max_retries: int = 3,
+        max_retries: int | AsyncRetrying = 3,
         validation_context: dict[str, Any] | None = None,  # Deprecate in 2.0
         context: dict[str, Any] | None = None,
         strict: bool = True,
@@ -419,7 +423,7 @@ async def create_iterable(
         self,
         messages: list[ChatCompletionMessageParam],
         response_model: type[T],
-        max_retries: int = 3,
+        max_retries: int | AsyncRetrying = 3,
         validation_context: dict[str, Any] | None = None,  # Deprecate in 2.0
         context: dict[str, Any] | None = None,
         strict: bool = True,
@@ -443,7 +447,7 @@ async def create_with_completion(
         self,
         messages: list[ChatCompletionMessageParam],
         response_model: type[T],
-        max_retries: int = 3,
+        max_retries: int | AsyncRetrying = 3,
         validation_context: dict[str, Any] | None = None,  # Deprecate in 2.0
         context: dict[str, Any] | None = None,
         strict: bool = True,
diff --git a/instructor/dsl/partial.py b/instructor/dsl/partial.py
index e869dbdac..41b5dc35e 100644
--- a/instructor/dsl/partial.py
+++ b/instructor/dsl/partial.py
@@ -129,7 +129,9 @@ def model_from_chunks(
         partial_model = cls.get_partial_model()
         for chunk in json_chunks:
             potential_object += chunk
-            obj = from_json((potential_object.strip() or "{}").encode(), partial_mode="on")
+            obj = from_json(
+                (potential_object.strip() or "{}").encode(), partial_mode="on"
+            )
             obj = partial_model.model_validate(obj, strict=None, **kwargs)
             yield obj
 
@@ -141,7 +143,9 @@ async def model_from_chunks_async(
         partial_model = cls.get_partial_model()
         async for chunk in json_chunks:
             potential_object += chunk
-            obj = from_json((potential_object.strip() or "{}").encode(), partial_mode="on")
+            obj = from_json(
+                (potential_object.strip() or "{}").encode(), partial_mode="on"
+            )
             obj = partial_model.model_validate(obj, strict=None, **kwargs)
             yield obj
 
@@ -163,7 +167,7 @@ def extract_json(
                     import json
 
                     resp = chunk.candidates[0].content.parts[0].function_call
-                    resp_dict = type(resp).to_dict(resp) # type:ignore
+                    resp_dict = type(resp).to_dict(resp)  # type:ignore
                     if "args" in resp_dict:
                         yield json.dumps(resp_dict["args"])
                 elif chunk.choices:
diff --git a/instructor/patch.py b/instructor/patch.py
index 2d1f340e9..b64ad32d5 100644
--- a/instructor/patch.py
+++ b/instructor/patch.py
@@ -22,6 +22,11 @@
 from instructor.mode import Mode
 import logging
 
+from tenacity import (
+    AsyncRetrying,
+    Retrying,
+)
+
 logger = logging.getLogger("instructor")
 
 T_Model = TypeVar("T_Model", bound=BaseModel)
@@ -35,7 +40,7 @@ def __call__(
         response_model: type[T_Model] | None = None,
         validation_context: dict[str, Any] | None = None,  # Deprecate in 2.0
         context: dict[str, Any] | None = None,
-        max_retries: int = 1,
+        max_retries: int | Retrying = 1,
         *args: Any,
         **kwargs: Any,
     ) -> T_Model: ...
@@ -47,7 +52,7 @@ async def __call__(
         response_model: type[T_Model] | None = None,
         validation_context: dict[str, Any] | None = None,  # Deprecate in 2.0
         context: dict[str, Any] | None = None,
-        max_retries: int = 1,
+        max_retries: int | AsyncRetrying = 1,
         *args: Any,
         **kwargs: Any,
     ) -> T_Model: ...
@@ -140,7 +145,7 @@ async def new_create_async(
         response_model: type[T_Model] | None = None,
         validation_context: dict[str, Any] | None = None,
         context: dict[str, Any] | None = None,
-        max_retries: int = 1,
+        max_retries: int | AsyncRetrying = 1,
         strict: bool = True,
         hooks: Hooks | None = None,
         *args: T_ParamSpec.args,
@@ -171,7 +176,7 @@ def new_create_sync(
         response_model: type[T_Model] | None = None,
         validation_context: dict[str, Any] | None = None,
         context: dict[str, Any] | None = None,
-        max_retries: int = 1,
+        max_retries: int | Retrying = 1,
         strict: bool = True,
         hooks: Hooks | None = None,
         *args: T_ParamSpec.args,