From f969534979b9986947a21dc98b8518a47159a352 Mon Sep 17 00:00:00 2001
From: Pierce Kelaita
Date: Fri, 12 Jul 2024 11:12:37 -0700
Subject: [PATCH] [exceptions] add LLMRateLimitError

---
 CHANGELOG.md                      |  8 ++++++-
 README.md                         |  2 +-
 l2m2/__init__.py                  |  2 +-
 l2m2/_internal/http.py            |  9 ++++++--
 l2m2/exceptions.py                |  6 ++++++
 tests/l2m2/_internal/test_http.py | 35 ++++++++++++++++++++++++-------
 6 files changed, 50 insertions(+), 12 deletions(-)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index 4d21583..bf0ae74 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -4,7 +4,13 @@ _Current version: 0.0.24_
 
 [PyPi link](https://pypi.org/project/l2m2/)
 
-### 0.0.24 - July 12, 2024
+### 0.0.25 - July 12, 2024
+
+#### Added
+
+- Custom exception `LLMRateLimitError`, raised when an LLM call returns a 429 status code.
+
+### 0.0.24 - July 11, 2024
 
 #### Added
 
diff --git a/README.md b/README.md
index 463f238..966b742 100644
--- a/README.md
+++ b/README.md
@@ -1,6 +1,6 @@
 # L2M2: A Simple Python LLM Manager 💬👍
 
-[![Tests](https://github.com/pkelaita/l2m2/actions/workflows/tests.yml/badge.svg?timestamp=1720770641)](https://github.com/pkelaita/l2m2/actions/workflows/tests.yml) [![codecov](https://codecov.io/github/pkelaita/l2m2/graph/badge.svg?token=UWIB0L9PR8)](https://codecov.io/github/pkelaita/l2m2) [![PyPI version](https://badge.fury.io/py/l2m2.svg?timestamp=1720770641)](https://badge.fury.io/py/l2m2)
+[![Tests](https://github.com/pkelaita/l2m2/actions/workflows/tests.yml/badge.svg?timestamp=1720807924)](https://github.com/pkelaita/l2m2/actions/workflows/tests.yml) [![codecov](https://codecov.io/github/pkelaita/l2m2/graph/badge.svg?token=UWIB0L9PR8)](https://codecov.io/github/pkelaita/l2m2) [![PyPI version](https://badge.fury.io/py/l2m2.svg?timestamp=1720807924)](https://badge.fury.io/py/l2m2)
 
 **L2M2** ("LLM Manager" → "LLMM" → "L2M2") is a tiny and very simple LLM manager for Python that exposes lots of models through a unified API. This is useful for evaluation, demos, production applications etc. that need to easily be model-agnostic.
 
diff --git a/l2m2/__init__.py b/l2m2/__init__.py
index 5681085..5a6b518 100644
--- a/l2m2/__init__.py
+++ b/l2m2/__init__.py
@@ -1 +1 @@
-__version__ = "0.0.24"
+__version__ = "0.0.25"
diff --git a/l2m2/_internal/http.py b/l2m2/_internal/http.py
index a207561..558c0be 100644
--- a/l2m2/_internal/http.py
+++ b/l2m2/_internal/http.py
@@ -1,7 +1,7 @@
 from typing import Optional, Dict, Any
 import httpx
 
-from l2m2.exceptions import LLMTimeoutError
+from l2m2.exceptions import LLMTimeoutError, LLMRateLimitError
 from l2m2.model_info import API_KEY, MODEL_ID, PROVIDER_INFO
 
 
@@ -66,7 +66,12 @@ async def llm_post(
     if provider == "replicate" and response.status_code == 201:
         return await _handle_replicate_201(client, response, api_key)
 
-    if response.status_code != 200:
+    if response.status_code == 429:
+        raise LLMRateLimitError(
+            f"Reached rate limit for provider {provider} with model {model_id}."
+        )
+
+    elif response.status_code != 200:
         raise Exception(response.text)
 
     return response.json()
diff --git a/l2m2/exceptions.py b/l2m2/exceptions.py
index 802c2ec..504987e 100644
--- a/l2m2/exceptions.py
+++ b/l2m2/exceptions.py
@@ -2,3 +2,9 @@ class LLMTimeoutError(Exception):
     """Raised when a request to an LLM provider API times out."""
 
     pass
+
+
+class LLMRateLimitError(Exception):
+    """Raised when a request to an LLM provider API is rate limited."""
+
+    pass
diff --git a/tests/l2m2/_internal/test_http.py b/tests/l2m2/_internal/test_http.py
index 06d5baf..972cc85 100644
--- a/tests/l2m2/_internal/test_http.py
+++ b/tests/l2m2/_internal/test_http.py
@@ -240,13 +240,14 @@ async def test_llm_post_failure():
 
 @pytest.mark.asyncio
+@respx.mock
 @patch(PROVIDER_INFO_PATH, MOCK_PROVIDER_INFO)
 async def test_llm_post_timeout():
     provider = "test_provider"
     api_key = "test_api_key"
     data = {"input": "test input"}
     model_id = "test_model_id"
-    timeout = 5  # Set a small timeout for the test
+    timeout = 5
 
     endpoint = (
         MOCK_PROVIDER_INFO[provider]["endpoint"]
         .replace(API_KEY, api_key)
@@ -254,10 +255,30 @@ async def test_llm_post_timeout():
         .replace(MODEL_ID, model_id)
     )
 
-    with respx.mock:
-        respx.post(endpoint).mock(side_effect=httpx.ReadTimeout)
-        async with httpx.AsyncClient() as client:
-            with pytest.raises(LLMTimeoutError) as exc_info:
-                await llm_post(client, provider, api_key, data, timeout, model_id)
+    respx.post(endpoint).mock(side_effect=httpx.ReadTimeout)
+    async with httpx.AsyncClient() as client:
+        with pytest.raises(LLMTimeoutError):
+            await llm_post(client, provider, api_key, data, timeout, model_id)
+
+
+@pytest.mark.asyncio
+@respx.mock
+@patch(PROVIDER_INFO_PATH, MOCK_PROVIDER_INFO)
+async def test_llm_post_rate_limit_error():
+    provider = "test_provider"
+    api_key = "test_api_key"
+    data = {"input": "test input"}
+    model_id = "test_model_id"
 
-    assert "Request timed out after" in str(exc_info.value)
+    endpoint = (
+        MOCK_PROVIDER_INFO[provider]["endpoint"]
+        .replace(API_KEY, api_key)
+        .replace(MODEL_ID, model_id)
+    )
+
+    respx.post(endpoint).mock(
+        return_value=httpx.Response(429, text="Rate Limit Exceeded")
+    )
+    async with httpx.AsyncClient() as client:
+        with pytest.raises(LLMRateLimitError):
+            await llm_post(client, provider, api_key, data, 10, model_id)
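
For context on how the new exception is meant to be consumed, below is a minimal sketch of a caller that retries when a provider returns 429. It is not part of the patch: it reuses the internal `llm_post` helper and the call pattern from the tests above, while the `call_with_retry` wrapper, its argument values, and the backoff policy are illustrative assumptions rather than library API.

```python
import asyncio
from typing import Any, Dict, Optional

import httpx

from l2m2._internal.http import llm_post
from l2m2.exceptions import LLMRateLimitError


async def call_with_retry(
    provider: str,
    api_key: str,
    data: Dict[str, Any],
    timeout: Optional[int],
    model_id: str,
    retries: int = 3,
) -> Any:
    # Hypothetical wrapper: retry rate-limited calls with exponential backoff.
    async with httpx.AsyncClient() as client:
        for attempt in range(retries):
            try:
                return await llm_post(client, provider, api_key, data, timeout, model_id)
            except LLMRateLimitError:
                if attempt == retries - 1:
                    raise  # out of retries; surface the rate limit to the caller
                await asyncio.sleep(2**attempt)  # wait 1s, 2s, 4s, ... between attempts
```

Backing off between attempts keeps transient 429s invisible to callers while still surfacing persistent rate limiting as `LLMRateLimitError`.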