From f969534979b9986947a21dc98b8518a47159a352 Mon Sep 17 00:00:00 2001
From: Pierce Kelaita
Date: Fri, 12 Jul 2024 11:12:37 -0700
Subject: [PATCH] [exceptions] add LLMRateLimitError

---
 CHANGELOG.md                      |  8 ++++++-
 README.md                         |  2 +-
 l2m2/__init__.py                  |  2 +-
 l2m2/_internal/http.py            |  9 ++++++--
 l2m2/exceptions.py                |  6 ++++++
 tests/l2m2/_internal/test_http.py | 35 ++++++++++++++++++++++++-------
 6 files changed, 50 insertions(+), 12 deletions(-)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index 4d21583..bf0ae74 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -4,7 +4,13 @@ _Current version: 0.0.24_
 
 [PyPi link](https://pypi.org/project/l2m2/)
 
-### 0.0.24 - July 12, 2024
+### 0.0.25 - July 12, 2024
+
+#### Added
+
+- Custom exception `LLMRateLimitError`, raised when an LLM call returns a 429 status code.
+
+### 0.0.24 - July 11, 2024
 
 #### Added
 
diff --git a/README.md b/README.md
index 463f238..966b742 100644
--- a/README.md
+++ b/README.md
@@ -1,6 +1,6 @@
 # L2M2: A Simple Python LLM Manager 💬👍
 
-[![Tests](https://github.com/pkelaita/l2m2/actions/workflows/tests.yml/badge.svg?timestamp=1720770641)](https://github.com/pkelaita/l2m2/actions/workflows/tests.yml) [![codecov](https://codecov.io/github/pkelaita/l2m2/graph/badge.svg?token=UWIB0L9PR8)](https://codecov.io/github/pkelaita/l2m2) [![PyPI version](https://badge.fury.io/py/l2m2.svg?timestamp=1720770641)](https://badge.fury.io/py/l2m2)
+[![Tests](https://github.com/pkelaita/l2m2/actions/workflows/tests.yml/badge.svg?timestamp=1720807924)](https://github.com/pkelaita/l2m2/actions/workflows/tests.yml) [![codecov](https://codecov.io/github/pkelaita/l2m2/graph/badge.svg?token=UWIB0L9PR8)](https://codecov.io/github/pkelaita/l2m2) [![PyPI version](https://badge.fury.io/py/l2m2.svg?timestamp=1720807924)](https://badge.fury.io/py/l2m2)
 
 **L2M2** ("LLM Manager" → "LLMM" → "L2M2") is a tiny and very simple LLM manager for Python that exposes lots of models through a unified API. This is useful for evaluation, demos, production applications etc. that need to easily be model-agnostic.
 
diff --git a/l2m2/__init__.py b/l2m2/__init__.py
index 5681085..5a6b518 100644
--- a/l2m2/__init__.py
+++ b/l2m2/__init__.py
@@ -1 +1 @@
-__version__ = "0.0.24"
+__version__ = "0.0.25"
diff --git a/l2m2/_internal/http.py b/l2m2/_internal/http.py
index a207561..558c0be 100644
--- a/l2m2/_internal/http.py
+++ b/l2m2/_internal/http.py
@@ -1,7 +1,7 @@
 from typing import Optional, Dict, Any
 import httpx
 
-from l2m2.exceptions import LLMTimeoutError
+from l2m2.exceptions import LLMTimeoutError, LLMRateLimitError
 from l2m2.model_info import API_KEY, MODEL_ID, PROVIDER_INFO
 
 
@@ -66,7 +66,12 @@ async def llm_post(
     if provider == "replicate" and response.status_code == 201:
         return await _handle_replicate_201(client, response, api_key)
 
-    if response.status_code != 200:
+    if response.status_code == 429:
+        raise LLMRateLimitError(
+            f"Reached rate limit for provider {provider} with model {model_id}."
+        )
+
+    elif response.status_code != 200:
         raise Exception(response.text)
 
     return response.json()
diff --git a/l2m2/exceptions.py b/l2m2/exceptions.py
index 802c2ec..504987e 100644
--- a/l2m2/exceptions.py
+++ b/l2m2/exceptions.py
@@ -2,3 +2,9 @@ class LLMTimeoutError(Exception):
     """Raised when a request to an LLM provider API times out."""
 
     pass
+
+
+class LLMRateLimitError(Exception):
+    """Raised when a request to an LLM provider API is rate limited."""
+
+    pass
diff --git a/tests/l2m2/_internal/test_http.py b/tests/l2m2/_internal/test_http.py
index 06d5baf..972cc85 100644
--- a/tests/l2m2/_internal/test_http.py
+++ b/tests/l2m2/_internal/test_http.py
@@ -240,13 +240,14 @@ async def test_llm_post_failure():
 
 @pytest.mark.asyncio
+@respx.mock
 @patch(PROVIDER_INFO_PATH, MOCK_PROVIDER_INFO)
 async def test_llm_post_timeout():
     provider = "test_provider"
     api_key = "test_api_key"
     data = {"input": "test input"}
     model_id = "test_model_id"
-    timeout = 5  # Set a small timeout for the test
+    timeout = 5
 
     endpoint = (
         MOCK_PROVIDER_INFO[provider]["endpoint"]
         .replace(API_KEY, api_key)
@@ -254,10 +255,30 @@ async def test_llm_post_timeout():
         .replace(MODEL_ID, model_id)
     )
 
-    with respx.mock:
-        respx.post(endpoint).mock(side_effect=httpx.ReadTimeout)
-        async with httpx.AsyncClient() as client:
-            with pytest.raises(LLMTimeoutError) as exc_info:
-                await llm_post(client, provider, api_key, data, timeout, model_id)
+    respx.post(endpoint).mock(side_effect=httpx.ReadTimeout)
+    async with httpx.AsyncClient() as client:
+        with pytest.raises(LLMTimeoutError):
+            await llm_post(client, provider, api_key, data, timeout, model_id)
+
+
+@pytest.mark.asyncio
+@respx.mock
+@patch(PROVIDER_INFO_PATH, MOCK_PROVIDER_INFO)
+async def test_llm_post_rate_limit_error():
+    provider = "test_provider"
+    api_key = "test_api_key"
+    data = {"input": "test input"}
+    model_id = "test_model_id"
 
-    assert "Request timed out after" in str(exc_info.value)
+    endpoint = (
+        MOCK_PROVIDER_INFO[provider]["endpoint"]
+        .replace(API_KEY, api_key)
+        .replace(MODEL_ID, model_id)
+    )
+
+    respx.post(endpoint).mock(
+        return_value=httpx.Response(429, text="Rate Limit Exceeded")
+    )
+    async with httpx.AsyncClient() as client:
+        with pytest.raises(LLMRateLimitError):
+            await llm_post(client, provider, api_key, data, 10, model_id)
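
For context on how the new exception is meant to be consumed, below is a minimal sketch of a caller that retries when a provider returns 429. It is not part of the patch: it reuses the internal `llm_post` helper and the call pattern from the tests above, while the `call_with_retry` wrapper, its argument values, and the backoff policy are illustrative assumptions rather than library API.

```python
import asyncio
from typing import Any, Dict, Optional

import httpx

from l2m2._internal.http import llm_post
from l2m2.exceptions import LLMRateLimitError


async def call_with_retry(
    provider: str,
    api_key: str,
    data: Dict[str, Any],
    timeout: Optional[int],
    model_id: str,
    retries: int = 3,
) -> Any:
    # Hypothetical wrapper: retry rate-limited calls with exponential backoff.
    async with httpx.AsyncClient() as client:
        for attempt in range(retries):
            try:
                return await llm_post(client, provider, api_key, data, timeout, model_id)
            except LLMRateLimitError:
                if attempt == retries - 1:
                    raise  # out of retries; surface the rate limit to the caller
                await asyncio.sleep(2**attempt)  # wait 1s, 2s, 4s, ... between attempts
```

Backing off between attempts keeps transient 429s invisible to callers while still surfacing persistent rate limiting as `LLMRateLimitError`.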