[exceptions] add LLMRateLimitError
pkelaita committed Jul 12, 2024
1 parent 360c53e commit f969534
Showing 6 changed files with 50 additions and 12 deletions.
8 changes: 7 additions & 1 deletion CHANGELOG.md
@@ -4,7 +4,13 @@ _Current version: 0.0.24_
 
 [PyPi link](https://pypi.org/project/l2m2/)
 
-### 0.0.24 - July 12, 2024
+### 0.0.25 - July 12, 2024
+
+#### Added
+
+- Custom exception `LLMRateLimitError`, raised when an LLM call returns a 429 status code.
+
+### 0.0.24 - July 11, 2024
 
 #### Added
 
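For downstream users, the new exception can be caught explicitly. A minimal sketch of a call site — the `do_llm_call` function is a hypothetical stand-in for whatever issues the request through l2m2:

from l2m2.exceptions import LLMRateLimitError


def safe_call(do_llm_call):
    # do_llm_call: hypothetical zero-argument function that performs one
    # LLM request and may raise LLMRateLimitError on a 429 response.
    try:
        return do_llm_call()
    except LLMRateLimitError as e:
        print(f"Rate limited, try again later: {e}")
        return None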
2 changes: 1 addition & 1 deletion README.md
@@ -1,6 +1,6 @@
 # L2M2: A Simple Python LLM Manager 💬👍
 
-[![Tests](https://github.com/pkelaita/l2m2/actions/workflows/tests.yml/badge.svg?timestamp=1720770641)](https://github.com/pkelaita/l2m2/actions/workflows/tests.yml) [![codecov](https://codecov.io/github/pkelaita/l2m2/graph/badge.svg?token=UWIB0L9PR8)](https://codecov.io/github/pkelaita/l2m2) [![PyPI version](https://badge.fury.io/py/l2m2.svg?timestamp=1720770641)](https://badge.fury.io/py/l2m2)
+[![Tests](https://github.com/pkelaita/l2m2/actions/workflows/tests.yml/badge.svg?timestamp=1720807924)](https://github.com/pkelaita/l2m2/actions/workflows/tests.yml) [![codecov](https://codecov.io/github/pkelaita/l2m2/graph/badge.svg?token=UWIB0L9PR8)](https://codecov.io/github/pkelaita/l2m2) [![PyPI version](https://badge.fury.io/py/l2m2.svg?timestamp=1720807924)](https://badge.fury.io/py/l2m2)
 
 **L2M2** ("LLM Manager" → "LLMM" → "L2M2") is a tiny and very simple LLM manager for Python that exposes lots of models through a unified API. This is useful for evaluation, demos, production applications etc. that need to easily be model-agnostic.
 
2 changes: 1 addition & 1 deletion l2m2/__init__.py
@@ -1 +1 @@
-__version__ = "0.0.24"
+__version__ = "0.0.25"
9 changes: 7 additions & 2 deletions l2m2/_internal/http.py
@@ -1,7 +1,7 @@
 from typing import Optional, Dict, Any
 import httpx
 
-from l2m2.exceptions import LLMTimeoutError
+from l2m2.exceptions import LLMTimeoutError, LLMRateLimitError
 from l2m2.model_info import API_KEY, MODEL_ID, PROVIDER_INFO
 
 
@@ -66,7 +66,12 @@ async def llm_post(
     if provider == "replicate" and response.status_code == 201:
         return await _handle_replicate_201(client, response, api_key)
 
-    if response.status_code != 200:
+    if response.status_code == 429:
+        raise LLMRateLimitError(
+            f"Reached rate limit for provider {provider} with model {model_id}."
+        )
+
+    elif response.status_code != 200:
         raise Exception(response.text)
 
     return response.json()
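In effect, llm_post now maps a 429 from any provider to LLMRateLimitError, while other non-200 responses still surface as a generic Exception. A minimal sketch of that response handling in isolation — the check_response helper is hypothetical, not part of l2m2:

from typing import Any

import httpx

from l2m2.exceptions import LLMRateLimitError


def check_response(response: httpx.Response, provider: str, model_id: str) -> Any:
    # Mirrors the branching added above: 429 -> LLMRateLimitError,
    # any other non-200 -> generic Exception, 200 -> parsed JSON body.
    if response.status_code == 429:
        raise LLMRateLimitError(
            f"Reached rate limit for provider {provider} with model {model_id}."
        )
    elif response.status_code != 200:
        raise Exception(response.text)
    return response.json()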
6 changes: 6 additions & 0 deletions l2m2/exceptions.py
@@ -2,3 +2,9 @@ class LLMTimeoutError(Exception):
     """Raised when a request to an LLM provider API times out."""
 
     pass
+
+
+class LLMRateLimitError(Exception):
+    """Raised when a request to an LLM provider API is rate limited."""
+
+    pass
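With both exception types defined, callers can treat rate limiting as retryable while letting timeouts propagate untouched. A sketch of one such policy, assuming only what the diff shows — the call_model coroutine factory and the retry parameters are hypothetical:

import asyncio
from typing import Any, Awaitable, Callable

from l2m2.exceptions import LLMRateLimitError


async def call_with_backoff(
    call_model: Callable[[], Awaitable[Any]],
    max_attempts: int = 3,
    base_delay_s: float = 2.0,
) -> Any:
    # call_model is a hypothetical zero-argument coroutine factory that
    # performs one LLM request; only rate-limit errors are retried here.
    for attempt in range(1, max_attempts + 1):
        try:
            return await call_model()
        except LLMRateLimitError:
            if attempt == max_attempts:
                raise  # exhausted retries; let the caller see the 429
            await asyncio.sleep(base_delay_s * attempt)  # linear backoff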
35 changes: 28 additions & 7 deletions tests/l2m2/_internal/test_http.py
@@ -240,24 +240,45 @@ async def test_llm_post_failure():
 
 
 @pytest.mark.asyncio
+@respx.mock
 @patch(PROVIDER_INFO_PATH, MOCK_PROVIDER_INFO)
 async def test_llm_post_timeout():
     provider = "test_provider"
     api_key = "test_api_key"
     data = {"input": "test input"}
     model_id = "test_model_id"
-    timeout = 5  # Set a small timeout for the test
+    timeout = 5
 
     endpoint = (
         MOCK_PROVIDER_INFO[provider]["endpoint"]
         .replace(API_KEY, api_key)
         .replace(MODEL_ID, model_id)
     )
 
-    with respx.mock:
-        respx.post(endpoint).mock(side_effect=httpx.ReadTimeout)
-        async with httpx.AsyncClient() as client:
-            with pytest.raises(LLMTimeoutError) as exc_info:
-                await llm_post(client, provider, api_key, data, timeout, model_id)
+    respx.post(endpoint).mock(side_effect=httpx.ReadTimeout)
+    async with httpx.AsyncClient() as client:
+        with pytest.raises(LLMTimeoutError):
+            await llm_post(client, provider, api_key, data, timeout, model_id)
 
-    assert "Request timed out after" in str(exc_info.value)
+
+@pytest.mark.asyncio
+@respx.mock
+@patch(PROVIDER_INFO_PATH, MOCK_PROVIDER_INFO)
+async def test_llm_post_rate_limit_error():
+    provider = "test_provider"
+    api_key = "test_api_key"
+    data = {"input": "test input"}
+    model_id = "test_model_id"
+
+    endpoint = (
+        MOCK_PROVIDER_INFO[provider]["endpoint"]
+        .replace(API_KEY, api_key)
+        .replace(MODEL_ID, model_id)
+    )
+
+    respx.post(endpoint).mock(
+        return_value=httpx.Response(429, text="Rate Limit Exceeded")
+    )
+    async with httpx.AsyncClient() as client:
+        # LLMRateLimitError assumed imported alongside LLMTimeoutError
+        with pytest.raises(LLMRateLimitError):
+            # timeout of 5, matching test_llm_post_timeout's call signature
+            await llm_post(client, provider, api_key, data, 5, model_id)
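The updated tests also switch from the `with respx.mock:` context manager to the `@respx.mock` decorator, which scopes the stub to the whole test. Outside this suite, the same stubbing pattern looks like the following sketch — the endpoint URL is illustrative only:

import httpx
import pytest
import respx


@pytest.mark.asyncio
@respx.mock  # decorator form replaces the older `with respx.mock:` block
async def test_stubbed_429():
    # Illustrative endpoint; the real tests build it from MOCK_PROVIDER_INFO.
    endpoint = "https://api.example.com/v1/generate"
    respx.post(endpoint).mock(
        return_value=httpx.Response(429, text="Rate Limit Exceeded")
    )
    async with httpx.AsyncClient() as client:
        response = await client.post(endpoint)
    assert response.status_code == 429
    assert response.text == "Rate Limit Exceeded"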
