Model features: native async #110

Open · wants to merge 14 commits into main
34 changes: 32 additions & 2 deletions libs/ai-endpoints/langchain_nvidia_ai_endpoints/_common.py
@@ -19,7 +19,9 @@
)
from urllib.parse import urlparse, urlunparse

import httpx
import requests
from httpx import AsyncClient
from pydantic import (
BaseModel,
ConfigDict,
@@ -76,6 +78,7 @@ class _NVIDIAClient(BaseModel):
description="Path for polling after HTTP 202 responses",
)
get_session_fn: Callable = Field(requests.Session)
get_asession_fn: Callable = Field(AsyncClient)

api_key: Optional[SecretStr] = Field(
default_factory=lambda: SecretStr(
@@ -100,7 +103,7 @@ class _NVIDIAClient(BaseModel):
last_inputs: Optional[dict] = Field(
default={}, description="Last inputs sent over to the server"
)
last_response: Response = Field(
last_response: Optional[Response] = Field(
Collaborator comment: why make this optional?

None, description="Last response sent from the server"
)
headers_tmpl: dict = Field(
@@ -369,6 +372,26 @@ def _post(
self._try_raise(response)
return response, session

async def _apost(
self,
invoke_url: str,
payload: Optional[dict] = {},
extra_headers: dict = {},
) -> Tuple[httpx.Response, Any]:
"""Async Method for posting to the AI Foundation Model Function API."""
self.last_inputs = {
"url": invoke_url,
"headers": {
**self.headers_tmpl["call"],
**extra_headers,
},
"json": payload,
}
async with self.get_asession_fn() as session:
response = await session.post(**self.__add_authorization(self.last_inputs))
self._try_raise(response)
return response, session

def _get(
self,
invoke_url: str,
@@ -475,9 +498,16 @@ def get_req(
)
return self._wait(response, session)

async def aget_req(
self,
payload: dict = {},
extra_headers: dict = {},
) -> Tuple[httpx.Response, Any]:
return await self._apost(self.infer_url, payload, extra_headers=extra_headers)

def postprocess(
self,
response: Union[str, Response],
response: Union[str, Response, httpx.Response],
) -> Tuple[dict, bool]:
"""Parses a response from the AI Foundation Model Function API.
Strongly assumes that the API will return a single response.
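For context, the `_apost` / `aget_req` path above follows the standard httpx pattern: build the request kwargs, open an `AsyncClient`, await the POST, and raise on HTTP errors. A minimal standalone sketch of that pattern; the URL, headers, and payload below are placeholders, not values from this diff:

```python
import asyncio
from typing import Any, Dict

import httpx


async def apost(url: str, payload: Dict[str, Any], headers: Dict[str, str]) -> httpx.Response:
    # Same shape as _NVIDIAClient._apost: await the POST inside an
    # AsyncClient context and surface HTTP errors before returning.
    async with httpx.AsyncClient() as session:
        response = await session.post(url, json=payload, headers=headers)
        response.raise_for_status()
        return response


# Placeholder endpoint, model, and token, purely for illustration.
response = asyncio.run(
    apost(
        "https://integrate.api.nvidia.com/v1/chat/completions",
        {"model": "example-model", "messages": [{"role": "user", "content": "Hi"}]},
        {"Authorization": "Bearer <NVIDIA_API_KEY>"},
    )
)
print(response.json())
```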
29 changes: 28 additions & 1 deletion libs/ai-endpoints/langchain_nvidia_ai_endpoints/chat_models.py
@@ -253,7 +253,7 @@ class ChatNVIDIA(BaseChatModel):
response = model.invoke("Hello")
"""

_client: _NVIDIAClient = PrivateAttr(_NVIDIAClient)
_client: _NVIDIAClient = PrivateAttr()
base_url: Optional[str] = Field(
default=None,
description="Base url for model listing an invocation",
@@ -389,6 +389,33 @@ def _generate(
generation = ChatGeneration(message=AIMessage(**parsed_response))
return ChatResult(generations=[generation], llm_output=responses)

async def _agenerate(
self,
messages: List[BaseMessage],
stop: Optional[List[str]] = None,
run_manager: Optional[AsyncCallbackManagerForLLMRun] = None,
**kwargs: Any,
) -> ChatResult:
inputs = [
message
for message in [convert_message_to_dict(message) for message in messages]
]
inputs, extra_headers = _process_for_vlm(inputs, self._client.model)
payload = self._get_payload(inputs=inputs, stop=stop, stream=False, **kwargs)
if payload.get("stream", False) is True:
payload = {**payload, "stream": False}
response, _ = await self._client.aget_req(
payload=payload, extra_headers=extra_headers
)
responses, _ = self._client.postprocess(response)
self._set_callback_out(responses, run_manager)
parsed_response = self._custom_postprocess(responses, streaming=False)
# for pre 0.2 compatibility w/ ChatMessage
# ChatMessage had a role property that was not present in AIMessage
parsed_response.update({"role": "assistant"})
        generation = ChatGeneration(message=AIMessage(**parsed_response))
return ChatResult(generations=[generation], llm_output=responses)

def _stream(
self,
messages: List[BaseMessage],
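With `_agenerate` in place, `ChatNVIDIA.ainvoke` and `agenerate` run natively on the event loop instead of falling back to the default run-in-executor path, so several calls can be fanned out concurrently. A rough usage sketch; the model name is an assumption, not part of this diff:

```python
import asyncio

from langchain_nvidia_ai_endpoints import ChatNVIDIA


async def main() -> None:
    model = ChatNVIDIA(model="meta/llama3-8b-instruct")  # assumed model name
    prompts = ["What is httpx?", "What is asyncio?", "What is pydantic?"]
    # Each ainvoke awaits the new _agenerate -> aget_req -> _apost path,
    # so the three requests share one event loop instead of three threads.
    replies = await asyncio.gather(*(model.ainvoke(p) for p in prompts))
    for reply in replies:
        print(reply.content)


asyncio.run(main())
```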
@@ -34,7 +34,7 @@ class NVIDIAEmbeddings(BaseModel, Embeddings):
validate_assignment=True,
)

_client: _NVIDIAClient = PrivateAttr(_NVIDIAClient)
_client: _NVIDIAClient = PrivateAttr()
base_url: Optional[str] = Field(
default=None,
description="Base url for model listing an invocation",
2 changes: 1 addition & 1 deletion libs/ai-endpoints/langchain_nvidia_ai_endpoints/llm.py
@@ -23,7 +23,7 @@ class NVIDIA(LLM):
validate_assignment=True,
)

_client: _NVIDIAClient = PrivateAttr(_NVIDIAClient)
_client: _NVIDIAClient = PrivateAttr()
_default_model_name: str = "nvidia/mistral-nemo-minitron-8b-base"
base_url: Optional[str] = Field(
default=None,
@@ -34,7 +34,7 @@ class NVIDIARerank(BaseDocumentCompressor):
validate_assignment=True,
)

_client: _NVIDIAClient = PrivateAttr(_NVIDIAClient)
_client: _NVIDIAClient = PrivateAttr()

base_url: Optional[str] = Field(
default=None,
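The repeated `PrivateAttr(_NVIDIAClient)` → `PrivateAttr()` change across the wrapper classes is worth a note: `PrivateAttr`'s first positional argument is the default value, so passing the class made the class object itself the default rather than an instance. A small sketch of the difference, using illustrative names only:

```python
from pydantic import BaseModel, PrivateAttr


class Client:
    """Stand-in for _NVIDIAClient."""


class Before(BaseModel):
    # The class object itself becomes the default value of _client.
    _client: Client = PrivateAttr(Client)


class After(BaseModel):
    # No default: the attribute is assigned explicitly after construction,
    # as the wrapper classes do in their own initializers.
    _client: Client = PrivateAttr()

    def model_post_init(self, __context) -> None:
        self._client = Client()


print(type(Before()._client))  # <class 'type'> -- the class, not an instance
print(type(After()._client))   # <class '__main__.Client'>
```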