Core refac #13627

Merged
15 commits merged on Nov 21, 2023

Changes from 2 commits

@@ -3,27 +3,9 @@
import asyncio
from abc import ABC, abstractmethod
from functools import partial
from typing import Any, Literal, Sequence
from typing import Any, Sequence

from langchain_core.load.serializable import Serializable
from langchain_core.pydantic_v1 import Field


class Document(Serializable):
"""Class for storing a piece of text and associated metadata."""

page_content: str
"""String text."""
metadata: dict = Field(default_factory=dict)
"""Arbitrary metadata about the page content (e.g., source, relationships to other
documents, etc.).
"""
type: Literal["Document"] = "Document"

@classmethod
def is_lc_serializable(cls) -> bool:
"""Return whether this class is serializable."""
return True
from langchain_core.documents import Document


class BaseDocumentTransformer(ABC):
23 changes: 23 additions & 0 deletions libs/core/langchain_core/documents.py
@@ -0,0 +1,23 @@
from __future__ import annotations

from typing import Literal

from langchain_core.load.serializable import Serializable
from langchain_core.pydantic_v1 import Field


class Document(Serializable):
"""Class for storing a piece of text and associated metadata."""

page_content: str
"""String text."""
metadata: dict = Field(default_factory=dict)
"""Arbitrary metadata about the page content (e.g., source, relationships to other
documents, etc.).
"""
type: Literal["Document"] = "Document"

@classmethod
def is_lc_serializable(cls) -> bool:
"""Return whether this class is serializable."""
return True
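
Reviewer note: a minimal usage sketch for the relocated Document class, assuming it is importable from the new langchain_core.documents module added above; the metadata values are made up for illustration.

from langchain_core.documents import Document

doc = Document(
    page_content="LangChain core abstractions now live in langchain_core.",
    metadata={"source": "core-refac-notes.txt"},  # hypothetical source path
)
print(doc.page_content)
print(doc.metadata["source"])
print(Document.is_lc_serializable())  # True
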
44 changes: 44 additions & 0 deletions libs/core/langchain_core/exceptions.py
@@ -0,0 +1,44 @@
from typing import Any, Optional


class LangChainException(Exception):
"""General LangChain exception."""


class OutputParserException(ValueError):
"""Exception that output parsers should raise to signify a parsing error.

This exists to differentiate parsing errors from other code or execution errors
that also may arise inside the output parser. OutputParserExceptions will be
available to catch and handle in ways to fix the parsing error, while other
errors will be raised.

Args:
error: The error that's being re-raised or an error message.
observation: String explanation of error which can be passed to a
model to try and remediate the issue.
llm_output: String model output that caused the error.
send_to_llm: Whether to send the observation and llm_output back to an Agent
after an OutputParserException has been raised. This gives the underlying
model driving the agent the context that the previous output was improperly
structured, in the hopes that it will update the output to the correct
format.
"""

def __init__(
self,
error: Any,
observation: Optional[str] = None,
llm_output: Optional[str] = None,
send_to_llm: bool = False,
):
super(OutputParserException, self).__init__(error)
if send_to_llm:
if observation is None or llm_output is None:
raise ValueError(
"Arguments 'observation' & 'llm_output'"
" are required if 'send_to_llm' is True"
)
self.observation = observation
self.llm_output = llm_output
self.send_to_llm = send_to_llm
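
Reviewer note: a hedged sketch of how a parser might raise and a caller might catch OutputParserException as defined above; parse_boolean is a hypothetical helper, not part of this PR.

from langchain_core.exceptions import OutputParserException


def parse_boolean(text: str) -> bool:
    # Hypothetical parser: expects the model to answer exactly "true" or "false".
    cleaned = text.strip().lower()
    if cleaned not in ("true", "false"):
        raise OutputParserException(
            error=f"Expected 'true' or 'false', got {text!r}",
            observation="Respond with exactly 'true' or 'false'.",
            llm_output=text,
            send_to_llm=True,
        )
    return cleaned == "true"


try:
    parse_boolean("maybe")
except OutputParserException as e:
    # observation and llm_output can be fed back to the model for a retry.
    print(e, e.observation, e.send_to_llm)
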
90 changes: 90 additions & 0 deletions libs/core/langchain_core/messages/__init__.py
@@ -0,0 +1,90 @@
from typing import List, Sequence, Union

from langchain_core.messages.ai import AIMessage
from langchain_core.messages.base import BaseMessage
from langchain_core.messages.chat import ChatMessage
from langchain_core.messages.function import FunctionMessage
from langchain_core.messages.human import HumanMessage
from langchain_core.messages.system import SystemMessage
from langchain_core.messages.tool import ToolMessage

AnyMessage = Union[
AIMessage, HumanMessage, ChatMessage, SystemMessage, FunctionMessage, ToolMessage
]


def get_buffer_string(
messages: Sequence[BaseMessage], human_prefix: str = "Human", ai_prefix: str = "AI"
) -> str:
"""Convert sequence of Messages to strings and concatenate them into one string.

Args:
messages: Messages to be converted to strings.
human_prefix: The prefix to prepend to contents of HumanMessages.
ai_prefix: The prefix to prepend to contents of AIMessages.

Returns:
A single string concatenation of all input messages.

Example:
.. code-block:: python

from langchain_core.messages import AIMessage, HumanMessage

messages = [
HumanMessage(content="Hi, how are you?"),
AIMessage(content="Good, how are you?"),
]
get_buffer_string(messages)
# -> "Human: Hi, how are you?\nAI: Good, how are you?"
"""
string_messages = []
for m in messages:
if isinstance(m, HumanMessage):
role = human_prefix
elif isinstance(m, AIMessage):
role = ai_prefix
elif isinstance(m, SystemMessage):
role = "System"
elif isinstance(m, FunctionMessage):
role = "Function"
elif isinstance(m, ChatMessage):
role = m.role
else:
raise ValueError(f"Got unsupported message type: {m}")
message = f"{role}: {m.content}"
if isinstance(m, AIMessage) and "function_call" in m.additional_kwargs:
message += f"{m.additional_kwargs['function_call']}"
string_messages.append(message)

return "\n".join(string_messages)


def _message_from_dict(message: dict) -> BaseMessage:
_type = message["type"]
if _type == "human":
return HumanMessage(**message["data"])
elif _type == "ai":
return AIMessage(**message["data"])
elif _type == "system":
return SystemMessage(**message["data"])
elif _type == "chat":
return ChatMessage(**message["data"])
elif _type == "function":
return FunctionMessage(**message["data"])
elif _type == "tool":
return ToolMessage(**message["data"])
else:
raise ValueError(f"Got unexpected message type: {_type}")


def messages_from_dict(messages: Sequence[dict]) -> List[BaseMessage]:
"""Convert a sequence of messages from dicts to Message objects.

Args:
messages: Sequence of messages (as dicts) to convert.

Returns:
List of messages (BaseMessages).
"""
return [_message_from_dict(m) for m in messages]
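
Reviewer note: a small round-trip sketch of the helpers above, assuming messages_to_dict is imported from langchain_core.messages.base as shown in the base.py diff further down.

from langchain_core.messages import (
    AIMessage,
    HumanMessage,
    get_buffer_string,
    messages_from_dict,
)
from langchain_core.messages.base import messages_to_dict

history = [
    HumanMessage(content="Hi, how are you?"),
    AIMessage(content="Good, how are you?"),
]

print(get_buffer_string(history))
# Human: Hi, how are you?
# AI: Good, how are you?

# Serialize to plain dicts and back; useful for persisting chat history.
restored = messages_from_dict(messages_to_dict(history))
print(restored[0].content)  # Hi, how are you?
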
47 changes: 47 additions & 0 deletions libs/core/langchain_core/messages/ai.py
@@ -0,0 +1,47 @@
from typing import Any, Literal

from langchain_core.messages.base import (
BaseMessage,
BaseMessageChunk,
merge_content,
)


class AIMessage(BaseMessage):
"""A Message from an AI."""

example: bool = False
"""Whether this Message is being passed in to the model as part of an example
conversation.
"""

type: Literal["ai"] = "ai"


AIMessage.update_forward_refs()


class AIMessageChunk(AIMessage, BaseMessageChunk):
"""A Message chunk from an AI."""

# Ignoring mypy re-assignment here since we're overriding the value
# to make sure that the chunk variant can be discriminated from the
# non-chunk variant.
type: Literal["AIMessageChunk"] = "AIMessageChunk" # type: ignore[assignment] # noqa: E501

def __add__(self, other: Any) -> BaseMessageChunk: # type: ignore
if isinstance(other, AIMessageChunk):
if self.example != other.example:
raise ValueError(
"Cannot concatenate AIMessageChunks with different example values."
)

return self.__class__(
example=self.example,
content=merge_content(self.content, other.content),
additional_kwargs=self._merge_kwargs_dict(
self.additional_kwargs, other.additional_kwargs
),
)

return super().__add__(other)
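
Reviewer note: a quick sketch of chunk concatenation via __add__ as defined above, assuming streaming produces AIMessageChunk instances like these.

from langchain_core.messages.ai import AIMessageChunk

first = AIMessageChunk(content="Hello, ")
second = AIMessageChunk(content="world!")

combined = first + second
print(type(combined).__name__)  # AIMessageChunk
print(combined.content)         # Hello, world!
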
126 changes: 126 additions & 0 deletions libs/core/langchain_core/messages/base.py
@@ -0,0 +1,126 @@
from __future__ import annotations

from typing import TYPE_CHECKING, Any, Dict, List, Sequence, Union

from langchain_core.load.serializable import Serializable
from langchain_core.pydantic_v1 import Extra, Field

if TYPE_CHECKING:
from langchain_core.prompts.chat import ChatPromptTemplate


class BaseMessage(Serializable):
"""The base abstract Message class.

Messages are the inputs and outputs of ChatModels.
"""

content: Union[str, List[Union[str, Dict]]]
"""The string contents of the message."""

additional_kwargs: dict = Field(default_factory=dict)
"""Any additional information."""

type: str

class Config:
extra = Extra.allow

@classmethod
def is_lc_serializable(cls) -> bool:
"""Return whether this class is serializable."""
return True

def __add__(self, other: Any) -> ChatPromptTemplate:
from langchain_core.prompts.chat import ChatPromptTemplate

prompt = ChatPromptTemplate(messages=[self])
return prompt + other


def merge_content(
first_content: Union[str, List[Union[str, Dict]]],
second_content: Union[str, List[Union[str, Dict]]],
) -> Union[str, List[Union[str, Dict]]]:
# If first chunk is a string
if isinstance(first_content, str):
# If the second chunk is also a string, then merge them naively
if isinstance(second_content, str):
return first_content + second_content
# If the second chunk is a list, add the first chunk to the start of the list
else:
return_list: List[Union[str, Dict]] = [first_content]
return return_list + second_content
# If both are lists, merge them naively
elif isinstance(second_content, List):
return first_content + second_content
# If the first content is a list, and the second content is a string
else:
# If the last element of the first content is a string
# Add the second content to the last element
if isinstance(first_content[-1], str):
return first_content[:-1] + [first_content[-1] + second_content]
else:
# Otherwise, add the second content as a new element of the list
return first_content + [second_content]


class BaseMessageChunk(BaseMessage):
"""A Message chunk, which can be concatenated with other Message chunks."""

def _merge_kwargs_dict(
self, left: Dict[str, Any], right: Dict[str, Any]
) -> Dict[str, Any]:
"""Merge additional_kwargs from another BaseMessageChunk into this one."""
merged = left.copy()
for k, v in right.items():
if k not in merged:
merged[k] = v
elif type(merged[k]) != type(v):
raise ValueError(
f'additional_kwargs["{k}"] already exists in this message,'
" but with a different type."
)
elif isinstance(merged[k], str):
merged[k] += v
elif isinstance(merged[k], dict):
merged[k] = self._merge_kwargs_dict(merged[k], v)
else:
raise ValueError(
f"Additional kwargs key {k} already exists in this message."
)
return merged

def __add__(self, other: Any) -> BaseMessageChunk: # type: ignore
if isinstance(other, BaseMessageChunk):
# If both are (subclasses of) BaseMessageChunk,
# concat into a single BaseMessageChunk

return self.__class__(
content=merge_content(self.content, other.content),
additional_kwargs=self._merge_kwargs_dict(
self.additional_kwargs, other.additional_kwargs
),
)
else:
raise TypeError(
'unsupported operand type(s) for +: "'
f"{self.__class__.__name__}"
f'" and "{other.__class__.__name__}"'
)


def _message_to_dict(message: BaseMessage) -> dict:
return {"type": message.type, "data": message.dict()}


def messages_to_dict(messages: Sequence[BaseMessage]) -> List[dict]:
"""Convert a sequence of Messages to a list of dictionaries.

Args:
messages: Sequence of messages (as BaseMessages) to convert.

Returns:
List of messages as dicts.
"""
return [_message_to_dict(m) for m in messages]
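
Reviewer note: illustrative calls that exercise the merge_content branches above; the inputs are made up for demonstration.

from langchain_core.messages.base import merge_content

# str + str: naive concatenation
print(merge_content("Hello, ", "world"))
# -> "Hello, world"

# str + list: the string becomes the first element of the merged list
print(merge_content("intro text", [{"type": "image_url", "image_url": "https://example.com/x.png"}]))
# -> ["intro text", {"type": "image_url", "image_url": "https://example.com/x.png"}]

# list + str: the string is appended to the last string element
print(merge_content(["a", "b"], "c"))
# -> ["a", "bc"]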