diff --git a/haystack/components/audio/whisper_local.py b/haystack/components/audio/whisper_local.py index 761cf4adc5..63f25a811e 100644 --- a/haystack/components/audio/whisper_local.py +++ b/haystack/components/audio/whisper_local.py @@ -134,16 +134,16 @@ def _raw_transcribe(self, sources: List[Union[str, Path, ByteStream]], **kwargs) if not isinstance(source, ByteStream): path = Path(source) source = ByteStream.from_file_path(path) - source.metadata["file_path"] = path + source.meta["file_path"] = path else: # If we received a ByteStream instance that doesn't have the "file_path" metadata set, # we dump the bytes into a temporary file. - path = source.metadata.get("file_path") + path = source.meta.get("file_path") if path is None: fp = tempfile.NamedTemporaryFile(delete=False) path = Path(fp.name) source.to_file(path) - source.metadata["file_path"] = path + source.meta["file_path"] = path transcription = self._model.transcribe(str(path), **kwargs) if not return_segments: diff --git a/haystack/components/audio/whisper_remote.py b/haystack/components/audio/whisper_remote.py index 02384e2d43..1fc5c049c9 100644 --- a/haystack/components/audio/whisper_remote.py +++ b/haystack/components/audio/whisper_remote.py @@ -129,13 +129,13 @@ def run(self, sources: List[Union[str, Path, ByteStream]]): if not isinstance(source, ByteStream): path = source source = ByteStream.from_file_path(Path(source)) - source.metadata["file_path"] = path + source.meta["file_path"] = path file = io.BytesIO(source.data) - file.name = str(source.metadata["file_path"]) if "file_path" in source.metadata else "__fallback__.wav" + file.name = str(source.meta["file_path"]) if "file_path" in source.meta else "__fallback__.wav" content = openai.Audio.transcribe(file=file, model=self.model_name, **self.whisper_params) - doc = Document(content=content["text"], meta=source.metadata) + doc = Document(content=content["text"], meta=source.meta) documents.append(doc) return {"documents": documents} diff --git a/haystack/components/builders/answer_builder.py b/haystack/components/builders/answer_builder.py index 5815add260..81dddc62b7 100644 --- a/haystack/components/builders/answer_builder.py +++ b/haystack/components/builders/answer_builder.py @@ -42,7 +42,7 @@ def run( self, query: str, replies: List[str], - metadata: Optional[List[Dict[str, Any]]] = None, + meta: Optional[List[Dict[str, Any]]] = None, documents: Optional[List[Document]] = None, pattern: Optional[str] = None, reference_pattern: Optional[str] = None, @@ -52,7 +52,7 @@ def run( :param query: The query used in the prompts for the Generator as a string. :param replies: The output of the Generator. A list of strings. - :param metadata: The metadata returned by the Generator. An optional list of dictionaries. If not specified, + :param meta: The metadata returned by the Generator. An optional list of dictionaries. If not specified, the generated answer will contain no metadata. :param documents: The documents used as input to the Generator. A list of `Document` objects. If `documents` are specified, they are added to the `Answer` objects. @@ -74,10 +74,10 @@ def run( If not specified, no parsing is done, and all documents are referenced. Default: `None`. 
""" - if not metadata: - metadata = [{}] * len(replies) - elif len(replies) != len(metadata): - raise ValueError(f"Number of replies ({len(replies)}), and metadata ({len(metadata)}) must match.") + if not meta: + meta = [{}] * len(replies) + elif len(replies) != len(meta): + raise ValueError(f"Number of replies ({len(replies)}), and metadata ({len(meta)}) must match.") if pattern: AnswerBuilder._check_num_groups_in_regex(pattern) @@ -86,7 +86,7 @@ def run( reference_pattern = reference_pattern or self.reference_pattern all_answers = [] - for reply, meta in zip(replies, metadata): + for reply, metadata in zip(replies, meta): referenced_docs = [] if documents: reference_idxs = [] @@ -102,7 +102,7 @@ def run( logger.warning("Document index '%s' referenced in Generator output is out of range. ", idx + 1) answer_string = AnswerBuilder._extract_answer_string(reply, pattern) - answer = GeneratedAnswer(data=answer_string, query=query, documents=referenced_docs, meta=meta) + answer = GeneratedAnswer(data=answer_string, query=query, documents=referenced_docs, meta=metadata) all_answers.append(answer) return {"answers": all_answers} diff --git a/haystack/components/builders/dynamic_prompt_builder.py b/haystack/components/builders/dynamic_prompt_builder.py index cb5554537f..59b025009d 100644 --- a/haystack/components/builders/dynamic_prompt_builder.py +++ b/haystack/components/builders/dynamic_prompt_builder.py @@ -53,7 +53,7 @@ class DynamicPromptBuilder: >> {'llm': {'replies': [ChatMessage(content="Berlin is the capital city of Germany and one of the most vibrant and diverse cities in Europe. Here are some key things to know...Enjoy your time exploring the vibrant and dynamic - capital of Germany!", role=<ChatRole.ASSISTANT: 'assistant'>, name=None, metadata={'model': 'gpt-3.5-turbo-0613', + capital of Germany!", role=<ChatRole.ASSISTANT: 'assistant'>, name=None, meta={'model': 'gpt-3.5-turbo-0613', 'index': 0, 'finish_reason': 'stop', 'usage': {'prompt_tokens': 27, 'completion_tokens': 681, 'total_tokens': 708}})]}} @@ -65,7 +65,7 @@ class DynamicPromptBuilder: print(res) >> {'llm': {'replies': [ChatMessage(content="Here is the weather forecast for Berlin in the next 5 days:\\n\\nDay 1: Mostly cloudy with a high of 22°C (72°F) and...so it's always a good idea to check for updates - closer to your visit.", role=<ChatRole.ASSISTANT: 'assistant'>, name=None, metadata={'model': 'gpt-3.5-turbo-0613', + closer to your visit.", role=<ChatRole.ASSISTANT: 'assistant'>, name=None, meta={'model': 'gpt-3.5-turbo-0613', 'index': 0, 'finish_reason': 'stop', 'usage': {'prompt_tokens': 37, 'completion_tokens': 201, 'total_tokens': 238}})]}} ``` @@ -126,7 +126,7 @@ def run(self, doc_input: str): "template_variables":{"query": "who's making a greeting?"}}}) >> {'llm': {'replies': [ChatMessage(content='Haystack', role=<ChatRole.ASSISTANT: 'assistant'>, name=None, - >> metadata={'model': 'gpt-3.5-turbo-0613', 'index': 0, 'finish_reason': 'stop', 'usage': + >> meta={'model': 'gpt-3.5-turbo-0613', 'index': 0, 'finish_reason': 'stop', 'usage': >> {'prompt_tokens': 51, 'completion_tokens': 2, 'total_tokens': 53}})]}} ``` @@ -159,7 +159,7 @@ def run(self, doc_input: str): "template_variables":{"query": "Where does the speaker live?"}}}) >> {'llm': {'replies': ['The speaker lives in Berlin.'], - >> 'metadata': [{'model': 'gpt-3.5-turbo-0613', + >> 'meta': [{'model': 'gpt-3.5-turbo-0613', >> 'index': 0, >> 'finish_reason': 'stop', >> 'usage': {'prompt_tokens': 28, diff --git a/haystack/components/converters/azure.py b/haystack/components/converters/azure.py index a66e05f9f9..2f97dcc810 100644 --- a/haystack/components/converters/azure.py +++ b/haystack/components/converters/azure.py @@ 
-104,11 +104,11 @@ def run(self, sources: List[Union[str, Path, ByteStream]], meta: Optional[List[D azure_output.append(result.to_dict()) file_suffix = None - if "file_path" in bytestream.metadata: - file_suffix = Path(bytestream.metadata["file_path"]).suffix + if "file_path" in bytestream.meta: + file_suffix = Path(bytestream.meta["file_path"]).suffix document = AzureOCRDocumentConverter._convert_azure_result_to_document(result, file_suffix) - merged_metadata = {**bytestream.metadata, **metadata} + merged_metadata = {**bytestream.meta, **metadata} document.meta = merged_metadata documents.append(document) diff --git a/haystack/components/converters/html.py b/haystack/components/converters/html.py index 859f62fed9..83f0acd431 100644 --- a/haystack/components/converters/html.py +++ b/haystack/components/converters/html.py @@ -83,7 +83,7 @@ def run(self, sources: List[Union[str, Path, ByteStream]], meta: Optional[List[D logger.warning("Failed to extract text from %s. Skipping it. Error: %s", source, conversion_e) continue - merged_metadata = {**bytestream.metadata, **metadata} + merged_metadata = {**bytestream.meta, **metadata} document = Document(content=text, meta=merged_metadata) documents.append(document) diff --git a/haystack/components/converters/markdown.py b/haystack/components/converters/markdown.py index eb867bc664..c5c5a83a2c 100644 --- a/haystack/components/converters/markdown.py +++ b/haystack/components/converters/markdown.py @@ -83,7 +83,7 @@ def run(self, sources: List[Union[str, Path, ByteStream]], meta: Optional[List[D logger.warning("Failed to extract text from %s. Skipping it. Error: %s", source, conversion_e) continue - merged_metadata = {**bytestream.metadata, **metadata} + merged_metadata = {**bytestream.meta, **metadata} document = Document(content=text, meta=merged_metadata) documents.append(document) diff --git a/haystack/components/converters/pypdf.py b/haystack/components/converters/pypdf.py index ba2fb14720..735426fdf7 100644 --- a/haystack/components/converters/pypdf.py +++ b/haystack/components/converters/pypdf.py @@ -111,7 +111,7 @@ def run(self, sources: List[Union[str, Path, ByteStream]], meta: Optional[List[D logger.warning("Could not read %s and convert it to Document, skipping. %s", source, e) continue - merged_metadata = {**bytestream.metadata, **metadata} + merged_metadata = {**bytestream.meta, **metadata} document.meta = merged_metadata documents.append(document) diff --git a/haystack/components/converters/tika.py b/haystack/components/converters/tika.py index 4543da24a5..eed81fe306 100644 --- a/haystack/components/converters/tika.py +++ b/haystack/components/converters/tika.py @@ -77,7 +77,7 @@ def run(self, sources: List[Union[str, Path, ByteStream]], meta: Optional[List[D logger.warning("Failed to extract text from %s. Skipping it. Error: %s", source, conversion_e) continue - merged_metadata = {**bytestream.metadata, **metadata} + merged_metadata = {**bytestream.meta, **metadata} document = Document(content=text, meta=merged_metadata) documents.append(document) return {"documents": documents} diff --git a/haystack/components/converters/txt.py b/haystack/components/converters/txt.py index 4e16759dda..08c48b97c0 100644 --- a/haystack/components/converters/txt.py +++ b/haystack/components/converters/txt.py @@ -63,13 +63,13 @@ def run(self, sources: List[Union[str, Path, ByteStream]], meta: Optional[List[D logger.warning("Could not read %s. Skipping it. 
Error: %s", source, e) continue try: - encoding = bytestream.metadata.get("encoding", self.encoding) + encoding = bytestream.meta.get("encoding", self.encoding) text = bytestream.data.decode(encoding) except Exception as e: logger.warning("Could not convert file %s. Skipping it. Error message: %s", source, e) continue - merged_metadata = {**bytestream.metadata, **metadata} + merged_metadata = {**bytestream.meta, **metadata} document = Document(content=text, meta=merged_metadata) documents.append(document) diff --git a/haystack/components/converters/utils.py b/haystack/components/converters/utils.py index b8871aeecc..d5040635e2 100644 --- a/haystack/components/converters/utils.py +++ b/haystack/components/converters/utils.py @@ -15,6 +15,6 @@ def get_bytestream_from_source(source: Union[str, Path, ByteStream]) -> ByteStre return source if isinstance(source, (str, Path)): bs = ByteStream.from_file_path(Path(source)) - bs.metadata["file_path"] = str(source) + bs.meta["file_path"] = str(source) return bs raise ValueError(f"Unsupported source type {type(source)}") diff --git a/haystack/components/fetchers/link_content.py b/haystack/components/fetchers/link_content.py index e1fee5f718..7d531694cd 100644 --- a/haystack/components/fetchers/link_content.py +++ b/haystack/components/fetchers/link_content.py @@ -118,7 +118,7 @@ def run(self, urls: List[str]): # don't use multithreading if there's only one URL if len(urls) == 1: stream_metadata, stream = self.fetch(urls[0]) - stream.metadata.update(stream_metadata) + stream.meta.update(stream_metadata) streams.append(stream) else: with ThreadPoolExecutor() as executor: @@ -126,7 +126,7 @@ def run(self, urls: List[str]): for stream_metadata, stream in results: # type: ignore if stream_metadata is not None and stream is not None: - stream.metadata.update(stream_metadata) + stream.meta.update(stream_metadata) streams.append(stream) return {"streams": streams} diff --git a/haystack/components/generators/chat/hugging_face_tgi.py b/haystack/components/generators/chat/hugging_face_tgi.py index 33ad9c67bc..7363497c56 100644 --- a/haystack/components/generators/chat/hugging_face_tgi.py +++ b/haystack/components/generators/chat/hugging_face_tgi.py @@ -241,7 +241,7 @@ def _run_streaming( self.streaming_callback(stream_chunk) # type: ignore # streaming_callback is not None (verified in the run method) message = ChatMessage.from_assistant(chunk.generated_text) - message.metadata.update( + message.meta.update( { "finish_reason": chunk.details.finish_reason.value, "index": 0, @@ -264,7 +264,7 @@ def _run_non_streaming( prepared_prompt, details=True, **generation_kwargs ) message = ChatMessage.from_assistant(tgr.generated_text) - message.metadata.update( + message.meta.update( { "finish_reason": tgr.details.finish_reason.value, "index": _i, diff --git a/haystack/components/generators/chat/openai.py b/haystack/components/generators/chat/openai.py index 1ec2b541e2..09cd4e58ea 100644 --- a/haystack/components/generators/chat/openai.py +++ b/haystack/components/generators/chat/openai.py @@ -42,7 +42,7 @@ class GPTChatGenerator: >>{'replies': [ChatMessage(content='Natural Language Processing (NLP) is a branch of artificial intelligence >>that focuses on enabling computers to understand, interpret, and generate human language in a way that is >>meaningful and useful.', role=<ChatRole.ASSISTANT: 'assistant'>, name=None, - >>metadata={'model': 'gpt-3.5-turbo-0613', 'index': 0, 'finish_reason': 'stop', + >>meta={'model': 'gpt-3.5-turbo-0613', 'index': 0, 'finish_reason': 'stop', >>'usage': {'prompt_tokens': 
15, 'completion_tokens': 36, 'total_tokens': 51}})]} ``` @@ -218,7 +218,7 @@ def _connect_chunks(self, chunk: OpenAIObject, chunks: List[StreamingChunk]) -> :param chunks: The list of all chunks returned by the OpenAI API. """ complete_response = ChatMessage.from_assistant("".join([chunk.content for chunk in chunks])) - complete_response.metadata.update( + complete_response.meta.update( { "model": chunk.model, "index": 0, @@ -239,7 +239,7 @@ def _build_message(self, completion: OpenAIObject, choice: OpenAIObject) -> Chat # message.content is str but message.function_call is OpenAIObject but JSON in fact, convert to str content = str(message.function_call) if choice.finish_reason == "function_call" else message.content chat_message = ChatMessage.from_assistant(content) - chat_message.metadata.update( + chat_message.meta.update( { "model": completion.model, "index": choice.index, @@ -264,9 +264,7 @@ def _build_chunk(self, chunk: OpenAIObject, choice: OpenAIObject) -> StreamingCh else: content = "" chunk_message = StreamingChunk(content) - chunk_message.metadata.update( - {"model": chunk.model, "index": choice.index, "finish_reason": choice.finish_reason} - ) + chunk_message.meta.update({"model": chunk.model, "index": choice.index, "finish_reason": choice.finish_reason}) return chunk_message def _check_finish_reason(self, message: ChatMessage) -> None: @@ -275,13 +273,13 @@ def _check_finish_reason(self, message: ChatMessage) -> None: If the `finish_reason` is `length` or `content_filter`, log a warning. :param message: The message returned by the LLM. """ - if message.metadata["finish_reason"] == "length": + if message.meta["finish_reason"] == "length": logger.warning( "The completion for index %s has been truncated before reaching a natural stopping point. " "Increase the max_tokens parameter to allow for longer completions.", - message.metadata["index"], + message.meta["index"], ) - if message.metadata["finish_reason"] == "content_filter": + if message.meta["finish_reason"] == "content_filter": logger.warning( - "The completion for index %s has been truncated due to the content filter.", message.metadata["index"] + "The completion for index %s has been truncated due to the content filter.", message.meta["index"] ) diff --git a/haystack/components/generators/hugging_face_tgi.py b/haystack/components/generators/hugging_face_tgi.py index e7d9128d82..f55a0e2da9 100644 --- a/haystack/components/generators/hugging_face_tgi.py +++ b/haystack/components/generators/hugging_face_tgi.py @@ -157,7 +157,7 @@ def _get_telemetry_data(self) -> Dict[str, Any]: # Don't send URL as it is sensitive information return {"model": self.model} - @component.output_types(replies=List[str], metadata=List[Dict[str, Any]]) + @component.output_types(replies=List[str], meta=List[Dict[str, Any]]) def run(self, prompt: str, generation_kwargs: Optional[Dict[str, Any]] = None): """ Invoke the text generation inference for the given prompt and generation parameters. 
@@ -204,15 +204,15 @@ def _run_streaming(self, prompt: str, prompt_token_count: int, generation_kwargs chunks.append(stream_chunk) self.streaming_callback(stream_chunk) # type: ignore # streaming_callback is not None (verified in the run method) metadata = { - "finish_reason": chunks[-1].metadata.get("finish_reason", None), + "finish_reason": chunks[-1].meta.get("finish_reason", None), "model": self.client.model, "usage": { - "completion_tokens": chunks[-1].metadata.get("generated_tokens", 0), + "completion_tokens": chunks[-1].meta.get("generated_tokens", 0), "prompt_tokens": prompt_token_count, - "total_tokens": prompt_token_count + chunks[-1].metadata.get("generated_tokens", 0), + "total_tokens": prompt_token_count + chunks[-1].meta.get("generated_tokens", 0), }, } - return {"replies": ["".join([chunk.content for chunk in chunks])], "metadata": [metadata]} + return {"replies": ["".join([chunk.content for chunk in chunks])], "meta": [metadata]} def _run_non_streaming( self, prompt: str, prompt_token_count: int, num_responses: int, generation_kwargs: Dict[str, Any] @@ -234,4 +234,4 @@ def _run_non_streaming( } ) responses.append(tgr.generated_text) - return {"replies": responses, "metadata": all_metadata} + return {"replies": responses, "meta": all_metadata} diff --git a/haystack/components/generators/openai.py b/haystack/components/generators/openai.py index bb4f807bf1..73a8298309 100644 --- a/haystack/components/generators/openai.py +++ b/haystack/components/generators/openai.py @@ -37,7 +37,7 @@ class GPTGenerator: >> {'replies': ['Natural Language Processing (NLP) is a branch of artificial intelligence that focuses on >> the interaction between computers and human language. It involves enabling computers to understand, interpret, - >> and respond to natural human language in a way that is both meaningful and useful.'], 'metadata': [{'model': + >> and respond to natural human language in a way that is both meaningful and useful.'], 'meta': [{'model': >> 'gpt-3.5-turbo-0613', 'index': 0, 'finish_reason': 'stop', 'usage': {'prompt_tokens': 16, >> 'completion_tokens': 49, 'total_tokens': 65}}]} ``` @@ -146,7 +146,7 @@ def from_dict(cls, data: Dict[str, Any]) -> "GPTGenerator": data["init_parameters"]["streaming_callback"] = deserialize_callback_handler(serialized_callback_handler) return default_from_dict(cls, data) - @component.output_types(replies=List[str], metadata=List[Dict[str, Any]]) + @component.output_types(replies=List[str], meta=List[Dict[str, Any]]) def run(self, prompt: str, generation_kwargs: Optional[Dict[str, Any]] = None): """ Invoke the text generation inference based on the provided messages and generation parameters. @@ -200,7 +200,7 @@ def run(self, prompt: str, generation_kwargs: Optional[Dict[str, Any]] = None): return { "replies": [message.content for message in completions], - "metadata": [message.metadata for message in completions], + "meta": [message.meta for message in completions], } def _convert_to_openai_format(self, messages: List[ChatMessage]) -> List[Dict[str, Any]]: @@ -222,7 +222,7 @@ def _connect_chunks(self, chunk: OpenAIObject, chunks: List[StreamingChunk]) -> Connects the streaming chunks into a single ChatMessage. 
""" complete_response = ChatMessage.from_assistant("".join([chunk.content for chunk in chunks])) - complete_response.metadata.update( + complete_response.meta.update( { "model": chunk.model, "index": 0, @@ -242,7 +242,7 @@ def _build_message(self, completion: OpenAIObject, choice: OpenAIObject) -> Chat message: OpenAIObject = choice.message content = dict(message.function_call) if choice.finish_reason == "function_call" else message.content chat_message = ChatMessage.from_assistant(content) - chat_message.metadata.update( + chat_message.meta.update( { "model": completion.model, "index": choice.index, @@ -267,9 +267,7 @@ def _build_chunk(self, chunk: OpenAIObject, choice: OpenAIObject) -> StreamingCh else: content = "" chunk_message = StreamingChunk(content) - chunk_message.metadata.update( - {"model": chunk.model, "index": choice.index, "finish_reason": choice.finish_reason} - ) + chunk_message.meta.update({"model": chunk.model, "index": choice.index, "finish_reason": choice.finish_reason}) return chunk_message def _check_finish_reason(self, message: ChatMessage) -> None: @@ -278,13 +276,13 @@ def _check_finish_reason(self, message: ChatMessage) -> None: If the `finish_reason` is `length`, log a warning to the user. :param message: The message returned by the LLM. """ - if message.metadata["finish_reason"] == "length": + if message.meta["finish_reason"] == "length": logger.warning( "The completion for index %s has been truncated before reaching a natural stopping point. " "Increase the max_tokens parameter to allow for longer completions.", - message.metadata["index"], + message.meta["index"], ) - if message.metadata["finish_reason"] == "content_filter": + if message.meta["finish_reason"] == "content_filter": logger.warning( - "The completion for index %s has been truncated due to the content filter.", message.metadata["index"] + "The completion for index %s has been truncated due to the content filter.", message.meta["index"] ) diff --git a/haystack/components/routers/file_type_router.py b/haystack/components/routers/file_type_router.py index 932472bf14..59f87f0aee 100644 --- a/haystack/components/routers/file_type_router.py +++ b/haystack/components/routers/file_type_router.py @@ -58,7 +58,7 @@ def run(self, sources: List[Union[str, Path, ByteStream]]) -> Dict[str, List[Uni if isinstance(source, Path): mime_type = self.get_mime_type(source) elif isinstance(source, ByteStream): - mime_type = source.metadata.get("content_type") + mime_type = source.meta.get("content_type") else: raise ValueError(f"Unsupported data source type: {type(source)}") diff --git a/haystack/dataclasses/byte_stream.py b/haystack/dataclasses/byte_stream.py index dd84e1c26b..6ccf324640 100644 --- a/haystack/dataclasses/byte_stream.py +++ b/haystack/dataclasses/byte_stream.py @@ -10,7 +10,7 @@ class ByteStream: """ data: bytes - metadata: Dict[str, Any] = field(default_factory=dict, hash=False) + meta: Dict[str, Any] = field(default_factory=dict, hash=False) mime_type: Optional[str] = field(default=None) def to_file(self, destination_path: Path): diff --git a/haystack/dataclasses/chat_message.py b/haystack/dataclasses/chat_message.py index 08c61d6cfc..b85c137475 100644 --- a/haystack/dataclasses/chat_message.py +++ b/haystack/dataclasses/chat_message.py @@ -20,13 +20,13 @@ class ChatMessage: :param content: The text content of the message. :param role: The role of the entity sending the message. :param name: The name of the function being called (only applicable for role FUNCTION). 
- :param metadata: Additional metadata associated with the message. + :param meta: Additional metadata associated with the message. """ content: str role: ChatRole name: Optional[str] - metadata: Dict[str, Any] = field(default_factory=dict, hash=False) + meta: Dict[str, Any] = field(default_factory=dict, hash=False) def is_from(self, role: ChatRole) -> bool: """ @@ -38,15 +38,15 @@ def is_from(self, role: ChatRole) -> bool: return self.role == role @classmethod - def from_assistant(cls, content: str, metadata: Optional[Dict[str, Any]] = None) -> "ChatMessage": + def from_assistant(cls, content: str, meta: Optional[Dict[str, Any]] = None) -> "ChatMessage": """ Create a message from the assistant. :param content: The text content of the message. - :param metadata: Additional metadata associated with the message. + :param meta: Additional metadata associated with the message. :return: A new ChatMessage instance. """ - return cls(content, ChatRole.ASSISTANT, None, metadata or {}) + return cls(content, ChatRole.ASSISTANT, None, meta or {}) @classmethod def from_user(cls, content: str) -> "ChatMessage": diff --git a/haystack/dataclasses/streaming_chunk.py b/haystack/dataclasses/streaming_chunk.py index 1245560431..f2f2b990f8 100644 --- a/haystack/dataclasses/streaming_chunk.py +++ b/haystack/dataclasses/streaming_chunk.py @@ -10,8 +10,8 @@ class StreamingChunk: streamed data in a systematic manner. :param content: The content of the message chunk as a string. - :param metadata: A dictionary containing metadata related to the message chunk. + :param meta: A dictionary containing metadata related to the message chunk. """ content: str - metadata: Dict[str, Any] = field(default_factory=dict, hash=False) + meta: Dict[str, Any] = field(default_factory=dict, hash=False) diff --git a/haystack/pipeline_utils/rag.py b/haystack/pipeline_utils/rag.py index fdf8ca5638..1c088efe83 100644 --- a/haystack/pipeline_utils/rag.py +++ b/haystack/pipeline_utils/rag.py @@ -67,7 +67,7 @@ def __init__(self, retriever: Any, embedder: Any, generator: Any, prompt_templat self.pipeline.connect("retriever", "prompt_builder.documents") self.pipeline.connect("prompt_builder.prompt", "llm.prompt") self.pipeline.connect("llm.replies", "answer_builder.replies") - self.pipeline.connect("llm.metadata", "answer_builder.metadata") + self.pipeline.connect("llm.meta", "answer_builder.meta") self.pipeline.connect("retriever", "answer_builder.documents") def run(self, query: str) -> Answer: diff --git a/releasenotes/notes/changed-metadata-to-meta-64cceb9ed19722fe.yaml b/releasenotes/notes/changed-metadata-to-meta-64cceb9ed19722fe.yaml new file mode 100644 index 0000000000..5710841bab --- /dev/null +++ b/releasenotes/notes/changed-metadata-to-meta-64cceb9ed19722fe.yaml @@ -0,0 +1,4 @@ +--- +enhancements: + - | + Rename all metadata references to meta. 
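A minimal sketch of the rename from the caller's side (hypothetical values; the classes and their `meta` fields are taken from the diff above, and the `haystack.dataclasses` import path is assumed):

```python
from haystack.dataclasses import ByteStream, ChatMessage, StreamingChunk

# The field formerly called `metadata` is now `meta` on all three dataclasses.
bs = ByteStream(data=b"hello", meta={"file_path": "hello.txt"})
msg = ChatMessage.from_assistant("Hi there", meta={"model": "gpt-3.5-turbo"})
chunk = StreamingChunk(content="Hi", meta={"index": 0})

assert bs.meta["file_path"] == "hello.txt"
assert msg.meta["model"] == "gpt-3.5-turbo"
assert chunk.meta == {"index": 0}
```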
diff --git a/test/components/audio/test_whisper_local.py b/test/components/audio/test_whisper_local.py index 82da74ab74..0fb6fdfc11 100644 --- a/test/components/audio/test_whisper_local.py +++ b/test/components/audio/test_whisper_local.py @@ -125,7 +125,7 @@ def test_transcribe_stream(self): } path = SAMPLES_PATH / "audio" / "this is the content of the document.wav" bs = ByteStream.from_file_path(path) - bs.metadata["file_path"] = path + bs.meta["file_path"] = path results = comp.transcribe(sources=[bs]) expected = Document( content="test transcription", meta={"audio_file": path, "other_metadata": ["other", "meta", "data"]} diff --git a/test/components/audio/test_whisper_remote.py b/test/components/audio/test_whisper_remote.py index 2c1b459632..fb5e31ab6e 100644 --- a/test/components/audio/test_whisper_remote.py +++ b/test/components/audio/test_whisper_remote.py @@ -210,7 +210,7 @@ def test_run_bytestream(self, test_files_path): transcriber = RemoteWhisperTranscriber(api_key="test_api_key", model_name=model, response_format="json") with open(file_path, "rb") as audio_stream: byte_stream = audio_stream.read() - audio_file = ByteStream(byte_stream, metadata={"file_path": str(file_path.absolute())}) + audio_file = ByteStream(byte_stream, meta={"file_path": str(file_path.absolute())}) result = transcriber.run(sources=[audio_file]) diff --git a/test/components/builders/test_answer_builder.py b/test/components/builders/test_answer_builder.py index 10ec43ba6f..4f94852df2 100644 --- a/test/components/builders/test_answer_builder.py +++ b/test/components/builders/test_answer_builder.py @@ -10,7 +10,7 @@ class TestAnswerBuilder: def test_run_unmatching_input_len(self): component = AnswerBuilder() with pytest.raises(ValueError): - component.run(query="query", replies=["reply1"], metadata=[{"test": "meta"}, {"test": "meta2"}]) + component.run(query="query", replies=["reply1"], meta=[{"test": "meta"}, {"test": "meta2"}]) def test_run_without_meta(self): component = AnswerBuilder() @@ -24,7 +24,7 @@ def test_run_without_meta(self): def test_run_meta_is_an_empty_list(self): component = AnswerBuilder() - output = component.run(query="query", replies=["reply1"], metadata=[]) + output = component.run(query="query", replies=["reply1"], meta=[]) answers = output["answers"] assert answers[0].data == "reply1" assert answers[0].meta == {} @@ -34,7 +34,7 @@ def test_run_meta_is_an_empty_list(self): def test_run_without_pattern(self): component = AnswerBuilder() - output = component.run(query="test query", replies=["Answer: AnswerString"], metadata=[{}]) + output = component.run(query="test query", replies=["Answer: AnswerString"], meta=[{}]) answers = output["answers"] assert len(answers) == 1 assert answers[0].data == "Answer: AnswerString" @@ -45,7 +45,7 @@ def test_run_without_pattern(self): def test_run_with_pattern_with_capturing_group(self): component = AnswerBuilder(pattern=r"Answer: (.*)") - output = component.run(query="test query", replies=["Answer: AnswerString"], metadata=[{}]) + output = component.run(query="test query", replies=["Answer: AnswerString"], meta=[{}]) answers = output["answers"] assert len(answers) == 1 assert answers[0].data == "AnswerString" @@ -56,7 +56,7 @@ def test_run_with_pattern_with_capturing_group(self): def test_run_with_pattern_without_capturing_group(self): component = AnswerBuilder(pattern=r"'.*'") - output = component.run(query="test query", replies=["Answer: 'AnswerString'"], metadata=[{}]) + output = component.run(query="test query", replies=["Answer: 'AnswerString'"], 
meta=[{}]) answers = output["answers"] assert len(answers) == 1 assert answers[0].data == "'AnswerString'" @@ -71,9 +71,7 @@ def test_run_with_pattern_with_more_than_one_capturing_group(self): def test_run_with_pattern_set_at_runtime(self): component = AnswerBuilder(pattern="unused pattern") - output = component.run( - query="test query", replies=["Answer: AnswerString"], metadata=[{}], pattern=r"Answer: (.*)" - ) + output = component.run(query="test query", replies=["Answer: AnswerString"], meta=[{}], pattern=r"Answer: (.*)") answers = output["answers"] assert len(answers) == 1 assert answers[0].data == "AnswerString" @@ -87,7 +85,7 @@ def test_run_with_documents_without_reference_pattern(self): output = component.run( query="test query", replies=["Answer: AnswerString"], - metadata=[{}], + meta=[{}], documents=[Document(content="test doc 1"), Document(content="test doc 2")], ) answers = output["answers"] @@ -104,7 +102,7 @@ def test_run_with_documents_with_reference_pattern(self): output = component.run( query="test query", replies=["Answer: AnswerString[2]"], - metadata=[{}], + meta=[{}], documents=[Document(content="test doc 1"), Document(content="test doc 2")], ) answers = output["answers"] @@ -121,7 +119,7 @@ def test_run_with_documents_with_reference_pattern_and_no_match(self, caplog): output = component.run( query="test query", replies=["Answer: AnswerString[3]"], - metadata=[{}], + meta=[{}], documents=[Document(content="test doc 1"), Document(content="test doc 2")], ) answers = output["answers"] @@ -137,7 +135,7 @@ def test_run_with_reference_pattern_set_at_runtime(self): output = component.run( query="test query", replies=["Answer: AnswerString[2][3]"], - metadata=[{}], + meta=[{}], documents=[Document(content="test doc 1"), Document(content="test doc 2"), Document(content="test doc 3")], reference_pattern="\\[(\\d+)\\]", ) diff --git a/test/components/converters/test_azure_ocr_doc_converter.py b/test/components/converters/test_azure_ocr_doc_converter.py index e5f4345194..36f8f6a902 100644 --- a/test/components/converters/test_azure_ocr_doc_converter.py +++ b/test/components/converters/test_azure_ocr_doc_converter.py @@ -45,7 +45,7 @@ def test_run(self, test_files_path): } def test_run_with_meta(self): - bytestream = ByteStream(data=b"test", metadata={"author": "test_author", "language": "en"}) + bytestream = ByteStream(data=b"test", meta={"author": "test_author", "language": "en"}) with patch("haystack.components.converters.azure.DocumentAnalysisClient"): component = AzureOCRDocumentConverter(endpoint="test_endpoint", api_key="test_credential_key") diff --git a/test/components/converters/test_html_to_document.py b/test/components/converters/test_html_to_document.py index 1cdb478127..dfd3cfd96d 100644 --- a/test/components/converters/test_html_to_document.py +++ b/test/components/converters/test_html_to_document.py @@ -63,7 +63,7 @@ def test_run_bytestream_metadata(self, test_files_path): converter = HTMLToDocument() with open(test_files_path / "html" / "what_is_haystack.html", "rb") as file: byte_stream = file.read() - stream = ByteStream(byte_stream, metadata={"content_type": "text/html", "url": "test_url"}) + stream = ByteStream(byte_stream, meta={"content_type": "text/html", "url": "test_url"}) results = converter.run(sources=[stream]) docs = results["documents"] @@ -81,7 +81,7 @@ def test_run_bytestream_and_doc_metadata(self, test_files_path): converter = HTMLToDocument() with open(test_files_path / "html" / "what_is_haystack.html", "rb") as file: byte_stream = file.read() 
- stream = ByteStream(byte_stream, metadata={"content_type": "text/html", "url": "test_url"}) + stream = ByteStream(byte_stream, meta={"content_type": "text/html", "url": "test_url"}) metadata = [{"file_name": "what_is_haystack.html"}] results = converter.run(sources=[stream], meta=metadata) @@ -103,7 +103,7 @@ def test_run_bytestream_doc_overlapping_metadata(self, test_files_path): with open(test_files_path / "html" / "what_is_haystack.html", "rb") as file: byte_stream = file.read() # ByteStream has "url" present in metadata - stream = ByteStream(byte_stream, metadata={"content_type": "text/html", "url": "test_url_correct"}) + stream = ByteStream(byte_stream, meta={"content_type": "text/html", "url": "test_url_correct"}) # "url" supplied by the user overwrites value present in metadata metadata = [{"file_name": "what_is_haystack.html", "url": "test_url_new"}] diff --git a/test/components/converters/test_markdown_to_document.py b/test/components/converters/test_markdown_to_document.py index 7b47551ac7..3764fd7a63 100644 --- a/test/components/converters/test_markdown_to_document.py +++ b/test/components/converters/test_markdown_to_document.py @@ -32,7 +32,7 @@ def test_run(self, test_files_path): assert "# git clone https://github.com/deepset-ai/haystack.git" in doc.content def test_run_with_meta(self): - bytestream = ByteStream(data=b"test", metadata={"author": "test_author", "language": "en"}) + bytestream = ByteStream(data=b"test", meta={"author": "test_author", "language": "en"}) converter = MarkdownToDocument() diff --git a/test/components/converters/test_pypdf_to_document.py b/test/components/converters/test_pypdf_to_document.py index 7da02a4cd0..d403cc0e79 100644 --- a/test/components/converters/test_pypdf_to_document.py +++ b/test/components/converters/test_pypdf_to_document.py @@ -30,7 +30,7 @@ def test_run(self, test_files_path): assert "ReAct" in docs[0].content def test_run_with_meta(self): - bytestream = ByteStream(data=b"test", metadata={"author": "test_author", "language": "en"}) + bytestream = ByteStream(data=b"test", meta={"author": "test_author", "language": "en"}) converter = PyPDFToDocument() with patch("haystack.components.converters.pypdf.PdfReader"): diff --git a/test/components/converters/test_textfile_to_document.py b/test/components/converters/test_textfile_to_document.py index a99c524208..5e115775a3 100644 --- a/test/components/converters/test_textfile_to_document.py +++ b/test/components/converters/test_textfile_to_document.py @@ -14,8 +14,8 @@ def test_run(self, test_files_path): Test if the component runs correctly. """ bytestream = ByteStream.from_file_path(test_files_path / "txt" / "doc_3.txt") - bytestream.metadata["file_path"] = str(test_files_path / "txt" / "doc_3.txt") - bytestream.metadata["key"] = "value" + bytestream.meta["file_path"] = str(test_files_path / "txt" / "doc_3.txt") + bytestream.meta["key"] = "value" files = [str(test_files_path / "txt" / "doc_1.txt"), test_files_path / "txt" / "doc_2.txt", bytestream] converter = TextFileToDocument() output = converter.run(sources=files) @@ -26,7 +26,7 @@ def test_run(self, test_files_path): assert "That's yet another file!" 
in docs[2].content assert docs[0].meta["file_path"] == str(files[0]) assert docs[1].meta["file_path"] == str(files[1]) - assert docs[2].meta == bytestream.metadata + assert docs[2].meta == bytestream.meta def test_run_error_handling(self, test_files_path, caplog): """ @@ -47,18 +47,18 @@ def test_encoding_override(self, test_files_path): Test if the encoding metadata field is used properly """ bytestream = ByteStream.from_file_path(test_files_path / "txt" / "doc_1.txt") - bytestream.metadata["key"] = "value" + bytestream.meta["key"] = "value" converter = TextFileToDocument(encoding="utf-16") output = converter.run(sources=[bytestream]) assert "Some text for testing." not in output["documents"][0].content - bytestream.metadata["encoding"] = "utf-8" + bytestream.meta["encoding"] = "utf-8" output = converter.run(sources=[bytestream]) assert "Some text for testing." in output["documents"][0].content def test_run_with_meta(self): - bytestream = ByteStream(data=b"test", metadata={"author": "test_author", "language": "en"}) + bytestream = ByteStream(data=b"test", meta={"author": "test_author", "language": "en"}) converter = TextFileToDocument() diff --git a/test/components/converters/test_tika_doc_converter.py b/test/components/converters/test_tika_doc_converter.py index e65fb0c8b1..23c1fa92f0 100644 --- a/test/components/converters/test_tika_doc_converter.py +++ b/test/components/converters/test_tika_doc_converter.py @@ -19,7 +19,7 @@ def test_run(self, mock_tika_parser): assert documents[0].content == "Content of mock source" def test_run_with_meta(self): - bytestream = ByteStream(data=b"test", metadata={"author": "test_author", "language": "en"}) + bytestream = ByteStream(data=b"test", meta={"author": "test_author", "language": "en"}) converter = TikaDocumentConverter() with patch("haystack.components.converters.tika.tika_parser.from_buffer"): diff --git a/test/components/fetchers/test_link_content_fetcher.py b/test/components/fetchers/test_link_content_fetcher.py index 816185d4d9..e3f350d5a1 100644 --- a/test/components/fetchers/test_link_content_fetcher.py +++ b/test/components/fetchers/test_link_content_fetcher.py @@ -67,7 +67,7 @@ def test_run_text(self): streams = fetcher.run(urls=["https://www.example.com"])["streams"] first_stream = streams[0] assert first_stream.data == correct_response - assert first_stream.metadata["content_type"] == "text/plain" + assert first_stream.meta["content_type"] == "text/plain" def test_run_html(self): correct_response = b"
<html>Example test response</html>
" @@ -79,7 +79,7 @@ def test_run_html(self): streams = fetcher.run(urls=["https://www.example.com"])["streams"] first_stream = streams[0] assert first_stream.data == correct_response - assert first_stream.metadata["content_type"] == "text/html" + assert first_stream.meta["content_type"] == "text/html" def test_run_binary(self, test_files_path): file_bytes = open(test_files_path / "pdf" / "sample_pdf_1.pdf", "rb").read() @@ -91,7 +91,7 @@ def test_run_binary(self, test_files_path): streams = fetcher.run(urls=["https://www.example.com"])["streams"] first_stream = streams[0] assert first_stream.data == file_bytes - assert first_stream.metadata["content_type"] == "application/pdf" + assert first_stream.meta["content_type"] == "application/pdf" def test_run_bad_status_code(self): empty_byte_stream = b"" @@ -105,7 +105,7 @@ def test_run_bad_status_code(self): assert len(streams) == 1 first_stream = streams[0] assert first_stream.data == empty_byte_stream - assert first_stream.metadata["content_type"] == "text/html" + assert first_stream.meta["content_type"] == "text/html" @pytest.mark.integration def test_link_content_fetcher_html(self): @@ -113,8 +113,8 @@ def test_link_content_fetcher_html(self): streams = fetcher.run([HTML_URL])["streams"] first_stream = streams[0] assert "Haystack" in first_stream.data.decode("utf-8") - assert first_stream.metadata["content_type"] == "text/html" - assert "url" in first_stream.metadata and first_stream.metadata["url"] == HTML_URL + assert first_stream.meta["content_type"] == "text/html" + assert "url" in first_stream.meta and first_stream.meta["url"] == HTML_URL @pytest.mark.integration def test_link_content_fetcher_text(self): @@ -122,8 +122,8 @@ def test_link_content_fetcher_text(self): streams = fetcher.run([TEXT_URL])["streams"] first_stream = streams[0] assert "Haystack" in first_stream.data.decode("utf-8") - assert first_stream.metadata["content_type"] == "text/plain" - assert "url" in first_stream.metadata and first_stream.metadata["url"] == TEXT_URL + assert first_stream.meta["content_type"] == "text/plain" + assert "url" in first_stream.meta and first_stream.meta["url"] == TEXT_URL @pytest.mark.integration def test_link_content_fetcher_pdf(self): @@ -131,8 +131,8 @@ def test_link_content_fetcher_pdf(self): streams = fetcher.run([PDF_URL])["streams"] assert len(streams) == 1 first_stream = streams[0] - assert first_stream.metadata["content_type"] in ("application/octet-stream", "application/pdf") - assert "url" in first_stream.metadata and first_stream.metadata["url"] == PDF_URL + assert first_stream.meta["content_type"] in ("application/octet-stream", "application/pdf") + assert "url" in first_stream.meta and first_stream.meta["url"] == PDF_URL @pytest.mark.integration def test_link_content_fetcher_multiple_different_content_types(self): @@ -143,10 +143,10 @@ def test_link_content_fetcher_multiple_different_content_types(self): streams = fetcher.run([PDF_URL, HTML_URL])["streams"] assert len(streams) == 2 for stream in streams: - assert stream.metadata["content_type"] in ("text/html", "application/pdf", "application/octet-stream") - if stream.metadata["content_type"] == "text/html": + assert stream.meta["content_type"] in ("text/html", "application/pdf", "application/octet-stream") + if stream.meta["content_type"] == "text/html": assert "Haystack" in stream.data.decode("utf-8") - elif stream.metadata["content_type"] == "application/pdf": + elif stream.meta["content_type"] == "application/pdf": assert len(stream.data) > 0 @pytest.mark.integration @@ 
-160,10 +160,10 @@ def test_link_content_fetcher_multiple_html_streams(self): streams = fetcher.run([PDF_URL, HTML_URL, "https://google.com"])["streams"] assert len(streams) == 3 for stream in streams: - assert stream.metadata["content_type"] in ("text/html", "application/pdf", "application/octet-stream") - if stream.metadata["content_type"] == "text/html": + assert stream.meta["content_type"] in ("text/html", "application/pdf", "application/octet-stream") + if stream.meta["content_type"] == "text/html": assert "Haystack" in stream.data.decode("utf-8") or "Google" in stream.data.decode("utf-8") - elif stream.metadata["content_type"] == "application/pdf": + elif stream.meta["content_type"] == "application/pdf": assert len(stream.data) > 0 @pytest.mark.integration @@ -177,7 +177,7 @@ def test_mix_of_good_and_failed_requests(self): result = fetcher.run(["https://non_existent_website_dot.com/", "https://www.google.com/"]) assert len(result["streams"]) == 1 first_stream = result["streams"][0] - assert first_stream.metadata["content_type"] == "text/html" + assert first_stream.meta["content_type"] == "text/html" @pytest.mark.integration def test_bad_request_exception_raised(self): diff --git a/test/components/generators/chat/test_openai.py b/test/components/generators/chat/test_openai.py index e62930cef8..9d8aa34f43 100644 --- a/test/components/generators/chat/test_openai.py +++ b/test/components/generators/chat/test_openai.py @@ -241,7 +241,7 @@ def test_check_abnormal_completions(self, caplog): component = GPTChatGenerator(api_key="test-api-key") messages = [ ChatMessage.from_assistant( - "", metadata={"finish_reason": "content_filter" if i % 2 == 0 else "length", "index": i} + "", meta={"finish_reason": "content_filter" if i % 2 == 0 else "length", "index": i} ) for i, _ in enumerate(range(4)) ] diff --git a/test/components/generators/test_hugging_face_tgi.py b/test/components/generators/test_hugging_face_tgi.py index 0d59aa7812..4ab077147c 100644 --- a/test/components/generators/test_hugging_face_tgi.py +++ b/test/components/generators/test_hugging_face_tgi.py @@ -124,11 +124,11 @@ def test_generate_text_response_with_valid_prompt_and_generation_parameters( assert isinstance(response, dict) assert "replies" in response - assert "metadata" in response + assert "meta" in response assert isinstance(response["replies"], list) - assert isinstance(response["metadata"], list) + assert isinstance(response["meta"], list) assert len(response["replies"]) == 1 - assert len(response["metadata"]) == 1 + assert len(response["meta"]) == 1 assert [isinstance(reply, str) for reply in response["replies"]] def test_generate_multiple_text_responses_with_valid_prompt_and_generation_parameters( @@ -157,14 +157,14 @@ def test_generate_multiple_text_responses_with_valid_prompt_and_generation_param assert isinstance(response, dict) assert "replies" in response - assert "metadata" in response + assert "meta" in response assert isinstance(response["replies"], list) assert [isinstance(reply, str) for reply in response["replies"]] - assert isinstance(response["metadata"], list) + assert isinstance(response["meta"], list) assert len(response["replies"]) == 3 - assert len(response["metadata"]) == 3 - assert [isinstance(reply, dict) for reply in response["metadata"]] + assert len(response["meta"]) == 3 + assert [isinstance(reply, dict) for reply in response["meta"]] def test_initialize_with_invalid_model(self, mock_check_valid_model): model = "invalid_model" @@ -200,9 +200,9 @@ def test_generate_text_with_stop_words(self, 
mock_check_valid_model, mock_auto_t assert [isinstance(reply, str) for reply in response["replies"]] # Assert that the response contains the metadata - assert "metadata" in response - assert isinstance(response["metadata"], list) - assert len(response["metadata"]) > 0 + assert "meta" in response + assert isinstance(response["meta"], list) + assert len(response["meta"]) > 0 assert [isinstance(reply, dict) for reply in response["replies"]] def test_generate_text_with_custom_generation_parameters( @@ -226,9 +226,9 @@ def test_generate_text_with_custom_generation_parameters( assert response["replies"][0] == "I'm fine, thanks." # Assert that the response contains the metadata - assert "metadata" in response - assert isinstance(response["metadata"], list) - assert len(response["metadata"]) > 0 + assert "meta" in response + assert isinstance(response["meta"], list) + assert len(response["meta"]) > 0 assert [isinstance(reply, str) for reply in response["replies"]] def test_generate_text_with_streaming_callback( @@ -278,7 +278,7 @@ def mock_iter(self): assert [isinstance(reply, str) for reply in response["replies"]] # Assert that the response contains the metadata - assert "metadata" in response - assert isinstance(response["metadata"], list) - assert len(response["metadata"]) > 0 + assert "meta" in response + assert isinstance(response["meta"], list) + assert len(response["meta"]) > 0 assert [isinstance(reply, dict) for reply in response["replies"]] diff --git a/test/components/generators/test_openai.py b/test/components/generators/test_openai.py index e81752bded..a5cc2fc38b 100644 --- a/test/components/generators/test_openai.py +++ b/test/components/generators/test_openai.py @@ -242,7 +242,7 @@ def test_check_abnormal_completions(self, caplog): for i, _ in enumerate(range(4)): message = ChatMessage.from_assistant("Hello") metadata = {"finish_reason": "content_filter" if i % 2 == 0 else "length", "index": i} - message.metadata.update(metadata) + message.meta.update(metadata) messages.append(message) for m in messages: diff --git a/test/components/routers/test_file_router.py b/test/components/routers/test_file_router.py index c9c8684707..9409cc5d5b 100644 --- a/test/components/routers/test_file_router.py +++ b/test/components/routers/test_file_router.py @@ -46,13 +46,13 @@ def test_run_with_bytestreams(self, test_files_path): for path, mime_type in zip(file_paths, mime_types): stream = ByteStream(path.read_bytes()) - stream.metadata["content_type"] = mime_type + stream.meta["content_type"] = mime_type byte_streams.append(stream) # add unclassified ByteStream bs = ByteStream(b"unclassified content") - bs.metadata["content_type"] = "unknown_type" + bs.meta["content_type"] = "unknown_type" byte_streams.append(bs) router = FileTypeRouter(mime_types=["text/plain", "audio/x-wav", "image/jpeg"]) @@ -75,7 +75,7 @@ def test_run_with_bytestreams_and_file_paths(self, test_files_path): byte_stream_sources = [] for path, mime_type in zip(file_paths, mime_types): stream = ByteStream(path.read_bytes()) - stream.metadata["content_type"] = mime_type + stream.meta["content_type"] = mime_type byte_stream_sources.append(stream) mixed_sources = file_paths[:2] + byte_stream_sources[2:] diff --git a/test/dataclasses/test_streaming_chunk.py b/test/dataclasses/test_streaming_chunk.py index cecad8a08c..97c1b19879 100644 --- a/test/dataclasses/test_streaming_chunk.py +++ b/test/dataclasses/test_streaming_chunk.py @@ -4,25 +4,25 @@ def test_create_chunk_with_content_and_metadata(): - chunk = StreamingChunk(content="Test 
content", metadata={"key": "value"}) + chunk = StreamingChunk(content="Test content", meta={"key": "value"}) assert chunk.content == "Test content" - assert chunk.metadata == {"key": "value"} + assert chunk.meta == {"key": "value"} def test_create_chunk_with_only_content(): chunk = StreamingChunk(content="Test content") assert chunk.content == "Test content" - assert chunk.metadata == {} + assert chunk.meta == {} def test_access_content(): - chunk = StreamingChunk(content="Test content", metadata={"key": "value"}) + chunk = StreamingChunk(content="Test content", meta={"key": "value"}) assert chunk.content == "Test content" def test_create_chunk_with_empty_content(): chunk = StreamingChunk(content="") assert chunk.content == "" - assert chunk.metadata == {} + assert chunk.meta == {}
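The socket rename also affects pipeline wiring, as in the `rag.py` change above. A sketch under the assumption that a `GPTGenerator` feeds an `AnswerBuilder` (the API key and prompts are placeholders):

```python
from haystack import Pipeline
from haystack.components.builders.answer_builder import AnswerBuilder
from haystack.components.generators.openai import GPTGenerator

pipeline = Pipeline()
pipeline.add_component("llm", GPTGenerator(api_key="YOUR_OPENAI_API_KEY"))
pipeline.add_component("answer_builder", AnswerBuilder())

pipeline.connect("llm.replies", "answer_builder.replies")
# Before this change, the socket pair was "llm.metadata" -> "answer_builder.metadata".
pipeline.connect("llm.meta", "answer_builder.meta")

result = pipeline.run(
    {"llm": {"prompt": "What is NLP?"}, "answer_builder": {"query": "What is NLP?"}}
)
print(result["answer_builder"]["answers"])
```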