From 640aa4667e320381ae95716db7aae3b527d93aff Mon Sep 17 00:00:00 2001 From: Massimiliano Pippi Date: Thu, 4 Apr 2024 15:17:23 +0200 Subject: [PATCH 01/28] Add langfuse integration --- integrations/langfuse/LICENSE.txt | 73 +++++++ integrations/langfuse/README.md | 22 +++ integrations/langfuse/example/basic_rag.py | 67 +++++++ integrations/langfuse/example/chat.py | 33 ++++ .../langfuse/example/requirements.txt | 3 + integrations/langfuse/pyproject.toml | 186 ++++++++++++++++++ .../components/others/langfuse/__about__.py | 1 + .../components/others/langfuse/__init__.py | 3 + .../components/others/langfuse/component.py | 16 ++ .../tracing/langfuse/__init__.py | 6 + .../tracing/langfuse/tracer.py | 89 +++++++++ integrations/langfuse/tests/__init__.py | 3 + 12 files changed, 502 insertions(+) create mode 100644 integrations/langfuse/LICENSE.txt create mode 100644 integrations/langfuse/README.md create mode 100644 integrations/langfuse/example/basic_rag.py create mode 100644 integrations/langfuse/example/chat.py create mode 100644 integrations/langfuse/example/requirements.txt create mode 100644 integrations/langfuse/pyproject.toml create mode 100644 integrations/langfuse/src/haystack_integrations/components/others/langfuse/__about__.py create mode 100644 integrations/langfuse/src/haystack_integrations/components/others/langfuse/__init__.py create mode 100644 integrations/langfuse/src/haystack_integrations/components/others/langfuse/component.py create mode 100644 integrations/langfuse/src/haystack_integrations/tracing/langfuse/__init__.py create mode 100644 integrations/langfuse/src/haystack_integrations/tracing/langfuse/tracer.py create mode 100644 integrations/langfuse/tests/__init__.py diff --git a/integrations/langfuse/LICENSE.txt b/integrations/langfuse/LICENSE.txt new file mode 100644 index 000000000..137069b82 --- /dev/null +++ b/integrations/langfuse/LICENSE.txt @@ -0,0 +1,73 @@ +Apache License +Version 2.0, January 2004 +http://www.apache.org/licenses/ + +TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION + +1. Definitions. + +"License" shall mean the terms and conditions for use, reproduction, and distribution as defined by Sections 1 through 9 of this document. + +"Licensor" shall mean the copyright owner or entity authorized by the copyright owner that is granting the License. + +"Legal Entity" shall mean the union of the acting entity and all other entities that control, are controlled by, or are under common control with that entity. For the purposes of this definition, "control" means (i) the power, direct or indirect, to cause the direction or management of such entity, whether by contract or otherwise, or (ii) ownership of fifty percent (50%) or more of the outstanding shares, or (iii) beneficial ownership of such entity. + +"You" (or "Your") shall mean an individual or Legal Entity exercising permissions granted by this License. + +"Source" form shall mean the preferred form for making modifications, including but not limited to software source code, documentation source, and configuration files. + +"Object" form shall mean any form resulting from mechanical transformation or translation of a Source form, including but not limited to compiled object code, generated documentation, and conversions to other media types. + +"Work" shall mean the work of authorship, whether in Source or Object form, made available under the License, as indicated by a copyright notice that is included in or attached to the work (an example is provided in the Appendix below). 
+ +"Derivative Works" shall mean any work, whether in Source or Object form, that is based on (or derived from) the Work and for which the editorial revisions, annotations, elaborations, or other modifications represent, as a whole, an original work of authorship. For the purposes of this License, Derivative Works shall not include works that remain separable from, or merely link (or bind by name) to the interfaces of, the Work and Derivative Works thereof. + +"Contribution" shall mean any work of authorship, including the original version of the Work and any modifications or additions to that Work or Derivative Works thereof, that is intentionally submitted to Licensor for inclusion in the Work by the copyright owner or by an individual or Legal Entity authorized to submit on behalf of the copyright owner. For the purposes of this definition, "submitted" means any form of electronic, verbal, or written communication sent to the Licensor or its representatives, including but not limited to communication on electronic mailing lists, source code control systems, and issue tracking systems that are managed by, or on behalf of, the Licensor for the purpose of discussing and improving the Work, but excluding communication that is conspicuously marked or otherwise designated in writing by the copyright owner as "Not a Contribution." + +"Contributor" shall mean Licensor and any individual or Legal Entity on behalf of whom a Contribution has been received by Licensor and subsequently incorporated within the Work. + +2. Grant of Copyright License. Subject to the terms and conditions of this License, each Contributor hereby grants to You a perpetual, worldwide, non-exclusive, no-charge, royalty-free, irrevocable copyright license to reproduce, prepare Derivative Works of, publicly display, publicly perform, sublicense, and distribute the Work and such Derivative Works in Source or Object form. + +3. Grant of Patent License. Subject to the terms and conditions of this License, each Contributor hereby grants to You a perpetual, worldwide, non-exclusive, no-charge, royalty-free, irrevocable (except as stated in this section) patent license to make, have made, use, offer to sell, sell, import, and otherwise transfer the Work, where such license applies only to those patent claims licensable by such Contributor that are necessarily infringed by their Contribution(s) alone or by combination of their Contribution(s) with the Work to which such Contribution(s) was submitted. If You institute patent litigation against any entity (including a cross-claim or counterclaim in a lawsuit) alleging that the Work or a Contribution incorporated within the Work constitutes direct or contributory patent infringement, then any patent licenses granted to You under this License for that Work shall terminate as of the date such litigation is filed. + +4. Redistribution. 
You may reproduce and distribute copies of the Work or Derivative Works thereof in any medium, with or without modifications, and in Source or Object form, provided that You meet the following conditions: + + (a) You must give any other recipients of the Work or Derivative Works a copy of this License; and + + (b) You must cause any modified files to carry prominent notices stating that You changed the files; and + + (c) You must retain, in the Source form of any Derivative Works that You distribute, all copyright, patent, trademark, and attribution notices from the Source form of the Work, excluding those notices that do not pertain to any part of the Derivative Works; and + + (d) If the Work includes a "NOTICE" text file as part of its distribution, then any Derivative Works that You distribute must include a readable copy of the attribution notices contained within such NOTICE file, excluding those notices that do not pertain to any part of the Derivative Works, in at least one of the following places: within a NOTICE text file distributed as part of the Derivative Works; within the Source form or documentation, if provided along with the Derivative Works; or, within a display generated by the Derivative Works, if and wherever such third-party notices normally appear. The contents of the NOTICE file are for informational purposes only and do not modify the License. You may add Your own attribution notices within Derivative Works that You distribute, alongside or as an addendum to the NOTICE text from the Work, provided that such additional attribution notices cannot be construed as modifying the License. + + You may add Your own copyright statement to Your modifications and may provide additional or different license terms and conditions for use, reproduction, or distribution of Your modifications, or for any such Derivative Works as a whole, provided Your use, reproduction, and distribution of the Work otherwise complies with the conditions stated in this License. + +5. Submission of Contributions. Unless You explicitly state otherwise, any Contribution intentionally submitted for inclusion in the Work by You to the Licensor shall be under the terms and conditions of this License, without any additional terms or conditions. Notwithstanding the above, nothing herein shall supersede or modify the terms of any separate license agreement you may have executed with Licensor regarding such Contributions. + +6. Trademarks. This License does not grant permission to use the trade names, trademarks, service marks, or product names of the Licensor, except as required for reasonable and customary use in describing the origin of the Work and reproducing the content of the NOTICE file. + +7. Disclaimer of Warranty. Unless required by applicable law or agreed to in writing, Licensor provides the Work (and each Contributor provides its Contributions) on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied, including, without limitation, any warranties or conditions of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A PARTICULAR PURPOSE. You are solely responsible for determining the appropriateness of using or redistributing the Work and assume any risks associated with Your exercise of permissions under this License. + +8. Limitation of Liability. 
In no event and under no legal theory, whether in tort (including negligence), contract, or otherwise, unless required by applicable law (such as deliberate and grossly negligent acts) or agreed to in writing, shall any Contributor be liable to You for damages, including any direct, indirect, special, incidental, or consequential damages of any character arising as a result of this License or out of the use or inability to use the Work (including but not limited to damages for loss of goodwill, work stoppage, computer failure or malfunction, or any and all other commercial damages or losses), even if such Contributor has been advised of the possibility of such damages. + +9. Accepting Warranty or Additional Liability. While redistributing the Work or Derivative Works thereof, You may choose to offer, and charge a fee for, acceptance of support, warranty, indemnity, or other liability obligations and/or rights consistent with this License. However, in accepting such obligations, You may act only on Your own behalf and on Your sole responsibility, not on behalf of any other Contributor, and only if You agree to indemnify, defend, and hold each Contributor harmless for any liability incurred by, or claims asserted against, such Contributor by reason of your accepting any such warranty or additional liability. + +END OF TERMS AND CONDITIONS + +APPENDIX: How to apply the Apache License to your work. + +To apply the Apache License to your work, attach the following boilerplate notice, with the fields enclosed by brackets "[]" replaced with your own identifying information. (Don't include the brackets!) The text should be enclosed in the appropriate comment syntax for the file format. We also recommend that a file or class name and description of purpose be included on the same "printed page" as the copyright notice for easier identification within third-party archives. + +Copyright [yyyy] [name of copyright owner] + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + +http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. diff --git a/integrations/langfuse/README.md b/integrations/langfuse/README.md new file mode 100644 index 000000000..50c2db619 --- /dev/null +++ b/integrations/langfuse/README.md @@ -0,0 +1,22 @@ +# langfuse + +[![PyPI - Version](https://img.shields.io/pypi/v/langfuse-haystack.svg)](https://pypi.org/project/langfuse-haystack) +[![PyPI - Python Version](https://img.shields.io/pypi/pyversions/langfuse-haystack.svg)](https://pypi.org/project/langfuse-haystack) + +----- + +**Table of Contents** + +- [langfuse](#langfuse) + - [Installation](#installation) + - [License](#license) + +## Installation + +```console +pip install langfuse-haystack +``` + +## License + +`langfuse` is distributed under the terms of the [Apache-2.0](https://spdx.org/licenses/Apache-2.0.html) license. 
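As a quick complement to the installation instructions in the README above, the sketch below shows the minimal wiring this integration relies on. It assumes Langfuse credentials are already exported as `LANGFUSE_SECRET_KEY` and `LANGFUSE_PUBLIC_KEY` and that Haystack content tracing is switched on; the component and pipeline names are illustrative.

```python
import os

# Assumed environment: LANGFUSE_SECRET_KEY and LANGFUSE_PUBLIC_KEY must be set,
# and content tracing has to be enabled before the pipeline is built.
os.environ["HAYSTACK_CONTENT_TRACING_ENABLED"] = "true"

from haystack import Pipeline
from haystack_integrations.components.others.langfuse import LangfuseComponent

pipe = Pipeline()
# The tracer only needs to be added to the pipeline; it does not have to be
# connected to any other component for traces to be captured.
pipe.add_component("tracer", LangfuseComponent("My pipeline"))
```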
diff --git a/integrations/langfuse/example/basic_rag.py b/integrations/langfuse/example/basic_rag.py new file mode 100644 index 000000000..5c48dcb10 --- /dev/null +++ b/integrations/langfuse/example/basic_rag.py @@ -0,0 +1,67 @@ +import os + +os.environ["LANGFUSE_HOST"] = "https://cloud.langfuse.com" +os.environ["TOKENIZERS_PARALLELISM"] = "false" +os.environ["HAYSTACK_CONTENT_TRACING_ENABLED"] = "true" + +from datasets import load_dataset + +from haystack.document_stores.in_memory import InMemoryDocumentStore +from haystack import Document, Pipeline +from haystack.components.embedders import SentenceTransformersDocumentEmbedder, SentenceTransformersTextEmbedder +from haystack.components.retrievers import InMemoryEmbeddingRetriever +from haystack.components.generators import OpenAIGenerator +from haystack.components.builders import PromptBuilder + +from haystack_integrations.components.others.langfuse import LangfuseComponent + + +def get_pipeline(document_store: InMemoryDocumentStore): + retriever = InMemoryEmbeddingRetriever(document_store=document_store, top_k=2) + + template = """ + Given the following information, answer the question. + + Context: + {% for document in documents %} + {{ document.content }} + {% endfor %} + + Question: {{question}} + Answer: + """ + + prompt_builder = PromptBuilder(template=template) + + basic_rag_pipeline = Pipeline() + # Add components to your pipeline + basic_rag_pipeline.add_component("tracer", LangfuseComponent("Basic RAG Pipeline")) + basic_rag_pipeline.add_component( + "text_embedder", SentenceTransformersTextEmbedder(model="sentence-transformers/all-MiniLM-L6-v2") + ) + basic_rag_pipeline.add_component("retriever", retriever) + basic_rag_pipeline.add_component("prompt_builder", prompt_builder) + basic_rag_pipeline.add_component("llm", OpenAIGenerator(model="gpt-3.5-turbo", generation_kwargs={"n": 2})) + + # Now, connect the components to each other + # NOTE: the tracer component doesn't need to be connected to anything in order to work + basic_rag_pipeline.connect("text_embedder.embedding", "retriever.query_embedding") + basic_rag_pipeline.connect("retriever", "prompt_builder.documents") + basic_rag_pipeline.connect("prompt_builder", "llm") + + return basic_rag_pipeline + + +if __name__ == "__main__": + document_store = InMemoryDocumentStore() + dataset = load_dataset("bilgeyucel/seven-wonders", split="train") + embedder = SentenceTransformersDocumentEmbedder("sentence-transformers/all-MiniLM-L6-v2") + embedder.warm_up() + docs_with_embeddings = embedder.run([Document(**ds) for ds in dataset]).get("documents") or [] # type: ignore + document_store.write_documents(docs_with_embeddings) + + pipeline = get_pipeline(document_store) + question = "What does Rhodes Statue look like?" 
+ response = pipeline.run({"text_embedder": {"text": question}, "prompt_builder": {"question": question}}) + + print(response["llm"]["replies"][0]) diff --git a/integrations/langfuse/example/chat.py b/integrations/langfuse/example/chat.py new file mode 100644 index 000000000..a7408b118 --- /dev/null +++ b/integrations/langfuse/example/chat.py @@ -0,0 +1,33 @@ +import os + +os.environ["LANGFUSE_HOST"] = "https://cloud.langfuse.com" +os.environ["TOKENIZERS_PARALLELISM"] = "false" +os.environ["HAYSTACK_CONTENT_TRACING_ENABLED"] = "true" + +from haystack.components.builders import DynamicChatPromptBuilder +from haystack.components.generators.chat import OpenAIChatGenerator +from haystack.dataclasses import ChatMessage +from haystack import Pipeline +from haystack.utils import Secret + +from haystack_integrations.components.others.langfuse import LangfuseComponent + + +if __name__ == "__main__": + + pipe = Pipeline() + pipe.add_component("tracer", LangfuseComponent("Chat example")) + pipe.add_component("prompt_builder", DynamicChatPromptBuilder()) + pipe.add_component("llm", OpenAIChatGenerator(model="gpt-3.5-turbo")) + + pipe.connect("prompt_builder.prompt", "llm.messages") + + messages = [ + ChatMessage.from_system("Always respond in German even if some input data is in other languages."), + ChatMessage.from_user("Tell me about {{location}}"), + ] + + response = pipe.run( + data={"prompt_builder": {"template_variables": {"location": "Berlin"}, "prompt_source": messages}} + ) + print(response["llm"]["replies"][0]) diff --git a/integrations/langfuse/example/requirements.txt b/integrations/langfuse/example/requirements.txt new file mode 100644 index 000000000..3db2429f2 --- /dev/null +++ b/integrations/langfuse/example/requirements.txt @@ -0,0 +1,3 @@ +langfuse-haystack +datasets +sentence-transformers \ No newline at end of file diff --git a/integrations/langfuse/pyproject.toml b/integrations/langfuse/pyproject.toml new file mode 100644 index 000000000..50b037de6 --- /dev/null +++ b/integrations/langfuse/pyproject.toml @@ -0,0 +1,186 @@ +[build-system] +requires = ["hatchling"] +build-backend = "hatchling.build" + +[project] +name = "langfuse-haystack" +dynamic = ["version"] +description = '' +readme = "README.md" +requires-python = ">=3.8" +license = "Apache-2.0" +keywords = [] +authors = [ + { name = "deepset GmbH", email = "info@deepset.ai" }, +] +classifiers = [ + "Development Status :: 4 - Beta", + "Programming Language :: Python", + "Programming Language :: Python :: 3.8", + "Programming Language :: Python :: 3.9", + "Programming Language :: Python :: 3.10", + "Programming Language :: Python :: 3.11", + "Programming Language :: Python :: 3.12", + "Programming Language :: Python :: Implementation :: CPython", + "Programming Language :: Python :: Implementation :: PyPy", +] +dependencies = [ + "haystack-ai", + "langfuse" +] + +[project.urls] +Documentation = "https://github.com/deepset-ai/haystack-core-integrations/tree/main/integrations/langfuse#readme" +Issues = "https://github.com/deepset-ai/haystack-core-integrations/issues" +Source = "https://github.com/deepset-ai/haystack-core-integrations/tree/main/integrations/langfuse" + +[tool.hatch.build.targets.wheel] +packages = ["src/haystack_integrations"] + +[tool.hatch.version] +# Remove the following line and uncomment the others to enable git versioning +path = "src/haystack_integrations/components/others/langfuse/__about__.py" +# source = "vcs" +# tag-pattern = 'integrations\/langfuse-v(?P.*)' + + +[tool.hatch.version.raw-options] +root 
= "../.." +git_describe_command = 'git describe --tags --match="integrations/langfuse-v[0-9]*"' + +[tool.hatch.envs.default] +dependencies = [ + "coverage[toml]>=6.5", + "pytest", +] +[tool.hatch.envs.default.scripts] +test = "pytest {args:tests}" +test-cov = "coverage run -m pytest {args:tests}" +cov-report = [ + "- coverage combine", + "coverage report", +] +cov = [ + "test-cov", + "cov-report", +] +docs = ["pydoc-markdown pydoc/config.yml"] + + +[[tool.hatch.envs.all.matrix]] +python = ["3.8", "3.9", "3.10", "3.11", "3.12"] + +[tool.hatch.envs.lint] +detached = true +dependencies = ["black>=23.1.0", "mypy>=1.0.0", "ruff>=0.0.243"] + +[tool.hatch.envs.lint.scripts] +typing = "mypy --install-types --non-interactive --explicit-package-bases {args:src/ tests}" +style = ["ruff check {args:.}", "black --check --diff {args:.}"] +fmt = ["black {args:.}", "ruff --fix {args:.}", "style"] +all = ["style", "typing"] + +[tool.hatch.metadata] +allow-direct-references = true + +[tool.black] +target-version = ["py38"] +line-length = 120 +skip-string-normalization = true + +[tool.ruff] +target-version = "py38" +line-length = 120 +lint.select = [ + "A", + "ARG", + "B", + "C", + "DTZ", + "E", + "EM", + "F", + "I", + "ICN", + "ISC", + "N", + "PLC", + "PLE", + "PLR", + "PLW", + "Q", + "RUF", + "S", + "T", + "TID", + "UP", + "W", + "YTT", +] + +lint.ignore = [ + # Allow non-abstract empty methods in abstract base classes + "B027", + # Ignore checks for possible passwords + "S105", + "S106", + "S107", + # Ignore complexity + "C901", + "PLR0911", + "PLR0912", + "PLR0913", + "PLR0915", + # Asserts + "S101", +] +lint.unfixable = [ + # Don't touch unused imports + "F401", +] +extend-exclude = ["tests", "example"] + +[tool.ruff.lint.isort] +known-first-party = ["src"] + +[tool.ruff.lint.flake8-tidy-imports] +ban-relative-imports = "parents" + +[tool.ruff.lint.per-file-ignores] +# Tests can use magic values, assertions, and relative imports +"tests/**/*" = ["PLR2004", "S101", "TID252"] +# Examples can print their output +"examples/**" = ["T201"] +"tests/**" = ["T201"] + +[tool.coverage.run] +source = ["haystack_integrations"] +branch = true +parallel = false + + +[tool.coverage.report] +omit = ["*/tests/*", "*/__init__.py"] +show_missing=true +exclude_lines = [ + "no cov", + "if __name__ == .__main__.:", + "if TYPE_CHECKING:", +] + +[[tool.mypy.overrides]] +module = [ + "langfuse.*", + "haystack.*", + "haystack_integrations.*", + "pytest.*", + "numpy.*", +] +ignore_missing_imports = true + +[tool.pytest.ini_options] +addopts = "--strict-markers" +markers = [ + "integration: integration tests", +] +log_cli = true diff --git a/integrations/langfuse/src/haystack_integrations/components/others/langfuse/__about__.py b/integrations/langfuse/src/haystack_integrations/components/others/langfuse/__about__.py new file mode 100644 index 000000000..27fdca497 --- /dev/null +++ b/integrations/langfuse/src/haystack_integrations/components/others/langfuse/__about__.py @@ -0,0 +1 @@ +__version__ = "0.0.3" diff --git a/integrations/langfuse/src/haystack_integrations/components/others/langfuse/__init__.py b/integrations/langfuse/src/haystack_integrations/components/others/langfuse/__init__.py new file mode 100644 index 000000000..6aa8f9875 --- /dev/null +++ b/integrations/langfuse/src/haystack_integrations/components/others/langfuse/__init__.py @@ -0,0 +1,3 @@ +from .component import LangfuseComponent + +__all__ = ["LangfuseComponent"] diff --git 
a/integrations/langfuse/src/haystack_integrations/components/others/langfuse/component.py b/integrations/langfuse/src/haystack_integrations/components/others/langfuse/component.py new file mode 100644 index 000000000..8554946ea --- /dev/null +++ b/integrations/langfuse/src/haystack_integrations/components/others/langfuse/component.py @@ -0,0 +1,16 @@ +from haystack import tracing, component + +from haystack_integrations.tracing.langfuse import LangfuseTracer + +from langfuse import Langfuse + + +@component +class LangfuseComponent: + def __init__(self, name: str): + self.name = name + tracing.enable_tracing(LangfuseTracer(Langfuse(), name)) + + @component.output_types(name=str) + def run(self): + return {"name": self.name} diff --git a/integrations/langfuse/src/haystack_integrations/tracing/langfuse/__init__.py b/integrations/langfuse/src/haystack_integrations/tracing/langfuse/__init__.py new file mode 100644 index 000000000..e7331852d --- /dev/null +++ b/integrations/langfuse/src/haystack_integrations/tracing/langfuse/__init__.py @@ -0,0 +1,6 @@ +# SPDX-FileCopyrightText: 2024-present deepset GmbH +# +# SPDX-License-Identifier: Apache-2.0 +from .tracer import LangfuseTracer + +__all__ = ["LangfuseTracer"] diff --git a/integrations/langfuse/src/haystack_integrations/tracing/langfuse/tracer.py b/integrations/langfuse/src/haystack_integrations/tracing/langfuse/tracer.py new file mode 100644 index 000000000..1465c15b1 --- /dev/null +++ b/integrations/langfuse/src/haystack_integrations/tracing/langfuse/tracer.py @@ -0,0 +1,89 @@ +import contextlib +from typing import Any, Dict, Iterator, Optional, Union + +from haystack.tracing import Span, Tracer, tracer +from haystack.tracing import utils as tracing_utils + +import langfuse + + +class LangfuseSpan(Span): + def __init__(self, span: "Union[langfuse.client.StatefulSpanClient, langfuse.client.StatefulTraceClient]") -> None: + self._span = span + # locally cache tags + self._data = {} + + def set_tag(self, key: str, value: Any) -> None: + coerced_value = tracing_utils.coerce_tag_value(value) + self._span.update(metadata={key: coerced_value}) + self._data[key] = value + + def set_content_tag(self, key: str, value: Any) -> None: + if not tracer.is_content_tracing_enabled: + return + + if key.endswith(".input"): + if "messages" in value: + messages = [m.to_openai_format() for m in value["messages"]] + self._span.update(input=messages) + else: + self._span.update(input=value) + elif key.endswith(".output"): + if "replies" in value: + replies = [m.to_openai_format() for m in value["replies"]] + self._span.update(output=replies) + else: + self._span.update(output=value) + + self._data[key] = value + + def raw_span(self) -> Any: + return self._span + + def get_correlation_data_for_logs(self) -> Dict[str, Any]: + return {} + + +class LangfuseTracer(Tracer): + def __init__(self, tracer: "langfuse.Langfuse", name: str = "Haystack") -> None: + self._tracer = tracer + self._context: list[LangfuseSpan] = [] + self._name = name + + @contextlib.contextmanager + def trace(self, operation_name: str, tags: Optional[Dict[str, Any]] = None) -> Iterator[Span]: + tags = tags or {} + span_name = tags.get("haystack.component.name", operation_name) + + if tags.get("haystack.component.type") in ["OpenAIGenerator", "OpenAIChatGenerator"]: + span = LangfuseSpan(self.current_span().raw_span().generation(name=span_name)) + else: + span = LangfuseSpan(self.current_span().raw_span().span(name=span_name)) + + self._context.append(span) + span.set_tags(tags) + + yield span + + if 
tags.get("haystack.component.type") == "OpenAIGenerator": + meta = span._data.get("haystack.component.output", {}).get("meta") + if meta: + # Haystack returns one meta dict for each message, but the 'usage' value + # is always the same, let's just pick the first item + m = meta[0] + span._span.update(usage=m.get("usage"), model=m.get("model")) + elif tags.get("haystack.component.type") == "OpenAIChatGenerator": + replies = span._data.get("haystack.component.output", {}).get("replies") + if replies: + meta = replies[0].meta + span._span.update(usage=meta.get("usage"), model=meta.get("model")) + + span.raw_span().end() + self._context.pop() + self._tracer.flush() + + def current_span(self) -> Span: + if not self._context: + # The root span has to be a trace + self._context.append(LangfuseSpan(self._tracer.trace(name=self._name))) + return self._context[-1] diff --git a/integrations/langfuse/tests/__init__.py b/integrations/langfuse/tests/__init__.py new file mode 100644 index 000000000..6b5e14dc1 --- /dev/null +++ b/integrations/langfuse/tests/__init__.py @@ -0,0 +1,3 @@ +# SPDX-FileCopyrightText: 2024-present deepset GmbH +# +# SPDX-License-Identifier: Apache-2.0 From b6185495e3f577804ba835a2264d6a3e55d8b339 Mon Sep 17 00:00:00 2001 From: Vladimir Blagojevic Date: Wed, 24 Apr 2024 12:53:01 +0200 Subject: [PATCH 02/28] Trace pipeline run --- .../tracing/langfuse/tracer.py | 23 ++++++++++++++++--- 1 file changed, 20 insertions(+), 3 deletions(-) diff --git a/integrations/langfuse/src/haystack_integrations/tracing/langfuse/tracer.py b/integrations/langfuse/src/haystack_integrations/tracing/langfuse/tracer.py index 1465c15b1..2d93918d6 100644 --- a/integrations/langfuse/src/haystack_integrations/tracing/langfuse/tracer.py +++ b/integrations/langfuse/src/haystack_integrations/tracing/langfuse/tracer.py @@ -1,6 +1,7 @@ import contextlib from typing import Any, Dict, Iterator, Optional, Union +from haystack.dataclasses import ChatMessage from haystack.tracing import Span, Tracer, tracer from haystack.tracing import utils as tracing_utils @@ -21,16 +22,15 @@ def set_tag(self, key: str, value: Any) -> None: def set_content_tag(self, key: str, value: Any) -> None: if not tracer.is_content_tracing_enabled: return - if key.endswith(".input"): if "messages" in value: - messages = [m.to_openai_format() for m in value["messages"]] + messages = [self.to_openai_format(m) for m in value["messages"]] self._span.update(input=messages) else: self._span.update(input=value) elif key.endswith(".output"): if "replies" in value: - replies = [m.to_openai_format() for m in value["replies"]] + replies = [self.to_openai_format(m) for m in value["replies"]] self._span.update(output=replies) else: self._span.update(output=value) @@ -43,6 +43,16 @@ def raw_span(self) -> Any: def get_correlation_data_for_logs(self) -> Dict[str, Any]: return {} + def to_openai_format(self, m: ChatMessage) -> Dict[str, Any]: + """ + Remove after haystack 2.0.1 has been released and use the `to_openai_format` method from the ChatMessage class + """ + msg = {"role": m.role.value, "content": m.content} + if m.name: + msg["name"] = m.name + + return msg + class LangfuseTracer(Tracer): def __init__(self, tracer: "langfuse.Langfuse", name: str = "Haystack") -> None: @@ -78,6 +88,13 @@ def trace(self, operation_name: str, tags: Optional[Dict[str, Any]] = None) -> I meta = replies[0].meta span._span.update(usage=meta.get("usage"), model=meta.get("model")) + pipeline_input = tags.get("haystack.pipeline.input_data", None) + if pipeline_input: + 
span._span.update(input=tags["haystack.pipeline.input_data"]) + pipeline_output = tags.get("haystack.pipeline.output_data", None) + if pipeline_output: + span._span.update(output=tags["haystack.pipeline.output_data"]) + span.raw_span().end() self._context.pop() self._tracer.flush() From ea1359351dbd259caec40726fcd1bb5f3e9b8f75 Mon Sep 17 00:00:00 2001 From: Vladimir Blagojevic Date: Wed, 24 Apr 2024 14:17:33 +0200 Subject: [PATCH 03/28] Integration admin additions --- .github/labeler.yml | 5 +++ .github/workflows/langfuse.yml | 80 ++++++++++++++++++++++++++++++++++ README.md | 3 +- 3 files changed, 87 insertions(+), 1 deletion(-) create mode 100644 .github/workflows/langfuse.yml diff --git a/.github/labeler.yml b/.github/labeler.yml index cbfe6567e..a05444a00 100644 --- a/.github/labeler.yml +++ b/.github/labeler.yml @@ -59,6 +59,11 @@ integration:jina: - any-glob-to-any-file: "integrations/jina/**/*" - any-glob-to-any-file: ".github/workflows/jina.yml" +integration:langfuse: + - changed-files: + - any-glob-to-any-file: "integrations/langfuse/**/*" + - any-glob-to-any-file: ".github/workflows/langfuse.yml" + integration:llama_cpp: - changed-files: - any-glob-to-any-file: "integrations/llama_cpp/**/*" diff --git a/.github/workflows/langfuse.yml b/.github/workflows/langfuse.yml new file mode 100644 index 000000000..9d77ec2f9 --- /dev/null +++ b/.github/workflows/langfuse.yml @@ -0,0 +1,80 @@ +# This workflow comes from https://github.com/ofek/hatch-mypyc +# https://github.com/ofek/hatch-mypyc/blob/5a198c0ba8660494d02716cfc9d79ce4adfb1442/.github/workflows/test.yml +name: Test / langfuse + +on: + schedule: + - cron: "0 0 * * *" + pull_request: + paths: + - "integrations/langfuse/**" + - ".github/workflows/langfuse.yml" + +defaults: + run: + working-directory: integrations/langfuse + +concurrency: + group: langfuse-${{ github.head_ref }} + cancel-in-progress: true + +env: + PYTHONUNBUFFERED: "1" + FORCE_COLOR: "1" + LANGFUSE_SECRET_KEY: ${{ secrets.LANGFUSE_SECRET_KEY }} + LANGFUSE_PUBLIC_KEY: ${{ secrets.LANGFUSE_PUBLIC_KEY }} + +jobs: + run: + name: Python ${{ matrix.python-version }} on ${{ startsWith(matrix.os, 'macos-') && 'macOS' || startsWith(matrix.os, 'windows-') && 'Windows' || 'Linux' }} + runs-on: ${{ matrix.os }} + strategy: + fail-fast: false + matrix: + os: [ubuntu-latest, windows-latest, macos-latest] + python-version: ["3.9", "3.10"] + + steps: + - name: Support longpaths + if: matrix.os == 'windows-latest' + working-directory: . 
+ run: git config --system core.longpaths true + + - uses: actions/checkout@v4 + + - name: Set up Python ${{ matrix.python-version }} + uses: actions/setup-python@v5 + with: + python-version: ${{ matrix.python-version }} + + - name: Install Hatch + run: pip install --upgrade hatch + + - name: Lint + if: matrix.python-version == '3.9' && runner.os == 'Linux' + run: hatch run lint:all + + - name: Generate docs + if: matrix.python-version == '3.9' && runner.os == 'Linux' + run: hatch run docs + + - name: Run tests + id: tests + run: hatch run cov + + - name: Nightly - run unit tests with Haystack main branch + if: github.event_name == 'schedule' + id: nightly-haystack-main + run: | + hatch run pip install git+https://github.com/deepset-ai/haystack.git + hatch run test -m "not integration" + + - name: Send event to Datadog for nightly failures + if: failure() && github.event_name == 'schedule' + uses: ./.github/actions/send_failure + with: + title: | + core-integrations failure: + ${{ (steps.tests.conclusion == 'nightly-haystack-main') && 'nightly-haystack-main' || 'tests' }} + - ${{ github.workflow }} + api-key: ${{ secrets.CORE_DATADOG_API_KEY }} \ No newline at end of file diff --git a/README.md b/README.md index 734672371..21c4a6017 100644 --- a/README.md +++ b/README.md @@ -23,7 +23,7 @@ Please check out our [Contribution Guidelines](CONTRIBUTING.md) for all the deta ## Inventory | Package | Type | PyPi Package | Status | -| -------------------------------------------------------------------------------------------------------------- | ------------------- | -------------------------------------------------------------------------------------------------------------------------------------------------------- | ---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | +|----------------------------------------------------------------------------------------------------------------|---------------------|----------------------------------------------------------------------------------------------------------------------------------------------------------|------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------| | [amazon-bedrock-haystack](integrations/amazon-bedrock/) | Generator | [![PyPI - Version](https://img.shields.io/pypi/v/amazon-bedrock-haystack.svg)](https://pypi.org/project/amazon-bedrock-haystack) | [![Test / amazon_bedrock](https://github.com/deepset-ai/haystack-core-integrations/actions/workflows/amazon_bedrock.yml/badge.svg)](https://github.com/deepset-ai/haystack-core-integrations/actions/workflows/amazon_bedrock.yml) | | [amazon-sagemaker-haystack](integrations/amazon_sagemaker/) | Generator | [![PyPI - Version](https://img.shields.io/pypi/v/amazon-sagemaker-haystack.svg)](https://pypi.org/project/amazon-sagemaker-haystack) | [![Test / amazon_sagemaker](https://github.com/deepset-ai/haystack-core-integrations/actions/workflows/amazon_sagemaker.yml/badge.svg)](https://github.com/deepset-ai/haystack-core-integrations/actions/workflows/amazon_sagemaker.yml) | | [anthropic-haystack](integrations/anthropic/) | Generator | [![PyPI - 
Version](https://img.shields.io/pypi/v/anthropic-haystack.svg)](https://pypi.org/project/anthropic-haystack) | [![Test / anthropic](https://github.com/deepset-ai/haystack-core-integrations/actions/workflows/anthropic.yml/badge.svg)](https://github.com/deepset-ai/haystack-core-integrations/actions/workflows/anthropic.yml) | @@ -38,6 +38,7 @@ Please check out our [Contribution Guidelines](CONTRIBUTING.md) for all the deta | [gradient-haystack](integrations/gradient/) | Embedder, Generator | [![PyPI - Version](https://img.shields.io/pypi/v/gradient-haystack.svg)](https://pypi.org/project/gradient-haystack) | [![Test / gradient](https://github.com/deepset-ai/haystack-core-integrations/actions/workflows/gradient.yml/badge.svg)](https://github.com/deepset-ai/haystack-core-integrations/actions/workflows/gradient.yml) | | [instructor-embedders-haystack](integrations/instructor_embedders/) | Embedder | [![PyPI - Version](https://img.shields.io/pypi/v/instructor-embedders-haystack.svg)](https://pypi.org/project/instructor-embedders-haystack) | [![Test / instructor-embedders](https://github.com/deepset-ai/haystack-core-integrations/actions/workflows/instructor_embedders.yml/badge.svg)](https://github.com/deepset-ai/haystack-core-integrations/actions/workflows/instructor_embedders.yml) | | [jina-haystack](integrations/jina/) | Embedder | [![PyPI - Version](https://img.shields.io/pypi/v/jina-haystack.svg)](https://pypi.org/project/jina-haystack) | [![Test / jina](https://github.com/deepset-ai/haystack-core-integrations/actions/workflows/jina.yml/badge.svg)](https://github.com/deepset-ai/haystack-core-integrations/actions/workflows/jina.yml) | +| [langfuse-haystack](integrations/langfuse/) | Tracer | [![PyPI - Version](https://img.shields.io/pypi/v/langfuse-haystack.svg?color=orange)](https://pypi.org/project/langfuse-haystack) | [![Test / langfuse](https://github.com/deepset-ai/haystack-core-integrations/actions/workflows/langfuse.yml/badge.svg)](https://github.com/deepset-ai/haystack-core-integrations/actions/workflows/langfuse.yml) | | [llama-cpp-haystack](integrations/llama_cpp/) | Generator | [![PyPI - Version](https://img.shields.io/pypi/v/llama-cpp-haystack.svg?color=orange)](https://pypi.org/project/llama-cpp-haystack) | [![Test / llama-cpp](https://github.com/deepset-ai/haystack-core-integrations/actions/workflows/llama_cpp.yml/badge.svg)](https://github.com/deepset-ai/haystack-core-integrations/actions/workflows/llama_cpp.yml) | | [mistral-haystack](integrations/mistral/) | Embedder, Generator | [![PyPI - Version](https://img.shields.io/pypi/v/mistral-haystack.svg)](https://pypi.org/project/mistral-haystack) | [![Test / mistral](https://github.com/deepset-ai/haystack-core-integrations/actions/workflows/mistral.yml/badge.svg)](https://github.com/deepset-ai/haystack-core-integrations/actions/workflows/mistral.yml) | | [mongodb-atlas-haystack](integrations/mongodb_atlas/) | Document Store | [![PyPI - Version](https://img.shields.io/pypi/v/mongodb-atlas-haystack.svg?color=orange)](https://pypi.org/project/mongodb-atlas-haystack) | [![Test / mongodb-atlas](https://github.com/deepset-ai/haystack-core-integrations/actions/workflows/mongodb_atlas.yml/badge.svg)](https://github.com/deepset-ai/haystack-core-integrations/actions/workflows/mongodb_atlas.yml) | From 0dfb3d9567b21313a050d139e3f898a08e9aab28 Mon Sep 17 00:00:00 2001 From: Vladimir Blagojevic Date: Wed, 24 Apr 2024 16:59:48 +0200 Subject: [PATCH 04/28] Pydoc config --- integrations/langfuse/pydoc/config.yml | 30 ++++++++++++++++++++++++++ 1 
file changed, 30 insertions(+) create mode 100644 integrations/langfuse/pydoc/config.yml diff --git a/integrations/langfuse/pydoc/config.yml b/integrations/langfuse/pydoc/config.yml new file mode 100644 index 000000000..72eb8def8 --- /dev/null +++ b/integrations/langfuse/pydoc/config.yml @@ -0,0 +1,30 @@ +loaders: + - type: haystack_pydoc_tools.loaders.CustomPythonLoader + search_path: [../src] + modules: [ + "haystack_integrations.components.others.langfuse.component", + "haystack_integrations.tracing.langfuse.tracer", + ] + ignore_when_discovered: ["__init__"] +processors: + - type: filter + expression: + documented_only: true + do_not_filter_modules: false + skip_empty_modules: true + - type: smart + - type: crossref +renderer: + type: haystack_pydoc_tools.renderers.ReadmePreviewRenderer + excerpt: Langfuse integration for Haystack + category_slug: integrations-api + title: langfuse + slug: integrations-langfuse + order: 135 + markdown: + descriptive_class_title: false + classdef_code_block: false + descriptive_module_title: true + add_method_class_prefix: true + add_member_class_prefix: false + filename: _readme_langfuse.md From 17f3754f69cb0760b4f1913bac046788a430876d Mon Sep 17 00:00:00 2001 From: Vladimir Blagojevic Date: Wed, 24 Apr 2024 16:19:24 +0200 Subject: [PATCH 05/28] Capture trace url in tracer component --- .../components/others/langfuse/component.py | 8 +++++--- .../src/haystack_integrations/tracing/langfuse/tracer.py | 3 +++ 2 files changed, 8 insertions(+), 3 deletions(-) diff --git a/integrations/langfuse/src/haystack_integrations/components/others/langfuse/component.py b/integrations/langfuse/src/haystack_integrations/components/others/langfuse/component.py index 8554946ea..1725e7392 100644 --- a/integrations/langfuse/src/haystack_integrations/components/others/langfuse/component.py +++ b/integrations/langfuse/src/haystack_integrations/components/others/langfuse/component.py @@ -9,8 +9,10 @@ class LangfuseComponent: def __init__(self, name: str): self.name = name - tracing.enable_tracing(LangfuseTracer(Langfuse(), name)) + self.tracer = LangfuseTracer(Langfuse(), name) + tracing.enable_tracing(self.tracer) - @component.output_types(name=str) + @component.output_types(name=str, trace_url=str) def run(self): - return {"name": self.name} + return {"name": self.name, + "trace_url": self.tracer.get_trace_url()} diff --git a/integrations/langfuse/src/haystack_integrations/tracing/langfuse/tracer.py b/integrations/langfuse/src/haystack_integrations/tracing/langfuse/tracer.py index 2d93918d6..95fb053ac 100644 --- a/integrations/langfuse/src/haystack_integrations/tracing/langfuse/tracer.py +++ b/integrations/langfuse/src/haystack_integrations/tracing/langfuse/tracer.py @@ -104,3 +104,6 @@ def current_span(self) -> Span: # The root span has to be a trace self._context.append(LangfuseSpan(self._tracer.trace(name=self._name))) return self._context[-1] + + def get_trace_url(self) -> str: + return self._tracer.get_trace_url() \ No newline at end of file From 1540227292454ebcdd9929e320af8846387b38cc Mon Sep 17 00:00:00 2001 From: Vladimir Blagojevic Date: Wed, 24 Apr 2024 16:23:02 +0200 Subject: [PATCH 06/28] Add integration test, update example --- integrations/langfuse/example/chat.py | 1 + integrations/langfuse/tests/test_tracing.py | 54 +++++++++++++++++++++ 2 files changed, 55 insertions(+) create mode 100644 integrations/langfuse/tests/test_tracing.py diff --git a/integrations/langfuse/example/chat.py b/integrations/langfuse/example/chat.py index a7408b118..9659a2b5e 100644 
--- a/integrations/langfuse/example/chat.py +++ b/integrations/langfuse/example/chat.py @@ -31,3 +31,4 @@ data={"prompt_builder": {"template_variables": {"location": "Berlin"}, "prompt_source": messages}} ) print(response["llm"]["replies"][0]) + print(response["tracer"]["trace_url"]) diff --git a/integrations/langfuse/tests/test_tracing.py b/integrations/langfuse/tests/test_tracing.py new file mode 100644 index 000000000..130bbec2d --- /dev/null +++ b/integrations/langfuse/tests/test_tracing.py @@ -0,0 +1,54 @@ +import os +from urllib.parse import urlparse + +import pytest +import requests + +from haystack import Pipeline +from haystack.components.builders import DynamicChatPromptBuilder +from haystack.components.generators.chat import OpenAIChatGenerator +from haystack.dataclasses import ChatMessage +from requests.auth import HTTPBasicAuth + +from haystack_integrations.components.others.langfuse import LangfuseComponent + + +@pytest.mark.integration +@pytest.mark.skipif( + not os.environ.get("LANGFUSE_SECRET_KEY", None) and not os.environ.get("LANGFUSE_PUBLIC_KEY", None), + reason="Export an env var called LANGFUSE_SECRET_KEY and LANGFUSE_PUBLIC_KEY containing Langfuse credentials.", + ) +def test_tracing_integration(): + + pipe = Pipeline() + pipe.add_component("tracer", LangfuseComponent("Chat example")) + pipe.add_component("prompt_builder", DynamicChatPromptBuilder()) + pipe.add_component("llm", OpenAIChatGenerator(model="gpt-3.5-turbo")) + + pipe.connect("prompt_builder.prompt", "llm.messages") + + messages = [ + ChatMessage.from_system("Always respond in German even if some input data is in other languages."), + ChatMessage.from_user("Tell me about {{location}}"), + ] + + response = pipe.run( + data={"prompt_builder": {"template_variables": {"location": "Berlin"}, "prompt_source": messages}} + ) + assert "Berlin" in response["llm"]["replies"][0].content + assert response["tracer"]["trace_url"] + + url = "https://cloud.langfuse.com/api/public/traces/" + trace_url = response["tracer"]["trace_url"] + parsed_url = urlparse(trace_url) + # trace id is the last part of the path (after the last '/') + uuid = os.path.basename(parsed_url.path) + + try: + # GET request with Basic Authentication on the Langfuse API + response = requests.get(url+uuid, auth=HTTPBasicAuth(os.environ.get("LANGFUSE_PUBLIC_KEY"), + os.environ.get("LANGFUSE_SECRET_KEY"))) + + assert response.status_code == 200, f"Failed to retrieve data from Langfuse API: {response.status_code}" + except requests.exceptions.RequestException as e: + assert False, f"Failed to retrieve data from Langfuse API: {e}" \ No newline at end of file From 31f77927cb13fcaf5ae0460db2fca18b41ae7ef5 Mon Sep 17 00:00:00 2001 From: Vladimir Blagojevic Date: Wed, 24 Apr 2024 16:26:36 +0200 Subject: [PATCH 07/28] Linting --- .../components/others/langfuse/component.py | 6 ++---- .../tracing/langfuse/tracer.py | 4 ++-- integrations/langfuse/tests/test_tracing.py | 13 +++++++------ 3 files changed, 11 insertions(+), 12 deletions(-) diff --git a/integrations/langfuse/src/haystack_integrations/components/others/langfuse/component.py b/integrations/langfuse/src/haystack_integrations/components/others/langfuse/component.py index 1725e7392..d6ba93820 100644 --- a/integrations/langfuse/src/haystack_integrations/components/others/langfuse/component.py +++ b/integrations/langfuse/src/haystack_integrations/components/others/langfuse/component.py @@ -1,5 +1,4 @@ -from haystack import tracing, component - +from haystack import component, tracing from 
haystack_integrations.tracing.langfuse import LangfuseTracer from langfuse import Langfuse @@ -14,5 +13,4 @@ def __init__(self, name: str): @component.output_types(name=str, trace_url=str) def run(self): - return {"name": self.name, - "trace_url": self.tracer.get_trace_url()} + return {"name": self.name, "trace_url": self.tracer.get_trace_url()} diff --git a/integrations/langfuse/src/haystack_integrations/tracing/langfuse/tracer.py b/integrations/langfuse/src/haystack_integrations/tracing/langfuse/tracer.py index 95fb053ac..826519d77 100644 --- a/integrations/langfuse/src/haystack_integrations/tracing/langfuse/tracer.py +++ b/integrations/langfuse/src/haystack_integrations/tracing/langfuse/tracer.py @@ -12,7 +12,7 @@ class LangfuseSpan(Span): def __init__(self, span: "Union[langfuse.client.StatefulSpanClient, langfuse.client.StatefulTraceClient]") -> None: self._span = span # locally cache tags - self._data = {} + self._data: Dict[str, Any] = {} def set_tag(self, key: str, value: Any) -> None: coerced_value = tracing_utils.coerce_tag_value(value) @@ -106,4 +106,4 @@ def current_span(self) -> Span: return self._context[-1] def get_trace_url(self) -> str: - return self._tracer.get_trace_url() \ No newline at end of file + return self._tracer.get_trace_url() diff --git a/integrations/langfuse/tests/test_tracing.py b/integrations/langfuse/tests/test_tracing.py index 130bbec2d..6855792f9 100644 --- a/integrations/langfuse/tests/test_tracing.py +++ b/integrations/langfuse/tests/test_tracing.py @@ -15,9 +15,9 @@ @pytest.mark.integration @pytest.mark.skipif( - not os.environ.get("LANGFUSE_SECRET_KEY", None) and not os.environ.get("LANGFUSE_PUBLIC_KEY", None), - reason="Export an env var called LANGFUSE_SECRET_KEY and LANGFUSE_PUBLIC_KEY containing Langfuse credentials.", - ) + not os.environ.get("LANGFUSE_SECRET_KEY", None) and not os.environ.get("LANGFUSE_PUBLIC_KEY", None), + reason="Export an env var called LANGFUSE_SECRET_KEY and LANGFUSE_PUBLIC_KEY containing Langfuse credentials.", +) def test_tracing_integration(): pipe = Pipeline() @@ -46,9 +46,10 @@ def test_tracing_integration(): try: # GET request with Basic Authentication on the Langfuse API - response = requests.get(url+uuid, auth=HTTPBasicAuth(os.environ.get("LANGFUSE_PUBLIC_KEY"), - os.environ.get("LANGFUSE_SECRET_KEY"))) + response = requests.get( + url + uuid, auth=HTTPBasicAuth(os.environ.get("LANGFUSE_PUBLIC_KEY"), os.environ.get("LANGFUSE_SECRET_KEY")) + ) assert response.status_code == 200, f"Failed to retrieve data from Langfuse API: {response.status_code}" except requests.exceptions.RequestException as e: - assert False, f"Failed to retrieve data from Langfuse API: {e}" \ No newline at end of file + assert False, f"Failed to retrieve data from Langfuse API: {e}" From 9ed1d2cc155cb408b703cd01a15f3da48080971c Mon Sep 17 00:00:00 2001 From: Vladimir Blagojevic Date: Fri, 26 Apr 2024 11:16:03 +0200 Subject: [PATCH 08/28] Add haystack-pydoc-tools dep --- integrations/langfuse/pyproject.toml | 1 + 1 file changed, 1 insertion(+) diff --git a/integrations/langfuse/pyproject.toml b/integrations/langfuse/pyproject.toml index 50b037de6..403f55560 100644 --- a/integrations/langfuse/pyproject.toml +++ b/integrations/langfuse/pyproject.toml @@ -52,6 +52,7 @@ git_describe_command = 'git describe --tags --match="integrations/langfuse-v[0-9 dependencies = [ "coverage[toml]>=6.5", "pytest", + "haystack-pydoc-tools", ] [tool.hatch.envs.default.scripts] test = "pytest {args:tests}" From 075588614a62d598f658bb3c85f980034f8966de Mon Sep 17 
00:00:00 2001 From: Vladimir Blagojevic Date: Fri, 26 Apr 2024 14:46:09 +0200 Subject: [PATCH 09/28] Add comprehensive README --- integrations/langfuse/README.md | 83 +++++++++++++++++++++++++++++---- 1 file changed, 74 insertions(+), 9 deletions(-) diff --git a/integrations/langfuse/README.md b/integrations/langfuse/README.md index 50c2db619..e7381d15c 100644 --- a/integrations/langfuse/README.md +++ b/integrations/langfuse/README.md @@ -1,22 +1,87 @@ -# langfuse +# langfuse-haystack [![PyPI - Version](https://img.shields.io/pypi/v/langfuse-haystack.svg)](https://pypi.org/project/langfuse-haystack) [![PyPI - Python Version](https://img.shields.io/pypi/pyversions/langfuse-haystack.svg)](https://pypi.org/project/langfuse-haystack) ------ +langfuse-haystack integrates tracing capabilities into [Haystack](https://github.com/deepset-ai/haystack) (2.x) pipelines using [Langfuse](https://langfuse.com/). This package enhances the visibility of pipeline runs by capturing comprehensive details of the execution traces, including API calls, context data, prompts, and more. Whether you're monitoring model performance, pinpointing areas for improvement, or creating datasets for fine-tuning and testing from your pipeline executions, langfuse-haystack is the right tool for you. +## Features -**Table of Contents** - -- [langfuse](#langfuse) - - [Installation](#installation) - - [License](#license) +- Easy integration with Haystack pipelines +- Capture the full context of the execution +- Track model usage and cost +- Collect user feedback +- Identify low-quality outputs +- Build fine-tuning and testing datasets ## Installation -```console +To install langfuse-haystack, simply run the following command: + +```sh pip install langfuse-haystack ``` +## Usage + +To enable tracing in your Haystack pipeline, you need to add the `LangfuseComponent` to your pipeline. Here's an example: + +```python +import os + +os.environ["LANGFUSE_HOST"] = "https://cloud.langfuse.com" +os.environ["TOKENIZERS_PARALLELISM"] = "false" +os.environ["HAYSTACK_CONTENT_TRACING_ENABLED"] = "true" + +from haystack.components.builders import DynamicChatPromptBuilder +from haystack.components.generators.chat import OpenAIChatGenerator +from haystack.dataclasses import ChatMessage +from haystack import Pipeline + +from haystack_integrations.components.others.langfuse import LangfuseComponent + +if __name__ == "__main__": + pipe = Pipeline() + pipe.add_component("tracer", LangfuseComponent("Chat example")) + pipe.add_component("prompt_builder", DynamicChatPromptBuilder()) + pipe.add_component("llm", OpenAIChatGenerator(model="gpt-3.5-turbo")) + + pipe.connect("prompt_builder.prompt", "llm.messages") + + messages = [ + ChatMessage.from_system("Always respond in German even if some input data is in other languages."), + ChatMessage.from_user("Tell me about {{location}}"), + ] + + response = pipe.run( + data={"prompt_builder": {"template_variables": {"location": "Berlin"}, "prompt_source": messages}} + ) + print(response["llm"]["replies"][0]) + print(response["tracer"]["trace_url"]) +``` + +In this example, we add the `LangfuseComponent` to the pipeline with the name "tracer". Each run of the pipeline produces one trace viewable on the Langfuse website with a specific URL. The trace captures the entire execution context, including the prompts, completions, and metadata. + +## Trace Visualization + +Langfuse provides a user-friendly interface to visualize and analyze the traces generated by your Haystack pipeline. 
Simply login into your Langfuse account and navigate to the trace URL to view the trace details. + +## Contributing + +`hatch` is the best way to interact with this project, to install it: +```sh +pip install hatch +``` + +With `hatch` installed, to run all the tests: +``` +hatch run test +``` + +To run the linters `ruff` and `mypy`: +``` +hatch run lint:all +``` + ## License -`langfuse` is distributed under the terms of the [Apache-2.0](https://spdx.org/licenses/Apache-2.0.html) license. +`langfuse-haystack` is distributed under the terms of the [Apache-2.0](https://spdx.org/licenses/Apache-2.0.html) license. From e27d12020cc7d7a8aa67707104665aa60d9c524c Mon Sep 17 00:00:00 2001 From: Vladimir Blagojevic Date: Sat, 27 Apr 2024 20:52:57 +0200 Subject: [PATCH 10/28] Handle both ChatMessage and str payloads --- .../src/haystack_integrations/tracing/langfuse/tracer.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/integrations/langfuse/src/haystack_integrations/tracing/langfuse/tracer.py b/integrations/langfuse/src/haystack_integrations/tracing/langfuse/tracer.py index 826519d77..f138e4912 100644 --- a/integrations/langfuse/src/haystack_integrations/tracing/langfuse/tracer.py +++ b/integrations/langfuse/src/haystack_integrations/tracing/langfuse/tracer.py @@ -30,7 +30,10 @@ def set_content_tag(self, key: str, value: Any) -> None: self._span.update(input=value) elif key.endswith(".output"): if "replies" in value: - replies = [self.to_openai_format(m) for m in value["replies"]] + if all(isinstance(r, ChatMessage) for r in value["replies"]): + replies = [self.to_openai_format(m) for m in value["replies"]] + else: + replies = value["replies"] self._span.update(output=replies) else: self._span.update(output=value) From 5d8b54d6ec329d0a04e5daf52da387cfe7d31608 Mon Sep 17 00:00:00 2001 From: Vladimir Blagojevic Date: Tue, 30 Apr 2024 09:54:32 +0200 Subject: [PATCH 11/28] Renaming --- integrations/langfuse/README.md | 4 ++-- integrations/langfuse/example/basic_rag.py | 14 ++++++-------- integrations/langfuse/example/chat.py | 11 +++-------- integrations/langfuse/pydoc/config.yml | 2 +- .../components/connectors/langfuse/__init__.py | 6 ++++++ .../langfuse/langfuse_connector.py} | 2 +- integrations/langfuse/tests/test_tracing.py | 4 ++-- 7 files changed, 21 insertions(+), 22 deletions(-) create mode 100644 integrations/langfuse/src/haystack_integrations/components/connectors/langfuse/__init__.py rename integrations/langfuse/src/haystack_integrations/components/{others/langfuse/component.py => connectors/langfuse/langfuse_connector.py} (94%) diff --git a/integrations/langfuse/README.md b/integrations/langfuse/README.md index e7381d15c..18cfe6d60 100644 --- a/integrations/langfuse/README.md +++ b/integrations/langfuse/README.md @@ -37,11 +37,11 @@ from haystack.components.generators.chat import OpenAIChatGenerator from haystack.dataclasses import ChatMessage from haystack import Pipeline -from haystack_integrations.components.others.langfuse import LangfuseComponent +from haystack_integrations.components.connectors.langfuse import LangfuseConnector if __name__ == "__main__": pipe = Pipeline() - pipe.add_component("tracer", LangfuseComponent("Chat example")) + pipe.add_component("tracer", LangfuseConnector("Chat example")) pipe.add_component("prompt_builder", DynamicChatPromptBuilder()) pipe.add_component("llm", OpenAIChatGenerator(model="gpt-3.5-turbo")) diff --git a/integrations/langfuse/example/basic_rag.py b/integrations/langfuse/example/basic_rag.py index 5c48dcb10..492a14d49 
100644 --- a/integrations/langfuse/example/basic_rag.py +++ b/integrations/langfuse/example/basic_rag.py @@ -1,19 +1,16 @@ import os -os.environ["LANGFUSE_HOST"] = "https://cloud.langfuse.com" os.environ["TOKENIZERS_PARALLELISM"] = "false" os.environ["HAYSTACK_CONTENT_TRACING_ENABLED"] = "true" from datasets import load_dataset - -from haystack.document_stores.in_memory import InMemoryDocumentStore from haystack import Document, Pipeline +from haystack.components.builders import PromptBuilder from haystack.components.embedders import SentenceTransformersDocumentEmbedder, SentenceTransformersTextEmbedder -from haystack.components.retrievers import InMemoryEmbeddingRetriever from haystack.components.generators import OpenAIGenerator -from haystack.components.builders import PromptBuilder - -from haystack_integrations.components.others.langfuse import LangfuseComponent +from haystack.components.retrievers import InMemoryEmbeddingRetriever +from haystack.document_stores.in_memory import InMemoryDocumentStore +from haystack_integrations.components.connectors.langfuse import LangfuseConnector def get_pipeline(document_store: InMemoryDocumentStore): @@ -35,7 +32,7 @@ def get_pipeline(document_store: InMemoryDocumentStore): basic_rag_pipeline = Pipeline() # Add components to your pipeline - basic_rag_pipeline.add_component("tracer", LangfuseComponent("Basic RAG Pipeline")) + basic_rag_pipeline.add_component("tracer", LangfuseConnector("Basic RAG Pipeline")) basic_rag_pipeline.add_component( "text_embedder", SentenceTransformersTextEmbedder(model="sentence-transformers/all-MiniLM-L6-v2") ) @@ -65,3 +62,4 @@ def get_pipeline(document_store: InMemoryDocumentStore): response = pipeline.run({"text_embedder": {"text": question}, "prompt_builder": {"question": question}}) print(response["llm"]["replies"][0]) + print(response["tracer"]["trace_url"]) diff --git a/integrations/langfuse/example/chat.py b/integrations/langfuse/example/chat.py index 9659a2b5e..99ed7a238 100644 --- a/integrations/langfuse/example/chat.py +++ b/integrations/langfuse/example/chat.py @@ -1,22 +1,17 @@ import os -os.environ["LANGFUSE_HOST"] = "https://cloud.langfuse.com" -os.environ["TOKENIZERS_PARALLELISM"] = "false" os.environ["HAYSTACK_CONTENT_TRACING_ENABLED"] = "true" +from haystack import Pipeline from haystack.components.builders import DynamicChatPromptBuilder from haystack.components.generators.chat import OpenAIChatGenerator from haystack.dataclasses import ChatMessage -from haystack import Pipeline -from haystack.utils import Secret - -from haystack_integrations.components.others.langfuse import LangfuseComponent - +from haystack_integrations.components.connectors.langfuse import LangfuseConnector if __name__ == "__main__": pipe = Pipeline() - pipe.add_component("tracer", LangfuseComponent("Chat example")) + pipe.add_component("tracer", LangfuseConnector("Chat example")) pipe.add_component("prompt_builder", DynamicChatPromptBuilder()) pipe.add_component("llm", OpenAIChatGenerator(model="gpt-3.5-turbo")) diff --git a/integrations/langfuse/pydoc/config.yml b/integrations/langfuse/pydoc/config.yml index 72eb8def8..dcfddfec8 100644 --- a/integrations/langfuse/pydoc/config.yml +++ b/integrations/langfuse/pydoc/config.yml @@ -2,7 +2,7 @@ loaders: - type: haystack_pydoc_tools.loaders.CustomPythonLoader search_path: [../src] modules: [ - "haystack_integrations.components.others.langfuse.component", + "haystack_integrations.components.connectors.langfuse.langfuse_connector", "haystack_integrations.tracing.langfuse.tracer", ] 
ignore_when_discovered: ["__init__"] diff --git a/integrations/langfuse/src/haystack_integrations/components/connectors/langfuse/__init__.py b/integrations/langfuse/src/haystack_integrations/components/connectors/langfuse/__init__.py new file mode 100644 index 000000000..c17a196ce --- /dev/null +++ b/integrations/langfuse/src/haystack_integrations/components/connectors/langfuse/__init__.py @@ -0,0 +1,6 @@ +# SPDX-FileCopyrightText: 2023-present deepset GmbH +# +# SPDX-License-Identifier: Apache-2.0 +from .langfuse_connector import LangfuseConnector + +__all__ = ["LangfuseConnector"] diff --git a/integrations/langfuse/src/haystack_integrations/components/others/langfuse/component.py b/integrations/langfuse/src/haystack_integrations/components/connectors/langfuse/langfuse_connector.py similarity index 94% rename from integrations/langfuse/src/haystack_integrations/components/others/langfuse/component.py rename to integrations/langfuse/src/haystack_integrations/components/connectors/langfuse/langfuse_connector.py index d6ba93820..1b0f4eda2 100644 --- a/integrations/langfuse/src/haystack_integrations/components/others/langfuse/component.py +++ b/integrations/langfuse/src/haystack_integrations/components/connectors/langfuse/langfuse_connector.py @@ -5,7 +5,7 @@ @component -class LangfuseComponent: +class LangfuseConnector: def __init__(self, name: str): self.name = name self.tracer = LangfuseTracer(Langfuse(), name) diff --git a/integrations/langfuse/tests/test_tracing.py b/integrations/langfuse/tests/test_tracing.py index 6855792f9..50f6e89c1 100644 --- a/integrations/langfuse/tests/test_tracing.py +++ b/integrations/langfuse/tests/test_tracing.py @@ -10,7 +10,7 @@ from haystack.dataclasses import ChatMessage from requests.auth import HTTPBasicAuth -from haystack_integrations.components.others.langfuse import LangfuseComponent +from haystack_integrations.components.connectors.langfuse import LangfuseConnector @pytest.mark.integration @@ -21,7 +21,7 @@ def test_tracing_integration(): pipe = Pipeline() - pipe.add_component("tracer", LangfuseComponent("Chat example")) + pipe.add_component("tracer", LangfuseConnector("Chat example")) pipe.add_component("prompt_builder", DynamicChatPromptBuilder()) pipe.add_component("llm", OpenAIChatGenerator(model="gpt-3.5-turbo")) From 72c08c2d70d72b006e4e6c7f0da879fa793419f4 Mon Sep 17 00:00:00 2001 From: Vladimir Blagojevic Date: Tue, 30 Apr 2024 09:55:36 +0200 Subject: [PATCH 12/28] Versioning scheme --- integrations/langfuse/pyproject.toml | 6 ++---- .../components/others/langfuse/__about__.py | 1 - .../components/others/langfuse/__init__.py | 3 --- 3 files changed, 2 insertions(+), 8 deletions(-) delete mode 100644 integrations/langfuse/src/haystack_integrations/components/others/langfuse/__about__.py delete mode 100644 integrations/langfuse/src/haystack_integrations/components/others/langfuse/__init__.py diff --git a/integrations/langfuse/pyproject.toml b/integrations/langfuse/pyproject.toml index 403f55560..b9b5068ff 100644 --- a/integrations/langfuse/pyproject.toml +++ b/integrations/langfuse/pyproject.toml @@ -38,10 +38,8 @@ Source = "https://github.com/deepset-ai/haystack-core-integrations/tree/main/int packages = ["src/haystack_integrations"] [tool.hatch.version] -# Remove the following line and uncomment the others to enable git versioning -path = "src/haystack_integrations/components/others/langfuse/__about__.py" -# source = "vcs" -# tag-pattern = 'integrations\/langfuse-v(?P.*)' +source = "vcs" +tag-pattern = 'integrations\/langfuse-v(?P.*)' 
[tool.hatch.version.raw-options] diff --git a/integrations/langfuse/src/haystack_integrations/components/others/langfuse/__about__.py b/integrations/langfuse/src/haystack_integrations/components/others/langfuse/__about__.py deleted file mode 100644 index 27fdca497..000000000 --- a/integrations/langfuse/src/haystack_integrations/components/others/langfuse/__about__.py +++ /dev/null @@ -1 +0,0 @@ -__version__ = "0.0.3" diff --git a/integrations/langfuse/src/haystack_integrations/components/others/langfuse/__init__.py b/integrations/langfuse/src/haystack_integrations/components/others/langfuse/__init__.py deleted file mode 100644 index 6aa8f9875..000000000 --- a/integrations/langfuse/src/haystack_integrations/components/others/langfuse/__init__.py +++ /dev/null @@ -1,3 +0,0 @@ -from .component import LangfuseComponent - -__all__ = ["LangfuseComponent"] From ff5b8e0dfa0d194fc8d6a4dc6e620dacf8648f32 Mon Sep 17 00:00:00 2001 From: Vladimir Blagojevic Date: Tue, 30 Apr 2024 10:27:14 +0200 Subject: [PATCH 13/28] Pydocs, add public trace flag --- .../connectors/langfuse/langfuse_connector.py | 63 ++++++++++++++++++- .../tracing/langfuse/tracer.py | 5 +- integrations/langfuse/tests/test_tracing.py | 25 ++++---- 3 files changed, 75 insertions(+), 18 deletions(-) diff --git a/integrations/langfuse/src/haystack_integrations/components/connectors/langfuse/langfuse_connector.py b/integrations/langfuse/src/haystack_integrations/components/connectors/langfuse/langfuse_connector.py index 1b0f4eda2..89e512ab8 100644 --- a/integrations/langfuse/src/haystack_integrations/components/connectors/langfuse/langfuse_connector.py +++ b/integrations/langfuse/src/haystack_integrations/components/connectors/langfuse/langfuse_connector.py @@ -6,11 +6,70 @@ @component class LangfuseConnector: - def __init__(self, name: str): + """ + LangfuseConnector connects Haystack LLM framework with Langfuse in order to enable the tracing of operations + and data flow within various components of a pipeline. + + Simply add this component to your pipeline, DO NOT connect it to any other component. The LangfuseConnector will + automatically trace the operations and data flow within the pipeline. + + Here is an example of how to use it: + + ```python + import os + + os.environ["HAYSTACK_CONTENT_TRACING_ENABLED"] = "true" + + from haystack import Pipeline + from haystack.components.builders import DynamicChatPromptBuilder + from haystack.components.generators.chat import OpenAIChatGenerator + from haystack.dataclasses import ChatMessage + from haystack_integrations.components.connectors.langfuse import LangfuseConnector + + if __name__ == "__main__": + + pipe = Pipeline() + pipe.add_component("tracer", LangfuseConnector("Chat example")) + pipe.add_component("prompt_builder", DynamicChatPromptBuilder()) + pipe.add_component("llm", OpenAIChatGenerator(model="gpt-3.5-turbo")) + + pipe.connect("prompt_builder.prompt", "llm.messages") + + messages = [ + ChatMessage.from_system("Always respond in German even if some input data is in other languages."), + ChatMessage.from_user("Tell me about {{location}}"), + ] + + response = pipe.run( + data={"prompt_builder": {"template_variables": {"location": "Berlin"}, "prompt_source": messages}} + ) + print(response["llm"]["replies"][0]) + print(response["tracer"]["trace_url"]) + ``` + + """ + + def __init__(self, name: str, public: bool = False): + """ + Initialize the LangfuseConnector component. + + :param name: The name of the pipeline or component. 
This name will be used to identify the tracing run on the + Langfuse dashboard. + :param public: Whether the tracing data should be public or private. If set to `True`, the tracing data will be + publicly accessible to anyone with the tracing URL. If set to `False`, the tracing data will be private and + only accessible to the Langfuse account owner. Default is `False`. + """ self.name = name - self.tracer = LangfuseTracer(Langfuse(), name) + self.tracer = LangfuseTracer(tracer=Langfuse(), name=name, public=public) tracing.enable_tracing(self.tracer) @component.output_types(name=str, trace_url=str) def run(self): + """ + Runs the LangfuseConnector component. + + :returns: A dictionary with the following keys: + - `name`: The name of the tracing component. + - `trace_url`: The URL to the tracing data. + """ return {"name": self.name, "trace_url": self.tracer.get_trace_url()} diff --git a/integrations/langfuse/src/haystack_integrations/tracing/langfuse/tracer.py b/integrations/langfuse/src/haystack_integrations/tracing/langfuse/tracer.py index f138e4912..857081274 100644 --- a/integrations/langfuse/src/haystack_integrations/tracing/langfuse/tracer.py +++ b/integrations/langfuse/src/haystack_integrations/tracing/langfuse/tracer.py @@ -58,10 +58,11 @@ def to_openai_format(self, m: ChatMessage) -> Dict[str, Any]: class LangfuseTracer(Tracer): - def __init__(self, tracer: "langfuse.Langfuse", name: str = "Haystack") -> None: + def __init__(self, tracer: "langfuse.Langfuse", name: str = "Haystack", public: bool = False) -> None: self._tracer = tracer self._context: list[LangfuseSpan] = [] self._name = name + self._public = public @contextlib.contextmanager def trace(self, operation_name: str, tags: Optional[Dict[str, Any]] = None) -> Iterator[Span]: @@ -105,7 +106,7 @@ def trace(self, operation_name: str, tags: Optional[Dict[str, Any]] = None) -> I def current_span(self) -> Span: if not self._context: # The root span has to be a trace - self._context.append(LangfuseSpan(self._tracer.trace(name=self._name))) + self._context.append(LangfuseSpan(self._tracer.trace(name=self._name, public=self._public))) return self._context[-1] def get_trace_url(self) -> str: diff --git a/integrations/langfuse/tests/test_tracing.py b/integrations/langfuse/tests/test_tracing.py index 50f6e89c1..3d61d2cce 100644 --- a/integrations/langfuse/tests/test_tracing.py +++ b/integrations/langfuse/tests/test_tracing.py @@ -1,4 +1,8 @@ import os + +# don't remove (or move) this env var setting from here, it's needed to turn tracing on +os.environ["HAYSTACK_CONTENT_TRACING_ENABLED"] = "true" + from urllib.parse import urlparse import pytest @@ -21,7 +25,7 @@ def test_tracing_integration(): pipe = Pipeline() - pipe.add_component("tracer", LangfuseConnector("Chat example")) + pipe.add_component("tracer", LangfuseConnector(name="Chat example", public=True)) # public so anyone can verify run pipe.add_component("prompt_builder", DynamicChatPromptBuilder()) pipe.add_component("llm", OpenAIChatGenerator(model="gpt-3.5-turbo")) @@ -37,19 +41,12 @@ def test_tracing_integration(): ) assert "Berlin" in response["llm"]["replies"][0].content assert response["tracer"]["trace_url"] - - url = "https://cloud.langfuse.com/api/public/traces/" trace_url = response["tracer"]["trace_url"] - parsed_url = urlparse(trace_url) - # trace id is the last part of the path (after the last '/') - uuid = os.path.basename(parsed_url.path) - try: - # GET request with Basic Authentication on the Langfuse API - response = requests.get( - url + uuid, 
auth=HTTPBasicAuth(os.environ.get("LANGFUSE_PUBLIC_KEY"), os.environ.get("LANGFUSE_SECRET_KEY")) - ) - - assert response.status_code == 200, f"Failed to retrieve data from Langfuse API: {response.status_code}" + # should be able to access the trace data because we set LangfuseConnector to public=True + response = requests.get(trace_url) + assert ( + response.status_code == 200 + ), f"Failed to retrieve tracing data from {trace_url} got: {response.status_code}" except requests.exceptions.RequestException as e: - assert False, f"Failed to retrieve data from Langfuse API: {e}" + assert False, f"Failed to retrieve tracing data from Langfuse: {e}" From fccd6a9c97a89c3966565d397c04c9032d76410f Mon Sep 17 00:00:00 2001 From: Vladimir Blagojevic Date: Tue, 30 Apr 2024 10:31:21 +0200 Subject: [PATCH 14/28] Add hatch-vcs dep --- integrations/langfuse/pyproject.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/integrations/langfuse/pyproject.toml b/integrations/langfuse/pyproject.toml index b9b5068ff..beff61c35 100644 --- a/integrations/langfuse/pyproject.toml +++ b/integrations/langfuse/pyproject.toml @@ -1,5 +1,5 @@ [build-system] -requires = ["hatchling"] +requires = ["hatchling", "hatch-vcs"] build-backend = "hatchling.build" [project] From 8640cdf6c3fecdc46330c4fe73bdb95da6283a3d Mon Sep 17 00:00:00 2001 From: Vladimir Blagojevic Date: Tue, 30 Apr 2024 10:49:00 +0200 Subject: [PATCH 15/28] Use OPENAI_API_KEY secret --- .github/workflows/langfuse.yml | 1 + 1 file changed, 1 insertion(+) diff --git a/.github/workflows/langfuse.yml b/.github/workflows/langfuse.yml index 9d77ec2f9..381b2ed77 100644 --- a/.github/workflows/langfuse.yml +++ b/.github/workflows/langfuse.yml @@ -23,6 +23,7 @@ env: FORCE_COLOR: "1" LANGFUSE_SECRET_KEY: ${{ secrets.LANGFUSE_SECRET_KEY }} LANGFUSE_PUBLIC_KEY: ${{ secrets.LANGFUSE_PUBLIC_KEY }} + OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }} jobs: run: From 53e4a51dd465c67b5c68f297858aa06beaa72217 Mon Sep 17 00:00:00 2001 From: Daria Fokina Date: Tue, 30 Apr 2024 13:15:27 +0200 Subject: [PATCH 16/28] update docstrings --- .../components/connectors/langfuse/langfuse_connector.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/integrations/langfuse/src/haystack_integrations/components/connectors/langfuse/langfuse_connector.py b/integrations/langfuse/src/haystack_integrations/components/connectors/langfuse/langfuse_connector.py index 89e512ab8..86da4a680 100644 --- a/integrations/langfuse/src/haystack_integrations/components/connectors/langfuse/langfuse_connector.py +++ b/integrations/langfuse/src/haystack_integrations/components/connectors/langfuse/langfuse_connector.py @@ -10,7 +10,7 @@ class LangfuseConnector: LangfuseConnector connects Haystack LLM framework with Langfuse in order to enable the tracing of operations and data flow within various components of a pipeline. - Simply add this component to your pipeline, DO NOT connect it to any other component. The LangfuseConnector will + Simply add this component to your pipeline, but *do not* connect it to any other component. The LangfuseConnector will automatically trace the operations and data flow within the pipeline. Here is an example of how to use it: @@ -57,7 +57,7 @@ def __init__(self, name: str, public: bool = False): Langfuse dashboard. :param public: Whether the tracing data should be public or private. If set to `True`, the tracing data will be publicly accessible to anyone with the tracing URL. 
If set to `False`, the tracing data will be private and - only accessible to the Langfuse account owner. Default is `False`. + only accessible to the Langfuse account owner. The default is `False`. """ self.name = name self.tracer = LangfuseTracer(tracer=Langfuse(), name=name, public=public) From 946a91abc3482496ac35df121eb212e8c7ab8cf6 Mon Sep 17 00:00:00 2001 From: Vladimir Blagojevic Date: Tue, 30 Apr 2024 14:11:08 +0200 Subject: [PATCH 17/28] Update integrations/langfuse/README.md Co-authored-by: Daria Fokina --- integrations/langfuse/README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/integrations/langfuse/README.md b/integrations/langfuse/README.md index 18cfe6d60..e8b6ce0f7 100644 --- a/integrations/langfuse/README.md +++ b/integrations/langfuse/README.md @@ -15,7 +15,7 @@ langfuse-haystack integrates tracing capabilities into [Haystack](https://github ## Installation -To install langfuse-haystack, simply run the following command: +To install langfuse-haystack, run the following command: ```sh pip install langfuse-haystack From 1f19f9a4a2e4372d5d8b45a1e3cd536a7bfb6b1a Mon Sep 17 00:00:00 2001 From: Vladimir Blagojevic Date: Tue, 30 Apr 2024 14:11:25 +0200 Subject: [PATCH 18/28] Update integrations/langfuse/README.md Co-authored-by: Daria Fokina --- integrations/langfuse/README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/integrations/langfuse/README.md b/integrations/langfuse/README.md index e8b6ce0f7..2700a79c8 100644 --- a/integrations/langfuse/README.md +++ b/integrations/langfuse/README.md @@ -23,7 +23,7 @@ pip install langfuse-haystack ## Usage -To enable tracing in your Haystack pipeline, you need to add the `LangfuseComponent` to your pipeline. Here's an example: +To enable tracing in your Haystack pipeline, add the `LangfuseComponent` to your pipeline. Here's an example: ```python import os From 825d39cd00079052727ccd0347fdc95d84167299 Mon Sep 17 00:00:00 2001 From: Vladimir Blagojevic Date: Tue, 30 Apr 2024 14:11:37 +0200 Subject: [PATCH 19/28] Update integrations/langfuse/README.md Co-authored-by: Daria Fokina --- integrations/langfuse/README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/integrations/langfuse/README.md b/integrations/langfuse/README.md index 2700a79c8..0d11de56a 100644 --- a/integrations/langfuse/README.md +++ b/integrations/langfuse/README.md @@ -63,7 +63,7 @@ In this example, we add the `LangfuseComponent` to the pipeline with the name "t ## Trace Visualization -Langfuse provides a user-friendly interface to visualize and analyze the traces generated by your Haystack pipeline. Simply login into your Langfuse account and navigate to the trace URL to view the trace details. +Langfuse provides a user-friendly interface to visualize and analyze the traces generated by your Haystack pipeline. Login into your Langfuse account and navigate to the trace URL to view the trace details. 
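A minimal sketch of how that trace URL can also be retrieved and queried programmatically, assuming the `pipe` and `messages` objects from the usage example above, a Langfuse Cloud project, and the `LANGFUSE_PUBLIC_KEY` / `LANGFUSE_SECRET_KEY` environment variables (the public-API endpoint below is the one the integration tests use):

```python
import os
from urllib.parse import urlparse

import requests
from requests.auth import HTTPBasicAuth

# Run the pipeline built in the usage example; the tracer component returns
# the URL of the trace produced by this particular run.
response = pipe.run(
    data={"prompt_builder": {"template_variables": {"location": "Berlin"}, "prompt_source": messages}}
)
trace_url = response["tracer"]["trace_url"]
print(f"Trace for this run: {trace_url}")

# Optionally fetch the raw trace via the Langfuse public API;
# the trace id is the last path segment of the trace URL.
trace_id = os.path.basename(urlparse(trace_url).path)
api_response = requests.get(
    "https://cloud.langfuse.com/api/public/traces/" + trace_id,
    auth=HTTPBasicAuth(os.environ["LANGFUSE_PUBLIC_KEY"], os.environ["LANGFUSE_SECRET_KEY"]),
)
api_response.raise_for_status()
print(api_response.json())
```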
## Contributing From 02288e456564808895b711dee03748bda2c1b5f7 Mon Sep 17 00:00:00 2001 From: Vladimir Blagojevic Date: Tue, 30 Apr 2024 14:11:51 +0200 Subject: [PATCH 20/28] Update integrations/langfuse/README.md Co-authored-by: Daria Fokina --- integrations/langfuse/README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/integrations/langfuse/README.md b/integrations/langfuse/README.md index 0d11de56a..87f997d41 100644 --- a/integrations/langfuse/README.md +++ b/integrations/langfuse/README.md @@ -67,7 +67,7 @@ Langfuse provides a user-friendly interface to visualize and analyze the traces ## Contributing -`hatch` is the best way to interact with this project, to install it: +`hatch` is the best way to interact with this project. To install it, run: ```sh pip install hatch ``` From 18be5fc29753bf5b89f3590382251c2aa592cef2 Mon Sep 17 00:00:00 2001 From: Vladimir Blagojevic Date: Tue, 30 Apr 2024 14:12:06 +0200 Subject: [PATCH 21/28] Update integrations/langfuse/README.md Co-authored-by: Daria Fokina --- integrations/langfuse/README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/integrations/langfuse/README.md b/integrations/langfuse/README.md index 87f997d41..de05574d7 100644 --- a/integrations/langfuse/README.md +++ b/integrations/langfuse/README.md @@ -77,7 +77,7 @@ With `hatch` installed, to run all the tests: hatch run test ``` -To run the linters `ruff` and `mypy`: +Run the linters `ruff` and `mypy`: ``` hatch run lint:all ``` From 08827666148571143dc0d92bbb3ab1686cdd2e15 Mon Sep 17 00:00:00 2001 From: Vladimir Blagojevic Date: Tue, 30 Apr 2024 14:12:16 +0200 Subject: [PATCH 22/28] Update integrations/langfuse/README.md Co-authored-by: Daria Fokina --- integrations/langfuse/README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/integrations/langfuse/README.md b/integrations/langfuse/README.md index de05574d7..e2286dce0 100644 --- a/integrations/langfuse/README.md +++ b/integrations/langfuse/README.md @@ -72,7 +72,7 @@ Langfuse provides a user-friendly interface to visualize and analyze the traces pip install hatch ``` -With `hatch` installed, to run all the tests: +With `hatch` installed, run all the tests: ``` hatch run test ``` From e7730f7f9172b279f5ca143ad019a652a80336a3 Mon Sep 17 00:00:00 2001 From: Vladimir Blagojevic Date: Tue, 30 Apr 2024 14:15:25 +0200 Subject: [PATCH 23/28] lint fixups --- .../components/connectors/langfuse/langfuse_connector.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/integrations/langfuse/src/haystack_integrations/components/connectors/langfuse/langfuse_connector.py b/integrations/langfuse/src/haystack_integrations/components/connectors/langfuse/langfuse_connector.py index 86da4a680..7e9ddfd27 100644 --- a/integrations/langfuse/src/haystack_integrations/components/connectors/langfuse/langfuse_connector.py +++ b/integrations/langfuse/src/haystack_integrations/components/connectors/langfuse/langfuse_connector.py @@ -10,8 +10,8 @@ class LangfuseConnector: LangfuseConnector connects Haystack LLM framework with Langfuse in order to enable the tracing of operations and data flow within various components of a pipeline. - Simply add this component to your pipeline, but *do not* connect it to any other component. The LangfuseConnector will - automatically trace the operations and data flow within the pipeline. + Simply add this component to your pipeline, but *do not* connect it to any other component. 
The LangfuseConnector + will automatically trace the operations and data flow within the pipeline. Here is an example of how to use it: From 6171b159225b69d2fb28c1cb90fb73b426934144 Mon Sep 17 00:00:00 2001 From: Vladimir Blagojevic Date: Thu, 2 May 2024 09:44:49 +0200 Subject: [PATCH 24/28] Improve test, previous version always returned 200 --- integrations/langfuse/tests/test_tracing.py | 17 +++++++++++------ 1 file changed, 11 insertions(+), 6 deletions(-) diff --git a/integrations/langfuse/tests/test_tracing.py b/integrations/langfuse/tests/test_tracing.py index 3d61d2cce..4fc1cd9ce 100644 --- a/integrations/langfuse/tests/test_tracing.py +++ b/integrations/langfuse/tests/test_tracing.py @@ -41,12 +41,17 @@ def test_tracing_integration(): ) assert "Berlin" in response["llm"]["replies"][0].content assert response["tracer"]["trace_url"] + url = "https://cloud.langfuse.com/api/public/traces/" trace_url = response["tracer"]["trace_url"] + parsed_url = urlparse(trace_url) + # trace id is the last part of the path (after the last '/') + uuid = os.path.basename(parsed_url.path) try: - # should be able to access the trace data because we set LangfuseConnector to public=True - response = requests.get(trace_url) - assert ( - response.status_code == 200 - ), f"Failed to retrieve tracing data from {trace_url} got: {response.status_code}" + # GET request with Basic Authentication on the Langfuse API + response = requests.get( + url + uuid, auth=HTTPBasicAuth(os.environ.get("LANGFUSE_PUBLIC_KEY"), os.environ.get("LANGFUSE_SECRET_KEY")) + ) + + assert response.status_code == 200, f"Failed to retrieve data from Langfuse API: {response.status_code}" except requests.exceptions.RequestException as e: - assert False, f"Failed to retrieve tracing data from Langfuse: {e}" + assert False, f"Failed to retrieve data from Langfuse API: {e}" From df87c4f0bd4703327eea223e87d65fc7ee5cce68 Mon Sep 17 00:00:00 2001 From: Vladimir Blagojevic Date: Thu, 2 May 2024 14:12:23 +0200 Subject: [PATCH 25/28] Update integrations/langfuse/README.md Co-authored-by: Daria Fokina --- integrations/langfuse/README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/integrations/langfuse/README.md b/integrations/langfuse/README.md index e2286dce0..8061cc162 100644 --- a/integrations/langfuse/README.md +++ b/integrations/langfuse/README.md @@ -23,7 +23,7 @@ pip install langfuse-haystack ## Usage -To enable tracing in your Haystack pipeline, add the `LangfuseComponent` to your pipeline. Here's an example: +To enable tracing in your Haystack pipeline, add the `LangfuseConnector` to your pipeline. Here's an example: ```python import os From bb7c6f699bfd132351d101c94563cea08749bf61 Mon Sep 17 00:00:00 2001 From: Vladimir Blagojevic Date: Thu, 2 May 2024 14:12:29 +0200 Subject: [PATCH 26/28] Update integrations/langfuse/README.md Co-authored-by: Daria Fokina --- integrations/langfuse/README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/integrations/langfuse/README.md b/integrations/langfuse/README.md index 8061cc162..9864a2368 100644 --- a/integrations/langfuse/README.md +++ b/integrations/langfuse/README.md @@ -59,7 +59,7 @@ if __name__ == "__main__": print(response["tracer"]["trace_url"]) ``` -In this example, we add the `LangfuseComponent` to the pipeline with the name "tracer". Each run of the pipeline produces one trace viewable on the Langfuse website with a specific URL. The trace captures the entire execution context, including the prompts, completions, and metadata. 
+In this example, we add the `LangfuseConnector` to the pipeline with the name "tracer". Each run of the pipeline produces one trace viewable on the Langfuse website with a specific URL. The trace captures the entire execution context, including the prompts, completions, and metadata. ## Trace Visualization From 33628ed2bf8b5137e9ba6ae036759383f97b36cc Mon Sep 17 00:00:00 2001 From: Vladimir Blagojevic Date: Thu, 2 May 2024 17:36:35 +0200 Subject: [PATCH 27/28] Add details about Langfuse keys --- integrations/langfuse/README.md | 6 +++++- .../components/connectors/langfuse/langfuse_connector.py | 7 +++++++ 2 files changed, 12 insertions(+), 1 deletion(-) diff --git a/integrations/langfuse/README.md b/integrations/langfuse/README.md index 9864a2368..901ac122b 100644 --- a/integrations/langfuse/README.md +++ b/integrations/langfuse/README.md @@ -23,7 +23,11 @@ pip install langfuse-haystack ## Usage -To enable tracing in your Haystack pipeline, add the `LangfuseConnector` to your pipeline. Here's an example: +To enable tracing in your Haystack pipeline, add the `LangfuseConnector` to your pipeline. +You also need to set the `LANGFUSE_SECRET_KEY` and `LANGFUSE_PUBLIC_KEY` environment variables in order to connect to Langfuse account. +You can get these keys by signing up for an account on the Langfuse website. + +Here's an example: ```python import os diff --git a/integrations/langfuse/src/haystack_integrations/components/connectors/langfuse/langfuse_connector.py b/integrations/langfuse/src/haystack_integrations/components/connectors/langfuse/langfuse_connector.py index 7e9ddfd27..d26b21ef2 100644 --- a/integrations/langfuse/src/haystack_integrations/components/connectors/langfuse/langfuse_connector.py +++ b/integrations/langfuse/src/haystack_integrations/components/connectors/langfuse/langfuse_connector.py @@ -13,6 +13,13 @@ class LangfuseConnector: Simply add this component to your pipeline, but *do not* connect it to any other component. The LangfuseConnector will automatically trace the operations and data flow within the pipeline. + Note that you need to set the `LANGFUSE_SECRET_KEY` and `LANGFUSE_PUBLIC_KEY` environment variables in order + to use this component. The `LANGFUSE_SECRET_KEY` and `LANGFUSE_PUBLIC_KEY` are the secret and public keys provided by + Langfuse. You can get these keys by signing up for an account on the Langfuse website. + + In addition, you need to set the `HAYSTACK_CONTENT_TRACING_ENABLED` environment variable to `true` in order to enable + Haystack tracing in your pipeline. + Here is an example of how to use it: ```python From a30572890af0310d3ba8d588bf4faea1edc1916e Mon Sep 17 00:00:00 2001 From: Vladimir Blagojevic Date: Thu, 2 May 2024 17:38:27 +0200 Subject: [PATCH 28/28] Pylint --- .../components/connectors/langfuse/langfuse_connector.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/integrations/langfuse/src/haystack_integrations/components/connectors/langfuse/langfuse_connector.py b/integrations/langfuse/src/haystack_integrations/components/connectors/langfuse/langfuse_connector.py index d26b21ef2..bb2d22954 100644 --- a/integrations/langfuse/src/haystack_integrations/components/connectors/langfuse/langfuse_connector.py +++ b/integrations/langfuse/src/haystack_integrations/components/connectors/langfuse/langfuse_connector.py @@ -14,11 +14,11 @@ class LangfuseConnector: will automatically trace the operations and data flow within the pipeline. 
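A minimal sketch of the environment setup these notes describe (the key values are placeholders; real keys come from your Langfuse project settings):

```python
import os

# Langfuse credentials (placeholder values; use the keys from your own project).
os.environ["LANGFUSE_SECRET_KEY"] = "sk-lf-..."
os.environ["LANGFUSE_PUBLIC_KEY"] = "pk-lf-..."

# Content tracing must be switched on, as the example scripts do, before the
# pipeline components are imported and run.
os.environ["HAYSTACK_CONTENT_TRACING_ENABLED"] = "true"
```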
     Note that you need to set the `LANGFUSE_SECRET_KEY` and `LANGFUSE_PUBLIC_KEY` environment variables in order
-    to use this component. The `LANGFUSE_SECRET_KEY` and `LANGFUSE_PUBLIC_KEY` are the secret and public keys provided by
-    Langfuse. You can get these keys by signing up for an account on the Langfuse website.
+    to use this component. The `LANGFUSE_SECRET_KEY` and `LANGFUSE_PUBLIC_KEY` are the secret and public keys provided
+    by Langfuse. You can get these keys by signing up for an account on the Langfuse website.
 
-    In addition, you need to set the `HAYSTACK_CONTENT_TRACING_ENABLED` environment variable to `true` in order to enable
-    Haystack tracing in your pipeline.
+    In addition, you need to set the `HAYSTACK_CONTENT_TRACING_ENABLED` environment variable to `true` in order to
+    enable Haystack tracing in your pipeline.
 
     Here is an example of how to use it: