Skip to content

Commit

Permalink
Merge branch 'main' into openapi
Browse files Browse the repository at this point in the history
  • Loading branch information
vblagoje authored Jun 24, 2024
2 parents 7eb140f + f4c29d8 commit d714a6f
Show file tree
Hide file tree
Showing 6 changed files with 96 additions and 37 deletions.
22 changes: 12 additions & 10 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -26,23 +26,25 @@ $ pip install -U haystack-experimental

## Experiments lifecycle

Any experimental feature will be removed from `haystack-experimental` after a period of 3 months. After this time,
the experiment will be either:
- Merged into Haystack core and published in the next minor release
- Released as a Core Integration,
Each experimental feature has a default lifespan of 3 months starting from the date of the first non-pre-release build
that includes it. Once it reaches the end of its lifespan, the experiment will be either:
- Merged into Haystack core and published in the next minor release, or
- Released as a Core Integration, or
- Dropped.

## Experiments catalog

The latest version of the package contains the following experiments:

| Name | Type | Experiment end date |
|--------------------------|-------------------------| ------------------- |
| [`EvaluationHarness`][1] | Evaluation orchestrator | August 2024 |
| [`OpenAPITool`][2] | OpenAPITool component | August 2024 |
| Name | Type | Expected experiment end date |
| ------------------------ | ----------------------- | ------------------- |
| [`EvaluationHarness`][1] | Evaluation orchestrator | September 2024 |
| [`OpenAIFunctionCaller`][2] | Function calling component | September 2024 |
| [`OpenAPITool`][3] | OpenAPITool component | September 2024 |

[1]: https://github.com/deepset-ai/haystack-experimental/tree/main/haystack_experimental/evaluation/harness
[2]: https://github.com/deepset-ai/haystack-experimental/tree/main/haystack_experimental/components/tools/openapi
[2]: https://github.com/deepset-ai/haystack-experimental/tree/main/haystack_experimental/components/tools/openai
[3]: https://github.com/deepset-ai/haystack-experimental/tree/main/haystack_experimental/components/tools/openapi

## Usage

Expand Down Expand Up @@ -114,4 +116,4 @@ class Pipeline(HaystackPipeline):

## Contributing

Direct contributions to `haystack-experimental` are not expected, but Haystack maintainers might ask contributors to move pull requests that target the [core repository](https://github.com/deepset-ai/haystack) to this repository.
Direct contributions to `haystack-experimental` are not expected, but Haystack maintainers might ask contributors to move pull requests that target the [core repository](https://github.com/deepset-ai/haystack) to this repository.
Original file line number Diff line number Diff line change
Expand Up @@ -44,8 +44,8 @@ def default_rag_evaluation_pipeline(
RAGEvaluationMetric.SEMANTIC_ANSWER_SIMILARITY: partial(
SASEvaluator, model="sentence-transformers/all-MiniLM-L6-v2"
),
RAGEvaluationMetric.ANSWER_FAITHFULNESS: FaithfulnessEvaluator,
RAGEvaluationMetric.CONTEXT_RELEVANCE: ContextRelevanceEvaluator,
RAGEvaluationMetric.ANSWER_FAITHFULNESS: partial(FaithfulnessEvaluator, raise_on_failure=False),
RAGEvaluationMetric.CONTEXT_RELEVANCE: partial(ContextRelevanceEvaluator, raise_on_failure=False),
}

for metric in metrics:
Expand Down
36 changes: 22 additions & 14 deletions haystack_experimental/evaluation/harness/rag/harness.py
Original file line number Diff line number Diff line change
Expand Up @@ -131,23 +131,31 @@ def run( # noqa: D102
pipeline_outputs["second"],
)

result_inputs = {
"questions": inputs.queries,
"contexts": [
[doc.content for doc in docs]
for docs in self._lookup_component_output(
RAGExpectedComponent.DOCUMENT_RETRIEVER,
rag_outputs,
"retrieved_documents",
)
],
"responses": self._lookup_component_output(
RAGExpectedComponent.RESPONSE_GENERATOR, rag_outputs, "replies"
),
}
if inputs.ground_truth_answers is not None:
result_inputs["ground_truth_answers"] = inputs.ground_truth_answers
if inputs.ground_truth_documents is not None:
result_inputs["ground_truth_documents"] = [
[doc.content for doc in docs] for docs in inputs.ground_truth_documents
]

assert run_name is not None
run_results = EvaluationRunResult(
run_name,
inputs={
"questions": inputs.queries,
"contexts": [
[doc.content for doc in docs]
for docs in self._lookup_component_output(
RAGExpectedComponent.DOCUMENT_RETRIEVER,
rag_outputs,
"retrieved_documents",
)
],
"responses": self._lookup_component_output(
RAGExpectedComponent.RESPONSE_GENERATOR, rag_outputs, "replies"
),
},
inputs=result_inputs,
results=eval_outputs,
)

Expand Down
5 changes: 0 additions & 5 deletions haystack_experimental/version.py

This file was deleted.

10 changes: 4 additions & 6 deletions pyproject.toml
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
[build-system]
requires = ["hatchling>=1.8.0"]
requires = ["hatchling>=1.8.0", "hatch-vcs"]
build-backend = "hatchling.build"

[project]
Expand Down Expand Up @@ -74,7 +74,8 @@ sync = "./.github/utils/pydoc-markdown.sh"
delete-outdated = "python ./.github/utils/delete_outdated_docs.py {args}"

[tool.hatch.version]
path = "haystack_experimental/version.py"
source = "vcs"
tag-pattern = 'v(?P<version>.*)'

[tool.hatch.metadata]
allow-direct-references = true
Expand All @@ -91,10 +92,7 @@ quiet-level = 3
skip = "test/nodes/*,test/others/*,test/samples/*,e2e/*"

[tool.pylint]
ignore-paths = [
"haystack_experimental/__init__.py",
"haystack_experimental/version.py",
]
ignore-paths = ["haystack_experimental/__init__.py"]

[tool.pylint.'MESSAGES CONTROL']
max-line-length = 120
Expand Down
56 changes: 56 additions & 0 deletions test/evaluation/harness/rag/test_harness.py
Original file line number Diff line number Diff line change
Expand Up @@ -664,6 +664,62 @@ def test_run_model_based_metrics(self, monkeypatch):

assert output.inputs == inputs
assert output.results.run_name == "test_run"
assert output.results.inputs == {
"questions": ["What is the capital of France?"] * 6,
"contexts": [
["France"],
[
"9th century",
"10th century",
"9th",
],
[
"classical",
"rock music",
"dubstep",
],
[
"11th",
"the 11th",
"11th century",
],
[
"Denmark",
"Norway",
"Iceland",
],
[
"10th century",
"the first half of the 10th century",
"10th",
"10th",
],
],
"responses": [
"placeholder",
"placeholder",
"placeholder",
"placeholder",
"placeholder",
"placeholder",
],
"ground_truth_documents": [
["France"],
["9th century", "9th"],
["classical music", "classical"],
["11th century", "the 11th"],
["Denmark, Iceland and Norway"],
["10th century", "10th"],
],
"ground_truth_answers": [
"Paris is the capital of France.",
"9th century",
"classical music",
"11th century",
"Denmark, Iceland and Norway",
"10th century",
],
}
assert output.results.results == {
"metric_answer_faithfulness": MockModelBasedEvaluator.default_output(
RAGEvaluationMetric.ANSWER_FAITHFULNESS
Expand Down

0 comments on commit d714a6f

Please sign in to comment.