From 84013849276f25e3397489915d11a5d8cceb3b09 Mon Sep 17 00:00:00 2001
From: Madeesh Kannan <shadeMe@users.noreply.github.com>
Date: Thu, 4 Jul 2024 16:02:38 +0200
Subject: [PATCH] feat/refactor: Allow pipelines without generators to be used
 with the RAG eval harness (#31)

---
 .../evaluation/harness/rag/__init__.py        |   3 +-
 .../evaluation/harness/rag/harness.py         | 237 ++++++++++++------
 .../evaluation/harness/rag/parameters.py      |  10 +-
 test/evaluation/harness/rag/test_harness.py   |  65 ++++-
 4 files changed, 217 insertions(+), 98 deletions(-)

diff --git a/haystack_experimental/evaluation/harness/rag/__init__.py b/haystack_experimental/evaluation/harness/rag/__init__.py
index cc714697..fb7009f7 100644
--- a/haystack_experimental/evaluation/harness/rag/__init__.py
+++ b/haystack_experimental/evaluation/harness/rag/__init__.py
@@ -2,7 +2,7 @@
 #
 # SPDX-License-Identifier: Apache-2.0
 
-from .harness import RAGEvaluationHarness
+from .harness import DefaultRAGArchitecture, RAGEvaluationHarness
 from .parameters import (
     RAGEvaluationInput,
     RAGEvaluationMetric,
@@ -13,6 +13,7 @@
 )
 
 _all_ = [
+    "DefaultRAGArchitecture",
     "RAGEvaluationHarness",
     "RAGExpectedComponent",
     "RAGExpectedComponentMetadata",
diff --git a/haystack_experimental/evaluation/harness/rag/harness.py b/haystack_experimental/evaluation/harness/rag/harness.py
index b66d9aa1..f76c48c3 100644
--- a/haystack_experimental/evaluation/harness/rag/harness.py
+++ b/haystack_experimental/evaluation/harness/rag/harness.py
@@ -3,7 +3,8 @@
 # SPDX-License-Identifier: Apache-2.0
 
 from copy import deepcopy
-from typing import Any, Dict, List, Optional, Set
+from enum import Enum
+from typing import Any, Dict, List, Optional, Set, Union
 
 from haystack import Pipeline
 from haystack.evaluation.eval_run_result import EvaluationRunResult
@@ -25,6 +26,83 @@
 )
 
 
+class DefaultRAGArchitecture(Enum):
+    """
+    Enumerates the default RAG pipeline architectures that the evaluation harness can be used with.
+    """
+
+    #: Expects a RAG pipeline that has:
+    #: - A query embedder component named 'query_embedder' with a 'text' input.
+    #: - A document retriever component named 'retriever' with a 'documents' output.
+    EMBEDDING_RETRIEVAL = "embedding_retrieval"
+
+    #: Expects a RAG pipeline that has:
+    #: - A document retriever component named 'retriever' with a 'query' input and a 'documents' output.
+    KEYWORD_RETRIEVAL = "keyword_retrieval"
+
+    #: Expects a RAG pipeline that has:
+    #: - A query embedder component named 'query_embedder' with a 'text' input.
+    #: - A document retriever component named 'retriever' with a 'documents' output.
+    #: - A response generator component named 'generator' with a 'replies' output.
+    GENERATION_WITH_EMBEDDING_RETRIEVAL = "generation_with_embedding_retrieval"
+
+    #: Expects a RAG pipeline that has:
+    #: - A document retriever component named 'retriever' with a 'query' input and a 'documents' output.
+    #: - A response generator component named 'generator' with a 'replies' output.
+    GENERATION_WITH_KEYWORD_RETRIEVAL = "generation_with_keyword_retrieval"
+
+    @property
+    def expected_components(
+        self,
+    ) -> Dict[RAGExpectedComponent, RAGExpectedComponentMetadata]:
+        """
+        Builds the component metadata mapping assumed by this architecture.
+
+        :returns:
+            A newly created mapping of expected components to their metadata.
+        """
+        architecture = DefaultRAGArchitecture
+        uses_embedder = self in (
+            architecture.EMBEDDING_RETRIEVAL,
+            architecture.GENERATION_WITH_EMBEDDING_RETRIEVAL,
+        )
+        uses_keywords = self in (
+            architecture.KEYWORD_RETRIEVAL,
+            architecture.GENERATION_WITH_KEYWORD_RETRIEVAL,
+        )
+        if not (uses_embedder or uses_keywords):
+            raise NotImplementedError(f"Unexpected default RAG architecture: {self}")
+
+        # In the keyword layouts the component named 'retriever' is also registered as the query processor.
+        if uses_embedder:
+            query_processor = RAGExpectedComponentMetadata(
+                name="query_embedder", input_mapping={"query": "text"}
+            )
+        else:
+            query_processor = RAGExpectedComponentMetadata(
+                name="retriever", input_mapping={"query": "query"}
+            )
+
+        components = {
+            RAGExpectedComponent.QUERY_PROCESSOR: query_processor,
+            RAGExpectedComponent.DOCUMENT_RETRIEVER: RAGExpectedComponentMetadata(
+                name="retriever",
+                output_mapping={"retrieved_documents": "documents"},
+            ),
+        }
+        if self in (
+            architecture.GENERATION_WITH_EMBEDDING_RETRIEVAL,
+            architecture.GENERATION_WITH_KEYWORD_RETRIEVAL,
+        ):
+            components[RAGExpectedComponent.RESPONSE_GENERATOR] = (
+                RAGExpectedComponentMetadata(
+                    name="generator", output_mapping={"replies": "replies"}
+                )
+            )
+
+        return components
+
+
 class RAGEvaluationHarness(
     EvaluationHarness[RAGEvaluationInput, RAGEvaluationOverrides, RAGEvaluationOutput]
 ):
@@ -35,7 +113,10 @@ class RAGEvaluationHarness(
     def __init__(
         self,
         rag_pipeline: Pipeline,
-        rag_components: Dict[RAGExpectedComponent, RAGExpectedComponentMetadata],
+        rag_components: Union[
+            DefaultRAGArchitecture,
+            Dict[RAGExpectedComponent, RAGExpectedComponentMetadata],
+        ],
         metrics: Set[RAGEvaluationMetric],
     ):
         """
@@ -44,76 +125,23 @@ def __init__(
         :param rag_pipeline:
             The RAG pipeline to evaluate.
         :param rag_components:
-            A mapping of expected components to their metadata.
+            Either a default RAG architecture or a mapping
+            of expected components to their metadata.
         :param metrics:
             The metrics to use during evaluation.
         """
         super().__init__()
 
-        self._validate_rag_components(rag_pipeline, rag_components)
+        if isinstance(rag_components, DefaultRAGArchitecture):
+            rag_components = rag_components.expected_components
+
+        self._validate_rag_components(rag_pipeline, rag_components, metrics)
 
         self.rag_pipeline = rag_pipeline
-        self.rag_components = rag_components
-        self.metrics = metrics
+        self.rag_components = deepcopy(rag_components)
+        self.metrics = deepcopy(metrics)
         self.evaluation_pipeline = default_rag_evaluation_pipeline(metrics)
 
-    @classmethod
-    def default_with_embedding_retriever(
-        cls, rag_pipeline: Pipeline, metrics: Set[RAGEvaluationMetric]
-    ) -> "RAGEvaluationHarness":
-        """
-        Create a default evaluation harness for evaluating RAG pipelines with a query embedder.
-
-        :param rag_pipeline:
-            The RAG pipeline to evaluate. The following assumptions are made:
-            - The query embedder component is named 'query_embedder' and has a 'text' input.
-            - The document retriever component is named 'retriever' and has a 'documents' output.
-            - The response generator component is named 'generator' and has a 'replies' output.
-        :param metrics:
-            The metrics to use during evaluation.
-        """
-        rag_components = {
-            RAGExpectedComponent.QUERY_PROCESSOR: RAGExpectedComponentMetadata(
-                name="query_embedder", input_mapping={"query": "text"}
-            ),
-            RAGExpectedComponent.DOCUMENT_RETRIEVER: RAGExpectedComponentMetadata(
-                name="retriever", output_mapping={"retrieved_documents": "documents"}
-            ),
-            RAGExpectedComponent.RESPONSE_GENERATOR: RAGExpectedComponentMetadata(
-                name="generator", output_mapping={"replies": "replies"}
-            ),
-        }
-
-        return cls(rag_pipeline, rag_components, deepcopy(metrics))
-
-    @classmethod
-    def default_with_keyword_retriever(
-        cls, rag_pipeline: Pipeline, metrics: Set[RAGEvaluationMetric]
-    ) -> "RAGEvaluationHarness":
-        """
-        Create a default evaluation harness for evaluating RAG pipelines with a keyword retriever.
-
-        :param rag_pipeline:
-            The RAG pipeline to evaluate. The following assumptions are made:
-            - The document retriever component is named 'retriever' and has a 'query' input and a 'documents' output.
-            - The response generator component is named 'generator' and has a 'replies' output.
-        :param metrics:
-            The metrics to use during evaluation.
-        """
-        rag_components = {
-            RAGExpectedComponent.QUERY_PROCESSOR: RAGExpectedComponentMetadata(
-                name="retriever", input_mapping={"query": "query"}
-            ),
-            RAGExpectedComponent.DOCUMENT_RETRIEVER: RAGExpectedComponentMetadata(
-                name="retriever", output_mapping={"retrieved_documents": "documents"}
-            ),
-            RAGExpectedComponent.RESPONSE_GENERATOR: RAGExpectedComponentMetadata(
-                name="generator", output_mapping={"replies": "replies"}
-            ),
-        }
-
-        return cls(rag_pipeline, rag_components, deepcopy(metrics))
-
     def run(  # noqa: D102
         self,
         inputs: RAGEvaluationInput,
@@ -141,10 +169,12 @@ def run(  # noqa: D102
                     "retrieved_documents",
                 )
             ],
-            "responses": self._lookup_component_output(
-                RAGExpectedComponent.RESPONSE_GENERATOR, rag_outputs, "replies"
-            ),
         }
+        if RAGExpectedComponent.RESPONSE_GENERATOR in self.rag_components:
+            result_inputs["responses"] = self._lookup_component_output(
+                RAGExpectedComponent.RESPONSE_GENERATOR, rag_outputs, "replies"
+            )
+
         if inputs.ground_truth_answers is not None:
             result_inputs["ground_truth_answers"] = inputs.ground_truth_answers
         if inputs.ground_truth_documents is not None:
@@ -199,6 +229,14 @@ def _generate_eval_run_pipelines(
         rag_pipeline = self._override_pipeline(self.rag_pipeline, rag_overrides)
         eval_pipeline = self._override_pipeline(self.evaluation_pipeline, eval_overrides)  # type: ignore
 
+        included_first_outputs = {
+            self.rag_components[RAGExpectedComponent.DOCUMENT_RETRIEVER].name
+        }
+        if RAGExpectedComponent.RESPONSE_GENERATOR in self.rag_components:
+            included_first_outputs.add(
+                self.rag_components[RAGExpectedComponent.RESPONSE_GENERATOR].name
+            )
+
         return PipelinePair(
             first=rag_pipeline,
             second=eval_pipeline,
@@ -206,10 +244,7 @@ def _generate_eval_run_pipelines(
             map_first_outputs=lambda x: self._aggregate_rag_outputs(  # pylint: disable=unnecessary-lambda
                 x
             ),
-            included_first_outputs={
-                self.rag_components[RAGExpectedComponent.DOCUMENT_RETRIEVER].name,
-                self.rag_components[RAGExpectedComponent.RESPONSE_GENERATOR].name,
-            },
+            included_first_outputs=included_first_outputs,
         )
 
     def _aggregate_rag_outputs(
@@ -217,16 +252,17 @@ def _aggregate_rag_outputs(
     ) -> Dict[str, Dict[str, Any]]:
         aggregate = aggregate_batched_pipeline_outputs(outputs)
 
-        # We only care about the first response from the generator.
-        generator_name = self.rag_components[
-            RAGExpectedComponent.RESPONSE_GENERATOR
-        ].name
-        replies_output_name = self.rag_components[
-            RAGExpectedComponent.RESPONSE_GENERATOR
-        ].output_mapping["replies"]
-        aggregate[generator_name][replies_output_name] = [
-            r[0] for r in aggregate[generator_name][replies_output_name]
-        ]
+        if RAGExpectedComponent.RESPONSE_GENERATOR in self.rag_components:
+            # We only care about the first response from the generator.
+            generator_name = self.rag_components[
+                RAGExpectedComponent.RESPONSE_GENERATOR
+            ].name
+            replies_output_name = self.rag_components[
+                RAGExpectedComponent.RESPONSE_GENERATOR
+            ].output_mapping["replies"]
+            aggregate[generator_name][replies_output_name] = [
+                r[0] for r in aggregate[generator_name][replies_output_name]
+            ]
 
         return aggregate
 
@@ -383,11 +419,46 @@ def _prepare_eval_pipeline_additional_inputs(
     def _validate_rag_components(
         pipeline: Pipeline,
         components: Dict[RAGExpectedComponent, RAGExpectedComponentMetadata],
+        metrics: Set[RAGEvaluationMetric],
     ):
-        for e in RAGExpectedComponent:
-            if e not in components:
+        metric_specific_required_components = {
+            RAGEvaluationMetric.DOCUMENT_MAP: [
+                RAGExpectedComponent.QUERY_PROCESSOR,
+                RAGExpectedComponent.DOCUMENT_RETRIEVER,
+            ],
+            RAGEvaluationMetric.DOCUMENT_MRR: [
+                RAGExpectedComponent.QUERY_PROCESSOR,
+                RAGExpectedComponent.DOCUMENT_RETRIEVER,
+            ],
+            RAGEvaluationMetric.DOCUMENT_RECALL_SINGLE_HIT: [
+                RAGExpectedComponent.QUERY_PROCESSOR,
+                RAGExpectedComponent.DOCUMENT_RETRIEVER,
+            ],
+            RAGEvaluationMetric.DOCUMENT_RECALL_MULTI_HIT: [
+                RAGExpectedComponent.QUERY_PROCESSOR,
+                RAGExpectedComponent.DOCUMENT_RETRIEVER,
+            ],
+            RAGEvaluationMetric.SEMANTIC_ANSWER_SIMILARITY: [
+                RAGExpectedComponent.QUERY_PROCESSOR,
+                RAGExpectedComponent.RESPONSE_GENERATOR,
+            ],
+            RAGEvaluationMetric.FAITHFULNESS: [
+                RAGExpectedComponent.QUERY_PROCESSOR,
+                RAGExpectedComponent.DOCUMENT_RETRIEVER,
+                RAGExpectedComponent.RESPONSE_GENERATOR,
+            ],
+            RAGEvaluationMetric.CONTEXT_RELEVANCE: [
+                RAGExpectedComponent.QUERY_PROCESSOR,
+                RAGExpectedComponent.DOCUMENT_RETRIEVER,
+            ],
+        }
+
+        for m in metrics:
+            required_components = metric_specific_required_components[m]
+            if not all(c in components for c in required_components):
                 raise ValueError(
-                    f"RAG evaluation harness requires metadata for the '{e.value}' component."
+                    f"In order to use the metric '{m}', the RAG evaluation harness requires metadata "
+                    f"for the following components: {required_components}"
                 )
 
         pipeline_outputs = pipeline.outputs(
diff --git a/haystack_experimental/evaluation/harness/rag/parameters.py b/haystack_experimental/evaluation/harness/rag/parameters.py
index abae4251..638e4227 100644
--- a/haystack_experimental/evaluation/harness/rag/parameters.py
+++ b/haystack_experimental/evaluation/harness/rag/parameters.py
@@ -12,7 +12,7 @@
 
 class RAGExpectedComponent(Enum):
     """
-    Represents the basic components in a RAG pipeline that needs to be present for evaluation.
+    Represents the basic components in a RAG pipeline that are, by default, required to be present for evaluation.
 
     Each of these can be separate components in the pipeline or a single component that performs
     multiple tasks.
@@ -27,6 +27,7 @@ class RAGExpectedComponent(Enum):
     DOCUMENT_RETRIEVER = "document_retriever"
 
     #: The component in a RAG pipeline that generates responses based on the query and the retrieved documents.
+    #: Can be optional if the harness is only evaluating retrieval.
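-    #: Expected outputs: `replies` - Name of out containing the LLM responses. Only the first response is used.
+    #: Expected outputs: `replies` - Name of the output containing the LLM responses. Only the first response is used.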
     RESPONSE_GENERATOR = "response_generator"
 
@@ -57,24 +58,31 @@ class RAGEvaluationMetric(Enum):
     """
 
     #: Document Mean Average Precision.
+    #: Required RAG components: Query Processor, Document Retriever.
     DOCUMENT_MAP = "metric_doc_map"
 
     #: Document Mean Reciprocal Rank.
+    #: Required RAG components: Query Processor, Document Retriever.
     DOCUMENT_MRR = "metric_doc_mrr"
 
     #: Document Recall with a single hit.
+    #: Required RAG components: Query Processor, Document Retriever.
     DOCUMENT_RECALL_SINGLE_HIT = "metric_doc_recall_single"
 
     #: Document Recall with multiple hits.
+    #: Required RAG components: Query Processor, Document Retriever.
     DOCUMENT_RECALL_MULTI_HIT = "metric_doc_recall_multi"
 
     #: Semantic Answer Similarity.
+    #: Required RAG components: Query Processor, Response Generator.
     SEMANTIC_ANSWER_SIMILARITY = "metric_sas"
 
     #: Faithfulness.
+    #: Required RAG components: Query Processor, Document Retriever, Response Generator.
     FAITHFULNESS = "metric_faithfulness"
 
     #: Context Relevance.
+    #: Required RAG components: Query Processor, Document Retriever.
     CONTEXT_RELEVANCE = "metric_context_relevance"
 
 
diff --git a/test/evaluation/harness/rag/test_harness.py b/test/evaluation/harness/rag/test_harness.py
index 84a78741..65e528c2 100644
--- a/test/evaluation/harness/rag/test_harness.py
+++ b/test/evaluation/harness/rag/test_harness.py
@@ -5,6 +5,7 @@
 import pytest
 
 from haystack_experimental.evaluation.harness.rag import (
+    DefaultRAGArchitecture,
     RAGEvaluationHarness,
     RAGExpectedComponent,
     RAGExpectedComponentMetadata,
@@ -377,12 +378,27 @@ def test_init_invalid_missing_outputs(self, rag_pipeline):
     def test_init_defaults(
         self, rag_pipeline_with_query_embedder, rag_pipeline_with_keyword_retriever
     ):
-        _ = RAGEvaluationHarness.default_with_embedding_retriever(
-            rag_pipeline_with_query_embedder, metrics={RAGEvaluationMetric.DOCUMENT_MAP}
+        _ = RAGEvaluationHarness(
+            rag_pipeline_with_query_embedder,
+            DefaultRAGArchitecture.GENERATION_WITH_EMBEDDING_RETRIEVAL,
+            metrics={RAGEvaluationMetric.DOCUMENT_MAP},
         )
 
-        _ = RAGEvaluationHarness.default_with_keyword_retriever(
+        _ = RAGEvaluationHarness(
             rag_pipeline_with_keyword_retriever,
+            DefaultRAGArchitecture.GENERATION_WITH_KEYWORD_RETRIEVAL,
+            metrics={RAGEvaluationMetric.DOCUMENT_MAP},
+        )
+
+        _ = RAGEvaluationHarness(
+            rag_pipeline_with_query_embedder,
+            DefaultRAGArchitecture.EMBEDDING_RETRIEVAL,
+            metrics={RAGEvaluationMetric.DOCUMENT_MAP},
+        )
+
+        _ = RAGEvaluationHarness(
+            rag_pipeline_with_keyword_retriever,
+            DefaultRAGArchitecture.KEYWORD_RETRIEVAL,
             metrics={RAGEvaluationMetric.DOCUMENT_MAP},
         )
 
@@ -393,10 +409,11 @@ def test_init_defaults_invalid_missing_inputs(
             ValueError,
             match="Required input 'text' not found in 'query_processor' component named 'query_embedder'",
         ):
-            _ = RAGEvaluationHarness.default_with_embedding_retriever(
+            _ = RAGEvaluationHarness(
                 build_rag_pipeline_with_query_embedder(
                     embedder_name="llm", generator_name="query_embedder"
                 ),
+                DefaultRAGArchitecture.GENERATION_WITH_EMBEDDING_RETRIEVAL,
                 metrics={RAGEvaluationMetric.DOCUMENT_MAP},
             )
 
@@ -404,10 +421,11 @@ def test_init_defaults_invalid_missing_inputs(
             ValueError,
             match="Required input 'query' not found in 'query_processor' component named 'retriever'",
         ):
-            _ = RAGEvaluationHarness.default_with_keyword_retriever(
+            _ = RAGEvaluationHarness(
                 build_rag_pipeline_with_keyword_retriever(
                     retriever_name="llm", generator_name="retriever"
                 ),
+                DefaultRAGArchitecture.GENERATION_WITH_KEYWORD_RETRIEVAL,
                 metrics={RAGEvaluationMetric.DOCUMENT_MAP},
             )
 
@@ -432,8 +450,9 @@ def test_init_defaults_invalid_missing_outputs(self):
             ValueError,
             match="Required output 'replies' not found in 'response_generator' component named 'generator'",
         ):
-            _ = RAGEvaluationHarness.default_with_embedding_retriever(
+            _ = RAGEvaluationHarness(
                 non_conformant_query_embedder_pipeline,
+                DefaultRAGArchitecture.GENERATION_WITH_EMBEDDING_RETRIEVAL,
                 metrics={RAGEvaluationMetric.DOCUMENT_MAP},
             )
 
@@ -441,20 +460,36 @@ def test_init_defaults_invalid_missing_outputs(self):
             ValueError,
             match="Required output 'documents' not found in 'document_retriever' component named 'retriever'",
         ):
-            _ = RAGEvaluationHarness.default_with_keyword_retriever(
+            _ = RAGEvaluationHarness(
                 non_conformant_keyword_retriever_pipeline,
+                DefaultRAGArchitecture.GENERATION_WITH_KEYWORD_RETRIEVAL,
                 metrics={RAGEvaluationMetric.DOCUMENT_MAP},
             )
 
+    def test_init_invalid_component_for_metric(self, rag_pipeline_with_query_embedder):
+        # The retrieval-only default registers no response generator metadata, so a
+        # metric that requires the generator has to be rejected by the constructor.
+        expected_error = "In order to use the metric .* RAG evaluation harness requires metadata"
+        sas_only = {RAGEvaluationMetric.SEMANTIC_ANSWER_SIMILARITY}
+
+        with pytest.raises(ValueError, match=expected_error):
+            _ = RAGEvaluationHarness(
+                rag_pipeline_with_query_embedder,
+                DefaultRAGArchitecture.EMBEDDING_RETRIEVAL,
+                metrics=sas_only,
+            )
+
     def test_run_invalid_ground_truths(self, rag_pipeline_with_query_embedder):
-        harness_map = RAGEvaluationHarness.default_with_embedding_retriever(
+        harness_map = RAGEvaluationHarness(
             rag_pipeline_with_query_embedder,
+            DefaultRAGArchitecture.GENERATION_WITH_EMBEDDING_RETRIEVAL,
             metrics={
                 RAGEvaluationMetric.DOCUMENT_MAP,
             },
         )
-        harness_sas = RAGEvaluationHarness.default_with_embedding_retriever(
+        harness_sas = RAGEvaluationHarness(
             rag_pipeline_with_query_embedder,
+            DefaultRAGArchitecture.GENERATION_WITH_EMBEDDING_RETRIEVAL,
             metrics={
                 RAGEvaluationMetric.SEMANTIC_ANSWER_SIMILARITY,
             },
@@ -502,8 +537,9 @@ def test_run_invalid_additional_input(
         self,
         rag_pipeline_with_query_embedder,
     ):
-        harness = RAGEvaluationHarness.default_with_embedding_retriever(
+        harness = RAGEvaluationHarness(
             rag_pipeline_with_query_embedder,
+            DefaultRAGArchitecture.GENERATION_WITH_EMBEDDING_RETRIEVAL,
             metrics={
                 RAGEvaluationMetric.DOCUMENT_MAP,
             },
@@ -527,8 +563,9 @@ def test_run_invalid_override(
         self,
         rag_pipeline_with_query_embedder,
     ):
-        harness = RAGEvaluationHarness.default_with_embedding_retriever(
+        harness = RAGEvaluationHarness(
             rag_pipeline_with_query_embedder,
+            DefaultRAGArchitecture.GENERATION_WITH_EMBEDDING_RETRIEVAL,
             metrics={
                 RAGEvaluationMetric.DOCUMENT_MAP,
             },
@@ -574,12 +611,13 @@ def test_run_statistical_metrics(self):
             RAGEvaluationMetric.DOCUMENT_RECALL_SINGLE_HIT,
             RAGEvaluationMetric.DOCUMENT_RECALL_MULTI_HIT,
         }
-        harness = RAGEvaluationHarness.default_with_keyword_retriever(
+        harness = RAGEvaluationHarness(
             build_rag_pipeline_with_keyword_retriever(
                 retriever_component=MockKeywordRetriever(),
                 generator_component=MockGenerator(arg=0),
                 generator_name="generator",
             ),
+            DefaultRAGArchitecture.KEYWORD_RETRIEVAL,
             metrics=metrics,
         )
 
@@ -630,12 +668,13 @@ def test_run_model_based_metrics(self, monkeypatch):
             RAGEvaluationMetric.CONTEXT_RELEVANCE,
             RAGEvaluationMetric.SEMANTIC_ANSWER_SIMILARITY,
         }
-        harness = RAGEvaluationHarness.default_with_keyword_retriever(
+        harness = RAGEvaluationHarness(
             build_rag_pipeline_with_keyword_retriever(
                 retriever_component=MockKeywordRetriever(),
                 generator_component=MockGenerator(arg=0),
                 generator_name="generator",
             ),
+            DefaultRAGArchitecture.GENERATION_WITH_KEYWORD_RETRIEVAL,
             metrics=metrics,
         )