diff --git a/haystack_experimental/evaluation/harness/rag/evaluation_pipeline.py b/haystack_experimental/evaluation/harness/rag/evaluation_pipeline.py index 08c295c8..581a3150 100644 --- a/haystack_experimental/evaluation/harness/rag/evaluation_pipeline.py +++ b/haystack_experimental/evaluation/harness/rag/evaluation_pipeline.py @@ -44,8 +44,8 @@ def default_rag_evaluation_pipeline( RAGEvaluationMetric.SEMANTIC_ANSWER_SIMILARITY: partial( SASEvaluator, model="sentence-transformers/all-MiniLM-L6-v2" ), - RAGEvaluationMetric.ANSWER_FAITHFULNESS: FaithfulnessEvaluator, - RAGEvaluationMetric.CONTEXT_RELEVANCE: ContextRelevanceEvaluator, + RAGEvaluationMetric.ANSWER_FAITHFULNESS: partial(FaithfulnessEvaluator, raise_on_failure=False), + RAGEvaluationMetric.CONTEXT_RELEVANCE: partial(ContextRelevanceEvaluator, raise_on_failure=False), } for metric in metrics: