feat: Add ContextRelevanceEvaluator to RAG eval harness
shadeMe committed Jun 18, 2024
Parent: 3193cbd. Commit: d4553a6.
Showing 4 changed files with 482 additions and 302 deletions.
@@ -7,6 +7,7 @@
 
 from haystack import Pipeline
 from haystack.components.evaluators import (
+    ContextRelevanceEvaluator,
     DocumentMAPEvaluator,
     DocumentMRREvaluator,
     DocumentRecallEvaluator,
@@ -34,12 +35,17 @@ def default_rag_evaluation_pipeline(
     metric_ctors = {
         RAGEvaluationMetric.DOCUMENT_MAP: DocumentMAPEvaluator,
         RAGEvaluationMetric.DOCUMENT_MRR: DocumentMRREvaluator,
-        RAGEvaluationMetric.DOCUMENT_RECALL_SINGLE_HIT: partial(DocumentRecallEvaluator, mode=RecallMode.SINGLE_HIT),
-        RAGEvaluationMetric.DOCUMENT_RECALL_MULTI_HIT: partial(DocumentRecallEvaluator, mode=RecallMode.MULTI_HIT),
+        RAGEvaluationMetric.DOCUMENT_RECALL_SINGLE_HIT: partial(
+            DocumentRecallEvaluator, mode=RecallMode.SINGLE_HIT
+        ),
+        RAGEvaluationMetric.DOCUMENT_RECALL_MULTI_HIT: partial(
+            DocumentRecallEvaluator, mode=RecallMode.MULTI_HIT
+        ),
         RAGEvaluationMetric.SEMANTIC_ANSWER_SIMILARITY: partial(
             SASEvaluator, model="sentence-transformers/all-MiniLM-L6-v2"
         ),
         RAGEvaluationMetric.ANSWER_FAITHFULNESS: FaithfulnessEvaluator,
+        RAGEvaluationMetric.CONTEXT_RELEVANCE: ContextRelevanceEvaluator,
     }
 
     for metric in metrics:
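For reference, the newly wired ContextRelevanceEvaluator can also be exercised on its own, outside the harness. The following is a minimal sketch of the stock Haystack component, assuming its LLM-backed default (which reads OPENAI_API_KEY from the environment); the question and context strings are invented for illustration:

import os

from haystack.components.evaluators import ContextRelevanceEvaluator

# Assumption: the default evaluator is LLM-backed and needs an OpenAI key.
os.environ.setdefault("OPENAI_API_KEY", "sk-...")  # placeholder, not a real key

evaluator = ContextRelevanceEvaluator()
# One entry in `questions` per query; one list of context strings per question.
result = evaluator.run(
    questions=["When was the Eiffel Tower built?"],
    contexts=[["The Eiffel Tower was completed in 1889.", "Paris is in France."]],
)
print(result["score"])  # aggregate relevance score over all questions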
13 changes: 11 additions & 2 deletions haystack_experimental/evaluation/harness/rag/harness.py
@@ -266,6 +266,12 @@ def _map_rag_eval_pipeline_io(self) -> Dict[str, List[str]]:
                     "replies",
                 ),
             },
+            RAGEvaluationMetric.CONTEXT_RELEVANCE: {
+                "contexts": (
+                    RAGExpectedComponent.DOCUMENT_RETRIEVER,
+                    "retrieved_documents",
+                ),
+            },
         }
 
         outputs_to_inputs: Dict[str, List[str]] = {}
@@ -344,6 +350,11 @@ def _prepare_eval_pipeline_additional_inputs(
                 eval_inputs[metric.value] = {
                     "ground_truth_documents": inputs.ground_truth_documents
                 }
+            elif metric in (
+                RAGEvaluationMetric.ANSWER_FAITHFULNESS,
+                RAGEvaluationMetric.CONTEXT_RELEVANCE,
+            ):
+                eval_inputs[metric.value] = {"questions": inputs.queries}
             elif metric == RAGEvaluationMetric.SEMANTIC_ANSWER_SIMILARITY:
                 if inputs.ground_truth_answers is None:
                     raise ValueError(
@@ -357,8 +368,6 @@
                 eval_inputs[metric.value] = {
                     "ground_truth_answers": inputs.ground_truth_answers
                 }
-            elif metric == RAGEvaluationMetric.ANSWER_FAITHFULNESS:
-                eval_inputs[metric.value] = {"questions": inputs.queries}
 
         return eval_inputs
 
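Taken together, these two hunks (a) feed the retriever's retrieved_documents output into the evaluator's contexts input, and (b) collapse ANSWER_FAITHFULNESS and CONTEXT_RELEVANCE into a single branch, since both take only the queries as their questions input. A toy sketch of what that branch produces, assuming the names below are importable from the parameters module shown in this commit and that the input dataclass's other fields default to None:

from haystack_experimental.evaluation.harness.rag.parameters import (
    RAGEvaluationInput,
    RAGEvaluationMetric,
)

# Assumption: only `queries` is required to construct the input dataclass.
inputs = RAGEvaluationInput(queries=["When was the Eiffel Tower built?"])

# Mirrors the new elif branch in _prepare_eval_pipeline_additional_inputs.
eval_inputs = {}
for metric in (
    RAGEvaluationMetric.ANSWER_FAITHFULNESS,
    RAGEvaluationMetric.CONTEXT_RELEVANCE,
):
    eval_inputs[metric.value] = {"questions": inputs.queries}

assert eval_inputs["metric_context_relevance"] == {
    "questions": ["When was the Eiffel Tower built?"]
}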
3 changes: 3 additions & 0 deletions haystack_experimental/evaluation/harness/rag/parameters.py
@@ -74,6 +74,9 @@ class RAGEvaluationMetric(Enum):
     #: Answer Faithfulness.
     ANSWER_FAITHFULNESS = "metric_answer_faithfulness"
 
+    #: Context Relevance.
+    CONTEXT_RELEVANCE = "metric_context_relevance"
+
 
 @dataclass(frozen=True)
 class RAGEvaluationInput:
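A quick look at the new member in use; the diff suggests its string value also names the evaluator's inputs in the generated eval pipeline (see eval_inputs[metric.value] above). The metric set here is an invented example:

from haystack_experimental.evaluation.harness.rag.parameters import RAGEvaluationMetric

print(RAGEvaluationMetric.CONTEXT_RELEVANCE.value)  # -> "metric_context_relevance"

# Hypothetical metric selection for a harness run:
metrics = {
    RAGEvaluationMetric.DOCUMENT_MAP,
    RAGEvaluationMetric.CONTEXT_RELEVANCE,
}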
(diff for the fourth changed file not shown)
