Skip to content

Commit

Permalink
chore: Rename ExtractiveReader's input from document to documents to match its type List[Document] (#6164)
Browse files (browse the repository at this point in the history)

* rename input param, add doc string, add example

* add release note (reno)
  • Loading branch information
julian-risch authored Oct 24, 2023
1 parent 1f4ed3c commit fe3bc15
Show file tree
Hide file tree
Showing 2 changed files with 22 additions and 3 deletions.
21 changes: 18 additions & 3 deletions haystack/preview/components/readers/extractive.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,16 @@ class ExtractiveReader:
"""
A component for performing extractive QA.
Every possible answer span is assigned a confidence score independent of other answer spans. This fixes a common issue of other implementations which make comparisons across documents harder by normalising each document's answers independently.
Example usage:
```python
p = Pipeline()
p.add_component(instance=InMemoryBM25Retriever(document_store=InMemoryDocumentStore()), name="retriever")
p.add_component(instance=ExtractiveReader(), name="reader")
p.connect("retriever", "reader")
question = "Who lives in Berlin?"
p.run({"retriever": {"query": question}, "reader": {"query": question}})
```
"""

def __init__(
Expand Down Expand Up @@ -256,7 +266,7 @@ def _nest_answers(
def run(
self,
query: str,
document: List[Document],
documents: List[Document],
top_k: Optional[int] = None,
confidence_threshold: Optional[float] = None,
max_seq_length: Optional[int] = None,
Expand All @@ -267,9 +277,14 @@ def run(
):
"""
Performs extractive QA on the given documents using the given query.
:param query: Query string.
:param documents: List of Documents to search for an answer to the query.
:param top_k: The maximum number of answers to return.
:return: List of ExtractedAnswers sorted by (desc.) answer score.
"""
queries = [query] # Temporary solution until we have decided what batching should look like in v2
documents = [document]
nested_documents = [documents]
if self.model is None:
raise ComponentError("The component was not warmed up. Run 'warm_up()' before calling 'run()'.")

Expand All @@ -281,7 +296,7 @@ def run(
answers_per_seq = answers_per_seq or self.answers_per_seq or top_k or 20
no_answer = no_answer if no_answer is not None else self.no_answer

flattened_queries, flattened_documents, query_ids = self._flatten_documents(queries, documents)
flattened_queries, flattened_documents, query_ids = self._flatten_documents(queries, nested_documents)
input_ids, attention_mask, sequence_ids, encodings, query_ids, document_ids = self._preprocess(
flattened_queries, flattened_documents, max_seq_length, query_ids, stride
)
Expand Down
4 changes: 4 additions & 0 deletions releasenotes/notes/rename-reader-input-af739955bf4f71b5.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
---
preview:
- |
Renamed ExtractiveReader's input from `document` to `documents` to match its type `List[Document]`.

0 comments on commit fe3bc15

Please sign in to comment.