Skip to content

Commit

Permalink
Kotahi DocMaps API: Match reviews case-insensitive and break on first hyphen break line within public reviews (#249)
Browse files: Browse the repository at this point in the history

* Kotahi DocMaps API: Match reviews case-insensitive

* Ignore text after first hyphen break line when extracting public reviews
  • Loading branch information
de-code authored Nov 8, 2024
1 parent c0f8ffc commit 72e9915
Show file tree
Hide file tree
Showing 2 changed files with 13 additions and 2 deletions.
4 changes: 2 additions & 2 deletions data_hub_api/kotahi_docmaps/v1/codecs/evaluation.py
Original file line number Diff line number Diff line change
Expand Up @@ -41,7 +41,7 @@ def extract_elife_assessments_from_email(email_body: Optional[str]) -> Optional[

def extract_elife_public_reviews_from_email(email_body: Optional[str]) -> Optional[str]:
if email_body:
pattern = r'(?s)([pP]ublic [rR]eviews?:?\s*\n.*)-{10,}'
pattern = r'(?s)([pP]ublic [rR]eviews?:?\s*\n.*?)-{10,}'
match = re.search(pattern, email_body)
if match:
extracted_text = match.group(1).strip()
Expand All @@ -53,7 +53,7 @@ def extract_elife_public_reviews_from_email(email_body: Optional[str]) -> Option
def extract_public_review_parts(public_reviews: Optional[str]):
if public_reviews:
pattern = r'(?=Reviewer #\d+ \(Public Review\):?)'
parts = re.split(pattern, public_reviews)
parts = re.split(pattern, public_reviews, flags=re.IGNORECASE)
parts = [part.strip() for part in parts if part.strip()]
if len(parts) > 1:
return parts[1:]
Expand Down
11 changes: 11 additions & 0 deletions tests/unit_tests/kotahi_docmaps/v1/codecs/evaluation_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -63,6 +63,13 @@ def test_should_extract_elife_public_reviews_from_email(self):
).strip()
assert result == PUBLIC_REVIEWS_1.strip()

def test_should_ignore_text_after_first_hyphen_break(self):
result = extract_elife_public_reviews_from_email(
EMAIL_BODY_WITH_ELIFE_ASSESSMENT_AND_PUBLIC_REVIEWS_1
+ '\n----------\nmore text'
).strip()
assert result == PUBLIC_REVIEWS_1.strip()

def test_should_return_none_if_there_is_no_public_reviews_available(self):
assert not extract_elife_public_reviews_from_email(EMAIL_BODY_1)

Expand All @@ -83,6 +90,10 @@ def test_should_extract_all_public_reviews_individually(self):
assert result[1] == REVIEW_2.strip()
assert result[2] == REVIEW_3.strip()

def test_should_match_case_insensitive(self):
result = extract_public_review_parts(PUBLIC_REVIEWS_1.lower())
assert result[0] == REVIEW_1.strip().lower()

def test_should_return_none_when_there_is_no_public_reviews(self):
assert not extract_public_review_parts(None)

Expand Down

0 comments on commit 72e9915

Please sign in to comment.