Skip to content

Commit

Permalink
Merge pull request #572 from NASA-IMPACT/566-write-the-rule-interpret…
Browse files Browse the repository at this point in the history
…er-utils-file

Update title pattern by applying rule interpretation
  • Loading branch information
code-geek authored Jan 31, 2024
2 parents a5d29d1 + 0b863f5 commit 099fc39
Show file tree
Hide file tree
Showing 2 changed files with 19 additions and 1 deletion.
12 changes: 11 additions & 1 deletion sde_collections/models/pattern.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@

from django.db import models

from ..pattern_interpreter import interpret_title_pattern
from .collection_choice_fields import DocumentTypes


Expand Down Expand Up @@ -149,7 +150,16 @@ class TitlePattern(BaseMatchPattern):

def apply(self) -> None:
matched_urls = self.matched_urls()
matched_urls.update(generated_title=self.title_pattern)

# since this is not running in celery, this is a bit slow
for url, scraped_title in matched_urls.values_list("url", "scraped_title"):
generated_title = interpret_title_pattern(
url, scraped_title, self.title_pattern
)
matched_urls.filter(url=url, scraped_title=scraped_title).update(
generated_title=generated_title
)

candidate_url_ids = list(matched_urls.values_list("id", flat=True))
self.candidate_urls.through.objects.bulk_create(
objs=[
Expand Down
8 changes: 8 additions & 0 deletions sde_collections/pattern_interpreter.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
def interpret_title_pattern(url, scraped_title, title_pattern):
    """Return the title produced by applying ``title_pattern`` to a page.

    Every occurrence of the literal placeholder ``{title}`` in
    ``title_pattern`` is replaced with ``scraped_title``. A pattern without
    the placeholder is returned unchanged.

    Args:
        url: URL of the page the title belongs to. Currently unused; kept so
            the signature stays stable for callers.
        scraped_title: Title scraped from the page. ``None`` is treated as an
            empty string.
        title_pattern: Pattern string, optionally containing ``{title}``.

    Returns:
        The interpreted title string.
    """
    placeholder = "{title}"
    if placeholder not in title_pattern:
        return title_pattern

    # str.replace raises TypeError when given None, so a missing scraped
    # title is substituted as an empty string instead of aborting the caller.
    replacement = "" if scraped_title is None else scraped_title
    return title_pattern.replace(placeholder, replacement)

0 comments on commit 099fc39

Please sign in to comment.