Skip to content

Commit

Permalink
Add a prototype of Sample::developmental_stage backfill script
Browse files Browse the repository at this point in the history
  • Loading branch information
arkid15r committed Dec 11, 2023
1 parent 07d3759 commit d499901
Show file tree
Hide file tree
Showing 3 changed files with 65 additions and 0 deletions.
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
# Generated by Django 3.2.18 on 2023-12-08 00:45

from django.db import migrations, models


class Migration(migrations.Migration):
    """Add the nullable ``last_refreshed`` timestamp column to ``Sample``."""

    # Must run after the migration that introduced Sample.developmental_stage.
    dependencies = [("data_refinery_common", "0074_sample_developmental_stage")]

    operations = [
        migrations.AddField(
            model_name="sample",
            name="last_refreshed",
            field=models.DateTimeField(null=True, auto_now=True),
        ),
    ]
4 changes: 4 additions & 0 deletions common/data_refinery_common/models/sample.py
Original file line number Diff line number Diff line change
Expand Up @@ -94,6 +94,10 @@ def __str__(self):
# Row creation time; defaults to the current time and is marked non-editable.
created_at = models.DateTimeField(editable=False, default=timezone.now)
# Last modification time; defaults to the current time.
last_modified = models.DateTimeField(default=timezone.now)

# Auxiliary timestamp for tracking the latest metadata refresh.
# Originally added to support backfilling of Sample::developmental_stage values.
# NOTE(review): auto_now=True makes Django set this whenever the model is saved,
# not only on metadata refreshes -- confirm that this is the intended behavior.
last_refreshed = models.DateTimeField(auto_now=True, null=True)

def save(self, *args, **kwargs):
"""On save, update timestamps"""
current_time = timezone.now()
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,43 @@
import time

from django.core.management.base import BaseCommand

from data_refinery_common.logging import get_and_configure_logger
from data_refinery_common.models import Sample
from data_refinery_foreman.surveyor.sra import SraSurveyor

logger = get_and_configure_logger(__name__)


class Command(BaseCommand):
    """Backfill metadata for samples that have no developmental stage yet.

    Selects up to ``--limit`` samples of the given ``--source`` whose
    ``developmental_stage`` and ``last_refreshed`` are both NULL, re-fetches
    their metadata through ``SraSurveyor`` and saves each sample.

    NOTE(review): ``Sample.last_refreshed`` is declared with ``auto_now=True``,
    so presumably any save of a sample sets it -- confirm that the ``isnull``
    filter below still selects the intended rows.
    """

    help = "Refresh metadata of samples that have no developmental stage set."

    def add_arguments(self, parser):
        """Register the ``--limit`` and ``--source`` command line options."""
        parser.add_argument(
            "--limit",
            default=1000,
            type=int,
            help="Number of samples to refresh",
        )
        parser.add_argument(
            "--source",
            choices=("SRA",),
            required=True,
            type=str,
            # Keep the help text in sync with ``choices``: only SRA is accepted.
            help="Source name (only SRA is currently supported)",
        )

    def handle(self, *args, **options):
        """Refresh and save the selected samples one at a time.

        A failure on one sample is logged and does not stop the run.
        """
        samples = Sample.objects.filter(
            developmental_stage__isnull=True,
            last_refreshed__isnull=True,
            source_database=options["source"],
        ).order_by("id")[: options["limit"]]

        for sample in samples:
            logger.info(f"Refreshing metadata for a sample {sample.accession_code}")
            try:
                _, sample_metadata = SraSurveyor.gather_all_metadata(sample.accession_code)
                # NOTE(review): the sample itself is not passed to the helper --
                # confirm its signature and that the metadata reaches `sample`.
                SraSurveyor._apply_harmonized_metadata_to_sample(sample_metadata)
            except Exception as e:
                # Best effort: log the failure and continue with the next sample.
                logger.exception(e)
            finally:
                # Saved on failure as well; presumably so that last_refreshed
                # gets set and the sample is not selected again -- TODO confirm.
                sample.save()

            # Pause between samples; presumably throttles upstream requests.
            time.sleep(1)

0 comments on commit d499901

Please sign in to comment.