Commit f0f9fca

Add search endpoints and indexing.

mcantelon committed Dec 2, 2015
1 parent 97f7192 commit f0f9fca

Showing 7 changed files with 349 additions and 6 deletions.
5 changes: 5 additions & 0 deletions storage_service/locations/api/resources.py
@@ -4,6 +4,7 @@
# stdlib, alphabetical
import json
import logging
from multiprocessing import Process
import os
import shutil
import urllib
@@ -475,6 +476,10 @@ def obj_create(self, bundle, **kwargs):
        if bundle.obj.package_type in (Package.AIP, Package.AIC, Package.DIP) and bundle.obj.current_location.purpose in (Location.AIP_STORAGE, Location.DIP_STORAGE):
            # Store AIP/AIC
            bundle.obj.store_aip(origin_location, origin_path)

            # Asynchronously index AIP files
            p = Process(target=bundle.obj.index_file_data_from_aip_mets)
            p.start()
        elif bundle.obj.package_type in (Package.TRANSFER,) and bundle.obj.current_location.purpose in (Location.BACKLOG,):
            # Move transfer to backlog
            bundle.obj.backlog_transfer(origin_location, origin_path)
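The indexing call above is started in a separate process so that the API request which stored the AIP can return without waiting for the METS to be parsed. A minimal standalone sketch of this fire-and-forget pattern, with slow_index and the UUID as invented stand-ins for Package.index_file_data_from_aip_mets and a real package:

from multiprocessing import Process

def slow_index(package_uuid):
    # Stand-in for index_file_data_from_aip_mets: parse the AIP METS and
    # record data about each file it describes.
    print('indexing files for package %s' % package_uuid)

p = Process(target=slow_index, args=('11111111-2222-3333-4444-555555555555',))
p.start()  # returns immediately; indexing continues in the child process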
3 changes: 3 additions & 0 deletions storage_service/locations/api/search/__init__.py
@@ -0,0 +1,3 @@
# Common
# May have multiple models, so import * and use __all__ in file.
from router import *
159 changes: 159 additions & 0 deletions storage_service/locations/api/search/router.py
@@ -0,0 +1,159 @@
import django_filters
from rest_framework import routers, serializers, viewsets, filters
from rest_framework.decorators import list_route
from rest_framework.response import Response

from django.db.models import Sum

from locations import models


class CaseInsensitiveBooleanFilter(django_filters.Filter):
    """
    Accepts boolean query parameters case-insensitively, so clients can pass
    "true"/"false" as well as "True"/"False".
    """
    def filter(self, qs, value):
        if value is not None:
            lc_value = value.lower()
            if lc_value == "true":
                value = True
            elif lc_value == "false":
                value = False
            return qs.filter(**{self.name: value})
        return qs


class PipelineField(serializers.RelatedField):
    """
    Used to show UUID of related pipelines
    """
    def to_representation(self, value):
        return value.uuid


class LocationSerializer(serializers.HyperlinkedModelSerializer):
    """
    Serialize Location model data
    """
    space = serializers.ReadOnlyField(source='space.uuid')
    pipelines = PipelineField(many=True, read_only=True, source='pipeline')

    class Meta:
        model = models.Location
        fields = ('uuid', 'space', 'pipelines', 'purpose', 'quota', 'used', 'enabled')


class LocationFilter(django_filters.FilterSet):
    """
    Filter for searching Location data
    """
    uuid = django_filters.CharFilter(name='uuid')
    space = django_filters.CharFilter(name='space')
    purpose = django_filters.CharFilter(name='purpose')
    enabled = CaseInsensitiveBooleanFilter(name='enabled')

    class Meta:
        model = models.Location
        fields = ['uuid', 'space', 'purpose', 'enabled']


class LocationViewSet(viewsets.ReadOnlyModelViewSet):
    """
    Search API view for Location model data
    """
    queryset = models.Location.objects.all()
    serializer_class = LocationSerializer
    filter_backends = (filters.DjangoFilterBackend,)
    filter_class = LocationFilter


class PackageSerializer(serializers.HyperlinkedModelSerializer):
    """
    Serialize Package model data
    """
    origin_pipeline = serializers.ReadOnlyField(source='origin_pipeline.uuid')
    current_location = serializers.ReadOnlyField(source='current_location.uuid')
    pointer_file_location = serializers.ReadOnlyField(source='pointer_file_location.uuid')

    class Meta:
        model = models.Package
        fields = ('uuid', 'current_path', 'size', 'origin_pipeline', 'current_location', 'package_type', 'status', 'pointer_file_location', 'pointer_file_path')


class PackageFilter(django_filters.FilterSet):
    """
    Filter for searching Package data
    """
    min_size = django_filters.NumberFilter(name='size', lookup_type='gte')
    max_size = django_filters.NumberFilter(name='size', lookup_type='lte')
    pipeline = django_filters.CharFilter(name='origin_pipeline')
    location = django_filters.CharFilter(name='current_location')
    package_type = django_filters.CharFilter(name='package_type')

    class Meta:
        model = models.Package
        fields = ['uuid', 'min_size', 'max_size', 'pipeline', 'location', 'package_type', 'status', 'pointer_file_location']


class PackageViewSet(viewsets.ReadOnlyModelViewSet):
    """
    Search API view for Package model data
    """
    queryset = models.Package.objects.all()
    serializer_class = PackageSerializer
    filter_backends = (filters.DjangoFilterBackend,)
    filter_class = PackageFilter


class FileSerializer(serializers.HyperlinkedModelSerializer):
    """
    Serialize File model data
    """
    pipeline = serializers.ReadOnlyField(source='origin.uuid')

    class Meta:
        model = models.File
        fields = ('uuid', 'name', 'file_type', 'size', 'format_name', 'pronom_id', 'pipeline', 'source_package', 'normalized', 'validated', 'ingestion_time')


class FileFilter(django_filters.FilterSet):
    """
    Filter for searching File data
    """
    min_size = django_filters.NumberFilter(name='size', lookup_type='gte')
    max_size = django_filters.NumberFilter(name='size', lookup_type='lte')
    pipeline = django_filters.CharFilter(name='origin')
    package = django_filters.CharFilter(name='source_package')
    name = django_filters.CharFilter(name='name', lookup_type='icontains')
    normalized = CaseInsensitiveBooleanFilter(name='normalized')
    ingestion_time = django_filters.DateFilter(name='ingestion_time', lookup_type='contains')

    class Meta:
        model = models.File
        fields = ['uuid', 'name', 'file_type', 'min_size', 'max_size', 'format_name', 'pronom_id', 'pipeline', 'source_package', 'normalized', 'validated', 'ingestion_time']


class FileViewSet(viewsets.ReadOnlyModelViewSet):
    """
    Search API view for File model data

    The custom "stats" endpoint returns the count and total size in bytes of
    the files matching the current filters.
    """
    queryset = models.File.objects.all()
    serializer_class = FileSerializer
    filter_backends = (filters.DjangoFilterBackend,)
    filter_class = FileFilter

    @list_route(methods=['get'])
    def stats(self, request):
        filtered = FileFilter(request.GET, queryset=self.get_queryset())
        count = filtered.qs.count()
        summary = filtered.qs.aggregate(Sum('size'))
        return Response({'count': count, 'total_size': summary['size__sum']})


# Route location, package, and file search API requests
router = routers.DefaultRouter()
router.register(r'location', LocationViewSet)
router.register(r'package', PackageViewSet)
router.register(r'file', FileViewSet)
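For reference, a minimal sketch of how FileFilter and the aggregation behind the stats route behave when used directly from a Django shell (the filter values are invented):

from django.db.models import Sum

from locations import models
from locations.api.search.router import FileFilter

queryset = models.File.objects.all()
filtered = FileFilter({'normalized': 'TRUE', 'min_size': '1000000'}, queryset=queryset)
print(filtered.qs.count())                 # number of files matching the filters
print(filtered.qs.aggregate(Sum('size')))  # {'size__sum': total bytes, or None if nothing matched}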
9 changes: 8 additions & 1 deletion storage_service/locations/api/urls.py
@@ -1,9 +1,13 @@
from django.conf.urls import include, url
from rest_framework import routers, serializers, viewsets, filters, generics
from tastypie.api import Api
from locations.api import v1, v2

from locations import models
from locations.api import v1, v2
from locations.api.search import router
from locations.api.sword import views


v1_api = Api(api_name='v1')
v1_api.register(v1.SpaceResource())
v1_api.register(v1.LocationResource())
@@ -16,9 +20,12 @@
v2_api.register(v2.PackageResource())
v2_api.register(v2.PipelineResource())


urlpatterns = [
    url(r'', include(v1_api.urls)),
    url(r'v1/sword/$', views.service_document, name='sword_service_document'),
    url(r'', include(v2_api.urls)),
    url(r'v2/sword/$', views.service_document, name='sword_service_document'),
    url(r'v1/search/', include(router.urls)),
    url(r'v2/search/', include(router.urls)),
]
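With the router mounted under both API versions, the new endpoints can be exercised over HTTP. A hedged client-side example using the requests library; the host and the /api/ mount point are assumptions based on a default install, and authentication (if the deployment requires it) is omitted:

import requests

BASE = 'http://localhost:8000/api/v2/search'  # assumed host and mount point

# Files over 1 MB that have been normalized; booleans are case-insensitive
files = requests.get(BASE + '/file/',
                     params={'min_size': 1048576, 'normalized': 'true'})
print(files.json())

# Count and total size of the same result set via the custom stats route
stats = requests.get(BASE + '/file/stats/',
                     params={'min_size': 1048576, 'normalized': 'true'})
print(stats.json())  # e.g. {'count': 12, 'total_size': 73400320}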
56 changes: 56 additions & 0 deletions storage_service/locations/migrations/0005_search_api.py
@@ -0,0 +1,56 @@
# -*- coding: utf-8 -*-
from __future__ import unicode_literals

from django.db import models, migrations


class Migration(migrations.Migration):

    dependencies = [
        ('locations', '0004_v0_7'),
    ]

    operations = [
        migrations.AddField(
            model_name='file',
            name='file_type',
            field=models.CharField(max_length=8, null=True, choices=[(b'AIP', b'AIP'), (b'transfer', b'Transfer')]),
            preserve_default=True,
        ),
        migrations.AddField(
            model_name='file',
            name='format_name',
            field=models.TextField(max_length=128, blank=True),
            preserve_default=True,
        ),
        migrations.AddField(
            model_name='file',
            name='ingestion_time',
            field=models.DateTimeField(null=True),
            preserve_default=True,
        ),
        migrations.AddField(
            model_name='file',
            name='normalized',
            field=models.NullBooleanField(default=None, help_text=b'Whether or not file has been normalized'),
            preserve_default=True,
        ),
        migrations.AddField(
            model_name='file',
            name='pronom_id',
            field=models.TextField(max_length=128, blank=True),
            preserve_default=True,
        ),
        migrations.AddField(
            model_name='file',
            name='size',
            field=models.IntegerField(default=0, help_text=b'Size in bytes of the file'),
            preserve_default=True,
        ),
        migrations.AddField(
            model_name='file',
            name='validated',
            field=models.NullBooleanField(default=None, help_text=b'Whether or not file has been validated'),
            preserve_default=True,
        ),
    ]
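After applying this migration (python manage.py migrate locations), the new columns exist but hold their defaults until a package is indexed. A quick, hedged sanity check from a Django shell:

from locations import models

f = models.File.objects.first()
if f is not None:
    # New fields default to 0 / '' / None until index_file_data_from_aip_mets runs
    print(f.size, f.format_name, f.pronom_id, f.normalized, f.ingestion_time)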
18 changes: 18 additions & 0 deletions storage_service/locations/models/event.py
@@ -132,16 +132,34 @@ class File(models.Model):
        help_text="Unique identifier")
    package = models.ForeignKey('Package', null=True)
    name = models.TextField(max_length=1000)
    ingestion_time = models.DateTimeField(null=True)

    AIP = "AIP"
    TRANSFER = "transfer"
    FILE_TYPE_CHOICES = (
        (AIP, 'AIP'),
        (TRANSFER, 'Transfer')
    )
    file_type = models.CharField(max_length=8, choices=FILE_TYPE_CHOICES, null=True)

    source_id = models.TextField(max_length=128)
    source_package = models.TextField(blank=True,
        help_text="Unique identifier of originating unit")
    size = models.IntegerField(default=0, help_text='Size in bytes of the file')
    format_name = models.TextField(blank=True, max_length=128)
    pronom_id = models.TextField(blank=True, max_length=128)
    # Sized to fit sha512
    checksum = models.TextField(max_length=128)
    stored = models.BooleanField(default=False)
    accessionid = models.TextField(blank=True,
        help_text="Accession ID of originating transfer")
    origin = UUIDField(editable=False, unique=False, version=4, blank=True,
        help_text="Unique identifier of originating Archivematica dashboard")
    normalized = models.NullBooleanField(default=None,
        help_text="Whether or not file has been normalized")
    validated = models.NullBooleanField(default=None,
        help_text="Whether or not file has been validated")

    class Meta:
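To show how the new model fields fit together, a hypothetical sketch of the kind of record the indexing step creates for each file listed in an AIP METS; all values here are invented, and in the real code Package.index_file_data_from_aip_mets populates them from the METS:

from django.utils import timezone

from locations import models

models.File.objects.create(
    name='objects/photo.tif',                    # path of the file inside the AIP
    source_id='file-uuid-from-mets',             # invented placeholder
    source_package='originating-transfer-uuid',  # invented placeholder
    checksum='sha512-checksum-of-the-file',      # invented placeholder
    file_type=models.File.AIP,
    size=2048000,
    format_name='Tagged Image File Format',
    pronom_id='fmt/353',
    normalized=True,
    validated=None,
    ingestion_time=timezone.now(),
)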