Skip to content
This repository has been archived by the owner on Sep 3, 2024. It is now read-only.

Commit

Permalink
Merge pull request #51 from dialect-map/custom-paper-metadata-route
Browse files Browse the repository at this point in the history
Use custom paper metadata route
  • Loading branch information
Sinclert authored May 11, 2022
2 parents 416506e + 4984a82 commit d0db650
Show file tree
Hide file tree
Showing 6 changed files with 26 additions and 45 deletions.
2 changes: 1 addition & 1 deletion reqs/requirements-prod.txt
Original file line number Diff line number Diff line change
Expand Up @@ -5,4 +5,4 @@ feedparser==6.0.8

# Private packages
git+ssh://git@github.com/dialect-map/dialect-map-io.git@<tag>#egg=dialect-map-io[gcp]
git+ssh://git@github.com/dialect-map/dialect-map-schemas.git@v0.1.5#egg=dialect-map-schemas
git+ssh://git@github.com/dialect-map/dialect-map-schemas.git@v0.2.0#egg=dialect-map-schemas
5 changes: 0 additions & 5 deletions src/job/mapping/__init__.py

This file was deleted.

10 changes: 0 additions & 10 deletions src/job/mapping/record_routes.py

This file was deleted.

26 changes: 18 additions & 8 deletions src/job/models/arxiv.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@
from dialect_map_schemas import CategoryMembershipSchema
from dialect_map_schemas import PaperSchema
from dialect_map_schemas import PaperAuthorSchema
from dialect_map_schemas import PaperMetadataSchema


@dataclass
Expand Down Expand Up @@ -74,8 +75,19 @@ class ArxivMetadata:
paper_updated_at: datetime

@property
def paper_record(self) -> dict:
"""Adapts the ArXiv metadata object into a Paper record"""
def paper_metadata(self) -> dict:
    """
    Assembles the PaperMetadata record for this ArXiv metadata object.
    The result is keyed by the PaperMetadataSchema field names, nesting
    the paper, author and category membership records under them
    :return: PaperMetadata record
    """

    fields = PaperMetadataSchema()

    # Pairs are evaluated top to bottom (key first, then value), so the
    # paper record is built before the author and membership ones
    entries = (
        (fields.paper.name, self._build_paper_record()),
        (fields.authors.name, self._build_author_records()),
        (fields.memberships.name, self._build_membership_records()),
    )

    return dict(entries)

def _build_paper_record(self) -> dict:
"""Builds an ArXiv paper dictionary out of the ArXiv metadata object"""

schema = PaperSchema()

Expand All @@ -90,9 +102,8 @@ def paper_record(self) -> dict:
schema.updated_at.name: self.paper_updated_at.isoformat(),
}

@property
def author_records(self) -> Iterable[dict]:
"""Adapts the ArXiv metadata object into a list of PaperAuthor records"""
def _build_author_records(self) -> Iterable[dict]:
"""Builds an ArXiv author dictionary out of the ArXiv metadata object"""

schema = PaperAuthorSchema()

Expand All @@ -104,9 +115,8 @@ def author_records(self) -> Iterable[dict]:
schema.created_at.name: self.paper_created_at.isoformat(),
}

@property
def memberships_records(self) -> Iterable[dict]:
"""Adapts the ArXiv metadata object into a list of CategoryMembership records"""
def _build_membership_records(self) -> Iterable[dict]:
"""Builds an ArXiv category membership dictionary out of the ArXiv metadata object"""

schema = CategoryMembershipSchema()

Expand Down
4 changes: 2 additions & 2 deletions src/job/output/api.py
Original file line number Diff line number Diff line change
Expand Up @@ -54,7 +54,7 @@ def create_record(self, record_route: APIRoute, record_data: dict) -> None:
:param record_route: data record route
"""

record_schema = record_route.model_schema()
record_schema = record_route.schema()
record_data = record_schema.load(record_data)

self._create(
Expand All @@ -69,7 +69,7 @@ def archive_record(self, record_route: APIRoute, record_data: dict) -> None:
:param record_route: data record route
"""

record_schema = record_route.model_schema()
record_schema = record_route.schema()
schema_id_field = record_schema.schema_id

self._archive(
Expand Down
24 changes: 5 additions & 19 deletions src/routines.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,13 +7,11 @@
from typing import List

from dialect_map_io.data_output import TextFileWriter
from dialect_map_schemas.routes import DM_PAPER_METADATA_ROUTE

from job.files import FileSystemIterator
from job.input import BaseMetadataSource
from job.input import PDFCorpusSource
from job.mapping import CATEGORY_MEMBER_ROUTE
from job.mapping import PAPER_AUTHOR_ROUTE
from job.mapping import PAPER_ROUTE
from job.models import ArxivMetadata
from job.output import DialectMapOperator
from job.output import LocalFileOperator
Expand Down Expand Up @@ -81,21 +79,6 @@ def __init__(self, file_iter: FileSystemIterator, api_ctl: DialectMapOperator):
self.api_controller = api_ctl
self.sources = [] # type: ignore

def _dispatch_record(self, record: ArxivMetadata) -> None:
    """
    Sends every record derived from an ArXiv metadata object to the destination API
    :param record: metadata record to dispatch
    """

    create = self.api_controller.create_record

    # Memberships and authors are only sent once the paper record has been created
    create(PAPER_ROUTE, record.paper_record)

    for membership_record in record.memberships_records:
        create(CATEGORY_MEMBER_ROUTE, membership_record)

    for author_record in record.author_records:
        create(PAPER_AUTHOR_ROUTE, author_record)

def _get_metadata_records(self, paper_id: str) -> List[ArxivMetadata]:
"""
Gets the metadata records from the sources given an ArXiv paper ID
Expand Down Expand Up @@ -135,4 +118,7 @@ def run(self, *args) -> None:
continue

for record in records:
self._dispatch_record(record)
self.api_controller.create_record(
DM_PAPER_METADATA_ROUTE,
record.paper_metadata,
)

0 comments on commit d0db650

Please sign in to comment.