From f14c166bdd93d3f8420a52b6214249979b1f1025 Mon Sep 17 00:00:00 2001 From: Anton Suharev Date: Thu, 28 Mar 2024 01:59:18 -0700 Subject: [PATCH] fix: refactor CodeMetaMetadata and DataCiteMetadata to utilize CommonMetadata --- django/library/models.py | 474 +++++++++++++++++++++++---------------- 1 file changed, 276 insertions(+), 198 deletions(-) diff --git a/django/library/models.py b/django/library/models.py index d1d0c056b..111553862 100644 --- a/django/library/models.py +++ b/django/library/models.py @@ -1510,13 +1510,18 @@ def bagit_info(self): } @cached_property - def codemeta(self): - """Returns a CodeMeta object that can be dumped to json""" - return CodeMeta.build(self) + def common_metadata(self): + """Returns a CommonMetadata object used to build specific metadata objects: for example CodeMeta or DataCite""" + return CommonMetadata(self) + + @cached_property + def codemeta_metadata(self): + """Returns a CodeMetaMetadata object that can be dumped to json""" + return CodeMetaMetadata.build(self.common_metadata) @cached_property def datacite_metadata(self): - return DataCiteMetadata.build(self) + return DataCiteMetadata.build(self.common_metadata) @property def is_draft(self): @@ -1558,7 +1563,7 @@ def get_status_color(self): @property def codemeta_json(self): - return self.codemeta.to_json() + return self.codemeta_metadata.to_json() def create_or_update_codemeta(self, force=True): return self.get_fs_api().create_or_update_codemeta(force=force) @@ -2165,9 +2170,11 @@ def send_candidate_reviewer_email(self, resend=False): cc=[settings.REVIEW_EDITOR_EMAIL], ) self.review.log( - action=PeerReviewEvent.INVITATION_SENT - if resend - else PeerReviewEvent.INVITATION_RESENT, + action=( + PeerReviewEvent.INVITATION_SENT + if resend + else PeerReviewEvent.INVITATION_RESENT + ), author=self.editor, message=f"{self.editor} sent an invitation to candidate reviewer {self.candidate_reviewer}", ) @@ -2351,14 +2358,16 @@ def __str__(self): return f"[peer review] {invitation.candidate_reviewer} submitted? {self.reviewer_submitted}, recommendation: {self.get_recommendation_display()}" -class CodeMeta: +class CommonMetadata: DATE_PUBLISHED_FORMAT = "%Y-%m-%d" + COMSES_ORGANIZATION = { "@id": "https://ror.org/015bsfc29", "@type": "Organization", "name": "CoMSES Net", "url": "https://www.comses.net", } + INITIAL_DATA = { "@context": "http://schema.org", "@type": "SoftwareSourceCode", @@ -2373,104 +2382,232 @@ class CodeMeta: "provider": COMSES_ORGANIZATION, } - def __init__(self, metadata: dict): - if not metadata: - raise ValueError( - "Initialize with a base dictionary with codemeta terms mapped to JSON-serializable values" + def __init__(self, release: CodebaseRelease): + codebase = release.codebase + + self.codebase_release = release + + self.initial_data = self.INITIAL_DATA + self.name = codebase.title + self.abstract = codebase.summary + self.description = codebase.description.raw + self.version = release.version_number + self.targetProduct = self.convert_target_product() + self.programmingLanguage = self.convert_programming_languages() + self.author = self.convert_authors() + self.contributors = self.convert_contributors() + self.identifier = release.permanent_url + self.dateCreated = release.date_created.isoformat() + self.dateModified = release.last_modified.isoformat() + self.keywords = self.convert_keywords() + self.runtimePlatform = self.convert_platforms() + self.url = release.permanent_url + + self.citation = [] + if codebase.references_text: + self.citation.append( + {"@type": "CreativeWork", "text": codebase.references_text} + ) + if codebase.replication_text: + self.citation.append( + {"@type": "CreativeWork", "text": codebase.replication_text} + ) + if codebase.associated_publication_text: + self.citation.append( + {"@type": "CreativeWork", "text": codebase.associated_publication_text} ) - self.metadata = metadata - @classmethod - def convert_target_product(cls, codebase_release: CodebaseRelease): + if release.live: + self.datePublished = release.last_published_on.strftime( + self.DATE_PUBLISHED_FORMAT + ) + self.copyrightYear = release.last_published_on.year + + if release.first_published_at: + self.first_published_at = release.first_published_at + + if release.license: + self.license = release.license + + if codebase.repository_url: + self.codeRepository = codebase.repository_url + + if release.release_notes: + self.releaseNotes = release.release_notes.raw + + # codemeta @id + self.id = release.permanent_url + + def convert_target_product(self): target_product = { "@type": "SoftwareApplication", - "name": codebase_release.title, - "operatingSystem": codebase_release.os, + "name": self.codebase_release.title, + "operatingSystem": self.codebase_release.os, "applicationCategory": "Computational Model", } - if codebase_release.live: + if self.codebase_release.live: target_product.update( - downloadUrl=f"{settings.BASE_URL}{codebase_release.get_download_url()}", - releaseNotes=codebase_release.release_notes.raw, - softwareVersion=codebase_release.version_number, - identifier=codebase_release.permanent_url, - sameAs=codebase_release.comses_permanent_url, + downloadUrl=f"{settings.BASE_URL}{self.codebase_release.get_download_url()}", + releaseNotes=self.codebase_release.release_notes.raw, + softwareVersion=self.codebase_release.version_number, + identifier=self.codebase_release.permanent_url, + sameAs=self.codebase_release.comses_permanent_url, ) - image_urls = codebase_release.codebase.get_image_urls() + image_urls = self.codebase_release.codebase.get_image_urls() if image_urls: target_product.update(screenshot=f"{settings.BASE_URL}{image_urls[0]}") return target_product - @classmethod - def convert_keywords(cls, codebase_release: CodebaseRelease): - return [tag.name for tag in codebase_release.codebase.tags.all()] + def convert_keywords(self): + return [tag.name for tag in self.codebase_release.codebase.tags.all()] - @classmethod - def convert_platforms(cls, codebase_release: CodebaseRelease): - return [tag.name for tag in codebase_release.platform_tags.all()] + def convert_platforms(self): + return [tag.name for tag in self.codebase_release.platform_tags.all()] - @classmethod - def convert_authors(cls, codebase_release: CodebaseRelease): + def convert_authors(self): return [ author.contributor.to_codemeta() - for author in ReleaseContributor.objects.authors(codebase_release) + for author in ReleaseContributor.objects.authors(self.codebase_release) ] - @classmethod - def convert_programming_languages(cls, codebase_release: CodebaseRelease): + def convert_contributors(self): + nonauthor_contributors = ReleaseContributor.objects.nonauthors( + self.codebase_release + ) + + contributors = [ + { + "contributorName": self.codebase_release.codebase.repository_url, + "contributorType": "hostingInstitution", + } + ] + + if nonauthor_contributors is not None: + for contributor in nonauthor_contributors: + already_has_other_role = False + for role in contributor.roles: + if role == "copyrightHolder": + contributors.append( + { + "contributorName:": contributor.contributor.name, + "contributorType:": "RightsHolder", + } + ) + elif role == "editor": + contributors.append( + { + "contributorName:": contributor.contributor.name, + "contributorType:": "Editor", + } + ) + elif role == "funder": + contributors.append( + { + "contributorName:": contributor.contributor.name, + "contributorType:": "Sponsor", + } + ) + elif role == "pointOfContact": + contributors.append( + { + "contributorName:": contributor.contributor.name, + "contributorType:": "ContactPerson", + } + ) + elif role == "resourceProvider": + contributors.append( + { + "contributorName:": contributor.contributor.name, + "contributorType:": "Distributor", + } + ) + # collaborator, contributor, maintainer, publisher and any other new roles will be mapped to other role + elif already_has_other_role == False: + contributors.append( + { + "contributorName:": contributor.contributor.name, + "contributorType:": "Other", + } + ) + already_has_other_role = True + return contributors + + def convert_programming_languages(self): return [ {"@type": "ComputerLanguage", "name": pl.name} - for pl in codebase_release.programming_languages.all() + for pl in self.codebase_release.programming_languages.all() ] - @classmethod - def build(cls, release: CodebaseRelease): - metadata = cls.INITIAL_DATA.copy() - codebase = release.codebase - metadata.update( - name=codebase.title, - abstract=codebase.summary, - description=codebase.description.raw, - version=release.version_number, - targetProduct=cls.convert_target_product(release), - programmingLanguage=cls.convert_programming_languages(release), - author=cls.convert_authors(release), - identifier=release.permanent_url, - dateCreated=release.date_created.isoformat(), - dateModified=release.last_modified.isoformat(), - keywords=cls.convert_keywords(release), - runtimePlatform=cls.convert_platforms(release), - url=release.permanent_url, - citation=[], - ) - if release.live: - metadata.update( - datePublished=release.last_published_on.strftime( - cls.DATE_PUBLISHED_FORMAT - ) - ) - metadata.update(copyrightYear=release.last_published_on.year) - if release.license: - metadata.update(license=release.license.url) - if codebase.repository_url: - metadata.update(codeRepository=codebase.repository_url) - cls.add_citation_text(metadata, codebase.references_text) - cls.add_citation_text(metadata, codebase.replication_text) - cls.add_citation_text(metadata, codebase.associated_publication_text) + def to_creative_work(cls, text): + return - if release.release_notes: - metadata.update(releaseNotes=release.release_notes.raw) - metadata["@id"] = release.permanent_url - return CodeMeta(metadata) + # def to_json(self): + # """Returns a JSON string of this codemeta data""" + # # FIXME: should ideally validate metadata as well + # return json.dumps(self.metadata) - @classmethod - def add_citation_text(cls, metadata, text): - if text: - metadata.get("citation").append(cls.to_creative_work(text)) + # def to_dict(self): + # return self.metadata.copy() + + +class CodeMetaMetadata: + COMSES_ORGANIZATION = { + "@id": "https://ror.org/015bsfc29", + "@type": "Organization", + "name": "CoMSES Net", + "url": "https://www.comses.net", + } + + def __init__(self, metadata: dict): + if not metadata: + raise ValueError( + "Initialize with a base dictionary with codemeta terms mapped to JSON-serializable values" + ) + self.metadata = metadata @classmethod - def to_creative_work(cls, text): - return {"@type": "CreativeWork", "text": text} + def build(cls, common_metadata: CommonMetadata): + + metadata = {} + + metadata.update( + **common_metadata.initial_data, + name=common_metadata.name, + abstract=common_metadata.abstract, + description=common_metadata.description, + version=common_metadata.version, + targetProduct=common_metadata.targetProduct, + programmingLanguage=common_metadata.programmingLanguage, + author=common_metadata.author, + identifier=common_metadata.identifier, + dateCreated=common_metadata.dateCreated, + dateModified=common_metadata.dateModified, + keywords=common_metadata.keywords, + runtimePlatform=common_metadata.runtimePlatform, + url=common_metadata.url, + citation=common_metadata.citation, + ) + + datePublished = getattr(common_metadata, "datePublished", None) + if datePublished: + metadata.update(datePublished=datePublished) + metadata.update(copyrightYear=common_metadata.copyrightYear) + + license = getattr(common_metadata, "license", None) + if license: + metadata.update(license=license.url) + + codeRepository = getattr(common_metadata, "codeRepository", None) + if codeRepository: + metadata.update(codeRepository=codeRepository) + + releaseNotes = getattr(common_metadata, "releaseNotes", None) + if releaseNotes: + metadata.update(releaseNotes=releaseNotes) + + metadata["@id"] = common_metadata.id + + return CodeMetaMetadata(metadata) def to_json(self): """Returns a JSON string of this codemeta data""" @@ -2490,7 +2627,7 @@ def __init__(self, metadata: dict): self.metadata = metadata @classmethod - def build(cls, release: CodebaseRelease): + def build(cls, common_metadata: CommonMetadata): """ Build the DataCite schema 4.3 data in JSON format. See documentation @ https://schema.datacite.org/meta/kernel-4.3/doc/DataCite-MetadataKernel_v4.3.pdf @@ -2506,36 +2643,54 @@ def build(cls, release: CodebaseRelease): some fields DataCite do not want or have. """ metadata = {} - codemeta = release.codemeta.metadata + + publisher_name = common_metadata.initial_data["publisher"]["name"] + publisher_url = common_metadata.initial_data["publisher"]["url"] + metadata.update( - creators=cls.convert_authors(codemeta), + creators=cls.convert_authors(common_metadata), descriptions=[ - {"description": codemeta["description"], "descriptionType": "Abstract"} + { + "description": common_metadata.description, + "descriptionType": "Abstract", + } ], identifiers=[ - {"identifier": codemeta["identifier"], "identifierType": "DOI"} + {"identifier": common_metadata.identifier, "identifierType": "DOI"} ], - publicationYear=cls.convert_publication_year(codemeta, release), - publisher=codemeta["publisher"]["name"] - + " " - + codemeta["publisher"]["url"], + publicationYear=cls.convert_publication_year(common_metadata), + publisher=publisher_name + " " + publisher_url, types={"resourceType": "Model", "resourceTypeGeneral": "Software"}, - titles=[{"title": codemeta["name"]}], - version=codemeta["version"], + titles=[{"title": common_metadata.name}], + version=common_metadata.version, ) - if "codeRepository" in codemeta: + codeRepository = getattr(common_metadata, "codeRepository", None) + if codeRepository: + metadata.update(contributors=common_metadata.contributors) + + if common_metadata.keywords: + metadata.update(subjects=common_metadata.keywords) + + if common_metadata.releaseNotes: + metadata["descriptions"].append( + { + "description": common_metadata.releaseNotes, + "descriptionType": "TechnicalInfo", + } + ) + + if common_metadata.license: + license = common_metadata.license metadata.update( - contributors=cls.convert_contributors( - codemeta["codeRepository"], release - ) + rightsList=[ + { + "rights": license.name, + "rightsIdentifier": license.name, + "rightsURI": license.url, + } + ] ) - if "keywords" in codemeta: - metadata.update(subjects=cls.convert_keywords(codemeta)) - if "releaseNotes" in codemeta: - metadata["descriptions"].append(cls.convert_release_notes(codemeta)) - if release.license: - metadata.update(rightsList=cls.convert_license(release)) """ Now we will handle the nested parent/child DOI part. @@ -2560,9 +2715,9 @@ def build(cls, release: CodebaseRelease): return DataCiteMetadata(metadata) @classmethod - def convert_authors(cls, codemeta): + def convert_authors(cls, common_metadata: CommonMetadata): creators = [] - for author in codemeta["author"]: + for author in common_metadata.author: # print ("author: " + str(author)) author_type = "Organizational" if author["@type"] == "Person": @@ -2579,103 +2734,26 @@ def convert_authors(cls, codemeta): return creators @classmethod - def convert_contributors(cls, code_repo, codebase_release): - """ - For DataCite purpose, contributors do not include authors. - Our contributor categories are: collaborator, contributor, copyrightHolder, - editor, funder, maintainer, pointOfContact, publisher, resourceProvider - For DataCite's contributor categories, see - https://schema.datacite.org/meta/kernel-4.3/doc/DataCite-MetadataKernel_v4.3.pdf page 13) - """ - contributors = [ - { - "contributorName": code_repo, - "contributorType": "hostingInstitution", - } - ] - nonauthors = ReleaseContributor.objects.nonauthors(codebase_release) - for contributor in nonauthors: - already_has_other_role = False - for role in contributor.roles: - if role == "copyrightHolder": - contributors.append( - { - "contributorName:": contributor.contributor.name, - "contributorType:": "RightsHolder", - } - ) - elif role == "editor": - contributors.append( - { - "contributorName:": contributor.contributor.name, - "contributorType:": "Editor", - } - ) - elif role == "funder": - contributors.append( - { - "contributorName:": contributor.contributor.name, - "contributorType:": "Sponsor", - } - ) - elif role == "pointOfContact": - contributors.append( - { - "contributorName:": contributor.contributor.name, - "contributorType:": "ContactPerson", - } - ) - elif role == "resourceProvider": - contributors.append( - { - "contributorName:": contributor.contributor.name, - "contributorType:": "Distributor", - } - ) - # collaborator, contributor, maintainer, publisher and any other new roles will be mapped to other role - elif already_has_other_role == False: - contributors.append( - { - "contributorName:": contributor.contributor.name, - "contributorType:": "Other", - } - ) - already_has_other_role = True - return contributors + def convert_publication_year(cls, common_metadata): + copyrightYear = getattr(common_metadata, "copyrightYear", None) + if copyrightYear: + return copyrightYear - @classmethod - def convert_keywords(cls, codemeta): - subjects = [] - for keyword in codemeta["keywords"]: - subjects.append(keyword) - return subjects - - @classmethod - def convert_license(cls, release): - # the license in codemeta combines the name & url so instead will use the - # release license object (should be the same) - license = release.license - return [ - { - "rights": license.name, - "rightsIdentifier": license.name, - "rightsURI": license.url, - } - ] + first_published_at = getattr(common_metadata, "first_published_at", None) + if first_published_at: + return ( + first_published_at.year + if getattr(common_metadata, "first_published_at") + else None + ) - @classmethod - def convert_publication_year(cls, codemeta, codebase_release): - if "copyrightYear" in codemeta: - return codemeta["copyrightYear"] - elif codebase_release.first_published_at: - return codebase_release.first_published_at.year + def to_json(self): + """Returns a JSON string of this codemeta data""" + # FIXME: should ideally validate metadata as well + return json.dumps(self.metadata) - @classmethod - def convert_release_notes(cls, codemeta): - return { - "description": codemeta["releaseNotes"], - "descriptionType": "TechnicalInfo", - } + def to_dict(self): + return self.metadata.copy() @register_snippet