From eb4b90ad6454e480b912552c82f68101aae5071d Mon Sep 17 00:00:00 2001 From: Tom Morrell Date: Fri, 19 Apr 2024 15:08:00 -0700 Subject: [PATCH] Black formatting --- caltechdata_api/cli.py | 28 +++++++++++++++++----------- caltechdata_api/md_to_json.py | 27 +++++++++++++++++---------- process_tomograms.py | 6 ++++++ setup.py | 2 +- 4 files changed, 41 insertions(+), 22 deletions(-) diff --git a/caltechdata_api/cli.py b/caltechdata_api/cli.py index 57d3911..c2df4e9 100644 --- a/caltechdata_api/cli.py +++ b/caltechdata_api/cli.py @@ -360,7 +360,9 @@ def create_record(): existing_data, token, production=False, publish=False ) rec_id = response - print(f'You can view and publish this record at https://data.caltechlibrary.dev/uploads/{rec_id}') + print( + f"You can view and publish this record at https://data.caltechlibrary.dev/uploads/{rec_id}" + ) break else: print("Going back to the main menu.") @@ -421,7 +423,9 @@ def create_record(): metadata, token, production=False, publish=False ) rec_id = response - print(f'You can view and publish this record at https://data.caltechlibrary.dev/uploads/{rec_id}') + print( + f"You can view and publish this record at https://data.caltechlibrary.dev/uploads/{rec_id}" + ) with open(response + ".json", "w") as file: json.dump(metadata, file, indent=2) break @@ -434,15 +438,15 @@ def create_record(): def edit_record(): record_id = input("Enter the CaltechDATA record ID: ") token = get_or_set_token() - file_name = download_file_by_id(record_id,token) + file_name = download_file_by_id(record_id, token) if file_name: try: # Read the edited metadata file with open(file_name, "r") as file: metadata = json.load(file) response = caltechdata_edit( - record_id, metadata, token, production=False, publish=False - ) + record_id, metadata, token, production=False, publish=False + ) if response: print("Metadata edited successfully.") else: @@ -469,12 +473,14 @@ def edit_record(): publish=False, ) rec_id = response - print(f'You can view and publish this record at https://data.caltechlibrary.dev/uploads/{rec_id}') + print( + f"You can view and publish this record at https://data.caltechlibrary.dev/uploads/{rec_id}" + ) -def download_file_by_id(record_id,token=None): +def download_file_by_id(record_id, token=None): url = f"https://data.caltechlibrary.dev/api/records/{record_id}" - + headers = { "accept": "application/vnd.datacite.datacite+json", } @@ -483,12 +489,12 @@ def download_file_by_id(record_id,token=None): headers["Authorization"] = "Bearer %s" % token try: - response = requests.get(url,headers=headers) + response = requests.get(url, headers=headers) if response.status_code != 200: # Might have a draft response = requests.get( - url + "/draft", - headers=headers, + url + "/draft", + headers=headers, ) if response.status_code != 200: raise Exception(f"Record {record_id} does not exist, cannot edit") diff --git a/caltechdata_api/md_to_json.py b/caltechdata_api/md_to_json.py index 4b601b4..e9fe1f5 100644 --- a/caltechdata_api/md_to_json.py +++ b/caltechdata_api/md_to_json.py @@ -16,13 +16,19 @@ def camel_case(s): def expand_special_keys(key, value): """Expand special keys into their structured format (affiliation, nameIdentifiers).""" if key == "affiliation": - if 'ror.org' not in value: - raise ValueError('Affiliation Identifier is not a ROR') - ror = value.split('ror.org/')[1].split(']')[0] - response = requests.get(f'https://api.ror.org/organizations/{ror}').json() - return [{"affiliationIdentifier": ror, "affiliationIdentifierScheme": "ROR","name":response['name']}] + if "ror.org" not in value: + raise ValueError("Affiliation Identifier is not a ROR") + ror = value.split("ror.org/")[1].split("]")[0] + response = requests.get(f"https://api.ror.org/organizations/{ror}").json() + return [ + { + "affiliationIdentifier": ror, + "affiliationIdentifierScheme": "ROR", + "name": response["name"], + } + ] elif key == "nameIdentifiers": - orcid = value.split('orcid.org/')[1].split(']')[0] + orcid = value.split("orcid.org/")[1].split("]")[0] return [ { "nameIdentifier": orcid, @@ -44,10 +50,10 @@ def parse_readme_to_json(readme_path): current_object = {} title_line = lines.pop(0) - if title_line.startswith('#') == False: + if title_line.startswith("#") == False: raise ValueError('README.md needs to start with "# Title"') else: - json_data['titles'] = [{'title':title_line.replace("# ","")}] + json_data["titles"] = [{"title": title_line.replace("# ", "")}] section_pattern = re.compile(r"^##\s+(.*)$") key_value_pattern = re.compile(r"^-\s+(.*?):\s+(.*)$") @@ -61,7 +67,7 @@ def parse_readme_to_json(readme_path): elif len(current_object) == 1: key, value = next(iter(current_object.items())) if key in ["language", "publicationYear", "publisher", "version"]: - json_data[current_section]=value + json_data[current_section] = value else: json_data[current_section].append(current_object) else: @@ -120,7 +126,8 @@ def parse_readme_to_json(readme_path): return json_data -if __name__ == '__main__': + +if __name__ == "__main__": readme_path = "exampleREADME.md" try: json_data = parse_readme_to_json(readme_path) diff --git a/process_tomograms.py b/process_tomograms.py index 870c66e..5f9c947 100644 --- a/process_tomograms.py +++ b/process_tomograms.py @@ -11,6 +11,7 @@ # Set OpenAI API key from environment variable openai.api_key = os.getenv("OPENAI_API_KEY") + # Function to parse collaborators using OpenAI API def parse_collaborators(collaborator_string): # Using OpenAI API to extract names and contributions @@ -55,6 +56,7 @@ def parse_collaborators(collaborator_string): formatted.append(new) return formatted + # Function to create a description based on the metadata def create_detailed_description(information, annotation): keywords = [] @@ -142,6 +144,7 @@ def create_detailed_description(information, annotation): ) return description, keywords + # Function for processing files and extracting information def process_files(files, embargoed): formats = [] @@ -206,6 +209,7 @@ def process_files(files, embargoed): default_preview, ) + # List of funding resources funding = [ {"funderName": "NIH"}, @@ -228,6 +232,7 @@ def process_files(files, embargoed): }, ] + # Function for processing a single tomogram record def process_record(source, edit=None): # Extract information from the record @@ -414,6 +419,7 @@ def process_record(source, edit=None): except FileNotFoundError: print("Not deleting remaned files") + # Read record IDs from a file with open("tomogram_ids.json", "r") as infile: record_ids = json.load(infile) diff --git a/setup.py b/setup.py index a08b5ff..acf2955 100755 --- a/setup.py +++ b/setup.py @@ -67,7 +67,7 @@ def package_files(package, directory): "pyyaml", "s3fs", "configparser", - "awscli" + "awscli", ] # What packages are optional?