Skip to content

Commit

Permalink
Black formatting
Browse files (browse the repository at this point in the history)
  • Loading branch information
tmorrell committed Apr 19, 2024
1 parent 4fdaa54 commit eb4b90a
Show file tree
Hide file tree
Showing 4 changed files with 41 additions and 22 deletions.
28 changes: 17 additions & 11 deletions caltechdata_api/cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -360,7 +360,9 @@ def create_record():
existing_data, token, production=False, publish=False
)
rec_id = response
print(f'You can view and publish this record at https://data.caltechlibrary.dev/uploads/{rec_id}')
print(
f"You can view and publish this record at https://data.caltechlibrary.dev/uploads/{rec_id}"
)
break
else:
print("Going back to the main menu.")
Expand Down Expand Up @@ -421,7 +423,9 @@ def create_record():
metadata, token, production=False, publish=False
)
rec_id = response
print(f'You can view and publish this record at https://data.caltechlibrary.dev/uploads/{rec_id}')
print(
f"You can view and publish this record at https://data.caltechlibrary.dev/uploads/{rec_id}"
)
with open(response + ".json", "w") as file:
json.dump(metadata, file, indent=2)
break
Expand All @@ -434,15 +438,15 @@ def create_record():
def edit_record():
record_id = input("Enter the CaltechDATA record ID: ")
token = get_or_set_token()
file_name = download_file_by_id(record_id,token)
file_name = download_file_by_id(record_id, token)
if file_name:
try:
# Read the edited metadata file
with open(file_name, "r") as file:
metadata = json.load(file)
response = caltechdata_edit(
record_id, metadata, token, production=False, publish=False
)
record_id, metadata, token, production=False, publish=False
)
if response:
print("Metadata edited successfully.")
else:
Expand All @@ -469,12 +473,14 @@ def edit_record():
publish=False,
)
rec_id = response
print(f'You can view and publish this record at https://data.caltechlibrary.dev/uploads/{rec_id}')
print(
f"You can view and publish this record at https://data.caltechlibrary.dev/uploads/{rec_id}"
)


def download_file_by_id(record_id,token=None):
def download_file_by_id(record_id, token=None):
url = f"https://data.caltechlibrary.dev/api/records/{record_id}"

headers = {
"accept": "application/vnd.datacite.datacite+json",
}
Expand All @@ -483,12 +489,12 @@ def download_file_by_id(record_id,token=None):
headers["Authorization"] = "Bearer %s" % token

try:
response = requests.get(url,headers=headers)
response = requests.get(url, headers=headers)
if response.status_code != 200:
# Might have a draft
response = requests.get(
url + "/draft",
headers=headers,
url + "/draft",
headers=headers,
)
if response.status_code != 200:
raise Exception(f"Record {record_id} does not exist, cannot edit")
Expand Down
27 changes: 17 additions & 10 deletions caltechdata_api/md_to_json.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,13 +16,19 @@ def camel_case(s):
def expand_special_keys(key, value):
"""Expand special keys into their structured format (affiliation, nameIdentifiers)."""
if key == "affiliation":
if 'ror.org' not in value:
raise ValueError('Affiliation Identifier is not a ROR')
ror = value.split('ror.org/')[1].split(']')[0]
response = requests.get(f'https://api.ror.org/organizations/{ror}').json()
return [{"affiliationIdentifier": ror, "affiliationIdentifierScheme": "ROR","name":response['name']}]
if "ror.org" not in value:
raise ValueError("Affiliation Identifier is not a ROR")
ror = value.split("ror.org/")[1].split("]")[0]
response = requests.get(f"https://api.ror.org/organizations/{ror}").json()
return [
{
"affiliationIdentifier": ror,
"affiliationIdentifierScheme": "ROR",
"name": response["name"],
}
]
elif key == "nameIdentifiers":
orcid = value.split('orcid.org/')[1].split(']')[0]
orcid = value.split("orcid.org/")[1].split("]")[0]
return [
{
"nameIdentifier": orcid,
Expand All @@ -44,10 +50,10 @@ def parse_readme_to_json(readme_path):
current_object = {}

title_line = lines.pop(0)
if title_line.startswith('#') == False:
if title_line.startswith("#") == False:
raise ValueError('README.md needs to start with "# Title"')
else:
json_data['titles'] = [{'title':title_line.replace("# ","")}]
json_data["titles"] = [{"title": title_line.replace("# ", "")}]

section_pattern = re.compile(r"^##\s+(.*)$")
key_value_pattern = re.compile(r"^-\s+(.*?):\s+(.*)$")
Expand All @@ -61,7 +67,7 @@ def parse_readme_to_json(readme_path):
elif len(current_object) == 1:
key, value = next(iter(current_object.items()))
if key in ["language", "publicationYear", "publisher", "version"]:
json_data[current_section]=value
json_data[current_section] = value
else:
json_data[current_section].append(current_object)
else:
Expand Down Expand Up @@ -120,7 +126,8 @@ def parse_readme_to_json(readme_path):

return json_data

if __name__ == '__main__':

if __name__ == "__main__":
readme_path = "exampleREADME.md"
try:
json_data = parse_readme_to_json(readme_path)
Expand Down
6 changes: 6 additions & 0 deletions process_tomograms.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@
# Set OpenAI API key from environment variable
openai.api_key = os.getenv("OPENAI_API_KEY")


# Function to parse collaborators using OpenAI API
def parse_collaborators(collaborator_string):
# Using OpenAI API to extract names and contributions
Expand Down Expand Up @@ -55,6 +56,7 @@ def parse_collaborators(collaborator_string):
formatted.append(new)
return formatted


# Function to create a description based on the metadata
def create_detailed_description(information, annotation):
keywords = []
Expand Down Expand Up @@ -142,6 +144,7 @@ def create_detailed_description(information, annotation):
)
return description, keywords


# Function for processing files and extracting information
def process_files(files, embargoed):
formats = []
Expand Down Expand Up @@ -206,6 +209,7 @@ def process_files(files, embargoed):
default_preview,
)


# List of funding resources
funding = [
{"funderName": "NIH"},
Expand All @@ -228,6 +232,7 @@ def process_files(files, embargoed):
},
]


# Function for processing a single tomogram record
def process_record(source, edit=None):
# Extract information from the record
Expand Down Expand Up @@ -414,6 +419,7 @@ def process_record(source, edit=None):
except FileNotFoundError:
print("Not deleting remaned files")


# Read record IDs from a file
with open("tomogram_ids.json", "r") as infile:
record_ids = json.load(infile)
Expand Down
2 changes: 1 addition & 1 deletion setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -67,7 +67,7 @@ def package_files(package, directory):
"pyyaml",
"s3fs",
"configparser",
"awscli"
"awscli",
]

# What packages are optional?
Expand Down

0 comments on commit eb4b90a

Please sign in to comment.