Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

1191 migrate license #1193

Merged
merged 2 commits into from
Aug 30, 2024
Merged
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
110 changes: 91 additions & 19 deletions scripts/migration/migrate.py
Original file line number Diff line number Diff line change
Expand Up @@ -130,19 +130,21 @@ def get_clowder_v1_dataset_collections(headers, user_v1, dataset_id):
user_collections = response.json()
for collection in user_collections:
collection_id = collection["id"]
if collection['name'] == 'subchild':
if collection["name"] == "subchild":
print("HERE")
collection_dataset_endpoint = (
f"{CLOWDER_V1}/api/collections/{collection_id}/datasets"
)
try:
dataset_response = requests.get(collection_dataset_endpoint, headers=headers)
dataset_response = requests.get(
collection_dataset_endpoint, headers=headers
)
datasets = dataset_response.json()
for ds in datasets:
if ds["id"] == dataset_id:
matching_collections.append(collection)
except Exception as e:
print('Exception', e)
print("Exception", e)
return matching_collections


Expand Down Expand Up @@ -271,10 +273,74 @@ def create_admin_user():
return generate_user_api_key(admin_user, admin_user["password"])


def add_dataset_license(v1_license, headers):
    """Create appropriate license (standard/custom) based on v1 license details.

    Args:
        v1_license: License dict taken from a Clowder v1 dataset. Always reads
            ``license_type``. For ``"license2"`` it also reads the
            ``ccAllowCommercial``, ``ccAllowDerivative`` and
            ``ccRequireShareAlike`` flags; for any type other than
            ``"license2"``/``"license3"`` it reads ``license_text``,
            ``license_url`` and ``holders``.
        headers: HTTP headers sent with the license-creation request
            (only used for custom licenses).

    Returns:
        The license id to attach to the v2 dataset: a standard license id for
        ``"license2"``/``"license3"``, otherwise the ``id`` field of the JSON
        returned by the v2 licenses endpoint.

    Raises:
        KeyError: If a field listed above is missing from ``v1_license``, or
            the licenses endpoint response has no ``id``.
    """
    # Fallback for flag combinations that match no standard license below.
    # Spelled "CC BY" (space, not hyphen) so it agrees with the standard id
    # used for the plain attribution license in this mapping.
    license_id = "CC BY"
    license_type = v1_license["license_type"]

    if license_type == "license2":
        # Keyed by (allow commercial, allow derivative, require share-alike).
        standard_ids = {
            (False, False, False): "CC BY-NC-ND",
            (True, False, False): "CC BY-ND",
            (False, True, False): "CC BY-NC",
            (False, True, True): "CC BY-NC-SA",
            (True, True, True): "CC BY-SA",
            (True, True, False): "CC BY",
        }
        flags = (
            bool(v1_license["ccAllowCommercial"]),
            bool(v1_license["ccAllowDerivative"]),
            bool(v1_license["ccRequireShareAlike"]),
        )
        license_id = standard_ids.get(flags, license_id)
    elif license_type == "license3":
        license_id = "CCO Public Domain Dedication"
    else:
        # custom license
        license_body = {
            "name": v1_license["license_text"],
            "url": v1_license["license_url"],
            "holders": v1_license["holders"],
        }
        # Treat a missing (None) URL the same as an empty one.
        if not license_body["url"]:
            license_body["url"] = "https://dbpedia.org/page/All_rights_reserved"
        license_v2_endpoint = f"{CLOWDER_V2}/api/v2/licenses?"
        response = requests.post(
            license_v2_endpoint, headers=headers, json=license_body
        )
        # Decode the response body once and reuse it.
        created_license = response.json()
        print(created_license)
        license_id = created_license["id"]
    return license_id


def create_v2_dataset(dataset, headers):
"""Create a dataset in Clowder v2."""
# TODO: GET correct license
dataset_in_v2_endpoint = f"{CLOWDER_V2}/api/v2/datasets?license_id=CC BY"
print(f"Creating dataset license in Clowder v2.")
v2_license_id = add_dataset_license(dataset["license"], headers)

dataset_in_v2_endpoint = f"{CLOWDER_V2}/api/v2/datasets?license_id={v2_license_id}"
dataset_example = {
"name": dataset["name"],
"description": dataset["description"],
Expand Down Expand Up @@ -540,6 +606,7 @@ def register_migration_extractor():
f"Failed to register migration extractor in Clowder v2. Status code: {response.status_code}"
)


def add_children(collection_hierarchy_json, remaining_collections):
new_json = []
new_remaining_collections = []
Expand Down Expand Up @@ -584,22 +651,26 @@ def build_collection_hierarchy(collection_id, headers):
remaining_collections.append(col)

while len(remaining_collections) > 0:
children, remaining_collections = add_children(
children, remaining_collections
)
children, remaining_collections = add_children(children, remaining_collections)
print("Now we are done")
return children


def build_collection_metadata_for_v1_dataset(dataset_id, user_v1, headers):
    """Gather collection hierarchy entries for one Clowder v1 dataset.

    Args:
        dataset_id: Id of the v1 dataset whose collections are looked up.
        user_v1: v1 user passed through to the collection lookup.
        headers: HTTP headers passed through to both lookups.

    Returns:
        A flat list holding, for every collection returned by
        ``get_clowder_v1_dataset_collections``, each entry returned by
        ``build_collection_hierarchy`` for that collection's id.
    """
    # Each lookup is issued exactly once; repeating it would only duplicate
    # the underlying requests without changing the result.
    dataset_collections = get_clowder_v1_dataset_collections(
        headers=headers, user_v1=user_v1, dataset_id=dataset_id
    )

    collection_data = []
    for collection in dataset_collections:
        collection_data.extend(
            build_collection_hierarchy(
                collection_id=collection["id"], headers=headers
            )
        )
    return collection_data


def process_user_and_resources(user_v1, USER_MAP, DATASET_MAP):
"""Process user resources from Clowder v1 to Clowder v2."""
user_v1_datasets = get_clowder_v1_user_datasets(user_id=user_v1["id"])
Expand Down Expand Up @@ -639,22 +710,24 @@ def process_user_and_resources(user_v1, USER_MAP, DATASET_MAP):
add_file_metadata(file, file_v2_id, clowder_headers_v1, user_headers_v2)
# posting the collection hierarchy as metadata
# TODO need to actually post this
collection_metadata_dict = build_collection_metadata_for_v1_dataset(dataset_id=dataset['id'],user_v1=user_v1, headers=clowder_headers_v1)
collection_metadata_dict = build_collection_metadata_for_v1_dataset(
dataset_id=dataset["id"], user_v1=user_v1, headers=clowder_headers_v1
)
migration_extractor_collection_metadata = {
"listener" : {
"name": "migration",
"version": "1",
"description": "Migration of metadata from Clowder v1 to Clowder v2",
"listener": {
"name": "migration",
"version": "1",
"description": "Migration of metadata from Clowder v1 to Clowder v2",
},
"context_url": "https://clowder.ncsa.illinois.edu/contexts/metadata.jsonld",
"content": collection_metadata_dict,
"contents": collection_metadata_dict,
}
v2_metadata_endpoint = (
f"{CLOWDER_V2}/api/v2/datasets/{dataset_v2_id}/metadata"
)
v2_metadata_endpoint = f"{CLOWDER_V2}/api/v2/datasets/{dataset_v2_id}/metadata"
response = requests.post(
v2_metadata_endpoint, json=migration_extractor_collection_metadata, headers=clowder_headers_v2
v2_metadata_endpoint,
json=migration_extractor_collection_metadata,
headers=clowder_headers_v2,
)
if response.status_code == 200:
print("Successfully added collection info as metadata in v2.")
Expand All @@ -663,7 +736,6 @@ def process_user_and_resources(user_v1, USER_MAP, DATASET_MAP):
f"Failed to add collection info as metadata in Clowder v2. Status code: {response.status_code}"
)


return [USER_MAP, DATASET_MAP]


Expand Down
Loading