clowder-framework · longshuicy · Aug 30, 2024 · Aug 30, 2024 · Aug 30, 2024
diff --git a/scripts/migration/migrate.py b/scripts/migration/migrate.py
@@ -130,19 +130,21 @@ def get_clowder_v1_dataset_collections(headers, user_v1, dataset_id):
     user_collections = response.json()
     for collection in user_collections:
         collection_id = collection["id"]
-        if collection['name'] == 'subchild':
+        if collection["name"] == "subchild":
             print("HERE")
         collection_dataset_endpoint = (
             f"{CLOWDER_V1}/api/collections/{collection_id}/datasets"
         )
         try:
-            dataset_response = requests.get(collection_dataset_endpoint, headers=headers)
+            dataset_response = requests.get(
+                collection_dataset_endpoint, headers=headers
+            )
             datasets = dataset_response.json()
             for ds in datasets:
                 if ds["id"] == dataset_id:
                     matching_collections.append(collection)
         except Exception as e:
-            print('Exception', e)
+            print("Exception", e)
     return matching_collections
 
 
@@ -271,10 +273,74 @@ def create_admin_user():
     return generate_user_api_key(admin_user, admin_user["password"])
 
 
+def add_dataset_license(v1_license, headers):
+    """Create appropriate license (standard/custom) based on v1 license details"""
+    license_id = "CC-BY"
+    # standard licenses
+    if v1_license["license_type"] == "license2":
+        if (
+            not v1_license["ccAllowCommercial"]
+            and not v1_license["ccAllowDerivative"]
+            and not v1_license["ccRequireShareAlike"]
+        ):
+            license_id = "CC BY-NC-ND"
+        elif (
+            v1_license["ccAllowCommercial"]
+            and not v1_license["ccAllowDerivative"]
+            and not v1_license["ccRequireShareAlike"]
+        ):
+            license_id = "CC BY-ND"
+        elif (
+            not v1_license["ccAllowCommercial"]
+            and v1_license["ccAllowDerivative"]
+            and not v1_license["ccRequireShareAlike"]
+        ):
+            license_id = "CC BY-NC"
+        elif (
+            not v1_license["ccAllowCommercial"]
+            and v1_license["ccAllowDerivative"]
+            and v1_license["ccRequireShareAlike"]
+        ):
+            license_id = "CC BY-NC-SA"
+        elif (
+            v1_license["ccAllowCommercial"]
+            and v1_license["ccAllowDerivative"]
+            and v1_license["ccRequireShareAlike"]
+        ):
+            license_id = "CC BY-SA"
+        elif (
+            v1_license["ccAllowCommercial"]
+            and v1_license["ccAllowDerivative"]
+            and not v1_license["ccRequireShareAlike"]
+        ):
+            license_id = "CC BY"
+    elif v1_license["license_type"] == "license3":
+        license_id = "CCO Public Domain Dedication"
+    else:
+        # custom license
+        license_body = {
+            "name": v1_license["license_text"],
+            "url": v1_license["license_url"],
+            "holders": v1_license["holders"],
+        }
+        if license_body["url"] == "":
+            license_body["url"] = "https://dbpedia.org/page/All_rights_reserved"
+        license_v2_endpoint = f"{CLOWDER_V2}/api/v2/licenses?"
+        response = requests.post(
+            license_v2_endpoint, headers=headers, json=license_body
+        )
+        print(response.json())
+        license_id = response.json()["id"]
+    return license_id
+
+
 def create_v2_dataset(dataset, headers):
     """Create a dataset in Clowder v2."""
     # TODO: GET correct license
-    dataset_in_v2_endpoint = f"{CLOWDER_V2}/api/v2/datasets?license_id=CC BY"
+    print(f"Creating dataset license in Clowder v2.")
+    v2_license_id = add_dataset_license(dataset["license"], headers)
+
+    dataset_in_v2_endpoint = f"{CLOWDER_V2}/api/v2/datasets?license_id={v2_license_id}"
     dataset_example = {
         "name": dataset["name"],
         "description": dataset["description"],
@@ -540,6 +606,7 @@ def register_migration_extractor():
             f"Failed to register migration extractor in Clowder v2. Status code: {response.status_code}"
         )
 
+
 def add_children(collection_hierarchy_json, remaining_collections):
     new_json = []
     new_remaining_collections = []
@@ -584,22 +651,26 @@ def build_collection_hierarchy(collection_id, headers):
             remaining_collections.append(col)
 
     while len(remaining_collections) > 0:
-        children, remaining_collections = add_children(
-            children, remaining_collections
-        )
+        children, remaining_collections = add_children(children, remaining_collections)
     print("Now we are done")
     return children
 
+
 def build_collection_metadata_for_v1_dataset(dataset_id, user_v1, headers):
-    dataset_collections = get_clowder_v1_dataset_collections(headers=headers, user_v1=user_v1, dataset_id=dataset_id)
+    dataset_collections = get_clowder_v1_dataset_collections(
+        headers=headers, user_v1=user_v1, dataset_id=dataset_id
+    )
 
     collection_data = []
     for collection in dataset_collections:
-        collection_children = build_collection_hierarchy(collection_id=collection['id'], headers=headers)
+        collection_children = build_collection_hierarchy(
+            collection_id=collection["id"], headers=headers
+        )
         for child in collection_children:
             collection_data.append(child)
     return collection_data
 
+
 def process_user_and_resources(user_v1, USER_MAP, DATASET_MAP):
     """Process user resources from Clowder v1 to Clowder v2."""
     user_v1_datasets = get_clowder_v1_user_datasets(user_id=user_v1["id"])
@@ -639,22 +710,24 @@ def process_user_and_resources(user_v1, USER_MAP, DATASET_MAP):
                 add_file_metadata(file, file_v2_id, clowder_headers_v1, user_headers_v2)
         # posting the collection hierarchy as metadata
         # TODO need to actually post this
-        collection_metadata_dict = build_collection_metadata_for_v1_dataset(dataset_id=dataset['id'],user_v1=user_v1, headers=clowder_headers_v1)
+        collection_metadata_dict = build_collection_metadata_for_v1_dataset(
+            dataset_id=dataset["id"], user_v1=user_v1, headers=clowder_headers_v1
+        )
         migration_extractor_collection_metadata = {
-            "listener" : {
-            "name": "migration",
-            "version": "1",
-            "description": "Migration of metadata from Clowder v1 to Clowder v2",
+            "listener": {
+                "name": "migration",
+                "version": "1",
+                "description": "Migration of metadata from Clowder v1 to Clowder v2",
             },
             "context_url": "https://clowder.ncsa.illinois.edu/contexts/metadata.jsonld",
             "content": collection_metadata_dict,
             "contents": collection_metadata_dict,
         }
-        v2_metadata_endpoint = (
-            f"{CLOWDER_V2}/api/v2/datasets/{dataset_v2_id}/metadata"
-        )
+        v2_metadata_endpoint = f"{CLOWDER_V2}/api/v2/datasets/{dataset_v2_id}/metadata"
         response = requests.post(
-            v2_metadata_endpoint, json=migration_extractor_collection_metadata, headers=clowder_headers_v2
+            v2_metadata_endpoint,
+            json=migration_extractor_collection_metadata,
+            headers=clowder_headers_v2,
         )
         if response.status_code == 200:
             print("Successfully added collection info as metadata in v2.")
@@ -663,7 +736,6 @@ def process_user_and_resources(user_v1, USER_MAP, DATASET_MAP):
                 f"Failed to add collection info as metadata in Clowder v2. Status code: {response.status_code}"
             )
 
-
     return [USER_MAP, DATASET_MAP]