Use the ID instead of persistentID for Dataverse downloads (#355)

Not every Dataverse server implements persistent IDs for files, but every file always has an ID. Use the file's ID instead of its persistent ID to build the download URL. Co-authored-by: Leonardo Uieda <[email protected]>
fatiando · Feb 19, 2024 · c256699 · c256699
1 parent ec7f3ee
commit c256699
Showing 1 changed file with 15 additions and 11 deletions.
diff --git a/pooch/downloaders.py b/pooch/downloaders.py
@@ -1124,18 +1124,22 @@ def download_url(self, file_name):
             The HTTP URL that can be used to download the file.
         """
         parsed = parse_url(self.archive_url)
-
-        # Iterate over the given files until we find one of the requested name
-        for filedata in self.api_response.json()["data"]["latestVersion"]["files"]:
-            if file_name == filedata["dataFile"]["filename"]:
-                return (
-                    f"{parsed['protocol']}://{parsed['netloc']}/api/access/datafile/"
-                    f":persistentId?persistentId={filedata['dataFile']['persistentId']}"
-                )
-
-        raise ValueError(
-            f"File '{file_name}' not found in data archive {self.archive_url} (doi:{self.doi})."
+        response = self.api_response.json()
+        files = {
+            file["dataFile"]["filename"]: file["dataFile"]
+            for file in response["data"]["latestVersion"]["files"]
+        }
+        if file_name not in files:
+            raise ValueError(
+                f"File '{file_name}' not found in data archive "
+                f"{self.archive_url} (doi:{self.doi})."
+            )
+        # Generate download_url using the file id
+        download_url = (
+            f"{parsed['protocol']}://{parsed['netloc']}/api/access/datafile/"
+            f"{files[file_name]['id']}"
         )
+        return download_url
 
     def populate_registry(self, pooch):
         """