Skip to content

Commit

Permalink
Use the ID instead of persistentID for Dataverse downloads (#355)
Browse files Browse the repository at this point in the history
Not every Dataverse server implements persistent IDs for files, but every file
always has an ID. So we should use the file ID instead to build the download URL.

Co-authored-by: Leonardo Uieda <[email protected]>
  • Loading branch information
santisoler and leouieda authored Feb 19, 2024
1 parent ec7f3ee commit c256699
Showing 1 changed file with 15 additions and 11 deletions.
26 changes: 15 additions & 11 deletions pooch/downloaders.py
Original file line number Diff line number Diff line change
Expand Up @@ -1124,18 +1124,22 @@ def download_url(self, file_name):
The HTTP URL that can be used to download the file.
"""
parsed = parse_url(self.archive_url)

# Iterate over the given files until we find one of the requested name
for filedata in self.api_response.json()["data"]["latestVersion"]["files"]:
if file_name == filedata["dataFile"]["filename"]:
return (
f"{parsed['protocol']}://{parsed['netloc']}/api/access/datafile/"
f":persistentId?persistentId={filedata['dataFile']['persistentId']}"
)

raise ValueError(
f"File '{file_name}' not found in data archive {self.archive_url} (doi:{self.doi})."
response = self.api_response.json()
files = {
file["dataFile"]["filename"]: file["dataFile"]
for file in response["data"]["latestVersion"]["files"]
}
if file_name not in files:
raise ValueError(
f"File '{file_name}' not found in data archive "
f"{self.archive_url} (doi:{self.doi})."
)
# Generate download_url using the file id
download_url = (
f"{parsed['protocol']}://{parsed['netloc']}/api/access/datafile/"
f"{files[file_name]['id']}"
)
return download_url

def populate_registry(self, pooch):
"""
Expand Down

0 comments on commit c256699

Please sign in to comment.