Skip to content

Commit

Permalink
Merge pull request #209 from pysat/enh/103_104_zip
Browse files Browse the repository at this point in the history
ENH: unzip support for download
  • Loading branch information
jklenzing authored Oct 31, 2023
2 parents 8945d9d + 756b952 commit 939d26d
Show file tree
Hide file tree
Showing 3 changed files with 89 additions and 8 deletions.
4 changes: 3 additions & 1 deletion CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -2,12 +2,14 @@
All notable changes to this project will be documented in this file.
This project adheres to [Semantic Versioning](https://semver.org/).

## [0.X.X] - 2023-XX-XX
## [0.0.6] - 2023-XX-XX
* New Instruments
* MAVEN mag
* MAVEN SEP
* MAVEN in situ key parameters
* REACH Dosimeter
* New Features
* Allow files to be unzipped after download
* Bug Fixes
* Fix general clean routine to skip transformation matrices
* New window needs to be integer for calculate_imf_steadiness
Expand Down
70 changes: 63 additions & 7 deletions pysatNASA/instruments/methods/cdaweb.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,8 +12,10 @@
import os
import pandas as pds
import requests
import tempfile
from time import sleep
import xarray as xr
import zipfile

from bs4 import BeautifulSoup
from cdasws import CdasWs
Expand Down Expand Up @@ -471,7 +473,6 @@ def load_xarray(fnames, tag='', inst_id='',
return data, meta


# TODO(#103): Include support to unzip / untar files after download.
def download(date_array, data_path, tag='', inst_id='', supported_tags=None,
remote_url='https://cdaweb.gsfc.nasa.gov'):
"""Download NASA CDAWeb data.
Expand Down Expand Up @@ -514,6 +515,7 @@ def download(date_array, data_path, tag='', inst_id='', supported_tags=None,
"""

# Get information about remote data product location
inst_dict = try_inst_dict(inst_id, tag, supported_tags)

# Naming scheme for files on the CDAWeb server
Expand All @@ -526,6 +528,13 @@ def download(date_array, data_path, tag='', inst_id='', supported_tags=None,
start=date_array[0],
stop=date_array[-1])

# Create temporary directory if files need to be unzipped.
if 'zip_method' in inst_dict.keys():
zip_method = inst_dict['zip_method']
temp_dir = tempfile.TemporaryDirectory()
else:
zip_method = None

# Download only requested files that exist remotely
for date, fname in remote_files.items():
# Format files for specific dates and download location
Expand All @@ -548,18 +557,19 @@ def download(date_array, data_path, tag='', inst_id='', supported_tags=None,
formatted_remote_dir.strip('/'),
fname))

saved_local_fname = os.path.join(data_path, fname)

# Perform download
logger.info(' '.join(('Attempting to download file for',
date.strftime('%d %B %Y'))))
try:
with requests.get(remote_path) as req:
if req.status_code != 404:
with open(saved_local_fname, 'wb') as open_f:
open_f.write(req.content)
logger.info('Successfully downloaded {:}.'.format(
saved_local_fname))
if zip_method:
get_file(req.content, data_path, fname,
temp_path=temp_dir.name, zip_method=zip_method)
else:
get_file(req.content, data_path, fname)
logger.info(''.join(('Successfully downloaded ',
fname, '.')))
else:
logger.info(' '.join(('File not available for',
date.strftime('%d %B %Y'))))
Expand All @@ -568,6 +578,52 @@ def download(date_array, data_path, tag='', inst_id='', supported_tags=None,
date.strftime('%d %B %Y'))))
# Pause to avoid excessive pings to server
sleep(0.2)

if zip_method:
# Cleanup temporary directory
temp_dir.cleanup()

return


def get_file(remote_file, data_path, fname, temp_path=None, zip_method=None):
    """Retrieve a file, unzipping if necessary.

    Parameters
    ----------
    remote_file : bytes
        File content retrieved via requests.
    data_path : str
        Path to pysat archival directory.
    fname : str
        Name of file on the remote server.
    temp_path : str or NoneType
        Path to a temporary directory used to stage a zipped download before
        extraction. If None and `zip_method` is set, a temporary directory is
        created for this call and removed afterwards. (default=None)
    zip_method : str or NoneType
        The method used to zip the file. Supports 'zip' and None.
        If None, downloads files directly. (default=None)

    Note
    ----
    If `zip_method` is set to an unrecognized value, the content is only
    written to the temporary location and a warning is logged; nothing is
    placed in `data_path`.

    """

    if zip_method and temp_path is None:
        # No staging area was supplied. Joining None with a file name would
        # raise a TypeError, so stage the download in a short-lived directory
        # that is cleaned up as soon as the extraction has finished.
        with tempfile.TemporaryDirectory() as scratch_dir:
            get_file(remote_file, data_path, fname, temp_path=scratch_dir,
                     zip_method=zip_method)
        return

    if zip_method:
        # Use a temporary location.
        dl_fname = os.path.join(temp_path, fname)
    else:
        # Use the pysat data directory.
        dl_fname = os.path.join(data_path, fname)

    # Write the downloaded content to the desired destination.
    with open(dl_fname, 'wb') as open_f:
        open_f.write(remote_file)

    # Unzip the archive from the temporary directory into the data directory.
    if zip_method == 'zip':
        with zipfile.ZipFile(dl_fname, 'r') as open_zip:
            open_zip.extractall(data_path)

    elif zip_method is not None:
        logger.warning('{:} is not a recognized zip method'.format(zip_method))

    return


Expand Down
23 changes: 23 additions & 0 deletions pysatNASA/tests/test_methods_cdaweb.py
Original file line number Diff line number Diff line change
@@ -1,8 +1,10 @@
"""Unit tests for the cdaweb instrument methods."""

import datetime as dt
import logging
import pandas as pds
import requests
import tempfile

import pytest

Expand Down Expand Up @@ -50,6 +52,27 @@ def test_load_with_empty_file_list(self):
assert meta is None
return

def test_bad_zip_warning_get_files(self, caplog):
    """Test that warning is raised for unsupported zip method."""

    # Specify small file to get. A timeout keeps an unresponsive server
    # from hanging the test run indefinitely.
    url = 'https://cdaweb.gsfc.nasa.gov/pub/000_readme.txt'
    req = requests.get(url, timeout=60)

    # Download to a temporary location that is removed when the block exits,
    # regardless of whether the call below raises.
    with tempfile.TemporaryDirectory() as temp_dir:
        with caplog.at_level(logging.WARNING, logger='pysat'):
            cdw.get_file(req.content, '.', 'test.txt', temp_path=temp_dir,
                         zip_method='badzip')
        captured = caplog.text

    # Check for appropriate warning
    warn_msg = "not a recognized zip method"
    assert warn_msg in captured

    return

@pytest.mark.parametrize("bad_key,bad_val,err_msg",
[("tag", "badval", "inst_id / tag combo unknown."),
("inst_id", "badval",
Expand Down

0 comments on commit 939d26d

Please sign in to comment.