diff --git a/CHANGELOG.md b/CHANGELOG.md index afdb541d..f4a311cb 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -2,12 +2,14 @@ All notable changes to this project will be documented in this file. This project adheres to [Semantic Versioning](https://semver.org/). -## [0.X.X] - 2023-XX-XX +## [0.0.6] - 2023-XX-XX * New Instruments * MAVEN mag * MAVEN SEP * MAVEN in situ key parameters * REACH Dosimeter +* New Features + * Allow files to be unzipped after download * Bug Fixes * Fix general clean routine to skip transformation matrices * New window needs to be integer for calculate_imf_steadiness diff --git a/pysatNASA/instruments/methods/cdaweb.py b/pysatNASA/instruments/methods/cdaweb.py index 3e3340fb..37db9ce0 100644 --- a/pysatNASA/instruments/methods/cdaweb.py +++ b/pysatNASA/instruments/methods/cdaweb.py @@ -12,8 +12,10 @@ import os import pandas as pds import requests +import tempfile from time import sleep import xarray as xr +import zipfile from bs4 import BeautifulSoup from cdasws import CdasWs @@ -471,7 +473,6 @@ def load_xarray(fnames, tag='', inst_id='', return data, meta -# TODO(#103): Include support to unzip / untar files after download. def download(date_array, data_path, tag='', inst_id='', supported_tags=None, remote_url='https://cdaweb.gsfc.nasa.gov'): """Download NASA CDAWeb data. @@ -514,6 +515,7 @@ def download(date_array, data_path, tag='', inst_id='', supported_tags=None, """ + # Get information about remote data product location inst_dict = try_inst_dict(inst_id, tag, supported_tags) # Naming scheme for files on the CDAWeb server @@ -526,6 +528,13 @@ def download(date_array, data_path, tag='', inst_id='', supported_tags=None, start=date_array[0], stop=date_array[-1]) + # Create temporary directory if files need to be unzipped. 
+ if 'zip_method' in inst_dict.keys(): + zip_method = inst_dict['zip_method'] + temp_dir = tempfile.TemporaryDirectory() + else: + zip_method = None + # Download only requested files that exist remotely for date, fname in remote_files.items(): # Format files for specific dates and download location @@ -548,18 +557,19 @@ def download(date_array, data_path, tag='', inst_id='', supported_tags=None, formatted_remote_dir.strip('/'), fname)) - saved_local_fname = os.path.join(data_path, fname) - # Perform download logger.info(' '.join(('Attempting to download file for', date.strftime('%d %B %Y')))) try: with requests.get(remote_path) as req: if req.status_code != 404: - with open(saved_local_fname, 'wb') as open_f: - open_f.write(req.content) - logger.info('Successfully downloaded {:}.'.format( - saved_local_fname)) + if zip_method: + get_file(req.content, data_path, fname, + temp_path=temp_dir.name, zip_method=zip_method) + else: + get_file(req.content, data_path, fname) + logger.info(''.join(('Successfully downloaded ', + fname, '.'))) else: logger.info(' '.join(('File not available for', date.strftime('%d %B %Y')))) @@ -568,6 +578,52 @@ def download(date_array, data_path, tag='', inst_id='', supported_tags=None, date.strftime('%d %B %Y')))) # Pause to avoid excessive pings to server sleep(0.2) + + if zip_method: + # Cleanup temporary directory + temp_dir.cleanup() + + return + + +def get_file(remote_file, data_path, fname, temp_path=None, zip_method=None): + """Retrieve a file, unzipping if necessary. + + Parameters + ---------- + remote_file : file content + File content retrieved via requests. + data_path : str + Path to pysat archival directory. + fname : str + Name of file on the remote server. + temp_path : str + Path to temporary directory. (default=None) + zip_method : str + The method used to zip the file. Supports 'zip' and None. + If None, downloads files directly. (default=None) + + """ + + if zip_method: + # Use a temporary location. 
+ dl_fname = os.path.join(temp_path, fname) + else: + # Use the pysat data directory. + dl_fname = os.path.join(data_path, fname) + + # Download the file to desired destination. + with open(dl_fname, 'wb') as open_f: + open_f.write(remote_file) + + # Unzip and move the files from the temporary directory. + if zip_method == 'zip': + with zipfile.ZipFile(dl_fname, 'r') as open_zip: + open_zip.extractall(data_path) + + elif zip_method is not None: + logger.warning('{:} is not a recognized zip method'.format(zip_method)) + return diff --git a/pysatNASA/tests/test_methods_cdaweb.py b/pysatNASA/tests/test_methods_cdaweb.py index dd7870e6..c42e0e00 100644 --- a/pysatNASA/tests/test_methods_cdaweb.py +++ b/pysatNASA/tests/test_methods_cdaweb.py @@ -1,8 +1,10 @@ """Unit tests for the cdaweb instrument methods.""" import datetime as dt +import logging import pandas as pds import requests +import tempfile import pytest @@ -50,6 +52,27 @@ def test_load_with_empty_file_list(self): assert meta is None return + def test_bad_zip_warning_get_files(self, caplog): + """Test that warning is raised for unsupported zip method.""" + + # Specify small file to get + url = 'https://cdaweb.gsfc.nasa.gov/pub/000_readme.txt' + req = requests.get(url) + + # Download to temporary location + temp_dir = tempfile.TemporaryDirectory() + + with caplog.at_level(logging.WARNING, logger='pysat'): + cdw.get_file(req.content, '.', 'test.txt', temp_path=temp_dir.name, + zip_method='badzip') + captured = caplog.text + + # Check for appropriate warning + warn_msg = "not a recognized zip method" + assert warn_msg in captured + + return + @pytest.mark.parametrize("bad_key,bad_val,err_msg", [("tag", "badval", "inst_id / tag combo unknown."), ("inst_id", "badval",