diff --git a/README.md b/README.md index 0a02163..6bdb99e 100644 --- a/README.md +++ b/README.md @@ -24,8 +24,15 @@ pre-commit install For making changes, see the [guidance on development](https://github.com/alan-turing-institute/python-project-template?tab=readme-ov-file#setting-up-a-new-project) from the template that generated this project. ## Usage + +Example: + +```bash +cloudcasting download "2020-06-01 00:00" "2020-06-30 23:55" "path/to/my/dir/data.zarr" +``` + ```bash - cloudcasting download --help +> cloudcasting download --help Usage: cloudcasting download [OPTIONS] START_DATE END_DATE OUTPUT_DIRECTORY @@ -57,37 +64,38 @@ For making changes, see the [guidance on development](https://github.com/alan-tu │ [default: None] │ │ [required] │ ╰────────────────────────────────────────────────────────────────────────────────╯ -╭─ Options ──────────────────────────────────────────────────────────────────────╮ -│ --data-inner-steps INTEGER Data will be sliced │ -│ into │ -│ data_inner_steps*5mi… │ -│ chunks │ -│ [default: 3] │ -│ --get-hrv --no-get-hrv Whether to download │ -│ HRV data │ -│ [default: no-get-hrv] │ -│ --override-date-boun… --no-override-date… Whether to override │ -│ date range limits │ -│ [default: │ -│ no-override-date-bou… │ -│ --lon-min FLOAT Minimum longitude │ -│ [default: -16] │ -│ --lon-max FLOAT Maximum longitude │ -│ [default: 10] │ -│ --lat-min FLOAT Minimum latitude │ -│ [default: 45] │ -│ --lat-max FLOAT Maximum latitude │ -│ [default: 70] │ -│ --help Show this message and │ -│ exit. 
│ -╰────────────────────────────────────────────────────────────────────────────────╯ - -``` +╭─ Options ─────────────────────────────────────────────────────────────────────╮ +│ --data-inner-steps INTEGER Data will be sliced │ +│ into │ +│ data_inner_steps*5mi… │ +│ chunks │ +│ [default: 3] │ +│ --get-hrv --no-get-hrv Whether to download │ +│ HRV data │ +│ [default: no-get-hrv] │ +│ --override-date-bou… --no-override-date… Whether to override │ +│ date range limits │ +│ [default: │ +│ no-override-date-bou… │ +│ --lon-min FLOAT Minimum longitude │ +│ [default: -16] │ +│ --lon-max FLOAT Maximum longitude │ +│ [default: 10] │ +│ --lat-min FLOAT Minimum latitude │ +│ [default: 45] │ +│ --lat-max FLOAT Maximum latitude │ +│ [default: 70] │ +│ --valid-set --no-valid-set Whether to filter │ +│ data from 2022 to │ +│ download the │ +│ validation set (every │ +│ 2 weeks). │ +│ [default: │ +│ no-valid-set] │ +│ --help Show this message and │ +│ exit. │ +╰───────────────────────────────────────────────────────────────────────────────╯ -Example: - -```bash -cloudcasting download "2020-06-01 00:00" "2020-06-30 23:55" "path/to/my/dir/data.zarr" ``` diff --git a/src/cloudcasting/download.py b/src/cloudcasting/download.py index d7e5f3c..47eb132 100644 --- a/src/cloudcasting/download.py +++ b/src/cloudcasting/download.py @@ -84,6 +84,12 @@ def download_satellite_data( lon_max: Annotated[float, typer.Option(help="Maximum longitude")] = 10, lat_min: Annotated[float, typer.Option(help="Minimum latitude")] = 45, lat_max: Annotated[float, typer.Option(help="Maximum latitude")] = 70, + valid_set: Annotated[ + bool, + typer.Option( + help="Whether to filter data from 2022 to download the validation set (every 2 weeks)." + ), + ] = False, ) -> None: """ Download a selection of the available EUMETSAT data. @@ -154,6 +160,21 @@ def download_satellite_data( # in `data_inner_steps` (which should be slighly more robust to missing values). 
ds = ds.sel(time=np.mod(ds.time.dt.minute, data_inner_steps * 5) == 0) + if year == 2022: + set_str = "Validation" if valid_set else "Training" + week_str = "3" if valid_set else "1" + logger.info("Data in 2022 will be downloaded every 2 weeks due to train/valid split.") + logger.info("%s set selected: starting week will be %s", set_str, week_str) + # integer division by 14 will tell us the week we're on. + # checking the mod wrt 2 will let us select every 2 weeks (weeks are 1-indexed). + # valid set is defined as from week 3-4, 7-8 etc. (where the mod is != 0). + mask = ( + np.mod(ds.time.dt.day // 14, 2) != 0 + if valid_set + else np.mod(ds.time.dt.day // 14, 2) == 0 + ) + ds = ds.sel(time=mask) + # Convert lon-lat bounds to geostationary-coords (x_min, x_max), (y_min, y_max) = lon_lat_to_geostationary_area_coords( [lon_min, lon_max],