From 52ddd82ced0b98c639d602ae3c1af78badebd27e Mon Sep 17 00:00:00 2001 From: Alex Merose Date: Sun, 25 Feb 2024 20:15:49 +0700 Subject: [PATCH] Debugged demo join; now focused on tuning performance. --- demo/mara_join.py | 20 +++++++++++++------- 1 file changed, 13 insertions(+), 7 deletions(-) diff --git a/demo/mara_join.py b/demo/mara_join.py index 39214c8..b71838e 100644 --- a/demo/mara_join.py +++ b/demo/mara_join.py @@ -7,28 +7,34 @@ mv_df = dd.read_csv( 'https://raw.githubusercontent.com/wildlife-dynamics/ecoscope/master/tests/' - 'sample_data/vector/movbank_data.csv' + 'sample_data/vector/movbank_data.csv', ) -mv_df['timestamp'] = dd.to_datetime(mv_df['timestamp'], utc=True) +mv_df['timestamp'] = dd.to_datetime(mv_df['timestamp']) +mv_df.set_index('timestamp', drop=False, sort=True) mv_df['geometry'] = gdd.points_from_xy( - mv_df['location-long'], mv_df['location-lat'] + mv_df, 'location-long', 'location-lat', crs=4326 +) +timerange = slice( + mv_df.timestamp.min().compute(), + mv_df.timestamp.max().compute(), ) -# What is the CRS? mv_gdf = gdd.from_dask_dataframe(mv_df, 'geometry') +# For MARA, we'd replace this with an Xee call. era5_ds = xr.open_zarr( 'gs://gcp-public-data-arco-era5/ar/' '1959-2022-full_37-1h-0p25deg-chunk-1.zarr-v2', - chunks={'time': 240, 'level': 1} + chunks={'time': 48, 'level': 1} ) era5_wind_ds = era5_ds[['u_component_of_wind', 'v_component_of_wind']].sel( - time=slice(mv_gdf['timestamp'].min(), mv_gdf['timestamp'].max()) + time=timerange, + level=1000, # surface level only. ) era5_wind_df = qr.to_dd(era5_wind_ds) # What is the CRS? era5_wind_df['geometry'] = gdd.points_from_xy( - era5_wind_df['longitude'], era5_wind_df['latitude'], era5_wind_df['level'] + era5_wind_df, 'longitude', 'latitude', ) era5_wind_gdf = gdd.from_dask_dataframe(era5_wind_df, 'geometry')