Skip to content

Commit

Permalink
Debugged demo join; now focused on tuning performance.
Browse files: browse the repository at this point in the history
  • Loading branch information
alxmrs committed Feb 26, 2024
1 parent 15d931a commit 52ddd82
Showing 1 changed file with 13 additions and 7 deletions.
20 changes: 13 additions & 7 deletions demo/mara_join.py
Original file line number | Diff line number | Diff line change
Expand Up @@ -7,28 +7,34 @@

mv_df = dd.read_csv(
'https://raw.githubusercontent.com/wildlife-dynamics/ecoscope/master/tests/'
'sample_data/vector/movbank_data.csv'
'sample_data/vector/movbank_data.csv',
)

mv_df['timestamp'] = dd.to_datetime(mv_df['timestamp'], utc=True)
mv_df['timestamp'] = dd.to_datetime(mv_df['timestamp'])
mv_df.set_index('timestamp', drop=False, sort=True)
mv_df['geometry'] = gdd.points_from_xy(
mv_df['location-long'], mv_df['location-lat']
mv_df, 'location-long', 'location-lat', crs=4326
)
timerange = slice(
mv_df.timestamp.min().compute(),
mv_df.timestamp.max().compute(),
)
# What is the CRS?
mv_gdf = gdd.from_dask_dataframe(mv_df, 'geometry')

# For MARA, we'd replace this with an Xee call.
era5_ds = xr.open_zarr(
'gs://gcp-public-data-arco-era5/ar/'
'1959-2022-full_37-1h-0p25deg-chunk-1.zarr-v2',
chunks={'time': 240, 'level': 1}
chunks={'time': 48, 'level': 1}
)
era5_wind_ds = era5_ds[['u_component_of_wind', 'v_component_of_wind']].sel(
time=slice(mv_gdf['timestamp'].min(), mv_gdf['timestamp'].max())
time=timerange,
level=1000, # surface level only.
)
era5_wind_df = qr.to_dd(era5_wind_ds)
# What is the CRS?
era5_wind_df['geometry'] = gdd.points_from_xy(
era5_wind_df['longitude'], era5_wind_df['latitude'], era5_wind_df['level']
era5_wind_df, 'longitude', 'latitude',
)
era5_wind_gdf = gdd.from_dask_dataframe(era5_wind_df, 'geometry')

Expand Down

0 comments on commit 52ddd82

Please sign in to comment.