From aa5f554c1ec88b9151b7f2bdc7562176b8805ac2 Mon Sep 17 00:00:00 2001 From: Kalle Westerling Date: Fri, 3 Nov 2023 16:04:22 +0000 Subject: [PATCH 1/4] Docstrings --- deepsensor/active_learning/acquisition_fns.py | 10 +- deepsensor/data/loader.py | 128 +++++++++++++----- deepsensor/data/task.py | 48 ++++--- deepsensor/plot.py | 64 ++++++--- 4 files changed, 173 insertions(+), 77 deletions(-) diff --git a/deepsensor/active_learning/acquisition_fns.py b/deepsensor/active_learning/acquisition_fns.py index fb70ad86..c62271a3 100644 --- a/deepsensor/active_learning/acquisition_fns.py +++ b/deepsensor/active_learning/acquisition_fns.py @@ -31,7 +31,7 @@ def __init__( context_set_idx (int): Index of context set to add new observations to when computing the acquisition function. - target_set_idx (int): + target_set_idx (int): Index of target set to compute acquisition function for. """ self.model = model @@ -299,7 +299,9 @@ def __call__(self, task: Task): if isinstance(pred, list): pred = pred[self.target_set_idx] true = task["Y_t"][self.target_set_idx] - return -np.mean(norm.logpdf(true, loc=pred, scale=self.model.stddev(task))) + return -np.mean( + norm.logpdf(true, loc=pred, scale=self.model.stddev(task)) + ) class OracleJointNLL(AcquisitionFunctionOracle): @@ -480,6 +482,8 @@ def __call__(self, task: Task, X_s: np.ndarray, **kwargs) -> np.ndarray: # Compute the expected improvement Z = (mean - best_target_value) / stddev - ei = stddev * (mean - best_target_value) * norm.cdf(Z) + stddev * norm.pdf(Z) + ei = stddev * (mean - best_target_value) * norm.cdf( + Z + ) + stddev * norm.pdf(Z) return ei diff --git a/deepsensor/data/loader.py b/deepsensor/data/loader.py index 1dec965d..54adc9df 100644 --- a/deepsensor/data/loader.py +++ b/deepsensor/data/loader.py @@ -25,7 +25,7 @@ class TaskLoader: - If all data passed as paths, load the data and overwrite the paths with the loaded data - Either all data is passed as paths, or all data is passed as loaded data (else ``ValueError``) - If all data passed as paths, the TaskLoader can be saved with the ``save`` method - (using config) + (using config) Args: task_loader_ID: @@ -97,7 +97,9 @@ def __init__( str, List[Union[xr.DataArray, xr.Dataset, pd.DataFrame, str]], ] = None, - aux_at_contexts: Optional[Tuple[int, Union[xr.DataArray, xr.Dataset]]] = None, + aux_at_contexts: Optional[ + Tuple[int, Union[xr.DataArray, xr.Dataset]] + ] = None, aux_at_targets: Optional[ Union[ xr.DataArray, @@ -130,7 +132,9 @@ def __init__( self.target_delta_t = self.config["target_delta_t"] self.time_freq = self.config["time_freq"] self.xarray_interp_method = self.config["xarray_interp_method"] - self.discrete_xarray_sampling = self.config["discrete_xarray_sampling"] + self.discrete_xarray_sampling = self.config[ + "discrete_xarray_sampling" + ] self.dtype = self.config["dtype"] else: self.context = context @@ -283,7 +287,9 @@ def _load_pandas_or_xarray(path): def _load_data(data): if isinstance(data, (tuple, list)): - data = tuple([_load_pandas_or_xarray(data_i) for data_i in data]) + data = tuple( + [_load_pandas_or_xarray(data_i) for data_i in data] + ) else: data = _load_pandas_or_xarray(data) return data @@ -344,7 +350,9 @@ def cast_to_dtype(var): # Note: Numeric pandas indexes are always cast to float64, so we have to cast # x1/x2 coord dtypes during task sampling else: - raise ValueError(f"Unknown type {type(var)} for context set {var}") + raise ValueError( + f"Unknown type {type(var)} for context set {var}" + ) return var if var is None: @@ -409,22 +417,28 @@ def count_data_dims_of_tuple_of_sets(datasets): elif isinstance(var, xr.DataArray): dim = 1 # Single data variable elif isinstance(var, pd.DataFrame): - dim = len(var.columns) # Assumes all columns are data variables + dim = len( + var.columns + ) # Assumes all columns are data variables elif isinstance(var, pd.Series): dim = 1 # Single data variable else: - raise ValueError(f"Unknown type {type(var)} for context set {var}") + raise ValueError( + f"Unknown type {type(var)} for context set {var}" + ) dims.append(dim) return dims context_dims = count_data_dims_of_tuple_of_sets(self.context) target_dims = count_data_dims_of_tuple_of_sets(self.target) if self.aux_at_contexts is not None: - context_dims += count_data_dims_of_tuple_of_sets(self.aux_at_contexts) + context_dims += count_data_dims_of_tuple_of_sets( + self.aux_at_contexts + ) if self.aux_at_targets is not None: - aux_at_target_dims = count_data_dims_of_tuple_of_sets(self.aux_at_targets)[ - 0 - ] + aux_at_target_dims = count_data_dims_of_tuple_of_sets( + self.aux_at_targets + )[0] else: aux_at_target_dims = 0 @@ -455,13 +469,17 @@ def infer_var_IDs_of_tuple_of_sets(datasets, delta_ts=None): if isinstance(var, xr.DataArray): var_ID = (var.name,) # Single data variable elif isinstance(var, xr.Dataset): - var_ID = tuple(var.data_vars.keys()) # Multiple data variables + var_ID = tuple( + var.data_vars.keys() + ) # Multiple data variables elif isinstance(var, pd.DataFrame): var_ID = tuple(var.columns) elif isinstance(var, pd.Series): var_ID = (var.name,) else: - raise ValueError(f"Unknown type {type(var)} for context set {var}") + raise ValueError( + f"Unknown type {type(var)} for context set {var}" + ) if delta_ts is not None: # Add delta_t to the variable ID @@ -485,15 +503,17 @@ def infer_var_IDs_of_tuple_of_sets(datasets, delta_ts=None): ) if self.aux_at_contexts is not None: - context_var_IDs += infer_var_IDs_of_tuple_of_sets(self.aux_at_contexts) + context_var_IDs += infer_var_IDs_of_tuple_of_sets( + self.aux_at_contexts + ) context_var_IDs_and_delta_t += infer_var_IDs_of_tuple_of_sets( self.aux_at_contexts, [0] ) if self.aux_at_targets is not None: - aux_at_target_var_IDs = infer_var_IDs_of_tuple_of_sets(self.aux_at_targets)[ - 0 - ] + aux_at_target_var_IDs = infer_var_IDs_of_tuple_of_sets( + self.aux_at_targets + )[0] else: aux_at_target_var_IDs = None @@ -505,7 +525,9 @@ def infer_var_IDs_of_tuple_of_sets(datasets, delta_ts=None): aux_at_target_var_IDs, ) - def _check_links(self, links: Union[Tuple[int, int], List[Tuple[int, int]]]): + def _check_links( + self, links: Union[Tuple[int, int], List[Tuple[int, int]]] + ): """ Check that the context-target links are valid. @@ -531,7 +553,9 @@ def _check_links(self, links: Union[Tuple[int, int], List[Tuple[int, int]]]): assert isinstance( links, list ), f"Links must be a list of length-2 tuples, but got {type(links)}" - assert len(links) > 0, "If links is not None, it must be a non-empty list" + assert ( + len(links) > 0 + ), "If links is not None, it must be a non-empty list" assert all( isinstance(link, tuple) for link in links ), f"Links must be a list of tuples, but got {[type(link) for link in links]}" @@ -629,9 +653,15 @@ def sample_da( dim = da.shape[0] if da.ndim == 3 else 1 Y_c = np.zeros((dim, 0), dtype=self.dtype) return X_c, Y_c - x1 = rng.uniform(da.coords["x1"].min(), da.coords["x1"].max(), N) - x2 = rng.uniform(da.coords["x2"].min(), da.coords["x2"].max(), N) - Y_c = da.sel(x1=xr.DataArray(x1), x2=xr.DataArray(x2), method="nearest") + x1 = rng.uniform( + da.coords["x1"].min(), da.coords["x1"].max(), N + ) + x2 = rng.uniform( + da.coords["x2"].min(), da.coords["x2"].max(), N + ) + Y_c = da.sel( + x1=xr.DataArray(x1), x2=xr.DataArray(x2), method="nearest" + ) Y_c = np.array(Y_c, dtype=self.dtype) X_c = np.array([x1, x2], dtype=self.dtype) @@ -709,7 +739,11 @@ def sample_df( N = sampling_strat rng = np.random.default_rng(seed) idx = rng.choice(df.index, N) - X_c = df.loc[idx].reset_index()[["x1", "x2"]].values.T.astype(self.dtype) + X_c = ( + df.loc[idx] + .reset_index()[["x1", "x2"]] + .values.T.astype(self.dtype) + ) Y_c = df.loc[idx].values.T elif isinstance(sampling_strat, str) and sampling_strat in [ "all", @@ -939,7 +973,9 @@ def check_sampling_strat(sampling_strat, set): ) for strat in sampling_strat: - if not isinstance(strat, (str, int, np.integer, float, np.ndarray)): + if not isinstance( + strat, (str, int, np.integer, float, np.ndarray) + ): raise InvalidSamplingStrategyError( f"Unknown sampling strategy {strat} of type {type(strat)}" ) @@ -994,7 +1030,9 @@ def sample_variable(var, sampling_strat, seed): elif isinstance(var, (pd.DataFrame, pd.Series)): X, Y = self.sample_df(var, sampling_strat, seed) else: - raise ValueError(f"Unknown type {type(var)} for context set " f"{var}") + raise ValueError( + f"Unknown type {type(var)} for context set " f"{var}" + ) return X, Y # Check that the sampling strategies are valid @@ -1002,7 +1040,9 @@ def sample_variable(var, sampling_strat, seed): target_sampling = check_sampling_strat(target_sampling, self.target) # Check `split_frac if split_frac < 0 or split_frac > 1: - raise ValueError(f"split_frac must be between 0 and 1, got {split_frac}") + raise ValueError( + f"split_frac must be between 0 and 1, got {split_frac}" + ) if self.links is None: b1 = any( [ @@ -1095,8 +1135,12 @@ def sample_variable(var, sampling_strat, seed): # Perform the split sampling strategy for linked context and target sets at this point # while we have the full context and target data in scope - context_split_idxs = np.where(np.array(context_sampling) == "split")[0] - target_split_idxs = np.where(np.array(target_sampling) == "split")[0] + context_split_idxs = np.where( + np.array(context_sampling) == "split" + )[0] + target_split_idxs = np.where(np.array(target_sampling) == "split")[ + 0 + ] assert len(context_split_idxs) == len(target_split_idxs), ( f"Number of context sets with 'split' sampling strategy " f"({len(context_split_idxs)}) must match number of target sets " @@ -1149,8 +1193,12 @@ def sample_variable(var, sampling_strat, seed): # Perform the gapfill sampling strategy for linked context and target sets at this point # while we have the full context and target data in scope - context_gapfill_idxs = np.where(np.array(context_sampling) == "gapfill")[0] - target_gapfill_idxs = np.where(np.array(target_sampling) == "gapfill")[0] + context_gapfill_idxs = np.where( + np.array(context_sampling) == "gapfill" + )[0] + target_gapfill_idxs = np.where( + np.array(target_sampling) == "gapfill" + )[0] assert len(context_gapfill_idxs) == len(target_gapfill_idxs), ( f"Number of context sets with 'gapfill' sampling strategy " f"({len(context_gapfill_idxs)}) must match number of target sets " @@ -1180,9 +1228,13 @@ def sample_variable(var, sampling_strat, seed): # Keep trying until we get a target set with at least one target point keep_searching = True while keep_searching: - added_mask_date = rng.choice(self.context[context_idx].time) + added_mask_date = rng.choice( + self.context[context_idx].time + ) added_mask = ( - self.context[context_idx].sel(time=added_mask_date).isnull() + self.context[context_idx] + .sel(time=added_mask_date) + .isnull() ) curr_mask = context_var.isnull() @@ -1193,7 +1245,9 @@ def sample_variable(var, sampling_strat, seed): # when we could just slice the target values here target_mask = added_mask & ~curr_mask if isinstance(target_var, xr.Dataset): - keep_searching = np.all(target_mask.to_array().data == False) + keep_searching = np.all( + target_mask.to_array().data == False + ) else: keep_searching = np.all(target_mask.data == False) if keep_searching: @@ -1224,7 +1278,9 @@ def sample_variable(var, sampling_strat, seed): if self.aux_at_contexts is not None: # Add auxiliary variable sampled at context set as a new context variable - X_c_offgrid = [X_c for X_c in task["X_c"] if not isinstance(X_c, tuple)] + X_c_offgrid = [ + X_c for X_c in task["X_c"] if not isinstance(X_c, tuple) + ] if len(X_c_offgrid) == 0: # No offgrid context sets X_c_offrid_all = np.empty((2, 0), dtype=self.dtype) @@ -1269,7 +1325,9 @@ def __call__(self, date, *args, **kwargs): Task object or list of task objects for each date containing the context and target data. """ - if isinstance(date, (list, tuple, pd.core.indexes.datetimes.DatetimeIndex)): + if isinstance( + date, (list, tuple, pd.core.indexes.datetimes.DatetimeIndex) + ): return [self.task_generation(d, *args, **kwargs) for d in date] else: return self.task_generation(date, *args, **kwargs) diff --git a/deepsensor/data/task.py b/deepsensor/data/task.py index d858902c..2b4392eb 100644 --- a/deepsensor/data/task.py +++ b/deepsensor/data/task.py @@ -54,8 +54,7 @@ def summarise_repr(cls, k, v) -> str: Value of the task dictionary. Returns: - str: - String representation of the task. + str: String representation of the task. """ if v is None: return "None" @@ -275,7 +274,9 @@ def mask_nans_numpy(self): missing values are. """ if "batch_dim" not in self["ops"]: - raise ValueError("Must call `add_batch_dim` before `mask_nans_numpy`") + raise ValueError( + "Must call `add_batch_dim` before `mask_nans_numpy`" + ) def f(arr): if isinstance(arr, deepsensor.backend.nps.Masked): @@ -304,9 +305,13 @@ def mask_nans_nps(self): ... """ if "batch_dim" not in self["ops"]: - raise ValueError("Must call `add_batch_dim` before `mask_nans_nps`") + raise ValueError( + "Must call `add_batch_dim` before `mask_nans_nps`" + ) if "numpy_mask" not in self["ops"]: - raise ValueError("Must call `mask_nans_numpy` before `mask_nans_nps`") + raise ValueError( + "Must call `mask_nans_numpy` before `mask_nans_nps`" + ) def f(arr): if isinstance(arr, np.ma.MaskedArray): @@ -357,14 +362,10 @@ def append_obs_to_task( option plus ability to remove observations. Args: - task (:class:`deepsensor.data.task.Task`:): - The task to modify. - X_new (array-like): - New observation coordinates. - Y_new (array-like): - New observation values. - context_set_idx (int): - Index of the context set to append to. + task (:class:`deepsensor.data.task.Task`:): The task to modify. + X_new (array-like): New observation coordinates. + Y_new (array-like): New observation values. + context_set_idx (int): Index of the context set to append to. Returns: :class:`deepsensor.data.task.Task`: @@ -401,7 +402,9 @@ def append_obs_to_task( return task_with_new -def flatten_X(X: Union[np.ndarray, Tuple[np.ndarray, np.ndarray]]) -> np.ndarray: +def flatten_X( + X: Union[np.ndarray, Tuple[np.ndarray, np.ndarray]] +) -> np.ndarray: """ Convert tuple of gridded coords to (2, N) array if necessary. @@ -419,7 +422,9 @@ def flatten_X(X: Union[np.ndarray, Tuple[np.ndarray, np.ndarray]]) -> np.ndarray return X -def flatten_Y(Y: Union[np.ndarray, Tuple[np.ndarray, np.ndarray]]) -> np.ndarray: +def flatten_Y( + Y: Union[np.ndarray, Tuple[np.ndarray, np.ndarray]] +) -> np.ndarray: """ Convert gridded data of shape (N_dim, N_x1, N_x2) to (N_dim, N_x1 * N_x2) array if necessary. @@ -459,15 +464,14 @@ def concat_tasks(tasks: List[Task], multiple: int = 1) -> Task: the computational graph in graph mode. Returns: - :class:`~.data.task.Task` - Task containing multiple batches. + :class:`~.data.task.Task`: Task containing multiple batches. Raises: - ValueError + ValueError: If the tasks have different numbers of target sets. - ValueError + ValueError: If the tasks have different numbers of targets. - ValueError + ValueError: If the tasks have different types of target sets (gridded/ non-gridded). """ @@ -546,7 +550,9 @@ def concat_tasks(tasks: List[Task], multiple: int = 1) -> Task: ) else: # Target set is off-the-grid with tensor for `X_t` - merged_task["X_t"][i] = B.concat(*[t["X_t"][i] for t in tasks], axis=0) + merged_task["X_t"][i] = B.concat( + *[t["X_t"][i] for t in tasks], axis=0 + ) merged_task["Y_t"][i] = B.concat(*[t["Y_t"][i] for t in tasks], axis=0) merged_task["time"] = [t["time"] for t in tasks] diff --git a/deepsensor/plot.py b/deepsensor/plot.py index 4541e698..4e15cbde 100644 --- a/deepsensor/plot.py +++ b/deepsensor/plot.py @@ -99,7 +99,9 @@ def task( with plt.rc_context(params): fig, axes = plt.subplots( - nrows=nrows, ncols=ncols, figsize=(ncols * figsize, nrows * figsize) + nrows=nrows, + ncols=ncols, + figsize=(ncols * figsize, nrows * figsize), ) if nrows == 1: axes = axes[np.newaxis] @@ -119,7 +121,9 @@ def task( if isinstance(X, tuple): X = flatten_X(X) Y = Y.reshape(Y.shape[0], -1) - axes[i, j].scatter(X[1, :], X[0, :], c=Y[i], s=markersize, marker=".") + axes[i, j].scatter( + X[1, :], X[0, :], c=Y[i], s=markersize, marker="." + ) if equal_aspect: # Don't warp aspect ratio axes[i, j].set_aspect("equal") @@ -216,7 +220,8 @@ def context_encoding( context_set_idxs = np.array(range(len(task_loader.context_dims))) context_var_ID_set_sizes = [ - ndim + 1 for ndim in np.array(task_loader.context_dims)[context_set_idxs] + ndim + 1 + for ndim in np.array(task_loader.context_dims)[context_set_idxs] ] # Add density channel to each set size max_context_set_size = max(context_var_ID_set_sizes) ncols = max_context_set_size @@ -238,7 +243,9 @@ def context_encoding( else: var_IDs = task_loader.context_var_IDs[ctx_i] - ncols_row_i = task_loader.context_dims[ctx_i] + 1 # Add density channel + ncols_row_i = ( + task_loader.context_dims[ctx_i] + 1 + ) # Add density channel for col_i in range(ncols_row_i): ax = axes[row_i, col_i] # Need `origin="lower"` because encoding has `x1` increasing from top to bottom, @@ -303,13 +310,14 @@ def offgrid_context( colors: Optional[str] = None, **scatter_kwargs, ) -> None: - """Plot the off-grid context points on ``axes``. + """ + Plot the off-grid context points on ``axes``. Uses a provided :class:`~.data.processor.DataProcessor` to unnormalise the context coordinates if provided. Args: - axes (:class:`numpy:numpy.ndarray` | List[:class:`matplotlib:matplotlib.axes.Axes`] | Tuple[:class:`matplotlib:matplotlib.axes.Axes`]: + axes (:class:`numpy:numpy.ndarray` | List[:class:`matplotlib:matplotlib.axes.Axes`] | Tuple[:class:`matplotlib:matplotlib.axes.Axes`]): Axes to plot on. task (:class:`~.data.task.Task`): Task containing the context set to plot. @@ -334,7 +342,7 @@ def offgrid_context( Additional keyword arguments to pass to the scatter plot. Returns: - None. + None """ if markers is None: # all matplotlib markers @@ -379,7 +387,9 @@ def offgrid_context( elif plot_target and set_i >= len(task["X_c"]): label += f"Target set {set_i - len(task['X_c'])} " if task_loader is not None: - label += f"({task_loader.target_var_IDs[set_i - len(task['X_c'])]})" + label += ( + f"({task_loader.target_var_IDs[set_i - len(task['X_c'])]})" + ) for ax in axes: ax.scatter( @@ -472,7 +482,8 @@ def offgrid_context_observations( for x_c, y_c in zip(X_c.T, Y_c): if extent is not None: if not ( - extent[0] <= x_c[0] <= extent[1] and extent[2] <= x_c[1] <= extent[3] + extent[0] <= x_c[0] <= extent[1] + and extent[2] <= x_c[1] <= extent[3] ): continue for ax in axes: @@ -646,7 +657,9 @@ def unet_forward(unet, x): for layer_i, feature_map in enumerate(feature_maps): n_features = feature_map.shape[1] n_features_to_plot = min(n_features_per_layer, n_features) - feature_idxs = rng.choice(n_features, n_features_to_plot, replace=False) + feature_idxs = rng.choice( + n_features, n_features_to_plot, replace=False + ) fig, axes = plt.subplots( nrows=1, @@ -712,8 +725,12 @@ def placements( :class:`matplotlib:matplotlib.figure.Figure` A figure containing the placement plots. """ - fig, ax = plt.subplots(subplot_kw={"projection": crs}, figsize=(figsize, figsize)) - ax.scatter(*X_new_df.values.T[::-1], c="r", linewidths=0.5, **scatter_kwargs) + fig, ax = plt.subplots( + subplot_kw={"projection": crs}, figsize=(figsize, figsize) + ) + ax.scatter( + *X_new_df.values.T[::-1], c="r", linewidths=0.5, **scatter_kwargs + ) offgrid_context(ax, task, data_processor, linewidths=0.5, **scatter_kwargs) ax.coastlines() @@ -779,7 +796,9 @@ def acquisition_fn( """ # Remove spatial dims using data_processor.raw_spatial_coords_names plot_dims = [col_dim, *data_processor.raw_spatial_coord_names] - non_plot_dims = [dim for dim in acquisition_fn_ds.dims if dim not in plot_dims] + non_plot_dims = [ + dim for dim in acquisition_fn_ds.dims if dim not in plot_dims + ] valid_avg_dims = ["time", "sample"] for dim in non_plot_dims: if dim not in valid_avg_dims: @@ -791,7 +810,8 @@ def acquisition_fn( if len(non_plot_dims) > 0: # Average over non-plot dims print( - "Averaging acquisition function over dims for plotting: " f"{non_plot_dims}" + "Averaging acquisition function over dims for plotting: " + f"{non_plot_dims}" ) acquisition_fn_ds = acquisition_fn_ds.mean(dim=non_plot_dims) @@ -834,7 +854,9 @@ def acquisition_fn( if add_colorbar and final_axis: im = ax.get_children()[0] label = acquisition_fn_ds.name - cax = plt.axes([0.93, 0.035, 0.02, 0.91]) # add a small custom axis + cax = plt.axes( + [0.93, 0.035, 0.02, 0.91] + ) # add a small custom axis cbar = plt.colorbar( im, cax=cax, label=label ) # specify axis for colorbar to occupy with cax @@ -925,7 +947,9 @@ def prediction( var_ID: [param for param in pred[var_ID]] for var_ID in pred } else: - prediction_parameters = {var_ID: prediction_parameters for var_ID in pred} + prediction_parameters = { + var_ID: prediction_parameters for var_ID in pred + } n_vars = len(pred.target_var_IDs) n_params = max(len(params) for params in prediction_parameters.values()) @@ -1002,7 +1026,9 @@ def prediction( hue = ( pred[var_ID] .reset_index()[[x1_name, x2_name]] - .apply(lambda row: f"({row[x1_name]}, {row[x2_name]})", axis=1) + .apply( + lambda row: f"({row[x1_name]}, {row[x2_name]})", axis=1 + ) ) hue.name = f"{x1_name}, {x2_name}" @@ -1018,7 +1044,9 @@ def prediction( # rotate date times ax.set_xticklabels( - ax.get_xticklabels(), rotation=45, horizontalalignment="right" + ax.get_xticklabels(), + rotation=45, + horizontalalignment="right", ) ax.set_title(f"{var_ID} {param}") From 5cbbea11abc542c509389737d7bd194166ad1896 Mon Sep 17 00:00:00 2001 From: Kalle Westerling Date: Fri, 3 Nov 2023 16:04:29 +0000 Subject: [PATCH 2/4] Markdown --- docs/research_ideas.md | 18 ++++++++++++------ 1 file changed, 12 insertions(+), 6 deletions(-) diff --git a/docs/research_ideas.md b/docs/research_ideas.md index d462d3cd..456f7690 100644 --- a/docs/research_ideas.md +++ b/docs/research_ideas.md @@ -10,7 +10,8 @@ resources in the [](resources.md) page before starting. Why not [join our Slack channel](https://docs.google.com/forms/d/e/1FAIpQLScsI8EiXDdSfn1huMp1vj5JAxi9NIeYLljbEUlMceZvwVpugw/viewform) and start a conversation around these ideas or your own? :-) -### Transfer learning from regions of dense observations to regions of sparse observations +## Transfer learning from regions of dense observations to regions of sparse observations + Since the `ConvNP` is a data-hungry model, it does not perform well if only trained on a small number of observations, which presents a challenge for modelling variables that are poorly observed. @@ -19,20 +20,23 @@ Can we train a model on a region of dense observations and then transfer the mod of sparse observations? Does the performance improve? -### Sensor placement for forecasting +## Sensor placement for forecasting + Previous active learning research with ConvNPs has only considered sensor placement for interpolation. Do the sensor placements change when the model is trained for forecasting? See, e.g., Section 4.2.1 of [Environmental sensor placement with convolutional Gaussian neural processes](https://doi.org/10.1017/eds.2023.22). -### U-Net architectural changes +## U-Net architectural changes + The `ConvNP` currently uses a vanilla U-Net architecture. Do any architectural changes improve performance, such as batch normalisation or dropout? This would require digging into the [`neuralprocesses.construct_convgnp` method](https://github.com/wesselb/neuralprocesses/blob/f20572ba480c1279ad5fb66dbb89cbc73a0171c7/neuralprocesses/architectures/convgnp.py#L97) and replacing the U-Net module with a custom one. -### Extension to continuous time observations +## Extension to continuous time observations + The `ConvNP` currently assumes that the observations are on a regular time grid. How can we extend this to continuous time observations, where the observations are not necessarily on a regular time grid? @@ -40,7 +44,8 @@ Can we do this without a major rework of the code and model? For example, can we pass a 'time of observation' auxiliary input to the model? What are the limitations of this approach? -### Training with ablations for interpretability +## Training with ablations for interpretability + Since the `ConvNP` operates on sets of observations, it is possible to ablate observations and see how the model's predictions change. Thus, the `ConvNP` admits unique interpretability opportunities. @@ -52,7 +57,8 @@ For example, when generating `Task`s with a `TaskLoader`, randomly set some of t Then, at test time, ablate context sets and measure the change in the model's predictions or performance. -### Monte Carlo sensor placement using AR sampling +## Monte Carlo sensor placement using AR sampling + The `GreedyAlgorithm` for sensor placement currently uses the model's mean prediction to infill missing observations at query sites. However, one could also draw multiple [AR samples](user-guide/prediction.ipynb) From 685d75c008a3660eea46bdd66717cc02eaeff8b6 Mon Sep 17 00:00:00 2001 From: Kalle Westerling Date: Fri, 3 Nov 2023 16:07:46 +0000 Subject: [PATCH 3/4] Black --- deepsensor/active_learning/acquisition_fns.py | 8 +- deepsensor/data/loader.py | 126 +++++------------- deepsensor/data/task.py | 24 +--- deepsensor/plot.py | 49 ++----- 4 files changed, 55 insertions(+), 152 deletions(-) diff --git a/deepsensor/active_learning/acquisition_fns.py b/deepsensor/active_learning/acquisition_fns.py index c62271a3..daf99361 100644 --- a/deepsensor/active_learning/acquisition_fns.py +++ b/deepsensor/active_learning/acquisition_fns.py @@ -299,9 +299,7 @@ def __call__(self, task: Task): if isinstance(pred, list): pred = pred[self.target_set_idx] true = task["Y_t"][self.target_set_idx] - return -np.mean( - norm.logpdf(true, loc=pred, scale=self.model.stddev(task)) - ) + return -np.mean(norm.logpdf(true, loc=pred, scale=self.model.stddev(task))) class OracleJointNLL(AcquisitionFunctionOracle): @@ -482,8 +480,6 @@ def __call__(self, task: Task, X_s: np.ndarray, **kwargs) -> np.ndarray: # Compute the expected improvement Z = (mean - best_target_value) / stddev - ei = stddev * (mean - best_target_value) * norm.cdf( - Z - ) + stddev * norm.pdf(Z) + ei = stddev * (mean - best_target_value) * norm.cdf(Z) + stddev * norm.pdf(Z) return ei diff --git a/deepsensor/data/loader.py b/deepsensor/data/loader.py index 54adc9df..60df4cb8 100644 --- a/deepsensor/data/loader.py +++ b/deepsensor/data/loader.py @@ -97,9 +97,7 @@ def __init__( str, List[Union[xr.DataArray, xr.Dataset, pd.DataFrame, str]], ] = None, - aux_at_contexts: Optional[ - Tuple[int, Union[xr.DataArray, xr.Dataset]] - ] = None, + aux_at_contexts: Optional[Tuple[int, Union[xr.DataArray, xr.Dataset]]] = None, aux_at_targets: Optional[ Union[ xr.DataArray, @@ -132,9 +130,7 @@ def __init__( self.target_delta_t = self.config["target_delta_t"] self.time_freq = self.config["time_freq"] self.xarray_interp_method = self.config["xarray_interp_method"] - self.discrete_xarray_sampling = self.config[ - "discrete_xarray_sampling" - ] + self.discrete_xarray_sampling = self.config["discrete_xarray_sampling"] self.dtype = self.config["dtype"] else: self.context = context @@ -287,9 +283,7 @@ def _load_pandas_or_xarray(path): def _load_data(data): if isinstance(data, (tuple, list)): - data = tuple( - [_load_pandas_or_xarray(data_i) for data_i in data] - ) + data = tuple([_load_pandas_or_xarray(data_i) for data_i in data]) else: data = _load_pandas_or_xarray(data) return data @@ -350,9 +344,7 @@ def cast_to_dtype(var): # Note: Numeric pandas indexes are always cast to float64, so we have to cast # x1/x2 coord dtypes during task sampling else: - raise ValueError( - f"Unknown type {type(var)} for context set {var}" - ) + raise ValueError(f"Unknown type {type(var)} for context set {var}") return var if var is None: @@ -417,28 +409,22 @@ def count_data_dims_of_tuple_of_sets(datasets): elif isinstance(var, xr.DataArray): dim = 1 # Single data variable elif isinstance(var, pd.DataFrame): - dim = len( - var.columns - ) # Assumes all columns are data variables + dim = len(var.columns) # Assumes all columns are data variables elif isinstance(var, pd.Series): dim = 1 # Single data variable else: - raise ValueError( - f"Unknown type {type(var)} for context set {var}" - ) + raise ValueError(f"Unknown type {type(var)} for context set {var}") dims.append(dim) return dims context_dims = count_data_dims_of_tuple_of_sets(self.context) target_dims = count_data_dims_of_tuple_of_sets(self.target) if self.aux_at_contexts is not None: - context_dims += count_data_dims_of_tuple_of_sets( - self.aux_at_contexts - ) + context_dims += count_data_dims_of_tuple_of_sets(self.aux_at_contexts) if self.aux_at_targets is not None: - aux_at_target_dims = count_data_dims_of_tuple_of_sets( - self.aux_at_targets - )[0] + aux_at_target_dims = count_data_dims_of_tuple_of_sets(self.aux_at_targets)[ + 0 + ] else: aux_at_target_dims = 0 @@ -469,17 +455,13 @@ def infer_var_IDs_of_tuple_of_sets(datasets, delta_ts=None): if isinstance(var, xr.DataArray): var_ID = (var.name,) # Single data variable elif isinstance(var, xr.Dataset): - var_ID = tuple( - var.data_vars.keys() - ) # Multiple data variables + var_ID = tuple(var.data_vars.keys()) # Multiple data variables elif isinstance(var, pd.DataFrame): var_ID = tuple(var.columns) elif isinstance(var, pd.Series): var_ID = (var.name,) else: - raise ValueError( - f"Unknown type {type(var)} for context set {var}" - ) + raise ValueError(f"Unknown type {type(var)} for context set {var}") if delta_ts is not None: # Add delta_t to the variable ID @@ -503,17 +485,15 @@ def infer_var_IDs_of_tuple_of_sets(datasets, delta_ts=None): ) if self.aux_at_contexts is not None: - context_var_IDs += infer_var_IDs_of_tuple_of_sets( - self.aux_at_contexts - ) + context_var_IDs += infer_var_IDs_of_tuple_of_sets(self.aux_at_contexts) context_var_IDs_and_delta_t += infer_var_IDs_of_tuple_of_sets( self.aux_at_contexts, [0] ) if self.aux_at_targets is not None: - aux_at_target_var_IDs = infer_var_IDs_of_tuple_of_sets( - self.aux_at_targets - )[0] + aux_at_target_var_IDs = infer_var_IDs_of_tuple_of_sets(self.aux_at_targets)[ + 0 + ] else: aux_at_target_var_IDs = None @@ -525,9 +505,7 @@ def infer_var_IDs_of_tuple_of_sets(datasets, delta_ts=None): aux_at_target_var_IDs, ) - def _check_links( - self, links: Union[Tuple[int, int], List[Tuple[int, int]]] - ): + def _check_links(self, links: Union[Tuple[int, int], List[Tuple[int, int]]]): """ Check that the context-target links are valid. @@ -553,9 +531,7 @@ def _check_links( assert isinstance( links, list ), f"Links must be a list of length-2 tuples, but got {type(links)}" - assert ( - len(links) > 0 - ), "If links is not None, it must be a non-empty list" + assert len(links) > 0, "If links is not None, it must be a non-empty list" assert all( isinstance(link, tuple) for link in links ), f"Links must be a list of tuples, but got {[type(link) for link in links]}" @@ -653,15 +629,9 @@ def sample_da( dim = da.shape[0] if da.ndim == 3 else 1 Y_c = np.zeros((dim, 0), dtype=self.dtype) return X_c, Y_c - x1 = rng.uniform( - da.coords["x1"].min(), da.coords["x1"].max(), N - ) - x2 = rng.uniform( - da.coords["x2"].min(), da.coords["x2"].max(), N - ) - Y_c = da.sel( - x1=xr.DataArray(x1), x2=xr.DataArray(x2), method="nearest" - ) + x1 = rng.uniform(da.coords["x1"].min(), da.coords["x1"].max(), N) + x2 = rng.uniform(da.coords["x2"].min(), da.coords["x2"].max(), N) + Y_c = da.sel(x1=xr.DataArray(x1), x2=xr.DataArray(x2), method="nearest") Y_c = np.array(Y_c, dtype=self.dtype) X_c = np.array([x1, x2], dtype=self.dtype) @@ -739,11 +709,7 @@ def sample_df( N = sampling_strat rng = np.random.default_rng(seed) idx = rng.choice(df.index, N) - X_c = ( - df.loc[idx] - .reset_index()[["x1", "x2"]] - .values.T.astype(self.dtype) - ) + X_c = df.loc[idx].reset_index()[["x1", "x2"]].values.T.astype(self.dtype) Y_c = df.loc[idx].values.T elif isinstance(sampling_strat, str) and sampling_strat in [ "all", @@ -973,9 +939,7 @@ def check_sampling_strat(sampling_strat, set): ) for strat in sampling_strat: - if not isinstance( - strat, (str, int, np.integer, float, np.ndarray) - ): + if not isinstance(strat, (str, int, np.integer, float, np.ndarray)): raise InvalidSamplingStrategyError( f"Unknown sampling strategy {strat} of type {type(strat)}" ) @@ -1030,9 +994,7 @@ def sample_variable(var, sampling_strat, seed): elif isinstance(var, (pd.DataFrame, pd.Series)): X, Y = self.sample_df(var, sampling_strat, seed) else: - raise ValueError( - f"Unknown type {type(var)} for context set " f"{var}" - ) + raise ValueError(f"Unknown type {type(var)} for context set " f"{var}") return X, Y # Check that the sampling strategies are valid @@ -1040,9 +1002,7 @@ def sample_variable(var, sampling_strat, seed): target_sampling = check_sampling_strat(target_sampling, self.target) # Check `split_frac if split_frac < 0 or split_frac > 1: - raise ValueError( - f"split_frac must be between 0 and 1, got {split_frac}" - ) + raise ValueError(f"split_frac must be between 0 and 1, got {split_frac}") if self.links is None: b1 = any( [ @@ -1135,12 +1095,8 @@ def sample_variable(var, sampling_strat, seed): # Perform the split sampling strategy for linked context and target sets at this point # while we have the full context and target data in scope - context_split_idxs = np.where( - np.array(context_sampling) == "split" - )[0] - target_split_idxs = np.where(np.array(target_sampling) == "split")[ - 0 - ] + context_split_idxs = np.where(np.array(context_sampling) == "split")[0] + target_split_idxs = np.where(np.array(target_sampling) == "split")[0] assert len(context_split_idxs) == len(target_split_idxs), ( f"Number of context sets with 'split' sampling strategy " f"({len(context_split_idxs)}) must match number of target sets " @@ -1193,12 +1149,8 @@ def sample_variable(var, sampling_strat, seed): # Perform the gapfill sampling strategy for linked context and target sets at this point # while we have the full context and target data in scope - context_gapfill_idxs = np.where( - np.array(context_sampling) == "gapfill" - )[0] - target_gapfill_idxs = np.where( - np.array(target_sampling) == "gapfill" - )[0] + context_gapfill_idxs = np.where(np.array(context_sampling) == "gapfill")[0] + target_gapfill_idxs = np.where(np.array(target_sampling) == "gapfill")[0] assert len(context_gapfill_idxs) == len(target_gapfill_idxs), ( f"Number of context sets with 'gapfill' sampling strategy " f"({len(context_gapfill_idxs)}) must match number of target sets " @@ -1228,13 +1180,9 @@ def sample_variable(var, sampling_strat, seed): # Keep trying until we get a target set with at least one target point keep_searching = True while keep_searching: - added_mask_date = rng.choice( - self.context[context_idx].time - ) + added_mask_date = rng.choice(self.context[context_idx].time) added_mask = ( - self.context[context_idx] - .sel(time=added_mask_date) - .isnull() + self.context[context_idx].sel(time=added_mask_date).isnull() ) curr_mask = context_var.isnull() @@ -1245,9 +1193,7 @@ def sample_variable(var, sampling_strat, seed): # when we could just slice the target values here target_mask = added_mask & ~curr_mask if isinstance(target_var, xr.Dataset): - keep_searching = np.all( - target_mask.to_array().data == False - ) + keep_searching = np.all(target_mask.to_array().data == False) else: keep_searching = np.all(target_mask.data == False) if keep_searching: @@ -1278,9 +1224,7 @@ def sample_variable(var, sampling_strat, seed): if self.aux_at_contexts is not None: # Add auxiliary variable sampled at context set as a new context variable - X_c_offgrid = [ - X_c for X_c in task["X_c"] if not isinstance(X_c, tuple) - ] + X_c_offgrid = [X_c for X_c in task["X_c"] if not isinstance(X_c, tuple)] if len(X_c_offgrid) == 0: # No offgrid context sets X_c_offrid_all = np.empty((2, 0), dtype=self.dtype) @@ -1325,9 +1269,7 @@ def __call__(self, date, *args, **kwargs): Task object or list of task objects for each date containing the context and target data. """ - if isinstance( - date, (list, tuple, pd.core.indexes.datetimes.DatetimeIndex) - ): + if isinstance(date, (list, tuple, pd.core.indexes.datetimes.DatetimeIndex)): return [self.task_generation(d, *args, **kwargs) for d in date] else: return self.task_generation(date, *args, **kwargs) diff --git a/deepsensor/data/task.py b/deepsensor/data/task.py index 2b4392eb..748aca4b 100644 --- a/deepsensor/data/task.py +++ b/deepsensor/data/task.py @@ -274,9 +274,7 @@ def mask_nans_numpy(self): missing values are. """ if "batch_dim" not in self["ops"]: - raise ValueError( - "Must call `add_batch_dim` before `mask_nans_numpy`" - ) + raise ValueError("Must call `add_batch_dim` before `mask_nans_numpy`") def f(arr): if isinstance(arr, deepsensor.backend.nps.Masked): @@ -305,13 +303,9 @@ def mask_nans_nps(self): ... """ if "batch_dim" not in self["ops"]: - raise ValueError( - "Must call `add_batch_dim` before `mask_nans_nps`" - ) + raise ValueError("Must call `add_batch_dim` before `mask_nans_nps`") if "numpy_mask" not in self["ops"]: - raise ValueError( - "Must call `mask_nans_numpy` before `mask_nans_nps`" - ) + raise ValueError("Must call `mask_nans_numpy` before `mask_nans_nps`") def f(arr): if isinstance(arr, np.ma.MaskedArray): @@ -402,9 +396,7 @@ def append_obs_to_task( return task_with_new -def flatten_X( - X: Union[np.ndarray, Tuple[np.ndarray, np.ndarray]] -) -> np.ndarray: +def flatten_X(X: Union[np.ndarray, Tuple[np.ndarray, np.ndarray]]) -> np.ndarray: """ Convert tuple of gridded coords to (2, N) array if necessary. @@ -422,9 +414,7 @@ def flatten_X( return X -def flatten_Y( - Y: Union[np.ndarray, Tuple[np.ndarray, np.ndarray]] -) -> np.ndarray: +def flatten_Y(Y: Union[np.ndarray, Tuple[np.ndarray, np.ndarray]]) -> np.ndarray: """ Convert gridded data of shape (N_dim, N_x1, N_x2) to (N_dim, N_x1 * N_x2) array if necessary. @@ -550,9 +540,7 @@ def concat_tasks(tasks: List[Task], multiple: int = 1) -> Task: ) else: # Target set is off-the-grid with tensor for `X_t` - merged_task["X_t"][i] = B.concat( - *[t["X_t"][i] for t in tasks], axis=0 - ) + merged_task["X_t"][i] = B.concat(*[t["X_t"][i] for t in tasks], axis=0) merged_task["Y_t"][i] = B.concat(*[t["Y_t"][i] for t in tasks], axis=0) merged_task["time"] = [t["time"] for t in tasks] diff --git a/deepsensor/plot.py b/deepsensor/plot.py index 4e15cbde..e541b89b 100644 --- a/deepsensor/plot.py +++ b/deepsensor/plot.py @@ -121,9 +121,7 @@ def task( if isinstance(X, tuple): X = flatten_X(X) Y = Y.reshape(Y.shape[0], -1) - axes[i, j].scatter( - X[1, :], X[0, :], c=Y[i], s=markersize, marker="." - ) + axes[i, j].scatter(X[1, :], X[0, :], c=Y[i], s=markersize, marker=".") if equal_aspect: # Don't warp aspect ratio axes[i, j].set_aspect("equal") @@ -220,8 +218,7 @@ def context_encoding( context_set_idxs = np.array(range(len(task_loader.context_dims))) context_var_ID_set_sizes = [ - ndim + 1 - for ndim in np.array(task_loader.context_dims)[context_set_idxs] + ndim + 1 for ndim in np.array(task_loader.context_dims)[context_set_idxs] ] # Add density channel to each set size max_context_set_size = max(context_var_ID_set_sizes) ncols = max_context_set_size @@ -243,9 +240,7 @@ def context_encoding( else: var_IDs = task_loader.context_var_IDs[ctx_i] - ncols_row_i = ( - task_loader.context_dims[ctx_i] + 1 - ) # Add density channel + ncols_row_i = task_loader.context_dims[ctx_i] + 1 # Add density channel for col_i in range(ncols_row_i): ax = axes[row_i, col_i] # Need `origin="lower"` because encoding has `x1` increasing from top to bottom, @@ -387,9 +382,7 @@ def offgrid_context( elif plot_target and set_i >= len(task["X_c"]): label += f"Target set {set_i - len(task['X_c'])} " if task_loader is not None: - label += ( - f"({task_loader.target_var_IDs[set_i - len(task['X_c'])]})" - ) + label += f"({task_loader.target_var_IDs[set_i - len(task['X_c'])]})" for ax in axes: ax.scatter( @@ -482,8 +475,7 @@ def offgrid_context_observations( for x_c, y_c in zip(X_c.T, Y_c): if extent is not None: if not ( - extent[0] <= x_c[0] <= extent[1] - and extent[2] <= x_c[1] <= extent[3] + extent[0] <= x_c[0] <= extent[1] and extent[2] <= x_c[1] <= extent[3] ): continue for ax in axes: @@ -657,9 +649,7 @@ def unet_forward(unet, x): for layer_i, feature_map in enumerate(feature_maps): n_features = feature_map.shape[1] n_features_to_plot = min(n_features_per_layer, n_features) - feature_idxs = rng.choice( - n_features, n_features_to_plot, replace=False - ) + feature_idxs = rng.choice(n_features, n_features_to_plot, replace=False) fig, axes = plt.subplots( nrows=1, @@ -725,12 +715,8 @@ def placements( :class:`matplotlib:matplotlib.figure.Figure` A figure containing the placement plots. """ - fig, ax = plt.subplots( - subplot_kw={"projection": crs}, figsize=(figsize, figsize) - ) - ax.scatter( - *X_new_df.values.T[::-1], c="r", linewidths=0.5, **scatter_kwargs - ) + fig, ax = plt.subplots(subplot_kw={"projection": crs}, figsize=(figsize, figsize)) + ax.scatter(*X_new_df.values.T[::-1], c="r", linewidths=0.5, **scatter_kwargs) offgrid_context(ax, task, data_processor, linewidths=0.5, **scatter_kwargs) ax.coastlines() @@ -796,9 +782,7 @@ def acquisition_fn( """ # Remove spatial dims using data_processor.raw_spatial_coords_names plot_dims = [col_dim, *data_processor.raw_spatial_coord_names] - non_plot_dims = [ - dim for dim in acquisition_fn_ds.dims if dim not in plot_dims - ] + non_plot_dims = [dim for dim in acquisition_fn_ds.dims if dim not in plot_dims] valid_avg_dims = ["time", "sample"] for dim in non_plot_dims: if dim not in valid_avg_dims: @@ -810,8 +794,7 @@ def acquisition_fn( if len(non_plot_dims) > 0: # Average over non-plot dims print( - "Averaging acquisition function over dims for plotting: " - f"{non_plot_dims}" + "Averaging acquisition function over dims for plotting: " f"{non_plot_dims}" ) acquisition_fn_ds = acquisition_fn_ds.mean(dim=non_plot_dims) @@ -854,9 +837,7 @@ def acquisition_fn( if add_colorbar and final_axis: im = ax.get_children()[0] label = acquisition_fn_ds.name - cax = plt.axes( - [0.93, 0.035, 0.02, 0.91] - ) # add a small custom axis + cax = plt.axes([0.93, 0.035, 0.02, 0.91]) # add a small custom axis cbar = plt.colorbar( im, cax=cax, label=label ) # specify axis for colorbar to occupy with cax @@ -947,9 +928,7 @@ def prediction( var_ID: [param for param in pred[var_ID]] for var_ID in pred } else: - prediction_parameters = { - var_ID: prediction_parameters for var_ID in pred - } + prediction_parameters = {var_ID: prediction_parameters for var_ID in pred} n_vars = len(pred.target_var_IDs) n_params = max(len(params) for params in prediction_parameters.values()) @@ -1026,9 +1005,7 @@ def prediction( hue = ( pred[var_ID] .reset_index()[[x1_name, x2_name]] - .apply( - lambda row: f"({row[x1_name]}, {row[x2_name]})", axis=1 - ) + .apply(lambda row: f"({row[x1_name]}, {row[x2_name]})", axis=1) ) hue.name = f"{x1_name}, {x2_name}" From bead6379252ffb51ecd68187c9d99d19305f4eb2 Mon Sep 17 00:00:00 2001 From: Kalle Westerling Date: Tue, 7 May 2024 17:41:31 +0100 Subject: [PATCH 4/4] Changing from Google form sign-up to new E&S MS forms --- CONTRIBUTING.md | 5 ++--- README.md | 3 +-- docs/community/faq.md | 3 ++- docs/community/index.md | 3 +-- docs/index.md | 3 +-- docs/research_ideas.md | 3 +-- 6 files changed, 8 insertions(+), 12 deletions(-) diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md index 3c279172..8cc489b4 100644 --- a/CONTRIBUTING.md +++ b/CONTRIBUTING.md @@ -33,9 +33,8 @@ Slack workspace, where conversation about when to hold the community calls in th place. **Slack Workspace**: Join -our [DeepSensor Slack channel](https://docs.google.com/forms/d/e/1FAIpQLScsI8EiXDdSfn1huMp1vj5JAxi9NIeYLljbEUlMceZvwVpugw/viewform) for -discussions, queries, and community interactions. Send us an email at kwesterling@turing.ac.uk to -request an invite. +our DeepSensor Slack channel for +discussions, queries, and community interactions. In order to join, [sign up for the Turing Environment & Sustainability stakeholder community](https://forms.office.com/pages/responsepage.aspx?id=p_SVQ1XklU-Knx-672OE-ZmEJNLHTHVFkqQ97AaCfn9UMTZKT1IwTVhJRE82UjUzMVE2MThSOU5RMC4u). The form includes a question on signing up for the Slack team, where you can find DeepSensor's channel. **Email**: If you prefer a more formal communication method or have specific concerns, please reach us at tomandersson3@gmail.com. diff --git a/README.md b/README.md index effac0fc..a77a35f7 100644 --- a/README.md +++ b/README.md @@ -29,8 +29,7 @@ This allows DeepSensor users to focus on the science and rapidly iterate on idea DeepSensor is an experimental package, and we welcome [contributions from the community](https://github.com/alan-turing-institute/deepsensor/blob/main/CONTRIBUTING.md). -We have an active Slack channel for code and research discussions; you can request to -join [via this Google Form](https://docs.google.com/forms/d/e/1FAIpQLScsI8EiXDdSfn1huMp1vj5JAxi9NIeYLljbEUlMceZvwVpugw/viewform). +We have an active Slack channel for code and research discussions; you can join by [signing up for the Turing Environment & Sustainability stakeholder community](https://forms.office.com/pages/responsepage.aspx?id=p_SVQ1XklU-Knx-672OE-ZmEJNLHTHVFkqQ97AaCfn9UMTZKT1IwTVhJRE82UjUzMVE2MThSOU5RMC4u). The form includes a question on signing up for the Slack team, where you can find DeepSensor's channel. ![DeepSensor example application figures](https://raw.githubusercontent.com/alan-turing-institute/deepsensor/main/figs/deepsensor_application_examples.png) diff --git a/docs/community/faq.md b/docs/community/faq.md index 53f62fa3..7396b20b 100644 --- a/docs/community/faq.md +++ b/docs/community/faq.md @@ -46,7 +46,8 @@ Check our [](./contributing.md) guide. **Q: How can I get in touch with other contributors or maintainers?** **Answer:** -[Request to join our Slack channel](https://docs.google.com/forms/d/e/1FAIpQLScsI8EiXDdSfn1huMp1vj5JAxi9NIeYLljbEUlMceZvwVpugw/viewform) to stay in touch with other contributors and maintainers. +Request to join our Slack channel to stay in touch with other contributors and maintainers. You can join by [signing up for the Turing Environment & Sustainability stakeholder community](https://forms.office.com/pages/responsepage.aspx?id=p_SVQ1XklU-Knx-672OE-ZmEJNLHTHVFkqQ97AaCfn9UMTZKT1IwTVhJRE82UjUzMVE2MThSOU5RMC4u). The form includes a question on signing up for the Slack team, where you can find DeepSensor's channel. + We also have a regular community Zoom call (join the Slack channel or get in touch to find out more). --- diff --git a/docs/community/index.md b/docs/community/index.md index 1146ab5f..dad2568e 100644 --- a/docs/community/index.md +++ b/docs/community/index.md @@ -2,7 +2,6 @@ The DeepSensor community is a group of users and contributors who are interested in the development of DeepSensor. The community is open to anyone who is interested in DeepSensor. The community is a place to ask questions, discuss ideas, and share your work. -If you are interested in joining the community, please request to join our Slack channel via [this Google Form](https://docs.google.com/forms/d/e/1FAIpQLScsI8EiXDdSfn1huMp1vj5JAxi9NIeYLljbEUlMceZvwVpugw/viewform). -Once your request has been accepted, you will have access to the Slack channel [here](https://ai4environment.slack.com/archives/C05NQ76L87R). +If you are interested in joining the community, please request to join our Slack channel. You can join by [signing up for the Turing Environment & Sustainability stakeholder community](https://forms.office.com/pages/responsepage.aspx?id=p_SVQ1XklU-Knx-672OE-ZmEJNLHTHVFkqQ97AaCfn9UMTZKT1IwTVhJRE82UjUzMVE2MThSOU5RMC4u). The form includes a question on signing up for the Slack team, where you can find DeepSensor's channel. We welcome contributions from the community. If you are interested in contributing to DeepSensor, please read the [Contributing Guide](./contributing.md). diff --git a/docs/index.md b/docs/index.md index a5954725..d92b41a5 100644 --- a/docs/index.md +++ b/docs/index.md @@ -7,12 +7,11 @@ neural processes. **Useful links**: [Code repository](https://github.com/alan-turing-institute/deepsensor) | [Issues](https://github.com/alan-turing-institute/deepsensor/issues) | -[Slack join request form](https://docs.google.com/forms/d/e/1FAIpQLScsI8EiXDdSfn1huMp1vj5JAxi9NIeYLljbEUlMceZvwVpugw/viewform) | +[Slack join request form](https://forms.office.com/pages/responsepage.aspx?id=p_SVQ1XklU-Knx-672OE-ZmEJNLHTHVFkqQ97AaCfn9UMTZKT1IwTVhJRE82UjUzMVE2MThSOU5RMC4u) | [Slack channel](https://ai4environment.slack.com/archives/C05NQ76L87R) | [DeepSensor Gallery](https://github.com/tom-andersson/deepsensor_gallery) - ::::{grid} 1 1 2 2 :gutter: 2 diff --git a/docs/research_ideas.md b/docs/research_ideas.md index 456f7690..7e8e711d 100644 --- a/docs/research_ideas.md +++ b/docs/research_ideas.md @@ -7,8 +7,7 @@ Below are a non-exhaustive selection of research ideas that you could explore. It would be helpful to ensure you are familiar with the literature and resources in the [](resources.md) page before starting. -Why not [join our Slack channel](https://docs.google.com/forms/d/e/1FAIpQLScsI8EiXDdSfn1huMp1vj5JAxi9NIeYLljbEUlMceZvwVpugw/viewform) -and start a conversation around these ideas or your own? :-) +Why not join our Slack channel and start a conversation around these ideas or your own? :-) You can join by [signing up for the Turing Environment & Sustainability stakeholder community](https://forms.office.com/pages/responsepage.aspx?id=p_SVQ1XklU-Knx-672OE-ZmEJNLHTHVFkqQ97AaCfn9UMTZKT1IwTVhJRE82UjUzMVE2MThSOU5RMC4u). The form includes a question on signing up for the Slack team, where you can find DeepSensor's channel. ## Transfer learning from regions of dense observations to regions of sparse observations