Merge branch 'main' into make-google-docstrings
kallewesterling authored Oct 16, 2023 · 2 parents 7390847 + 1c26719 · commit 7b36be9
Showing 2 changed files with 75 additions and 65 deletions.
79 changes: 36 additions & 43 deletions deepsensor/active_learning/acquisition_fns.py
@@ -13,16 +13,25 @@ class AcquisitionFunction:
Parent class for acquisition functions.
"""

def __init__(self, model: ProbabilisticModel):
def __init__(
self,
model: ProbabilisticModel,
context_set_idx: int = 0,
target_set_idx: int = 0,
):
"""
Args:
model (:class:`~.model.model.ProbabilisticModel`):
[Description of the model parameter.]
context_set_idx (int):
Index of context set to add new observations to when computing
the acquisition function.
target_set_idx (int):
Index of target set to compute acquisition function for.
"""
self.model = model
self.context_set_idx = context_set_idx
self.target_set_idx = target_set_idx
self.min_or_max = -1

def __call__(self, task: Task) -> np.ndarray:
@@ -94,21 +103,19 @@ def __init__(self, model: ProbabilisticModel):
super().__init__(model)
self.min_or_max = "min"

def __call__(self, task: Task, target_set_idx: int = 0):
def __call__(self, task: Task):
"""
...
Args:
task (:class:`~.data.task.Task`):
[Description of the task parameter.]
target_set_idx (int, optional):
[Description of the target_set_idx parameter.], by default 0
Returns:
[Type of the return value]:
[Description of the return value.]
"""
return np.mean(self.model.stddev(task)[target_set_idx])
return np.mean(self.model.stddev(task)[self.target_set_idx])


class MeanVariance(AcquisitionFunction):
@@ -125,21 +132,19 @@ def __init__(self, model: ProbabilisticModel):
super().__init__(model)
self.min_or_max = "min"

def __call__(self, task: Task, target_set_idx: int = 0):
def __call__(self, task: Task):
"""
...
Args:
task (:class:`~.data.task.Task`):
[Description of the task parameter.]
target_set_idx (int, optional):
[Description of the target_set_idx parameter.], default is 0
Returns:
[Type of the return value]:
[Description of the return value.]
"""
return np.mean(self.model.variance(task)[target_set_idx])
return np.mean(self.model.variance(task)[self.target_set_idx])


class pNormStddev(AcquisitionFunction):
@@ -157,22 +162,20 @@ def __init__(self, *args, p: int = 1, **kwargs):
self.p = p
self.min_or_max = "min"

def __call__(self, task: Task, target_set_idx: int = 0):
def __call__(self, task: Task):
"""
...
Args:
task (:class:`~.data.task.Task`):
[Description of the task parameter.]
target_set_idx (int, optional):
[Description of the target_set_idx parameter.], defaults to 0
Returns:
[Type of the return value]:
[Description of the return value.]
"""
return np.linalg.norm(
self.model.stddev(task)[target_set_idx].ravel(), ord=self.p
self.model.stddev(task)[self.target_set_idx].ravel(), ord=self.p
)


@@ -262,7 +265,9 @@ def __call__(self, task: Task):
[Description of the return value.]
"""
pred = self.model.mean(task)
true = task["Y_t"]
if isinstance(pred, list):
pred = pred[self.target_set_idx]
true = task["Y_t"][self.target_set_idx]
return np.mean(np.abs(pred - true))


@@ -293,7 +298,9 @@ def __call__(self, task: Task):
[Description of the return value.]
"""
pred = self.model.mean(task)
true = task["Y_t"]
if isinstance(pred, list):
pred = pred[self.target_set_idx]
true = task["Y_t"][self.target_set_idx]
return np.sqrt(np.mean((pred - true) ** 2))


@@ -324,10 +331,10 @@ def __call__(self, task: Task):
[Description of the return value.]
"""
pred = self.model.mean(task)
true = task["Y_t"]
return -np.mean(
norm.logpdf(true, loc=pred, scale=self.model.stddev(task))
)
if isinstance(pred, list):
pred = pred[self.target_set_idx]
true = task["Y_t"][self.target_set_idx]
return -np.mean(norm.logpdf(true, loc=pred, scale=self.model.stddev(task)))


class OracleJointNLL(AcquisitionFunctionOracle):
@@ -393,15 +400,7 @@ def __call__(self, task: Task, X_s: np.ndarray):
class ContextDist(AcquisitionFunctionParallel):
"""Distance to closest context point."""

def __init__(self, context_set_idx: int = 0):
"""
...
Args:
context_set_idx (int, optional):
[Description of the context_set_idx parameter.], defaults to 0
"""
self.context_set_idx = context_set_idx
def __init__(self):
self.min_or_max = "max"

def __call__(self, task: Task, X_s: np.ndarray):
@@ -451,7 +450,7 @@ def __init__(self, model: ProbabilisticModel):
super().__init__(model)
self.min_or_max = "max"

def __call__(self, task: Task, X_s: np.ndarray, target_set_idx: int = 0):
def __call__(self, task: Task, X_s: np.ndarray):
"""
...
@@ -460,8 +459,6 @@ def __call__(self, task: Task, X_s: np.ndarray, target_set_idx: int = 0):
[Description of the task parameter.]
X_s (:class:`numpy:numpy.ndarray`):
[Description of the X_s parameter.]
target_set_idx (int, optional):
[Description of the target_set_idx parameter.], defaults to 0
Returns:
[Type of the return value]:
@@ -471,7 +468,7 @@ def __call__(
task = copy.deepcopy(task)
task["X_t"] = X_s

return self.model.stddev(task)[target_set_idx]
return self.model.stddev(task)[self.target_set_idx]


class ExpectedImprovement(AcquisitionFunctionParallel):
@@ -484,30 +481,26 @@ class ExpectedImprovement(AcquisitionFunctionParallel):
for maximisation.
"""

def __init__(self, model: ProbabilisticModel, context_set_idx: int = 0):
def __init__(self, model: ProbabilisticModel):
"""
Args:
model (:class:`~.model.model.ProbabilisticModel`):
[Description of the model parameter.]
context_set_idx (int):
Index of context set to add new observations to when computing the
acquisition function.
"""
super().__init__(model)
self.context_set_idx = context_set_idx
self.min_or_max = "max"

def __call__(
self, task: Task, X_s: np.ndarray, target_set_idx: int = 0
self,
task: Task,
X_s: np.ndarray,
) -> np.ndarray:
"""
Args:
task (:class:`~.data.task.Task`):
Task object containing context and target sets.
X_s (:class:`numpy:numpy.ndarray`):
Search points. Shape (2, N_search).
target_set_idx (int):
Index of target set to compute acquisition function for.
Returns:
:class:`numpy:numpy.ndarray`:
@@ -518,12 +511,12 @@ def __call__(
task["X_t"] = X_s

# Compute the predictive mean and variance of the target set
mean = self.model.mean(task)[target_set_idx]
mean = self.model.mean(task)[self.target_set_idx]

if task["Y_c"][self.context_set_idx].size == 0:
# No previous context points, so heuristically use the predictive mean as the
# acquisition function. This will at least select the most positive predicted mean.
return self.model.mean(task)[target_set_idx]
return self.model.mean(task)[self.target_set_idx]
else:
# Determine the best target value seen so far
best_target_value = task["Y_c"][self.context_set_idx].max()
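Note on the pattern in this file: context_set_idx and target_set_idx move from per-call keyword arguments into the AcquisitionFunction constructor, so each __call__ now takes only the task (plus search points X_s for the parallel functions) and indexes results with self.target_set_idx. The following is a minimal, self-contained sketch of the resulting calling convention, not code from the repository; ToyModel, AcquisitionFunctionSketch, and MeanStddevSketch are illustrative stand-ins for deepsensor's ProbabilisticModel, AcquisitionFunction, and MeanStddev, and the dict-based task stands in for a Task object.

import numpy as np


class ToyModel:
    """Stand-in for a ProbabilisticModel: returns one array per target set."""

    def stddev(self, task):
        return [np.array([0.5, 1.0, 1.5])]


class AcquisitionFunctionSketch:
    """Mirrors the new base-class signature: set indices are fixed at construction."""

    def __init__(self, model, context_set_idx=0, target_set_idx=0):
        self.model = model
        self.context_set_idx = context_set_idx
        self.target_set_idx = target_set_idx
        self.min_or_max = -1


class MeanStddevSketch(AcquisitionFunctionSketch):
    """__call__ no longer takes target_set_idx; it reads self.target_set_idx."""

    def __init__(self, model):
        super().__init__(model)
        self.min_or_max = "min"

    def __call__(self, task):
        return np.mean(self.model.stddev(task)[self.target_set_idx])


task = {"X_t": np.zeros((2, 3))}  # toy stand-in for a Task
acq_fn = MeanStddevSketch(ToyModel())
print(acq_fn(task))  # 1.0: mean predictive std dev over the default target set

Fixing the indices at construction keeps the __call__ signatures uniform across the sequential, parallel, and oracle acquisition functions, which is what lets the oracle metrics index Y_t and the model output consistently.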
61 changes: 39 additions & 22 deletions deepsensor/data/processor.py
@@ -2,6 +2,7 @@
import os
import json

import warnings
import xarray as xr
import pandas as pd

@@ -66,20 +67,23 @@ def __init__(
self.config["coords"]["x2"]["map"]
)

self.x1_none = self.config["coords"]["x1"]["map"] is None
self.x2_none = self.config["coords"]["x2"]["map"] is None
self.x1_name = self.config["coords"]["x1"]["name"]
self.x2_name = self.config["coords"]["x2"]["name"]
self.x1_map = self.config["coords"]["x1"]["map"]
self.x2_map = self.config["coords"]["x2"]["map"]
else:
self.config = {}

self.x1_none = x1_map is None
self.x2_none = x2_map is None
if (self.x1_none and not self.x2_none) or (
not self.x1_none and self.x2_none
):
raise ValueError(
"Must provide both x1_map and x2_map, or neither."
)
elif not self.x1_none and not self.x2_none:
self.x1_name = x1_name
self.x2_name = x2_name
self.x1_map = x1_map
self.x2_map = x2_map

# rewrite below more concisely
if self.x1_map is None and not self.x2_map is None:
raise ValueError("Must provide both x1_map and x2_map, or neither.")
elif not self.x1_map is None and self.x2_map is None:
raise ValueError("Must provide both x1_map and x2_map, or neither.")
elif not self.x1_map is None and not self.x2_map is None:
x1_map, x2_map = self._validate_coord_mappings(x1_map, x2_map)

if "coords" not in self.config:
@@ -124,6 +128,14 @@ def _validate_coord_mappings(self, x1_map, x2_map):
raise ValueError(
f"x2_map must be a 2-tuple of different numbers, not {x2_map}"
)
if np.diff(x1_map) != np.diff(x2_map):
warnings.warn(
f"x1_map={x1_map} and x2_map={x2_map} have different ranges ({float(np.diff(x1_map))} "
f"and {float(np.diff(x2_map))}, respectively). "
"This can lead to stretching/squashing of data, which may "
"impact model performance.",
UserWarning,
)

return x1_map, x2_map

@@ -388,20 +400,25 @@ def map_coords(
)

# Infer x1 and x2 mappings from min/max of data coords if not provided by user
if self.x1_none and self.x2_none:
x1_map = (x1.min(), x1.max())
x2_map = (x2.min(), x2.max())
x1_map, x2_map = self._validate_coord_mappings(x1_map, x2_map)
self.config["coords"]["x1"]["map"] = x1_map
self.config["coords"]["x2"]["map"] = x2_map
if self.x1_map is None and self.x2_map is None:
# Ensure scalings are the same for x1 and x2
x1_range = x1.max() - x1.min()
x2_range = x2.max() - x2.min()
range = np.max([x1_range, x2_range])
self.x1_map = (x1.min(), x1.min() + range)
self.x2_map = (x2.min(), x2.min() + range)

self.x1_map, self.x2_map = self._validate_coord_mappings(
self.x1_map, self.x2_map
)
self.config["coords"]["x1"]["map"] = self.x1_map
self.config["coords"]["x2"]["map"] = self.x2_map

if self.verbose:
print(
f"Inferring x1_map={x1_map} and x2_map={x2_map} from data min/max"
f"Inferring x1_map={self.x1_map} and x2_map={self.x2_map} from data min/max"
)

self.x2_none = False
self.x1_none = False

new_x1, new_x2 = self.map_x1_and_x2(x1, x2, unnorm=unnorm)

if isinstance(data, (pd.DataFrame, pd.Series)):
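Note on the processor change: the x1_none/x2_none flags are replaced by x1_map/x2_map attributes, both maps are now inferred from the data using a single shared span (the larger of the two coordinate ranges) so that x1 and x2 are normalised by the same scale, and _validate_coord_mappings warns when user-supplied maps have different ranges. A rough numpy sketch of that inference, using made-up coordinate values rather than the DataProcessor itself:

import warnings

import numpy as np

x1 = np.array([50.0, 55.0, 60.0])  # e.g. latitude values
x2 = np.array([-2.0, 3.0, 18.0])   # e.g. longitude values

# Use the larger span for both axes so one common scale applies to x1 and x2
span = max(x1.max() - x1.min(), x2.max() - x2.min())
x1_map = (float(x1.min()), float(x1.min() + span))
x2_map = (float(x2.min()), float(x2.min() + span))
print(x1_map, x2_map)  # (50.0, 70.0) (-2.0, 18.0): both maps span 20 units

# The validation step now warns if supplied maps have unequal spans, since
# that stretches/squashes one axis relative to the other
if (x1_map[1] - x1_map[0]) != (x2_map[1] - x2_map[0]):
    warnings.warn("x1_map and x2_map have different ranges", UserWarning)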
