Merge pull request #604 from aai-institute/fix/603-numpy-float
Replace np.float_ with np.float64
schroedk authored Jun 19, 2024
2 parents debb822 + 0d53fd6 commit d13af35
Showing 19 changed files with 106 additions and 103 deletions.
5 changes: 5 additions & 0 deletions CHANGELOG.md
@@ -19,6 +19,11 @@
- Extend `NystroemSketchInfluence` with block-diagonal and Gauss-Newton
approximation
[PR #596](https://github.com/aai-institute/pyDVL/pull/596)

## Fixed
- Replace `np.float_` with `np.float64` and `np.alltrue` with `np.all`,
as the old aliases are removed in NumPy 2.0
[PR #604](https://github.com/aai-institute/pyDVL/pull/604)

## Changed

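As background for the changelog entry above, here is a minimal sketch (not part of this commit) of the renamed aliases, assuming NumPy 2.x, where `np.float_` and `np.alltrue` no longer exist and `np.float64` / `np.all` are the replacements:

```python
import numpy as np
from numpy.typing import NDArray

# np.float64 is the canonical 64-bit float scalar type; the np.float_ alias
# was removed in NumPy 2.0, so dtypes and annotations must name it directly.
x: NDArray[np.float64] = np.linspace(0.0, 1.0, 5, dtype=np.float64)

# np.alltrue was removed as well; np.all is the supported equivalent.
assert np.all(x >= 0)

# On NumPy 2.x the old alias is simply absent (this check assumes 2.x):
assert not hasattr(np, "float_")
```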
24 changes: 12 additions & 12 deletions notebooks/support/common.py
@@ -21,16 +21,16 @@


def plot_gaussian_blobs(
train_ds: Tuple[NDArray[np.float_], NDArray[np.int_]],
test_ds: Tuple[NDArray[np.float_], NDArray[np.int_]],
x_min: Optional[NDArray[np.float_]] = None,
x_max: Optional[NDArray[np.float_]] = None,
train_ds: Tuple[NDArray[np.float64], NDArray[np.int_]],
test_ds: Tuple[NDArray[np.float64], NDArray[np.int_]],
x_min: Optional[NDArray[np.float64]] = None,
x_max: Optional[NDArray[np.float64]] = None,
*,
xlabel: Optional[str] = None,
ylabel: Optional[str] = None,
legend_title: Optional[str] = None,
vline: Optional[float] = None,
line: Optional[NDArray[np.float_]] = None,
line: Optional[NDArray[np.float64]] = None,
suptitle: Optional[str] = None,
s: Optional[float] = None,
figsize: Tuple[int, int] = (20, 10),
@@ -104,15 +104,15 @@ def plot_gaussian_blobs(


def plot_influences(
x: NDArray[np.float_],
influences: NDArray[np.float_],
x: NDArray[np.float64],
influences: NDArray[np.float64],
corrupted_indices: Optional[List[int]] = None,
*,
ax: Optional[plt.Axes] = None,
xlabel: Optional[str] = None,
ylabel: Optional[str] = None,
legend_title: Optional[str] = None,
line: Optional[NDArray[np.float_]] = None,
line: Optional[NDArray[np.float64]] = None,
suptitle: Optional[str] = None,
colorbar_limits: Optional[Tuple] = None,
) -> plt.Axes:
@@ -403,7 +403,7 @@ def plot_sample_images(dataset: pd.DataFrame, n_images_per_class: int = 3):


def plot_lowest_highest_influence_images(
subset_influences: NDArray[np.float_],
subset_influences: NDArray[np.float64],
subset_images: List[JpegImageFile],
num_to_plot: int,
):
@@ -454,7 +454,7 @@ def plot_losses(losses: Losses):
def corrupt_imagenet(
dataset: pd.DataFrame,
fraction_to_corrupt: float,
avg_influences: NDArray[np.float_],
avg_influences: NDArray[np.float64],
) -> Tuple[pd.DataFrame, Dict[Any, List[int]]]:
"""Given the preprocessed tiny imagenet dataset (or a subset of it),
it takes a fraction of the images with the highest influence and (randomly)
@@ -494,7 +494,7 @@ def corrupt_imagenet(
def compute_mean_corrupted_influences(
corrupted_dataset: pd.DataFrame,
corrupted_indices: Dict[Any, List[int]],
avg_corrupted_influences: NDArray[np.float_],
avg_corrupted_influences: NDArray[np.float64],
) -> pd.DataFrame:
"""Given a corrupted dataset, it returns a dataframe with average influence for each class,
separating corrupted and original points.
@@ -534,7 +534,7 @@ def compute_mean_corrupted_influences(
def plot_corrupted_influences_distribution(
corrupted_dataset: pd.DataFrame,
corrupted_indices: Dict[Any, List[int]],
avg_corrupted_influences: NDArray[np.float_],
avg_corrupted_influences: NDArray[np.float64],
figsize: Tuple[int, int] = (16, 8),
):
"""Given a corrupted dataset, plots the histogram with the distribution of
4 changes: 2 additions & 2 deletions notebooks/support/types.py
@@ -5,5 +5,5 @@


class Losses(NamedTuple):
training: NDArray[np.float_]
validation: NDArray[np.float_]
training: NDArray[np.float64]
validation: NDArray[np.float64]
2 changes: 1 addition & 1 deletion requirements.txt
@@ -1,5 +1,5 @@
pyDeprecate>=0.3.2
numpy>=1.20
numpy>=1.20,<2
pandas>=1.3
scikit-learn
scipy>=1.7.0
4 changes: 2 additions & 2 deletions src/pydvl/reporting/plots.py
@@ -272,7 +272,7 @@ def plot_shapley(


def plot_influence_distribution(
influences: NDArray[np.float_], index: int, title_extra: str = ""
influences: NDArray[np.float64], index: int, title_extra: str = ""
) -> plt.Axes:
"""Plots the histogram of the influence that all samples in the training set
have over a single sample index.
@@ -292,7 +292,7 @@ def plot_influence_distribution(


def plot_influence_distribution_by_label(
influences: NDArray[np.float_], labels: NDArray[np.float_], title_extra: str = ""
influences: NDArray[np.float64], labels: NDArray[np.float64], title_extra: str = ""
):
"""Plots the histogram of the influence that all samples in the training set
have over a single sample index, separated by labels.
2 changes: 1 addition & 1 deletion src/pydvl/reporting/scores.py
@@ -13,7 +13,7 @@
def compute_removal_score(
u: Utility,
values: ValuationResult,
percentages: Union[NDArray[np.float_], Iterable[float]],
percentages: Union[NDArray[np.float64], Iterable[float]],
*,
remove_best: bool = False,
progress: bool = False,
18 changes: 9 additions & 9 deletions src/pydvl/utils/numeric.py
@@ -279,20 +279,20 @@ def running_moments(

@overload
def running_moments(
previous_avg: NDArray[np.float_],
previous_variance: NDArray[np.float_],
previous_avg: NDArray[np.float64],
previous_variance: NDArray[np.float64],
count: int,
new_value: NDArray[np.float_],
) -> Tuple[NDArray[np.float_], NDArray[np.float_]]:
new_value: NDArray[np.float64],
) -> Tuple[NDArray[np.float64], NDArray[np.float64]]:
...


def running_moments(
previous_avg: float | NDArray[np.float_],
previous_variance: float | NDArray[np.float_],
previous_avg: float | NDArray[np.float64],
previous_variance: float | NDArray[np.float64],
count: int,
new_value: float | NDArray[np.float_],
) -> Tuple[float | NDArray[np.float_], float | NDArray[np.float_]]:
new_value: float | NDArray[np.float64],
) -> Tuple[float | NDArray[np.float64], float | NDArray[np.float64]]:
"""Uses Welford's algorithm to calculate the running average and variance of
a set of numbers.
@@ -323,7 +323,7 @@


def top_k_value_accuracy(
y_true: NDArray[np.float_], y_pred: NDArray[np.float_], k: int = 3
y_true: NDArray[np.float64], y_pred: NDArray[np.float64], k: int = 3
) -> float:
"""Computes the top-k accuracy for the estimated values by comparing indices
of the highest k values.
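The `running_moments` overloads above reference Welford's algorithm for the running mean and variance. For orientation, a self-contained sketch of a single Welford update step, assuming population-variance normalization (the helper name is hypothetical and the exact normalization in pyDVL's implementation may differ):

```python
from __future__ import annotations

import numpy as np
from numpy.typing import NDArray


def welford_update(
    mean: float | NDArray[np.float64],
    variance: float | NDArray[np.float64],
    count: int,
    new_value: float | NDArray[np.float64],
) -> tuple[float | NDArray[np.float64], float | NDArray[np.float64]]:
    """One step of Welford's online update (illustrative, not pyDVL's code)."""
    new_count = count + 1
    delta = new_value - mean             # deviation from the previous mean
    new_mean = mean + delta / new_count  # fold the new value into the mean
    # Recover the sum of squared deviations, update it, and renormalize.
    m2 = variance * count + delta * (new_value - new_mean)
    return new_mean, m2 / new_count


# Sanity check against NumPy's batch computation (population variance):
xs = np.random.default_rng(0).normal(size=100)
m, v = 0.0, 0.0
for i, x in enumerate(xs):
    m, v = welford_update(m, v, i, float(x))
assert np.isclose(m, xs.mean()) and np.isclose(v, xs.var())
```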
2 changes: 1 addition & 1 deletion src/pydvl/utils/score.py
@@ -64,7 +64,7 @@ class Scorer:
"""

_name: str
range: NDArray[np.float_]
range: NDArray[np.float64]

def __init__(
self,
26 changes: 13 additions & 13 deletions src/pydvl/value/least_core/common.py
@@ -29,8 +29,8 @@


class LeastCoreProblem(NamedTuple):
utility_values: NDArray[np.float_]
A_lb: NDArray[np.float_]
utility_values: NDArray[np.float64]
A_lb: NDArray[np.float64]


def lc_solve_problem(
@@ -113,7 +113,7 @@ def lc_solve_problem(
solver_options=solver_options,
)

values: Optional[NDArray[np.float_]]
values: Optional[NDArray[np.float64]]

if subsidy is None:
logger.debug("No values were found")
@@ -219,13 +219,13 @@ def _map_func(


def _solve_least_core_linear_program(
A_eq: NDArray[np.float_],
b_eq: NDArray[np.float_],
A_lb: NDArray[np.float_],
b_lb: NDArray[np.float_],
A_eq: NDArray[np.float64],
b_eq: NDArray[np.float64],
A_lb: NDArray[np.float64],
b_lb: NDArray[np.float64],
solver_options: dict,
non_negative_subsidy: bool = False,
) -> Tuple[Optional[NDArray[np.float_]], Optional[float]]:
) -> Tuple[Optional[NDArray[np.float64]], Optional[float]]:
r"""Solves the Least Core's linear program using cvxopt.
$$
@@ -297,12 +297,12 @@ def _solve_least_core_linear_program(

def _solve_egalitarian_least_core_quadratic_program(
subsidy: float,
A_eq: NDArray[np.float_],
b_eq: NDArray[np.float_],
A_lb: NDArray[np.float_],
b_lb: NDArray[np.float_],
A_eq: NDArray[np.float64],
b_eq: NDArray[np.float64],
A_lb: NDArray[np.float64],
b_lb: NDArray[np.float64],
solver_options: dict,
) -> Optional[NDArray[np.float_]]:
) -> Optional[NDArray[np.float64]]:
r"""Solves the egalitarian Least Core's quadratic program using cvxopt.
$$
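The docstrings of the two solver helpers above open display equations that are cut off in this view. For orientation only, and stated from the standard definition rather than from the elided docstrings, the least core linear program has the form

$$
\begin{array}{rl}
\min_{x \in \mathbb{R}^n,\; e \in \mathbb{R}} & e \\
\text{subject to} & \sum_{i \in N} x_i = v(N), \\
& \sum_{i \in S} x_i + e \ge v(S) \quad \text{for all } \emptyset \ne S \subsetneq N,
\end{array}
$$

and the egalitarian variant solved by the quadratic program fixes $e$ at the optimal subsidy and, under the same constraints, selects the allocation $x$ of minimal squared norm.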
16 changes: 8 additions & 8 deletions src/pydvl/value/result.py
@@ -202,9 +202,9 @@ class ValuationResult(
"""

_indices: NDArray[IndexT]
_values: NDArray[np.float_]
_values: NDArray[np.float64]
_counts: NDArray[np.int_]
_variances: NDArray[np.float_]
_variances: NDArray[np.float64]
_data: Dataset
_names: NDArray[NameT]
_algorithm: str
@@ -216,8 +216,8 @@ class ValuationResult(
def __init__(
self,
*,
values: NDArray[np.float_],
variances: Optional[NDArray[np.float_]] = None,
values: NDArray[np.float64],
variances: Optional[NDArray[np.float64]] = None,
counts: Optional[NDArray[np.int_]] = None,
indices: Optional[NDArray[IndexT]] = None,
data_names: Optional[Sequence[NameT] | NDArray[NameT]] = None,
@@ -299,20 +299,20 @@ def sort(
self._sort_order = reverse

@property
def values(self) -> NDArray[np.float_]:
def values(self) -> NDArray[np.float64]:
"""The values, possibly sorted."""
return self._values[self._sort_positions]

@property
def variances(self) -> NDArray[np.float_]:
def variances(self) -> NDArray[np.float64]:
"""The variances, possibly sorted."""
return self._variances[self._sort_positions]

@property
def stderr(self) -> NDArray[np.float_]:
def stderr(self) -> NDArray[np.float64]:
"""The raw standard errors, possibly sorted."""
return cast(
NDArray[np.float_], np.sqrt(self.variances / np.maximum(1, self.counts))
NDArray[np.float64], np.sqrt(self.variances / np.maximum(1, self.counts))
)

@property
4 changes: 2 additions & 2 deletions src/pydvl/value/shapley/classwise.py
@@ -166,7 +166,7 @@ def __str__(self):
def __call__(
self: "ClasswiseScorer",
model: SupervisedModel,
x_test: NDArray[np.float_],
x_test: NDArray[np.float64],
y_test: NDArray[np.int_],
) -> float:
(
@@ -180,7 +180,7 @@ def __call__(
def estimate_in_class_and_out_of_class_score(
self,
model: SupervisedModel,
x_test: NDArray[np.float_],
x_test: NDArray[np.float64],
y_test: NDArray[np.int_],
rescale_scores: bool = True,
) -> Tuple[float, float]:
4 changes: 2 additions & 2 deletions src/pydvl/value/shapley/gt.py
@@ -49,7 +49,7 @@

log = logging.getLogger(__name__)

T = TypeVar("T", NDArray[np.float_], float)
T = TypeVar("T", NDArray[np.float64], float)
GTConstants = namedtuple("GTConstants", ["kk", "Z", "q", "q_tot", "T"])


@@ -266,7 +266,7 @@ def reducer(
results_it: Iterable[Tuple[NDArray, NDArray]]
) -> Tuple[NDArray, NDArray]:
return np.concatenate(list(x[0] for x in results_it)).astype(
np.float_
np.float64
), np.concatenate(list(x[1] for x in results_it)).astype(np.int_)

seed_sequence = ensure_seed_sequence(seed)
2 changes: 1 addition & 1 deletion src/pydvl/value/shapley/knn.py
@@ -73,7 +73,7 @@ def knn_shapley(u: Utility, *, progress: bool = True) -> ValuationResult:
# closest to farthest
_, indices = nns.kneighbors(u.data.x_test)

values: NDArray[np.float_] = np.zeros_like(u.data.indices, dtype=np.float_)
values: NDArray[np.float64] = np.zeros_like(u.data.indices, dtype=np.float64)
n = len(u.data)
yt = u.data.y_train
iterator = enumerate(zip(u.data.y_test, indices), start=1)
4 changes: 2 additions & 2 deletions src/pydvl/value/stopping.py
@@ -575,7 +575,7 @@ class HistoryDeviation(StoppingCriterion):
pin_converged: If `True`, once an index has converged, it is pinned
"""

_memory: NDArray[np.float_]
_memory: NDArray[np.float64]

def __init__(
self,
@@ -666,7 +666,7 @@ def __init__(
raise ValueError("rtol must be in (0, 1)")
self.rtol = rtol
self.burn_in = burn_in
self._memory: NDArray[np.float_] | None = None
self._memory: NDArray[np.float64] | None = None
self._corr = 0.0
self._completion = 0.0
self._iterations = 0