Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Replace np.float_ with np.float64 #604

Merged
5 commits were merged on Jun 19, 2024
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 5 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,11 @@
- Extend `NystroemSketchInfluence` with block-diagonal and Gauss-Newton
approximation
[PR #596](https://github.com/aai-institute/pyDVL/pull/596)

## Fixed
- Replace `np.float_` with `np.float64` and `np.alltrue` with `np.all`,
because the old aliases were removed in NumPy 2.0
[PR #604](https://github.com/aai-institute/pyDVL/pull/604)

## Changed

Expand Down
24 changes: 12 additions & 12 deletions notebooks/support/common.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,16 +21,16 @@


def plot_gaussian_blobs(
train_ds: Tuple[NDArray[np.float_], NDArray[np.int_]],
test_ds: Tuple[NDArray[np.float_], NDArray[np.int_]],
x_min: Optional[NDArray[np.float_]] = None,
x_max: Optional[NDArray[np.float_]] = None,
train_ds: Tuple[NDArray[np.float64], NDArray[np.int_]],
test_ds: Tuple[NDArray[np.float64], NDArray[np.int_]],
x_min: Optional[NDArray[np.float64]] = None,
x_max: Optional[NDArray[np.float64]] = None,
*,
xlabel: Optional[str] = None,
ylabel: Optional[str] = None,
legend_title: Optional[str] = None,
vline: Optional[float] = None,
line: Optional[NDArray[np.float_]] = None,
line: Optional[NDArray[np.float64]] = None,
suptitle: Optional[str] = None,
s: Optional[float] = None,
figsize: Tuple[int, int] = (20, 10),
Expand Down Expand Up @@ -104,15 +104,15 @@ def plot_gaussian_blobs(


def plot_influences(
x: NDArray[np.float_],
influences: NDArray[np.float_],
x: NDArray[np.float64],
influences: NDArray[np.float64],
corrupted_indices: Optional[List[int]] = None,
*,
ax: Optional[plt.Axes] = None,
xlabel: Optional[str] = None,
ylabel: Optional[str] = None,
legend_title: Optional[str] = None,
line: Optional[NDArray[np.float_]] = None,
line: Optional[NDArray[np.float64]] = None,
suptitle: Optional[str] = None,
colorbar_limits: Optional[Tuple] = None,
) -> plt.Axes:
Expand Down Expand Up @@ -403,7 +403,7 @@ def plot_sample_images(dataset: pd.DataFrame, n_images_per_class: int = 3):


def plot_lowest_highest_influence_images(
subset_influences: NDArray[np.float_],
subset_influences: NDArray[np.float64],
subset_images: List[JpegImageFile],
num_to_plot: int,
):
Expand Down Expand Up @@ -454,7 +454,7 @@ def plot_losses(losses: Losses):
def corrupt_imagenet(
dataset: pd.DataFrame,
fraction_to_corrupt: float,
avg_influences: NDArray[np.float_],
avg_influences: NDArray[np.float64],
) -> Tuple[pd.DataFrame, Dict[Any, List[int]]]:
"""Given the preprocessed tiny imagenet dataset (or a subset of it),
it takes a fraction of the images with the highest influence and (randomly)
Expand Down Expand Up @@ -494,7 +494,7 @@ def corrupt_imagenet(
def compute_mean_corrupted_influences(
corrupted_dataset: pd.DataFrame,
corrupted_indices: Dict[Any, List[int]],
avg_corrupted_influences: NDArray[np.float_],
avg_corrupted_influences: NDArray[np.float64],
) -> pd.DataFrame:
"""Given a corrupted dataset, it returns a dataframe with average influence for each class,
separating corrupted and original points.
Expand Down Expand Up @@ -534,7 +534,7 @@ def compute_mean_corrupted_influences(
def plot_corrupted_influences_distribution(
corrupted_dataset: pd.DataFrame,
corrupted_indices: Dict[Any, List[int]],
avg_corrupted_influences: NDArray[np.float_],
avg_corrupted_influences: NDArray[np.float64],
figsize: Tuple[int, int] = (16, 8),
):
"""Given a corrupted dataset, plots the histogram with the distribution of
Expand Down
4 changes: 2 additions & 2 deletions notebooks/support/types.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,5 +5,5 @@


class Losses(NamedTuple):
training: NDArray[np.float_]
validation: NDArray[np.float_]
training: NDArray[np.float64]
validation: NDArray[np.float64]
2 changes: 1 addition & 1 deletion requirements.txt
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
pyDeprecate>=0.3.2
numpy>=1.20
numpy>=1.20,<2
pandas>=1.3
scikit-learn
scipy>=1.7.0
Expand Down
4 changes: 2 additions & 2 deletions src/pydvl/reporting/plots.py
Original file line number Diff line number Diff line change
Expand Up @@ -272,7 +272,7 @@ def plot_shapley(


def plot_influence_distribution(
influences: NDArray[np.float_], index: int, title_extra: str = ""
influences: NDArray[np.float64], index: int, title_extra: str = ""
) -> plt.Axes:
"""Plots the histogram of the influence that all samples in the training set
have over a single sample index.
Expand All @@ -292,7 +292,7 @@ def plot_influence_distribution(


def plot_influence_distribution_by_label(
influences: NDArray[np.float_], labels: NDArray[np.float_], title_extra: str = ""
influences: NDArray[np.float64], labels: NDArray[np.float64], title_extra: str = ""
):
"""Plots the histogram of the influence that all samples in the training set
have over a single sample index, separated by labels.
Expand Down
2 changes: 1 addition & 1 deletion src/pydvl/reporting/scores.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@
def compute_removal_score(
u: Utility,
values: ValuationResult,
percentages: Union[NDArray[np.float_], Iterable[float]],
percentages: Union[NDArray[np.float64], Iterable[float]],
*,
remove_best: bool = False,
progress: bool = False,
Expand Down
18 changes: 9 additions & 9 deletions src/pydvl/utils/numeric.py
Original file line number Diff line number Diff line change
Expand Up @@ -279,20 +279,20 @@ def running_moments(

@overload
def running_moments(
previous_avg: NDArray[np.float_],
previous_variance: NDArray[np.float_],
previous_avg: NDArray[np.float64],
previous_variance: NDArray[np.float64],
count: int,
new_value: NDArray[np.float_],
) -> Tuple[NDArray[np.float_], NDArray[np.float_]]:
new_value: NDArray[np.float64],
) -> Tuple[NDArray[np.float64], NDArray[np.float64]]:
...


def running_moments(
previous_avg: float | NDArray[np.float_],
previous_variance: float | NDArray[np.float_],
previous_avg: float | NDArray[np.float64],
previous_variance: float | NDArray[np.float64],
count: int,
new_value: float | NDArray[np.float_],
) -> Tuple[float | NDArray[np.float_], float | NDArray[np.float_]]:
new_value: float | NDArray[np.float64],
) -> Tuple[float | NDArray[np.float64], float | NDArray[np.float64]]:
"""Uses Welford's algorithm to calculate the running average and variance of
a set of numbers.

Expand Down Expand Up @@ -323,7 +323,7 @@ def running_moments(


def top_k_value_accuracy(
y_true: NDArray[np.float_], y_pred: NDArray[np.float_], k: int = 3
y_true: NDArray[np.float64], y_pred: NDArray[np.float64], k: int = 3
) -> float:
"""Computes the top-k accuracy for the estimated values by comparing indices
of the highest k values.
Expand Down
2 changes: 1 addition & 1 deletion src/pydvl/utils/score.py
Original file line number Diff line number Diff line change
Expand Up @@ -64,7 +64,7 @@ class Scorer:
"""

_name: str
range: NDArray[np.float_]
range: NDArray[np.float64]

def __init__(
self,
Expand Down
26 changes: 13 additions & 13 deletions src/pydvl/value/least_core/common.py
Original file line number Diff line number Diff line change
Expand Up @@ -29,8 +29,8 @@


class LeastCoreProblem(NamedTuple):
utility_values: NDArray[np.float_]
A_lb: NDArray[np.float_]
utility_values: NDArray[np.float64]
A_lb: NDArray[np.float64]


def lc_solve_problem(
Expand Down Expand Up @@ -113,7 +113,7 @@ def lc_solve_problem(
solver_options=solver_options,
)

values: Optional[NDArray[np.float_]]
values: Optional[NDArray[np.float64]]

if subsidy is None:
logger.debug("No values were found")
Expand Down Expand Up @@ -219,13 +219,13 @@ def _map_func(


def _solve_least_core_linear_program(
A_eq: NDArray[np.float_],
b_eq: NDArray[np.float_],
A_lb: NDArray[np.float_],
b_lb: NDArray[np.float_],
A_eq: NDArray[np.float64],
b_eq: NDArray[np.float64],
A_lb: NDArray[np.float64],
b_lb: NDArray[np.float64],
solver_options: dict,
non_negative_subsidy: bool = False,
) -> Tuple[Optional[NDArray[np.float_]], Optional[float]]:
) -> Tuple[Optional[NDArray[np.float64]], Optional[float]]:
r"""Solves the Least Core's linear program using cvxopt.

$$
Expand Down Expand Up @@ -297,12 +297,12 @@ def _solve_least_core_linear_program(

def _solve_egalitarian_least_core_quadratic_program(
subsidy: float,
A_eq: NDArray[np.float_],
b_eq: NDArray[np.float_],
A_lb: NDArray[np.float_],
b_lb: NDArray[np.float_],
A_eq: NDArray[np.float64],
b_eq: NDArray[np.float64],
A_lb: NDArray[np.float64],
b_lb: NDArray[np.float64],
solver_options: dict,
) -> Optional[NDArray[np.float_]]:
) -> Optional[NDArray[np.float64]]:
r"""Solves the egalitarian Least Core's quadratic program using cvxopt.

$$
Expand Down
16 changes: 8 additions & 8 deletions src/pydvl/value/result.py
Original file line number Diff line number Diff line change
Expand Up @@ -202,9 +202,9 @@ class ValuationResult(
"""

_indices: NDArray[IndexT]
_values: NDArray[np.float_]
_values: NDArray[np.float64]
_counts: NDArray[np.int_]
_variances: NDArray[np.float_]
_variances: NDArray[np.float64]
_data: Dataset
_names: NDArray[NameT]
_algorithm: str
Expand All @@ -216,8 +216,8 @@ class ValuationResult(
def __init__(
self,
*,
values: NDArray[np.float_],
variances: Optional[NDArray[np.float_]] = None,
values: NDArray[np.float64],
variances: Optional[NDArray[np.float64]] = None,
counts: Optional[NDArray[np.int_]] = None,
indices: Optional[NDArray[IndexT]] = None,
data_names: Optional[Sequence[NameT] | NDArray[NameT]] = None,
Expand Down Expand Up @@ -299,20 +299,20 @@ def sort(
self._sort_order = reverse

@property
def values(self) -> NDArray[np.float_]:
def values(self) -> NDArray[np.float64]:
"""The values, possibly sorted."""
return self._values[self._sort_positions]

@property
def variances(self) -> NDArray[np.float_]:
def variances(self) -> NDArray[np.float64]:
"""The variances, possibly sorted."""
return self._variances[self._sort_positions]

@property
def stderr(self) -> NDArray[np.float_]:
def stderr(self) -> NDArray[np.float64]:
"""The raw standard errors, possibly sorted."""
return cast(
NDArray[np.float_], np.sqrt(self.variances / np.maximum(1, self.counts))
NDArray[np.float64], np.sqrt(self.variances / np.maximum(1, self.counts))
)

@property
Expand Down
4 changes: 2 additions & 2 deletions src/pydvl/value/shapley/classwise.py
Original file line number Diff line number Diff line change
Expand Up @@ -166,7 +166,7 @@ def __str__(self):
def __call__(
self: "ClasswiseScorer",
model: SupervisedModel,
x_test: NDArray[np.float_],
x_test: NDArray[np.float64],
y_test: NDArray[np.int_],
) -> float:
(
Expand All @@ -180,7 +180,7 @@ def __call__(
def estimate_in_class_and_out_of_class_score(
self,
model: SupervisedModel,
x_test: NDArray[np.float_],
x_test: NDArray[np.float64],
y_test: NDArray[np.int_],
rescale_scores: bool = True,
) -> Tuple[float, float]:
Expand Down
4 changes: 2 additions & 2 deletions src/pydvl/value/shapley/gt.py
Original file line number Diff line number Diff line change
Expand Up @@ -49,7 +49,7 @@

log = logging.getLogger(__name__)

T = TypeVar("T", NDArray[np.float_], float)
T = TypeVar("T", NDArray[np.float64], float)
GTConstants = namedtuple("GTConstants", ["kk", "Z", "q", "q_tot", "T"])


Expand Down Expand Up @@ -266,7 +266,7 @@ def reducer(
results_it: Iterable[Tuple[NDArray, NDArray]]
) -> Tuple[NDArray, NDArray]:
return np.concatenate(list(x[0] for x in results_it)).astype(
np.float_
np.float64
), np.concatenate(list(x[1] for x in results_it)).astype(np.int_)

seed_sequence = ensure_seed_sequence(seed)
Expand Down
2 changes: 1 addition & 1 deletion src/pydvl/value/shapley/knn.py
Original file line number Diff line number Diff line change
Expand Up @@ -73,7 +73,7 @@ def knn_shapley(u: Utility, *, progress: bool = True) -> ValuationResult:
# closest to farthest
_, indices = nns.kneighbors(u.data.x_test)

values: NDArray[np.float_] = np.zeros_like(u.data.indices, dtype=np.float_)
values: NDArray[np.float64] = np.zeros_like(u.data.indices, dtype=np.float64)
n = len(u.data)
yt = u.data.y_train
iterator = enumerate(zip(u.data.y_test, indices), start=1)
Expand Down
4 changes: 2 additions & 2 deletions src/pydvl/value/stopping.py
Original file line number Diff line number Diff line change
Expand Up @@ -575,7 +575,7 @@ class HistoryDeviation(StoppingCriterion):
pin_converged: If `True`, once an index has converged, it is pinned
"""

_memory: NDArray[np.float_]
_memory: NDArray[np.float64]

def __init__(
self,
Expand Down Expand Up @@ -666,7 +666,7 @@ def __init__(
raise ValueError("rtol must be in (0, 1)")
self.rtol = rtol
self.burn_in = burn_in
self._memory: NDArray[np.float_] | None = None
self._memory: NDArray[np.float64] | None = None
self._corr = 0.0
self._completion = 0.0
self._iterations = 0
Expand Down
Loading