WIP ENH add censored quadratic df #250

Open · wants to merge 5 commits into main
Changes from 4 commits
4 changes: 3 additions & 1 deletion skglm/datafits/__init__.py
@@ -1,5 +1,7 @@
 from .base import BaseDatafit, BaseMultitaskDatafit
-from .single_task import Quadratic, QuadraticSVC, Logistic, Huber, Poisson, Gamma, Cox
+from .single_task import (
+    Quadratic, QuadraticSVC, CensoredQuadratic, Logistic, Huber, Poisson, Gamma, Cox,
+)
 from .multi_task import QuadraticMultiTask
 from .group import QuadraticGroup, LogisticGroup

98 changes: 97 additions & 1 deletion skglm/datafits/single_task.py
@@ -75,7 +75,8 @@ def initialize_sparse(self, X_data, X_indptr, X_indices, y):
             self.Xty[j] = xty

     def get_global_lipschitz(self, X, y):
-        return norm(X, ord=2) ** 2 / len(y)
+        n_samples = X.shape[0]
+        return norm(X, ord=2) ** 2 / n_samples

     def get_global_lipschitz_sparse(self, X_data, X_indptr, X_indices, y):
         return spectral_norm(X_data, X_indptr, X_indices, len(y)) ** 2 / len(y)
@@ -107,6 +108,101 @@ def full_grad_sparse(
    def intercept_update_step(self, y, Xw):
        return np.mean(Xw - y)

class CensoredQuadratic(Quadratic):
    """Quadratic datafit with access to Xty but not to y.

    The datafit reads:

    .. math:: 1 / (2 xx n_"samples") ||Xw||_2 ^ 2 - 1 / (n_"samples") w^T (X^T y)

    Attributes
    ----------
    Xty : array, shape (n_features,)
        Pre-computed quantity used during the gradient evaluation.
        Equal to ``X.T @ y``.

    y_mean : float
        Mean of the target ``y``, used only to update the intercept.

    Note
    ----
    The class is jit compiled at fit time using Numba compiler.
    This allows for faster computations.
    """

    def __init__(self, Xty, y_mean):
        self.Xty = Xty
        self.y_mean = y_mean
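    # Expanding ||y - Xw||^2 / (2 n) = ||Xw||^2 / (2 n) - w @ Xty / n + ||y||^2 / (2 n)
    # shows that this objective differs from the plain Quadratic one only by the
    # constant ||y||^2 / (2 n), so both datafits have the same minimizers in w.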
Comment on lines +130 to +132

Collaborator:

The constructor is better left to defining the datafit hyper-parameters; we can move Xty and y_mean to the initialize method.

Collaborator Author:

This would break the existing code: all solvers call datafit.initialize(X, y) internally.

Collaborator Author (@mathurinm, May 28, 2024):

Maybe a better API would be to instantiate each datafit with X, y, and whatever else it needs (each datafit having its own signature), then call datafit.initialize(), which would use the stored attributes, e.g.:

```python
datafit = Quadratic(X, y)
penalty = L1(alpha=0.1)
solver = AndersonCD()
solver.solve(datafit, penalty)
```

This would give each datafit more freedom to require various quantities. To me it makes more sense, because datafits like Cox depend on more than just X and y.

Wdyt @BadrMOUFAD?

Edit: this may break GeneralizedLinearEstimator, since the datafit would then need X and y at instantiation rather than at fit time.
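A minimal sketch of the proposed pattern (hypothetical, not part of this PR; names and bodies are assumptions):

```python
class Quadratic:  # sketch only; the real class derives from BaseDatafit
    def __init__(self, X, y):
        # store at construction everything this datafit needs
        self.X, self.y = X, y

    def initialize(self):
        # pre-computations read the stored attributes instead of
        # receiving X and y from the solver
        self.Xty = self.X.T @ self.y
```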


    def get_spec(self):
        spec = (
            ('Xty', float64[:]),
            ('y_mean', float64),
        )
        return spec

    def params_to_dict(self):
        return dict(Xty=self.Xty, y_mean=self.y_mean)
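    # get_spec declares the Numba types of the attributes (used when the class is
    # jit compiled); params_to_dict returns the arguments needed to re-instantiate
    # the datafit with the same Xty and y_mean.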

    # def get_lipschitz(self, X, y):
    #     n_features = X.shape[1]
    #     lipschitz = np.zeros(n_features, dtype=X.dtype)
    #     for j in range(n_features):
    #         lipschitz[j] = (X[:, j] ** 2).sum() / len(y)

    #     return lipschitz

    # XXX TODO check without y? or pass y = np.zeros(n_samples)
    # def get_lipschitz_sparse(self, X_data, X_indptr, X_indices, y):
    #     n_features = len(X_indptr) - 1
    #     lipschitz = np.zeros(n_features, dtype=X_data.dtype)

    #     for j in range(n_features):
    #         nrm2 = 0.
    #         for idx in range(X_indptr[j], X_indptr[j + 1]):
    #             nrm2 += X_data[idx] ** 2

    #         lipschitz[j] = nrm2 / len(y)

    #     return lipschitz

    def initialize(self, X, y):
        pass

    def initialize_sparse(self, X_data, X_indptr, X_indices, y):
        pass
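    # Nothing to pre-compute here: Xty and y_mean are supplied directly in __init__.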

    # def get_global_lipschitz(self, X, y):
    #     return norm(X, ord=2) ** 2 / len(y)

    # def get_global_lipschitz_sparse(self, X_data, X_indptr, X_indices, y):
    #     return spectral_norm(X_data, X_indptr, X_indices, len(y)) ** 2 / len(y)

    def value(self, y, w, Xw):
        return np.sum(Xw ** 2) / (2 * len(Xw)) - w @ self.Xty / len(Xw)

    # def gradient_scalar(self, X, y, w, Xw, j):
    #     return (X[:, j] @ Xw - self.Xty[j]) / len(Xw)

    # def gradient_scalar_sparse(self, X_data, X_indptr, X_indices, y, Xw, j):
    #     XjTXw = 0.
    #     for i in range(X_indptr[j], X_indptr[j+1]):
    #         XjTXw += X_data[i] * Xw[X_indices[i]]
    #     return (XjTXw - self.Xty[j]) / len(Xw)

    # def full_grad_sparse(
    #         self, X_data, X_indptr, X_indices, y, Xw):
    #     n_features = X_indptr.shape[0] - 1
    #     n_samples = y.shape[0]
    #     grad = np.zeros(n_features, dtype=Xw.dtype)
    #     for j in range(n_features):
    #         XjTXw = 0.
    #         for i in range(X_indptr[j], X_indptr[j + 1]):
    #             XjTXw += X_data[i] * Xw[X_indices[i]]
    #         grad[j] = (XjTXw - self.Xty[j]) / n_samples
    #     return grad
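    # gradient_scalar, gradient_scalar_sparse and full_grad_sparse are inherited
    # from Quadratic unchanged: they depend on y only through the pre-computed
    # self.Xty (full_grad_sparse also reads y.shape[0] for the sample count).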

    def intercept_update_step(self, y, Xw):
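        # np.mean(Xw - y) == np.mean(Xw) - np.mean(y), so with y_mean = np.mean(y)
        # this matches Quadratic's intercept update without needing y itself.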
        return np.mean(Xw) - self.y_mean  # MM TODO check


@njit
def sigmoid(x):
24 changes: 24 additions & 0 deletions skglm/demo_censored.py
@@ -0,0 +1,24 @@
import numpy as np


from skglm.datafits import Quadratic, CensoredQuadratic
from skglm.penalties import L1
from skglm.solvers import AndersonCD
from skglm.utils.jit_compilation import compiled_clone
from skglm.utils.data import make_correlated_data

X, y, _ = make_correlated_data(100, 150)

pen = compiled_clone(L1(alpha=0))

solver = AndersonCD(verbose=3, fit_intercept=True)
df = Quadratic()
df = compiled_clone(df)

w = solver.solve(X, y, df, pen)[0]

df2 = CensoredQuadratic(X.T @ y, y.mean())
df2 = compiled_clone(df2)

w2 = solver.solve(X, np.zeros(X.shape[0]), df2, pen)[0]
np.testing.assert_allclose(w2, w)
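One extra assertion that could be appended to the demo (a sketch, assuming `solver.solve` returns the intercept as the last entry of `w` when `fit_intercept=True`): at any `(coef, intercept)`, the two datafit values differ by `||y||^2 / (2 n) - intercept * mean(y)`.

```python
# Sketch: check the offset between the two objectives at the fitted coefficients.
coef, intercept = w[:-1], w[-1]
Xw = X @ coef + intercept
n_samples = len(y)
offset = (y ** 2).sum() / (2 * n_samples) - intercept * y.mean()
np.testing.assert_allclose(
    df.value(y, coef, Xw),                              # ||y - Xw||^2 / (2 n)
    df2.value(np.zeros(n_samples), coef, Xw) + offset,  # censored value + offset
)
```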