Skip to content

Commit

Permalink
Merge pull request #116 from X-DataInitiative/#115-add-score-metrics-…
Browse files Browse the repository at this point in the history
…to-hawkes

#115 add score metrics to hawkes
  • Loading branch information
Mbompr authored Oct 23, 2017
2 parents 32b4f60 + f2fae8a commit e4998c6
Show file tree
Hide file tree
Showing 12 changed files with 706 additions and 84 deletions.
43 changes: 43 additions & 0 deletions tick/inference/base/learner_hawkes_param.py
Original file line number Diff line number Diff line change
Expand Up @@ -278,3 +278,46 @@ def get_kernel_norms(self):
corresponding_simu = self._corresponding_simu()
get_norm = np.vectorize(lambda kernel: kernel.get_norm())
return get_norm(corresponding_simu.kernels)

def score(self, events=None, end_times=None, coeffs=None):
"""Compute score metric
Score metric is log likelihood (the higher the better)
Parameters
----------
events : `list` of `list` of `np.ndarray`, default = None
List of Hawkes processes realizations used to measure score.
Each realization of the Hawkes process is a list of n_node for
each component of the Hawkes. Namely `events[i][j]` contains a
one-dimensional `numpy.array` of the events' timestamps of
component j of realization i.
If only one realization is given, it will be wrapped into a list
If None, events given while fitting model will be used
end_times : `np.ndarray` or `float`, default = None
List of end time of all hawkes processes used to measure score.
If None, it will be set to each realization's latest time.
If only one realization is provided, then a float can be given.
coeffs : `np.ndarray`
Coefficients at which the score is measured
Returns
-------
likelihood : `double`
Computed log likelihood value
"""
if events is None and not self._fitted:
raise ValueError('You must either call `fit` before `score` or '
'provide events')

if coeffs is None:
coeffs = self.coeffs

if events is None and end_times is None:
model = self._model_obj
else:
model = self._construct_model_obj()
model.fit(events, end_times)

return - model.loss(coeffs)
55 changes: 55 additions & 0 deletions tick/inference/hawkes_adm4.py
Original file line number Diff line number Diff line change
Expand Up @@ -455,3 +455,58 @@ def get_kernel_norms(self):
corresponding_simu = self._corresponding_simu()
get_norm = np.vectorize(lambda kernel: kernel.get_norm())
return get_norm(corresponding_simu.kernels)

def score(self, events=None, end_times=None, baseline=None, adjacency=None):
"""Compute score metric
Score metric is log likelihood (the higher the better)
Parameters
----------
events : `list` of `list` of `np.ndarray`, default = None
List of Hawkes processes realizations used to measure score.
Each realization of the Hawkes process is a list of n_node for
each component of the Hawkes. Namely `events[i][j]` contains a
one-dimensional `numpy.array` of the events' timestamps of
component j of realization i.
If only one realization is given, it will be wrapped into a list
If None, events given while fitting model will be used
end_times : `np.ndarray` or `float`, default = None
List of end time of all hawkes processes used to measure score.
If None, it will be set to each realization's latest time.
If only one realization is provided, then a float can be given.
baseline : `np.ndarray`, shape=(n_nodes, ), default = None
Baseline vector for which the score is measured
If `None` baseline obtained during fitting is used
adjacency : `np.ndarray`, shape=(n_nodes, n_nodes), default = None
Adjacency matrix for which the score is measured
If `None` adjacency obtained during fitting is used
Returns
-------
likelihood : `double`
Computed log likelihood value
"""
if events is None and not self._fitted:
raise ValueError('You must either call `fit` before `score` or '
'provide events')

if baseline is not None or adjacency is not None:
if baseline is None:
baseline = self.baseline
if adjacency is None:
adjacency = self.adjacency
coeffs = np.hstack((baseline, adjacency.ravel()))
else:
coeffs = self.coeffs

if events is None and end_times is None:
model = self._model
else:
model = ModelHawkesFixedExpKernLogLik(self.decay,
n_threads=self.n_threads)
model.fit(events, end_times)

return - model.loss(coeffs)
84 changes: 77 additions & 7 deletions tick/inference/hawkes_em.py
Original file line number Diff line number Diff line change
Expand Up @@ -176,14 +176,11 @@ def _solve(self, baseline_start=None, kernel_start=None):
else:
self.baseline = baseline_start.copy()

_kernel_uvm_2d = self.kernel.reshape((self.n_nodes,
self.n_nodes * self.kernel_size))

for i in range(self.max_iter + 1):
prev_baseline = self.baseline.copy()
prev_kernel = self.kernel.copy()

self._learner.solve(self.baseline, _kernel_uvm_2d)
self._learner.solve(self.baseline, self._flat_kernels)

rel_baseline = relative_distance(self.baseline, prev_baseline)
rel_kernel = relative_distance(self.kernel, prev_kernel)
Expand Down Expand Up @@ -250,13 +247,86 @@ def get_kernel_norms(self):
2d array in which each entry i, j corresponds to the norm of
kernel i, j
"""
kernel_intervals = self.kernel_discretization[1:] - \
self.kernel_discretization[:-1]
return self.kernel.dot(kernel_intervals)
return self._learner.get_kernel_norms(self._flat_kernels)

    def objective(self, coeffs, loss: float = None):
        """Not supported by this learner: it exposes no explicit objective
        function at given coefficients.

        Raises
        ------
        NotImplementedError
            Always.
        """
        raise NotImplementedError()

def score(self, events=None, end_times=None, baseline=None, kernel=None):
"""Compute score metric
Score metric is log likelihood (the higher the better)
Parameters
----------
events : `list` of `list` of `np.ndarray`, default = None
List of Hawkes processes realizations used to measure score.
Each realization of the Hawkes process is a list of n_node for
each component of the Hawkes. Namely `events[i][j]` contains a
one-dimensional `numpy.array` of the events' timestamps of
component j of realization i.
If only one realization is given, it will be wrapped into a list
If None, events given while fitting model will be used
end_times : `np.ndarray` or `float`, default = None
List of end time of all hawkes processes used to measure score.
If None, it will be set to each realization's latest time.
If only one realization is provided, then a float can be given.
baseline : `np.ndarray`, shape=(n_nodes, ), default = None
Baseline vector for which the score is measured
If `None` baseline obtained during fitting is used
kernel : `None` or `np.ndarray', shape=(n_nodes, n_nodes, kernel_size), default=None
Used to force start values for kernel parameter
If `None` kernel obtained during fitting is used
Returns
-------
likelihood : `double`
Computed log likelihood value
"""
if events is None and not self._fitted:
raise ValueError('You must either call `fit` before `score` or '
'provide events')

if events is None and end_times is None:
learner = self
else:
learner = HawkesEM(**self.get_params())
learner._set('_end_times', end_times)
learner._set_data(events)

n_nodes = learner.n_nodes
kernel_size = learner.kernel_size

if baseline is None:
baseline = self.baseline

if kernel is None:
kernel = self.kernel

flat_kernels = kernel.reshape((n_nodes, n_nodes * kernel_size))

return learner._learner.loglikelihood(baseline, flat_kernels)

def get_params(self):
return {
'kernel_support': self.kernel_support,
'kernel_size': self.kernel_size,
'kernel_discretization': self.kernel_discretization,
'tol': self.tol,
'max_iter': self.max_iter,
'print_every': self.print_every,
'record_every': self.record_every,
'verbose': self.verbose,
'n_threads': self.n_threads
}

@property
def _flat_kernels(self):
return self.kernel.reshape((self.n_nodes,
self.n_nodes * self.kernel_size))

    @property
    def kernel_support(self):
        """Support of the kernels, read from the underlying learner object."""
        return self._learner.get_kernel_support()
Expand Down
46 changes: 46 additions & 0 deletions tick/inference/hawkes_expkern_fixeddecay.py
Original file line number Diff line number Diff line change
Expand Up @@ -199,3 +199,49 @@ def _corresponding_simu(self):
return SimuHawkesExpKernels(adjacency=self.adjacency,
decays=self.decays,
baseline=self.baseline)

def score(self, events=None, end_times=None, baseline=None, adjacency=None):
"""Compute score metric
Score metric is log likelihood (the higher the better)
Parameters
----------
events : `list` of `list` of `np.ndarray`, default = None
List of Hawkes processes realizations used to measure score.
Each realization of the Hawkes process is a list of n_node for
each component of the Hawkes. Namely `events[i][j]` contains a
one-dimensional `numpy.array` of the events' timestamps of
component j of realization i.
If only one realization is given, it will be wrapped into a list
If None, events given while fitting model will be used
end_times : `np.ndarray` or `float`, default = None
List of end time of all hawkes processes used to measure score.
If None, it will be set to each realization's latest time.
If only one realization is provided, then a float can be given.
baseline : `np.ndarray`, shape=(n_nodes, ), default = None
Baseline vector for which the score is measured
If `None` baseline obtained during fitting is used
adjacency : `np.ndarray`, shape=(n_nodes, n_nodes), default = None
Adjacency matrix for which the score is measured
If `None` adjacency obtained during fitting is used
Returns
-------
likelihood : `double`
Computed log likelihood value
"""
if baseline is not None or adjacency is not None:
if baseline is None:
baseline = self.baseline
if adjacency is None:
adjacency = self.adjacency
coeffs = np.hstack((baseline, adjacency.ravel()))
else:
coeffs = None

return LearnerHawkesParametric.score(
self, events=events, end_times=end_times,
coeffs=coeffs)
48 changes: 47 additions & 1 deletion tick/inference/hawkes_sumexpkern_fixeddecay.py
Original file line number Diff line number Diff line change
Expand Up @@ -188,7 +188,7 @@ def adjacency(self):
raise ValueError('You must fit data before getting estimated '
'adjacency')
else:
return self.coeffs[self.n_nodes * self._model_obj.n_baselines:]\
return self.coeffs[self.n_nodes * self._model_obj.n_baselines:] \
.reshape((self.n_nodes, self.n_nodes, self.n_decays))

def _corresponding_simu(self):
Expand All @@ -198,3 +198,49 @@ def _corresponding_simu(self):

    def get_baseline_values(self, i, abscissa_array):
        """Return baseline values of component `i` at the given abscissa,
        evaluated through the simulation object mirroring this learner.
        """
        return self._corresponding_simu().get_baseline_values(i, abscissa_array)

def score(self, events=None, end_times=None, baseline=None, adjacency=None):
"""Compute score metric
Score metric is log likelihood (the higher the better)
Parameters
----------
events : `list` of `list` of `np.ndarray`, default = None
List of Hawkes processes realizations used to measure score.
Each realization of the Hawkes process is a list of n_node for
each component of the Hawkes. Namely `events[i][j]` contains a
one-dimensional `numpy.array` of the events' timestamps of
component j of realization i.
If only one realization is given, it will be wrapped into a list
If None, events given while fitting model will be used
end_times : `np.ndarray` or `float`, default = None
List of end time of all hawkes processes used to measure score.
If None, it will be set to each realization's latest time.
If only one realization is provided, then a float can be given.
baseline : `np.ndarray`, shape=(n_nodes, ), default = None
Baseline vector for which the score is measured
If `None` baseline obtained during fitting is used
adjacency : `np.ndarray`, shape=(n_nodes, n_nodes, n_decays), default = None
Adjacency matrix for which the score is measured
If `None` adjacency obtained during fitting is used
Returns
-------
likelihood : `double`
Computed log likelihood value
"""
if baseline is not None or adjacency is not None:
if baseline is None:
baseline = self.baseline
if adjacency is None:
adjacency = self.adjacency
coeffs = np.hstack((baseline, adjacency.ravel()))
else:
coeffs = None

return LearnerHawkesParametric.score(
self, events=events, end_times=end_times,
coeffs=coeffs)
Loading

0 comments on commit e4998c6

Please sign in to comment.