Skip to content

Commit

Permalink
Merge pull request #116 from X-DataInitiative/#115-add-score-metrics-…
Browse files Browse the repository at this point in the history
…to-hawkes

#115 add score metrics to hawkes
  • Loading branch information
Mbompr authored Oct 23, 2017
2 parents 32b4f60 + f2fae8a commit e4998c6
Show file tree
Hide file tree
Showing 12 changed files with 706 additions and 84 deletions.
43 changes: 43 additions & 0 deletions tick/inference/base/learner_hawkes_param.py
Original file line number Diff line number Diff line change
Expand Up @@ -278,3 +278,46 @@ def get_kernel_norms(self):
corresponding_simu = self._corresponding_simu()
get_norm = np.vectorize(lambda kernel: kernel.get_norm())
return get_norm(corresponding_simu.kernels)

def score(self, events=None, end_times=None, coeffs=None):
"""Compute score metric
Score metric is log likelihood (the higher the better)
Parameters
----------
events : `list` of `list` of `np.ndarray`, default = None
List of Hawkes processes realizations used to measure score.
Each realization of the Hawkes process is a list of n_node for
each component of the Hawkes. Namely `events[i][j]` contains a
one-dimensional `numpy.array` of the events' timestamps of
component j of realization i.
If only one realization is given, it will be wrapped into a list
If None, events given while fitting model will be used
end_times : `np.ndarray` or `float`, default = None
List of end time of all hawkes processes used to measure score.
If None, it will be set to each realization's latest time.
If only one realization is provided, then a float can be given.
coeffs : `np.ndarray`
Coefficients at which the score is measured
Returns
-------
likelihood : `double`
Computed log likelihood value
"""
if events is None and not self._fitted:
raise ValueError('You must either call `fit` before `score` or '
'provide events')

if coeffs is None:
coeffs = self.coeffs

if events is None and end_times is None:
model = self._model_obj
else:
model = self._construct_model_obj()
model.fit(events, end_times)

return - model.loss(coeffs)
55 changes: 55 additions & 0 deletions tick/inference/hawkes_adm4.py
Original file line number Diff line number Diff line change
Expand Up @@ -455,3 +455,58 @@ def get_kernel_norms(self):
corresponding_simu = self._corresponding_simu()
get_norm = np.vectorize(lambda kernel: kernel.get_norm())
return get_norm(corresponding_simu.kernels)

def score(self, events=None, end_times=None, baseline=None, adjacency=None):
"""Compute score metric
Score metric is log likelihood (the higher the better)
Parameters
----------
events : `list` of `list` of `np.ndarray`, default = None
List of Hawkes processes realizations used to measure score.
Each realization of the Hawkes process is a list of n_node for
each component of the Hawkes. Namely `events[i][j]` contains a
one-dimensional `numpy.array` of the events' timestamps of
component j of realization i.
If only one realization is given, it will be wrapped into a list
If None, events given while fitting model will be used
end_times : `np.ndarray` or `float`, default = None
List of end time of all hawkes processes used to measure score.
If None, it will be set to each realization's latest time.
If only one realization is provided, then a float can be given.
baseline : `np.ndarray`, shape=(n_nodes, ), default = None
Baseline vector for which the score is measured
If `None` baseline obtained during fitting is used
adjacency : `np.ndarray`, shape=(n_nodes, n_nodes), default = None
Adjacency matrix for which the score is measured
If `None` adjacency obtained during fitting is used
Returns
-------
likelihood : `double`
Computed log likelihood value
"""
if events is None and not self._fitted:
raise ValueError('You must either call `fit` before `score` or '
'provide events')

if baseline is not None or adjacency is not None:
if baseline is None:
baseline = self.baseline
if adjacency is None:
adjacency = self.adjacency
coeffs = np.hstack((baseline, adjacency.ravel()))
else:
coeffs = self.coeffs

if events is None and end_times is None:
model = self._model
else:
model = ModelHawkesFixedExpKernLogLik(self.decay,
n_threads=self.n_threads)
model.fit(events, end_times)

return - model.loss(coeffs)
84 changes: 77 additions & 7 deletions tick/inference/hawkes_em.py
Original file line number Diff line number Diff line change
Expand Up @@ -176,14 +176,11 @@ def _solve(self, baseline_start=None, kernel_start=None):
else:
self.baseline = baseline_start.copy()

_kernel_uvm_2d = self.kernel.reshape((self.n_nodes,
self.n_nodes * self.kernel_size))

for i in range(self.max_iter + 1):
prev_baseline = self.baseline.copy()
prev_kernel = self.kernel.copy()

self._learner.solve(self.baseline, _kernel_uvm_2d)
self._learner.solve(self.baseline, self._flat_kernels)

rel_baseline = relative_distance(self.baseline, prev_baseline)
rel_kernel = relative_distance(self.kernel, prev_kernel)
Expand Down Expand Up @@ -250,13 +247,86 @@ def get_kernel_norms(self):
2d array in which each entry i, j corresponds to the norm of
kernel i, j
"""
kernel_intervals = self.kernel_discretization[1:] - \
self.kernel_discretization[:-1]
return self.kernel.dot(kernel_intervals)
return self._learner.get_kernel_norms(self._flat_kernels)

    def objective(self, coeffs, loss: float = None):
        """Not supported by this learner: it exposes no explicit objective
        function at given coefficients.

        Raises
        ------
        NotImplementedError
            Always.
        """
        raise NotImplementedError()

def score(self, events=None, end_times=None, baseline=None, kernel=None):
"""Compute score metric
Score metric is log likelihood (the higher the better)
Parameters
----------
events : `list` of `list` of `np.ndarray`, default = None
List of Hawkes processes realizations used to measure score.
Each realization of the Hawkes process is a list of n_node for
each component of the Hawkes. Namely `events[i][j]` contains a
one-dimensional `numpy.array` of the events' timestamps of
component j of realization i.
If only one realization is given, it will be wrapped into a list
If None, events given while fitting model will be used
end_times : `np.ndarray` or `float`, default = None
List of end time of all hawkes processes used to measure score.
If None, it will be set to each realization's latest time.
If only one realization is provided, then a float can be given.
baseline : `np.ndarray`, shape=(n_nodes, ), default = None
Baseline vector for which the score is measured
If `None` baseline obtained during fitting is used
kernel : `None` or `np.ndarray', shape=(n_nodes, n_nodes, kernel_size), default=None
Used to force start values for kernel parameter
If `None` kernel obtained during fitting is used
Returns
-------
likelihood : `double`
Computed log likelihood value
"""
if events is None and not self._fitted:
raise ValueError('You must either call `fit` before `score` or '
'provide events')

if events is None and end_times is None:
learner = self
else:
learner = HawkesEM(**self.get_params())
learner._set('_end_times', end_times)
learner._set_data(events)

n_nodes = learner.n_nodes
kernel_size = learner.kernel_size

if baseline is None:
baseline = self.baseline

if kernel is None:
kernel = self.kernel

flat_kernels = kernel.reshape((n_nodes, n_nodes * kernel_size))

return learner._learner.loglikelihood(baseline, flat_kernels)

def get_params(self):
return {
'kernel_support': self.kernel_support,
'kernel_size': self.kernel_size,
'kernel_discretization': self.kernel_discretization,
'tol': self.tol,
'max_iter': self.max_iter,
'print_every': self.print_every,
'record_every': self.record_every,
'verbose': self.verbose,
'n_threads': self.n_threads
}

@property
def _flat_kernels(self):
return self.kernel.reshape((self.n_nodes,
self.n_nodes * self.kernel_size))

    @property
    def kernel_support(self):
        """Support of the kernels, read from the underlying learner object."""
        return self._learner.get_kernel_support()
Expand Down
46 changes: 46 additions & 0 deletions tick/inference/hawkes_expkern_fixeddecay.py
Original file line number Diff line number Diff line change
Expand Up @@ -199,3 +199,49 @@ def _corresponding_simu(self):
return SimuHawkesExpKernels(adjacency=self.adjacency,
decays=self.decays,
baseline=self.baseline)

def score(self, events=None, end_times=None, baseline=None, adjacency=None):
"""Compute score metric
Score metric is log likelihood (the higher the better)
Parameters
----------
events : `list` of `list` of `np.ndarray`, default = None
List of Hawkes processes realizations used to measure score.
Each realization of the Hawkes process is a list of n_node for
each component of the Hawkes. Namely `events[i][j]` contains a
one-dimensional `numpy.array` of the events' timestamps of
component j of realization i.
If only one realization is given, it will be wrapped into a list
If None, events given while fitting model will be used
end_times : `np.ndarray` or `float`, default = None
List of end time of all hawkes processes used to measure score.
If None, it will be set to each realization's latest time.
If only one realization is provided, then a float can be given.
baseline : `np.ndarray`, shape=(n_nodes, ), default = None
Baseline vector for which the score is measured
If `None` baseline obtained during fitting is used
adjacency : `np.ndarray`, shape=(n_nodes, n_nodes), default = None
Adjacency matrix for which the score is measured
If `None` adjacency obtained during fitting is used
Returns
-------
likelihood : `double`
Computed log likelihood value
"""
if baseline is not None or adjacency is not None:
if baseline is None:
baseline = self.baseline
if adjacency is None:
adjacency = self.adjacency
coeffs = np.hstack((baseline, adjacency.ravel()))
else:
coeffs = None

return LearnerHawkesParametric.score(
self, events=events, end_times=end_times,
coeffs=coeffs)
48 changes: 47 additions & 1 deletion tick/inference/hawkes_sumexpkern_fixeddecay.py
Original file line number Diff line number Diff line change
Expand Up @@ -188,7 +188,7 @@ def adjacency(self):
raise ValueError('You must fit data before getting estimated '
'adjacency')
else:
return self.coeffs[self.n_nodes * self._model_obj.n_baselines:]\
return self.coeffs[self.n_nodes * self._model_obj.n_baselines:] \
.reshape((self.n_nodes, self.n_nodes, self.n_decays))

def _corresponding_simu(self):
Expand All @@ -198,3 +198,49 @@ def _corresponding_simu(self):

    def get_baseline_values(self, i, abscissa_array):
        """Return baseline values of component `i` at the given abscissa,
        evaluated through the simulation object mirroring this learner.
        """
        return self._corresponding_simu().get_baseline_values(i, abscissa_array)

def score(self, events=None, end_times=None, baseline=None, adjacency=None):
"""Compute score metric
Score metric is log likelihood (the higher the better)
Parameters
----------
events : `list` of `list` of `np.ndarray`, default = None
List of Hawkes processes realizations used to measure score.
Each realization of the Hawkes process is a list of n_node for
each component of the Hawkes. Namely `events[i][j]` contains a
one-dimensional `numpy.array` of the events' timestamps of
component j of realization i.
If only one realization is given, it will be wrapped into a list
If None, events given while fitting model will be used
end_times : `np.ndarray` or `float`, default = None
List of end time of all hawkes processes used to measure score.
If None, it will be set to each realization's latest time.
If only one realization is provided, then a float can be given.
baseline : `np.ndarray`, shape=(n_nodes, ), default = None
Baseline vector for which the score is measured
If `None` baseline obtained during fitting is used
adjacency : `np.ndarray`, shape=(n_nodes, n_nodes, n_decays), default = None
Adjacency matrix for which the score is measured
If `None` adjacency obtained during fitting is used
Returns
-------
likelihood : `double`
Computed log likelihood value
"""
if baseline is not None or adjacency is not None:
if baseline is None:
baseline = self.baseline
if adjacency is None:
adjacency = self.adjacency
coeffs = np.hstack((baseline, adjacency.ravel()))
else:
coeffs = None

return LearnerHawkesParametric.score(
self, events=events, end_times=end_times,
coeffs=coeffs)
Loading

0 comments on commit e4998c6

Please sign in to comment.