From f2fae8a7207a6204c9225dc1615df3f3221ea09c Mon Sep 17 00:00:00 2001
From: Martin B
Date: Thu, 12 Oct 2017 16:52:00 +0200
Subject: [PATCH] Add score metric to Hawkes EM, with exp kernels and ADM4

Solves #115

This was straightforward for all learners except HawkesEM, which had no
likelihood function. Its likelihood computation is almost identical to the
`solve` method, which has been refactored so that the two share code.
---
 tick/inference/base/learner_hawkes_param.py    |  43 +++
 tick/inference/hawkes_adm4.py                  |  55 ++++
 tick/inference/hawkes_em.py                    |  84 +++++-
 tick/inference/hawkes_expkern_fixeddecay.py    |  46 ++++
 .../inference/hawkes_sumexpkern_fixeddecay.py  |  48 +++-
 tick/inference/src/hawkes_em.cpp               | 252 ++++++++++++++----
 tick/inference/src/hawkes_em.h                 |  62 +++--
 tick/inference/swig/hawkes_em.i                |   3 +
 tick/inference/tests/hawkes_adm4_test.py       |  44 ++-
 tick/inference/tests/hawkes_em_test.py         |  75 +++++-
 tick/inference/tests/hawkes_expkern_test.py    |  39 +++
 .../inference/tests/hawkes_sumexpkern_test.py  |  39 +++
 12 files changed, 706 insertions(+), 84 deletions(-)

diff --git a/tick/inference/base/learner_hawkes_param.py b/tick/inference/base/learner_hawkes_param.py
index 4cc8f0bd7..ae23263ca 100644
--- a/tick/inference/base/learner_hawkes_param.py
+++ b/tick/inference/base/learner_hawkes_param.py
@@ -278,3 +278,46 @@ def get_kernel_norms(self):
         corresponding_simu = self._corresponding_simu()
         get_norm = np.vectorize(lambda kernel: kernel.get_norm())
         return get_norm(corresponding_simu.kernels)
+
+    def score(self, events=None, end_times=None, coeffs=None):
+        """Compute score metric
+        Score metric is log likelihood (the higher the better)
+
+        Parameters
+        ----------
+        events : `list` of `list` of `np.ndarray`, default = None
+            List of Hawkes process realizations used to measure score.
+            Each realization is a list of n_nodes arrays, one per
+            component of the Hawkes process. Namely `events[i][j]`
+            contains a one-dimensional `numpy.array` of the timestamps of
+            component j of realization i.
+            If only one realization is given, it will be wrapped into a
+            list.
+            If None, the events given while fitting the model will be used.
+
+        end_times : `np.ndarray` or `float`, default = None
+            List of end times of all Hawkes processes used to measure
+            score.
+            If None, it will be set to each realization's latest time.
+            If only one realization is provided, a float can be given.
+
+        coeffs : `np.ndarray`
+            Coefficients at which the score is measured
+
+        Returns
+        -------
+        likelihood : `double`
+            Computed log likelihood value
+        """
+        if events is None and not self._fitted:
+            raise ValueError('You must either call `fit` before `score` or '
+                             'provide events')
+
+        if coeffs is None:
+            coeffs = self.coeffs
+
+        if events is None and end_times is None:
+            model = self._model_obj
+        else:
+            model = self._construct_model_obj()
+            model.fit(events, end_times)
+
+        return -model.loss(coeffs)

diff --git a/tick/inference/hawkes_adm4.py b/tick/inference/hawkes_adm4.py
index c9cabaabd..98573f673 100644
--- a/tick/inference/hawkes_adm4.py
+++ b/tick/inference/hawkes_adm4.py
@@ -455,3 +455,58 @@ def get_kernel_norms(self):
         corresponding_simu = self._corresponding_simu()
         get_norm = np.vectorize(lambda kernel: kernel.get_norm())
         return get_norm(corresponding_simu.kernels)
+
+    def score(self, events=None, end_times=None, baseline=None, adjacency=None):
+        """Compute score metric
+        Score metric is log likelihood (the higher the better)
+
+        Parameters
+        ----------
+        events : `list` of `list` of `np.ndarray`, default = None
+            List of Hawkes process realizations used to measure score.
+            Each realization is a list of n_nodes arrays, one per
+            component of the Hawkes process. Namely `events[i][j]`
+            contains a one-dimensional `numpy.array` of the timestamps of
+            component j of realization i.
+            If only one realization is given, it will be wrapped into a
+            list.
+            If None, the events given while fitting the model will be used.
+
+        end_times : `np.ndarray` or `float`, default = None
+            List of end times of all Hawkes processes used to measure
+            score.
+            If None, it will be set to each realization's latest time.
+            If only one realization is provided, a float can be given.
+
+        baseline : `np.ndarray`, shape=(n_nodes, ), default = None
+            Baseline vector for which the score is measured.
+            If `None`, the baseline obtained during fitting is used.
+
+        adjacency : `np.ndarray`, shape=(n_nodes, n_nodes), default = None
+            Adjacency matrix for which the score is measured.
+            If `None`, the adjacency obtained during fitting is used.
+
+        Returns
+        -------
+        likelihood : `double`
+            Computed log likelihood value
+        """
+        if events is None and not self._fitted:
+            raise ValueError('You must either call `fit` before `score` or '
+                             'provide events')
+
+        if baseline is not None or adjacency is not None:
+            if baseline is None:
+                baseline = self.baseline
+            if adjacency is None:
+                adjacency = self.adjacency
+            coeffs = np.hstack((baseline, adjacency.ravel()))
+        else:
+            coeffs = self.coeffs
+
+        if events is None and end_times is None:
+            model = self._model
+        else:
+            model = ModelHawkesFixedExpKernLogLik(self.decay,
+                                                  n_threads=self.n_threads)
+            model.fit(events, end_times)
+
+        return -model.loss(coeffs)

diff --git a/tick/inference/hawkes_em.py b/tick/inference/hawkes_em.py
index 971a3fa31..1dedfafac 100644
--- a/tick/inference/hawkes_em.py
+++ b/tick/inference/hawkes_em.py
@@ -176,14 +176,11 @@ def _solve(self, baseline_start=None, kernel_start=None):
         else:
             self.baseline = baseline_start.copy()
 
-        _kernel_uvm_2d = self.kernel.reshape((self.n_nodes,
-                                              self.n_nodes * self.kernel_size))
-
         for i in range(self.max_iter + 1):
             prev_baseline = self.baseline.copy()
             prev_kernel = self.kernel.copy()
 
-            self._learner.solve(self.baseline, _kernel_uvm_2d)
+            self._learner.solve(self.baseline, self._flat_kernels)
 
             rel_baseline = relative_distance(self.baseline, prev_baseline)
             rel_kernel = relative_distance(self.kernel, prev_kernel)
@@ -250,13 +247,86 @@ def get_kernel_norms(self):
             2d array in which each entry i, j corresponds to the norm of
             kernel i, j
         """
-        kernel_intervals = self.kernel_discretization[1:] - \
-                           self.kernel_discretization[:-1]
-        return self.kernel.dot(kernel_intervals)
+        return self._learner.get_kernel_norms(self._flat_kernels)
 
     def objective(self, coeffs, loss: float = None):
         raise NotImplementedError()
 
+    def score(self, events=None, end_times=None, baseline=None, kernel=None):
+        """Compute score metric
+        Score metric is log likelihood (the higher the better)
+
+        Parameters
+        ----------
+        events : `list` of `list` of `np.ndarray`, default = None
+            List of Hawkes process realizations used to measure score.
+            Each realization is a list of n_nodes arrays, one per
+            component of the Hawkes process. Namely `events[i][j]`
+            contains a one-dimensional `numpy.array` of the timestamps of
+            component j of realization i.
+            If only one realization is given, it will be wrapped into a
+            list.
+            If None, the events given while fitting the model will be used.
+
+        end_times : `np.ndarray` or `float`, default = None
+            List of end times of all Hawkes processes used to measure
+            score.
+            If None, it will be set to each realization's latest time.
+            If only one realization is provided, a float can be given.
+
+        baseline : `np.ndarray`, shape=(n_nodes, ), default = None
+            Baseline vector for which the score is measured.
+            If `None`, the baseline obtained during fitting is used.
+
+        kernel : `np.ndarray`, shape=(n_nodes, n_nodes, kernel_size), default=None
+            Kernel values for which the score is measured.
+            If `None`, the kernel obtained during fitting is used.
+
+        Returns
+        -------
+        likelihood : `double`
+            Computed log likelihood value
+        """
+        if events is None and not self._fitted:
+            raise ValueError('You must either call `fit` before `score` or '
+                             'provide events')
+
+        if events is None and end_times is None:
+            learner = self
+        else:
+            learner = HawkesEM(**self.get_params())
+            learner._set('_end_times', end_times)
+            learner._set_data(events)
+
+        n_nodes = learner.n_nodes
+        kernel_size = learner.kernel_size
+
+        if baseline is None:
+            baseline = self.baseline
+
+        if kernel is None:
+            kernel = self.kernel
+
+        flat_kernels = kernel.reshape((n_nodes, n_nodes * kernel_size))
+
+        return learner._learner.loglikelihood(baseline, flat_kernels)
+
+    def get_params(self):
+        return {
+            'kernel_support': self.kernel_support,
+            'kernel_size': self.kernel_size,
+            'kernel_discretization': self.kernel_discretization,
+            'tol': self.tol,
+            'max_iter': self.max_iter,
+            'print_every': self.print_every,
+            'record_every': self.record_every,
+            'verbose': self.verbose,
+            'n_threads': self.n_threads
+        }
+
+    @property
+    def _flat_kernels(self):
+        return self.kernel.reshape((self.n_nodes,
+                                    self.n_nodes * self.kernel_size))
+
     @property
     def kernel_support(self):
         return self._learner.get_kernel_support()

diff --git a/tick/inference/hawkes_expkern_fixeddecay.py b/tick/inference/hawkes_expkern_fixeddecay.py
index ad1dd66e9..d453b658c 100644
--- a/tick/inference/hawkes_expkern_fixeddecay.py
+++ b/tick/inference/hawkes_expkern_fixeddecay.py
@@ -199,3 +199,49 @@ def _corresponding_simu(self):
         return SimuHawkesExpKernels(adjacency=self.adjacency,
                                     decays=self.decays,
                                     baseline=self.baseline)
+
+    def score(self, events=None, end_times=None, baseline=None, adjacency=None):
+        """Compute score metric
+        Score metric is log likelihood (the higher the better)
+
+        Parameters
+        ----------
+        events : `list` of `list` of `np.ndarray`, default = None
+            List of Hawkes process realizations used to measure score.
+            Each realization is a list of n_nodes arrays, one per
+            component of the Hawkes process. Namely `events[i][j]`
+            contains a one-dimensional `numpy.array` of the timestamps of
+            component j of realization i.
+            If only one realization is given, it will be wrapped into a
+            list.
+            If None, the events given while fitting the model will be used.
+
+        end_times : `np.ndarray` or `float`, default = None
+            List of end times of all Hawkes processes used to measure
+            score.
+            If None, it will be set to each realization's latest time.
+            If only one realization is provided, a float can be given.
+
+        baseline : `np.ndarray`, shape=(n_nodes, ), default = None
+            Baseline vector for which the score is measured.
+            If `None`, the baseline obtained during fitting is used.
+
+        adjacency : `np.ndarray`, shape=(n_nodes, n_nodes), default = None
+            Adjacency matrix for which the score is measured.
+            If `None`, the adjacency obtained during fitting is used.
+
+        Returns
+        -------
+        likelihood : `double`
+            Computed log likelihood value
+        """
+        if baseline is not None or adjacency is not None:
+            if baseline is None:
+                baseline = self.baseline
+            if adjacency is None:
+                adjacency = self.adjacency
+            coeffs = np.hstack((baseline, adjacency.ravel()))
+        else:
+            coeffs = None
+
+        return LearnerHawkesParametric.score(
+            self, events=events, end_times=end_times,
+            coeffs=coeffs)

diff --git a/tick/inference/hawkes_sumexpkern_fixeddecay.py b/tick/inference/hawkes_sumexpkern_fixeddecay.py
index ad36a0eb3..be9c215fc 100644
--- a/tick/inference/hawkes_sumexpkern_fixeddecay.py
+++ b/tick/inference/hawkes_sumexpkern_fixeddecay.py
@@ -188,7 +188,7 @@ def adjacency(self):
             raise ValueError('You must fit data before getting estimated '
                              'adjacency')
         else:
-            return self.coeffs[self.n_nodes * self._model_obj.n_baselines:]\
+            return self.coeffs[self.n_nodes * self._model_obj.n_baselines:] \
                 .reshape((self.n_nodes, self.n_nodes, self.n_decays))
 
     def _corresponding_simu(self):
@@ -198,3 +198,49 @@ def _corresponding_simu(self):
 
     def get_baseline_values(self, i, abscissa_array):
         return self._corresponding_simu().get_baseline_values(i,
                                                               abscissa_array)
+
+    def score(self, events=None, end_times=None, baseline=None, adjacency=None):
+        """Compute score metric
+        Score metric is log likelihood (the higher the better)
+
+        Parameters
+        ----------
+        events : `list` of `list` of `np.ndarray`, default = None
+            List of Hawkes process realizations used to measure score.
+            Each realization is a list of n_nodes arrays, one per
+            component of the Hawkes process. Namely `events[i][j]`
+            contains a one-dimensional `numpy.array` of the timestamps of
+            component j of realization i.
+            If only one realization is given, it will be wrapped into a
+            list.
+            If None, the events given while fitting the model will be used.
+
+        end_times : `np.ndarray` or `float`, default = None
+            List of end times of all Hawkes processes used to measure
+            score.
+            If None, it will be set to each realization's latest time.
+            If only one realization is provided, a float can be given.
+
+        baseline : `np.ndarray`, shape=(n_nodes, ), default = None
+            Baseline vector for which the score is measured.
+            If `None`, the baseline obtained during fitting is used.
+
+        adjacency : `np.ndarray`, shape=(n_nodes, n_nodes, n_decays), default = None
+            Adjacency matrix for which the score is measured.
+            If `None`, the adjacency obtained during fitting is used.
+
+        Returns
+        -------
+        likelihood : `double`
+            Computed log likelihood value
+        """
+        if baseline is not None or adjacency is not None:
+            if baseline is None:
+                baseline = self.baseline
+            if adjacency is None:
+                adjacency = self.adjacency
+            coeffs = np.hstack((baseline, adjacency.ravel()))
+        else:
+            coeffs = None
+
+        return LearnerHawkesParametric.score(
+            self, events=events, end_times=end_times,
+            coeffs=coeffs)

diff --git a/tick/inference/src/hawkes_em.cpp b/tick/inference/src/hawkes_em.cpp
index 8b3bc9e73..f1fb158dd 100644
--- a/tick/inference/src/hawkes_em.cpp
+++ b/tick/inference/src/hawkes_em.cpp
@@ -5,14 +5,14 @@
 HawkesEM::HawkesEM(const double kernel_support, const ulong kernel_size,
                    const int max_n_threads)
-  : ModelHawkesList(max_n_threads, 0),
-    kernel_discretization(nullptr) {
+    : ModelHawkesList(max_n_threads, 0),
+      kernel_discretization(nullptr) {
   set_kernel_support(kernel_support);
   set_kernel_size(kernel_size);
 }
 
 HawkesEM::HawkesEM(const SArrayDoublePtr kernel_discretization,
                    const int max_n_threads)
-  : ModelHawkesList(max_n_threads, 0) {
+    : ModelHawkesList(max_n_threads, 0) {
   set_kernel_discretization(kernel_discretization);
 }
 
@@ -23,23 +23,24 @@ void HawkesEM::allocate_weights() {
   weights_computed = true;
 }
 
+double HawkesEM::loglikelihood(const ArrayDouble &mu, ArrayDouble2d &kernels) {
+  check_baseline_and_kernels(mu, kernels);
+
+  double llh = parallel_map_additive_reduce(get_n_threads(), n_nodes * n_realizations,
+                                            &HawkesEM::loglikelihood_ur, this, mu, kernels);
+  return llh / get_n_total_jumps();
+}
+
 void HawkesEM::solve(ArrayDouble &mu, ArrayDouble2d &kernels) {
   if (!weights_computed) allocate_weights();
-
-  if (mu.size() != n_nodes) {
-    TICK_ERROR("baseline / mu argument must be an array of size " << n_nodes);
-  }
-  if (kernels.n_rows() != n_nodes || kernels.n_cols() != n_nodes * kernel_size) {
-    TICK_ERROR("kernels argument must be an array of shape ("
-                 << n_nodes << ", " << n_nodes * kernel_size << ")");
-  }
+  check_baseline_and_kernels(mu, kernels);
 
   // Map
   // Fill next_mu and next_kernels
   next_mu.init_to_zero();
   next_kernels.init_to_zero();
   parallel_run(get_n_threads(), n_nodes * n_realizations,
-               &HawkesEM::solve_u_r, this, mu, kernels);
+               &HawkesEM::solve_ur, this, mu, kernels);
 
   // Reduce
   // Fill mu and kernels with next_mu and next_kernels
@@ -57,15 +58,30 @@ void HawkesEM::solve(ArrayDouble &mu, ArrayDouble2d &kernels) {
   }
 }
 
-void HawkesEM::solve_u_r(const ulong r_u, const ArrayDouble &mu,
-                         ArrayDouble2d &kernels) {
+double HawkesEM::loglikelihood_ur(const ulong r_u, const ArrayDouble &mu,
+                                  ArrayDouble2d &kernels) {
+  const ulong r = static_cast<ulong>(r_u / n_nodes);
+  double llh = (*end_times)[r];
+  std::function<void(double)> add_to_llh = [&llh](double intensity_t_i) {
+    if (intensity_t_i <= 0)
+      llh = std::numeric_limits<double>::infinity();
+    else
+      llh += log(intensity_t_i);
+  };
+
+  compute_intensities_ur(r_u, mu, kernels, add_to_llh, false);
+
+  llh -= compute_compensator_ur(r_u, mu, kernels);
+  return llh;
+}
+
+void HawkesEM::solve_ur(const ulong r_u, const ArrayDouble &mu,
+                        ArrayDouble2d &kernels) {
   // Obtain realization and node index from r_u
   const ulong r = static_cast<ulong>(r_u / n_nodes);
   const ulong node_u = r_u % n_nodes;
   // Fetch corresponding data
-  SArrayDoublePtrList1D &realization = timestamps_list[r];
-  ArrayDouble2d kernel_u(n_nodes, kernel_size, view_row(kernels, node_u).data());
   const double mu_u = mu[node_u];
 
   // initialize next data
@@ -76,6 +92,73 @@ void HawkesEM::solve_u_r(const ulong r_u, const ArrayDouble &mu,
                                                r * n_nodes + node_u).data());
   double &next_mu_ru = next_mu(r, node_u);
 
+  std::function<void(double)> add_to_next_kernel =
+      [this, &unnormalized_kernel_ru, &next_kernel_ru, &next_mu_ru, &mu_u](double intensity_t_i) {
+        // If the intensity is zero then there is nothing to do (no contribution)
+        if (intensity_t_i == 0) return;
+
+        // Otherwise, we need to norm the kernel_temp's and the mu_temp
+        // and add their contributions to the estimation
+        next_mu_ru += mu_u / (intensity_t_i * this->end_times->sum());
+        for (ulong node_v = 0; node_v < this->n_nodes; node_v++) {
+          ArrayDouble unnormalized_kernel_ruv = view_row(unnormalized_kernel_ru, node_v);
+          ArrayDouble next_kernel_ruv = view_row(next_kernel_ru, node_v);
+          for (ulong m = 0; m < this->kernel_size; m++) {
+            const double normalization_term = intensity_t_i *
+                (*this->n_jumps_per_node)[node_v] * get_kernel_dt(m);
+            next_kernel_ruv[m] += unnormalized_kernel_ruv[m] / normalization_term;
+          }
+        }
+      };
+
+  compute_intensities_ur(r_u, mu, kernels, add_to_next_kernel, true);
+}
+
+SArrayDouble2dPtr HawkesEM::get_kernel_norms(ArrayDouble2d &kernels) const {
+  check_baseline_and_kernels(ArrayDouble(n_nodes), kernels);
+
+  ArrayDouble discretization_intervals(kernel_size);
+  for (ulong m = 0; m < kernel_size; ++m) {
+    discretization_intervals[m] = get_kernel_dt(m);
+  }
+
+  ArrayDouble2d kernel_norms(n_nodes, n_nodes);
+  for (ulong node_u = 0; node_u < n_nodes; ++node_u) {
+    ArrayDouble2d kernel_u(n_nodes, kernel_size, view_row(kernels, node_u).data());
+    for (ulong node_v = 0; node_v < n_nodes; ++node_v) {
+      ArrayDouble kernel_uv = view_row(kernel_u, node_v);
+      kernel_norms(node_u, node_v) = kernel_uv.dot(discretization_intervals);
+    }
+  }
+
+  return kernel_norms.as_sarray2d_ptr();
+}
+
+void HawkesEM::compute_intensities_ur(const ulong r_u,
+                                      const ArrayDouble &mu, ArrayDouble2d &kernels,
+                                      std::function<void(double)> intensity_func,
+                                      bool store_unnormalized_kernel) {
+  // Obtain realization and node index from r_u
+  const ulong r = static_cast<ulong>(r_u / n_nodes);
+  const ulong node_u = r_u % n_nodes;
+
+  ArrayDouble2d kernel_norms = *get_kernel_norms(kernels);
+  double marginal_int_intensity = 0;
+  for (ulong node_v = 0; node_v < n_nodes; ++node_v) {
+    marginal_int_intensity += kernel_norms(node_v, node_u);
+  }
+
+  // Fetch corresponding data
+  SArrayDoublePtrList1D &realization = timestamps_list[r];
+  ArrayDouble2d kernel_u(n_nodes, kernel_size, view_row(kernels, node_u).data());
+  const double mu_u = mu[node_u];
+
+  ArrayDouble2d unnormalized_kernel_ru;
+  if (store_unnormalized_kernel) {
+    unnormalized_kernel_ru =
+        ArrayDouble2d(n_nodes, kernel_size,
+                      view_row(unnormalized_kernels, r * n_nodes + node_u).data());
+  }
+
   ArrayDouble timestamps_u = view(*realization[node_u]);
 
   // This array will allow us to find quicker the events in each component that
@@ -85,24 +168,26 @@ void HawkesEM::solve_u_r(const ulong r_u, const ArrayDouble &mu,
     last_indices[v] = realization[v]->size();
   }
 
-
   // We loop in reverse order to benefit from last_indices
   for (ulong i = timestamps_u.size() - 1; i != static_cast<ulong>(-1); i--) {
-    // this array will store temporary values
+    const double t_i = timestamps_u[i];
     unnormalized_kernel_ru.init_to_zero();
 
-    // norm will be equal to mu_u + \sum_v \sum_(t_j < t_i) g_uv(t_i -
-    // t_j)
-    double norm_u = 0;
-
-    const double t_i = timestamps_u[i];
+    // intensity_t_i will be equal to the intensity of node u at time t_i, i.e.
+    // mu_u + \sum_v \sum_(t_j < t_i) g_uv(t_i - t_j)
+    double intensity_t_i = 0;
 
     for (ulong node_v = 0; node_v < n_nodes; node_v++) {
       ArrayDouble timestamps_v = view(*realization[node_v]);
 
+      ArrayDouble unnormalized_kernel_ruv;
+      if (store_unnormalized_kernel)
+        unnormalized_kernel_ruv = view_row(unnormalized_kernel_ru, node_v);
+
       // Update the corresponding index such that it is the largest index which
       // satisfies v[index] <= t_i
       while (true) {
         if (last_indices[node_v] == 0) break;
         if (last_indices[node_v] < timestamps_v.size() &&
-          t_i >= timestamps_v[last_indices[node_v]])
+            t_i >= timestamps_v[last_indices[node_v]])
           break;
         last_indices[node_v]--;
       }
@@ -110,7 +195,6 @@ void HawkesEM::solve_u_r(const ulong r_u, const ArrayDouble &mu,
 
       // Get the corresponding kernels and their size
       ArrayDouble kernel_ruv = view_row(kernel_u, node_v);
-      ArrayDouble unnormalized_kernel_ruv = view_row(unnormalized_kernel_ru, node_v);
 
       ulong j0 = last_indices[node_v];
       ulong last_m = 0;
@@ -119,7 +203,7 @@ void HawkesEM::solve_u_r(const ulong r_u, const ArrayDouble &mu,
       for (ulong j = j0; j != static_cast<ulong>(-1); j--) {
         // Case the two events are in fact the same one
         if (node_u == node_v && i == j) {
-          norm_u += mu_u;
+          intensity_t_i += mu_u;
         } else {
           // Case they are different
           const double t_j = timestamps_v[j];
@@ -140,11 +224,11 @@ void HawkesEM::solve_u_r(const ulong r_u, const ArrayDouble &mu,
             // Then we get the corresponding kernel value
             double unnormalized_p_uv_ij = kernel_ruv[m];
 
-            // Update contribution to kernel of p_ij
-            unnormalized_kernel_ruv[m] += unnormalized_p_uv_ij;
+            if (store_unnormalized_kernel)
+              unnormalized_kernel_ruv[m] += unnormalized_p_uv_ij;
 
             // Update the norm
-            norm_u += unnormalized_p_uv_ij;
+            intensity_t_i += unnormalized_p_uv_ij;
           } else {
             // If the two events are too far away --> we are done with the second loop
             break;
           }
         }
       }
     }
-    // We are now ready to perform normalization !
+    intensity_func(intensity_t_i);
+  }
+}
 
-    // If norm is zero then nothing to do (no contribution)
-    if (norm_u == 0) continue;
+double HawkesEM::compute_compensator_ur(const ulong r_u, const ArrayDouble &mu,
+                                        ArrayDouble2d &kernels) {
+  // Obtain realization and node index from r_u
+  const ulong r = static_cast<ulong>(r_u / n_nodes);
+  const ulong node_u = r_u % n_nodes;
 
-    // Otherwise, we need to norm the kernel_temp's and the mu_temp
-    // and add their contributions to the estimation
-    next_mu_ru += mu_u / (norm_u * end_times->sum());
-    for (ulong node_v = 0; node_v < n_nodes; node_v++) {
-      ArrayDouble unnormalized_kernel_ruv = view_row(unnormalized_kernel_ru, node_v);
-      ArrayDouble next_kernel_ruv = view_row(next_kernel_ru, node_v);
-      for (ulong m = 0; m < kernel_size; m++) {
-        const double normalization_term = norm_u * (*n_jumps_per_node)[node_v] * get_kernel_dt(m);
-        next_kernel_ruv[m] += unnormalized_kernel_ruv[m] / normalization_term;
+  double compensator = 0;
+  ArrayDouble2d kernel_norms = *get_kernel_norms(kernels);
+
+  // Marginal value added to the compensator by an event of node u
+  double marginal_compensator = 0;
+  for (ulong node_v = 0; node_v < n_nodes; ++node_v) {
+    marginal_compensator += kernel_norms(node_v, node_u);
+  }
+
+  // Fetch corresponding data
+  SArrayDoublePtrList1D &realization = timestamps_list[r];
+  const double mu_u = mu[node_u];
+
+  ArrayDouble timestamps_u = view(*realization[node_u]);
+
+  // Marginal compensator of kernel u for timestamps that are close to end_time
+  double current_marginal_compensator = 0;
+  ulong last_m = 0;
+  ArrayDouble kernel_discretization = *get_kernel_discretization();
+  for (ulong i = timestamps_u.size() - 1; i != static_cast<ulong>(-1); i--) {
+    const double t_i = timestamps_u[i];
+    double t_diff = (*end_times)[r] - t_i;
+
+    // If t_i is too close to end_time, its marginal value is less than marginal_compensator
+    if (t_diff < kernel_support) {
+      // We get the index in the kernel array
+      // last_m allows us to find the m value quicker, as m >= last_m
+      ulong m = last_m;
+      while (kernel_discretization[m + 1] < t_diff) {
+        double interval = kernel_discretization[m + 1] - kernel_discretization[m];
+        // We add the compensator value contributed by bucket m
+        for (ulong node_v = 0; node_v < n_nodes; ++node_v) {
+          current_marginal_compensator += interval * kernels(node_v, node_u * kernel_size + m);
+        }
+        m++;
       }
+      compensator += current_marginal_compensator;
+      double interval_part = t_diff - kernel_discretization[m];
+      // We add the compensator value of the partially covered bucket
+      for (ulong node_v = 0; node_v < n_nodes; ++node_v) {
+        compensator += interval_part * kernels(node_v, node_u * kernel_size + m);
+      }
+      last_m = m;
+    } else {
+      // If t_i is far enough from end_time, its contribution to the compensator is
+      // marginal_compensator, just like the i + 1 earlier events
+      compensator += (i + 1) * marginal_compensator;
+      break;
     }
   }
+
+  compensator += mu_u * (*end_times)[r];
+  return compensator;
+}
+
+double HawkesEM::get_kernel_dt(const ulong m) const {
+  if (kernel_discretization == nullptr) {
+    return kernel_support / kernel_size;
+  } else {
+    return (*kernel_discretization)[m + 1] - (*kernel_discretization)[m];
+  }
+}
+
+void HawkesEM::check_baseline_and_kernels(const ArrayDouble &mu, ArrayDouble2d &kernels) const {
+  if (mu.size() != n_nodes) {
+    TICK_ERROR("baseline / mu argument must be an array of size " << n_nodes);
+  }
+  if (kernels.n_rows() != n_nodes || kernels.n_cols() != n_nodes * kernel_size) {
+    TICK_ERROR("kernels argument must be an array of shape ("
+                   << n_nodes << ", " << n_nodes * kernel_size << ")");
+  }
+}
 
 double HawkesEM::get_kernel_fixed_dt() const {
@@ -176,14 +324,14 @@ double HawkesEM::get_kernel_fixed_dt() const {
     return get_kernel_dt();
   } else {
     TICK_ERROR("Cannot get discretization parameter if kernel discretization "
-               "is explicitly set")
+                   "is explicitly set")
   }
 }
 
 void HawkesEM::set_kernel_support(const double kernel_support) {
   if (kernel_discretization != nullptr) {
     TICK_ERROR("kernel support cannot be set if kernel discretization "
-               "is explicitly set")
+                   "is explicitly set")
   }
   if (kernel_support <= 0) {
     TICK_ERROR("Kernel support must be positive and you have provided " << kernel_support)
@@ -195,7 +343,7 @@ void HawkesEM::set_kernel_support(const double kernel_support) {
 void HawkesEM::set_kernel_size(const ulong kernel_size) {
   if (kernel_discretization != nullptr) {
     TICK_ERROR("kernel size cannot be set if kernel discretization "
-               "is explicitly set")
+                   "is explicitly set")
   }
   if (kernel_size <= 0) {
     TICK_ERROR("Kernel size must be positive and you have provided " << kernel_size)
@@ -207,17 +355,17 @@ void HawkesEM::set_kernel_size(const ulong kernel_size) {
 void HawkesEM::set_kernel_dt(const double kernel_dt) {
   if (kernel_discretization != nullptr) {
     TICK_ERROR("kernel discretization parameter cannot be set if kernel discretization "
-               "is explicitly set")
+                   "is explicitly set")
   }
   if (kernel_dt <= 0) {
     TICK_ERROR("Kernel discretization parameter must be positive and you have provided "
-               << kernel_dt)
+                   << kernel_dt)
   }
   if (kernel_dt > kernel_support) {
     TICK_ERROR("Kernel discretization parameter must be smaller than kernel support."
-               << "You have provided " << kernel_dt
-               << " and kernel support is " << kernel_support)
+                   << "You have provided " << kernel_dt
+                   << " and kernel support is " << kernel_support)
   }
   set_kernel_size(static_cast<ulong>(std::ceil(kernel_support / kernel_dt)));
 }
@@ -238,3 +386,13 @@ void HawkesEM::set_kernel_discretization(const SArrayDoublePtr kernel_discretiza
   weights_computed = false;
 }
 
+SArrayDoublePtr HawkesEM::get_kernel_discretization() const {
+  if (kernel_discretization == nullptr) {
+    ArrayDouble kernel_discretization_tmp = arange(0, kernel_size + 1);
+    kernel_discretization_tmp.mult_fill(kernel_discretization_tmp, get_kernel_fixed_dt());
+    return kernel_discretization_tmp.as_sarray_ptr();
+  } else {
+    return kernel_discretization;
+  }
+}
+
diff --git a/tick/inference/src/hawkes_em.h b/tick/inference/src/hawkes_em.h
index 51ebfb38f..9ff160728 100644
--- a/tick/inference/src/hawkes_em.h
+++ b/tick/inference/src/hawkes_em.h
@@ -45,39 +45,18 @@ class HawkesEM : public ModelHawkesList {
   //! @brief The main method to perform one iteration
   void solve(ArrayDouble &mu, ArrayDouble2d &kernels);
 
- private:
-  //! @brief A method called in parallel by the method 'solve'
-  //! @param r_u : r * n_realizations + u, tells which realization and which node
-  void solve_u_r(const ulong r_u, const ArrayDouble &mu, ArrayDouble2d &kernel);
+  //! @brief Compute loglikelihood of a given kernel and baseline
+  double loglikelihood(const ArrayDouble &mu, ArrayDouble2d &kernels);
+
+  SArrayDouble2dPtr get_kernel_norms(ArrayDouble2d &kernels) const;
 
-  //! @brief Discretization parameter of the kernel
-  //! If kernel_discretization is a nullptr then it is equal to kernel_support / kernel_size
-  //! otherwise it is equal to the difference of
-  //! kernel_discretization[m+1] - kernel_discretization[m]
-  inline double get_kernel_dt(const ulong m = 0) const {
-    if (kernel_discretization == nullptr) {
-      return kernel_support / kernel_size;
-    } else {
-      return (*kernel_discretization)[m + 1] - (*kernel_discretization)[m];
-    }
-  }
-
- public:
   double get_kernel_support() const { return kernel_support; }
 
   ulong get_kernel_size() const { return kernel_size; }
 
   double get_kernel_fixed_dt() const;
 
-  SArrayDoublePtr get_kernel_discretization() const {
-    if (kernel_discretization == nullptr) {
-      ArrayDouble kernel_discretization_tmp = arange(0, kernel_size + 1);
-      kernel_discretization_tmp.mult_fill(kernel_discretization_tmp, get_kernel_fixed_dt());
-      return kernel_discretization_tmp.as_sarray_ptr();
-    } else {
-      return kernel_discretization;
-    }
-  }
+  SArrayDoublePtr get_kernel_discretization() const;
 
   //! @brief set kernel support
   void set_kernel_support(const double kernel_support);
@@ -90,6 +69,37 @@ class HawkesEM : public ModelHawkesList {
   void set_kernel_dt(const double kernel_dt);
 
   void set_kernel_discretization(const SArrayDoublePtr kernel_discretization);
+
+ private:
+  //! @brief A method called in parallel by the method 'solve'
+  //! @param r_u : r * n_realizations + u, tells which realization and which node
+  void solve_ur(const ulong r_u, const ArrayDouble &mu, ArrayDouble2d &kernel);
+
+  //! @brief A method called in parallel by the method 'loglikelihood'
+  //! @param r_u : r * n_realizations + u, tells which realization and which node
+  double loglikelihood_ur(const ulong r_u, const ArrayDouble &mu, ArrayDouble2d &kernels);
+
+  //! @brief A method called by solve_ur and loglikelihood_ur to compute all intensities at
+  //! all timestamps occurring in node u of realization r
+  //! @param r_u : r * n_realizations + u, tells which realization and which node
+  //! @param intensity_func : function that will be called for all timestamps with the intensity at
+  //! this timestamp as argument
+  //! @param store_unnormalized_kernel : solve_ur method needs to store an unnormalized version
+  //! of the kernels in the class variable unnormalized_kernels
+  void compute_intensities_ur(const ulong r_u, const ArrayDouble &mu, ArrayDouble2d &kernels,
+                              std::function<void(double)> intensity_func,
+                              bool store_unnormalized_kernel);
+
+  double compute_compensator_ur(const ulong r_u, const ArrayDouble &mu, ArrayDouble2d &kernels);
+
+  void check_baseline_and_kernels(const ArrayDouble &mu, ArrayDouble2d &kernels) const;
+
+  //! @brief Discretization parameter of the kernel
+  //! If kernel_discretization is a nullptr then it is equal to kernel_support / kernel_size
+  //! otherwise it is equal to the difference
+  //! kernel_discretization[m+1] - kernel_discretization[m]
+  double get_kernel_dt(const ulong m = 0) const;
 };
 
 #endif  // TICK_INFERENCE_SRC_HAWKES_EM_H_

diff --git a/tick/inference/swig/hawkes_em.i b/tick/inference/swig/hawkes_em.i
index 50819c376..ded69526c 100644
--- a/tick/inference/swig/hawkes_em.i
+++ b/tick/inference/swig/hawkes_em.i
@@ -19,6 +19,9 @@ class HawkesEM : public ModelHawkesList {
 
     void solve(ArrayDouble &mu, ArrayDouble2d &kernels);
 
+    SArrayDouble2dPtr get_kernel_norms(ArrayDouble2d &kernels) const;
+    double loglikelihood(ArrayDouble &mu, ArrayDouble2d &kernels);
+
     double get_kernel_support() const;
     ulong get_kernel_size() const;
     double get_kernel_fixed_dt() const;

diff --git a/tick/inference/tests/hawkes_adm4_test.py b/tick/inference/tests/hawkes_adm4_test.py
index 344a04770..e7cd0f100 100644
--- a/tick/inference/tests/hawkes_adm4_test.py
+++ b/tick/inference/tests/hawkes_adm4_test.py
@@ -7,6 +7,8 @@
 class Test(unittest.TestCase):
     def setUp(self):
+        np.random.seed(329832)
+
         self.decay = 0.7
         self.float_1 = 5.23e-4
         self.float_2 = 3.86e-2
@@ -21,7 +23,6 @@ def test_hawkes_adm4_solution(self):
         ]
 
         n_nodes = len(events[0])
-        decay = 0.7
         rho = 0.5
         C = 10
         lasso_nuclear_ratio = 0.7
@@ -29,7 +30,7 @@ def test_hawkes_adm4_solution(self):
         baseline_start = np.zeros(n_nodes) + .2
         adjacency_start = np.zeros((n_nodes, n_nodes)) + .2
 
-        learner = HawkesADM4(decay, rho=rho, C=C,
+        learner = HawkesADM4(self.decay, rho=rho, C=C,
                              lasso_nuclear_ratio=lasso_nuclear_ratio,
                              n_threads=3, max_iter=10, verbose=False,
                              em_max_iter=3)
@@ -46,6 +47,45 @@ def test_hawkes_adm4_solution(self):
         np.testing.assert_array_almost_equal(learner.adjacency, adjacency,
                                              decimal=6)
 
+    def test_hawkes_adm4_score(self):
+        """...Test HawkesADM4 score method
+        """
+        n_nodes = 2
+        n_realizations = 3
+
+        train_events = [[
+            np.cumsum(np.random.rand(4 + i)) for i in range(n_nodes)]
+            for _ in range(n_realizations)]
+
+        test_events = [[
+            np.cumsum(np.random.rand(4 + i)) for i in range(n_nodes)]
+            for _ in range(n_realizations)]
+
+        learner = HawkesADM4(self.decay)
+
+        msg = '^You must either call `fit` before `score` or provide events$'
+        with self.assertRaisesRegex(ValueError, msg):
+            learner.score()
+
+        given_baseline = np.random.rand(n_nodes)
+        given_adjacency = np.random.rand(n_nodes, n_nodes)
+
+        learner.fit(train_events)
+
+        train_score_current_coeffs = learner.score()
+        self.assertAlmostEqual(train_score_current_coeffs, 0.12029826)
+
+        train_score_given_coeffs = learner.score(
+            baseline=given_baseline, adjacency=given_adjacency)
+        self.assertAlmostEqual(train_score_given_coeffs, -0.15247511)
+
+        test_score_current_coeffs = learner.score(test_events)
+        self.assertAlmostEqual(test_score_current_coeffs, 0.17640007)
+
+        test_score_given_coeffs = learner.score(
+            test_events, baseline=given_baseline, adjacency=given_adjacency)
+        self.assertAlmostEqual(test_score_given_coeffs, -0.07973875)
+
     def test_hawkes_adm4_set_data(self):
         """...Test set_data method of Hawkes ADM4
         """

diff --git a/tick/inference/tests/hawkes_em_test.py b/tick/inference/tests/hawkes_em_test.py
index 402b9b2be..e79a4a331 100644
--- a/tick/inference/tests/hawkes_em_test.py
+++ b/tick/inference/tests/hawkes_em_test.py
@@ -3,6 +3,9 @@
 import unittest
 import numpy as np
 from tick.inference import HawkesEM
+from tick.optim.model.tests.hawkes_utils import hawkes_intensities, \
+    hawkes_log_likelihood
+from tick.plot import plot_hawkes_kernels
 
 class Test(unittest.TestCase):
@@ -35,7 +38,9 @@ def test_hawkes_em_fit(self):
                       n_threads=2, max_iter=10, verbose=False)
 
         em.fit(self.events, baseline_start=baseline, kernel_start=kernel)
 
-        np.set_printoptions(precision=4)
+        np.testing.assert_array_almost_equal(
+            em.baseline, [1.2264, 0.2164, 1.6782], decimal=4)
+
         expected_kernel = [[[2.4569e-02, 2.5128e-06, 0.0000e+00],
                             [1.8072e-02, 5.4332e-11, 0.0000e+00],
                             [2.7286e-03, 4.0941e-08, 3.5705e-15]],
@@ -71,6 +76,74 @@ def test_hawkes_em_fit(self):
         np.testing.assert_array_equal(em.get_kernel_supports(),
                                       np.ones((self.n_nodes, self.n_nodes)) * 3)
 
+    def test_hawkes_em_score(self):
+        """...Test score (i.e. likelihood) function of Hawkes EM
+        """
+
+        def approximate_likelihood(em, events, end_times, precision=2):
+            n_total_jumps = sum(map(len, events))
+            kernels_func = [[
+                lambda t, i=i, j=j: em.get_kernel_values(i, j, np.array([t]))[0]
+                for j in range(n_nodes)] for i in range(n_nodes)
+            ]
+            intensities = hawkes_intensities(events, em.baseline, kernels_func)
+            return hawkes_log_likelihood(intensities, events, end_times,
+                                         precision=precision) / n_total_jumps
+
+        # We use only 2 nodes, otherwise the integral approximation might be
+        # very slow
+        n_nodes = 2
+        kernel_support = 1
+        kernel_size = 3
+        baseline = np.random.rand(n_nodes) + .2
+        kernel = np.random.rand(n_nodes, n_nodes, kernel_size) + .4
+
+        train_events = \
+            [np.cumsum(np.random.rand(2 + i)) for i in range(n_nodes)]
+
+        test_events = \
+            [2 + np.cumsum(np.random.rand(2 + i)) for i in range(n_nodes)]
+
+        # Test for two kinds of discretization
+        train_kwargs = [{'kernel_support': 1, 'kernel_size': 3},
+                        {'kernel_discretization': np.array([0., 1., 1.5, 3.])}]
+
+        # Test with and without fitting
+        fits = [True, False]
+
+        for kwargs, fit in zip(train_kwargs, fits):
+            em = HawkesEM(**kwargs)
+            end_times = max(map(max, train_events)) + 0.2 * kernel_support
+
+            msg = '^You must either call `fit` before `score` or provide events'
+            with self.assertRaisesRegex(ValueError, msg):
+                em.score()
+
+            if fit:
+                em.fit(train_events, end_times=end_times,
+                       baseline_start=baseline, kernel_start=kernel)
+            else:
+                em.baseline = baseline
+                em.kernel = kernel
+
+            # Score on train data
+            if fit:
+                em_train_score = em.score()
+            else:
+                em_train_score = em.score(train_events, end_times=end_times)
+            self.assertAlmostEqual(
+                em_train_score,
+                approximate_likelihood(em, train_events, end_times, 2),
+                delta=1e-1, msg='Failed on train for {}'.format(kwargs))
+
+            # Score on test data
+            em_test_score = em.score(events=test_events)
+            test_end_times = max(map(max, test_events))
+            self.assertAlmostEqual(
+                em_test_score,
+                approximate_likelihood(em, test_events, test_end_times, 4),
+                delta=1e-3, msg='Failed on test for {}'.format(kwargs))
+
     def test_hawkes_em_kernel_support(self):
         """...Test that Hawkes em kernel support parameter is correctly
         synchronized

diff --git a/tick/inference/tests/hawkes_expkern_test.py b/tick/inference/tests/hawkes_expkern_test.py
index a69a886f7..be0fc57b4 100644
--- a/tick/inference/tests/hawkes_expkern_test.py
+++ b/tick/inference/tests/hawkes_expkern_test.py
@@ -136,6 +136,45 @@ def test_HawkesExpKern_fit_start(self):
         learner.fit(self.events, start=random_coeffs)
         np.testing.assert_array_equal(learner.coeffs, random_coeffs)
 
+    def test_HawkesExpKern_score(self):
+        """...Test HawkesExpKern score method
+        """
+        n_nodes = 2
+        n_realizations = 3
+
+        train_events = [[
+            np.cumsum(np.random.rand(4 + i)) for i in range(n_nodes)]
+            for _ in range(n_realizations)]
+
+        test_events = [[
+            np.cumsum(np.random.rand(4 + i)) for i in range(n_nodes)]
+            for _ in range(n_realizations)]
+
+        learner = HawkesExpKern(self.decays)
+
+        msg = '^You must either call `fit` before `score` or provide events$'
+        with self.assertRaisesRegex(ValueError, msg):
+            learner.score()
+
+        given_baseline = np.random.rand(n_nodes)
+        given_adjacency = np.random.rand(n_nodes, n_nodes)
+
+        learner.fit(train_events)
+
+        train_score_current_coeffs = learner.score()
+        self.assertAlmostEqual(train_score_current_coeffs, 2.0855840)
+
+        train_score_given_coeffs = learner.score(
+            baseline=given_baseline, adjacency=given_adjacency)
+        self.assertAlmostEqual(train_score_given_coeffs, 0.59502417)
+
+        test_score_current_coeffs = learner.score(test_events)
+        self.assertAlmostEqual(test_score_current_coeffs, 1.6001762)
+
+        test_score_given_coeffs = learner.score(
+            test_events, baseline=given_baseline, adjacency=given_adjacency)
+        self.assertAlmostEqual(test_score_given_coeffs, 0.89322199)
+
     @staticmethod
     def specific_solver_kwargs(solver):
         """...A simple method to systematically add some kwargs to our tests

diff --git a/tick/inference/tests/hawkes_sumexpkern_test.py b/tick/inference/tests/hawkes_sumexpkern_test.py
index b254b934d..49101b910 100644
--- a/tick/inference/tests/hawkes_sumexpkern_test.py
+++ b/tick/inference/tests/hawkes_sumexpkern_test.py
@@ -103,6 +103,45 @@ def test_HawkesSumExpKern_fit(self):
                                 "reached too high baseline error" %
                                 (solver, penalty))
 
+    def test_HawkesSumExpKern_score(self):
+        """...Test HawkesSumExpKern score method
+        """
+        n_nodes = 2
+        n_realizations = 3
+
+        train_events = [[
+            np.cumsum(np.random.rand(4 + i)) for i in range(n_nodes)]
+            for _ in range(n_realizations)]
+
+        test_events = [[
+            np.cumsum(np.random.rand(4 + i)) for i in range(n_nodes)]
+            for _ in range(n_realizations)]
+
+        learner = HawkesSumExpKern(self.decays)
+
+        msg = '^You must either call `fit` before `score` or provide events$'
+        with self.assertRaisesRegex(ValueError, msg):
+            learner.score()
+
+        given_baseline = np.random.rand(n_nodes)
+        given_adjacency = np.random.rand(n_nodes, n_nodes, self.n_decays)
+
+        learner.fit(train_events)
+
+        train_score_current_coeffs = learner.score()
+        self.assertAlmostEqual(train_score_current_coeffs, 1.684827141)
+
+        train_score_given_coeffs = learner.score(
+            baseline=given_baseline, adjacency=given_adjacency)
+        self.assertAlmostEqual(train_score_given_coeffs, 1.16247892)
+
+        test_score_current_coeffs = learner.score(test_events)
+        self.assertAlmostEqual(test_score_current_coeffs, 1.66494295)
+
+        test_score_given_coeffs = learner.score(
+            test_events, baseline=given_baseline, adjacency=given_adjacency)
+        self.assertAlmostEqual(test_score_given_coeffs, 1.1081362)
+
     def test_HawkesSumExpKern_fit_start(self):
         """...Test HawkesSumExpKern starting point of fit method
         """
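
Usage sketch (illustrative only, not part of the patch). `score` returns an
average log likelihood of the events under the current (or supplied)
parameters, roughly

    (1 / N) * (sum_i log lambda_{u_i}(t_i) - sum_u int_0^T lambda_u(s) ds)

up to the normalization constants used internally by tick's losses, so
higher is better. The snippet below mirrors the tests above; the learner
classes and `score` signatures come from this patch, `SimuHawkesExpKernels`
is tick's existing simulation helper, and all parameter values are
arbitrary:

    import numpy as np

    from tick.inference import HawkesADM4, HawkesEM
    from tick.simulation import SimuHawkesExpKernels

    # Simulate one realization of a 2-node Hawkes process with
    # exponential kernels (arbitrary illustrative parameters)
    decay = 0.7
    baseline = np.array([0.3, 0.2])
    adjacency = np.array([[0.2, 0.1], [0.0, 0.3]])
    simu = SimuHawkesExpKernels(adjacency=adjacency, decays=decay,
                                baseline=baseline, end_time=1000,
                                seed=2309, verbose=False)
    simu.simulate()
    train_events = simu.timestamps

    # Parametric learner: score() with no argument evaluates the
    # log likelihood on the training events
    learner = HawkesADM4(decay)
    learner.fit(train_events)
    print('train log likelihood:', learner.score())

    # The same metric can be evaluated at user-chosen coefficients,
    # e.g. the true simulation parameters
    print('log likelihood at true coeffs:',
          learner.score(baseline=baseline, adjacency=adjacency))

    # The non-parametric EM learner gains the same API in this patch
    em = HawkesEM(kernel_support=3., kernel_size=10)
    em.fit(train_events)
    print('EM train log likelihood:', em.score())

Since all three learner families now report the same metric, comparing
`score` values on held-out events (e.g. `learner.score(test_events)`) is
the intended way to choose between kernel shapes or regularization levels.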