Skip to content

Commit

Permalink
Goodness-of-fit diagnostics for Hawkes EM algorithm (#507)
Browse files Browse the repository at this point in the history
* Hawkes EM qq plots - first commit

* Hawkes EM qq plots - workings

* HawkesEM - Compute time integral of intensities

* HawkesEM - Time-changed interarrival

* debugging

* debugging

* debugging

* Update configure_env.sh and gtest.sh

* HawkesEM gtest - ok till implicit abscissa

* HawkesEM Gtest - pass till data with explicit abscissa

* HawkesEM Gtest - pass till values with explicit abscissa

* HawkesEM - Py unittest of fit method with simulated data

* HawkesEM - test with simulated data

* HawkesEM - basic unit tests for inter-arrival times

* try get more python working on windows

* Fix usage of threshold in _index_left and _t_left

* Hawkes EM qq plots - first commit

* Hawkes EM qq plots - workings

* HawkesEM - Compute time integral of intensities

* HawkesEM - Time-changed interarrival

* debugging

* debugging

* debugging

* Update configure_env.sh and gtest.sh

* HawkesEM gtest - ok till implicit abscissa

* HawkesEM Gtest - pass till data with explicit abscissa

* HawkesEM Gtest - pass till values with explicit abscissa

* HawkesEM - Py unittest of fit method with simulated data

* HawkesEM - test with simulated data

* HawkesEM - basic unit tests for inter-arrival times

* Fix usage of threshold in _index_left and _t_left

* Example - QQplot of simulated Hawkes process

* Examples - QQ plot of Hawkes EM algo

* TimeFunction - fix future max

* HawkesEM test - test 3 rtol

* HawkesKernelTiemFunc - get norm using time_func

* Clean comments in hawkes em test]

---------

Co-authored-by: dekken <[email protected]>
Co-authored-by: claudio <[email protected]>
  • Loading branch information
3 people authored Mar 5, 2023
1 parent 6be9d62 commit 04dbb37
Show file tree
Hide file tree
Showing 25 changed files with 1,633 additions and 255 deletions.
53 changes: 53 additions & 0 deletions examples/qq_plot_hawkes_em.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,53 @@
import numpy as np
import matplotlib.pyplot as plt

from tick.hawkes import (SimuHawkes, HawkesKernelTimeFunc, HawkesKernelExp,
HawkesEM)
from tick.base import TimeFunction
from tick.plot import qq_plots

run_time = 30000

t_values1 = np.array([0, 1, 1.5, 2., 3.5], dtype=float)
y_values1 = np.array([0, 0.2, 0, 0.1, 0.], dtype=float)
tf1 = TimeFunction([t_values1, y_values1],
inter_mode=TimeFunction.InterConstRight, dt=0.1)
kernel1 = HawkesKernelTimeFunc(tf1)

t_values2 = np.linspace(0, 4, 20)
y_values2 = np.maximum(0., np.sin(t_values2) / 4)
tf2 = TimeFunction([t_values2, y_values2])
kernel2 = HawkesKernelTimeFunc(tf2)

baseline = np.array([0.1, 0.3])

hawkes = SimuHawkes(baseline=baseline, end_time=run_time, verbose=False,
seed=2334)

hawkes.set_kernel(0, 0, kernel1)
hawkes.set_kernel(0, 1, HawkesKernelExp(.5, .7))
hawkes.set_kernel(1, 1, kernel2)

hawkes.simulate()

em = HawkesEM(4, kernel_size=16, n_threads=8, verbose=False, tol=1e-3)
em.fit(hawkes.timestamps)

hawkes.store_compensator_values()
residuals_list = em.time_changed_interarrival_times()

fig, axs = plt.subplots(2, 2)

_ = qq_plots(
point_process=hawkes,
ax=[axs[0, 0], axs[0, 1]],
node_names=['node 0 - simulation', 'node 1 - simulation'],
show=False
)
_ = qq_plots(
residuals=residuals_list[0],
ax=[axs[1, 0], axs[1, 1]],
node_names=['node 0 - estimation', 'node 1 - estimation'],
show=False
)
plt.show()
35 changes: 35 additions & 0 deletions examples/qq_plot_hawkes_nonparam.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,35 @@
import numpy as np
import matplotlib.pyplot as plt

from tick.hawkes import (SimuHawkes, HawkesKernelTimeFunc, HawkesKernelExp,
HawkesEM)
from tick.base import TimeFunction
from tick.plot import qq_plots

run_time = 30000

t_values1 = np.array([0, 1, 1.5, 2., 3.5], dtype=float)
y_values1 = np.array([0, 0.2, 0, 0.1, 0.], dtype=float)
tf1 = TimeFunction([t_values1, y_values1],
inter_mode=TimeFunction.InterConstRight, dt=0.1)
kernel1 = HawkesKernelTimeFunc(tf1)

t_values2 = np.linspace(0, 4, 20)
y_values2 = np.maximum(0., np.sin(t_values2) / 4)
tf2 = TimeFunction([t_values2, y_values2])
kernel2 = HawkesKernelTimeFunc(tf2)

baseline = np.array([0.1, 0.3])

hawkes = SimuHawkes(baseline=baseline, end_time=run_time, verbose=False,
seed=2334)

hawkes.set_kernel(0, 0, kernel1)
hawkes.set_kernel(0, 1, HawkesKernelExp(.5, .7))
hawkes.set_kernel(1, 1, kernel2)

hawkes.simulate()
hawkes.store_compensator_values()

fig = qq_plots(hawkes, show=False)
plt.show()
133 changes: 133 additions & 0 deletions lib/cpp-test/base/time_func_gtest.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,133 @@
// License: BSD 3 clause

#include <gtest/gtest.h>
#include "tick/base/time_func.h"
#include <iostream>
#include <cmath>

class TimeFunctionTest : public ::testing::Test {
protected:
ArrayDouble T;
ArrayDouble Y;
double dt = .25;
double time_horizon = 1.;
double border_value = .0;
ulong sample_size = 5;
void SetUp() override {
T = ArrayDouble{.0, .25, .5, .75, 1.};
Y = ArrayDouble{1., 2., 3., 4., 5.};
}
};

TEST_F(TimeFunctionTest, implicit_abscissa_data) {
TimeFunction tf(Y, TimeFunction::BorderType::Border0, TimeFunction::InterMode::InterConstRight,
dt, border_value);
EXPECT_DOUBLE_EQ(tf.get_t0(), T[0]);
SArrayDoublePtr sampled_y = tf.get_sampled_y();
EXPECT_EQ(tf.get_sample_size(), sample_size);
EXPECT_EQ(sampled_y->size(), Y.size());
for (ulong i = 0; i < Y.size(); i++) {
EXPECT_DOUBLE_EQ((*sampled_y)[i], Y[i]);
}
}
TEST_F(TimeFunctionTest, explicit_abscissa_data) {
TimeFunction tf(T, Y, TimeFunction::BorderType::Border0,
TimeFunction::InterMode::InterConstRight);
EXPECT_DOUBLE_EQ(tf.get_t0(), T[0]);
SArrayDoublePtr sampled_y = tf.get_sampled_y();
EXPECT_EQ(tf.get_sample_size(), sample_size);
EXPECT_EQ(sampled_y->size(), Y.size());
for (ulong i = 0; i < Y.size(); i++) {
EXPECT_DOUBLE_EQ((*sampled_y)[i], Y[i]);
}
}

TEST_F(TimeFunctionTest, border0_interconstright_implicit_node_values) {
TimeFunction tf(Y, TimeFunction::BorderType::Border0, TimeFunction::InterMode::InterConstRight,
dt, border_value);
double s = 0;
for (int k = 0; k < T.size(); k++) {
double t_k = T[k];
double y_k = Y[k];
EXPECT_DOUBLE_EQ(tf.value(t_k), y_k) << "error at k=" << k << ", t_k=" << t_k << "\n";
if (k > 0) s += Y[k - 1] * dt;
EXPECT_DOUBLE_EQ(tf.primitive(t_k), s) << "error at k=" << k << ", t_k=" << t_k << "\n";
}

EXPECT_DOUBLE_EQ(tf.get_norm(), tf.primitive(time_horizon));
}

TEST_F(TimeFunctionTest, border0_interconstright_explicit_node_values) {
TimeFunction tf(T, Y, TimeFunction::BorderType::Border0,
TimeFunction::InterMode::InterConstRight);
double s = 0;
for (int k = 0; k < T.size(); k++) {
double t_k = T[k];
double y_k = Y[k];
EXPECT_DOUBLE_EQ(tf.value(t_k), y_k) << "error at k=" << k << ", t_k=" << t_k << "\n";
if (k > 0) s += Y[k - 1] * dt;
EXPECT_DOUBLE_EQ(tf.primitive(t_k), s) << "error at k=" << k << ", t_k=" << t_k << "\n";
}

EXPECT_DOUBLE_EQ(tf.get_norm(), tf.primitive(time_horizon));
}

TEST_F(TimeFunctionTest, border0_interconstleft_implicit_node_values) {
TimeFunction tf(Y, TimeFunction::BorderType::Border0, TimeFunction::InterMode::InterConstLeft, dt,
border_value);
double s = 0;
for (int k = 0; k < T.size(); k++) {
double t_k = T[k];
double y_k = Y[k];
EXPECT_DOUBLE_EQ(tf.value(t_k), y_k) << "error at k=" << k << ", t_k=" << t_k << "\n";
if (k > 0) s += y_k * dt;
EXPECT_DOUBLE_EQ(tf.primitive(t_k), s) << "error at k=" << k << ", t_k=" << t_k << "\n";
}
EXPECT_DOUBLE_EQ(tf.get_norm(), tf.primitive(time_horizon));
}

TEST_F(TimeFunctionTest, border0_interconstleft_explicit_node_values) {
TimeFunction tf(T, Y, TimeFunction::BorderType::Border0, TimeFunction::InterMode::InterConstLeft);
double s = 0;
for (int k = 0; k < T.size(); k++) {
double t_k = T[k];
double y_k = Y[k];
EXPECT_DOUBLE_EQ(tf.value(t_k), y_k) << "error at k=" << k << ", t_k=" << t_k << "\n";
if (k > 0) s += y_k * dt;
EXPECT_DOUBLE_EQ(tf.primitive(t_k), s) << "error at k=" << k << ", t_k=" << t_k << "\n";
}
EXPECT_DOUBLE_EQ(tf.get_norm(), tf.primitive(time_horizon));
}

TEST_F(TimeFunctionTest, border0_interlinear_implicit_node_values) {
TimeFunction tf(Y, TimeFunction::BorderType::Border0, TimeFunction::InterMode::InterLinear, dt,
border_value);
double s = 0;
for (int k = 0; k < T.size(); k++) {
double t_k = T[k];
double y_k = Y[k];
EXPECT_DOUBLE_EQ(tf.value(t_k), y_k) << "error at k=" << k << ", t_k=" << t_k << "\n";
if (k > 0) s += .5 * (y_k + Y[k - 1]) * dt;
EXPECT_DOUBLE_EQ(tf.primitive(t_k), s) << "error at k=" << k << ", t_k=" << t_k << "\n";
}
EXPECT_DOUBLE_EQ(tf.get_norm(), tf.primitive(time_horizon));
}

TEST_F(TimeFunctionTest, border0_interlinear_explicit_node_values) {
TimeFunction tf(T, Y, TimeFunction::BorderType::Border0, TimeFunction::InterMode::InterLinear);
double s = 0;
for (int k = 0; k < T.size(); k++) {
double t_k = T[k];
double y_k = Y[k];
EXPECT_DOUBLE_EQ(tf.value(t_k), y_k) << "error at k=" << k << ", t_k=" << t_k << "\n";
if (k > 0) s += .5 * (y_k + Y[k - 1]) * dt;
EXPECT_DOUBLE_EQ(tf.primitive(t_k), s) << "error at k=" << k << ", t_k=" << t_k << "\n";
}
EXPECT_DOUBLE_EQ(tf.get_norm(), tf.primitive(time_horizon));
}
#ifdef ADD_MAIN
int main(int argc, char** argv) {
::testing::InitGoogleTest(&argc, argv);
return RUN_ALL_TESTS();
}
#endif // ADD_MAIN
16 changes: 16 additions & 0 deletions lib/cpp-test/hawkes/inference/CMakeLists.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
add_executable(tick_test_hawkes_inference
hawkes_em_gtest.cpp
)

target_link_libraries(tick_test_hawkes_inference
${TICK_LIB_ARRAY}
${TICK_LIB_BASE}
${TICK_LIB_CRANDOM}
${TICK_LIB_BASE_MODEL}
${TICK_LIB_LINEAR_MODEL}
${TICK_LIB_HAWKES_INFERENCE}
${TICK_LIB_HAWKES_MODEL}
${TICK_TEST_LIBS}
)


Loading

0 comments on commit 04dbb37

Please sign in to comment.