diff --git a/.github/workflows/build_nix.yml b/.github/workflows/build_nix.yml index 0681f1b68..900ca1b46 100644 --- a/.github/workflows/build_nix.yml +++ b/.github/workflows/build_nix.yml @@ -21,7 +21,7 @@ jobs: fail-fast: false max-parallel: 4 matrix: - python-version: ['3.7', '3.9', '3.10'] # '3.8' has "'tp_print' is deprecated [-Werror,-Wdeprecated-declarations]" + python-version: ['3.7', '3.9', '3.10', '3.11'] # '3.8' has "'tp_print' is deprecated [-Werror,-Wdeprecated-declarations]" steps: - uses: actions/checkout@v2 diff --git a/.github/workflows/build_win.yml b/.github/workflows/build_win.yml index b874d553e..02f9e873d 100644 --- a/.github/workflows/build_win.yml +++ b/.github/workflows/build_win.yml @@ -11,6 +11,7 @@ env: TICK_DEBUG: 0 TICK_WERROR: 0 MKN_CL_PREFERRED: 1 + MKN_COMPILE_THREADS: 2 jobs: build: @@ -21,10 +22,10 @@ jobs: fail-fast: false max-parallel: 4 matrix: - python-version: ['3.7'] # '3.8', '3.9', '3.10', have dll init issues + python-version: ['3.7', '3.8', '3.9', '3.10', '3.11'] steps: - - uses: actions/checkout@v2 + - uses: actions/checkout@v3 with: submodules: true diff --git a/examples/qq_plot_hawkes_em.py b/examples/qq_plot_hawkes_em.py new file mode 100644 index 000000000..5e40e5044 --- /dev/null +++ b/examples/qq_plot_hawkes_em.py @@ -0,0 +1,53 @@ +import numpy as np +import matplotlib.pyplot as plt + +from tick.hawkes import (SimuHawkes, HawkesKernelTimeFunc, HawkesKernelExp, + HawkesEM) +from tick.base import TimeFunction +from tick.plot import qq_plots + +run_time = 30000 + +t_values1 = np.array([0, 1, 1.5, 2., 3.5], dtype=float) +y_values1 = np.array([0, 0.2, 0, 0.1, 0.], dtype=float) +tf1 = TimeFunction([t_values1, y_values1], + inter_mode=TimeFunction.InterConstRight, dt=0.1) +kernel1 = HawkesKernelTimeFunc(tf1) + +t_values2 = np.linspace(0, 4, 20) +y_values2 = np.maximum(0., np.sin(t_values2) / 4) +tf2 = TimeFunction([t_values2, y_values2]) +kernel2 = HawkesKernelTimeFunc(tf2) + +baseline = np.array([0.1, 0.3]) + +hawkes = SimuHawkes(baseline=baseline, end_time=run_time, verbose=False, + seed=2334) + +hawkes.set_kernel(0, 0, kernel1) +hawkes.set_kernel(0, 1, HawkesKernelExp(.5, .7)) +hawkes.set_kernel(1, 1, kernel2) + +hawkes.simulate() + +em = HawkesEM(4, kernel_size=16, n_threads=8, verbose=False, tol=1e-3) +em.fit(hawkes.timestamps) + +hawkes.store_compensator_values() +residuals_list = em.time_changed_interarrival_times() + +fig, axs = plt.subplots(2, 2) + +_ = qq_plots( + point_process=hawkes, + ax=[axs[0, 0], axs[0, 1]], + node_names=['node 0 - simulation', 'node 1 - simulation'], + show=False +) +_ = qq_plots( + residuals=residuals_list[0], + ax=[axs[1, 0], axs[1, 1]], + node_names=['node 0 - estimation', 'node 1 - estimation'], + show=False +) +plt.show() diff --git a/examples/qq_plot_hawkes_nonparam.py b/examples/qq_plot_hawkes_nonparam.py new file mode 100644 index 000000000..f2cc4246c --- /dev/null +++ b/examples/qq_plot_hawkes_nonparam.py @@ -0,0 +1,35 @@ +import numpy as np +import matplotlib.pyplot as plt + +from tick.hawkes import (SimuHawkes, HawkesKernelTimeFunc, HawkesKernelExp, + HawkesEM) +from tick.base import TimeFunction +from tick.plot import qq_plots + +run_time = 30000 + +t_values1 = np.array([0, 1, 1.5, 2., 3.5], dtype=float) +y_values1 = np.array([0, 0.2, 0, 0.1, 0.], dtype=float) +tf1 = TimeFunction([t_values1, y_values1], + inter_mode=TimeFunction.InterConstRight, dt=0.1) +kernel1 = HawkesKernelTimeFunc(tf1) + +t_values2 = np.linspace(0, 4, 20) +y_values2 = np.maximum(0., np.sin(t_values2) / 4) +tf2 = TimeFunction([t_values2, y_values2]) +kernel2 = HawkesKernelTimeFunc(tf2) + +baseline = np.array([0.1, 0.3]) + +hawkes = SimuHawkes(baseline=baseline, end_time=run_time, verbose=False, + seed=2334) + +hawkes.set_kernel(0, 0, kernel1) +hawkes.set_kernel(0, 1, HawkesKernelExp(.5, .7)) +hawkes.set_kernel(1, 1, kernel2) + +hawkes.simulate() +hawkes.store_compensator_values() + +fig = qq_plots(hawkes, show=False) +plt.show() diff --git a/lib/cpp-test/base/time_func_gtest.cpp b/lib/cpp-test/base/time_func_gtest.cpp new file mode 100644 index 000000000..549397236 --- /dev/null +++ b/lib/cpp-test/base/time_func_gtest.cpp @@ -0,0 +1,133 @@ +// License: BSD 3 clause + +#include +#include "tick/base/time_func.h" +#include +#include + +class TimeFunctionTest : public ::testing::Test { + protected: + ArrayDouble T; + ArrayDouble Y; + double dt = .25; + double time_horizon = 1.; + double border_value = .0; + ulong sample_size = 5; + void SetUp() override { + T = ArrayDouble{.0, .25, .5, .75, 1.}; + Y = ArrayDouble{1., 2., 3., 4., 5.}; + } +}; + +TEST_F(TimeFunctionTest, implicit_abscissa_data) { + TimeFunction tf(Y, TimeFunction::BorderType::Border0, TimeFunction::InterMode::InterConstRight, + dt, border_value); + EXPECT_DOUBLE_EQ(tf.get_t0(), T[0]); + SArrayDoublePtr sampled_y = tf.get_sampled_y(); + EXPECT_EQ(tf.get_sample_size(), sample_size); + EXPECT_EQ(sampled_y->size(), Y.size()); + for (ulong i = 0; i < Y.size(); i++) { + EXPECT_DOUBLE_EQ((*sampled_y)[i], Y[i]); + } +} +TEST_F(TimeFunctionTest, explicit_abscissa_data) { + TimeFunction tf(T, Y, TimeFunction::BorderType::Border0, + TimeFunction::InterMode::InterConstRight); + EXPECT_DOUBLE_EQ(tf.get_t0(), T[0]); + SArrayDoublePtr sampled_y = tf.get_sampled_y(); + EXPECT_EQ(tf.get_sample_size(), sample_size); + EXPECT_EQ(sampled_y->size(), Y.size()); + for (ulong i = 0; i < Y.size(); i++) { + EXPECT_DOUBLE_EQ((*sampled_y)[i], Y[i]); + } +} + +TEST_F(TimeFunctionTest, border0_interconstright_implicit_node_values) { + TimeFunction tf(Y, TimeFunction::BorderType::Border0, TimeFunction::InterMode::InterConstRight, + dt, border_value); + double s = 0; + for (int k = 0; k < T.size(); k++) { + double t_k = T[k]; + double y_k = Y[k]; + EXPECT_DOUBLE_EQ(tf.value(t_k), y_k) << "error at k=" << k << ", t_k=" << t_k << "\n"; + if (k > 0) s += Y[k - 1] * dt; + EXPECT_DOUBLE_EQ(tf.primitive(t_k), s) << "error at k=" << k << ", t_k=" << t_k << "\n"; + } + + EXPECT_DOUBLE_EQ(tf.get_norm(), tf.primitive(time_horizon)); +} + +TEST_F(TimeFunctionTest, border0_interconstright_explicit_node_values) { + TimeFunction tf(T, Y, TimeFunction::BorderType::Border0, + TimeFunction::InterMode::InterConstRight); + double s = 0; + for (int k = 0; k < T.size(); k++) { + double t_k = T[k]; + double y_k = Y[k]; + EXPECT_DOUBLE_EQ(tf.value(t_k), y_k) << "error at k=" << k << ", t_k=" << t_k << "\n"; + if (k > 0) s += Y[k - 1] * dt; + EXPECT_DOUBLE_EQ(tf.primitive(t_k), s) << "error at k=" << k << ", t_k=" << t_k << "\n"; + } + + EXPECT_DOUBLE_EQ(tf.get_norm(), tf.primitive(time_horizon)); +} + +TEST_F(TimeFunctionTest, border0_interconstleft_implicit_node_values) { + TimeFunction tf(Y, TimeFunction::BorderType::Border0, TimeFunction::InterMode::InterConstLeft, dt, + border_value); + double s = 0; + for (int k = 0; k < T.size(); k++) { + double t_k = T[k]; + double y_k = Y[k]; + EXPECT_DOUBLE_EQ(tf.value(t_k), y_k) << "error at k=" << k << ", t_k=" << t_k << "\n"; + if (k > 0) s += y_k * dt; + EXPECT_DOUBLE_EQ(tf.primitive(t_k), s) << "error at k=" << k << ", t_k=" << t_k << "\n"; + } + EXPECT_DOUBLE_EQ(tf.get_norm(), tf.primitive(time_horizon)); +} + +TEST_F(TimeFunctionTest, border0_interconstleft_explicit_node_values) { + TimeFunction tf(T, Y, TimeFunction::BorderType::Border0, TimeFunction::InterMode::InterConstLeft); + double s = 0; + for (int k = 0; k < T.size(); k++) { + double t_k = T[k]; + double y_k = Y[k]; + EXPECT_DOUBLE_EQ(tf.value(t_k), y_k) << "error at k=" << k << ", t_k=" << t_k << "\n"; + if (k > 0) s += y_k * dt; + EXPECT_DOUBLE_EQ(tf.primitive(t_k), s) << "error at k=" << k << ", t_k=" << t_k << "\n"; + } + EXPECT_DOUBLE_EQ(tf.get_norm(), tf.primitive(time_horizon)); +} + +TEST_F(TimeFunctionTest, border0_interlinear_implicit_node_values) { + TimeFunction tf(Y, TimeFunction::BorderType::Border0, TimeFunction::InterMode::InterLinear, dt, + border_value); + double s = 0; + for (int k = 0; k < T.size(); k++) { + double t_k = T[k]; + double y_k = Y[k]; + EXPECT_DOUBLE_EQ(tf.value(t_k), y_k) << "error at k=" << k << ", t_k=" << t_k << "\n"; + if (k > 0) s += .5 * (y_k + Y[k - 1]) * dt; + EXPECT_DOUBLE_EQ(tf.primitive(t_k), s) << "error at k=" << k << ", t_k=" << t_k << "\n"; + } + EXPECT_DOUBLE_EQ(tf.get_norm(), tf.primitive(time_horizon)); +} + +TEST_F(TimeFunctionTest, border0_interlinear_explicit_node_values) { + TimeFunction tf(T, Y, TimeFunction::BorderType::Border0, TimeFunction::InterMode::InterLinear); + double s = 0; + for (int k = 0; k < T.size(); k++) { + double t_k = T[k]; + double y_k = Y[k]; + EXPECT_DOUBLE_EQ(tf.value(t_k), y_k) << "error at k=" << k << ", t_k=" << t_k << "\n"; + if (k > 0) s += .5 * (y_k + Y[k - 1]) * dt; + EXPECT_DOUBLE_EQ(tf.primitive(t_k), s) << "error at k=" << k << ", t_k=" << t_k << "\n"; + } + EXPECT_DOUBLE_EQ(tf.get_norm(), tf.primitive(time_horizon)); +} +#ifdef ADD_MAIN +int main(int argc, char** argv) { + ::testing::InitGoogleTest(&argc, argv); + return RUN_ALL_TESTS(); +} +#endif // ADD_MAIN diff --git a/lib/cpp-test/hawkes/inference/CMakeLists.txt b/lib/cpp-test/hawkes/inference/CMakeLists.txt new file mode 100644 index 000000000..c0acb43f3 --- /dev/null +++ b/lib/cpp-test/hawkes/inference/CMakeLists.txt @@ -0,0 +1,16 @@ +add_executable(tick_test_hawkes_inference + hawkes_em_gtest.cpp + ) + +target_link_libraries(tick_test_hawkes_inference + ${TICK_LIB_ARRAY} + ${TICK_LIB_BASE} + ${TICK_LIB_CRANDOM} + ${TICK_LIB_BASE_MODEL} + ${TICK_LIB_LINEAR_MODEL} + ${TICK_LIB_HAWKES_INFERENCE} + ${TICK_LIB_HAWKES_MODEL} + ${TICK_TEST_LIBS} + ) + + diff --git a/lib/cpp-test/hawkes/inference/hawkes_em_gtest.cpp b/lib/cpp-test/hawkes/inference/hawkes_em_gtest.cpp new file mode 100644 index 000000000..4ab71e2b9 --- /dev/null +++ b/lib/cpp-test/hawkes/inference/hawkes_em_gtest.cpp @@ -0,0 +1,250 @@ +// License: BSD 3 clause + +#include +#include "tick/hawkes/inference/hawkes_em.h" + +#define EPSILON 1e-64 + +class HawkesEMTest : public ::testing::Test { + protected: + ulong n_nodes = 2; + double kernel_support = 1.; + ulong kernel_size = 10; + double dt = kernel_support / kernel_size; + double t0 = .0; + ArrayDouble kernel_discretization; + ArrayDouble mu; + ArrayDouble2d kernels; + HawkesEM em{kernel_support, kernel_size, 1}; + void SetUp() override { + kernel_discretization = ArrayDouble{.0, .1, .2, .3, .4, .5, .6, .7, .8, .9, 1.}; + em.set_n_nodes(n_nodes); + mu = ArrayDouble{.05, .05}; + kernels = ArrayDouble2d(n_nodes, n_nodes * kernel_size); + kernels[0] = .0; + kernels[1] = .1; + kernels[2] = .2; + kernels[3] = .3; + kernels[4] = .4; + kernels[5] = .1; + kernels[6] = .1; + kernels[7] = .1; + kernels[8] = .1; + kernels[9] = .0; + kernels[10] = .5; + kernels[11] = .4; + kernels[12] = .3; + kernels[13] = .2; + kernels[14] = .1; + kernels[15] = .0; + kernels[16] = .1; + kernels[17] = .2; + kernels[18] = .3; + kernels[19] = .0; + kernels[20] = .5; + kernels[21] = .4; + kernels[22] = .0; + kernels[23] = .0; + kernels[24] = .0; + kernels[25] = .0; + kernels[26] = .0; + kernels[27] = .0; + kernels[28] = .0; + kernels[29] = .0; + kernels[30] = .2; + kernels[31] = .2; + kernels[32] = .4; + kernels[33] = .2; + kernels[34] = .1; + kernels[35] = .0; + kernels[36] = .0; + kernels[37] = .0; + kernels[38] = .0; + kernels[39] = .0; + } +}; + +TEST_F(HawkesEMTest, can_get_kernel_size) { EXPECT_EQ(em.get_kernel_size(), kernel_size); } + +TEST_F(HawkesEMTest, can_get_n_nodes) { EXPECT_EQ(em.get_n_nodes(), n_nodes); } + +TEST_F(HawkesEMTest, kernel_dt) { + EXPECT_DOUBLE_EQ(em.get_kernel_dt(), dt); + EXPECT_DOUBLE_EQ(em.get_kernel_fixed_dt(), dt); +} + +TEST_F(HawkesEMTest, kernel_discretization_test) { + ArrayDouble ks = *em.get_kernel_discretization(); + EXPECT_EQ(kernel_discretization.size(), ks.size()); + EXPECT_EQ(kernel_size + 1, ks.size()); + EXPECT_DOUBLE_EQ(kernel_discretization[0], em.get_kernel_t0()); + for (ulong t = 0; t < ks.size(); t++) { + EXPECT_DOUBLE_EQ(ks[t], kernel_discretization[t]); + } + em.set_kernel_discretization(kernel_discretization.as_sarray_ptr()); + EXPECT_EQ(em.get_kernel_size(), kernel_size); + EXPECT_NEAR(kernel_discretization[0], em.get_kernel_t0(), 1e-100); + EXPECT_DOUBLE_EQ(em.get_kernel_dt(), dt); +} + +TEST_F(HawkesEMTest, kernel_time_func_dt) { + em.init_kernel_time_func(kernels); + std::vector& timefunc = em.get_kernel_time_func(); + ASSERT_FALSE(timefunc.empty()); + for (ulong u = 0; u < n_nodes; ++u) { + for (ulong v = 0; v < n_nodes; ++v) { + ulong uv = u * n_nodes + v; + EXPECT_NEAR(timefunc[uv].get_t0(), kernel_discretization[0], 1e-100); + EXPECT_DOUBLE_EQ(timefunc[uv].get_t0(), em.get_kernel_t0()); + EXPECT_DOUBLE_EQ(timefunc[uv].get_dt(), dt); + } + } +} + +TEST_F(HawkesEMTest, kernel_time_func_data) { + em.init_kernel_time_func(kernels); + std::vector& timefunc = em.get_kernel_time_func(); + ASSERT_FALSE(timefunc.empty()); + for (ulong u = 0; u < n_nodes; ++u) { + for (ulong v = 0; v < n_nodes; ++v) { + ulong uv = u * n_nodes + v; + ArrayDouble data_uv = *(timefunc[uv].get_sampled_y()); + EXPECT_EQ(data_uv.size(), kernel_size); + for (ulong k = 0; k < kernel_size; ++k) { + ulong vk = v * kernel_size + k; + // Test data values + EXPECT_DOUBLE_EQ(data_uv[k], kernels(u, vk)) + << "Kernel[" << u << ", " << v << "]: " + << "Value of " << k << "-th sample data gives a mismatch."; + // Test abscissa + EXPECT_DOUBLE_EQ(timefunc[uv].get_t0() + k * timefunc[uv].get_dt(), + (*em.get_kernel_discretization())[k]) + << "Kernel[" << u << ", " << v << "]: " + << "Value of " << k << "-th abscissa point gives a mismatch."; + } + } + } +} + +TEST_F(HawkesEMTest, kernel_time_func_data_with_explicit_abscissa) { + // Rounding errors in the division T[10] - T[0] can result in + // the size of `sampled_y` being larger than the size of `Y` by 1 + em.set_kernel_discretization(kernel_discretization.as_sarray_ptr()); + em.init_kernel_time_func(kernels); + std::vector& timefunc = em.get_kernel_time_func(); + ASSERT_FALSE(timefunc.empty()); + for (ulong u = 0; u < n_nodes; ++u) { + for (ulong v = 0; v < n_nodes; ++v) { + ulong uv = u * n_nodes + v; + ArrayDouble data_uv = *(timefunc[uv].get_sampled_y()); + EXPECT_NEAR(data_uv.size(), kernel_size + 1, 1); + /* + std::cout << "Kernel[" << u << ", " << v << "]: " << std::endl; + for (ulong j = 0; j < data_uv.size(); j++) { + std::cout << "data_uv[" << j << "] = " << data_uv[j] << std::endl; + } + */ + for (ulong k = 0; k < kernel_size; ++k) { + ulong vk = v * kernel_size + k; + // Test data values + EXPECT_DOUBLE_EQ(data_uv[k], kernels(u, vk)) + << "Kernel[" << u << ", " << v << "]: " << std::endl + << "Value of " << k << "-th sample data gives a mismatch." << std::endl + << "Corresponding kernel discretization point : " + << (*em.get_kernel_discretization())[k] << std::endl; + } + } + } +} + +TEST_F(HawkesEMTest, kernel_time_func_values) { + em.init_kernel_time_func(kernels); + std::vector& timefunc = em.get_kernel_time_func(); + ASSERT_FALSE(timefunc.empty()); + for (ulong u = 0; u < n_nodes; ++u) { + for (ulong v = 0; v < n_nodes; ++v) { + ulong uv = u * n_nodes + v; + for (ulong k = 0; k < kernel_size; ++k) { + ulong vk = v * kernel_size + k; + double t = t0 + k * dt + .5 * dt; + EXPECT_DOUBLE_EQ(timefunc[uv].value(t), kernels(u, vk)) + << "Kernel[" << u << ", " << v << "]: " + << "Value at time t = " << t << " gives a mismatch.\n" + << "Abscissa position k=" << k << std::endl + << std::endl; + } + } + } +} + +TEST_F(HawkesEMTest, kernel_time_func_values_with_explicit_abscissa) { + em.set_kernel_discretization(kernel_discretization.as_sarray_ptr()); + em.init_kernel_time_func(kernels); + std::vector& timefunc = em.get_kernel_time_func(); + ASSERT_FALSE(timefunc.empty()); + for (ulong u = 0; u < n_nodes; ++u) { + for (ulong v = 0; v < n_nodes; ++v) { + ulong uv = u * n_nodes + v; + for (ulong k = 0; k < kernel_size; ++k) { + ulong vk = v * kernel_size + k; + double t = t0 + k * dt + .5 * dt; + EXPECT_DOUBLE_EQ(timefunc[uv].value(t), kernels(u, vk)) + << "Kernel[" << u << ", " << v << "]: " << std::endl + << "Value at time t = " << t << " gives a mismatch.\n" + << "Abscissa position k=" << k << std::endl + << "Abscissa[k] = " << t0 + k * dt << std::endl + << std::endl; + } + } + } +} + +TEST_F(HawkesEMTest, kernel_time_func_primitive) { + em.init_kernel_time_func(kernels); + std::vector& timefunc = em.get_kernel_time_func(); + ASSERT_FALSE(timefunc.empty()); + ArrayDouble kernel_discretization = *em.get_kernel_discretization(); + for (ulong u = 0; u < n_nodes; ++u) { + for (ulong v = 0; v < n_nodes; ++v) { + ulong uv = u * n_nodes + v; + double discrete_integral = .0; + for (ulong k = 0; k < kernel_size; ++k) { + ulong vk = v * kernel_size + k; + double t = t0 + (k + 1) * dt; + discrete_integral += kernels(u, vk) * dt; + EXPECT_DOUBLE_EQ(timefunc[uv].primitive(t), discrete_integral) + << "Kernel[" << u << ", " << v << "]: " + << "Primitive at time t=" << t << " (k = " << k << ") gives a mismatch."; + } + } + } +} + +TEST_F(HawkesEMTest, kernel_time_func_primitive_with_explicit_abscissa) { + em.set_kernel_discretization(kernel_discretization.as_sarray_ptr()); + em.init_kernel_time_func(kernels); + std::vector& timefunc = em.get_kernel_time_func(); + ASSERT_FALSE(timefunc.empty()); + ArrayDouble kernel_discretization = *em.get_kernel_discretization(); + for (ulong u = 0; u < n_nodes; ++u) { + for (ulong v = 0; v < n_nodes; ++v) { + ulong uv = u * n_nodes + v; + double discrete_integral = .0; + for (ulong k = 0; k < kernel_size; ++k) { + ulong vk = v * kernel_size + k; + double t = t0 + (k + 1) * dt; + discrete_integral += kernels(u, vk) * dt; + EXPECT_NEAR(timefunc[uv].primitive(t), discrete_integral, 1e-16) + << "Kernel[" << u << ", " << v << "]: " + << "Primitive at time t=" << t << " (k = " << k << ") gives a mismatch."; + } + } + } +} + +#ifdef ADD_MAIN +int main(int argc, char** argv) { + ::testing::InitGoogleTest(&argc, argv); + return RUN_ALL_TESTS(); +} +#endif // ADD_MAIN diff --git a/lib/cpp/base/time_func.cpp b/lib/cpp/base/time_func.cpp index c960c7dd6..d04ef64ae 100644 --- a/lib/cpp/base/time_func.cpp +++ b/lib/cpp/base/time_func.cpp @@ -6,17 +6,12 @@ #include "tick/base/time_func.h" #include - -const double floor_threshold = 1e-10; - -double threshold_floor(const double x) { return std::floor((x) + floor_threshold); } - -const double timefunc_oversampling = 5.0; +#define FLOOR_THRESHOLD 1e-10 TimeFunction::TimeFunction(double y) { // Little trick so that it will predict y after 0 and 0 before - // 2* is needed to ensure that 0 > last_value_before_border + floor_threshold - last_value_before_border = -2 * floor_threshold; + // 2* is needed to ensure that 0 > last_value_before_border + FLOOR_THRESHOLD + last_value_before_border = -2 * FLOOR_THRESHOLD; border_value = y; border_type = DEFAULT_BORDER; } @@ -24,6 +19,7 @@ TimeFunction::TimeFunction(double y) { TimeFunction::TimeFunction(const ArrayDouble &Y, BorderType type, InterMode mode, double dt, double border_value) : inter_mode(mode), border_type(type), dt(dt), border_value(border_value) { + set_t0(0.); // Implicitly we are assuming that support starts at 0 ulong size = Y.size(); sampled_y = SArrayDouble::new_ptr(size); std::copy(Y.data(), Y.data() + size, sampled_y->data()); @@ -33,8 +29,24 @@ TimeFunction::TimeFunction(const ArrayDouble &Y, BorderType type, InterMode mode if (i == 0) { (*sampled_y_primitive)[i] = 0; } else { - (*sampled_y_primitive)[i] = - (*sampled_y_primitive)[i - 1] + ((*sampled_y)[i] + (*sampled_y)[i - 1]) * dt / 2.; + double y_i_1 = (*sampled_y)[i - 1]; + double y_i = (*sampled_y)[i]; + double y; + switch (inter_mode) { + case (InterMode::InterConstLeft): { + y = y_i; + break; + } + case (InterMode::InterConstRight): { + y = y_i_1; + break; + } + case (InterMode::InterLinear): { + y = (y_i + y_i_1) / 2.; + break; + } + } + (*sampled_y_primitive)[i] = (*sampled_y_primitive)[i - 1] + y * dt; } } @@ -60,22 +72,23 @@ TimeFunction::TimeFunction(const ArrayDouble &T, const ArrayDouble &Y, BorderTyp double min_step = DBL_MAX; for (ulong i = 1; i < size; ++i) { double step = T[i] - T[i - 1]; - if (step < -floor_threshold) TICK_ERROR("X-array must be sorted"); - if (step < floor_threshold) continue; + if (step < -FLOOR_THRESHOLD) TICK_ERROR("T-array must be sorted"); + if (step < FLOOR_THRESHOLD) continue; if (step < min_step) min_step = step; } // Specify dt if user has not specified it if (dt1 == 0) { - dt = min_step / timefunc_oversampling; + dt = min_step; } else { dt = dt1; } - if (dt < 10 * floor_threshold) + if (dt < 10 * FLOOR_THRESHOLD) TICK_ERROR("dt is too small, we currently cannot reach this precision"); - t0 = T[0]; + set_t0(T[0]); + last_value_before_border = T[size - 1]; switch (border_type) { case (BorderType::Border0): @@ -99,9 +112,7 @@ TimeFunction::TimeFunction(const ArrayDouble &T, const ArrayDouble &Y, BorderTyp if (length < dt) TICK_ERROR("``dt`` cannot be bigger than the time space you are considering"); - // We add 2 in order to be sure that we will have at least one value after the - // last given t This value will be used to interpolate until the last given t - ulong sample_size = (ulong)threshold_floor(length / dt) + 2; + ulong sample_size = 1 + std::ceil(length / dt); sampled_y = SArrayDouble::new_ptr(sample_size); sampled_y_primitive = SArrayDouble::new_ptr(sample_size); @@ -119,23 +130,54 @@ TimeFunction::TimeFunction(const ArrayDouble &T, const ArrayDouble &Y, BorderTyp double y_right = Y[index_left + 1]; for (ulong i = 0; i < sampled_y->size(); ++i) { - while (t > t_right + floor_threshold && index_left < size - 2) { - // Ensure we are not behind the last point. This might happen if dt does - // not divides length In this case we keep the last two points to - // interpolate + while (t > t_right && index_left < size - 2) { index_left += 1; t_left = T[index_left]; y_left = Y[index_left]; t_right = T[index_left + 1]; y_right = Y[index_left + 1]; } - double y_i = interpolation(t_left, y_left, t_right, y_right, t); + double y_i; + if (t_left + FLOOR_THRESHOLD < t) { + if (t < t_right - FLOOR_THRESHOLD) + y_i = interpolation(t_left, y_left, t_right, y_right, t); + else + y_i = y_right; + } else + y_i = y_left; + /* + std::cout << "\nTimeFunction::TimeFunction " << std::endl + << " i = " << i << ", " << std::endl + << " index_left = " << index_left << ", " << std::endl + << " t_left: " << t_left << std::endl + << " t: " << t << std::endl + << " t_right: " << t_right << std::endl + << " y_left: " << y_left << std::endl + << " y_i: " << y_i << std::endl + << " y_right: " << y_right << std::endl + << std::endl; + */ (*sampled_y)[i] = y_i; if (i == 0) (*sampled_y_primitive)[0] = 0; else { - (*sampled_y_primitive)[i] = - (*sampled_y_primitive)[i - 1] + (y_i + (*sampled_y)[i - 1]) * dt / 2.; + double y_i_1 = (*sampled_y)[i - 1]; + double y; + switch (inter_mode) { + case (InterMode::InterConstLeft): { + y = y_i; + break; + } + case (InterMode::InterConstRight): { + y = y_i_1; + break; + } + case (InterMode::InterLinear): { + y = (y_i + y_i_1) / 2.; + break; + } + } + (*sampled_y_primitive)[i] = (*sampled_y_primitive)[i - 1] + y * dt; } t += dt; } @@ -158,8 +200,7 @@ void TimeFunction::compute_future_max() { if (border_type != BorderType::Cyclic) { double previous_max = border_value; - // condition is : i + 1 > 0 as an ulong cannot be negative - for (ulong i = sample_size - 1; i + 1 > 0; --i) { + for (long i = sample_size - 1; i >= 0; --i) { (*future_max)[i] = std::max((*sampled_y)[i], previous_max); previous_max = (*future_max)[i]; } @@ -175,15 +216,20 @@ double TimeFunction::value(double t) { // this test must be the first one otherwise we might have problem with // constant TimeFunctions - if (t > last_value_before_border + floor_threshold) { + if (t > last_value_before_border + FLOOR_THRESHOLD) { if (border_type != BorderType::Cyclic) { return border_value; } else { // If border type is cyclic then we simply return the value it would have // in the first cycle - const double divider = last_value_before_border; - const int quotient = static_cast(threshold_floor(t / divider)); - return value(t - quotient * divider); + const double period = last_value_before_border; + const int k = static_cast(std::floor(t / period)); + double t_ = t - k * period; + if ((t_ > period) || (t_ < 0.)) { + throw std::runtime_error( + "TimeFunction - periodic function error: Could not fallback to base period"); + } + return value(t_); } } else if (t < t0) { // which behavior do we want if t < t0 ? @@ -192,24 +238,23 @@ double TimeFunction::value(double t) { return 0.0; } - const ulong i_left = get_index_(t); - - const double t_left = get_t_from_index_(i_left); + const ulong i_left = _idx_left(t); + const ulong i_right = _idx_right(t); + const double t_left = _t_left(t); + const double t_right = _t_right(t); const double y_left = (*sampled_y)[i_left]; - const double t_right = get_t_from_index_(i_left + 1); - const double y_right = (*sampled_y)[i_left + 1]; - + const double y_right = (*sampled_y)[i_right]; return interpolation(t_left, y_left, t_right, y_right, t); } double TimeFunction::primitive(double t) { - if (t > last_value_before_border + floor_threshold) { + if (t > last_value_before_border + FLOOR_THRESHOLD) { if (border_type != BorderType::Cyclic) { double delay = t - last_value_before_border; return primitive(last_value_before_border) + border_value * delay; } else { const double divider = last_value_before_border; - const int quotient = static_cast(threshold_floor(t / divider)); + const int quotient = static_cast(std::ceil(t / divider)); return quotient * primitive(last_value_before_border) + primitive(t - quotient * divider); } } else if (t < t0) { @@ -219,14 +264,34 @@ double TimeFunction::primitive(double t) { return 0.0; } - const ulong i_left = get_index_(t); - - const double t_left = get_t_from_index_(i_left); - const double y_left = (*sampled_y_primitive)[i_left]; - const double t_right = get_t_from_index_(i_left + 1); - const double y_right = (*sampled_y_primitive)[i_left + 1]; - - return interpolation(t_left, y_left, t_right, y_right, t); + const ulong i = _idx_left(t); + const ulong j = _idx_right(t); + const ulong s = (*sampled_y_primitive).size(); + const ulong index_left = (i < s) ? i : s - 1; + const ulong index_right = (j < s) ? j : s - 1; + const double t_left = _t_left(t); + const double t_right = _t_right(t); + // const double y_left = (*sampled_y)[index_left]; + // const double y_right = (*sampled_y)[index_right]; + const double y_primitive_left = (*sampled_y_primitive)[index_left]; + const double y_primitive_right = (*sampled_y_primitive)[index_right]; + const double slope = (y_primitive_right - y_primitive_left) / (t_right - t_left); + double res = y_primitive_left + (t - t_left) * slope; + /* + std::cout << "\nTimeFunction::primitive " << std::endl + << " index_left = " << index_left << ", " << std::endl + << " index_right = " << index_right << ", " << std::endl + << " t_left: " << t_left << std::endl + << " t: " << t << std::endl + << " t_right: " << t_right << std::endl + << " y_left: " << y_left << std::endl + << " y_right: " << y_right << std::endl + << " y_primitive_left: " << y_primitive_left << std::endl + << " y_primitive_right: " << y_primitive_right << std::endl + << " primitive(t) = " << res << std::endl + << std::endl; + */ + return res; } SArrayDoublePtr TimeFunction::value(ArrayDouble &array) { @@ -249,9 +314,14 @@ double TimeFunction::future_bound(double t) { } else { // If border type is cyclic then we simply return the value it would have // in the first cycle - const double divider = last_value_before_border; - const int quotient = static_cast(threshold_floor(t / divider)); - return future_bound(t - quotient * divider); + const double period = last_value_before_border; + const int k = static_cast(std::floor(t / period)); + double t_ = t - k * period; + if ((t_ > period) || (t_ < 0.)) { + throw std::runtime_error( + "TimeFunction - periodic function error: Could not fallback to base period"); + } + return future_bound(t_); } } else if (t < t0) { return (*future_max)[0]; @@ -259,12 +329,12 @@ double TimeFunction::future_bound(double t) { return (*future_max)[0]; } - const ulong i_left = get_index_(t); - - const double t_left = get_t_from_index_(i_left); + const ulong i_left = _idx_left(t); + const ulong i_right = _idx_right(t); + const double t_left = _t_left(t); + const double t_right = _t_right(t); const double y_left = (*future_max)[i_left]; - const double t_right = get_t_from_index_(i_left + 1); - const double y_right = (*future_max)[i_left + 1]; + const double y_right = (*future_max)[i_right]; return interpolation(t_left, y_left, t_right, y_right, t); } @@ -287,16 +357,16 @@ double TimeFunction::max_error(double t) { switch (inter_mode) { case (InterMode::InterLinear): - if (std::abs(t_left - t_right) < floor_threshold) { + if (std::abs(t_left - t_right) < FLOOR_THRESHOLD) { return std::abs(y_left - y_right); } else { const double slope = (y_right - y_left) / (t_right - t_left); return std::abs(slope) * dt; } case (InterMode::InterConstLeft): - return floor_threshold; + return FLOOR_THRESHOLD; case (InterMode::InterConstRight): - return floor_threshold; + return FLOOR_THRESHOLD; default: return 0; } @@ -312,38 +382,22 @@ double TimeFunction::get_norm() { } double norm = 0; - for (ulong i = 0; i < sampled_y->size() - 1; ++i) { - double y_left = (*sampled_y)[i]; - double y_right = (*sampled_y)[i + 1]; - - // if we cross the last value we must handle this case - double t_right = dt * (i + 1); - double span; - if (t_right < last_value_before_border + floor_threshold) { - span = dt; - } else { - span = std::max(0.0, last_value_before_border - dt * i); - y_right = value(last_value_before_border); - } + for (ulong i = 1; i < sampled_y->size(); ++i) { + double y_left = (*sampled_y)[i - 1]; + double y_right = (*sampled_y)[i]; switch (inter_mode) { - case (InterMode::InterLinear): { - // The norm can be decomposed between a rectangle and a triangle - const double rectangle_norm = y_left * span; - const double triangle_norm = (y_right - y_left) * span / 2; - norm += rectangle_norm + triangle_norm; + case (InterMode::InterLinear): + norm += .5 * (y_left + y_right) * dt; break; - } - case (InterMode::InterConstLeft): { + case (InterMode::InterConstLeft): // only right point matters - norm += y_right * span; + norm += y_right * dt; break; - } - case (InterMode::InterConstRight): { + case (InterMode::InterConstRight): // only left point matters - norm += y_left * span; + norm += y_left * dt; break; - } } } return norm; @@ -351,7 +405,7 @@ double TimeFunction::get_norm() { double TimeFunction::constant_left_interpolation(double t_left, double y_left, double t_right, double y_right, double t_value) { - if (std::abs(t_value - t_left) < floor_threshold) + if (std::abs(t_value - t_left) < FLOOR_THRESHOLD) return y_left; else return y_right; @@ -359,7 +413,7 @@ double TimeFunction::constant_left_interpolation(double t_left, double y_left, d double TimeFunction::constant_right_interpolation(double t_left, double y_left, double t_right, double y_right, double t_value) { - if (std::abs(t_value - t_right) < floor_threshold) + if (std::abs(t_value - t_right) < FLOOR_THRESHOLD) return y_right; else return y_left; @@ -367,7 +421,7 @@ double TimeFunction::constant_right_interpolation(double t_left, double y_left, double TimeFunction::linear_interpolation(double t_left, double y_left, double t_right, double y_right, double t_value) { - if (std::abs(t_left - t_right) < floor_threshold) { + if (std::abs(t_left - t_right) < FLOOR_THRESHOLD) { return (y_left + y_right) / 2.0; } else { double slope = (y_right - y_left) / (t_right - t_left); @@ -377,6 +431,16 @@ double TimeFunction::linear_interpolation(double t_left, double y_left, double t double TimeFunction::interpolation(double t_left, double y_left, double t_right, double y_right, double t) { + if ((t < t_left) || (t > t_right)) { + std::cout << "TimeFunction::interpolation Error: evaluation points error " << std::endl + << "t_left: " << t_left << std::endl + << "t: " << t << std::endl + << "t_right: " << t_right << std::endl; + } + if (t < t_left) + throw std::runtime_error("TimeFunction::interpolation error: t_left cannot be larger than t"); + if (t > t_right) + throw std::runtime_error("TimeFunction::interpolation error: t_right cannot be smaller than t"); // Second do the right interpolation switch (inter_mode) { case (InterMode::InterLinear): @@ -390,6 +454,30 @@ double TimeFunction::interpolation(double t_left, double y_left, double t_right, } } -ulong TimeFunction::get_index_(double t) { return (ulong)threshold_floor((t - t0) / dt); } +ulong TimeFunction::get_index_(double t) { return (ulong)std::ceil((t - t0) / dt); } double TimeFunction::get_t_from_index_(ulong i) { return t0 + dt * i; } + +ulong TimeFunction::_idx_left(double t) { + if (t < t0 + FLOOR_THRESHOLD) return 0; + ulong k_ = std::floor((t - t0) / dt); + ulong k = (k_ < 0) ? 0 : k_; + return k; +} + +ulong TimeFunction::_idx_right(double t) { + ulong k = std::ceil((t - t0 + FLOOR_THRESHOLD) / dt); + return k; +} + +double TimeFunction::_t_left(double t) { + if (t < t0 + FLOOR_THRESHOLD) return t0; + ulong k_ = std::floor((t - t0) / dt); + ulong k = (k_ < 0) ? 0 : k_; + return t0 + k * dt; +} + +double TimeFunction::_t_right(double t) { + ulong k = std::ceil((t - t0 + FLOOR_THRESHOLD) / dt); + return t0 + k * dt; +} diff --git a/lib/cpp/hawkes/inference/hawkes_em.cpp b/lib/cpp/hawkes/inference/hawkes_em.cpp index 8f12a6a0e..1ec8950f3 100644 --- a/lib/cpp/hawkes/inference/hawkes_em.cpp +++ b/lib/cpp/hawkes/inference/hawkes_em.cpp @@ -2,33 +2,29 @@ #include "tick/hawkes/inference/hawkes_em.h" -HawkesEM::HawkesEM(const double kernel_support, const ulong kernel_size, - const int max_n_threads) - : ModelHawkesList(max_n_threads, 0), kernel_discretization(nullptr) { +HawkesEM::HawkesEM(const double kernel_support, const ulong kernel_size, const int max_n_threads) + : ModelHawkesList(max_n_threads, 0), kernel_discretization(nullptr), kernel_time_func() { set_kernel_support(kernel_support); set_kernel_size(kernel_size); } -HawkesEM::HawkesEM(const SArrayDoublePtr kernel_discretization, - const int max_n_threads) - : ModelHawkesList(max_n_threads, 0) { +HawkesEM::HawkesEM(const SArrayDoublePtr kernel_discretization, const int max_n_threads) + : ModelHawkesList(max_n_threads, 0), kernel_time_func() { set_kernel_discretization(kernel_discretization); } void HawkesEM::allocate_weights() { next_mu = ArrayDouble2d(n_realizations, n_nodes); next_kernels = ArrayDouble2d(n_realizations * n_nodes, n_nodes * kernel_size); - unnormalized_kernels = - ArrayDouble2d(n_realizations * n_nodes, n_nodes * kernel_size); + unnormalized_kernels = ArrayDouble2d(n_realizations * n_nodes, n_nodes * kernel_size); weights_computed = true; } double HawkesEM::loglikelihood(const ArrayDouble &mu, ArrayDouble2d &kernels) { check_baseline_and_kernels(mu, kernels); - double llh = parallel_map_additive_reduce( - get_n_threads(), n_nodes * n_realizations, &HawkesEM::loglikelihood_ur, - this, mu, kernels); + double llh = parallel_map_additive_reduce(get_n_threads(), n_nodes * n_realizations, + &HawkesEM::loglikelihood_ur, this, mu, kernels); return llh /= get_n_total_jumps(); } @@ -40,8 +36,7 @@ void HawkesEM::solve(ArrayDouble &mu, ArrayDouble2d &kernels) { // Fill next_mu and next_kernels next_mu.init_to_zero(); next_kernels.init_to_zero(); - parallel_run(get_n_threads(), n_nodes * n_realizations, &HawkesEM::solve_ur, - this, mu, kernels); + parallel_run(get_n_threads(), n_nodes * n_realizations, &HawkesEM::solve_ur, this, mu, kernels); // Reduce // Fill mu and kernels with next_mu and next_kernels @@ -51,18 +46,15 @@ void HawkesEM::solve(ArrayDouble &mu, ArrayDouble2d &kernels) { for (ulong node_u = 0; node_u < n_nodes; ++node_u) { mu[node_u] += next_mu(r, node_u); - ArrayDouble2d next_kernel_u_r( - n_nodes, kernel_size, - view_row(next_kernels, r * n_nodes + node_u).data()); - ArrayDouble2d kernel_u(n_nodes, kernel_size, - view_row(kernels, node_u).data()); + ArrayDouble2d next_kernel_u_r(n_nodes, kernel_size, + view_row(next_kernels, r * n_nodes + node_u).data()); + ArrayDouble2d kernel_u(n_nodes, kernel_size, view_row(kernels, node_u).data()); kernel_u.mult_incr(next_kernel_u_r, 1.); } } } -double HawkesEM::loglikelihood_ur(const ulong r_u, const ArrayDouble &mu, - ArrayDouble2d &kernels) { +double HawkesEM::loglikelihood_ur(const ulong r_u, const ArrayDouble &mu, ArrayDouble2d &kernels) { const ulong r = static_cast(r_u / n_nodes); double llh = (*end_times)[r]; std::function add_to_llh = [&llh](double intensity_t_i) { @@ -78,8 +70,7 @@ double HawkesEM::loglikelihood_ur(const ulong r_u, const ArrayDouble &mu, return llh; } -void HawkesEM::solve_ur(const ulong r_u, const ArrayDouble &mu, - ArrayDouble2d &kernels) { +void HawkesEM::solve_ur(const ulong r_u, const ArrayDouble &mu, ArrayDouble2d &kernels) { // Obtain realization and node index from r_u const ulong r = static_cast(r_u / n_nodes); const ulong node_u = r_u % n_nodes; @@ -88,36 +79,30 @@ void HawkesEM::solve_ur(const ulong r_u, const ArrayDouble &mu, const double mu_u = mu[node_u]; // initialize next data - ArrayDouble2d next_kernel_ru( - n_nodes, kernel_size, - view_row(next_kernels, r * n_nodes + node_u).data()); - ArrayDouble2d unnormalized_kernel_ru( - n_nodes, kernel_size, - view_row(unnormalized_kernels, r * n_nodes + node_u).data()); + ArrayDouble2d next_kernel_ru(n_nodes, kernel_size, + view_row(next_kernels, r * n_nodes + node_u).data()); + ArrayDouble2d unnormalized_kernel_ru(n_nodes, kernel_size, + view_row(unnormalized_kernels, r * n_nodes + node_u).data()); double &next_mu_ru = next_mu(r, node_u); - std::function add_to_next_kernel = - [this, &unnormalized_kernel_ru, &next_kernel_ru, &next_mu_ru, - &mu_u](double intensity_t_i) { - // If norm is zero then nothing to do (no contribution) - if (intensity_t_i == 0) return; - - // Otherwise, we need to norm the kernel_temp's and the mu_temp - // and add their contributions to the estimation - next_mu_ru += mu_u / (intensity_t_i * this->end_times->sum()); - for (ulong node_v = 0; node_v < this->n_nodes; node_v++) { - ArrayDouble unnormalized_kernel_ruv = - view_row(unnormalized_kernel_ru, node_v); - ArrayDouble next_kernel_ruv = view_row(next_kernel_ru, node_v); - for (ulong m = 0; m < this->kernel_size; m++) { - const double normalization_term = - intensity_t_i * (*this->n_jumps_per_node)[node_v] * - get_kernel_dt(m); - next_kernel_ruv[m] += - unnormalized_kernel_ruv[m] / normalization_term; - } - } - }; + std::function add_to_next_kernel = [this, &unnormalized_kernel_ru, &next_kernel_ru, + &next_mu_ru, &mu_u](double intensity_t_i) { + // If norm is zero then nothing to do (no contribution) + if (intensity_t_i == 0) return; + + // Otherwise, we need to norm the kernel_temp's and the mu_temp + // and add their contributions to the estimation + next_mu_ru += mu_u / (intensity_t_i * this->end_times->sum()); + for (ulong node_v = 0; node_v < this->n_nodes; node_v++) { + ArrayDouble unnormalized_kernel_ruv = view_row(unnormalized_kernel_ru, node_v); + ArrayDouble next_kernel_ruv = view_row(next_kernel_ru, node_v); + for (ulong m = 0; m < this->kernel_size; m++) { + const double normalization_term = + intensity_t_i * (*this->n_jumps_per_node)[node_v] * get_kernel_dt(m); + next_kernel_ruv[m] += unnormalized_kernel_ruv[m] / normalization_term; + } + } + }; compute_intensities_ur(r_u, mu, kernels, add_to_next_kernel, true); } @@ -131,8 +116,7 @@ SArrayDouble2dPtr HawkesEM::get_kernel_norms(ArrayDouble2d &kernels) const { ArrayDouble2d kernel_norms(n_nodes, n_nodes); for (ulong node_u = 0; node_u < n_nodes; ++node_u) { - ArrayDouble2d kernel_u(n_nodes, kernel_size, - view_row(kernels, node_u).data()); + ArrayDouble2d kernel_u(n_nodes, kernel_size, view_row(kernels, node_u).data()); for (ulong node_v = 0; node_v < n_nodes; ++node_v) { ArrayDouble kernel_uv = view_row(kernel_u, node_v); kernel_norms(node_u, node_v) = kernel_uv.dot(discretization_intervals); @@ -142,10 +126,78 @@ SArrayDouble2dPtr HawkesEM::get_kernel_norms(ArrayDouble2d &kernels) const { return kernel_norms.as_sarray2d_ptr(); } -void HawkesEM::compute_intensities_ur( - const ulong r_u, const ArrayDouble &mu, ArrayDouble2d &kernels, - std::function intensity_func, - bool store_unnormalized_kernel) { +SArrayDouble2dPtr HawkesEM::get_kernel_primitives(ArrayDouble2d &kernels) const { + check_baseline_and_kernels(ArrayDouble(n_nodes), kernels); + + ArrayDouble discretization_intervals(kernel_size); + for (ulong m = 0; m < kernel_size; ++m) { + discretization_intervals[m] = get_kernel_dt(m); + } + + ArrayDouble2d vals(n_nodes, n_nodes * kernel_size); + for (ulong u = 0; u < n_nodes; ++u) { + ArrayDouble2d kernel_u(n_nodes, kernel_size, view_row(kernels, u).data()); + for (ulong v = 0; v < n_nodes; ++v) { + ArrayDouble kernel_uv = view_row(kernel_u, v); + double val = 0.; + for (ulong k = 0; k < kernel_size; ++k) { + ulong vk = v * kernel_size + k; + double increment = kernel_uv[k] * discretization_intervals[k]; + if (increment < 0) { + throw std::runtime_error( + "Error in computing primitive of the kernel: increment is negative!"); + } + val += increment; + vals(u, vk) = val; + } + } + } + + return vals.as_sarray2d_ptr(); +} + +void HawkesEM::init_kernel_time_func(ArrayDouble2d &kernels) { + // `kernels` is expected in the shape (n_nodes, n_nodes * kernel_size) + // Check that indeed this is the case! + check_baseline_and_kernels(ArrayDouble(n_nodes), kernels); + kernel_time_func.clear(); + kernel_time_func.reserve(n_nodes * n_nodes); + if (kernel_discretization == nullptr) { + std::cout << "Setting up kernel time functions with implicit abscissa" << std::endl; + } else { + std::cout << "Setting up kernel time functions with explicit abscissa" << std::endl; + } + for (ulong u = 0; u < n_nodes; ++u) { + ArrayDouble2d kernel_u(n_nodes, kernel_size, view_row(kernels, u).data()); + for (ulong v = 0; v < n_nodes; ++v) { + ArrayDouble kernel_uv = view_row(kernel_u, v); + // Use abscissa only if kernel_discretization is explictly set + if (kernel_discretization == nullptr) { + kernel_time_func.emplace_back(TimeFunction(kernel_uv, TimeFunction::BorderType::Border0, + TimeFunction::InterMode::InterConstRight, + get_kernel_fixed_dt(), .0)); + } else { + ArrayDouble kerdis = *get_kernel_discretization(); + ArrayDouble kernel_uv_(1 + kernel_size); + std::copy(kernel_uv.data(), kernel_uv.data() + kernel_size, kernel_uv_.data()); + kernel_uv_[kernel_size] = 0.; + kernel_time_func.emplace_back(TimeFunction(kerdis, kernel_uv_, + TimeFunction::BorderType::Border0, + TimeFunction::InterMode::InterConstRight)); + } + } + } + if (kernel_time_func.empty()) { + throw std::runtime_error("Could not set kernel_time_func"); + } else { + is_kernel_time_func = 1; + } +} + +void HawkesEM::compute_intensities_ur(const ulong r_u, const ArrayDouble &mu, + ArrayDouble2d &kernels, + std::function intensity_func, + bool store_unnormalized_kernel) { // Obtain realization and node index from r_u const ulong r = static_cast(r_u / n_nodes); const ulong node_u = r_u % n_nodes; @@ -159,15 +211,13 @@ void HawkesEM::compute_intensities_ur( // Fetch corresponding data SArrayDoublePtrList1D &realization = timestamps_list[r]; - ArrayDouble2d kernel_u(n_nodes, kernel_size, - view_row(kernels, node_u).data()); + ArrayDouble2d kernel_u(n_nodes, kernel_size, view_row(kernels, node_u).data()); const double mu_u = mu[node_u]; ArrayDouble2d unnormalized_kernel_ru; if (store_unnormalized_kernel) { unnormalized_kernel_ru = ArrayDouble2d( - n_nodes, kernel_size, - view_row(unnormalized_kernels, r * n_nodes + node_u).data()); + n_nodes, kernel_size, view_row(unnormalized_kernels, r * n_nodes + node_u).data()); } ArrayDouble timestamps_u = view(*realization[node_u]); @@ -197,13 +247,12 @@ void HawkesEM::compute_intensities_ur( // satisfies v[index] <= t_i while (true) { if (last_indices[node_v] == 0) break; - if (last_indices[node_v] < timestamps_v.size() && - t_i >= timestamps_v[last_indices[node_v]]) + if (last_indices[node_v] < timestamps_v.size() && t_i >= timestamps_v[last_indices[node_v]]) break; last_indices[node_v]--; } // if (t_i < timestamps_v[last_indices[node_v]]) continue; - if ( (timestamps_v.size() == 0) || (t_i < timestamps_v[last_indices[node_v]]) ) continue; + if ((timestamps_v.size() == 0) || (t_i < timestamps_v[last_indices[node_v]])) continue; // Get the corresponding kernels and their size ArrayDouble kernel_ruv = view_row(kernel_u, node_v); @@ -237,8 +286,7 @@ void HawkesEM::compute_intensities_ur( // Then we get the corresponding kernel value double unnormalized_p_uv_ij = kernel_ruv[m]; - if (store_unnormalized_kernel) - unnormalized_kernel_ruv[m] += unnormalized_p_uv_ij; + if (store_unnormalized_kernel) unnormalized_kernel_ruv[m] += unnormalized_p_uv_ij; // Update the norm intensity_t_i += unnormalized_p_uv_ij; @@ -290,12 +338,10 @@ double HawkesEM::compute_compensator_ur(const ulong r_u, const ArrayDouble &mu, // last_m allows us to find m value quicker as m >= last_m ulong m = last_m; while (kernel_discretization[m + 1] < t_diff) { - double interval = - kernel_discretization[m + 1] - kernel_discretization[m]; + double interval = kernel_discretization[m + 1] - kernel_discretization[m]; // We add the compensator value added by bucket m for (ulong node_v = 0; node_v < n_nodes; ++node_v) { - current_marginal_compensator += - interval * kernels(node_v, node_u * kernel_size + m); + current_marginal_compensator += interval * kernels(node_v, node_u * kernel_size + m); } m++; } @@ -303,8 +349,7 @@ double HawkesEM::compute_compensator_ur(const ulong r_u, const ArrayDouble &mu, double interval_part = t_diff - kernel_discretization[m]; // We add compensator value of part of the bucket for (ulong node_v = 0; node_v < n_nodes; ++node_v) { - compensator += - interval_part * kernels(node_v, node_u * kernel_size + m); + compensator += interval_part * kernels(node_v, node_u * kernel_size + m); } last_m = m; } else { @@ -320,6 +365,60 @@ double HawkesEM::compute_compensator_ur(const ulong r_u, const ArrayDouble &mu, return compensator; } +void HawkesEM::set_buffer_variables_for_integral_of_intensity(ArrayDouble &mu, + ArrayDouble2d &kernels) { + _baseline = mu; + _adjacency = kernels; + init_kernel_time_func(_adjacency); +} + +SArrayDoublePtr HawkesEM::primitive_of_intensity_at_jump_times(const ulong r_u, ArrayDouble &mu, + ArrayDouble2d &kernels) { + set_buffer_variables_for_integral_of_intensity(mu, kernels); + return primitive_of_intensity_at_jump_times(r_u); +} + +SArrayDoublePtr HawkesEM::primitive_of_intensity_at_jump_times(const ulong r_u) { + // Obtain realization and node index from r_u + const ulong r = static_cast(r_u / n_nodes); + const ulong u = r_u % n_nodes; + + // Fetch corresponding data + SArrayDoublePtrList1D &realization = timestamps_list[r]; + ArrayDouble timestamps_u = view(*realization[u]); + ulong n = timestamps_u.size(); + ArrayDouble res = ArrayDouble(n); + for (ulong k = 0; k < n; ++k) { + double t_uk = timestamps_u[k]; + res[k] = _evaluate_primitive_of_intensity(t_uk, r, u); + } + return res.as_sarray_ptr(); +} + +double HawkesEM::_evaluate_primitive_of_intensity(const double t, const ulong r, const ulong u) { + if ((is_kernel_time_func == 0) | (kernel_time_func.empty())) { + throw std::runtime_error("kernel_time_func has not been initialised yet."); + } + + // Fetch corresponding data + SArrayDoublePtrList1D &realization = timestamps_list[r]; + const double mu_u = _baseline[u]; + + double res = 0.; + // Iterate over components and timestamps + for (ulong v = 0; v < n_nodes; ++v) { + ArrayDouble timestamps_v = view(*realization[v]); + ulong n = timestamps_v.size(); + for (ulong k = 0; k < n; k++) { + double t_vk = timestamps_v[k]; + if (t_vk >= t) break; + res += kernel_time_func[u * n_nodes + v].primitive(t - t_vk); + } + } + res += mu_u * t; + return res; +} + double HawkesEM::get_kernel_dt(const ulong m) const { if (kernel_discretization == nullptr) { return kernel_support / kernel_size; @@ -328,15 +427,21 @@ double HawkesEM::get_kernel_dt(const ulong m) const { } } -void HawkesEM::check_baseline_and_kernels(const ArrayDouble &mu, - ArrayDouble2d &kernels) const { +double HawkesEM::get_kernel_t0() { + if (kernel_discretization == nullptr) { + return 0.; + } else { + return (*kernel_discretization)[0]; + } +} + +void HawkesEM::check_baseline_and_kernels(const ArrayDouble &mu, ArrayDouble2d &kernels) const { if (mu.size() != n_nodes) { TICK_ERROR("baseline / mu argument must be an array of size " << n_nodes); } - if (kernels.n_rows() != n_nodes || - kernels.n_cols() != n_nodes * kernel_size) { - TICK_ERROR("kernels argument must be an array of shape (" - << n_nodes << ", " << n_nodes * kernel_size << ")"); + if (kernels.n_rows() != n_nodes || kernels.n_cols() != n_nodes * kernel_size) { + TICK_ERROR("kernels argument must be an array of shape (" << n_nodes << ", " + << n_nodes * kernel_size << ")"); } } @@ -357,8 +462,7 @@ void HawkesEM::set_kernel_support(const double kernel_support) { "is explicitly set") } if (kernel_support <= 0) { - TICK_ERROR("Kernel support must be positive and you have provided " - << kernel_support) + TICK_ERROR("Kernel support must be positive and you have provided " << kernel_support) } this->kernel_support = kernel_support; weights_computed = false; @@ -371,8 +475,7 @@ void HawkesEM::set_kernel_size(const ulong kernel_size) { "is explicitly set") } if (kernel_size <= 0) { - TICK_ERROR("Kernel size must be positive and you have provided " - << kernel_size) + TICK_ERROR("Kernel size must be positive and you have provided " << kernel_size) } this->kernel_size = kernel_size; weights_computed = false; @@ -393,16 +496,13 @@ void HawkesEM::set_kernel_dt(const double kernel_dt) { << kernel_dt) } if (kernel_dt > kernel_support) { - TICK_ERROR( - "Kernel discretization parameter must be smaller than kernel support." - << "You have provided " << kernel_dt << " and kernel support is " - << kernel_support) + TICK_ERROR("Kernel discretization parameter must be smaller than kernel support." + << "You have provided " << kernel_dt << " and kernel support is " << kernel_support) } set_kernel_size(static_cast(std::ceil(kernel_support / kernel_dt))); } -void HawkesEM::set_kernel_discretization( - const SArrayDoublePtr kernel_discretization1) { +void HawkesEM::set_kernel_discretization(const SArrayDoublePtr kernel_discretization1) { set_kernel_support(kernel_discretization1->last()); set_kernel_size(kernel_discretization1->size() - 1); @@ -422,8 +522,7 @@ void HawkesEM::set_kernel_discretization( SArrayDoublePtr HawkesEM::get_kernel_discretization() const { if (kernel_discretization == nullptr) { ArrayDouble kernel_discretization_tmp = arange(0, kernel_size + 1); - kernel_discretization_tmp.mult_fill(kernel_discretization_tmp, - get_kernel_fixed_dt()); + kernel_discretization_tmp.mult_fill(kernel_discretization_tmp, get_kernel_fixed_dt()); return kernel_discretization_tmp.as_sarray_ptr(); } else { return kernel_discretization; diff --git a/lib/cpp/hawkes/simulation/hawkes_kernels/hawkes_kernel.cpp b/lib/cpp/hawkes/simulation/hawkes_kernels/hawkes_kernel.cpp index 5c8e16947..ab2bb0903 100644 --- a/lib/cpp/hawkes/simulation/hawkes_kernels/hawkes_kernel.cpp +++ b/lib/cpp/hawkes/simulation/hawkes_kernels/hawkes_kernel.cpp @@ -35,6 +35,15 @@ SArrayDoublePtr HawkesKernel::get_values(const ArrayDouble &t_values) { return y_values; } +// Get a shared array representing the values of the time-integral of the kernel on the t_values +SArrayDoublePtr HawkesKernel::get_primitive_values(const ArrayDouble &t_values) { + SArrayDoublePtr y_values = SArrayDouble::new_ptr(t_values.size()); + for (ulong i = 0; i < y_values->size(); ++i) { + (*y_values)[i] = get_primitive_value(t_values[i]); + } + return y_values; +} + // Get L1 norm // By default, it discretizes the integral with nsteps (Riemann sum with // step-wise function) Should be overloaded if L1 norm closed formula exists diff --git a/lib/cpp/hawkes/simulation/hawkes_kernels/hawkes_kernel_time_func.cpp b/lib/cpp/hawkes/simulation/hawkes_kernels/hawkes_kernel_time_func.cpp index cc4ec534a..36f7693a5 100644 --- a/lib/cpp/hawkes/simulation/hawkes_kernels/hawkes_kernel_time_func.cpp +++ b/lib/cpp/hawkes/simulation/hawkes_kernels/hawkes_kernel_time_func.cpp @@ -12,22 +12,17 @@ HawkesKernelTimeFunc::HawkesKernelTimeFunc(const TimeFunction &time_function) support = time_function.get_support_right(); } -HawkesKernelTimeFunc::HawkesKernelTimeFunc(const ArrayDouble &t_axis, - const ArrayDouble &y_axis) +HawkesKernelTimeFunc::HawkesKernelTimeFunc(const ArrayDouble &t_axis, const ArrayDouble &y_axis) : HawkesKernelTimeFunc(TimeFunction(t_axis, y_axis)) {} -HawkesKernelTimeFunc::HawkesKernelTimeFunc() - : HawkesKernelTimeFunc(TimeFunction(0.0)) {} +HawkesKernelTimeFunc::HawkesKernelTimeFunc() : HawkesKernelTimeFunc(TimeFunction(0.0)) {} -double HawkesKernelTimeFunc::get_value_(double x) { - return time_function.value(x); -} +double HawkesKernelTimeFunc::get_value_(double x) { return time_function.value(x); } + +double HawkesKernelTimeFunc::get_norm(int _) { return time_function.get_norm(); } double HawkesKernelTimeFunc::get_future_max(double t, double value_at_t) { return time_function.future_bound(t); } -double HawkesKernelTimeFunc::get_primitive_value_(double x) { - return time_function.primitive(x); -} - +double HawkesKernelTimeFunc::get_primitive_value_(double x) { return time_function.primitive(x); } diff --git a/lib/include/tick/base/time_func.h b/lib/include/tick/base/time_func.h index 6a8aa34a5..12ac6a6b3 100644 --- a/lib/include/tick/base/time_func.h +++ b/lib/include/tick/base/time_func.h @@ -70,8 +70,14 @@ class DLL_PUBLIC TimeFunction { SArrayDoublePtr get_sampled_y() const { return sampled_y; } + ulong get_sample_size() const { return sampled_y->size(); } + SArrayDoublePtr get_future_max() const { return future_max; } + double get_t0() const { return t0; } + + void set_t0(double t0) { this->t0 = t0; } + double get_dt() const { return dt; } double get_support_right() const { return support_right; } @@ -111,6 +117,14 @@ class DLL_PUBLIC TimeFunction { inline double get_t_from_index_(ulong i); + inline double _t_left(double t); + + inline double _t_right(double t); + + inline ulong _idx_left(double t); + + inline ulong _idx_right(double t); + inline double constant_left_interpolation(double x_left, double y_left, double x_right, double y_right, double x_value); diff --git a/lib/include/tick/hawkes/inference/hawkes_em.h b/lib/include/tick/hawkes/inference/hawkes_em.h index a586c9e95..e55c9bb7c 100644 --- a/lib/include/tick/hawkes/inference/hawkes_em.h +++ b/lib/include/tick/hawkes/inference/hawkes_em.h @@ -4,7 +4,9 @@ // License: BSD 3 clause +#include #include "tick/base/base.h" +#include "tick/base/time_func.h" #include "tick/hawkes/model/base/model_hawkes_list.h" //////////////////////////////////////////////////////////////////////////////////////////// @@ -26,6 +28,7 @@ class DLL_PUBLIC HawkesEM : public ModelHawkesList { ulong kernel_size; //! @brief explicit discretization of the kernel + //! It holds kernel_discretization.size() = kernel_size - 1 SArrayDoublePtr kernel_discretization; //! @brief buffer variables @@ -33,12 +36,16 @@ class DLL_PUBLIC HawkesEM : public ModelHawkesList { ArrayDouble2d next_kernels; ArrayDouble2d unnormalized_kernels; + //! @brief buffer variables for evaluation of integral of intensity + ArrayDouble _baseline; + ArrayDouble2d _adjacency; + std::vector kernel_time_func; + bool is_kernel_time_func = 0; + public: - HawkesEM(const double kernel_support, const ulong kernel_size, - const int max_n_threads = 1); + HawkesEM(const double kernel_support, const ulong kernel_size, const int max_n_threads = 1); - explicit HawkesEM(const SArrayDoublePtr kernel_discretization, - const int max_n_threads = 1); + explicit HawkesEM(const SArrayDoublePtr kernel_discretization, const int max_n_threads = 1); //! @brief allocate buffer arrays once data has been given void allocate_weights(); @@ -51,10 +58,25 @@ class DLL_PUBLIC HawkesEM : public ModelHawkesList { SArrayDouble2dPtr get_kernel_norms(ArrayDouble2d &kernels) const; + SArrayDouble2dPtr get_kernel_primitives(ArrayDouble2d &kernels) const; + + std::vector &get_kernel_time_func() { return kernel_time_func; } + double get_kernel_support() const { return kernel_support; } ulong get_kernel_size() const { return kernel_size; } + //! @brief Left point of kernel discretization. + //! Returns 0. if kernel _scretization is Implicitly set, + //! otherwise it returns the left most entry of the array. + double get_kernel_t0(); + + //! @brief Discretization parameter of the kernel + //! If kernel_discretization is a nullptr then it is equal to kernel_support / + //! kernel_size otherwise it is equal to the difference of + //! kernel_discretization[m+1] - kernel_discretization[m] + double get_kernel_dt(const ulong m = 0) const; + double get_kernel_fixed_dt() const; SArrayDoublePtr get_kernel_discretization() const; @@ -71,43 +93,44 @@ class DLL_PUBLIC HawkesEM : public ModelHawkesList { void set_kernel_discretization(const SArrayDoublePtr kernel_discretization); + void init_kernel_time_func(ArrayDouble2d &kernels); + + void set_buffer_variables_for_integral_of_intensity(ArrayDouble &mu, ArrayDouble2d &kernels); + + SArrayDoublePtr primitive_of_intensity_at_jump_times(const ulong r_u, ArrayDouble &mu, + ArrayDouble2d &kernels); + + SArrayDoublePtr primitive_of_intensity_at_jump_times(const ulong r_u); + private: //! @brief A method called in parallel by the method 'solve' - //! @param r_u : r * n_realizations + u, tells which realization and which + //! @param r_u : r * n_nodes + u, tells which realization and which //! node void solve_ur(const ulong r_u, const ArrayDouble &mu, ArrayDouble2d &kernel); //! @brief A method called in parallel by the method 'loglikelihood' - //! @param r_u : r * n_realizations + u, tells which realization and which + //! @param r_u : r * n_nodes + u, tells which realization and which //! node - double loglikelihood_ur(const ulong r_u, const ArrayDouble &mu, - ArrayDouble2d &kernels); + double loglikelihood_ur(const ulong r_u, const ArrayDouble &mu, ArrayDouble2d &kernels); //! @brief A method called by solve_ur and logliklihood_ur to compute all //! intensities at all timestamps occuring in node u of realization r - //! @param r_u : r * n_realizations + u, tells which realization and which + //! @param r_u : r * n_nodes + u, tells which realization and which //! node //! @param intensity_func : function that will be called for all timestamps //! with the intensity at this timestamp as argument //! @param store_unnormalized_kernel : solve_ur method needs to store an //! unnormalized version of the kernels in the class variable //! unnormalized_kernels - void compute_intensities_ur(const ulong r_u, const ArrayDouble &mu, - ArrayDouble2d &kernels, + void compute_intensities_ur(const ulong r_u, const ArrayDouble &mu, ArrayDouble2d &kernels, std::function intensity_func, bool store_unnormalized_kernel); - double compute_compensator_ur(const ulong r_u, const ArrayDouble &mu, - ArrayDouble2d &kernels); + double compute_compensator_ur(const ulong r_u, const ArrayDouble &mu, ArrayDouble2d &kernels); - void check_baseline_and_kernels(const ArrayDouble &mu, - ArrayDouble2d &kernels) const; + double _evaluate_primitive_of_intensity(const double t, const ulong r, const ulong u); - //! @brief Discretization parameter of the kernel - //! If kernel_discretization is a nullptr then it is equal to kernel_support / - //! kernel_size otherwise it is equal to the difference of - //! kernel_discretization[m+1] - kernel_discretization[m] - double get_kernel_dt(const ulong m = 0) const; + void check_baseline_and_kernels(const ArrayDouble &mu, ArrayDouble2d &kernels) const; }; #endif // LIB_INCLUDE_TICK_HAWKES_INFERENCE_HAWKES_EM_H_ diff --git a/lib/include/tick/hawkes/model/base/model_hawkes.h b/lib/include/tick/hawkes/model/base/model_hawkes.h index c96afecbb..cd53bd20b 100644 --- a/lib/include/tick/hawkes/model/base/model_hawkes.h +++ b/lib/include/tick/hawkes/model/base/model_hawkes.h @@ -35,12 +35,14 @@ class DLL_PUBLIC ModelHawkes : public Model { //! \param max_n_threads : maximum number of threads to be used for //! multithreading \param optimization_level : 0 corresponds to no //! optimization and 1 to use of faster (approximated) exponential function - ModelHawkes(const int max_n_threads = 1, - const unsigned int optimization_level = 0); + ModelHawkes(const int max_n_threads = 1, const unsigned int optimization_level = 0); //! @brief Returns the number of components of the process ulong get_n_nodes() const { return n_nodes; } + //! @brief set n_nodes + void set_n_nodes(const ulong n_nodes); + ulong get_n_total_jumps() const { return n_jumps_per_node->sum(); } void set_n_threads(const int max_n_threads); @@ -48,9 +50,6 @@ class DLL_PUBLIC ModelHawkes : public Model { SArrayULongPtr get_n_jumps_per_node() const { return n_jumps_per_node; } protected: - //! @brief set n_nodes - void set_n_nodes(const ulong n_nodes); - //! @brief Custom exponential function taking into account optimization //! level //! \param x : The value exponential is computed at @@ -72,8 +71,7 @@ class DLL_PUBLIC ModelHawkes : public Model { ss << get_class_name() << std::endl; auto are_equal = TICK_CMP_REPORT(ss, max_n_threads) && TICK_CMP_REPORT(ss, optimization_level) && - TICK_CMP_REPORT(ss, weights_computed) && - TICK_CMP_REPORT(ss, n_nodes) && + TICK_CMP_REPORT(ss, weights_computed) && TICK_CMP_REPORT(ss, n_nodes) && TICK_CMP_REPORT_PTR(ss, n_jumps_per_node); return BoolStrReport(are_equal, ss.str()); } @@ -81,9 +79,7 @@ class DLL_PUBLIC ModelHawkes : public Model { std::stringstream ss; return compare(that, ss); } - BoolStrReport operator==(const ModelHawkes &that) { - return ModelHawkes::compare(that); - } + BoolStrReport operator==(const ModelHawkes &that) { return ModelHawkes::compare(that); } }; #endif // LIB_INCLUDE_TICK_HAWKES_MODEL_BASE_MODEL_HAWKES_H_ diff --git a/lib/include/tick/hawkes/model/base/model_hawkes_list.h b/lib/include/tick/hawkes/model/base/model_hawkes_list.h index b9f542e87..5038cc664 100644 --- a/lib/include/tick/hawkes/model/base/model_hawkes_list.h +++ b/lib/include/tick/hawkes/model/base/model_hawkes_list.h @@ -32,16 +32,13 @@ class DLL_PUBLIC ModelHawkesList : public ModelHawkes { //! negative, the number of physical cores will be used \param //! optimization_level : 0 corresponds to no optimization and 1 to use of //! faster (approximated) exponential function - ModelHawkesList(const int max_n_threads = 1, - const unsigned int optimization_level = 0); + ModelHawkesList(const int max_n_threads = 1, const unsigned int optimization_level = 0); virtual void set_data(const SArrayDoublePtrList2D ×tamps_list, const VArrayDoublePtr end_times); //! @brief returns the number of jumps per realization - SArrayULongPtr get_n_jumps_per_realization() const { - return n_jumps_per_realization; - } + SArrayULongPtr get_n_jumps_per_realization() const { return n_jumps_per_realization; } VArrayDoublePtr get_end_times() const { return end_times; } @@ -62,8 +59,7 @@ class DLL_PUBLIC ModelHawkesList : public ModelHawkes { BoolStrReport compare(const ModelHawkesList &that, std::stringstream &ss) { ss << get_class_name() << std::endl; - auto are_equal = ModelHawkes::compare(that, ss) && - TICK_CMP_REPORT(ss, n_realizations) && + auto are_equal = ModelHawkes::compare(that, ss) && TICK_CMP_REPORT(ss, n_realizations) && TICK_CMP_REPORT_VECTOR_SPTR_2D(ss, timestamps_list, double) && TICK_CMP_REPORT_PTR(ss, end_times) && TICK_CMP_REPORT_PTR(ss, n_jumps_per_realization); @@ -73,9 +69,7 @@ class DLL_PUBLIC ModelHawkesList : public ModelHawkes { std::stringstream ss; return compare(that, ss); } - BoolStrReport operator==(const ModelHawkesList &that) { - return ModelHawkesList::compare(that); - } + BoolStrReport operator==(const ModelHawkesList &that) { return ModelHawkesList::compare(that); } }; #endif // LIB_INCLUDE_TICK_HAWKES_MODEL_BASE_MODEL_HAWKES_LIST_H_ diff --git a/lib/include/tick/hawkes/simulation/hawkes_kernels/hawkes_kernel.h b/lib/include/tick/hawkes/simulation/hawkes_kernels/hawkes_kernel.h index 96320a797..a05a62465 100644 --- a/lib/include/tick/hawkes/simulation/hawkes_kernels/hawkes_kernel.h +++ b/lib/include/tick/hawkes/simulation/hawkes_kernels/hawkes_kernel.h @@ -96,6 +96,9 @@ class DLL_PUBLIC HawkesKernel { //! @brief Returns the value of the kernel for each t in t_values SArrayDoublePtr get_values(const ArrayDouble &t_values); + //! @brief Returns the value of the time-integral of the kernel for each t in t_values + SArrayDoublePtr get_primitive_values(const ArrayDouble &t_values); + /** * Computes L1 norm * @param nsteps: number of steps used for integral discretization diff --git a/lib/include/tick/hawkes/simulation/hawkes_kernels/hawkes_kernel_time_func.h b/lib/include/tick/hawkes/simulation/hawkes_kernels/hawkes_kernel_time_func.h index 21b836eeb..920b0c040 100644 --- a/lib/include/tick/hawkes/simulation/hawkes_kernels/hawkes_kernel_time_func.h +++ b/lib/include/tick/hawkes/simulation/hawkes_kernels/hawkes_kernel_time_func.h @@ -20,8 +20,8 @@ class DLL_PUBLIC HawkesKernelTimeFunc : public HawkesKernel { //! Getting the value of the kernel at the point x (where x is positive) double get_value_(double x) override; - //! Getting the value of the primitive of the kernel - //at the point x (where x is positive) + //! Getting the value of the primitive of the kernel + // at the point x (where x is positive) double get_primitive_value_(double x) override; public: @@ -43,13 +43,14 @@ class DLL_PUBLIC HawkesKernelTimeFunc : public HawkesKernel { */ double get_future_max(double t, double value_at_t) override; + double get_norm(int _ = 1) override; + //! @brief simple getter const TimeFunction &get_time_function() const { return time_function; } template void serialize(Archive &ar) { - ar(cereal::make_nvp("HawkesKernel", - cereal::base_class(this))); + ar(cereal::make_nvp("HawkesKernel", cereal::base_class(this))); ar(CEREAL_NVP(time_function)); } diff --git a/lib/mkn.yaml b/lib/mkn.yaml index b2946533d..253f0b2fc 100644 --- a/lib/mkn.yaml +++ b/lib/mkn.yaml @@ -21,7 +21,7 @@ property: lib_path: ./../tick cargs: -DNPY_NO_DEPRECATED_API=NPY_1_7_API_VERSION -D_FILE_OFFSET_BITS=64 -DPYTHON_LINK -DNDEBUG - win_cargs: -GL -MD -EHsc -DBUILDING_DLL -std:c++11 + win_cargs: -GL -MD -EHsc -DBUILDING_DLL nixish_cargs: -fwrapv -O2 -Wall -fstack-protector-strong -Wformat -Werror -Wdate-time @@ -53,7 +53,7 @@ profile: if_link: nix_lib: ${nix_largs} bsd_lib: ${bsd_largs} - win: -nodefaultlib:libucrt.lib ucrt.lib -MANIFEST:EMBED,ID=2 -MANIFESTUAC:NO -LTCG + win: -LTCG # -nodefaultlib:libucrt.lib ucrt.lib -MANIFEST:EMBED,ID=2 -MANIFESTUAC:NO - name: base parent: arg @@ -216,4 +216,4 @@ profile: - name: gtest parent: gtest_nodep - self: solver hawkes/model hawkes/simulation robust survival + self: solver hawkes/model hawkes/simulation hawkes/inference robust survival diff --git a/lib/swig/tick/array/array_module.i b/lib/swig/tick/array/array_module.i index 86e81f270..617e99b41 100644 --- a/lib/swig/tick/array/array_module.i +++ b/lib/swig/tick/array/array_module.i @@ -10,12 +10,14 @@ #include "numpy/arrayobject.h" #include "numpy/npy_math.h" +#ifndef _WIN32 // initialize python and numpy int tick_import_numpy(){ Py_Initialize(); return _import_array(); } static const int tick_numpy_import = tick_import_numpy(); +#endif #include "tick/base/debug.h" #include "tick/array/carray_python.h" diff --git a/lib/swig/tick/hawkes/inference/hawkes_em.i b/lib/swig/tick/hawkes/inference/hawkes_em.i index ed196c784..919c629fd 100644 --- a/lib/swig/tick/hawkes/inference/hawkes_em.i +++ b/lib/swig/tick/hawkes/inference/hawkes_em.i @@ -20,6 +20,7 @@ class HawkesEM : public ModelHawkesList { void solve(ArrayDouble &mu, ArrayDouble2d &kernels); SArrayDouble2dPtr get_kernel_norms(ArrayDouble2d &kernels) const; + SArrayDouble2dPtr get_kernel_primitives(ArrayDouble2d &kernels) const; double loglikelihood(ArrayDouble &mu, ArrayDouble2d &kernels); double get_kernel_support() const; @@ -31,4 +32,11 @@ class HawkesEM : public ModelHawkesList { void set_kernel_size(const ulong kernel_size); void set_kernel_dt(const double kernel_dt); void set_kernel_discretization(const SArrayDoublePtr kernel_discretization); + + void set_buffer_variables_for_integral_of_intensity(ArrayDouble &mu, ArrayDouble2d &kernels); + SArrayDoublePtr primitive_of_intensity_at_jump_times(const ulong r_u, ArrayDouble &mu, + ArrayDouble2d &kernels); + + SArrayDoublePtr primitive_of_intensity_at_jump_times(const ulong r_u); + }; diff --git a/lib/swig/tick/hawkes/simulation/hawkes_kernels.i b/lib/swig/tick/hawkes/simulation/hawkes_kernels.i index 6b1e53a4c..1286034a2 100644 --- a/lib/swig/tick/hawkes/simulation/hawkes_kernels.i +++ b/lib/swig/tick/hawkes/simulation/hawkes_kernels.i @@ -12,6 +12,7 @@ class HawkesKernel { double get_value(double x); SArrayDoublePtr get_values(const ArrayDouble &t_values); double get_primitive_value(double t); + SArrayDoublePtr get_primitive_values(const ArrayDouble &t_values); double get_primitive_value(double s, double t); virtual double get_norm(int nsteps = 10000); }; diff --git a/sh/configure_env.sh b/sh/configure_env.sh index 29a02a206..44e8a2f2a 100755 --- a/sh/configure_env.sh +++ b/sh/configure_env.sh @@ -17,7 +17,7 @@ # ###################################################################### -set -ex +set -e echo "Entering configure_env.sh" @@ -329,7 +329,7 @@ MKN=${MKN:-mkn} export MKN_LIB_LINK_LIB=1 export TICK_CONFIGURED=1 -echo "Finished configure_env.sh for python (${PYVER}.${PYVER_MIN}) with the following args" +echo "Finished configure_env.sh for python (${PYVER}) with the following args" echo "LDARGS = ${LDARGS}" echo "" ## diff --git a/sh/gtest.sh b/sh/gtest.sh index 61fbae84f..0033ce007 100755 --- a/sh/gtest.sh +++ b/sh/gtest.sh @@ -70,11 +70,11 @@ pushd $ROOT/lib 2>&1 > /dev/null echo FILE $FILE - mkn clean build -p gtest -a "${CARGS} -DGTEST_LINKED_AS_SHARED_LIBRARY" \ + mkn clean build -p gtest -a "${CARGS} -DGTEST_LINKED_AS_SHARED_LIBRARY" -g 9 -O 0 \ -tl "${LDARGS} ${LIB}" -b "$PY_INCS" \ -M "${FILE}" -P "${MKN_P}" "${MKN_WITH[@]}" \ -B $B_PATH ${RUN} ${MKN_X_FILE[@]} done -popd 2>&1 > /dev/null \ No newline at end of file +popd 2>&1 > /dev/null diff --git a/sh/mkn.sh b/sh/mkn.sh index bef11cab0..1a4dbaca5 100755 --- a/sh/mkn.sh +++ b/sh/mkn.sh @@ -25,7 +25,7 @@ pushd $CWD/.. 2>&1 > /dev/null ROOT=$PWD popd 2>&1 > /dev/null -source $ROOT/sh/configure_env.sh +source $CWD/configure_env.sh COMPILE=1 LINK=1 diff --git a/tick/base/tests/base_test.py b/tick/base/tests/base_test.py index eaaec8a30..7338259ef 100644 --- a/tick/base/tests/base_test.py +++ b/tick/base/tests/base_test.py @@ -1,6 +1,7 @@ # License: BSD 3 clause # -*- coding: utf8 -*- +import sys import unittest from tick.base import Base @@ -471,7 +472,10 @@ def test_getter_only_native_property(self): def frop(x): self.a0.readonly_prop = x - self.assertRaisesRegex(AttributeError, "can't set attribute", frop, 45) + msg = "can't set attribute" + if sys.version_info[1] == 11: + msg = "property 'readonly_prop' of 'A0' object has no setter" + self.assertRaisesRegex(AttributeError, msg, frop, 45) def test_attributes_are_not_shared(self): """...Test that two instances of the same class do not share any diff --git a/tick/base/tests/time_func_test.py b/tick/base/tests/time_func_test.py index 58c9e12e5..aef6264a8 100644 --- a/tick/base/tests/time_func_test.py +++ b/tick/base/tests/time_func_test.py @@ -86,7 +86,7 @@ def test_sample_y(self): created_sample_y = tf.sampled_y[:-1] sampled_times = t_array[0] + \ - tf.dt * np.arange(len(created_sample_y)) + tf.dt * np.arange(len(created_sample_y)) true_sample_y = dichotomic_value(sampled_times, t_array, y_array, inter_mode=inter_mode) @@ -144,26 +144,46 @@ def test_norm(self): tf = TimeFunction([T, Y], inter_mode=TimeFunction.InterConstLeft, dt=0.3) - self.assertAlmostEqual(tf.get_norm(), -1.1) + self.assertAlmostEqual(tf.get_norm(), -1.2) tf = TimeFunction([T, Y], inter_mode=TimeFunction.InterConstRight, dt=0.3) self.assertAlmostEqual(tf.get_norm(), 1.2) tf = TimeFunction([T, Y], inter_mode=TimeFunction.InterLinear, dt=0.3) - self.assertAlmostEqual(tf.get_norm(), 0) + self.assertAlmostEqual(tf.get_norm(), -.09, places=2) def test_cyclic(self): """...Test cyclic border type """ - last_value = 2.3 - T = np.linspace(0, last_value) - Y = np.cos(T * np.pi) + period = 1. + discretization_points = 200 + T = np.linspace(0, period, num=discretization_points) + Y = np.cos(2 * np.pi * T) tf = TimeFunction([T, Y], border_type=TimeFunction.Cyclic) - - self.assertAlmostEqual( - tf.value(0.3), tf.value(last_value + 0.3), delta=1e-8) + self.assertEqual( + tf.border_type, + TimeFunction.Cyclic, + ) + evaluation_points = np.random.uniform( + low=0., + high=period, + size=100, + ) + expected_values = np.cos(2 * np.pi * evaluation_points) + + np.testing.assert_array_almost_equal( + tf.value(evaluation_points), + expected_values, + decimal=3, + ) + + np.testing.assert_array_almost_equal( + tf.value(evaluation_points), + tf.value(period + evaluation_points), + decimal=3, + ) if __name__ == "__main__": diff --git a/tick/hawkes/inference/hawkes_em.py b/tick/hawkes/inference/hawkes_em.py index 1676fda80..b0cee8ecf 100644 --- a/tick/hawkes/inference/hawkes_em.py +++ b/tick/hawkes/inference/hawkes_em.py @@ -1,5 +1,6 @@ # License: BSD 3 clause +from typing import List import numpy as np from tick.hawkes.inference.base import LearnerHawkesNoParam @@ -239,6 +240,36 @@ def get_kernel_values(self, i, j, abscissa_array): kernel_values[indices_in_support] = self.kernel[i, j, index] return kernel_values + def _compute_primitive_kernel_values(self, i, j, abscissa_array): + """Computes value of the integral in time of the specified kernel + on given time values. + + Parameters + ---------- + i : `int` + First index of the kernel + + j : `int` + Second index of the kernel + + abscissa_array : `np.ndarray`, shape=(n_points, ) + 1d array containing all the times at which this kernel will + computes it value + + Returns + ------- + output : `np.ndarray`, shape=(n_points, ) + 1d array containing the values of the integral in time of the specified + kernels at the given times. + """ + n = self.n_nodes + index = np.maximum(0, np.searchsorted( + self.kernel_discretization, abscissa_array) - 1) + primitives = self._get_kernel_primitives() + assert primitives.shape == (n, n, self.kernel_size) + vals = primitives[i, j, index] + return vals + def get_kernel_norms(self): """Computes kernel norms. This makes our learner compliant with `tick.plot.plot_hawkes_kernel_norms` API @@ -251,6 +282,25 @@ def get_kernel_norms(self): """ return self._learner.get_kernel_norms(self._flat_kernels) + def _get_kernel_primitives(self): + """Computes integral in time of the kernel. + + Returns + ------- + primitives : `np.ndarray`, shape=(n_nodes, n_nodes, kernel_size) + 3d array in which each entry i, j, t corresponds to the values of the + integral in time of the kernel i, j evaluated at + the discretization point indexed by t. + """ + n = self.n_nodes + _flat_primitives = self._learner.get_kernel_primitives( + self._flat_kernels) + assert _flat_primitives.shape == (n, n * self.kernel_size) + primitives = _flat_primitives.reshape((n, n, self.kernel_size)) + assert np.allclose(primitives.reshape(( + n, n * self.kernel_size)), _flat_primitives) + return primitives + def objective(self, coeffs, loss: float = None): raise NotImplementedError() @@ -311,6 +361,80 @@ def score(self, events=None, end_times=None, baseline=None, kernel=None): return learner._learner.loglikelihood(baseline, flat_kernels) + def time_changed_interarrival_times(self, events=None, end_times=None, baseline=None, kernel=None): + """Compute time change interarrival times + + Parameters + ---------- + events : `list` of `list` of `np.ndarray`, default = None + List of Hawkes processes realizations used to measure score. + Each realization of the Hawkes process is a list of n_node for + each component of the Hawkes. Namely `events[i][j]` contains a + one-dimensional `numpy.array` of the events' timestamps of + component j of realization i. + If only one realization is given, it will be wrapped into a list + If None, events given while fitting model will be used + + end_times : `np.ndarray` or `float`, default = None + List of end time of all hawkes processes used to measure score. + If None, it will be set to each realization's latest time. + If only one realization is provided, then a float can be given. + + baseline : `np.ndarray`, shape=(n_nodes, ), default = None + Baseline vector for which the score is measured + If `None` baseline obtained during fitting is used + + kernel : `None` or `np.ndarray`, shape=(n_nodes, n_nodes, kernel_size), default=None + Used to force start values for kernel parameter + If `None` kernel obtained during fitting is used + + Returns + ------- + res : `list` of `list` of `np.ndarray` + Computed inter-arrival times for every component and every realization, namely + `res[i][j]` is a one-dimensional `numpy.array` of interarrival times for node `j` of realization `i`. + In other words, `res[i][j][k]` is the difference `T^j_{k+1} - T^j_{k}`, + where `T^{j}_{k}` is the `k`-th jump time of + the `j`-th component in the realization `i`. + """ + # Interface logic is the same of `self.score` + if events is None and not self._fitted: + raise ValueError('You must either call `fit` before `time_changed_interarrival_times` or ' + 'provide events') + + if events is None and end_times is None: + learner = self + else: + learner = HawkesEM(**self.get_params()) + learner._set('_end_times', end_times) + learner._set_data(events) + + n_nodes = learner.n_nodes + kernel_size = learner.kernel_size + + if baseline is None: + baseline = self.baseline + + if kernel is None: + kernel = self.kernel + + flat_kernels = kernel.reshape((n_nodes, n_nodes * kernel_size)) + learner._learner.set_buffer_variables_for_integral_of_intensity( + baseline, flat_kernels) + + res: List[List[np.ndarray]] = [] + + # TODO: Test this + for r in range(learner.n_realizations): + res_r: List[np.ndarray] = [] + for u in range(n_nodes): + r_u = r * n_nodes + u + x = learner._learner.primitive_of_intensity_at_jump_times(r_u) + res_r.append( + np.diff(x, axis=0)) + res.append(res_r) + return res + def get_params(self): return { 'kernel_support': self.kernel_support, diff --git a/tick/hawkes/inference/tests/hawkes_em_test.py b/tick/hawkes/inference/tests/hawkes_em_test.py index bad997294..e6e6b958a 100644 --- a/tick/hawkes/inference/tests/hawkes_em_test.py +++ b/tick/hawkes/inference/tests/hawkes_em_test.py @@ -7,6 +7,95 @@ from tick.hawkes.inference import HawkesEM from tick.hawkes.model.tests.model_hawkes_test_utils import ( hawkes_intensities, hawkes_log_likelihood) +from tick.hawkes import (SimuHawkes, HawkesKernelTimeFunc, + HawkesKernelExp, SimuHawkesExpKernels) +from tick.base import TimeFunction + + +def simulate_hawkes_exp_kern( + decays=[[1., 1.5], [0.1, 0.5]], + baseline=[0.12, 0.07], + adjacency=[[.1, .4], [.2, 0.5]], + end_time=3000, + max_jumps=1000, + verbose=False, + force_simulation=False, + seed=None, +): + model = SimuHawkesExpKernels( + adjacency=adjacency, + decays=decays, + baseline=baseline, + end_time=end_time, + max_jumps=max_jumps, + verbose=verbose, + force_simulation=force_simulation, + seed=seed, + ) + model.track_intensity(intensity_track_step=0.1) + model.simulate() + return model + + +def compute_approx_support_of_exp_kernel(a, b, eps): + return np.maximum(0., np.squeeze(np.max(- np.log(eps / (a*b)) / b))) + + +def simulate_hawkes_nonparam_kern( + end_time=30000, + seed=None +): + t_values1 = np.array([0, 1, 1.5, 2., 3.5], dtype=float) + y_values1 = np.array([0, 0.2, 0, 0.1, 0.], dtype=float) + tf1 = TimeFunction([t_values1, y_values1], + inter_mode=TimeFunction.InterConstRight, dt=0.1) + kernel1 = HawkesKernelTimeFunc(tf1) + + t_values2 = np.linspace(0, 4, 20) + y_values2 = np.maximum(0., np.sin(t_values2) / 4) + tf2 = TimeFunction([t_values2, y_values2]) + kernel2 = HawkesKernelTimeFunc(tf2) + + baseline = np.array([0.1, 0.3]) + + hawkes = SimuHawkes( + baseline=baseline, + end_time=end_time, + verbose=False, + seed=seed, + ) + + hawkes.set_kernel(0, 0, kernel1) + hawkes.set_kernel(0, 1, HawkesKernelExp(.5, .7)) + hawkes.set_kernel(1, 1, kernel2) + + hawkes.simulate() + return hawkes + + +def discretization_of_exp_kernel( + n_nodes, a, b, kernel_support, kernel_size): + assert (n_nodes, n_nodes) == a.shape + assert a.shape == b.shape + assert kernel_size > 0 + assert kernel_support > .0 + abscissa = np.linspace(0, kernel_support, kernel_size) + abscissa_ = np.expand_dims(abscissa, axis=0) + abscissa_ = np.repeat(abscissa_, repeats=n_nodes, axis=0) + abscissa__ = np.expand_dims(abscissa_, axis=0) + abscissa__ = np.repeat(abscissa__, repeats=n_nodes, axis=0) + assert abscissa_.shape == (n_nodes, kernel_size) + assert abscissa__.shape == (n_nodes, n_nodes, kernel_size) + a_ = np.repeat(np.expand_dims(a, axis=-1), + repeats=kernel_size, axis=2) + b_ = np.repeat(np.expand_dims(b, axis=-1), + repeats=kernel_size, axis=2) + assert a_.shape == (n_nodes, n_nodes, kernel_size) + assert a_.shape == b_.shape + assert a_.shape == abscissa__.shape + res_ = a_ * b_ * np.exp(-b_ * abscissa__) + assert res_.shape == (n_nodes, n_nodes, kernel_size) + return res_ class Test(unittest.TestCase): @@ -27,7 +116,7 @@ def test_hawkes_em_attributes(self): self.assertEqual(em.n_nodes, self.n_nodes) self.assertEqual(em.n_realizations, self.n_realizations) - def test_hawkes_em_fit(self): + def test_hawkes_em_fit_1(self): # hard-coded """...Test fit method of HawkesEM """ kernel_support = 3 @@ -76,6 +165,187 @@ def test_hawkes_em_fit(self): em.get_kernel_supports(), np.ones((self.n_nodes, self.n_nodes)) * 3) + # With simulated data from parametric (exponential) model + def test_hawkes_em_fit_2(self): + """ + Test estimation on simulated data from a parametric + Hawkes model with exponential kernels + """ + seed = 12345 + n_nodes = 2 + n_realizations = 1 + decays = np.array([[1., 1.5], [0.1, 0.5]]) + baseline = np.array([0.12, 0.07]) + adjacency = np.array([[.1, .4], [.2, 0.03]]) + end_time = 30000 + simu_model = simulate_hawkes_exp_kern( + decays=decays, + baseline=baseline, + adjacency=adjacency, + end_time=end_time, + max_jumps=200000, + verbose=True, + force_simulation=False, + seed=seed, + ) + + kernel_support = compute_approx_support_of_exp_kernel( + adjacency, decays, 1e-4) + # print(f'kernel_suppport = {kernel_support}') + kernel_size = 20 + + events = simu_model.timestamps + baseline_start = np.array([.05 * np.mean(np.diff(ts)) + for ts in simu_model.timestamps]) + # print(f'baseline_start = {baseline_start}') + kernel_start = np.zeros((n_nodes, n_nodes, kernel_size)) + kernel_start[:, :, :kernel_size-1] = .01 * np.cumsum( + np.random.uniform(size=(n_nodes, n_nodes, kernel_size-1)), axis=2)[:, :, ::-1] + # print(f'kernel_start = {kernel_start}') + em = HawkesEM(kernel_support=kernel_support, + kernel_size=kernel_size, + tol=1e-9, + print_every=50, + record_every=10, + max_iter=1200, + verbose=True) + em.fit(events, baseline_start=baseline_start, + kernel_start=kernel_start) + + expected_kernel = discretization_of_exp_kernel( + n_nodes, + adjacency, + decays, + kernel_support, + kernel_size, + ) + expected_baseline = baseline + + # Test 1.1: shape of baseline + self.assertEqual( + expected_baseline.shape, + em.baseline.shape, + 'Expected baseline and estimated baseline do not have the same shape\n' + 'expected_baseline.shape: {expected_baseline.shape}\n' + 'em.baseline.shape: {em.baseline.shape}\n' + ) + + # Test 1.2: relative magnitudes of baseline + np.testing.assert_array_equal( + np.argsort(em.baseline), + np.argsort(expected_baseline), + err_msg='Relative magnitudes are not consistent' + 'between expected baseline and estimated baseline', + ) + + # Test 1.3: approximate equality of baseline + np.testing.assert_array_almost_equal( + em.baseline, + expected_baseline, + decimal=1, + ) + + # Test 2.1: shape of kernel + self.assertEqual( + expected_kernel.shape, + em.kernel.shape, + 'Expected kernel and estimated kernel do not have the same shape\n' + 'expected_kernel.shape: {expected_kernel.shape}\n' + 'em.kernel.shape: {em.kernel.shape}\n' + ) + + # Test 2.2: estimated kernel must be non-negative + self.assertTrue( + np.all(em.kernel >= 0), + f'Estimated kernel takes negative values' + ) + + # Test 2.3: estimated kernel must be non-increasing + significance_threshold = 7e-3 # Can we do better? + estimated_kernel_increments = np.diff(em.kernel, append=0., axis=2) + estimated_kernel_significant_increments = estimated_kernel_increments[ + np.abs(estimated_kernel_increments) > significance_threshold + ] + assert estimated_kernel_increments.shape == ( + n_nodes, n_nodes, kernel_size) + + self.assertTrue( + np.all( + estimated_kernel_significant_increments <= 0. + ), + 'Estimated kernel are not non-increasing functions. ' + 'This is not compatible with the simulation ' + 'being performed with exponential kernels.\n' + f'estimated_kernel_increments:\n{estimated_kernel_increments}\n' + f'estimated_kernel_significant_increments:\n{estimated_kernel_significant_increments}\n' + f'estimated_kernel_significant_increments bigger than 0:\n{estimated_kernel_significant_increments[estimated_kernel_significant_increments>.0]}\n' + ) + + # Test 2.4: relative magnitudes of kernel at zero + expected_kernel_at_zero = expected_kernel[:, :, 0].flatten() + estimated_kernel_at_zero = np.array(em.kernel, copy=True)[ + :, :, 0].flatten() + np.testing.assert_array_equal( + np.argsort( + estimated_kernel_at_zero[estimated_kernel_at_zero > .0]), + np.argsort(expected_kernel_at_zero[expected_kernel_at_zero > .0]), + err_msg='Relative magnitudes are not consistent ' + 'between expected kernel and estimated kernel\n' + f'expected_kernel_at_zero:\n{expected_kernel_at_zero}\n' + f'estimated_kernel_at_zero:\n{estimated_kernel_at_zero}\n' + ) + + # Test 2.5: approximate equality of kernel at zero + np.testing.assert_array_almost_equal( + expected_kernel_at_zero, + estimated_kernel_at_zero, + decimal=0, # Can we do better? + err_msg='estimated kernel at zero deviates ' + 'from expected kernel at zero.\n' + f'expected_kernel_at_zero:\n{expected_kernel_at_zero}\n' + f'estimated_kernel_at_zero:\n{estimated_kernel_at_zero}\n' + ) + + def test_hawkes_em_fit_3(self): + """Test estimation on simulated data from a non-parametric Hawkes model + + The test compares the time-integral of Hawkes kernels: + expected (i.e. exact, coming from the model used to simulate the data) + and estimated (i.e. coming from the estimated kernels). + """ + simu_model = simulate_hawkes_nonparam_kern( + end_time=30000, + seed=2334, + ) + + em = HawkesEM( + kernel_support=4., + kernel_size=32, + n_threads=8, + verbose=True, + max_iter=500, + tol=1e-6) + em.fit(simu_model.timestamps) + + evaluation_points = np.linspace(0, 4., num=10) + for i in range(2): + for j in range(2): + estimated_primitive_kernel_values = em._compute_primitive_kernel_values( + i, j, evaluation_points) + expected_primitive_kernel_values = simu_model.kernels[i, j].get_primitive_values( + evaluation_points) + self.assertTrue( + np.allclose( + expected_primitive_kernel_values, + estimated_primitive_kernel_values, + atol=2e-2, + rtol=3.5-1, + ), + f'Kernel[{i}, {j}]: Estimation error\n' + f'Estimated values:\n{estimated_primitive_kernel_values}\n' + f'Expected values:\n{expected_primitive_kernel_values}\n' + ) + def test_hawkes_em_score(self): """...Test score (ie. likelihood) function of Hawkes EM """ @@ -83,7 +353,8 @@ def test_hawkes_em_score(self): def approximate_likelihood(em, events, end_times, precision=2): n_total_jumps = sum(map(len, events)) kernels_func = [[ - lambda t, i=i, j=j: em.get_kernel_values(i, j, np.array([t]))[0] + lambda t, i=i, j=j: em.get_kernel_values( + i, j, np.array([t]))[0] for j in range(n_nodes) ] for i in range(n_nodes)] intensities = hawkes_intensities(events, em.baseline, kernels_func) @@ -93,7 +364,7 @@ def approximate_likelihood(em, events, end_times, precision=2): # We use only 2 nodes otherwise integral approximation might be very # slow n_nodes = 2 - kernel_support = 1 + kernel_support = 1. kernel_size = 3 baseline = np.random.rand(n_nodes) + .2 kernel = np.random.rand(n_nodes, n_nodes, kernel_size) + .4 @@ -148,6 +419,22 @@ def approximate_likelihood(em, events, end_times, precision=2): approximate_likelihood(em, test_events, test_end_times, 4), delta=1e-3, msg='Failed on test for {}'.format(kwargs)) + def test_hawkes_em_kernel_shape(self): + kernel_support = 4 + kernel_size = 10 + em = HawkesEM(kernel_support=kernel_support, kernel_size=kernel_size, + n_threads=2, max_iter=11, verbose=False) + em.fit(self.events) + reshaped_kernel = em._flat_kernels.reshape(( + em.n_nodes, em.n_nodes, em.kernel_size)) + self.assertTrue( + np.allclose(em.kernel, reshaped_kernel + ), + "Reshaping of kernel is inconsistent:" + f"kernel: {em.kernel}\n" + f"reshaped kernel: {reshaped_kernel}\n" + ) + def test_hawkes_em_kernel_support(self): """...Test that Hawkes em kernel support parameter is correctly synchronized @@ -224,6 +511,211 @@ def test_hawkes_em_kernel_dt(self): 0.19047619047619047) self.assertEqual(learner.kernel_size, 21) + def test_hawkes_em_get_kernel_values(self): + kernel_support = 4 + kernel_size = 10 + em = HawkesEM(kernel_support=kernel_support, kernel_size=kernel_size, + n_threads=2, max_iter=11, verbose=False) + em.fit(self.events) + + # Test 0 + self.assertEqual(em.kernel.shape, (self.n_nodes, self.n_nodes, kernel_size), + 'Estimated kernel has wrong shape' + f'Expected shape: {(self.n_nodes, self.n_nodes, kernel_size)}\n' + f'Shape of estimated kernel: {em.kernel.shape}\n' + ) + # Test 1 + self.assertTrue(np.all(em.kernel >= 0.), + "Error: kernel cannot take negative values!") + + # Test 2 + # The method `get_kernel_values` when evaluated at the + # discrtetization point of the kernel must yield the values stored in `kernel` + for i in range(self.n_nodes): + for j in range(self.n_nodes): + vals = em.get_kernel_values( + i, j, em.kernel_discretization[1:]) + self.assertTrue(np.allclose(vals, em.kernel[i, j, :])) + + def test_hawkes_em_kernel_primitives(self): + kernel_support = 4 + kernel_size = 10 + em = HawkesEM(kernel_support=kernel_support, kernel_size=kernel_size, + n_threads=2, max_iter=11, verbose=False) + em.fit(self.events) + # Pre-test 1 + self.assertTrue(np.all(em.kernel >= 0.), + "Error: kernel cannot take negative values!") + # Pre-test 2 + primitives = em._get_kernel_primitives() + self.assertEqual(primitives.shape, + (self.n_nodes, self.n_nodes, kernel_size), + "Erorr : Shape of primitives does not match expected shape" + ) + + # Test 0 + # Primitives must be non-decreasing functions, since kernels are non-negative + self.assertTrue( + np.all(np.diff(primitives, axis=2) >= 0.), + "Error: primitives is not non-decreasing" + ) + + # Test 1 + # Since kernels are positive, their primitive evaluated at the + # rightmost point of their support is expected to be equal to their norm + norms = em.get_kernel_norms() + self.assertTrue( + np.allclose(norms, primitives[:, :, -1]), + "The values of the kernel primitives at the end of the kernel supports," + "must agree with the kernel norms." + ) + + # Test 2 + # The method `_compute_primitive_kernel_values` when evaluated at the + # discrtetization point of the kernel must yield the values stored in `primitive` + for i in range(self.n_nodes): + for j in range(self.n_nodes): + vals = em._compute_primitive_kernel_values( + i, j, em.kernel_discretization[1:]) + self.assertTrue(np.allclose(vals, primitives[i, j, :])) + + # Test 3 + # We test that the values of the primitives computed via the class method agree + # with the primitives of the kernels computed using numpy arrays + def _compute_primitive_with_numpy(kernel, kernel_discretization): + n = kernel.shape[0] + m = kernel.shape[1] + l = kernel.shape[2] + assert len(kernel_discretization) == l + 1 + steps = np.diff(kernel_discretization) + steps = np.repeat(steps.reshape((1, l)), repeats=m, axis=0) + steps = np.repeat(steps.reshape((1, m, l)), repeats=n, axis=0) + assert steps.shape == (n, m, l) + assert steps.shape == kernel.shape + primitives = np.cumsum(kernel*steps, axis=2) + assert primitives.shape == (n, m, l) + return primitives + self.assertTrue(np.allclose(primitives, _compute_primitive_with_numpy( + em.kernel, em.kernel_discretization))) + + def test_time_changed_interarrival_times_exp_kern(self): + seed = 12345 + n_nodes = 2 + n_realizations = 1 + decays = np.array([[1., 1.5], [0.1, 0.5]]) + baseline = np.array([0.12, 0.07]) + adjacency = np.array([[.1, .4], [.2, 0.03]]) + end_time = 30000 + simu_model = simulate_hawkes_exp_kern( + decays=decays, + baseline=baseline, + adjacency=adjacency, + end_time=end_time, + max_jumps=200000, + verbose=True, + force_simulation=False, + seed=seed, + ) + simu_model.store_compensator_values() + simu_time_changed_interarrival_times = [ + [np.diff(c) for c in simu_model.tracked_compensator] + ] + + kernel_support = compute_approx_support_of_exp_kernel( + adjacency, decays, 1e-4) + # print(f'kernel_suppport = {kernel_support}') + kernel_size = 20 + + events = [simu_model.timestamps] + + em = HawkesEM(kernel_support=kernel_support, + kernel_size=kernel_size, + tol=1e-9, + print_every=50, + record_every=10, + max_iter=1200, + verbose=True) + + # Test Part 1 - Exact parameters + discretized_kernel = discretization_of_exp_kernel( + n_nodes, + adjacency, + decays, + kernel_support, + kernel_size, + ) + tcit = em.time_changed_interarrival_times( + events=events, + end_times=end_time, + baseline=baseline, + kernel=discretized_kernel, + ) + for r in range(n_realizations): + for e in range(n_nodes): + self.assertTrue( + np.all(tcit[r][e] > .0), + 'Exact parameters: Assertion error: ' + 'Inter-arrival times of ' + f'realization {r} and component {e}' + ' are not all positive' + ) + for r in range(n_realizations): + for e in range(n_nodes): + self.assertAlmostEqual( + np.quantile( + simu_time_changed_interarrival_times[r][e], .55), + 1., + None, + msg=f'Realization {r}, component {e}:\n' + 'Time-changed inter-arrival times ' + 'as computed from simulation ' + 'are not distributed around 1.', + delta=.35, # Can we do better? + ) + self.assertAlmostEqual( + np.quantile(tcit[r][e], .5), + 1., + None, + msg='Exact parameters: Assertion error: ' + f'Realization {r}, component {e}:\n' + 'Time-changed inter-arrival times ' + 'as computed from estimation ' + 'are not distributed around 1.', + delta=.35, # Can we do better? + ) + + # Test - Part 2 - Estimated parameters + baseline_start = np.array([.05 * np.mean(np.diff(ts)) + for ts in simu_model.timestamps]) + kernel_start = np.zeros((n_nodes, n_nodes, kernel_size)) + kernel_start[:, :, :kernel_size-1] = .01 * np.cumsum( + np.random.uniform(size=(n_nodes, n_nodes, kernel_size-1)), axis=2)[:, :, ::-1] + em.fit(events, baseline_start=baseline_start, + kernel_start=kernel_start) + tcit = em.time_changed_interarrival_times() + for r in range(n_realizations): + for e in range(n_nodes): + self.assertTrue( + np.all(tcit[r][e] > .0), + 'Estimated parameters: Assertion error: ' + 'Inter-arrival times of ' + f'realization {r} and component {e}' + ' are not all positive' + ) + for r in range(n_realizations): + for e in range(n_nodes): + self.assertAlmostEqual( + np.quantile(tcit[r][e], .5), + 1., + None, + msg='Estimated parameters: Assertion error: ' + f'Realization {r}, component {e}:\n' + 'Time-changed inter-arrival times ' + 'as computed from estimation ' + 'are not distributed around 1.', + delta=.35, # Can we do better? + ) + if __name__ == "__main__": unittest.main() diff --git a/tick/hawkes/simulation/base/simu_point_process.py b/tick/hawkes/simulation/base/simu_point_process.py index 13e7110fc..0a936051a 100644 --- a/tick/hawkes/simulation/base/simu_point_process.py +++ b/tick/hawkes/simulation/base/simu_point_process.py @@ -201,7 +201,7 @@ def store_compensator_values(self): Notes ----- - This method must be after simulation + This method must be called after simulation """ self._pp.store_compensator_values() diff --git a/tick/hawkes/simulation/hawkes_kernels/hawkes_kernel.py b/tick/hawkes/simulation/hawkes_kernels/hawkes_kernel.py index ec07d8dc9..3fc474b21 100644 --- a/tick/hawkes/simulation/hawkes_kernels/hawkes_kernel.py +++ b/tick/hawkes/simulation/hawkes_kernels/hawkes_kernel.py @@ -40,6 +40,16 @@ def get_values(self, t_values): """ return self._kernel.get_values(t_values) + def get_primitive_value(self, t): + """Returns the value of the time-integral of the kernel at t + """ + return self._kernel.get_primitive_value(t) + + def get_primitive_values(self, t_values): + """Returns the value of the time-integral of the kernel for all times in t_values + """ + return self._kernel.get_primitive_values(t_values) + def get_norm(self, n_steps=10000): """Computes L1 norm diff --git a/tick/plot/plot_point_processes.py b/tick/plot/plot_point_processes.py index 7d5502259..1c2f8e6fa 100644 --- a/tick/plot/plot_point_processes.py +++ b/tick/plot/plot_point_processes.py @@ -2,6 +2,8 @@ import matplotlib.pyplot as plt import numpy as np +from typing import Optional, List +from tick.hawkes.simulation.base import SimuPointProcess def plot_point_process(point_process, plot_intensity=None, n_points=10000, @@ -53,7 +55,7 @@ def plot_point_process(point_process, plot_intensity=None, n_points=10000, node_names = list(map(lambda n: 'ticks #{}'.format(n), plot_nodes)) elif len(node_names) != len(plot_nodes): raise ValueError('node_names must be a list of length {} but has length {}' - .format(len(plot_nodes), len(node_names))) + .format(len(plot_nodes), len(node_names))) labels = [] for name, node in zip(node_names, plot_nodes): label = name @@ -197,7 +199,8 @@ def _extract_process_interval(plot_nodes, end_time, timestamps, def qq_plots( - point_process, + point_process: Optional[SimuPointProcess] = None, + residuals: Optional[List[np.ndarray]] = None, plot_nodes=None, node_names=None, line='45', @@ -207,14 +210,24 @@ def qq_plots( import statsmodels.api as sm from scipy.stats import expon + if residuals is None: + assert point_process is not None, 'You must pass an object of class `SimuPointProcess` or a list of np.ndarray of residuals' + else: + assert point_process is None, 'You must pass either an object of class `SimuPointProcess` or residuals, not both' + if plot_nodes is None: - plot_nodes = range(point_process.n_nodes) + if point_process is not None: + plot_nodes = range(point_process.n_nodes) + elif residuals is not None: + plot_nodes = range(len(residuals)) + else: + raise ValueError('Could not infer which nodes to plot') if node_names is None: node_names = list(map(lambda n: 'ticks #{}'.format(n), plot_nodes)) elif len(node_names) != len(plot_nodes): raise ValueError('node_names must be a list of length {} but has length {}' - .format(len(plot_nodes), len(node_names))) + .format(len(plot_nodes), len(node_names))) labels = node_names if ax is None: @@ -227,10 +240,11 @@ def qq_plots( if len(plot_nodes) == 1: ax = [ax] - compensators = point_process.tracked_compensator - if compensators is None: - raise ValueError("No tracked compensator!") - residuals = [np.diff(comp) for comp in compensators] + if residuals is None: + compensators = point_process.tracked_compensator + if compensators is None: + raise ValueError("No tracked compensator!") + residuals = [np.diff(comp) for comp in compensators] for count, i in enumerate(plot_nodes): sm.qqplot(data=residuals[i],