stmorse · SoumayaMauthoorMOJ · Jan 1, 2024
diff --git a/.gitignore b/.gitignore
@@ -0,0 +1,160 @@
+# Byte-compiled / optimized / DLL files
+__pycache__/
+*.py[cod]
+*$py.class
+
+# C extensions
+*.so
+
+# Distribution / packaging
+.Python
+build/
+develop-eggs/
+dist/
+downloads/
+eggs/
+.eggs/
+lib/
+lib64/
+parts/
+sdist/
+var/
+wheels/
+share/python-wheels/
+*.egg-info/
+.installed.cfg
+*.egg
+MANIFEST
+
+# PyInstaller
+#  Usually these files are written by a python script from a template
+#  before PyInstaller builds the exe, so as to inject date/other infos into it.
+*.manifest
+*.spec
+
+# Installer logs
+pip-log.txt
+pip-delete-this-directory.txt
+
+# Unit test / coverage reports
+htmlcov/
+.tox/
+.nox/
+.coverage
+.coverage.*
+.cache
+nosetests.xml
+coverage.xml
+*.cover
+*.py,cover
+.hypothesis/
+.pytest_cache/
+cover/
+
+# Translations
+*.mo
+*.pot
+
+# Django stuff:
+*.log
+local_settings.py
+db.sqlite3
+db.sqlite3-journal
+
+# Flask stuff:
+instance/
+.webassets-cache
+
+# Scrapy stuff:
+.scrapy
+
+# Sphinx documentation
+docs/_build/
+
+# PyBuilder
+.pybuilder/
+target/
+
+# Jupyter Notebook
+.ipynb_checkpoints
+
+# IPython
+profile_default/
+ipython_config.py
+
+# pyenv
+#   For a library or package, you might want to ignore these files since the code is
+#   intended to run in multiple environments; otherwise, check them in:
+# .python-version
+
+# pipenv
+#   According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
+#   However, in case of collaboration, if having platform-specific dependencies or dependencies
+#   having no cross-platform support, pipenv may install dependencies that don't work, or not
+#   install all needed dependencies.
+#Pipfile.lock
+
+# poetry
+#   Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control.
+#   This is especially recommended for binary packages to ensure reproducibility, and is more
+#   commonly ignored for libraries.
+#   https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control
+#poetry.lock
+
+# pdm
+#   Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control.
+#pdm.lock
+#   pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it
+#   in version control.
+#   https://pdm.fming.dev/#use-with-ide
+.pdm.toml
+
+# PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm
+__pypackages__/
+
+# Celery stuff
+celerybeat-schedule
+celerybeat.pid
+
+# SageMath parsed files
+*.sage.py
+
+# Environments
+.env
+.venv
+env/
+venv/
+ENV/
+env.bak/
+venv.bak/
+
+# Spyder project settings
+.spyderproject
+.spyproject
+
+# Rope project settings
+.ropeproject
+
+# mkdocs documentation
+/site
+
+# mypy
+.mypy_cache/
+.dmypy.json
+dmypy.json
+
+# Pyre type checker
+.pyre/
+
+# pytype static type analyzer
+.pytype/
+
+# Cython debug symbols
+cython_debug/
+
+# PyCharm
+#  JetBrains specific template is maintained in a separate JetBrains.gitignore that can
+#  be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore
+#  and can be added to the global gitignore or merged into this file.  For a more nuclear
+#  option (not recommended) you can uncomment the following to ignore the entire idea folder.
+#.idea/
diff --git a/.python-version b/.python-version
@@ -0,0 +1 @@
+3.11.2
diff --git a/MHP.py b/MHP.py
@@ -14,22 +14,22 @@
 
 
 import numpy as np
-import time as T
 
 from sklearn.metrics.pairwise import pairwise_distances
 from sklearn.utils.extmath import cartesian
 
 import matplotlib.pyplot as plt
 
 class MHP:
-    def __init__(self, alpha=[[0.5]], mu=[0.1], omega=1.0):
+    def __init__(self, alpha=[[0.5]], mu=[0.1], omega=1.0, seed=None):
         '''params should be of form:
         alpha: numpy.array((u,u)), mu: numpy.array((,u)), omega: float'''
 
         self.data = []
         self.alpha, self.mu, self.omega = np.array(alpha), np.array(mu), omega
         self.dim = self.mu.shape[0]
         self.check_stability()
+        self.rng = np.random.default_rng(seed=seed)
 
     def check_stability(self):
         ''' check stability of process (max alpha eigenvalue < 1)'''
@@ -46,16 +46,16 @@ def generate_seq(self, horizon):
         self.data = []  # clear history
 
         Istar = np.sum(self.mu)
-        s = np.random.exponential(scale=1./Istar)
+        s = self.rng.exponential(scale=1./Istar)
 
         # attribute (weighted random sample, since sum(mu)==Istar)
-        n0 = np.random.choice(np.arange(self.dim), 
-                              1, 
+        n0 = self.rng.choice(np.arange(self.dim), 1, 
                               p=(self.mu / Istar))
-        self.data.append([s, n0])
+        self.data.append([s, n0[0]])
 
         # value of \lambda(t_k) where k is most recent event
         # starts with just the base rate
+        rates = 0
         lastrates = self.mu.copy()
 
         decIstar = False
@@ -72,7 +72,7 @@ def generate_seq(self, horizon):
                         self.omega * np.sum(self.alpha[:,uj])
 
             # generate new event
-            s += np.random.exponential(scale=1./Istar)
+            s += self.rng.exponential(scale=1./Istar)
 
             # calc rates at time s (use trick to take advantage of rates at last event)
             rates = self.mu + np.exp(-self.omega * (s - tj)) * \
@@ -82,7 +82,7 @@ def generate_seq(self, horizon):
             # handle attribution and thinning in one step as weighted random sample
             diff = Istar - np.sum(rates)
             try:
-                n0 = np.random.choice(np.arange(self.dim+1), 1, 
+                n0 = self.rng.choice(np.arange(self.dim+1), 1, 
                                       p=(np.append(rates, diff) / Istar))
             except ValueError:
                 # by construction this should not happen
@@ -91,7 +91,7 @@ def generate_seq(self, horizon):
                 return self.data
 
             if n0 < self.dim:
-                self.data.append([s, n0])
+                self.data.append([s, n0[0]])
                 # update lastrates
                 lastrates = rates.copy()
             else:
@@ -157,7 +157,6 @@ def sum_pij(a,b):
 
         k = 0
         old_LL = -10000
-        START = T.time()
         while k < maxiter:
             Auu = Ahat[rowidx, colidx]
             ag = np.multiply(Auu, kern)
@@ -233,7 +232,7 @@ def plot_rates(self, horizon=-1):
         f, axarr = plt.subplots(self.dim*2,1, sharex='col', 
                                 gridspec_kw = {'height_ratios':sum([[3,1] for i in range(self.dim)],[])}, 
                                 figsize=(8,self.dim*2))
-        xs = np.linspace(0, horizon, (horizon/100.)*1000)
+        xs = np.linspace(0, horizon, int((horizon/100.)*1000))
         for i in range(self.dim):
             row = i * 2
 
@@ -269,9 +268,7 @@ def plot_events(self, horizon=-1, showDays=True, labeled=True):
                 plt.plot([j,j], [-self.dim, 1], 'k:', alpha=0.15)
 
         if labeled:
-            ax.set_yticklabels('')
-            ax.set_yticks(-np.arange(0, self.dim), minor=True)
-            ax.set_yticklabels([r'$e_{%d}$' % i for i in range(self.dim)], minor=True)
+            ax.set_yticks(-np.arange(0, self.dim), labels=[r'$e_{%d}$' % i for i in range(self.dim)])
         else:
             ax.yaxis.set_visible(False)
 

diff --git a/README.md b/README.md
@@ -13,7 +13,7 @@ The `MHP.py` file contains a single class, `MHP`.  The core methods are:
 
 2. `EM`: uses a Bayesian EM algorithm to learn parameters, given a sequence and estimates of `alpha`, `mu`, and `omega`.  It treats `omega` as a hyperparameter and does not optimize over this parameter.  For more details see [the report](https://stmorse.github.io/docs/6-867-final-writeup.pdf), [my thesis](https://stmorse.github.io/docs/orc-thesis.pdf), or [this blog post](https://stmorse.github.io/blog).
 
-There are also two visualization methods, `plot_events` (which plots only the time series of events), and `plot_rates` (which plots the time series with the corresponding conditional intensities, currently only implemented for `dim=3`).
+There are also two visualization methods, `plot_events` (which plots only the time series of events), and `plot_rates` (which plots the time series with the corresponding conditional intensities).
 
 
 # Usage
@@ -58,21 +58,21 @@ We can learn the parameters of an already generated sequence by simply calling `
 mhat = np.random.uniform(0,1, size=3)
 ahat = np.random.uniform(0,1, size=(3,3))
 w = 3.
-
+np.set_printoptions(precision=3,suppress=True)
 P.EM(ahat, mhat, w)
 ```
 which will give output something like:
 ```
-After ITER 0 (old: -10000.000 new: -0.129)
- terms 5.1766, 17.4638, 8.5362
-After ITER 10 (old: -0.129 new: -1.349)
- terms -20.8908, 11.8172, 14.1828
-Reached stopping criterion. (Old: -1.349 New: -1.350)
-
-(array([[ 0.0223,  0.1475,  0.    ],
-        [ 0.5954,  0.    ,  0.    ],
-        [ 0.    ,  0.625 ,  0.    ]]), 
- array([ 0.2038,  0.0046,  0.    ]))
+After ITER 0 (old: -10000.000 new: -0.044)
+ terms 9.4483, 15.4094, 10.5906
+After ITER 10 (old: -0.044 new: -1.509)
+ terms -25.8799, 12.6529, 13.3471
+Reached stopping criterion. (Old: -1.509 New: -1.509)
+
+(array([[0.06 , 0.   , 0.071],
+        [0.503, 0.   , 0.076],
+        [0.   , 0.735, 0.   ]]),
+ array([0.211, 0.   , 0.015]))
 ```
 
 This is already a reasonable approximation to the actual ground truth values from above, without including regularization, cross-validation, a larger sample, etc.

diff --git a/requirements_3_11.txt b/requirements_3_11.txt
@@ -0,0 +1,7 @@
+numpy==1.26.2
+ipykernel==6.28.0
+matplotlib==3.8.2
+scipy==1.11.4
+scikit-learn==1.3.2
+pytest==7.4.4
+ruff==0.1.9
diff --git a/test_MHP.py b/test_MHP.py
@@ -0,0 +1,54 @@
+import numpy as np
+
+from MHP import MHP
+
+expected_seq = np.array([[6.799319039689095, 0], [6.832330144004188, 0]])
+expected_multiple_seq = np.array(
+    [
+        [3.39965952, 0.0],
+        [3.40566154, 0.0],
+        [12.48864946, 0.0],
+        [13.17625391, 0.0],
+        [13.46781712, 1.0],
+        [14.65373507, 2.0],
+        [14.77459075, 0.0],
+    ]
+)
+
+m = np.array([0.2, 0.0, 0.0])
+a = np.array([[0.1, 0.0, 0.0], [0.9, 0.0, 0.0], [0.0, 0.9, 0.0]])
+w = 3.1
+
+
+def test_generate_seq():
+    P = MHP(seed=0)
+    P.generate_seq(10)
+    np.testing.assert_array_equal(P.data, expected_seq)
+
+
+def test_generate_seq_multiple():
+    P = MHP(mu=m, alpha=a, omega=w, seed=0)
+    P.generate_seq(15)
+    np.testing.assert_allclose(P.data, expected_multiple_seq)
+
+
+def test_EM():
+    P = MHP(seed=0)
+    P.data = expected_seq
+    alpha = np.array([[0.5]])
+    mu = np.array([0.1])
+    omega = 1.0
+    expected = (np.array([[0.28310623]]), np.array([0.20985337]))
+    for i, value in enumerate(P.EM(alpha, mu, omega)):
+        np.testing.assert_allclose(value, expected[i])
+
+
+def test_EM_multiple():
+    P = MHP(seed=0)
+    P.data = expected_multiple_seq
+    expected = (
+        np.array([[0.1676253, 0.0, 0.0], [0.2, 0.0, 0.0], [0.0, 1.0, 0.0]]),
+        np.array([0.28169129, 0.0, 0.0]),
+    )
+    for i, value in enumerate(P.EM(a, m, w)):
+        np.testing.assert_allclose(value, expected[i])