From 25beb1138c1efe1f725fef267c20ccfbefc37d74 Mon Sep 17 00:00:00 2001 From: Matteo Bachetti Date: Tue, 1 Oct 2024 13:17:30 +0200 Subject: [PATCH 01/37] Add pyOpenSci review badge --- README.rst | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/README.rst b/README.rst index ce4c0bbdb..eabf01ded 100644 --- a/README.rst +++ b/README.rst @@ -7,7 +7,7 @@ Stingray :widths: 50, 50, 50, 50 |Python version|, |GitHub release|, |Build Status Master|, |Slack| - |Docs|, |joss|, |Repo status|, " " + |Docs|, |joss|, |Repo status|, |pyOpenSci Peer-Reviewed| |License|, |doi|, |Coverage Status Master|, " " @@ -98,6 +98,8 @@ The code is distributed under the MIT license; see `LICENSE.rst `_ :target: https://www.repostatus.org/#active .. |License| image:: https://img.shields.io/badge/License-MIT-yellow.svg :target: https://opensource.org/licenses/MIT +.. |pyOpenSci Peer-Reviewed| image:: https://pyopensci.org/badges/peer-reviewed.svg + :target: https://github.com/pyOpenSci/software-review/issues/201 .. _Astropy: https://www.github.com/astropy/astropy .. _Issues: https://www.github.com/stingraysoftware/stingray/issues From 9aca13052b1c1c60889488105afe8128e6600ac1 Mon Sep 17 00:00:00 2001 From: Matteo Bachetti Date: Thu, 3 Oct 2024 12:31:13 +0200 Subject: [PATCH 02/37] Basic functions for shift-and-add technique --- stingray/fourier.py | 133 ++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 133 insertions(+) diff --git a/stingray/fourier.py b/stingray/fourier.py index 613e687eb..092ca1414 100644 --- a/stingray/fourier.py +++ b/stingray/fourier.py @@ -1358,6 +1358,139 @@ def fun(times, gti, segment_size): yield cts +def center_pds_on_f(freqs, powers, f0, nbins=100): + """Extract a slice of PDS around a given frequency. + + This function extracts a slice of the power spectrum around a given frequency. + The slice has a length of ``nbins``. If the slice goes beyond the edges of the + power spectrum, the missing values are filled with NaNs. + + Parameters + ---------- + freqs : np.array + Array of frequencies, the same for all powers + powers : np.array + Array of powers + f0 : float + Central frequency + + Other parameters + ---------------- + nbins : int, default 100 + Number of bins to extract + + Examples + -------- + >>> freqs = np.arange(1, 100) * 0.1 + >>> powers = 10 / freqs + >>> f0 = 0.3 + >>> p = center_pds_on_f(freqs, powers, f0) + >>> assert np.isnan(p[0]) + >>> assert not np.any(np.isnan(p[48:])) + """ + powers = np.asarray(powers) + chunk = np.zeros(nbins) + np.nan + fchunk = np.zeros(nbins) + + start_f_idx = np.searchsorted(freqs, f0) + + minbin = start_f_idx - nbins // 2 + maxbin = minbin + nbins + + if minbin < 0: + chunk[-minbin : min(nbins, powers.size - minbin)] = powers[: minbin + nbins] + elif maxbin > powers.size: + chunk[: nbins - (maxbin - powers.size)] = powers[minbin:] + else: + chunk[:] = powers[minbin:maxbin] + + return chunk + + +def shift_and_add(freqs, power_list, f0_list, nbins=100, rebin=None, df=None, M=None): + """Add a list of power spectra, centered on different frequencies. + + This is the basic operation for the shift-and-add operation used to track + kHz QPOs in X-ray binaries (e.g. Méndez et al. 1998, ApJ, 494, 65). + + Parameters + ---------- + freqs : np.array + Array of frequencies, the same for all powers. Must be sorted and on a uniform + grid. + power_list : list of np.array + List of power spectra. Each power spectrum must have the same length + as the frequency array. + f0_list : list of float + List of central frequencies + + Other parameters + ---------------- + nbins : int, default 100 + Number of bins to extract + rebin : int, default None + Rebin the final spectrum by this factor. At the moment, the rebinning + is linear. + df : float, default None + Frequency resolution of the power spectra. If not given, it is calculated + from the input frequencies. + M : int or list of int, default None + Number of segments used to calculate each power spectrum. If a list is + given, it must have the same length as the power list. + + Returns + ------- + f : np.array + Array of output frequencies + p : np.array + Array of output powers + n : np.array + Number of contributing power spectra at each frequency + + Examples + -------- + >>> power_list = [[2, 5, 2, 2, 2], [1, 1, 5, 1, 1], [3, 3, 3, 5, 3]] + >>> freqs = np.arange(5) * 0.1 + >>> f0_list = [0.1, 0.2, 0.3, 0.4] + >>> f, p, n = shift_and_add(freqs, power_list, f0_list, nbins=5) + >>> assert np.array_equal(n, [2, 3, 3, 3, 2]) + >>> assert np.array_equal(p, [2. , 2. , 5. , 2. , 1.5]) + >>> assert np.allclose(f, [0.05, 0.15, 0.25, 0.35, 0.45]) + """ + final_powers = np.zeros(nbins) + freqs = np.asarray(freqs) + + mid_idx = np.searchsorted(freqs, np.mean(f0_list)) + if M is None: + M = 1 + if not isinstance(M, Iterable): + M = np.ones(len(power_list)) * M + + count = np.zeros(nbins) + for f0, powers, m in zip(f0_list, power_list, M): + idx = np.searchsorted(freqs, f0_list) + + powers = np.asarray(powers) * m + new_power = center_pds_on_f(freqs, powers, f0, nbins=nbins) + bad = np.isnan(new_power) + new_power[bad] = 0.0 + final_powers += new_power + count += np.array(~bad, dtype=int) * m + + if df is None: + df = freqs[1] - freqs[0] + + final_freqs = np.arange(-nbins // 2, nbins // 2 + 1)[:nbins] * df + final_freqs = final_freqs - (final_freqs[0] + final_freqs[-1]) / 2 + np.mean(f0_list) + final_powers = final_powers / count + + if rebin is not None: + final_freqs, final_powers, _, _ = rebin_data(final_freqs, final_powers, rebin * df) + final_powers = final_powers / rebin + + return final_freqs, final_powers, count + + def avg_pds_from_iterable( flux_iterable, dt, norm="frac", use_common_mean=True, silent=False, return_subcs=False ): From ea9ffd0ddc37bb8f47d68ba2e62a1e445a3ca751 Mon Sep 17 00:00:00 2001 From: Matteo Bachetti Date: Thu, 3 Oct 2024 16:26:31 +0200 Subject: [PATCH 03/37] Eliminate lightkurve link --- stingray/lightcurve.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/stingray/lightcurve.py b/stingray/lightcurve.py index 367082cfc..085b13b9f 100644 --- a/stingray/lightcurve.py +++ b/stingray/lightcurve.py @@ -1383,8 +1383,7 @@ def analyze_lc_chunks(self, segment_size, func, fraction_step=1, **kwargs): def to_lightkurve(self): """ Returns a `lightkurve.LightCurve` object. - This feature requires `Lightkurve - `_ to be installed + This feature requires ``Lightkurve`` to be installed (e.g. ``pip install lightkurve``). An `ImportError` will be raised if this package is not available. From cb4262e180380725fb71d97aa0833ac1759164cd Mon Sep 17 00:00:00 2001 From: Matteo Bachetti Date: Fri, 4 Oct 2024 12:18:38 +0200 Subject: [PATCH 04/37] Add methods for shift-and-add to DynamicalPowerspectrum and DynamicalCrossspectrum --- stingray/crossspectrum.py | 84 ++++++++++++++++++++++++++-- stingray/powerspectrum.py | 23 ++++++-- stingray/tests/test_crossspectrum.py | 15 +++++ stingray/tests/test_powerspectrum.py | 15 +++++ 4 files changed, 125 insertions(+), 12 deletions(-) diff --git a/stingray/crossspectrum.py b/stingray/crossspectrum.py index 608dc764a..741f448c1 100644 --- a/stingray/crossspectrum.py +++ b/stingray/crossspectrum.py @@ -2132,7 +2132,22 @@ class DynamicalCrossspectrum(AveragedCrossspectrum): The number of averaged powers in each spectral bin (initially 1, it changes after rebinning). """ - def __init__(self, data1, data2, segment_size, norm="frac", gti=None, sample_time=None): + def __init__( + self, data1=None, data2=None, segment_size=None, norm="frac", gti=None, sample_time=None + ): + self.segment_size = segment_size + self.sample_time = sample_time + self.gti = gti + self.norm = norm + + if segment_size is None and data1 is None and data2 is None: + self._initialize_empty() + self.dyn_ps = None + return + + if segment_size is None or data1 is None or data2 is None: + raise RuntimeError("data1, data2, and segment_size must all be specified") + if isinstance(data1, EventList) and sample_time is None: raise ValueError("To pass input event lists, please specify sample_time") elif isinstance(data1, Lightcurve): @@ -2145,11 +2160,6 @@ def __init__(self, data1, data2, segment_size, norm="frac", gti=None, sample_tim if segment_size < 2 * sample_time: raise ValueError("Length of the segment is too short to form a light curve!") - self.segment_size = segment_size - self.sample_time = sample_time - self.gti = gti - self.norm = norm - self._make_matrix(data1, data2) def _make_matrix(self, data1, data2): @@ -2370,6 +2380,68 @@ def trace_maximum(self, min_freq=None, max_freq=None): return np.array(max_positions) + def shift_and_add(self, f0_list, nbins=100, output_obj_type=AveragedCrossspectrum): + """Shift and add the dynamical power spectrum. + + This is the basic operation for the shift-and-add operation used to track + kHz QPOs in X-ray binaries (e.g. Méndez et al. 1998, ApJ, 494, 65). + + Parameters + ---------- + freqs : np.array + Array of frequencies, the same for all powers. Must be sorted and on a uniform + grid. + power_list : list of np.array + List of power spectra. Each power spectrum must have the same length + as the frequency array. + f0_list : list of float + List of central frequencies + + Other parameters + ---------------- + nbins : int, default 100 + Number of bins to extract + + Returns + ------- + output: :class:`AveragedPowerspectrum` or :class:`AveragedCrossspectrum` + The final averaged power spectrum. + + Examples + -------- + >>> power_list = [[2, 5, 2, 2, 2], [1, 1, 5, 1, 1], [3, 3, 3, 5, 3]] + >>> power_list = np.array(power_list).T + >>> freqs = np.arange(5) * 0.1 + >>> f0_list = [0.1, 0.2, 0.3, 0.4] + >>> dps = DynamicalCrossspectrum() + >>> dps.dyn_ps = power_list + >>> dps.freq = freqs + >>> dps.df = 0.1 + >>> dps.m = 1 + >>> output = dps.shift_and_add(f0_list, nbins=5) + >>> assert np.array_equal(output.m, [2, 3, 3, 3, 2]) + >>> assert np.array_equal(output.power, [2. , 2. , 5. , 2. , 1.5]) + >>> assert np.allclose(output.freq, [0.05, 0.15, 0.25, 0.35, 0.45]) + """ + from .fourier import shift_and_add + + final_freqs, final_powers, count = shift_and_add( + self.freq, self.dyn_ps.T, f0_list, nbins=nbins, M=self.m, df=self.df + ) + + output = output_obj_type() + good = ~np.isnan(final_powers) + + output.freq = final_freqs[good] + output.power = final_powers[good] + output.m = count[good] + output.df = self.df + output.norm = self.norm + output.gti = self.gti + output.segment_size = self.segment_size + + return output + def power_colors( self, freq_edges=[1 / 256, 1 / 32, 0.25, 2.0, 16.0], diff --git a/stingray/powerspectrum.py b/stingray/powerspectrum.py index d9d7dc175..fa6334f7a 100755 --- a/stingray/powerspectrum.py +++ b/stingray/powerspectrum.py @@ -1020,7 +1020,20 @@ class DynamicalPowerspectrum(DynamicalCrossspectrum): The number of averaged cross spectra. """ - def __init__(self, lc, segment_size, norm="frac", gti=None, sample_time=None): + def __init__(self, lc=None, segment_size=None, norm="frac", gti=None, sample_time=None): + self.segment_size = segment_size + self.sample_time = sample_time + self.gti = gti + self.norm = norm + + if segment_size is None and lc is None: + self._initialize_empty() + self.dyn_ps = None + return + + if segment_size is None or lc is None: + raise RuntimeError("lc and segment_size must all be specified") + if isinstance(lc, EventList) and sample_time is None: raise ValueError("To pass an input event lists, please specify sample_time") elif isinstance(lc, Lightcurve): @@ -1033,13 +1046,11 @@ def __init__(self, lc, segment_size, norm="frac", gti=None, sample_time=None): if segment_size < 2 * sample_time: raise ValueError("Length of the segment is too short to form a light curve!") - self.segment_size = segment_size - self.sample_time = sample_time - self.gti = gti - self.norm = norm - self._make_matrix(lc) + def shift_and_add(self, f0_list, nbins=100): + return super().shift_and_add(f0_list, nbins=nbins, output_obj_type=AveragedPowerspectrum) + def _make_matrix(self, lc): """ Create a matrix of powers for each time step and each frequency step. diff --git a/stingray/tests/test_crossspectrum.py b/stingray/tests/test_crossspectrum.py index b7e1919a5..fb5b6a9b6 100644 --- a/stingray/tests/test_crossspectrum.py +++ b/stingray/tests/test_crossspectrum.py @@ -1660,3 +1660,18 @@ def test_rebin_frequency_mean_method(self, method): assert np.allclose(new_dps.freq, rebin_freq) assert np.allclose(new_dps.dyn_ps, rebin_dps, atol=0.00001) assert np.isclose(new_dps.df, df_new) + + def test_shift_and_add(self): + power_list = [[2, 5, 2, 2, 2], [1, 1, 5, 1, 1], [3, 3, 3, 5, 3]] + power_list = np.array(power_list).T + freqs = np.arange(5) * 0.1 + f0_list = [0.1, 0.2, 0.3, 0.4] + dps = DynamicalCrossspectrum() + dps.dyn_ps = power_list + dps.freq = freqs + dps.df = 0.1 + dps.m = 1 + output = dps.shift_and_add(f0_list, nbins=5) + assert np.array_equal(output.m, [2, 3, 3, 3, 2]) + assert np.array_equal(output.power, [2. , 2. , 5. , 2. , 1.5]) + assert np.allclose(output.freq, [0.05, 0.15, 0.25, 0.35, 0.45]) \ No newline at end of file diff --git a/stingray/tests/test_powerspectrum.py b/stingray/tests/test_powerspectrum.py index 205493266..062571d18 100644 --- a/stingray/tests/test_powerspectrum.py +++ b/stingray/tests/test_powerspectrum.py @@ -1021,6 +1021,21 @@ def test_rebin_frequency_mean_method(self, method): assert np.allclose(new_dps.dyn_ps, rebin_dps, atol=0.00001) assert np.isclose(new_dps.df, df_new) + def test_shift_and_add(self): + power_list = [[2, 5, 2, 2, 2], [1, 1, 5, 1, 1], [3, 3, 3, 5, 3]] + power_list = np.array(power_list).T + freqs = np.arange(5) * 0.1 + f0_list = [0.1, 0.2, 0.3, 0.4] + dps = DynamicalPowerspectrum() + dps.dyn_ps = power_list + dps.freq = freqs + dps.df = 0.1 + dps.m = 1 + output = dps.shift_and_add(f0_list, nbins=5) + assert np.array_equal(output.m, [2, 3, 3, 3, 2]) + assert np.array_equal(output.power, [2.0, 2.0, 5.0, 2.0, 1.5]) + assert np.allclose(output.freq, [0.05, 0.15, 0.25, 0.35, 0.45]) + class TestRoundTrip: @classmethod From 9f4ecdc77b08fc7b693e925c7de8063ac0e371e7 Mon Sep 17 00:00:00 2001 From: Matteo Bachetti Date: Fri, 4 Oct 2024 12:23:11 +0200 Subject: [PATCH 05/37] Test for bad input to dyn*spectrum --- stingray/crossspectrum.py | 2 +- stingray/powerspectrum.py | 2 +- stingray/tests/test_crossspectrum.py | 8 ++++++-- stingray/tests/test_powerspectrum.py | 4 ++++ 4 files changed, 12 insertions(+), 4 deletions(-) diff --git a/stingray/crossspectrum.py b/stingray/crossspectrum.py index 741f448c1..322763962 100644 --- a/stingray/crossspectrum.py +++ b/stingray/crossspectrum.py @@ -2146,7 +2146,7 @@ def __init__( return if segment_size is None or data1 is None or data2 is None: - raise RuntimeError("data1, data2, and segment_size must all be specified") + raise TypeError("data1, data2, and segment_size must all be specified") if isinstance(data1, EventList) and sample_time is None: raise ValueError("To pass input event lists, please specify sample_time") diff --git a/stingray/powerspectrum.py b/stingray/powerspectrum.py index fa6334f7a..b0e138046 100755 --- a/stingray/powerspectrum.py +++ b/stingray/powerspectrum.py @@ -1032,7 +1032,7 @@ def __init__(self, lc=None, segment_size=None, norm="frac", gti=None, sample_tim return if segment_size is None or lc is None: - raise RuntimeError("lc and segment_size must all be specified") + raise TypeError("lc and segment_size must all be specified") if isinstance(lc, EventList) and sample_time is None: raise ValueError("To pass an input event lists, please specify sample_time") diff --git a/stingray/tests/test_crossspectrum.py b/stingray/tests/test_crossspectrum.py index fb5b6a9b6..e5ea543e0 100644 --- a/stingray/tests/test_crossspectrum.py +++ b/stingray/tests/test_crossspectrum.py @@ -1430,6 +1430,10 @@ def setup_class(cls): test_counts = [2, 3, 1, 3, 1, 5, 2, 1, 4, 2, 2, 2, 3, 4, 1, 7] cls.lc_test = Lightcurve(test_times, test_counts) + def test_bad_args(self): + with pytest.raises(TypeError, match=".must all be specified"): + _ = DynamicalCrossspectrum(1) + def test_with_short_seg_size(self): with pytest.raises(ValueError): dps = DynamicalCrossspectrum(self.lc, self.lc, segment_size=0) @@ -1673,5 +1677,5 @@ def test_shift_and_add(self): dps.m = 1 output = dps.shift_and_add(f0_list, nbins=5) assert np.array_equal(output.m, [2, 3, 3, 3, 2]) - assert np.array_equal(output.power, [2. , 2. , 5. , 2. , 1.5]) - assert np.allclose(output.freq, [0.05, 0.15, 0.25, 0.35, 0.45]) \ No newline at end of file + assert np.array_equal(output.power, [2.0, 2.0, 5.0, 2.0, 1.5]) + assert np.allclose(output.freq, [0.05, 0.15, 0.25, 0.35, 0.45]) diff --git a/stingray/tests/test_powerspectrum.py b/stingray/tests/test_powerspectrum.py index 062571d18..d05d12a7f 100644 --- a/stingray/tests/test_powerspectrum.py +++ b/stingray/tests/test_powerspectrum.py @@ -878,6 +878,10 @@ def setup_class(cls): test_counts = [2, 3, 1, 3, 1, 5, 2, 1, 4, 2, 2, 2, 3, 4, 1, 7] cls.lc_test = Lightcurve(test_times, test_counts) + def test_bad_args(self): + with pytest.raises(TypeError, match=".must all be specified"): + _ = DynamicalPowerspectrum(1) + def test_with_short_seg_size(self): with pytest.raises(ValueError): dps = DynamicalPowerspectrum(self.lc, segment_size=0) From 55e6ac3c4b9b2965f94a4d16752d09fdd3bef660 Mon Sep 17 00:00:00 2001 From: Matteo Bachetti Date: Fri, 4 Oct 2024 12:26:10 +0200 Subject: [PATCH 06/37] Add changelog --- docs/changes/849.feature.rst | 2 ++ 1 file changed, 2 insertions(+) create mode 100644 docs/changes/849.feature.rst diff --git a/docs/changes/849.feature.rst b/docs/changes/849.feature.rst new file mode 100644 index 000000000..e5880abe5 --- /dev/null +++ b/docs/changes/849.feature.rst @@ -0,0 +1,2 @@ +implementation of the shift-and-add technique for QPOs and other varying power spectral features + From 6f489458ab25620799fac5e9004a605f11844710 Mon Sep 17 00:00:00 2001 From: Matteo Bachetti Date: Fri, 4 Oct 2024 13:48:29 +0200 Subject: [PATCH 07/37] Fix and test rebin option --- stingray/crossspectrum.py | 8 +++++--- stingray/fourier.py | 2 ++ stingray/powerspectrum.py | 6 ++++-- stingray/tests/test_powerspectrum.py | 15 +++++++++++++++ 4 files changed, 26 insertions(+), 5 deletions(-) diff --git a/stingray/crossspectrum.py b/stingray/crossspectrum.py index 322763962..476d36355 100644 --- a/stingray/crossspectrum.py +++ b/stingray/crossspectrum.py @@ -2380,7 +2380,7 @@ def trace_maximum(self, min_freq=None, max_freq=None): return np.array(max_positions) - def shift_and_add(self, f0_list, nbins=100, output_obj_type=AveragedCrossspectrum): + def shift_and_add(self, f0_list, nbins=100, output_obj_type=AveragedCrossspectrum, rebin=None): """Shift and add the dynamical power spectrum. This is the basic operation for the shift-and-add operation used to track @@ -2401,7 +2401,9 @@ def shift_and_add(self, f0_list, nbins=100, output_obj_type=AveragedCrossspectru ---------------- nbins : int, default 100 Number of bins to extract - + rebin : int, default None + Rebin the final spectrum by this factor. At the moment, the rebinning + is linear. Returns ------- output: :class:`AveragedPowerspectrum` or :class:`AveragedCrossspectrum` @@ -2426,7 +2428,7 @@ def shift_and_add(self, f0_list, nbins=100, output_obj_type=AveragedCrossspectru from .fourier import shift_and_add final_freqs, final_powers, count = shift_and_add( - self.freq, self.dyn_ps.T, f0_list, nbins=nbins, M=self.m, df=self.df + self.freq, self.dyn_ps.T, f0_list, nbins=nbins, M=self.m, df=self.df, rebin=rebin ) output = output_obj_type() diff --git a/stingray/fourier.py b/stingray/fourier.py index 092ca1414..6f1148697 100644 --- a/stingray/fourier.py +++ b/stingray/fourier.py @@ -20,6 +20,7 @@ show_progress, sum_if_not_none_or_initialize, fix_segment_size_to_integer_samples, + rebin_data, ) @@ -1485,6 +1486,7 @@ def shift_and_add(freqs, power_list, f0_list, nbins=100, rebin=None, df=None, M= final_powers = final_powers / count if rebin is not None: + _, count, _, _ = rebin_data(final_freqs, count, rebin * df) final_freqs, final_powers, _, _ = rebin_data(final_freqs, final_powers, rebin * df) final_powers = final_powers / rebin diff --git a/stingray/powerspectrum.py b/stingray/powerspectrum.py index b0e138046..08cb85984 100755 --- a/stingray/powerspectrum.py +++ b/stingray/powerspectrum.py @@ -1048,8 +1048,10 @@ def __init__(self, lc=None, segment_size=None, norm="frac", gti=None, sample_tim self._make_matrix(lc) - def shift_and_add(self, f0_list, nbins=100): - return super().shift_and_add(f0_list, nbins=nbins, output_obj_type=AveragedPowerspectrum) + def shift_and_add(self, f0_list, nbins=100, rebin=None): + return super().shift_and_add( + f0_list, nbins=nbins, output_obj_type=AveragedPowerspectrum, rebin=rebin + ) def _make_matrix(self, lc): """ diff --git a/stingray/tests/test_powerspectrum.py b/stingray/tests/test_powerspectrum.py index d05d12a7f..fbe77fdca 100644 --- a/stingray/tests/test_powerspectrum.py +++ b/stingray/tests/test_powerspectrum.py @@ -1040,6 +1040,21 @@ def test_shift_and_add(self): assert np.array_equal(output.power, [2.0, 2.0, 5.0, 2.0, 1.5]) assert np.allclose(output.freq, [0.05, 0.15, 0.25, 0.35, 0.45]) + def test_shift_and_add_rebin(self): + power_list = [[2, 5, 2, 2, 2], [1, 1, 5, 1, 1], [3, 3, 3, 5, 3]] + power_list = np.array(power_list).T + freqs = np.arange(5) * 0.1 + f0_list = [0.1, 0.2, 0.3, 0.4] + dps = DynamicalPowerspectrum() + dps.dyn_ps = power_list + dps.freq = freqs + dps.df = 0.1 + dps.m = 1 + output = dps.shift_and_add(f0_list, nbins=5, rebin=2) + assert np.array_equal(output.m, [5, 6]) + assert np.array_equal(output.power, [2.0, 3.5]) + assert np.allclose(output.freq, [0.1, 0.3]) + class TestRoundTrip: @classmethod From 7b7585d24fc37602c21c2f01cb622e566a9a1cbe Mon Sep 17 00:00:00 2001 From: Matteo Bachetti Date: Fri, 4 Oct 2024 14:38:17 +0200 Subject: [PATCH 08/37] Additional test with orbital motion --- stingray/tests/test_fourier.py | 26 ++++++++++++++++++++++++++ 1 file changed, 26 insertions(+) diff --git a/stingray/tests/test_fourier.py b/stingray/tests/test_fourier.py index acd3fcfd8..96076a113 100644 --- a/stingray/tests/test_fourier.py +++ b/stingray/tests/test_fourier.py @@ -938,3 +938,29 @@ def test_deprecation_rms_calculation(self): M=M, ) assert np.isclose(rms, rms_from_unnorm, atol=3 * rmse_from_unnorm) + + +@pytest.mark.parametrize("ntimes", [100, 1000]) +def test_shift_and_add_orbit(ntimes): + # This time correct for orbital motion + from stingray.fourier import shift_and_add + + fmid = 0.7 + freqs = np.linspace(0.699, 0.701, 1001) + porb = 2.52 * 86400 + asini = 22.5 + t0 = porb / 2 + times = np.linspace(0, porb, ntimes + 1)[:-1] + power_list = np.zeros((times.size, freqs.size)) + omega = 2 * np.pi / porb + orbit_freqs = fmid * (1 - asini * omega * np.cos(omega * (times - t0))) + + idx = np.searchsorted(freqs, orbit_freqs) + for i_t, power in zip(idx, power_list): + power[i_t] = 1 + + f, p, n = shift_and_add(freqs, power_list, orbit_freqs, nbins=5) + # If we corrected well, the power should be the average of all max powers in the + # original series + assert np.max(p) == 1 + assert np.max(n) == times.size From e867a1c203fa95c8670cd6dd86e963ab241b5aa6 Mon Sep 17 00:00:00 2001 From: Matteo Bachetti Date: Mon, 7 Oct 2024 16:46:59 +0200 Subject: [PATCH 09/37] Specify how the output frequency array is created [docs only] --- stingray/fourier.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/stingray/fourier.py b/stingray/fourier.py index 6f1148697..ad4021d75 100644 --- a/stingray/fourier.py +++ b/stingray/fourier.py @@ -1442,7 +1442,9 @@ def shift_and_add(freqs, power_list, f0_list, nbins=100, rebin=None, df=None, M= Returns ------- f : np.array - Array of output frequencies + Array of output frequencies. This will be centered on the mean of the + input ``f0_list``, and have the same frequency resolution as the original + frequency array. p : np.array Array of output powers n : np.array From 9d06733c26079c52c2817682327e767a0c3d5435 Mon Sep 17 00:00:00 2001 From: Matteo Bachetti Date: Mon, 7 Oct 2024 16:51:42 +0200 Subject: [PATCH 10/37] Fix docstrings [docs only] --- stingray/crossspectrum.py | 6 ++++- stingray/powerspectrum.py | 46 +++++++++++++++++++++++++++++++++++++++ 2 files changed, 51 insertions(+), 1 deletion(-) diff --git a/stingray/crossspectrum.py b/stingray/crossspectrum.py index 476d36355..64d33b2a7 100644 --- a/stingray/crossspectrum.py +++ b/stingray/crossspectrum.py @@ -2381,7 +2381,7 @@ def trace_maximum(self, min_freq=None, max_freq=None): return np.array(max_positions) def shift_and_add(self, f0_list, nbins=100, output_obj_type=AveragedCrossspectrum, rebin=None): - """Shift and add the dynamical power spectrum. + """Shift and add the dynamical cross spectrum. This is the basic operation for the shift-and-add operation used to track kHz QPOs in X-ray binaries (e.g. Méndez et al. 1998, ApJ, 494, 65). @@ -2404,6 +2404,10 @@ def shift_and_add(self, f0_list, nbins=100, output_obj_type=AveragedCrossspectru rebin : int, default None Rebin the final spectrum by this factor. At the moment, the rebinning is linear. + output_obj_type : class, default :class:`AveragedCrossspectrum` + The type of the output object. Can be, e.g. :class:`AveragedCrossspectrum` or + :class:`AveragedPowerspectrum`. + Returns ------- output: :class:`AveragedPowerspectrum` or :class:`AveragedCrossspectrum` diff --git a/stingray/powerspectrum.py b/stingray/powerspectrum.py index 08cb85984..1c3093129 100755 --- a/stingray/powerspectrum.py +++ b/stingray/powerspectrum.py @@ -1049,6 +1049,52 @@ def __init__(self, lc=None, segment_size=None, norm="frac", gti=None, sample_tim self._make_matrix(lc) def shift_and_add(self, f0_list, nbins=100, rebin=None): + """Shift-and-add the dynamical power spectrum. + + This is the basic operation for the shift-and-add operation used to track + kHz QPOs in X-ray binaries (e.g. Méndez et al. 1998, ApJ, 494, 65). + + Parameters + ---------- + freqs : np.array + Array of frequencies, the same for all powers. Must be sorted and on a uniform + grid. + power_list : list of np.array + List of power spectra. Each power spectrum must have the same length + as the frequency array. + f0_list : list of float + List of central frequencies + + Other parameters + ---------------- + nbins : int, default 100 + Number of bins to extract + rebin : int, default None + Rebin the final spectrum by this factor. At the moment, the rebinning + is linear. + + Returns + ------- + output: :class:`AveragedPowerspectrum` + The final averaged power spectrum. + + Examples + -------- + >>> power_list = [[2, 5, 2, 2, 2], [1, 1, 5, 1, 1], [3, 3, 3, 5, 3]] + >>> power_list = np.array(power_list).T + >>> freqs = np.arange(5) * 0.1 + >>> f0_list = [0.1, 0.2, 0.3, 0.4] + >>> dps = DynamicalPowerspectrum() + >>> dps.dyn_ps = power_list + >>> dps.freq = freqs + >>> dps.df = 0.1 + >>> dps.m = 1 + >>> output = dps.shift_and_add(f0_list, nbins=5) + >>> assert isinstance(output, AveragedPowerspectrum) + >>> assert np.array_equal(output.m, [2, 3, 3, 3, 2]) + >>> assert np.array_equal(output.power, [2. , 2. , 5. , 2. , 1.5]) + >>> assert np.allclose(output.freq, [0.05, 0.15, 0.25, 0.35, 0.45]) + """ return super().shift_and_add( f0_list, nbins=nbins, output_obj_type=AveragedPowerspectrum, rebin=rebin ) From fc6d5680084e006598a7b5029bab95edb65bf584 Mon Sep 17 00:00:00 2001 From: Matteo Bachetti Date: Tue, 8 Oct 2024 14:02:24 +0200 Subject: [PATCH 11/37] Speed up functions by eliminating needless copies and `asarray`s, jit-ing functions --- stingray/fourier.py | 138 +++++++++++++++++++++++++++++++++++--------- 1 file changed, 111 insertions(+), 27 deletions(-) diff --git a/stingray/fourier.py b/stingray/fourier.py index ad4021d75..8c9aff1df 100644 --- a/stingray/fourier.py +++ b/stingray/fourier.py @@ -21,6 +21,7 @@ sum_if_not_none_or_initialize, fix_segment_size_to_integer_samples, rebin_data, + njit, ) @@ -1359,7 +1360,66 @@ def fun(times, gti, segment_size): yield cts -def center_pds_on_f(freqs, powers, f0, nbins=100): +@njit() +def _safe_array_slice_indices(input_size, input_center_idx, nbins): + """Calculate the indices needed to extract a n-bin slice of an array, centered at an index. + + Let us say we have an array of size ``input_size`` and we want to extract a slice of + ``nbins`` centered at index ``input_center_idx``. We should be robust when the slice goes + beyond the edges of the input array, possibly leaving missing values in the output array. + This function calculates the indices needed to extract the slice from the input array, and + the indices in the output array that will be filled. + + In the most common case, the slice is entirely contained within the input array, so that the + output slice will just be ``[0:nbins]`` and the input slice + ``[input_center_idx - nbins // 2: input_center_idx - nbins // 2 + nbins]``. + + Parameters + ---------- + input_size : int + Input array size + center_idx : int + Index of the center of the slice + nbins : int + Number of bins to extract + + Returns + ------- + input_slice : list + Indices to extract the slice from the input array + output_slice : list + Indices to fill the output array + + Examples + -------- + >>> _safe_array_slice_indices(input_size=10, input_center_idx=5, nbins=3) + ([4, 7], [0, 3]) + + If the slice goes beyond the right edge: the output slice will only cover + the first two bins of the output array, and up to the end of the input array. + >>> _safe_array_slice_indices(input_size=6, input_center_idx=5, nbins=3) + ([4, 6], [0, 2]) + + """ + + minbin = input_center_idx - nbins // 2 + maxbin = minbin + nbins + + if minbin < 0: + output_slice = [-minbin, min(nbins, input_size - minbin)] + input_slice = [0, minbin + nbins] + elif maxbin > input_size: + output_slice = [0, nbins - (maxbin - input_size)] + input_slice = [minbin, input_size] + else: + output_slice = [0, nbins] + input_slice = [minbin, maxbin] + + return input_slice, output_slice + + +@njit() +def extract_pds_slice_around_freq(freqs, powers, f0, nbins=100): """Extract a slice of PDS around a given frequency. This function extracts a slice of the power spectrum around a given frequency. @@ -1385,27 +1445,57 @@ def center_pds_on_f(freqs, powers, f0, nbins=100): >>> freqs = np.arange(1, 100) * 0.1 >>> powers = 10 / freqs >>> f0 = 0.3 - >>> p = center_pds_on_f(freqs, powers, f0) + >>> p = extract_pds_slice_around_freq(freqs, powers, f0) >>> assert np.isnan(p[0]) >>> assert not np.any(np.isnan(p[48:])) """ powers = np.asarray(powers) chunk = np.zeros(nbins) + np.nan - fchunk = np.zeros(nbins) + # fchunk = np.zeros(nbins) start_f_idx = np.searchsorted(freqs, f0) - minbin = start_f_idx - nbins // 2 - maxbin = minbin + nbins + input_slice, output_slice = _safe_array_slice_indices(powers.size, start_f_idx, nbins) + chunk[output_slice[0] : output_slice[1]] = powers[input_slice[0] : input_slice[1]] + return chunk - if minbin < 0: - chunk[-minbin : min(nbins, powers.size - minbin)] = powers[: minbin + nbins] - elif maxbin > powers.size: - chunk[: nbins - (maxbin - powers.size)] = powers[minbin:] - else: - chunk[:] = powers[minbin:maxbin] - return chunk +@njit() +def _shift_and_average_core(input_array_list, weight_list, center_indices, nbins): + """Core function to shift_and_add, JIT-compiled for your convenience. + + Parameters + ---------- + input_array_list : list of np.array + List of input arrays + weight_list : list of float + List of weights for each input array + center_indices : list of int + Central indices of the slice of each input array to be summed + nbins : int + Number of bins to extract around the central index of each input array + + Returns + ------- + output_array : np.array + Average of the input arrays, weighted by the weights + sum_of_weights : np.array + Sum of the weights at each output bin + """ + input_size = input_array_list[0].size + output_array = np.zeros(nbins) + sum_of_weights = np.zeros(nbins) + for idx, array, weight in zip(center_indices, input_array_list, weight_list): + input_slice, output_slice = _safe_array_slice_indices(input_size, idx, nbins) + + for i in range(input_slice[1] - input_slice[0]): + output_array[output_slice[0] + i] += array[input_slice[0] + i] * weight + + sum_of_weights[output_slice[0] + i] += weight + + output_array = output_array / sum_of_weights + + return output_array, sum_of_weights def shift_and_add(freqs, power_list, f0_list, nbins=100, rebin=None, df=None, M=None): @@ -1442,9 +1532,7 @@ def shift_and_add(freqs, power_list, f0_list, nbins=100, rebin=None, df=None, M= Returns ------- f : np.array - Array of output frequencies. This will be centered on the mean of the - input ``f0_list``, and have the same frequency resolution as the original - frequency array. + Array of output frequencies p : np.array Array of output powers n : np.array @@ -1460,32 +1548,28 @@ def shift_and_add(freqs, power_list, f0_list, nbins=100, rebin=None, df=None, M= >>> assert np.array_equal(p, [2. , 2. , 5. , 2. , 1.5]) >>> assert np.allclose(f, [0.05, 0.15, 0.25, 0.35, 0.45]) """ - final_powers = np.zeros(nbins) + + # Check if the input list of power contains numpy arrays + if not hasattr(power_list[0], "size"): + power_list = np.asarray(power_list) + # input_size = np.size(power_list[0]) freqs = np.asarray(freqs) - mid_idx = np.searchsorted(freqs, np.mean(f0_list)) + # mid_idx = np.searchsorted(freqs, np.mean(f0_list)) if M is None: M = 1 if not isinstance(M, Iterable): M = np.ones(len(power_list)) * M - count = np.zeros(nbins) - for f0, powers, m in zip(f0_list, power_list, M): - idx = np.searchsorted(freqs, f0_list) + center_f_indices = np.searchsorted(freqs, f0_list) - powers = np.asarray(powers) * m - new_power = center_pds_on_f(freqs, powers, f0, nbins=nbins) - bad = np.isnan(new_power) - new_power[bad] = 0.0 - final_powers += new_power - count += np.array(~bad, dtype=int) * m + final_powers, count = _shift_and_average_core(power_list, M, center_f_indices, nbins) if df is None: df = freqs[1] - freqs[0] final_freqs = np.arange(-nbins // 2, nbins // 2 + 1)[:nbins] * df final_freqs = final_freqs - (final_freqs[0] + final_freqs[-1]) / 2 + np.mean(f0_list) - final_powers = final_powers / count if rebin is not None: _, count, _, _ = rebin_data(final_freqs, count, rebin * df) From a4a8b8a022766a47149c8a762b4c25d5af43055b Mon Sep 17 00:00:00 2001 From: Matteo Bachetti Date: Tue, 8 Oct 2024 22:56:34 +0200 Subject: [PATCH 12/37] Update docs with latest fixes and additions --- docs/notebooks | 2 +- docs/timeseries.rst | 1 + 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/docs/notebooks b/docs/notebooks index 2db8e50be..29620d646 160000 --- a/docs/notebooks +++ b/docs/notebooks @@ -1 +1 @@ -Subproject commit 2db8e50be46e3d5cddebb2c15647c45d63de4a92 +Subproject commit 29620d646b22266dc097c455fe615f9a0827269a diff --git a/docs/timeseries.rst b/docs/timeseries.rst index 1a62b5860..54322bc1f 100644 --- a/docs/timeseries.rst +++ b/docs/timeseries.rst @@ -6,3 +6,4 @@ Working with more generic time series notebooks/StingrayTimeseries/StingrayTimeseries Tutorial.ipynb notebooks/StingrayTimeseries/Working with weights and polarization.ipynb + notebooks/Modeling/GP_Modeling/GP_modeling_tutorial.ipynb From 69bf6c564dce2a664e78d4e40286b164c5b02c8e Mon Sep 17 00:00:00 2001 From: Matteo Bachetti Date: Wed, 9 Oct 2024 14:18:16 +0200 Subject: [PATCH 13/37] Update docs with dynspec notebooks --- docs/notebooks | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/notebooks b/docs/notebooks index 29620d646..65c1ba97f 160000 --- a/docs/notebooks +++ b/docs/notebooks @@ -1 +1 @@ -Subproject commit 29620d646b22266dc097c455fe615f9a0827269a +Subproject commit 65c1ba97fa5f40085973b7684e4ff967463c2b09 From df828a3abcb30d1aba9e778035fa4b667924eb3f Mon Sep 17 00:00:00 2001 From: Matteo Bachetti Date: Wed, 19 Jun 2024 14:36:14 +0200 Subject: [PATCH 14/37] Specify lightcurve chunks --- stingray/fourier.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/stingray/fourier.py b/stingray/fourier.py index 8c9aff1df..3e88527c9 100644 --- a/stingray/fourier.py +++ b/stingray/fourier.py @@ -2045,7 +2045,7 @@ def local_show_progress(a): ft1 = fft(flux1) ft2 = fft(flux2) - # Calculate the sum of each light curve, to calculate the mean + # Calculate the sum of each light curve chunk, to calculate the mean n_ph1 = flux1.sum() n_ph2 = flux2.sum() n_ph = np.sqrt(n_ph1 * n_ph2) From 695fedd3d31db832311c259b1c0bb98403808932 Mon Sep 17 00:00:00 2001 From: Matteo Bachetti Date: Mon, 5 Aug 2024 16:16:33 +0200 Subject: [PATCH 15/37] Generalize the input to rxte_pca_event_file_interpretation --- stingray/mission_support/missions.py | 10 ++-- stingray/mission_support/rxte.py | 56 ++++++++++++++++--- .../tests/test_mission_support.py | 25 +++++++++ 3 files changed, 76 insertions(+), 15 deletions(-) diff --git a/stingray/mission_support/missions.py b/stingray/mission_support/missions.py index 8b1bb98a8..4e221e27e 100644 --- a/stingray/mission_support/missions.py +++ b/stingray/mission_support/missions.py @@ -181,14 +181,12 @@ def get_rough_conversion_function(mission, instrument=None, epoch=None): def mission_specific_event_interpretation(mission): """Get the mission-specific FITS interpretation function. - This function will read a FITS :class:`astropy.io.fits.HDUList` object and modify it - in place to make the read into Stingray easier. + This function will read a file name or a FITS :class:`astropy.io.fits.HDUList` + object and modify it (see, e.g., :func:`rxte_pca_event_file_interpretation` for an + example) """ if mission.lower() == "xte": return rxte_pca_event_file_interpretation - def _empty(x): - return x - - return _empty + return None diff --git a/stingray/mission_support/rxte.py b/stingray/mission_support/rxte.py index d0842ec36..50a24efc6 100644 --- a/stingray/mission_support/rxte.py +++ b/stingray/mission_support/rxte.py @@ -4,6 +4,8 @@ from astropy.time import Time from astropy.table import Table +from astropy.io import fits + c_match = re.compile(r"C\[(.*)\]") _EDGE_TIMES = [ @@ -308,7 +310,7 @@ def rxte_calibration_func(instrument, epoch): raise ValueError(f"Unknown XTE instrument: {instrument}") -def rxte_pca_event_file_interpretation(hdulist): +def rxte_pca_event_file_interpretation(input_data, header=None, hduname=None): """Interpret the FITS header of an RXTE event file. At the moment, only science event files are supported. In these files, @@ -322,16 +324,52 @@ def rxte_pca_event_file_interpretation(hdulist): Parameters ---------- - hdulist : `astropy.io.fits.HDUList` - The FITS file to interpret. + input_data : str, fits.HDUList, fits.HDU, np.array + The name of the FITS file to, or the HDUList inside, or the HDU with + the data, or the data. + + Other parameters + ---------------- + header : `fits.Header`, optional + Compulsory if ``hdulist`` is not a class:`fits._BaseHDU`, a + :class:`fits.HDUList`, or a file name. The header of the relevant extension. + hduname : str, optional + Name of the HDU (only relevant if hdulist is a :class:`fits.HDUList`), + ignored otherwise. + """ - if "XTE_SE" not in hdulist: + if isinstance(input_data, str): + return rxte_pca_event_file_interpretation( + fits.open(input_data), header=header, hduname=hduname + ) + + if isinstance(input_data, fits.HDUList): + if hduname is None and "XTE_SE" not in input_data: + raise ValueError( + "No XTE_SE extension found. At the moment, only science events " + "are supported by Stingray for XTE." + ) + if hduname is None: + hduname = "XTE_SE" + new_hdu = rxte_pca_event_file_interpretation(input_data[hduname], header=header) + input_data[hduname] = new_hdu + return input_data + + if isinstance(input_data, fits.hdu.base._BaseHDU): + if header is None: + header = input_data.header + input_data.data = rxte_pca_event_file_interpretation(input_data.data, header=header) + return input_data + + data = input_data + if header is None: raise ValueError( - "No XTE_SE extension found. At the moment, only science events " - "are supported by Stingray for XTE." + "If the input data is not a HDUList or a HDU, the header must be specified" ) - tevtb2 = hdulist["XTE_SE"].header["TEVTB2"] + + tevtb2 = header["TEVTB2"] local_chans = np.asarray([int(np.mean(ch)) for ch in _decode_energy_channels(tevtb2)]) - # channels = _decode_energy_channels(tevtb2) - hdulist["XTE_SE"].data["PHA"] = local_chans[hdulist["XTE_SE"].data["PHA"]] + data["PHA"] = local_chans[data["PHA"]] + + return data diff --git a/stingray/mission_support/tests/test_mission_support.py b/stingray/mission_support/tests/test_mission_support.py index 9a1f27fd5..3ee8dbec4 100644 --- a/stingray/mission_support/tests/test_mission_support.py +++ b/stingray/mission_support/tests/test_mission_support.py @@ -1,5 +1,6 @@ import pytest import os +import numpy as np from astropy.io import fits from stingray.mission_support import ( rxte_pca_event_file_interpretation, @@ -24,3 +25,27 @@ def setup_class(cls): def test_wrong_file_raises(self): with pytest.raises(ValueError, match="No XTE_SE extension found."): rxte_pca_event_file_interpretation(fits.open(self.wrongfile)) + + def test_rxte_pca_event_file_interpretation(self): + filename = os.path.join(datadir, "xte_test.evt.gz") + unchanged_hdulist = fits.open(filename) + assert np.min(unchanged_hdulist["XTE_SE"].data["PHA"]) == 0 + assert np.max(unchanged_hdulist["XTE_SE"].data["PHA"]) == 60 + + first_new_hdulist = rxte_pca_event_file_interpretation(filename) + + second_new_hdulist = rxte_pca_event_file_interpretation(fits.open(filename)) + + new_hdu = rxte_pca_event_file_interpretation(fits.open(filename)["XTE_SE"]) + new_data = rxte_pca_event_file_interpretation( + fits.open(filename)["XTE_SE"].data, header=fits.open(filename)["XTE_SE"].header + ) + + for data in ( + new_data, + new_hdu.data, + first_new_hdulist["XTE_SE"].data, + second_new_hdulist["XTE_SE"].data, + ): + assert np.min(data["PHA"]) == 2 + assert np.max(data["PHA"]) == 221 From e67a4c17d861667a41e0f60b647cc5f3b8b5f56d Mon Sep 17 00:00:00 2001 From: Matteo Bachetti Date: Mon, 5 Aug 2024 18:01:20 +0200 Subject: [PATCH 16/37] Functions to split GTIs according to criteria Accept file names instead of hdulist Allow for shorter exposure cuts than GTIs --- stingray/gti.py | 161 +++++++++++++++++++++++++++++++++++++ stingray/tests/test_gti.py | 33 ++++++++ 2 files changed, 194 insertions(+) diff --git a/stingray/gti.py b/stingray/gti.py index 6a0648f13..e1e2dd70b 100644 --- a/stingray/gti.py +++ b/stingray/gti.py @@ -36,6 +36,9 @@ "gti_border_bins", "generate_indices_of_segment_boundaries_unbinned", "generate_indices_of_segment_boundaries_binned", + "split_gtis_by_exposure", + "split_gtis_at_index", + "find_large_bad_time_intervals", ] logger = setup_logger() @@ -270,6 +273,8 @@ def get_gti_from_all_extensions(lchdulist, accepted_gtistrings=["GTI"], det_numb ... det_numbers=[5]) >>> assert np.allclose(gti, [[200, 250]]) """ + if isinstance(lchdulist, str): + lchdulist = fits.open(lchdulist) acc_gti_strs = copy.deepcopy(accepted_gtistrings) if det_numbers is not None: for i in det_numbers: @@ -1687,3 +1692,159 @@ def generate_indices_of_segment_boundaries_binned(times, gti, segment_size, dt=N dt = 0 for idx0, idx1 in zip(startidx, stopidx): yield times[idx0] - dt / 2, times[min(idx1, times.size - 1)] - dt / 2, idx0, idx1 + + +def split_gtis_at_indices(gtis, index_list): + """Split a GTI list at the given indices, creating multiple GTI lists. + + Parameters + ---------- + gtis : 2-d float array + List of GTIs of the form ``[[gti0_0, gti0_1], [gti1_0, gti1_1], ...]`` + index_list : int or array-like + Index or list of indices at which to split the GTIs + + Returns + ------- + gti_lists : list of 2-d float arrays + List of GTI lists, split at the given indices + + Examples + -------- + >>> gtis = [[0, 30], [50, 60], [80, 90]] + >>> new_gtis = split_gtis_at_indices(gtis, 1) + >>> assert np.allclose(new_gtis[0], [[0, 30]]) + >>> assert np.allclose(new_gtis[1], [[50, 60], [80, 90]]) + """ + gtis = np.asanyarray(gtis) + if not isinstance(index_list, Iterable): + index_list = [index_list] + previous_idx = 0 + gti_lists = [] + if index_list[0] == 0: + index_list = index_list[1:] + for idx in index_list: + gti_lists.append(gtis[previous_idx:idx, :]) + previous_idx = idx + if index_list[-1] != -1 and index_list[-1] <= gtis[:, 0].size - 1: + gti_lists.append(gtis[previous_idx:, :]) + + return gti_lists + + +def find_large_bad_time_intervals(gtis, bti_length_limit=86400): + """Find large bad time intervals in a list of GTIs, and split the GTI list accordingly. + + Parameters + ---------- + gtis : 2-d float array + List of GTIs of the form ``[[gti0_0, gti0_1], [gti1_0, gti1_1], ...]`` + bti_length_limit : float + Maximum length of a bad time interval. If a BTI is longer than this, an edge will be + returned at the midpoint of the BTI. + + Returns + ------- + bad_interval_midpoints : list of float + List of midpoints of large bad time intervals + + Examples + -------- + >>> gtis = [[0, 30], [86450, 86460], [86480, 86490]] + >>> bad_interval_midpoints = find_large_bad_time_intervals(gtis) + >>> assert np.allclose(bad_interval_midpoints, [43240]) + """ + gtis = np.asanyarray(gtis) + bad_interval_midpoints = [] + # Check for compulsory edges + last_edge = gtis[0, 0] + for g in gtis: + if g[0] - last_edge > bti_length_limit: + logger.info(f"Detected large bad time interval between {g[0]} and {last_edge}") + bad_interval_midpoints.append((g[0] + last_edge) / 2) + last_edge = g[1] + + return bad_interval_midpoints + + +def split_gtis_by_exposure(gtis, exposure_per_chunk, new_interval_if_gti_sep=None): + """Split a list of GTIs into smaller GTI lists of a given total (approximate) exposure. + + Parameters + ---------- + gtis : 2-d float array + List of GTIs of the form ``[[gti0_0, gti0_1], [gti1_0, gti1_1], ...]`` + exposure_per_chunk : float + Total exposure of each chunk + + Other Parameters + ---------------- + new_interval_if_gti_sep : float + If the GTIs are separated by more than this time, split the observation in two. + + Returns + ------- + gti_list : list of 2-d float arrays + List of GTI lists, split into chunks of the given exposure / separated by more + than the given limit separation + + Examples + -------- + >>> gtis = [[0, 30], [86450, 86460]] + >>> new_gtis = split_gtis_by_exposure(gtis, 400, new_interval_if_gti_sep=86400) + >>> assert np.allclose(new_gtis[0], [[0, 30]]) + >>> assert np.allclose(new_gtis[1], [[86450, 86460]]) + >>> gtis = [[0, 30], [40, 70], [90, 120], [130, 160]] + >>> new_gtis = split_gtis_by_exposure(gtis, 60) + >>> assert np.allclose(new_gtis[0], [[0, 30], [40, 70]]) + >>> assert np.allclose(new_gtis[1], [[90, 120], [130, 160]]) + + """ + gtis = np.asanyarray(gtis) + total_exposure = np.sum(np.diff(gtis, axis=1)) + compulsory_edges = [] + if new_interval_if_gti_sep is not None: + compulsory_edges = find_large_bad_time_intervals(gtis, new_interval_if_gti_sep) + + base_gti_list = split_gtis_at_indices(gtis, np.searchsorted(gtis[:, 1], compulsory_edges)) + final_gti_list = [] + for local_gtis in base_gti_list: + local_split_gtis = split_gtis_by_exposure(local_gtis, exposure_per_chunk) + final_gti_list.extend(local_split_gtis) + return final_gti_list + + n_intervals = int(np.rint(total_exposure / exposure_per_chunk)) + + if n_intervals <= 1: + return np.asarray([gtis]) + + if len(gtis) <= n_intervals: + new_gtis = [] + for g in gtis: + if g[1] - g[0] > exposure_per_chunk: + new_edges = np.arange(g[0], g[1], exposure_per_chunk) + if new_edges[-1] < g[1]: + new_edges = np.append(new_edges, g[1]) + + new_gtis.extend([[ed0, ed1] for ed0, ed1 in zip(new_edges[:-1], new_edges[1:])]) + else: + new_gtis.append(g) + gtis = np.asarray(new_gtis) + + exposure_edges = [] + last_exposure = 0 + for g in gtis: + exposure_edges.append(last_exposure) + last_exposure += g[1] - g[0] + + exposure_edges = np.asarray(exposure_edges) + + total_exposure = last_exposure + + exposure_per_interval = total_exposure / n_intervals + exposure_intervals = np.arange(0, total_exposure + exposure_per_interval, exposure_per_interval) + + index_list = np.searchsorted(exposure_edges, exposure_intervals) + + vals = split_gtis_at_indices(gtis, index_list) + return vals diff --git a/stingray/tests/test_gti.py b/stingray/tests/test_gti.py index 1cf29cda1..1848d08f4 100644 --- a/stingray/tests/test_gti.py +++ b/stingray/tests/test_gti.py @@ -7,6 +7,7 @@ from stingray.gti import create_gti_from_condition, gti_len, gti_border_bins from stingray.gti import time_intervals_from_gtis, bin_intervals_from_gtis from stingray.gti import create_gti_mask_complete, join_equal_gti_boundaries +from stingray.gti import split_gtis_at_indices, split_gtis_by_exposure from stingray import StingrayError curdir = os.path.abspath(os.path.dirname(__file__)) @@ -363,6 +364,38 @@ def test_join_boundaries(self): newg = join_equal_gti_boundaries(gti) assert np.allclose(newg, np.array([[1.16703354e08, 1.16703514e08]])) + def test_split_gtis_by_exposure_min_gti_sep(self): + gtis = [[0, 30], [86450, 86460]] + new_gtis = split_gtis_by_exposure(gtis, 400, new_interval_if_gti_sep=86400) + assert np.allclose(new_gtis[0], [[0, 30]]) + assert np.allclose(new_gtis[1], [[86450, 86460]]) + + def test_split_gtis_by_exposure_no_min_gti_sep(self): + gtis = [[0, 30], [86440, 86470], [86490, 86520], [86530, 86560]] + new_gtis = split_gtis_by_exposure(gtis, 60, new_interval_if_gti_sep=None) + assert np.allclose(new_gtis[0], [[0, 30], [86440, 86470]]) + assert np.allclose(new_gtis[1], [[86490, 86520], [86530, 86560]]) + + def test_split_gtis_by_exposure_small_exp(self): + gtis = [[0, 30], [86440, 86470], [86490, 86495], [86500, 86505]] + new_gtis = split_gtis_by_exposure(gtis, 15, new_interval_if_gti_sep=None) + assert np.allclose( + new_gtis[:4], + [ + [[0, 15]], + [[15, 30]], + [[86440, 86455]], + [[86455, 86470]], + ], + ) + assert np.allclose(new_gtis[4], [[86490, 86495], [86500, 86505]]) + + def test_split_gtis_at_indices(self): + gtis = [[0, 30], [50, 60], [80, 90]] + new_gtis = split_gtis_at_indices(gtis, 1) + assert np.allclose(new_gtis[0], [[0, 30]]) + assert np.allclose(new_gtis[1], [[50, 60], [80, 90]]) + _ALL_METHODS = ["intersection", "union", "infer", "append"] From 05d15ee9da99fac5bdfdeebbace3fbe65fde4056 Mon Sep 17 00:00:00 2001 From: Matteo Bachetti Date: Mon, 2 Sep 2024 15:39:31 +0200 Subject: [PATCH 17/37] Stub FITSTimeseriesReader implementation --- stingray/io.py | 429 +++++++++++++++++++++++++++++++++++++- stingray/tests/test_io.py | 59 +++++- 2 files changed, 484 insertions(+), 4 deletions(-) diff --git a/stingray/io.py b/stingray/io.py index e3bb10de9..bb11fca36 100644 --- a/stingray/io.py +++ b/stingray/io.py @@ -16,6 +16,7 @@ import stingray.utils as utils from stingray.loggingconfig import setup_logger + from .utils import ( assign_value_if_none, is_string, @@ -23,7 +24,13 @@ is_sorted, make_dictionary_lowercase, ) -from .gti import get_gti_from_all_extensions, load_gtis +from .gti import ( + get_gti_from_all_extensions, + load_gtis, + get_total_gti_length, + split_gtis_by_exposure, + cross_two_gtis, +) from .mission_support import ( read_mission_info, @@ -36,11 +43,13 @@ import pickle _H5PY_INSTALLED = True +DEFAULT_FORMAT = "hdf5" try: import h5py except ImportError: _H5PY_INSTALLED = False + DEFAULT_FORMAT = "pickle" HAS_128 = True @@ -557,8 +566,8 @@ def load_events_and_gtis( mission = probe_header[mission_key].lower() mission_specific_processing = mission_specific_event_interpretation(mission) - - mission_specific_processing(hdulist) + if mission_specific_processing is not None: + mission_specific_processing(hdulist) db = read_mission_info(mission) instkey = get_key_from_mission_info(db, "instkey", "INSTRUME") @@ -714,6 +723,420 @@ def __init__(self): pass +class FITSTimeseriesReader(object): + main_array_attr = "time" + + def __init__( + self, + fname, + output_class, + force_hduname=None, + gti_file=None, + gtistring=None, + additional_columns=None, + ): + self.fname = fname + self._meta_attrs = [] + self.gtistring = gtistring + self.output_class = output_class + self.additional_columns = additional_columns + self._initialize_header(fname, force_hduname=force_hduname) + + if additional_columns is None and self.detector_key != "NONE": + additional_columns = [self.detector_key] + elif self.detector_key != "NONE": + additional_columns.append(self.detector_key) + self.data_hdu = fits.open(self.fname)[self.hduname] + self.gti_file = gti_file + self._read_gtis(self.gti_file) + + @property + def time(self): + return self[:].time + + def meta_attrs(self): + return self._meta_attrs + + def add_meta_attr(self, name, value): + if name not in self._meta_attrs: + self._meta_attrs.append(name) + setattr(self, name, value) + + @property + def exposure(self): + """ + Return the total exposure of the time series, i.e. the sum of the GTIs. + + Returns + ------- + total_exposure : float + The total exposure of the time series, in seconds. + """ + + return get_total_gti_length(self.gti) + + def __getitem__(self, index): + new_ts = self.output_class() + + columns = [self.time_column] + for col in self.pi_column, self.energy_column: + if col is not None: + columns.append(col) + + data = self.data_hdu.data[index] + + if self._mission_specific_processing is not None: + data = self._mission_specific_processing(data, header=self.header, hduname=self.hduname) + + # Set the times + setattr( + new_ts, + self.main_array_attr, + data[self.time_column][:] + self.timezero, + ) + # Get conversion function PI->Energy + try: + pi_energy_func = get_rough_conversion_function( + self.mission, + instrument=self.instr, + epoch=self.t_start / 86400 + self.mjdref, + ) + except ValueError: + pi_energy_func = None + + if self.energy_column in data.dtype.names: + new_ts.energy = data[self.energy_column] + elif self.pi_column in data.dtype.names: + new_ts.pi = data[self.pi_column] + if pi_energy_func is not None: + new_ts.energy = pi_energy_func(new_ts.pi) + + det_numbers = None + if self.detector_key is not None and self.detector_key in data.dtype.names: + new_ts.detector_id = data[self.detector_key] + det_numbers = list(set(new_ts.detector_id)) + self._read_gtis(self.gti_file, det_numbers=det_numbers) + + if self.additional_columns is not None: + for col in self.additional_columns: + if col == self.detector_key: + continue + if col in data.dtype.names: + setattr(new_ts, col.lower(), data[col]) + + for attr in self.meta_attrs(): + local_value = getattr(self, attr) + if attr in ["t_start", "t_stop", "gti"] and local_value is not None: + setattr(new_ts, attr, local_value + self.timezero) + else: + setattr(new_ts, attr, local_value) + + return new_ts + + def _initialize_header(self, fname, force_hduname=None): + hdulist = fits.open(fname) + if not force_hduname: + probe_header = hdulist[0].header + else: + probe_header = hdulist[force_hduname].header + + mission_key = "MISSION" + if mission_key not in probe_header: + mission_key = "TELESCOP" + self.add_meta_attr("mission", probe_header[mission_key].lower()) + self.add_meta_attr( + "_mission_specific_processing", + mission_specific_event_interpretation(self.mission), + ) + + db = read_mission_info(self.mission) + instkey = get_key_from_mission_info(db, "instkey", "INSTRUME") + instr = mode = None + if instkey in probe_header: + instr = probe_header[instkey].strip() + + modekey = get_key_from_mission_info(db, "dmodekey", None, instr) + if modekey is not None and modekey in probe_header: + mode = probe_header[modekey].strip() + self.add_meta_attr("instr", instr) + self.add_meta_attr("mode", mode) + + gtistring = self.gtistring + + if self.gtistring is None: + gtistring = get_key_from_mission_info(db, "gti", "GTI,STDGTI", instr, self.mode) + self.add_meta_attr("gtistring", gtistring) + + if force_hduname is None: + hduname = get_key_from_mission_info(db, "events", "EVENTS", instr, self.mode) + else: + hduname = force_hduname + + if hduname not in hdulist: + warnings.warn(f"HDU {hduname} not found. Trying first extension") + hduname = 1 + self.add_meta_attr("hduname", hduname) + + self.add_meta_attr("header", dict(hdulist[self.hduname].header)) + self.add_meta_attr("nphot", self.header["NAXIS2"]) + + ephem = timeref = timesys = None + if "PLEPHEM" in self.header: + # For the rare cases where this is a number, e.g. 200, I add `str` + # It's supposed to be a string. + ephem = str(self.header["PLEPHEM"]).strip().lstrip("JPL-").lower() + if "TIMEREF" in self.header: + timeref = self.header["TIMEREF"].strip().lower() + if "TIMESYS" in self.header: + timesys = self.header["TIMESYS"].strip().lower() + self.add_meta_attr("ephem", ephem) + self.add_meta_attr("timeref", timeref) + self.add_meta_attr("timesys", timesys) + + timezero = np.longdouble(0.0) + if "TIMEZERO" in self.header: + timezero = np.longdouble(self.header["TIMEZERO"]) + t_start = t_stop = None + if "TSTART" in self.header: + t_start = np.longdouble(self.header["TSTART"]) + if "TSTOP" in self.header: + t_stop = np.longdouble(self.header["TSTOP"]) + self.add_meta_attr("timezero", timezero) + self.add_meta_attr("t_start", t_start) + self.add_meta_attr("t_stop", t_stop) + + self.add_meta_attr( + "time_column", + get_key_from_mission_info(db, "time", "TIME", instr, mode), + ) + + self.add_meta_attr( + "detector_key", + get_key_from_mission_info(db, "ccol", "NONE", instr, mode), + ) + + self.add_meta_attr( + "mjdref", np.longdouble(high_precision_keyword_read(self.header, "MJDREF")) + ) + + default_pi_column = get_key_from_mission_info(db, "ecol", "PI", instr, self.mode) + if default_pi_column not in hdulist[self.hduname].data.columns.names: + default_pi_column = None + self.add_meta_attr("pi_column", default_pi_column) + + if "energy" in [val.lower() for val in hdulist[self.hduname].data.columns.names]: + energy_column = "energy" + else: + energy_column = None + self.add_meta_attr("energy_column", energy_column) + + def _read_gtis(self, gti_file=None, det_numbers=None): + # This is ugly, but if, e.g., we are reading XMM data, we *need* the + # detector number to access GTIs. + # So, here I'm reading a bunch of rows hoping that they represent the + # detector number population + if self.detector_key is not None: + with fits.open(self.fname) as hdul: + data = hdul[self.hduname].data + if self.detector_key in data.dtype.names: + probe_vals = data[:100][self.detector_key] + det_numbers = list(set(probe_vals)) + del hdul + + accepted_gtistrings = self.gtistring.split(",") + gti_list = None + + if gti_file is not None: + self.add_meta_attr("gti", load_gtis(gti_file, self.gtistring)) + return + + # Select first GTI with accepted name + try: + gti_list = get_gti_from_all_extensions( + self.fname, + accepted_gtistrings=accepted_gtistrings, + det_numbers=det_numbers, + ) + except Exception as e: # pragma: no cover + warnings.warn( + ( + f"No valid GTI extensions found. \nError: {str(e)}\n" + "GTIs will be set to the entire time series." + ), + ) + + self.add_meta_attr("gti", gti_list) + + def get_idx_from_time_range(self, start, stop): + """Get the index of the times in the event list that fall within the given time range.""" + time_edges, idx_edges = self.trace_nphots_in_file( + nedges=int(self.exposure // (stop - start)) + 2 + ) + + raw_min_idx = np.searchsorted(time_edges, start, side="left") + raw_max_idx = np.searchsorted(time_edges, stop, side="right") + + raw_min_idx = max(0, raw_min_idx - 2) + raw_max_idx = min(time_edges.size - 1, raw_max_idx + 2) + + raw_lower_edge = idx_edges[raw_min_idx] + raw_upper_edge = idx_edges[raw_max_idx] + + assert ( + start - time_edges[raw_min_idx] >= 0 + ), f"Start: {start}; {start - time_edges[raw_min_idx]} > 0" + assert ( + time_edges[raw_max_idx] - stop >= 0 + ), f"Stop: {stop}; {time_edges[raw_max_idx] - stop} < 0" + + with fits.open(self.fname) as hdulist: + filtered_times = hdulist[self.hduname].data[self.time_column][ + raw_lower_edge : raw_upper_edge + 1 + ] + # lower_edge = np.searchsorted(filtered_times, [start, stop]) + lower_edge, upper_edge = np.searchsorted(filtered_times, [start, stop]) + # Searchsorted will find the first number above stop. We want the last number below stop! + upper_edge -= 1 + + return lower_edge + raw_lower_edge, upper_edge + raw_lower_edge + + def apply_gti_lists(self, new_gti_lists, root_file_name=None, fmt=DEFAULT_FORMAT): + """Split the event list into different files, each with a different GTI. + + Parameters + ---------- + new_gti_lists : list of lists + A list of lists of GTIs. Each sublist should contain a list of GTIs + for a new file. + + Other Parameters + ---------------- + root_file_name : str, default None + The root name of the output files. The file name will be appended with + "_00", "_01", etc. + If None, a generator is returned instead of writing the files. + fmt : str + The format of the output files. Default is 'hdf5'. + + Returns + ------- + output_files : list of str + A list of the output file names. + + """ + + if len(new_gti_lists[0]) == len(self.gti) and np.all( + np.abs(np.asanyarray(new_gti_lists[0]).flatten() - self.gti.flatten()) < 1e-3 + ): + ev = self[:] + if root_file_name is None: + yield ev + else: + output_file = root_file_name + f"_00." + fmt.lstrip(".") + ev.write(output_file, fmt=fmt) + yield output_file + + for i, gti in enumerate(new_gti_lists): + if len(gti) == 0: + continue + + lower_edge, upper_edge = self.get_idx_from_time_range(gti[0, 0], gti[-1, 1]) + + ev = self[lower_edge : upper_edge + 1] + ev.gti = gti + + if root_file_name is not None: + new_file = root_file_name + f"_{i:002d}." + fmt.lstrip(".") + logger.info(f"Writing {new_file}") + ev.write(new_file, fmt=fmt) + yield new_file + else: + yield ev + + def trace_nphots_in_file(self, nedges=1001): + """Trace the number of photons as time advances in the file.""" + + if hasattr(self, "_time_edges") and len(self._time_edges) >= nedges: + return self._time_edges, self._idx_edges + + fname = self.fname + + with fits.open(fname) as hdul: + size = hdul[1].header["NAXIS2"] + nedges = min(nedges, size // 10 + 2) + + time_edges = np.zeros(nedges) + idx_edges = np.zeros(nedges, dtype=int) + for i, edge_idx in enumerate(np.linspace(0, size - 1, nedges).astype(int)): + idx_edges[i] = edge_idx + time_edges[i] = hdul[1].data["TIME"][edge_idx] + + mingti, maxgti = np.min(self.gti), np.max(self.gti) + if time_edges[0] > mingti: + time_edges[0] = mingti + if time_edges[-1] < maxgti: + time_edges[-1] = maxgti + + self._time_edges, self._idx_edges = time_edges, idx_edges + + return time_edges, idx_edges + + def split_by_number_of_samples(self, nsamples, root_file_name=None, fmt=DEFAULT_FORMAT): + """Split the event list into different files, each with approx. the given no. of photons. + + Parameters + ---------- + nsamples : int + The number of photons in each output file. + + Other Parameters + ---------------- + root_file_name : str, default None + The root name of the output files. The file name will be appended with + "_00", "_01", etc. + If None, a generator is returned instead of writing the files. + fmt : str + The format of the output files. Default is 'hdf5'. + + Returns + ------- + output_files : list of str + A list of the output file names. + """ + n_intervals = int(np.rint(self.nphot / nsamples)) + exposure_per_interval = self.exposure / n_intervals + new_gti_lists = split_gtis_by_exposure(self.gti, exposure_per_interval) + + return self.apply_gti_lists(new_gti_lists, root_file_name=root_file_name, fmt=fmt) + + def filter_at_time_intervals(self, time_intervals, root_file_name=None, fmt=DEFAULT_FORMAT): + """Filter the event list at the given time intervals. + + Parameters + ---------- + time_intervals : 2-d float array + List of time intervals of the form ``[[time0_0, time0_1], [time1_0, time1_1], ...]`` + + Other Parameters + ---------------- + root_file_name : str, default None + The root name of the output files. The file name will be appended with + "_00", "_01", etc. + If None, a generator is returned instead of writing the files. + fmt : str + The format of the output files. Default is 'hdf5'. + + Returns + ------- + output_files : list of str + A list of the output file names. + """ + if len(np.shape(time_intervals)) == 1: + time_intervals = [time_intervals] + new_gti = [cross_two_gtis(self.gti, [t_int]) for t_int in time_intervals] + return self.apply_gti_lists(new_gti, root_file_name=root_file_name, fmt=fmt) + + def mkdir_p(path): # pragma: no cover """Safe ``mkdir`` function diff --git a/stingray/tests/test_io.py b/stingray/tests/test_io.py index 5f3603e31..893c0456f 100644 --- a/stingray/tests/test_io.py +++ b/stingray/tests/test_io.py @@ -9,7 +9,8 @@ from ..io import ref_mjd from ..io import high_precision_keyword_read from ..io import load_events_and_gtis, read_mission_info -from ..io import read_header_key +from ..io import read_header_key, FITSTimeseriesReader, DEFAULT_FORMAT +from ..events import EventList import warnings @@ -214,3 +215,59 @@ def test_calibrate_spectrum(self): pis = np.array([1, 2, 3]) energies = pi_to_energy(pis, self.rmf) assert np.allclose(energies, [1.66, 1.70, 1.74]) + + +class TestFITSTimeseriesReader(object): + @classmethod + def setup_class(cls): + curdir = os.path.abspath(os.path.dirname(__file__)) + cls.datadir = os.path.join(curdir, "data") + cls.fname = os.path.join(datadir, "monol_testA.evt") + + def test_read_fits_timeseries(self): + reader = FITSTimeseriesReader(self.fname, output_class=EventList) + # Full slice + all_ev = reader[:] + assert np.all((all_ev.time > 80000000) & (all_ev.time < 80001024)) + + def test_read_fits_timeseries_by_nsamples(self): + reader = FITSTimeseriesReader(self.fname, output_class=EventList) + # Full slice + outfnames = list(reader.split_by_number_of_samples(500, root_file_name="test")) + assert len(outfnames) == 2 + ev0 = EventList.read(outfnames[0], fmt=DEFAULT_FORMAT) + ev1 = EventList.read(outfnames[1], fmt=DEFAULT_FORMAT) + assert np.all(ev0.time < 80000512.5) + assert np.all(ev1.time > 80000512.5) + for fname in outfnames: + os.unlink(fname) + + def test_read_fits_timeseries_by_time_intv(self): + reader = FITSTimeseriesReader(self.fname, output_class=EventList) + # Full slice + outfnames = list( + reader.filter_at_time_intervals([80000100, 80001100], root_file_name="test") + ) + assert len(outfnames) == 1 + ev0 = EventList.read(outfnames[0], fmt=DEFAULT_FORMAT) + assert np.all((ev0.time > 80000100) & (ev0.time < 80001100)) + assert np.all((ev0.gti >= 80000100) & (ev0.gti < 80001100)) + for fname in outfnames: + os.unlink(fname) + + def test_read_fits_timeseries_by_nsamples_generator(self): + reader = FITSTimeseriesReader(self.fname, output_class=EventList) + # Full slice + ev0, ev1 = list(reader.split_by_number_of_samples(500)) + + assert np.all(ev0.time < 80000512.5) + assert np.all(ev1.time > 80000512.5) + + def test_read_fits_timeseries_by_time_intv_generator(self): + reader = FITSTimeseriesReader(self.fname, output_class=EventList) + # Full slice + evs = list(reader.filter_at_time_intervals([80000100, 80001100])) + assert len(evs) == 1 + ev0 = evs[0] + assert np.all((ev0.time > 80000100) & (ev0.time < 80001100)) + assert np.all((ev0.gti >= 80000100) & (ev0.gti < 80001100)) From 2b38af22bc8797a331c3fd07ca7ec9d662f7c478 Mon Sep 17 00:00:00 2001 From: Matteo Bachetti Date: Tue, 6 Aug 2024 00:32:57 +0200 Subject: [PATCH 18/37] Use FITSTimeseriesReader when reading event lists Additional Inference of format for fits formats --- stingray/events.py | 36 ++++++++++++++---------------------- 1 file changed, 14 insertions(+), 22 deletions(-) diff --git a/stingray/events.py b/stingray/events.py index c59ec49cf..01941f8d6 100644 --- a/stingray/events.py +++ b/stingray/events.py @@ -14,7 +14,7 @@ from .base import StingrayTimeseries from .filters import get_deadtime_mask from .gti import generate_indices_of_boundaries -from .io import load_events_and_gtis, pi_to_energy +from .io import load_events_and_gtis, pi_to_energy, get_file_extension from .lightcurve import Lightcurve from .utils import simon, njit from .utils import histogram @@ -620,28 +620,20 @@ def read(cls, filename, fmt=None, rmf_file=None, **kwargs): ev: :class:`EventList` object The :class:`EventList` object reconstructed from file """ - + if fmt is None: + for fits_ext in ["fits", "evt"]: + if fits_ext in get_file_extension(filename).lower(): + fmt = "hea" + break if fmt is not None and fmt.lower() in ("hea", "ogip"): - evtdata = load_events_and_gtis(filename, **kwargs) - - evt = EventList( - time=evtdata.ev_list, - gti=evtdata.gti_list, - pi=evtdata.pi_list, - energy=evtdata.energy_list, - mjdref=evtdata.mjdref, - instr=evtdata.instr, - mission=evtdata.mission, - header=evtdata.header, - detector_id=evtdata.detector_id, - ephem=evtdata.ephem, - timeref=evtdata.timeref, - timesys=evtdata.timesys, - ) - if "additional_columns" in kwargs: - for key in evtdata.additional_data: - if not hasattr(evt, key.lower()): - setattr(evt, key.lower(), evtdata.additional_data[key]) + from .io import FITSTimeseriesReader + + additional_columns = kwargs.pop("additional_columns", None) + + evt = FITSTimeseriesReader( + filename, output_class=EventList, additional_columns=additional_columns + )[:] + if rmf_file is not None: evt.convert_pi_to_energy(rmf_file) return evt From 5045d2b2e2e5014ffc976581fd35e40a2c57ec18 Mon Sep 17 00:00:00 2001 From: Matteo Bachetti Date: Mon, 2 Sep 2024 16:50:33 +0200 Subject: [PATCH 19/37] Add changelog --- docs/changes/834.feature.rst | 1 + 1 file changed, 1 insertion(+) create mode 100644 docs/changes/834.feature.rst diff --git a/docs/changes/834.feature.rst b/docs/changes/834.feature.rst new file mode 100644 index 000000000..2cfddf02b --- /dev/null +++ b/docs/changes/834.feature.rst @@ -0,0 +1 @@ +Introduced FITSReader class for lazy-loading of event lists From 8fb42137fff858f03d086caf6763d8063a11ffc0 Mon Sep 17 00:00:00 2001 From: Matteo Bachetti Date: Fri, 27 Sep 2024 17:20:05 +0200 Subject: [PATCH 20/37] Small fixes --- stingray/gti.py | 13 ++++++------- 1 file changed, 6 insertions(+), 7 deletions(-) diff --git a/stingray/gti.py b/stingray/gti.py index e1e2dd70b..52e368ecf 100644 --- a/stingray/gti.py +++ b/stingray/gti.py @@ -1733,14 +1733,14 @@ def split_gtis_at_indices(gtis, index_list): def find_large_bad_time_intervals(gtis, bti_length_limit=86400): - """Find large bad time intervals in a list of GTIs, and split the GTI list accordingly. + """Find large bad time intervals (BTIs) in a list of GTIs, and split the GTI list accordingly. Parameters ---------- gtis : 2-d float array List of GTIs of the form ``[[gti0_0, gti0_1], [gti1_0, gti1_1], ...]`` bti_length_limit : float - Maximum length of a bad time interval. If a BTI is longer than this, an edge will be + Maximum length of a BTI. If a BTI is longer than this, an edge will be returned at the midpoint of the BTI. Returns @@ -1775,7 +1775,7 @@ def split_gtis_by_exposure(gtis, exposure_per_chunk, new_interval_if_gti_sep=Non gtis : 2-d float array List of GTIs of the form ``[[gti0_0, gti0_1], [gti1_0, gti1_1], ...]`` exposure_per_chunk : float - Total exposure of each chunk + Total exposure of each chunk, in seconds Other Parameters ---------------- @@ -1801,7 +1801,7 @@ def split_gtis_by_exposure(gtis, exposure_per_chunk, new_interval_if_gti_sep=Non """ gtis = np.asanyarray(gtis) - total_exposure = np.sum(np.diff(gtis, axis=1)) + rough_total_exposure = np.sum(np.diff(gtis, axis=1)) compulsory_edges = [] if new_interval_if_gti_sep is not None: compulsory_edges = find_large_bad_time_intervals(gtis, new_interval_if_gti_sep) @@ -1813,7 +1813,7 @@ def split_gtis_by_exposure(gtis, exposure_per_chunk, new_interval_if_gti_sep=Non final_gti_list.extend(local_split_gtis) return final_gti_list - n_intervals = int(np.rint(total_exposure / exposure_per_chunk)) + n_intervals = int(np.rint(rough_total_exposure / exposure_per_chunk)) if n_intervals <= 1: return np.asarray([gtis]) @@ -1836,11 +1836,10 @@ def split_gtis_by_exposure(gtis, exposure_per_chunk, new_interval_if_gti_sep=Non for g in gtis: exposure_edges.append(last_exposure) last_exposure += g[1] - g[0] + total_exposure = last_exposure exposure_edges = np.asarray(exposure_edges) - total_exposure = last_exposure - exposure_per_interval = total_exposure / n_intervals exposure_intervals = np.arange(0, total_exposure + exposure_per_interval, exposure_per_interval) From a328fe6b1f340aa7bf26b76618ce6a2b84392c9a Mon Sep 17 00:00:00 2001 From: Matteo Bachetti Date: Wed, 16 Oct 2024 10:06:32 +0200 Subject: [PATCH 21/37] Fix Zenodo links --- docs/citing.rst | 2 +- docs/conf.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/docs/citing.rst b/docs/citing.rst index 13c27e343..c3952bb00 100644 --- a/docs/citing.rst +++ b/docs/citing.rst @@ -12,7 +12,7 @@ If possible, we ask that you cite a DOI corresponding to the specific version of .. include:: _zenodo.rst -If this isn't possible — for example, because you worked with an unreleased version of the code — you can cite Stingray's `concept DOI `__, `10.5281/zenodo.1490116 `__ (`BibTeX `__), which will always resolve to the latest release. +If this isn't possible — for example, because you worked with an unreleased version of the code — you can cite Stingray's `concept DOI `__, `10.5281/zenodo.1490116 `__ (`BibTeX `__), which will always resolve to the latest release. Papers ====== diff --git a/docs/conf.py b/docs/conf.py index a3866d877..18d431756 100644 --- a/docs/conf.py +++ b/docs/conf.py @@ -197,7 +197,7 @@ class Release(object): @property def zenodo_url(self): - return f"https://zenodo.org/record/{self.doi.split('.')[-1]}" + return f"https://zenodo.org/records/{self.doi.split('.')[-1]}" @property def github_url(self): From 624a407870369035b829bc3ee2a9d651d95358f7 Mon Sep 17 00:00:00 2001 From: Matteo Bachetti Date: Wed, 16 Oct 2024 10:12:48 +0200 Subject: [PATCH 22/37] Update notebooks --- docs/notebooks | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/notebooks b/docs/notebooks index 65c1ba97f..2728debe4 160000 --- a/docs/notebooks +++ b/docs/notebooks @@ -1 +1 @@ -Subproject commit 65c1ba97fa5f40085973b7684e4ff967463c2b09 +Subproject commit 2728debe4c7086b646c021d91d8cc1a3f6a94846 From 97956e31a87373ea4aefd403deed8243655c287b Mon Sep 17 00:00:00 2001 From: Matteo Bachetti Date: Wed, 16 Oct 2024 10:29:56 +0200 Subject: [PATCH 23/37] Trust some sites for links --- docs/conf.py | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/docs/conf.py b/docs/conf.py index 18d431756..7b24d9417 100644 --- a/docs/conf.py +++ b/docs/conf.py @@ -158,8 +158,13 @@ # (source start file, name, description, authors, manual section). man_pages = [("index", project.lower(), project + " Documentation", [author], 1)] -# Trust the links from doi.org, even if they might have Client errors or other minor issues -linkcheck_ignore = [r"https://doi.org/"] +# Trust the links from these sites, even if they might have Client errors or other minor issues +linkcheck_ignore = [ + r"https://doi.org/", + r"https://arxiv.org/", + r"https://.*adsabs.harvard.edu/", + r"https://zenodo.org/", +] # -- Options for the edit_on_github extension --------------------------------- From f76fdb6b111a32cfa2f2d61670af5067cd284ee7 Mon Sep 17 00:00:00 2001 From: Matteo Bachetti Date: Wed, 16 Oct 2024 10:31:05 +0200 Subject: [PATCH 24/37] Change Numba warning --- pyproject.toml | 2 +- stingray/utils.py | 4 +++- 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/pyproject.toml b/pyproject.toml index d7fa7ec03..fc5abe488 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -152,7 +152,7 @@ filterwarnings = [ "ignore:.*Number of segments used in averaging.*:UserWarning", "ignore:.*:astropy.units.core.UnitsWarning", "ignore:.*cannot be added to FITS Header:astropy.utils.exceptions.AstropyUserWarning", - "ignore:Numba not installed:UserWarning", + "ignore:The recommended numba package is not installed:UserWarning", "ignore:More than 20 figures have been opened.:RuntimeWarning", "ignore:This platform does not support:RuntimeWarning", "ignore:Some error bars in the Averaged:UserWarning", diff --git a/stingray/utils.py b/stingray/utils.py index 9c59c6901..e8572b2c8 100644 --- a/stingray/utils.py +++ b/stingray/utils.py @@ -51,7 +51,9 @@ from numba import njit, prange, vectorize, float32, float64, int32, int64 from numba.core.errors import NumbaValueError, NumbaNotImplementedError, TypingError except ImportError: - warnings.warn("Numba not installed. Faking it") + warnings.warn( + "The recommended numba package is not installed. Some functionality might be slower." + ) HAS_NUMBA = False NumbaValueError = NumbaNotImplementedError = TypingError = Exception From 33bb09169e3f6b98c6821d37fc9b6067091315cf Mon Sep 17 00:00:00 2001 From: Matteo Bachetti Date: Wed, 16 Oct 2024 15:06:54 +0200 Subject: [PATCH 25/37] Update notebooks [docs only] --- docs/notebooks | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/notebooks b/docs/notebooks index 2728debe4..f3210ebe1 160000 --- a/docs/notebooks +++ b/docs/notebooks @@ -1 +1 @@ -Subproject commit 2728debe4c7086b646c021d91d8cc1a3f6a94846 +Subproject commit f3210ebe12e48fd17c89f10c6f6b61e5ef733c69 From 3dda838830c16dab2b04ec4bf086b24c21f84353 Mon Sep 17 00:00:00 2001 From: Matteo Bachetti Date: Wed, 16 Oct 2024 21:10:48 +0200 Subject: [PATCH 26/37] Move import to top --- stingray/events.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/stingray/events.py b/stingray/events.py index 01941f8d6..9c5e5d39f 100644 --- a/stingray/events.py +++ b/stingray/events.py @@ -14,7 +14,8 @@ from .base import StingrayTimeseries from .filters import get_deadtime_mask from .gti import generate_indices_of_boundaries -from .io import load_events_and_gtis, pi_to_energy, get_file_extension +from .io import pi_to_energy, get_file_extension +from .io import FITSTimeseriesReader from .lightcurve import Lightcurve from .utils import simon, njit from .utils import histogram @@ -626,7 +627,6 @@ def read(cls, filename, fmt=None, rmf_file=None, **kwargs): fmt = "hea" break if fmt is not None and fmt.lower() in ("hea", "ogip"): - from .io import FITSTimeseriesReader additional_columns = kwargs.pop("additional_columns", None) From 79d2507b1b705921aa8767b64dcbf8b8723eb96e Mon Sep 17 00:00:00 2001 From: Matteo Bachetti Date: Wed, 16 Oct 2024 21:40:13 +0200 Subject: [PATCH 27/37] Maake add_meta_attr private, draft docstrings --- stingray/io.py | 47 +++++++++++++++++++++++++---------------------- 1 file changed, 25 insertions(+), 22 deletions(-) diff --git a/stingray/io.py b/stingray/io.py index bb11fca36..80675f48c 100644 --- a/stingray/io.py +++ b/stingray/io.py @@ -757,7 +757,7 @@ def time(self): def meta_attrs(self): return self._meta_attrs - def add_meta_attr(self, name, value): + def _add_meta_attr(self, name, value): if name not in self._meta_attrs: self._meta_attrs.append(name) setattr(self, name, value) @@ -776,6 +776,7 @@ def exposure(self): return get_total_gti_length(self.gti) def __getitem__(self, index): + """Return an element or a slice of the object, e.g. ``ts[1]`` or ``ts[1:2].""" new_ts = self.output_class() columns = [self.time_column] @@ -834,6 +835,7 @@ def __getitem__(self, index): return new_ts def _initialize_header(self, fname, force_hduname=None): + """Read the header of the FITS file and set the relevant attributes.""" hdulist = fits.open(fname) if not force_hduname: probe_header = hdulist[0].header @@ -843,8 +845,8 @@ def _initialize_header(self, fname, force_hduname=None): mission_key = "MISSION" if mission_key not in probe_header: mission_key = "TELESCOP" - self.add_meta_attr("mission", probe_header[mission_key].lower()) - self.add_meta_attr( + self._add_meta_attr("mission", probe_header[mission_key].lower()) + self._add_meta_attr( "_mission_specific_processing", mission_specific_event_interpretation(self.mission), ) @@ -858,14 +860,14 @@ def _initialize_header(self, fname, force_hduname=None): modekey = get_key_from_mission_info(db, "dmodekey", None, instr) if modekey is not None and modekey in probe_header: mode = probe_header[modekey].strip() - self.add_meta_attr("instr", instr) - self.add_meta_attr("mode", mode) + self._add_meta_attr("instr", instr) + self._add_meta_attr("mode", mode) gtistring = self.gtistring if self.gtistring is None: gtistring = get_key_from_mission_info(db, "gti", "GTI,STDGTI", instr, self.mode) - self.add_meta_attr("gtistring", gtistring) + self._add_meta_attr("gtistring", gtistring) if force_hduname is None: hduname = get_key_from_mission_info(db, "events", "EVENTS", instr, self.mode) @@ -875,10 +877,10 @@ def _initialize_header(self, fname, force_hduname=None): if hduname not in hdulist: warnings.warn(f"HDU {hduname} not found. Trying first extension") hduname = 1 - self.add_meta_attr("hduname", hduname) + self._add_meta_attr("hduname", hduname) - self.add_meta_attr("header", dict(hdulist[self.hduname].header)) - self.add_meta_attr("nphot", self.header["NAXIS2"]) + self._add_meta_attr("header", dict(hdulist[self.hduname].header)) + self._add_meta_attr("nphot", self.header["NAXIS2"]) ephem = timeref = timesys = None if "PLEPHEM" in self.header: @@ -889,9 +891,9 @@ def _initialize_header(self, fname, force_hduname=None): timeref = self.header["TIMEREF"].strip().lower() if "TIMESYS" in self.header: timesys = self.header["TIMESYS"].strip().lower() - self.add_meta_attr("ephem", ephem) - self.add_meta_attr("timeref", timeref) - self.add_meta_attr("timesys", timesys) + self._add_meta_attr("ephem", ephem) + self._add_meta_attr("timeref", timeref) + self._add_meta_attr("timesys", timesys) timezero = np.longdouble(0.0) if "TIMEZERO" in self.header: @@ -901,36 +903,37 @@ def _initialize_header(self, fname, force_hduname=None): t_start = np.longdouble(self.header["TSTART"]) if "TSTOP" in self.header: t_stop = np.longdouble(self.header["TSTOP"]) - self.add_meta_attr("timezero", timezero) - self.add_meta_attr("t_start", t_start) - self.add_meta_attr("t_stop", t_stop) + self._add_meta_attr("timezero", timezero) + self._add_meta_attr("t_start", t_start) + self._add_meta_attr("t_stop", t_stop) - self.add_meta_attr( + self._add_meta_attr( "time_column", get_key_from_mission_info(db, "time", "TIME", instr, mode), ) - self.add_meta_attr( + self._add_meta_attr( "detector_key", get_key_from_mission_info(db, "ccol", "NONE", instr, mode), ) - self.add_meta_attr( + self._add_meta_attr( "mjdref", np.longdouble(high_precision_keyword_read(self.header, "MJDREF")) ) default_pi_column = get_key_from_mission_info(db, "ecol", "PI", instr, self.mode) if default_pi_column not in hdulist[self.hduname].data.columns.names: default_pi_column = None - self.add_meta_attr("pi_column", default_pi_column) + self._add_meta_attr("pi_column", default_pi_column) if "energy" in [val.lower() for val in hdulist[self.hduname].data.columns.names]: energy_column = "energy" else: energy_column = None - self.add_meta_attr("energy_column", energy_column) + self._add_meta_attr("energy_column", energy_column) def _read_gtis(self, gti_file=None, det_numbers=None): + """Read GTIs from the FITS file.""" # This is ugly, but if, e.g., we are reading XMM data, we *need* the # detector number to access GTIs. # So, here I'm reading a bunch of rows hoping that they represent the @@ -947,7 +950,7 @@ def _read_gtis(self, gti_file=None, det_numbers=None): gti_list = None if gti_file is not None: - self.add_meta_attr("gti", load_gtis(gti_file, self.gtistring)) + self._add_meta_attr("gti", load_gtis(gti_file, self.gtistring)) return # Select first GTI with accepted name @@ -965,7 +968,7 @@ def _read_gtis(self, gti_file=None, det_numbers=None): ), ) - self.add_meta_attr("gti", gti_list) + self._add_meta_attr("gti", gti_list) def get_idx_from_time_range(self, start, stop): """Get the index of the times in the event list that fall within the given time range.""" From 59cc4494f3dd8cc2537d545da7550cd6f39cea5e Mon Sep 17 00:00:00 2001 From: Matteo Bachetti Date: Thu, 17 Oct 2024 15:03:20 +0200 Subject: [PATCH 28/37] Add docstrings everywhere --- stingray/io.py | 53 ++++++++++++++++++++++++++++++++++++++++++++------ 1 file changed, 47 insertions(+), 6 deletions(-) diff --git a/stingray/io.py b/stingray/io.py index 80675f48c..99eaa159d 100644 --- a/stingray/io.py +++ b/stingray/io.py @@ -758,6 +758,7 @@ def meta_attrs(self): return self._meta_attrs def _add_meta_attr(self, name, value): + """Add a meta attribute to the object.""" if name not in self._meta_attrs: self._meta_attrs.append(name) setattr(self, name, value) @@ -970,9 +971,31 @@ def _read_gtis(self, gti_file=None, det_numbers=None): self._add_meta_attr("gti", gti_list) - def get_idx_from_time_range(self, start, stop): - """Get the index of the times in the event list that fall within the given time range.""" - time_edges, idx_edges = self.trace_nphots_in_file( + def _get_idx_from_time_range(self, start, stop): + """Get the index of the times in the event list that fall within the given time range. + + Instead of reading all the data from the file and doing ``np.searchsorted``, which could + easily fill up the memory, this function does a two-step procedure. It first uses + ``self._trace_nphots_in_file`` to get a grid of times and their corresponding + indices in the file. Then, it reads only the data that strictly include the requested time + range, and on those data it performs a searchsorted operation. The final indices will be + summed to the lower index of the data that was read. + + Parameters + ---------- + start : float + Start time of the interval + stop : float + Stop time of the interval + + Returns + ------- + lower_edge : int + Index of the first photon in the requested time range + upper_edge : int + Index of the last photon in the requested time range + """ + time_edges, idx_edges = self._trace_nphots_in_file( nedges=int(self.exposure // (stop - start)) + 2 ) @@ -1043,7 +1066,7 @@ def apply_gti_lists(self, new_gti_lists, root_file_name=None, fmt=DEFAULT_FORMAT if len(gti) == 0: continue - lower_edge, upper_edge = self.get_idx_from_time_range(gti[0, 0], gti[-1, 1]) + lower_edge, upper_edge = self._get_idx_from_time_range(gti[0, 0], gti[-1, 1]) ev = self[lower_edge : upper_edge + 1] ev.gti = gti @@ -1056,8 +1079,26 @@ def apply_gti_lists(self, new_gti_lists, root_file_name=None, fmt=DEFAULT_FORMAT else: yield ev - def trace_nphots_in_file(self, nedges=1001): - """Trace the number of photons as time advances in the file.""" + def _trace_nphots_in_file(self, nedges=1001): + """Trace the number of photons as time advances in the file. + + This function traces the number of photons as time advances in the file. + This is a way to quickly map the distribution of photons in time, without + reading the entire file. This map can be useful to then access the wanted + data without loading all the file in memory. + + Other Parameters + ---------------- + nedges : int + The number of time edges to trace. Default is 1001. + + Returns + ------- + time_edges : np.ndarray + The time edges + idx_edges : np.ndarray + The index edges + """ if hasattr(self, "_time_edges") and len(self._time_edges) >= nedges: return self._time_edges, self._idx_edges From 014b0a3c5366df4a1ca627cf96b9c6e325d21373 Mon Sep 17 00:00:00 2001 From: Matteo Bachetti Date: Thu, 17 Oct 2024 15:30:51 +0200 Subject: [PATCH 29/37] Comment the reading of event file information; prepare for extension to other kinds of timeseries --- stingray/io.py | 40 +++++++++++++++++++++++++++++++++++++--- 1 file changed, 37 insertions(+), 3 deletions(-) diff --git a/stingray/io.py b/stingray/io.py index 99eaa159d..d296405c4 100644 --- a/stingray/io.py +++ b/stingray/io.py @@ -734,13 +734,20 @@ def __init__( gti_file=None, gtistring=None, additional_columns=None, + data_kind="events", ): self.fname = fname self._meta_attrs = [] self.gtistring = gtistring self.output_class = output_class self.additional_columns = additional_columns - self._initialize_header(fname, force_hduname=force_hduname) + if data_kind.lower() == "events": + self._initialize_header_events(fname, force_hduname=force_hduname) + else: + raise NotImplementedError( + "Only events are supported by FITSTimeseriesReader at the moment. " + f"{data_kind} is an unknown data kind." + ) if additional_columns is None and self.detector_key != "NONE": additional_columns = [self.detector_key] @@ -835,14 +842,32 @@ def __getitem__(self, index): return new_ts - def _initialize_header(self, fname, force_hduname=None): - """Read the header of the FITS file and set the relevant attributes.""" + def _initialize_header_events(self, fname, force_hduname=None): + """Read the header of the FITS file and set the relevant attributes. + + When possibile, some mission-specific information is read from the keywords and + extension names found in ``xselect.mdb``. + + Parameters + ---------- + fname : str + The name of the FITS file to read + + Other parameters + ---------------- + force_hduname : str or int, default None + If not None, the name of the HDU to read. If None, an extension called + EVENTS or the first extension. + """ hdulist = fits.open(fname) if not force_hduname: probe_header = hdulist[0].header else: probe_header = hdulist[force_hduname].header + # We need the minimal information to read the mission database. + # That is, the name of the mission/telescope, the instrument and, + # if available, the observing mode. mission_key = "MISSION" if mission_key not in probe_header: mission_key = "TELESCOP" @@ -852,6 +877,8 @@ def _initialize_header(self, fname, force_hduname=None): mission_specific_event_interpretation(self.mission), ) + # Now, we read the mission info, and we try to get the relevant + # information from the header using the mission-specific keywords. db = read_mission_info(self.mission) instkey = get_key_from_mission_info(db, "instkey", "INSTRUME") instr = mode = None @@ -875,6 +902,8 @@ def _initialize_header(self, fname, force_hduname=None): else: hduname = force_hduname + # If the EVENT/``force_hduname`` extension is not found, try the first extension + # which is usually the one containing the data if hduname not in hdulist: warnings.warn(f"HDU {hduname} not found. Trying first extension") hduname = 1 @@ -883,6 +912,7 @@ def _initialize_header(self, fname, force_hduname=None): self._add_meta_attr("header", dict(hdulist[self.hduname].header)) self._add_meta_attr("nphot", self.header["NAXIS2"]) + # These are the important keywords for timing. ephem = timeref = timesys = None if "PLEPHEM" in self.header: # For the rare cases where this is a number, e.g. 200, I add `str` @@ -922,11 +952,15 @@ def _initialize_header(self, fname, force_hduname=None): "mjdref", np.longdouble(high_precision_keyword_read(self.header, "MJDREF")) ) + # Try to get the information needed to calculate the event energy. We start from the + # PI column default_pi_column = get_key_from_mission_info(db, "ecol", "PI", instr, self.mode) if default_pi_column not in hdulist[self.hduname].data.columns.names: default_pi_column = None self._add_meta_attr("pi_column", default_pi_column) + # If a column named "energy" is found, we read it and assume the energy conversion + # is already done. if "energy" in [val.lower() for val in hdulist[self.hduname].data.columns.names]: energy_column = "energy" else: From 0f4a16b7dec830d0929755c660bbb4b71fa6c53b Mon Sep 17 00:00:00 2001 From: Matteo Bachetti Date: Thu, 17 Oct 2024 15:31:26 +0200 Subject: [PATCH 30/37] Test extension to other kinds of timeseries --- stingray/tests/test_io.py | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/stingray/tests/test_io.py b/stingray/tests/test_io.py index 893c0456f..8c22ab145 100644 --- a/stingray/tests/test_io.py +++ b/stingray/tests/test_io.py @@ -224,6 +224,12 @@ def setup_class(cls): cls.datadir = os.path.join(curdir, "data") cls.fname = os.path.join(datadir, "monol_testA.evt") + def test_read_fits_timeseries_bad_kind(self): + with pytest.raises( + NotImplementedError, match="Only events are supported by FITSTimeseriesReader" + ): + FITSTimeseriesReader(self.fname, output_class="bubu", data_kind="BAD_KIND") + def test_read_fits_timeseries(self): reader = FITSTimeseriesReader(self.fname, output_class=EventList) # Full slice From df066db1e434e3b29db1f741c53dfae4bcba3f79 Mon Sep 17 00:00:00 2001 From: Matteo Bachetti Date: Thu, 17 Oct 2024 16:33:04 +0200 Subject: [PATCH 31/37] Fix formatting --- stingray/events.py | 1 - 1 file changed, 1 deletion(-) diff --git a/stingray/events.py b/stingray/events.py index 9c5e5d39f..bd7881107 100644 --- a/stingray/events.py +++ b/stingray/events.py @@ -627,7 +627,6 @@ def read(cls, filename, fmt=None, rmf_file=None, **kwargs): fmt = "hea" break if fmt is not None and fmt.lower() in ("hea", "ogip"): - additional_columns = kwargs.pop("additional_columns", None) evt = FITSTimeseriesReader( From 0ce846a21d0f1743344fd7fcb31490fed054fa7c Mon Sep 17 00:00:00 2001 From: Matteo Bachetti Date: Fri, 18 Oct 2024 22:51:38 +0200 Subject: [PATCH 32/37] Small changes to allow the streaming of simple time arrays --- stingray/io.py | 39 ++++++++++++++++++++++++++++----------- 1 file changed, 28 insertions(+), 11 deletions(-) diff --git a/stingray/io.py b/stingray/io.py index d296405c4..f588cc479 100644 --- a/stingray/io.py +++ b/stingray/io.py @@ -51,7 +51,6 @@ _H5PY_INSTALLED = False DEFAULT_FORMAT = "pickle" - HAS_128 = True try: np.float128 @@ -729,7 +728,7 @@ class FITSTimeseriesReader(object): def __init__( self, fname, - output_class, + output_class=None, force_hduname=None, gti_file=None, gtistring=None, @@ -741,14 +740,14 @@ def __init__( self.gtistring = gtistring self.output_class = output_class self.additional_columns = additional_columns - if data_kind.lower() == "events": + if "EventList" in str(output_class) or data_kind.lower() in ["events", "times"]: self._initialize_header_events(fname, force_hduname=force_hduname) else: raise NotImplementedError( "Only events are supported by FITSTimeseriesReader at the moment. " f"{data_kind} is an unknown data kind." ) - + self.data_kind = data_kind if additional_columns is None and self.detector_key != "NONE": additional_columns = [self.detector_key] elif self.detector_key != "NONE": @@ -785,15 +784,27 @@ def exposure(self): def __getitem__(self, index): """Return an element or a slice of the object, e.g. ``ts[1]`` or ``ts[1:2].""" - new_ts = self.output_class() + data = self.data_hdu.data[index] + + return self.transform_slice(data) + + def transform_slice(self, data): + # Here there will be some logic to understand whether transfomring to events or something else + + if self.data_kind == "times": + return data[self.time_column][:] + self.timezero + if self.output_class is None: + return data + if self.data_kind == "events": + return self._transform_slice_into_events(data) + + def _transform_slice_into_events(self, data): columns = [self.time_column] for col in self.pi_column, self.energy_column: if col is not None: columns.append(col) - - data = self.data_hdu.data[index] - + new_ts = self.output_class() if self._mission_specific_processing is not None: data = self._mission_specific_processing(data, header=self.header, hduname=self.hduname) @@ -1103,7 +1114,8 @@ def apply_gti_lists(self, new_gti_lists, root_file_name=None, fmt=DEFAULT_FORMAT lower_edge, upper_edge = self._get_idx_from_time_range(gti[0, 0], gti[-1, 1]) ev = self[lower_edge : upper_edge + 1] - ev.gti = gti + if hasattr(ev, "gti"): + ev.gti = gti if root_file_name is not None: new_file = root_file_name + f"_{i:002d}." + fmt.lstrip(".") @@ -1187,7 +1199,9 @@ def split_by_number_of_samples(self, nsamples, root_file_name=None, fmt=DEFAULT_ return self.apply_gti_lists(new_gti_lists, root_file_name=root_file_name, fmt=fmt) - def filter_at_time_intervals(self, time_intervals, root_file_name=None, fmt=DEFAULT_FORMAT): + def filter_at_time_intervals( + self, time_intervals, root_file_name=None, fmt=DEFAULT_FORMAT, check_gtis=True + ): """Filter the event list at the given time intervals. Parameters @@ -1211,7 +1225,10 @@ def filter_at_time_intervals(self, time_intervals, root_file_name=None, fmt=DEFA """ if len(np.shape(time_intervals)) == 1: time_intervals = [time_intervals] - new_gti = [cross_two_gtis(self.gti, [t_int]) for t_int in time_intervals] + if check_gtis: + new_gti = [cross_two_gtis(self.gti, [t_int]) for t_int in time_intervals] + else: + new_gti = [np.asarray([t_int]) for t_int in time_intervals] return self.apply_gti_lists(new_gti, root_file_name=root_file_name, fmt=fmt) From 9729b7d1f4c767157330ed66bc18044644a9d0b0 Mon Sep 17 00:00:00 2001 From: Matteo Bachetti Date: Sun, 20 Oct 2024 16:59:50 +0200 Subject: [PATCH 33/37] Remove setuptools from runtime dependencies --- docs/notebooks | 2 +- pyproject.toml | 2 -- 2 files changed, 1 insertion(+), 3 deletions(-) diff --git a/docs/notebooks b/docs/notebooks index 65c1ba97f..f3210ebe1 160000 --- a/docs/notebooks +++ b/docs/notebooks @@ -1 +1 @@ -Subproject commit 65c1ba97fa5f40085973b7684e4ff967463c2b09 +Subproject commit f3210ebe12e48fd17c89f10c6f6b61e5ef733c69 diff --git a/pyproject.toml b/pyproject.toml index d7fa7ec03..e0acb097d 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -34,8 +34,6 @@ keywords = [ dependencies = [ "numpy>=1.17", "astropy>=4.0", - "setuptools", - "setuptools_scm", "scipy>=1.1.0", "matplotlib>=3.0,!=3.4.00", ] From d37e712fed8a712a485a4b3654028e66df0656a8 Mon Sep 17 00:00:00 2001 From: Matteo Bachetti Date: Mon, 21 Oct 2024 18:21:41 +0200 Subject: [PATCH 34/37] Add docstring to _transform_slice_into_events --- stingray/io.py | 24 ++++++++++++++++++++++++ 1 file changed, 24 insertions(+) diff --git a/stingray/io.py b/stingray/io.py index f588cc479..a20cedd16 100644 --- a/stingray/io.py +++ b/stingray/io.py @@ -800,6 +800,30 @@ def transform_slice(self, data): return self._transform_slice_into_events(data) def _transform_slice_into_events(self, data): + """Take a slice of data from a FITS event file and make it a StingrayTimeseries. + + Data taken from a FITS file will typically be a Numpy record array. This method + tries to interpret the information contained in the record array based on what + we know of the mission and the instrument. For sure, there will be a TIME column + that will become the ``time`` array of the timeseries object. If there is a PI/PHA + column, it will become the ``pi`` array, and if we know the conversion law for the mission, + this will also be converted to energy. If there is an ENERGY column, it will directly + be loaded into the energy column. + Additional meta (e.g. GTIs, MJDREF, etc.) information will also be added to the object. + + Parameters + ---------- + data : np.recarray + The slice of data to transform + + Returns + ------- + new_ts : any StingrayTimeseries subclass + The transformed timeseries object. It will typically be an ``EventList`` object, + but the user can change this by specifying the ``output_class`` parameter in the + constructor of the reader. + + """ columns = [self.time_column] for col in self.pi_column, self.energy_column: if col is not None: From c62053d52f5d489e7fbefb1ecbc41aa20316ea93 Mon Sep 17 00:00:00 2001 From: Matteo Bachetti Date: Mon, 21 Oct 2024 22:55:31 +0200 Subject: [PATCH 35/37] Add tutorial on large files --- docs/index.rst | 1 + docs/largedata.rst | 7 +++++++ docs/notebooks | 2 +- 3 files changed, 9 insertions(+), 1 deletion(-) create mode 100644 docs/largedata.rst diff --git a/docs/index.rst b/docs/index.rst index 14034087d..052eb9303 100644 --- a/docs/index.rst +++ b/docs/index.rst @@ -308,6 +308,7 @@ Advanced :maxdepth: 2 timeseries + largedata api Additional information diff --git a/docs/largedata.rst b/docs/largedata.rst new file mode 100644 index 000000000..eb2546188 --- /dev/null +++ b/docs/largedata.rst @@ -0,0 +1,7 @@ +Working with large data sets +**************************** + +.. toctree:: + :maxdepth: 2 + + notebooks/Performance/Dealing with large data files.ipynb diff --git a/docs/notebooks b/docs/notebooks index 65c1ba97f..2db8e50be 160000 --- a/docs/notebooks +++ b/docs/notebooks @@ -1 +1 @@ -Subproject commit 65c1ba97fa5f40085973b7684e4ff967463c2b09 +Subproject commit 2db8e50be46e3d5cddebb2c15647c45d63de4a92 From eeca57e02a165d001cf611c51def8e072369f828 Mon Sep 17 00:00:00 2001 From: Matteo Bachetti Date: Mon, 21 Oct 2024 22:59:46 +0200 Subject: [PATCH 36/37] Update changelog --- CHANGELOG.rst | 28 ++++++++++++++++++++++++++++ docs/changes/748.docs.rst | 1 - docs/changes/828.feature.rst | 1 - docs/changes/834.feature.rst | 1 - docs/changes/837.bugfix.rst | 3 --- docs/changes/842.trivial.rst | 1 - docs/changes/847.trivial.rst | 1 - docs/changes/849.feature.rst | 2 -- 8 files changed, 28 insertions(+), 10 deletions(-) delete mode 100644 docs/changes/748.docs.rst delete mode 100644 docs/changes/828.feature.rst delete mode 100644 docs/changes/834.feature.rst delete mode 100644 docs/changes/837.bugfix.rst delete mode 100644 docs/changes/842.trivial.rst delete mode 100644 docs/changes/847.trivial.rst delete mode 100644 docs/changes/849.feature.rst diff --git a/CHANGELOG.rst b/CHANGELOG.rst index a9be169eb..46eb3ae2e 100644 --- a/CHANGELOG.rst +++ b/CHANGELOG.rst @@ -1,3 +1,31 @@ +v2.2 (2024-10-22) +----------------- + +New Features +^^^^^^^^^^^^ + +- Add a compute_rms function to LombScarglePowerspectrum (`#828 `__) +- Introduced FITSReader class for lazy-loading of event lists (`#834 `__) +- implementation of the shift-and-add technique for QPOs and other varying power spectral features (`#849 `__) + + +Bug Fixes +^^^^^^^^^ + +- The ``fold_events`` function now checks if the keyword arguments (`kwargs`) are in the list of optional parameters. + If any unidentified keys are present, it raises a `ValueError`. + This fix ensures that the function only accepts valid optional parameters and provides a clear error message for unsupported keys. (`#837 `__) + + +Internal Changes +^^^^^^^^^^^^^^^^ + +- Eliminated runtime dependency on setuptools (`#852 `__) +- Moved configuration to pyproject.toml as recommended by PEP 621 (`#842 `__) +- Added pre-commit hooks in ``pre-commit-config.yaml`` (`#847 `__) +- Improved main page of the documentation (`#748 `__) + + v2.1 (2024-05-29) ----------------- diff --git a/docs/changes/748.docs.rst b/docs/changes/748.docs.rst deleted file mode 100644 index 07c169ad5..000000000 --- a/docs/changes/748.docs.rst +++ /dev/null @@ -1 +0,0 @@ -Improved main page of the documentation \ No newline at end of file diff --git a/docs/changes/828.feature.rst b/docs/changes/828.feature.rst deleted file mode 100644 index e2f2fffe5..000000000 --- a/docs/changes/828.feature.rst +++ /dev/null @@ -1 +0,0 @@ -Add a compute_rms function to LombScarglePowerspectrum diff --git a/docs/changes/834.feature.rst b/docs/changes/834.feature.rst deleted file mode 100644 index 2cfddf02b..000000000 --- a/docs/changes/834.feature.rst +++ /dev/null @@ -1 +0,0 @@ -Introduced FITSReader class for lazy-loading of event lists diff --git a/docs/changes/837.bugfix.rst b/docs/changes/837.bugfix.rst deleted file mode 100644 index bdebe104b..000000000 --- a/docs/changes/837.bugfix.rst +++ /dev/null @@ -1,3 +0,0 @@ -The ``fold_events`` function now checks if the keyword arguments (`kwargs`) are in the list of optional parameters. -If any unidentified keys are present, it raises a `ValueError`. -This fix ensures that the function only accepts valid optional parameters and provides a clear error message for unsupported keys. diff --git a/docs/changes/842.trivial.rst b/docs/changes/842.trivial.rst deleted file mode 100644 index 978490481..000000000 --- a/docs/changes/842.trivial.rst +++ /dev/null @@ -1 +0,0 @@ -Moved configuration to pyproject.toml as recommended by PEP 621 diff --git a/docs/changes/847.trivial.rst b/docs/changes/847.trivial.rst deleted file mode 100644 index 1c8c5e9bf..000000000 --- a/docs/changes/847.trivial.rst +++ /dev/null @@ -1 +0,0 @@ -Added pre-commit hooks in ``pre-commit-config.yaml`` \ No newline at end of file diff --git a/docs/changes/849.feature.rst b/docs/changes/849.feature.rst deleted file mode 100644 index e5880abe5..000000000 --- a/docs/changes/849.feature.rst +++ /dev/null @@ -1,2 +0,0 @@ -implementation of the shift-and-add technique for QPOs and other varying power spectral features - From 00c1dd4600912bfa531ffa899972a6a39a199561 Mon Sep 17 00:00:00 2001 From: Matteo Bachetti Date: Tue, 22 Oct 2024 13:13:52 +0200 Subject: [PATCH 37/37] Update docs --- docs/notebooks | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/notebooks b/docs/notebooks index 2db8e50be..43ba05942 160000 --- a/docs/notebooks +++ b/docs/notebooks @@ -1 +1 @@ -Subproject commit 2db8e50be46e3d5cddebb2c15647c45d63de4a92 +Subproject commit 43ba0594276c66e27a877488ec333b5c3e0b56b3