Fill bad time intervals with fake data #782

Merged
merged 24 commits on Jan 11, 2024

Changes from 15 commits (24 commits total)
76b5e56
Add side parameter to find_nearest
matteobachetti Dec 8, 2023
5b50e65
Add function to randomize data in small bad time intervals
matteobachetti Dec 8, 2023
f74b9cc
Add changelog
matteobachetti Dec 8, 2023
b91a8d3
Pay attention to edges
matteobachetti Dec 8, 2023
3e6e4b4
Rename options
matteobachetti Dec 8, 2023
b2e9817
Fix syntax
matteobachetti Dec 8, 2023
1352e9d
Test that only the attributes we want are randomized
matteobachetti Dec 8, 2023
4d444d4
Test case with no btis
matteobachetti Dec 8, 2023
9c52405
Add fake data to docs
matteobachetti Dec 9, 2023
2b2eebb
Fix plotting
matteobachetti Dec 11, 2023
fc1e77c
Fix deprecation in plot
matteobachetti Dec 13, 2023
988dc30
Fix deprecation in plot
matteobachetti Dec 14, 2023
7447456
Fix corner cases and warnings
matteobachetti Dec 14, 2023
11cf78e
Improve docstring
matteobachetti Jan 3, 2024
11c01d8
Test more keywords and more bad intervals
matteobachetti Jan 4, 2024
32b6570
Top-level import when appropriate
matteobachetti Jan 11, 2024
47d99d7
Add warning about applying the technique only to *very* short gaps
matteobachetti Jan 11, 2024
973fcc8
Change uniform to even, to avoid confusion between uniformly distribu…
matteobachetti Jan 11, 2024
8133062
Cleanup
matteobachetti Jan 11, 2024
8f8243d
Further warning
matteobachetti Jan 11, 2024
aac044b
Fix docstring
matteobachetti Jan 11, 2024
ee47f7e
Change buffer default behavior
matteobachetti Jan 11, 2024
51a3369
Improve compatibility with Numpy 2.0
matteobachetti Jan 11, 2024
de2ee3e
Fix rst issue in docstring
matteobachetti Jan 11, 2024
1 change: 1 addition & 0 deletions docs/changes/782.feature.rst
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
Add function to randomize data in small bad time intervals
1 change: 1 addition & 0 deletions docs/index.rst
@@ -30,6 +30,7 @@ Current Capabilities
* simulating a light curve from another light curve and a 1-d (time) or 2-d (time-energy) impulse response
* simulating an event list from a given light curve _and_ with a given energy spectrum
* Good Time Interval operations
* Filling gaps in light curves with statistically sound fake data

2. Fourier methods
~~~~~~~~~~~~~~~~~~
170 changes: 167 additions & 3 deletions stingray/base.py
@@ -2134,6 +2134,147 @@ def sort(self, reverse=False, inplace=False):
mask = mask[::-1]
return self.apply_mask(mask, inplace=inplace)

def fill_bad_time_intervals(
self,
max_length=None,
attrs_to_randomize=None,
buffer_size=100,
uniform=None,
seed=None,
):
"""Fill short bad time intervals with random data.

To fill the gaps in all but the time points (i.e., flux measures, energies), we take the
``buffer_size`` (default 100) valid data points closest to the gap and resample them randomly,
reproducing their empirical distribution. That is, if a given attribute, in the 100 points of
the buffer, takes the value 10 thirty times, 9 ten times, and 11 sixty times, the simulated
data will contain *on average* 30% of 10, 60% of 11, and 10% of 9.

Times are treated differently depending on whether the time series is uniformly sampled or
not. If it is not, the times are simulated from a uniform distribution with the same count
rate found in the buffer. Otherwise, times just follow the same grid used inside GTIs. Which
of the two treatments to use is decided through the ``uniform`` parameter. If left to
``None``, the time series is considered uniformly sampled if ``self.dt`` is greater than zero
and the median separation between subsequent times is within 1% of the time resolution.

Other Parameters
----------------
max_length : float
Maximum length of a bad time interval to be filled. If None, the criterion is bad
time intervals shorter than 1/100th of the longest good time interval.
Review comment (Member):
Where does 1/100 come from? That seems maybe a bit arbitrary?

Reply (Member Author):
It's actually 1% of the longest good time interval. It's just a small length, by default, so
that we don't alter the statistical properties of the data too much.
attrs_to_randomize : list of str, default None
List of array_attrs to randomize. If ``None``, all array_attrs are randomized.
It should not include ``time`` and ``_mask``, which are treated separately.
buffer_size : int, default 100
Number of good data points, on each side of the bad time interval, used to estimate
the distribution of the random data
uniform : bool, default None
Force the treatment of the data as uniformly sampled or not. If None, the data are
considered uniformly sampled if ``self.dt`` is larger than zero and the median
separation between subsequent times is within 1% of ``self.dt``.
seed : int, default None
Random seed to use for the simulation. If None, a random seed is generated.

"""
from .gti import get_btis
from .utils import get_random_state

rs = get_random_state(seed)

if attrs_to_randomize is None:
attrs_to_randomize = self.array_attrs() + self.internal_array_attrs()
for attr in ["time", "_mask"]:
if attr in attrs_to_randomize:
attrs_to_randomize.remove(attr)

attrs_to_leave_alone = [
a
for a in self.array_attrs() + self.internal_array_attrs()
if a not in attrs_to_randomize
]

if max_length is None:
max_length = np.max(self.gti[:, 1] - self.gti[:, 0]) / 100

btis = get_btis(self.gti, self.time[0], self.time[-1])
if len(btis) == 0:
logging.info("No bad time intervals to fill")
return copy.deepcopy(self)
filtered_times = self.time[self.mask]
from .utils import find_nearest

new_times = [filtered_times.copy()]
new_attrs = {}
if uniform is None:
# The time series is considered uniformly sampled if the median separation between
# subsequent times is within 1% of the time resolution
uniform = False
if self.dt > 0 and np.isclose(np.median(np.diff(self.time)), self.dt, rtol=0.01):
uniform = True
logging.info(f"Data are {'not' if not uniform else ''} uniformly sampled")

added_gtis = []

total_filled_time = 0
for bti in btis:
length = bti[1] - bti[0]
if length > max_length:
continue
logging.info(f"Filling bad time interval {bti} ({length:.4f} s)")
epsilon = 1e-5 * length
added_gtis.append([bti[0] - epsilon, bti[1] + epsilon])
filt_low_t, filt_low_idx = find_nearest(filtered_times, bti[0])
filt_hig_t, filt_hig_idx = find_nearest(filtered_times, bti[1], side="right")
if uniform:
local_new_times = np.arange(bti[0] + self.dt / 2, bti[1], self.dt)
nevents = local_new_times.size
new_times.append(local_new_times)
else:
low_time_arr = filtered_times[max(filt_low_idx - buffer_size, 0) : filt_low_idx]
high_time_arr = filtered_times[filt_hig_idx : buffer_size + filt_hig_idx]

ctrate = (
np.count_nonzero(low_time_arr) / (filt_low_t - low_time_arr[0])
+ np.count_nonzero(high_time_arr) / (high_time_arr[-1] - filt_hig_t)
) / 2
nevents = rs.poisson(ctrate * (bti[1] - bti[0]))
local_new_times = rs.uniform(bti[0], bti[1], nevents)
new_times.append(local_new_times)

for attr in attrs_to_randomize:
low_arr = getattr(self, attr)[max(filt_low_idx - buffer_size, 0) : filt_low_idx]
high_arr = getattr(self, attr)[filt_hig_idx : buffer_size + filt_hig_idx]
if attr not in new_attrs:
new_attrs[attr] = [getattr(self, attr)[self.mask]]
new_attrs[attr].append(rs.choice(np.concatenate([low_arr, high_arr]), nevents))
for attr in attrs_to_leave_alone:
if attr not in new_attrs:
new_attrs[attr] = [getattr(self, attr)[self.mask]]
if attr == "_mask":
new_attrs[attr].append(np.ones(nevents, dtype=bool))
else:
new_attrs[attr].append(np.zeros(nevents) + np.nan)
total_filled_time += length

logging.info(f"A total of {total_filled_time} s of data were simulated")

from .gti import join_gtis

new_gtis = join_gtis(self.gti, added_gtis)
new_times = np.concatenate(new_times)
order = np.argsort(new_times)
new_obj = type(self)()
new_obj.time = new_times[order]

for attr in self.meta_attrs():
setattr(new_obj, attr, getattr(self, attr))
# Todo: fix GTIs
for attr, values in new_attrs.items():
setattr(new_obj, attr, np.concatenate(values)[order])
new_obj.gti = new_gtis
return new_obj
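The core of the technique above can be sketched standalone: draw a Poisson-distributed number of event times at the count rate measured in the buffers flanking the gap, and resample attribute values from the buffers' empirical distribution. This is a minimal NumPy-only illustration with a hypothetical `fill_gap` helper and made-up data, not the Stingray implementation itself (which also handles GTIs, edge cases, and evenly sampled data):

```python
import numpy as np

def fill_gap(times, values, gap_start, gap_stop, buffer_size=100, seed=None):
    """Simulate fake events inside [gap_start, gap_stop] from nearby good data."""
    rs = np.random.RandomState(seed)
    # Good data points on each side of the gap
    before = times[times < gap_start][-buffer_size:]
    after = times[times > gap_stop][:buffer_size]
    # Local count rate, averaged over the two buffers
    ctrate = (before.size / (gap_start - before[0]) + after.size / (after[-1] - gap_stop)) / 2
    # Poisson-distributed number of events, placed uniformly inside the gap
    nevents = rs.poisson(ctrate * (gap_stop - gap_start))
    new_times = np.sort(rs.uniform(gap_start, gap_stop, nevents))
    # Resample values from the empirical distribution of the buffers
    buffer_vals = np.concatenate([values[times < gap_start][-buffer_size:],
                                  values[times > gap_stop][:buffer_size]])
    new_values = rs.choice(buffer_vals, nevents)
    return new_times, new_values

# Example: a 10 s gap carved into a ~100 ct/s event list
rs = np.random.RandomState(0)
t = np.sort(rs.uniform(0, 100, 10000))
t = t[(t < 45) | (t > 55)]  # bad time interval from 45 to 55 s
v = rs.choice([9, 10, 11], t.size, p=[0.1, 0.3, 0.6])
new_t, new_v = fill_gap(t, v, 45, 55, seed=1)
print(new_t.size)  # roughly 1000 events, Poisson-distributed
```

The simulated values can only take values present in the buffers, which is exactly why the docstring warns about applying this only to very short gaps: the statistical properties of the data are preserved only locally.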

def plot(
self,
attr,
@@ -2145,6 +2286,7 @@
save=False,
filename=None,
plot_btis=True,
axis_limits=None,
):
"""
Plot the time series using ``matplotlib``.
@@ -2168,6 +2310,10 @@
could be ``['Time (s)', 'Counts (s^-1)']``
ax : ``matplotlib.pyplot.axis`` object
Axis to be used for plotting. Defaults to creating a new one.
axis_limits : list, tuple, string, default ``None``
Parameter to set axis properties of the ``matplotlib`` figure. For example
it can be a list like ``[xmin, xmax, ymin, ymax]`` or any other
acceptable argument for the ``matplotlib.pyplot.axis()`` method.
title : str, default ``None``
The title of the plot.
marker : str, default '-'
@@ -2188,11 +2334,18 @@
plt.figure(attr)
ax = plt.gca()

# Default values for labels
valid_labels = (isinstance(labels, Iterable) and not isinstance(labels, str)) and len(
labels
) == 2
if labels is not None and not valid_labels:
warnings.warn("``labels`` must be an iterable with two labels for x and y axes.")

if labels is None or not valid_labels:
labels = ["Time (s)"] + [attr]

xlabel = labels[0]
ylabel = labels[1]

ax.plot(self.time, getattr(self, attr), marker, ds="steps-mid", label=attr, zorder=10)

@@ -2202,11 +2355,15 @@
getattr(self, attr),
yerr=getattr(self, attr + "_err"),
fmt="o",
zorder=10,
)

ax.set_ylabel(ylabel)
ax.set_xlabel(xlabel)

if axis_limits is not None:
ax.set_xlim(axis_limits[0], axis_limits[1])
ax.set_ylim(axis_limits[2], axis_limits[3])
if title is not None:
ax.set_title(title)

@@ -2221,7 +2378,14 @@
tend = max(self.time[-1] + self.dt / 2, self.gti[-1, 1])
btis = get_btis(self.gti, tstart, tend)
for bti in btis:
plt.axvspan(
bti[0],
bti[1],
alpha=0.5,
facecolor="r",
zorder=1,
edgecolor="none",
)
return ax
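The `axvspan` change above moves the bad-time-interval shading behind the data (`zorder=1`, versus `zorder=10` for the curves) and removes the patch edge line. A minimal standalone sketch of the same pattern, with made-up data and assuming only matplotlib is available:

```python
import matplotlib
matplotlib.use("Agg")  # headless backend, no display needed
import matplotlib.pyplot as plt
import numpy as np

t = np.linspace(0, 10, 200)
fig, ax = plt.subplots()
# Data drawn on top of everything else
ax.plot(t, np.sin(t), "-", ds="steps-mid", zorder=10)
# Bad time interval shaded behind the curve, with no edge line
ax.axvspan(4.0, 6.0, alpha=0.5, facecolor="r", edgecolor="none", zorder=1)
fig.canvas.draw()
```

Using `facecolor` plus `edgecolor="none"` instead of `color` avoids the visible vertical borders that the old call produced at the interval edges.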


68 changes: 36 additions & 32 deletions stingray/lightcurve.py
@@ -1608,11 +1608,14 @@
self,
witherrors=False,
labels=None,
ax=None,
title=None,
marker="-",
save=False,
filename=None,
axis_limits=None,
axis=None,
plot_btis=True,
):
"""
Plot the light curve using ``matplotlib``.
@@ -1629,11 +1632,14 @@
labels : iterable, default ``None``
A list of tuple with ``xlabel`` and ``ylabel`` as strings.

axis_limits : list, tuple, string, default ``None``
Parameter to set axis properties of the ``matplotlib`` figure. For example
it can be a list like ``[xmin, xmax, ymin, ymax]`` or any other
acceptable argument for the ``matplotlib.pyplot.axis()`` method.

axis : list, tuple, string, default ``None``
Deprecated in favor of ``axis_limits``, same functionality.

title : str, default ``None``
The title of the plot.

@@ -1647,39 +1653,37 @@

filename : str
File name of the image to save. Depends on the boolean ``save``.
"""

fig = plt.figure()
if witherrors:
fig = plt.errorbar(self.time, self.counts, yerr=self.counts_err, fmt=marker)
else:
fig = plt.plot(self.time, self.counts, marker)

if labels is not None:
try:
plt.xlabel(labels[0])
plt.ylabel(labels[1])
except TypeError:
utils.simon("``labels`` must be either a list or tuple with " "x and y labels.")
raise
except IndexError:
utils.simon("``labels`` must have two labels for x and y " "axes.")
# Not raising here because in case of len(labels)==1, only
# x-axis will be labelled.
ax : ``matplotlib.pyplot.axis`` object
Axis to be used for plotting. Defaults to creating a new one.

plot_btis : bool
Plot the bad time intervals as red areas on the plot
"""
if axis is not None:
plt.axis(axis)

if title is not None:
plt.title(title)

if save:
if filename is None:
plt.savefig("out.png")
else:
plt.savefig(filename)
else:
plt.show(block=False)
warnings.warn(
"The ``axis`` argument is deprecated in favor of ``axis_limits``. "
"Please use that instead.",
DeprecationWarning,
)
axis_limits = axis

flux_attr = "counts"
if not self.input_counts:
flux_attr = "countrate"


return super().plot(
flux_attr,
witherrors=witherrors,
labels=labels,
ax=ax,
title=title,
marker=marker,
save=save,
filename=filename,
plot_btis=plot_btis,
axis_limits=axis_limits,
)
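The `axis` → `axis_limits` handover follows the standard keyword-deprecation pattern: accept the old name, warn, and forward its value to the new name. A self-contained sketch with a hypothetical `plot` function (not the Stingray method itself):

```python
import warnings

def plot(axis_limits=None, axis=None):
    """Accept the deprecated ``axis`` keyword but steer users to ``axis_limits``."""
    if axis is not None:
        warnings.warn(
            "The ``axis`` argument is deprecated in favor of ``axis_limits``. "
            "Please use that instead.",
            DeprecationWarning,
        )
        axis_limits = axis
    return axis_limits

# Old-style call still works, but raises a DeprecationWarning
with warnings.catch_warnings(record=True) as caught:
    warnings.simplefilter("always")
    limits = plot(axis=[0, 1, -1, 1])
print(limits)  # [0, 1, -1, 1]
```

Forwarding inside the wrapper keeps the deprecated keyword out of the parent class entirely, so `super().plot()` only ever sees `axis_limits`.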

@classmethod
def read(