From 2bb2a01ff490e0212b4f929f8197448b113c2a03 Mon Sep 17 00:00:00 2001
From: Thibault Lestang <thibault.lestang@protonmail.com>
Date: Sat, 22 Jul 2023 14:40:53 +0100
Subject: [PATCH 01/21] Compute window_size in WindowedForecast __init__

---
 cats/forecast.py                | 13 +++++++++++--
 cats/optimise_starttime.py      | 10 ++--------
 tests/test_windowed_forecast.py | 12 ++++++++----
 3 files changed, 21 insertions(+), 14 deletions(-)

diff --git a/cats/forecast.py b/cats/forecast.py
index 0c85954..470ef90 100644
--- a/cats/forecast.py
+++ b/cats/forecast.py
@@ -1,3 +1,4 @@
+from math import ceil
 from dataclasses import dataclass, field
 from datetime import datetime
 
@@ -36,12 +37,20 @@ def __post_init__(self):
 
 class WindowedForecast:
 
-    def __init__(self, data: list[CarbonIntensityPointEstimate], window_size: int):
+    def __init__(
+            self,
+            data: list[CarbonIntensityPointEstimate],
+            duration: int,  # in minutes
+            start: datetime,
+    ):
         self.times = [point.datetime for point in data]
         self.intensities = [point.value for point in data]
         # Integration window size in number of time intervals covered
         # by the window.
-        self.window_size = window_size
+        data_stepsize = (
+            data[1].datetime - data[0].datetime
+        ).total_seconds() / 60
+        self.window_size = ceil(duration / data_stepsize)
 
     def __getitem__(self, index: int) -> CarbonIntensityAverageEstimate:
         """Return the average of timeseries data from index over the
diff --git a/cats/optimise_starttime.py b/cats/optimise_starttime.py
index 67aebee..570c35b 100644
--- a/cats/optimise_starttime.py
+++ b/cats/optimise_starttime.py
@@ -1,4 +1,4 @@
-from math import ceil
+from datetime import datetime
 from .forecast import WindowedForecast
 
 
@@ -28,11 +28,5 @@ def get_avg_estimates(data, method="simple", duration=None):
 
     if method == "windowed":
         # get length of interval between timestamps
-        interval = (
-            data[1].datetime - data[0].datetime
-        ).total_seconds() / 60
-        wf = WindowedForecast(
-            data=data,
-            window_size=ceil(duration / interval)
-        )
+        wf = WindowedForecast(data, duration, start=datetime.now())
         return wf[0], min(wf)
diff --git a/tests/test_windowed_forecast.py b/tests/test_windowed_forecast.py
index 5390aea..ee54b03 100644
--- a/tests/test_windowed_forecast.py
+++ b/tests/test_windowed_forecast.py
@@ -24,7 +24,7 @@
 
 def test_has_right_length():
     window_size = 160  # In number of time intervals
-    wf = WindowedForecast(DATA, window_size)
+    wf = WindowedForecast(DATA, window_size, start=DATA[0].datetime)
 
     # Expecting (200 - 160 - 1) (39) data points in the time
     # integrated timeseries.
@@ -39,7 +39,7 @@ def test_values():
     # a step size `step` small compared the the integration window
 
     window_size = 160
-    wf = WindowedForecast(DATA, window_size)
+    wf = WindowedForecast(DATA, window_size, start=DATA[0].datetime)
     expected = [
 
         math.cos((i + window_size) * step) - math.cos(i * step)
@@ -68,7 +68,9 @@ def test_minimise_average():
         ]
 
         window_size = 6
-        result = min(WindowedForecast(data, window_size))
+        # Data points separated by 30 minutes intervals
+        duration = window_size * 30
+        result = min(WindowedForecast(data, duration, start=data[0].datetime))
 
         # Intensity point estimates over best runtime period
         v = [10, 8, 7, 7, 5, 8, 8]
@@ -95,7 +97,9 @@ def test_average_intensity_now():
         ]
 
         window_size = 11
-        result = WindowedForecast(data, window_size)[0]
+        # Data points separated by 30 minutes intervals
+        duration = window_size * 30
+        result = WindowedForecast(data, duration, start=data[0].datetime)[0]
 
         # Intensity point estimates over best runtime period
         v = [p.value for p in data[:window_size + 1]]

From 8b3b5feaddb99d1ec7cb0d692602f0ab2ad2cd39 Mon Sep 17 00:00:00 2001
From: Thibault Lestang <thibault.lestang@protonmail.com>
Date: Sun, 23 Jul 2023 10:52:03 +0100
Subject: [PATCH 02/21] Add function to interpolation intensity value

---
 cats/forecast.py | 13 +++++++++++++
 1 file changed, 13 insertions(+)

diff --git a/cats/forecast.py b/cats/forecast.py
index 470ef90..8ab3422 100644
--- a/cats/forecast.py
+++ b/cats/forecast.py
@@ -52,6 +52,19 @@ def __init__(
         ).total_seconds() / 60
         self.window_size = ceil(duration / data_stepsize)
 
+    @staticmethod
+    def interp(
+            p1: CarbonIntensityPointEstimate,
+            p2: CarbonIntensityPointEstimate,
+            p: datetime
+    ):
+        timestep = (p2.datetime - p1.datetime).total_seconds()
+
+        slope = (p2.value - p1.value) / timestep
+        offset = (p - p1.datetime).total_seconds()
+        # import pdb; pdb.set_trace()
+        return p1.value + slope * offset  # Value at t = start
+
     def __getitem__(self, index: int) -> CarbonIntensityAverageEstimate:
         """Return the average of timeseries data from index over the
         window size.  Data points are integrated using the trapeziodal

From f6251b30a48fa411141374e614914f0ddd3800c8 Mon Sep 17 00:00:00 2001
From: Thibault Lestang <thibault.lestang@protonmail.com>
Date: Sun, 23 Jul 2023 10:53:07 +0100
Subject: [PATCH 03/21] Interpolate start and end CI for each potential
 duration

---
 cats/forecast.py | 23 ++++++++++++++++++-----
 1 file changed, 18 insertions(+), 5 deletions(-)

diff --git a/cats/forecast.py b/cats/forecast.py
index 8ab3422..4092bbe 100644
--- a/cats/forecast.py
+++ b/cats/forecast.py
@@ -1,6 +1,6 @@
 from math import ceil
 from dataclasses import dataclass, field
-from datetime import datetime
+from datetime import datetime, timedelta
 
 
 @dataclass(order=True)
@@ -47,10 +47,11 @@ def __init__(
         self.intensities = [point.value for point in data]
         # Integration window size in number of time intervals covered
         # by the window.
-        data_stepsize = (
-            data[1].datetime - data[0].datetime
-        ).total_seconds() / 60
-        self.window_size = ceil(duration / data_stepsize)
+        self.data_stepsize = data[1].datetime - data[0].datetime
+        self.window_size = ceil(duration / (self.data_stepsize.total_seconds() / 60))
+        self.start = start
+        # TODO: Expect duration as a timedelta directly
+        self.duration = timedelta(minutes=duration)
 
     @staticmethod
     def interp(
@@ -78,6 +79,18 @@ def __getitem__(self, index: int) -> CarbonIntensityAverageEstimate:
                     self.intensities[index + 1 : index + self.window_size + 1]
             )]
 
+        start = self.start + index * self.data_stepsize
+        p1 = CarbonIntensityPointEstimate(self.times[index], self.intensities[index])
+        p2 = CarbonIntensityPointEstimate(self.times[index + 1], self.intensities[index + 1])
+        v[0] = 0.5 * (self.interp(p1, p2, start) + self.intensities[index + 1])
+
+        end = self.start + index * self.data_stepsize + self.duration
+        p1 = CarbonIntensityPointEstimate(self.times[index + self.window_size - 1], self.intensities[index + self.window_size - 1])
+        p2 = CarbonIntensityPointEstimate(self.times[index + self.window_size], self.intensities[index + self.window_size])
+        v[-1] = 0.5 * (
+            self.intensities[index + self.window_size - 1] + self.interp(p1, p2, end)
+        )
+
         return CarbonIntensityAverageEstimate(
             start=self.times[index],
             # Note that `end` points to the _start_ of the last

From d876e4f50b5457965294ad5f48c066c43e7d58e9 Mon Sep 17 00:00:00 2001
From: Thibault Lestang <thibault.lestang@protonmail.com>
Date: Sun, 23 Jul 2023 14:35:58 +0100
Subject: [PATCH 04/21] Work with nb of data points instead of nb of intervals

---
 cats/forecast.py | 28 +++++++++++++---------------
 1 file changed, 13 insertions(+), 15 deletions(-)

diff --git a/cats/forecast.py b/cats/forecast.py
index 4092bbe..cb32d80 100644
--- a/cats/forecast.py
+++ b/cats/forecast.py
@@ -1,3 +1,4 @@
+from bisect import bisect_left
 from math import ceil
 from dataclasses import dataclass, field
 from datetime import datetime, timedelta
@@ -45,11 +46,10 @@ def __init__(
     ):
         self.times = [point.datetime for point in data]
         self.intensities = [point.value for point in data]
-        # Integration window size in number of time intervals covered
-        # by the window.
-        self.data_stepsize = data[1].datetime - data[0].datetime
-        self.window_size = ceil(duration / (self.data_stepsize.total_seconds() / 60))
+        self.end = start + timedelta(minutes=duration)
+        self.ndata = bisect_left(data, self.end, key=lambda x: x.datetime) + 1
         self.start = start
+        self.data_stepsize = data[1].datetime - data[0].datetime
         # TODO: Expect duration as a timedelta directly
         self.duration = timedelta(minutes=duration)
 
@@ -75,8 +75,8 @@ def __getitem__(self, index: int) -> CarbonIntensityAverageEstimate:
         v = [  # If you think of a better name, pls help!
             0.5 * (a + b)
             for a, b in zip(
-                    self.intensities[index: index + self.window_size],
-                    self.intensities[index + 1 : index + self.window_size + 1]
+                    self.intensities[index: index + self.ndata - 1],
+                    self.intensities[index + 1 : index + self.ndata]
             )]
 
         start = self.start + index * self.data_stepsize
@@ -85,18 +85,16 @@ def __getitem__(self, index: int) -> CarbonIntensityAverageEstimate:
         v[0] = 0.5 * (self.interp(p1, p2, start) + self.intensities[index + 1])
 
         end = self.start + index * self.data_stepsize + self.duration
-        p1 = CarbonIntensityPointEstimate(self.times[index + self.window_size - 1], self.intensities[index + self.window_size - 1])
-        p2 = CarbonIntensityPointEstimate(self.times[index + self.window_size], self.intensities[index + self.window_size])
+        p1 = CarbonIntensityPointEstimate(self.times[index + self.ndata - 2], self.intensities[index + self.ndata - 2])
+        p2 = CarbonIntensityPointEstimate(self.times[index + self.ndata - 1], self.intensities[index + self.ndata - 1])
         v[-1] = 0.5 * (
-            self.intensities[index + self.window_size - 1] + self.interp(p1, p2, end)
+            self.intensities[index + self.ndata - 2] + self.interp(p1, p2, end)
         )
 
         return CarbonIntensityAverageEstimate(
-            start=self.times[index],
-            # Note that `end` points to the _start_ of the last
-            # interval in the window.
-            end=self.times[index + self.window_size],
-            value=sum(v) / self.window_size,
+            start=start,
+            end=end,
+            value=sum(v) / (self.ndata - 1),
         )
 
     def __iter__(self):
@@ -104,4 +102,4 @@ def __iter__(self):
             yield self.__getitem__(index)
 
     def __len__(self):
-        return len(self.times) - self.window_size - 1
+        return len(self.times) - self.ndata

From b2d548887a329168c80667dbf872bf4d6785270b Mon Sep 17 00:00:00 2001
From: Thibault Lestang <thibault.lestang@protonmail.com>
Date: Sun, 23 Jul 2023 14:43:24 +0100
Subject: [PATCH 05/21] Use data list directly instead of times and intensities

---
 cats/forecast.py | 31 +++++++++++++++++++------------
 1 file changed, 19 insertions(+), 12 deletions(-)

diff --git a/cats/forecast.py b/cats/forecast.py
index cb32d80..d0ea3a7 100644
--- a/cats/forecast.py
+++ b/cats/forecast.py
@@ -44,8 +44,7 @@ def __init__(
             duration: int,  # in minutes
             start: datetime,
     ):
-        self.times = [point.datetime for point in data]
-        self.intensities = [point.value for point in data]
+        self.data = data
         self.end = start + timedelta(minutes=duration)
         self.ndata = bisect_left(data, self.end, key=lambda x: x.datetime) + 1
         self.start = start
@@ -73,22 +72,30 @@ def __getitem__(self, index: int) -> CarbonIntensityAverageEstimate:
         with a straight line.
         """
         v = [  # If you think of a better name, pls help!
-            0.5 * (a + b)
+            0.5 * (a.value + b.value)
             for a, b in zip(
-                    self.intensities[index: index + self.ndata - 1],
-                    self.intensities[index + 1 : index + self.ndata]
+                    self.data[index: index + self.ndata - 1],
+                    self.data[index + 1: index + self.ndata]
             )]
 
         start = self.start + index * self.data_stepsize
-        p1 = CarbonIntensityPointEstimate(self.times[index], self.intensities[index])
-        p2 = CarbonIntensityPointEstimate(self.times[index + 1], self.intensities[index + 1])
-        v[0] = 0.5 * (self.interp(p1, p2, start) + self.intensities[index + 1])
+        v[0] = 0.5 * (
+            self.interp(
+                p1=self.data[index],
+                p2=self.data[index + 1],
+                p=start
+            ) +
+            self.data[index + 1].value
+        )
 
         end = self.start + index * self.data_stepsize + self.duration
-        p1 = CarbonIntensityPointEstimate(self.times[index + self.ndata - 2], self.intensities[index + self.ndata - 2])
-        p2 = CarbonIntensityPointEstimate(self.times[index + self.ndata - 1], self.intensities[index + self.ndata - 1])
         v[-1] = 0.5 * (
-            self.intensities[index + self.ndata - 2] + self.interp(p1, p2, end)
+            self.data[index + self.ndata - 2].value +
+            self.interp(
+                p1=self.data[index + self.ndata - 2],
+                p2=self.data[index + self.ndata - 1],
+                p=end
+            )
         )
 
         return CarbonIntensityAverageEstimate(
@@ -102,4 +109,4 @@ def __iter__(self):
             yield self.__getitem__(index)
 
     def __len__(self):
-        return len(self.times) - self.ndata
+        return len(self.data) - self.ndata

From 1158bad5c5e6dc5c270117c7d5eccfb7faed0451 Mon Sep 17 00:00:00 2001
From: Thibault Lestang <thibault.lestang@protonmail.com>
Date: Sun, 23 Jul 2023 14:58:33 +0100
Subject: [PATCH 06/21] test: case where job start/end dont match data points

---
 tests/test_windowed_forecast.py | 37 +++++++++++++++++++++++++++++++++
 1 file changed, 37 insertions(+)

diff --git a/tests/test_windowed_forecast.py b/tests/test_windowed_forecast.py
index ee54b03..efa8c23 100644
--- a/tests/test_windowed_forecast.py
+++ b/tests/test_windowed_forecast.py
@@ -111,3 +111,40 @@ def test_average_intensity_now():
             ) / window_size
         )
         assert (result == expected)
+
+
+def test_average_intensity_with_offset():
+    # Case where job start and end time are not colocated with data
+    # carbon intensity data points. In this case cats interpolate the
+    # intensity value at beginning and end of each potential job
+    # duration window.
+    with open(TEST_DATA, "r") as f:
+        csvfile = csv.reader(f, delimiter=",")
+        next(csvfile)  # Skip header line
+        data = [
+            CarbonIntensityPointEstimate(
+                datetime=datetime.fromisoformat(datestr[:-1]),
+                value=float(intensity_value),
+            )
+            for datestr, _, _, intensity_value in csvfile
+        ]
+
+        duration = 194  # in minutes
+        # First available data point is for 12:30 but the job
+        # starts 18 minutes later.
+        job_start = datetime.fromisoformat("2023-05-04T12:48")
+        result = WindowedForecast(data, duration, start=job_start)[2]
+
+        # First and last element in v are interpolated intensity value.
+        # e.g v[0] = 15 + 18min * (18 - 15) / 30min = 16.8
+        v = [16.8, 18, 19, 17, 16, 11, 11, 11, 11]
+        data_timestep = data[1].datetime - data[0].datetime
+        expected = CarbonIntensityAverageEstimate(
+            start=job_start + 2 * data_timestep,
+            end=job_start + 2 * data_timestep + timedelta(minutes=duration),
+            value=sum(
+                [0.5 * (a + b) for a, b in zip(v[:-1], v[1:])]
+            ) / (len(v) - 1)
+        )
+        assert (result == expected)
+

From 60a29f011604f851a7c66002f13ae58685adea5c Mon Sep 17 00:00:00 2001
From: Thibault Lestang <thibault.lestang@protonmail.com>
Date: Sun, 23 Jul 2023 15:23:26 +0100
Subject: [PATCH 07/21] Change a few variable names and layout for readability

---
 cats/forecast.py | 42 ++++++++++++++++++++++--------------------
 1 file changed, 22 insertions(+), 20 deletions(-)

diff --git a/cats/forecast.py b/cats/forecast.py
index d0ea3a7..71dffee 100644
--- a/cats/forecast.py
+++ b/cats/forecast.py
@@ -56,12 +56,12 @@ def __init__(
     def interp(
             p1: CarbonIntensityPointEstimate,
             p2: CarbonIntensityPointEstimate,
-            p: datetime
+            when: datetime
     ):
         timestep = (p2.datetime - p1.datetime).total_seconds()
 
         slope = (p2.value - p1.value) / timestep
-        offset = (p - p1.datetime).total_seconds()
+        offset = (when - p1.datetime).total_seconds()
         # import pdb; pdb.set_trace()
         return p1.value + slope * offset  # Value at t = start
 
@@ -69,39 +69,41 @@ def __getitem__(self, index: int) -> CarbonIntensityAverageEstimate:
         """Return the average of timeseries data from index over the
         window size.  Data points are integrated using the trapeziodal
         rule, that is assuming that forecast data points are joined
-        with a straight line.
+        with a straight line. Integral value between two points is the
+        intensity value at the midpoint times the duration between the
+        two points.  This duration is assumed to be unity and the
+        average is computed by dividing the total integral value by
+        the number of intervals.
         """
-        v = [  # If you think of a better name, pls help!
+        midpt = [
             0.5 * (a.value + b.value)
             for a, b in zip(
                     self.data[index: index + self.ndata - 1],
                     self.data[index + 1: index + self.ndata]
             )]
 
+        # Account for the fact that the start and end of each window
+        # might not fall exactly on data points.  The starting
+        # intensity is interpolated between the first (index) and
+        # second data point (index + 1) in the window.  The ending
+        # intensity value is interpolated between the last and
+        # penultimate data points in he window.
         start = self.start + index * self.data_stepsize
-        v[0] = 0.5 * (
-            self.interp(
-                p1=self.data[index],
-                p2=self.data[index + 1],
-                p=start
-            ) +
-            self.data[index + 1].value
-        )
+        i = self.interp(self.data[index], self.data[index + 1], when=start)
+        midpt[0] = 0.5 * (i + self.data[index + 1].value)
 
         end = self.start + index * self.data_stepsize + self.duration
-        v[-1] = 0.5 * (
-            self.data[index + self.ndata - 2].value +
-            self.interp(
-                p1=self.data[index + self.ndata - 2],
-                p2=self.data[index + self.ndata - 1],
-                p=end
-            )
+        i = self.interp(
+            self.data[index + self.ndata - 2],
+            self.data[index + self.ndata - 1],
+            when=end,
         )
+        midpt[-1] = 0.5 * (self.data[index + self.ndata - 2].value + i)
 
         return CarbonIntensityAverageEstimate(
             start=start,
             end=end,
-            value=sum(v) / (self.ndata - 1),
+            value=sum(midpt) / (self.ndata - 1),
         )
 
     def __iter__(self):

From 1c14f0cf451f253739b86dbf5eb1bc21ae2e9791 Mon Sep 17 00:00:00 2001
From: Thibault Lestang <thibault.lestang@protonmail.com>
Date: Sun, 23 Jul 2023 15:26:28 +0100
Subject: [PATCH 08/21] Add docstring for interp method

---
 cats/forecast.py | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/cats/forecast.py b/cats/forecast.py
index 71dffee..9577d4e 100644
--- a/cats/forecast.py
+++ b/cats/forecast.py
@@ -58,6 +58,10 @@ def interp(
             p2: CarbonIntensityPointEstimate,
             when: datetime
     ):
+        """Return value of carbon intensity at a time between data
+        points, assuming points are joined by a straight line (linear
+        interpolation).
+        """
         timestep = (p2.datetime - p1.datetime).total_seconds()
 
         slope = (p2.value - p1.value) / timestep

From 93be87c0aaed5ed232769156b9963c09701d64d2 Mon Sep 17 00:00:00 2001
From: Thibault Lestang <thibault.lestang@protonmail.com>
Date: Sun, 23 Jul 2023 15:27:23 +0100
Subject: [PATCH 09/21] Move interp method below __getitem__

---
 cats/forecast.py | 34 +++++++++++++++++-----------------
 1 file changed, 17 insertions(+), 17 deletions(-)

diff --git a/cats/forecast.py b/cats/forecast.py
index 9577d4e..4a0d4ad 100644
--- a/cats/forecast.py
+++ b/cats/forecast.py
@@ -52,23 +52,6 @@ def __init__(
         # TODO: Expect duration as a timedelta directly
         self.duration = timedelta(minutes=duration)
 
-    @staticmethod
-    def interp(
-            p1: CarbonIntensityPointEstimate,
-            p2: CarbonIntensityPointEstimate,
-            when: datetime
-    ):
-        """Return value of carbon intensity at a time between data
-        points, assuming points are joined by a straight line (linear
-        interpolation).
-        """
-        timestep = (p2.datetime - p1.datetime).total_seconds()
-
-        slope = (p2.value - p1.value) / timestep
-        offset = (when - p1.datetime).total_seconds()
-        # import pdb; pdb.set_trace()
-        return p1.value + slope * offset  # Value at t = start
-
     def __getitem__(self, index: int) -> CarbonIntensityAverageEstimate:
         """Return the average of timeseries data from index over the
         window size.  Data points are integrated using the trapeziodal
@@ -110,6 +93,23 @@ def __getitem__(self, index: int) -> CarbonIntensityAverageEstimate:
             value=sum(midpt) / (self.ndata - 1),
         )
 
+    @staticmethod
+    def interp(
+            p1: CarbonIntensityPointEstimate,
+            p2: CarbonIntensityPointEstimate,
+            when: datetime
+    ):
+        """Return value of carbon intensity at a time between data
+        points, assuming points are joined by a straight line (linear
+        interpolation).
+        """
+        timestep = (p2.datetime - p1.datetime).total_seconds()
+
+        slope = (p2.value - p1.value) / timestep
+        offset = (when - p1.datetime).total_seconds()
+        # import pdb; pdb.set_trace()
+        return p1.value + slope * offset  # Value at t = start
+
     def __iter__(self):
         for index in range(self.__len__()):
             yield self.__getitem__(index)

From 22bb8b7bcbfbccc93dd970e00b22de1faedbc3d7 Mon Sep 17 00:00:00 2001
From: Thibault Lestang <thibault.lestang@protonmail.com>
Date: Sun, 23 Jul 2023 15:27:51 +0100
Subject: [PATCH 10/21] Remove import of unused ceil function

---
 cats/forecast.py | 1 -
 1 file changed, 1 deletion(-)

diff --git a/cats/forecast.py b/cats/forecast.py
index 4a0d4ad..90c1e66 100644
--- a/cats/forecast.py
+++ b/cats/forecast.py
@@ -1,5 +1,4 @@
 from bisect import bisect_left
-from math import ceil
 from dataclasses import dataclass, field
 from datetime import datetime, timedelta
 

From 925e6c9b40deb8bd54515f4a51ed5906828d978b Mon Sep 17 00:00:00 2001
From: Thibault Lestang <thibault.lestang@protonmail.com>
Date: Sun, 23 Jul 2023 15:31:24 +0100
Subject: [PATCH 11/21] Dont need job duration as WindowedForecast attribute

---
 cats/forecast.py | 20 ++++++++++----------
 1 file changed, 10 insertions(+), 10 deletions(-)

diff --git a/cats/forecast.py b/cats/forecast.py
index 90c1e66..eda9906 100644
--- a/cats/forecast.py
+++ b/cats/forecast.py
@@ -44,22 +44,22 @@ def __init__(
             start: datetime,
     ):
         self.data = data
-        self.end = start + timedelta(minutes=duration)
-        self.ndata = bisect_left(data, self.end, key=lambda x: x.datetime) + 1
-        self.start = start
         self.data_stepsize = data[1].datetime - data[0].datetime
+        self.start = start
         # TODO: Expect duration as a timedelta directly
-        self.duration = timedelta(minutes=duration)
+        self.end = start + timedelta(minutes=duration)
+        self.ndata = bisect_left(data, self.end, key=lambda x: x.datetime) + 1
 
     def __getitem__(self, index: int) -> CarbonIntensityAverageEstimate:
         """Return the average of timeseries data from index over the
         window size.  Data points are integrated using the trapeziodal
         rule, that is assuming that forecast data points are joined
-        with a straight line. Integral value between two points is the
-        intensity value at the midpoint times the duration between the
-        two points.  This duration is assumed to be unity and the
-        average is computed by dividing the total integral value by
-        the number of intervals.
+        with a straight line.
+
+        Integral value between two points is the intensity value at
+        the midpoint times the duration between the two points.  This
+        duration is assumed to be unity and the average is computed by
+        dividing the total integral value by the number of intervals.
         """
         midpt = [
             0.5 * (a.value + b.value)
@@ -78,7 +78,7 @@ def __getitem__(self, index: int) -> CarbonIntensityAverageEstimate:
         i = self.interp(self.data[index], self.data[index + 1], when=start)
         midpt[0] = 0.5 * (i + self.data[index + 1].value)
 
-        end = self.start + index * self.data_stepsize + self.duration
+        end = self.end + index * self.data_stepsize
         i = self.interp(
             self.data[index + self.ndata - 2],
             self.data[index + self.ndata - 1],

From 51bc3364c5521aa0f8f0b762674eea98c550144d Mon Sep 17 00:00:00 2001
From: Thibault Lestang <thibault.lestang@protonmail.com>
Date: Sun, 23 Jul 2023 17:31:48 +0100
Subject: [PATCH 12/21] Cannot use 'key' param of bisect for python 3.9

---
 cats/forecast.py | 15 +++++++++++++--
 1 file changed, 13 insertions(+), 2 deletions(-)

diff --git a/cats/forecast.py b/cats/forecast.py
index eda9906..dbf50a4 100644
--- a/cats/forecast.py
+++ b/cats/forecast.py
@@ -1,4 +1,3 @@
-from bisect import bisect_left
 from dataclasses import dataclass, field
 from datetime import datetime, timedelta
 
@@ -48,7 +47,19 @@ def __init__(
         self.start = start
         # TODO: Expect duration as a timedelta directly
         self.end = start + timedelta(minutes=duration)
-        self.ndata = bisect_left(data, self.end, key=lambda x: x.datetime) + 1
+
+        # Find number of data points in a window, by finding the index
+        # of the first data point past the job end time. Could be done
+        # with the bisect module in the stdlib for python 3.10+ ('key'
+        # parameter was introduced in 3.10).
+        #
+        # bisect_left(data, self.end, key=lambda x: x.datetime)
+        #
+        def bisect_left(data, t):
+            for i, d in enumerate(data):
+                if d.datetime >= t:
+                    return i
+        self.ndata = bisect_left(data, self.end) + 1
 
     def __getitem__(self, index: int) -> CarbonIntensityAverageEstimate:
         """Return the average of timeseries data from index over the

From b7dc142ad4413036d5040f2e9d00d4981a4357b0 Mon Sep 17 00:00:00 2001
From: Thibault Lestang <t.lestang@imperial.ac.uk>
Date: Mon, 24 Jul 2023 12:45:42 +0100
Subject: [PATCH 13/21] Fix intensity value at window boundaries instead of
 midpoints

This allows handling of short jobs that fit between two consecutive
data points.  It also makes the implementation simpler, in my opinion.
---
 cats/forecast.py | 38 +++++++++++++++++++++-----------------
 1 file changed, 21 insertions(+), 17 deletions(-)

diff --git a/cats/forecast.py b/cats/forecast.py
index dbf50a4..9c17eaa 100644
--- a/cats/forecast.py
+++ b/cats/forecast.py
@@ -72,12 +72,6 @@ def __getitem__(self, index: int) -> CarbonIntensityAverageEstimate:
         duration is assumed to be unity and the average is computed by
         dividing the total integral value by the number of intervals.
         """
-        midpt = [
-            0.5 * (a.value + b.value)
-            for a, b in zip(
-                    self.data[index: index + self.ndata - 1],
-                    self.data[index + 1: index + self.ndata]
-            )]
 
         # Account for the fact that the start and end of each window
         # might not fall exactly on data points.  The starting
@@ -85,21 +79,31 @@ def __getitem__(self, index: int) -> CarbonIntensityAverageEstimate:
         # second data point (index + 1) in the window.  The ending
         # intensity value is interpolated between the last and
         # penultimate data points in he window.
-        start = self.start + index * self.data_stepsize
-        i = self.interp(self.data[index], self.data[index + 1], when=start)
-        midpt[0] = 0.5 * (i + self.data[index + 1].value)
-
-        end = self.end + index * self.data_stepsize
-        i = self.interp(
+        window_start = self.start + index * self.data_stepsize
+        window_end = self.end + index * self.data_stepsize
+        lbound = self.interp(
+            self.data[index],
+            self.data[index + 1],
+            when=window_start,
+        )
+        rbound = self.interp(
             self.data[index + self.ndata - 2],
             self.data[index + self.ndata - 1],
-            when=end,
+            when=window_end,
         )
-        midpt[-1] = 0.5 * (self.data[index + self.ndata - 2].value + i)
-
+        # window_data <- [lbound] + [...bulk...] + [rbound] where
+        # lbound and rbound are interpolated intensity values.
+        window_data = (
+            [lbound] +
+            [d.value for d in self.data[index + 1: index + self.ndata - 1]] +
+            [rbound]
+        )
+        midpt = [
+            0.5 * (a + b) for a, b in zip(window_data[:-1], window_data[1:])
+        ]
         return CarbonIntensityAverageEstimate(
-            start=start,
-            end=end,
+            start=window_start,
+            end=window_end,
             value=sum(midpt) / (self.ndata - 1),
         )
 

From 50ff2cc25635fba555fceae2385d7a47070b657d Mon Sep 17 00:00:00 2001
From: Thibault Lestang <t.lestang@imperial.ac.uk>
Date: Mon, 24 Jul 2023 12:53:07 +0100
Subject: [PATCH 14/21] test: job with duration smaller than time between data
 points

---
 tests/test_windowed_forecast.py | 38 +++++++++++++++++++++++++++++++++
 1 file changed, 38 insertions(+)

diff --git a/tests/test_windowed_forecast.py b/tests/test_windowed_forecast.py
index efa8c23..b36334e 100644
--- a/tests/test_windowed_forecast.py
+++ b/tests/test_windowed_forecast.py
@@ -148,3 +148,41 @@ def test_average_intensity_with_offset():
         )
         assert (result == expected)
 
+def test_average_intensity_with_offset_short_job():
+    # Case where job is short: start and end time fall between two
+    # consecutive data points (first and second).
+    with open(TEST_DATA, "r") as f:
+        csvfile = csv.reader(f, delimiter=",")
+        next(csvfile)  # Skip header line
+        data = [
+            CarbonIntensityPointEstimate(
+                datetime=datetime.fromisoformat(datestr[:-1]),
+                value=float(intensity_value),
+            )
+            for datestr, _, _, intensity_value in csvfile
+        ]
+
+        duration = 6  # in minutes
+        # First available data point is for 12:30 but the job
+        # starts 6 minutes later.
+        job_start = datetime.fromisoformat("2023-05-04T12:48")
+        result = WindowedForecast(data, duration, start=job_start)[2]
+
+        # Job starts at 12:48 and ends at 12:54. For each candidate
+        # running window, both start and end times fall between two
+        # consecutive data points (e.g. 13:30 and 14:00 for the third
+        # window).
+        #
+        # First and second element in v are interpolated intensity
+        # values.  e.g v[0] = 15 + 18min * (18 - 15) / 30min = 16.8
+        # and v[1] = v[-1] = 15 + 24min * (18 - 15) / 30min = 17.4
+        v = [16.8, 17.4]
+        data_timestep = data[1].datetime - data[0].datetime
+        expected = CarbonIntensityAverageEstimate(
+            start=job_start + 2 * data_timestep,
+            end=job_start + 2 * data_timestep + timedelta(minutes=duration),
+            value=sum(
+                [0.5 * (a + b) for a, b in zip(v[:-1], v[1:])]
+            ) / (len(v) - 1)
+        )
+        assert (result == expected)

From 9f61ba8b0c75cc766dec95972dca2432319602f6 Mon Sep 17 00:00:00 2001
From: Thibault Lestang <t.lestang@imperial.ac.uk>
Date: Wed, 26 Jul 2023 21:53:59 +0100
Subject: [PATCH 15/21] Remove commented pdb call

---
 cats/forecast.py | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/cats/forecast.py b/cats/forecast.py
index 9c17eaa..cb6a846 100644
--- a/cats/forecast.py
+++ b/cats/forecast.py
@@ -121,8 +121,7 @@ def interp(
 
         slope = (p2.value - p1.value) / timestep
         offset = (when - p1.datetime).total_seconds()
-        # import pdb; pdb.set_trace()
-        return p1.value + slope * offset  # Value at t = start
+        return p1.value + slope * offset  # Value at t = when
 
     def __iter__(self):
         for index in range(self.__len__()):

From 02ed7440b5355c7106efb2811ad82b14095413f3 Mon Sep 17 00:00:00 2001
From: Thibault Lestang <t.lestang@imperial.ac.uk>
Date: Thu, 27 Jul 2023 11:34:01 +0100
Subject: [PATCH 16/21] interp function returns a CarbonPointEstimate instance

---
 cats/forecast.py | 12 ++++++++----
 1 file changed, 8 insertions(+), 4 deletions(-)

diff --git a/cats/forecast.py b/cats/forecast.py
index cb6a846..874263a 100644
--- a/cats/forecast.py
+++ b/cats/forecast.py
@@ -95,7 +95,7 @@ def __getitem__(self, index: int) -> CarbonIntensityAverageEstimate:
         # lbound and rbound are interpolated intensity values.
         window_data = (
             [lbound] +
-            [d.value for d in self.data[index + 1: index + self.ndata - 1]] +
+            self.data[index + 1: index + self.ndata - 1] +
             [rbound]
         )
         midpt = [
@@ -112,8 +112,8 @@ def interp(
             p1: CarbonIntensityPointEstimate,
             p2: CarbonIntensityPointEstimate,
             when: datetime
-    ):
-        """Return value of carbon intensity at a time between data
+    ) -> CarbonIntensityPointEstimate:
+        """Return carbon intensity pt estimate at a time between data
         points, assuming points are joined by a straight line (linear
         interpolation).
         """
@@ -121,7 +121,11 @@ def interp(
 
         slope = (p2.value - p1.value) / timestep
         offset = (when - p1.datetime).total_seconds()
-        return p1.value + slope * offset  # Value at t = when
+
+        return CarbonIntensityPointEstimate(
+            value=p1.value + slope * offset,
+            datetime=when,
+        )
 
     def __iter__(self):
         for index in range(self.__len__()):

From afe12b45a783a62a1f0e95ca653940ccfed0e2d7 Mon Sep 17 00:00:00 2001
From: Thibault Lestang <t.lestang@imperial.ac.uk>
Date: Thu, 27 Jul 2023 11:35:01 +0100
Subject: [PATCH 17/21] Weight midpoints with interpoint distance

Although inner midpoints are separated by the same timestep. The
first (interpolated) point and second are not. Same goes for
penultimate and last (interpolated) points.
---
 cats/forecast.py | 8 +++++---
 1 file changed, 5 insertions(+), 3 deletions(-)

diff --git a/cats/forecast.py b/cats/forecast.py
index 874263a..af40e59 100644
--- a/cats/forecast.py
+++ b/cats/forecast.py
@@ -98,13 +98,15 @@ def __getitem__(self, index: int) -> CarbonIntensityAverageEstimate:
             self.data[index + 1: index + self.ndata - 1] +
             [rbound]
         )
-        midpt = [
-            0.5 * (a + b) for a, b in zip(window_data[:-1], window_data[1:])
+        acc = [
+            0.5 * (a.value + b.value) * (b.datetime - a.datetime).total_seconds()
+            for a, b in zip(window_data[:-1], window_data[1:])
         ]
+        duration = window_data[-1].datetime - window_data[0].datetime
         return CarbonIntensityAverageEstimate(
             start=window_start,
             end=window_end,
-            value=sum(midpt) / (self.ndata - 1),
+            value=sum(acc) / duration.total_seconds(),
         )
 
     @staticmethod

From b26841ecb39a24b1aea81e1f1077106052aca6dc Mon Sep 17 00:00:00 2001
From: Thibault Lestang <t.lestang@imperial.ac.uk>
Date: Thu, 27 Jul 2023 11:43:18 +0100
Subject: [PATCH 18/21] test: account for weighted midpoints

---
 tests/test_windowed_forecast.py | 10 ++++++----
 1 file changed, 6 insertions(+), 4 deletions(-)

diff --git a/tests/test_windowed_forecast.py b/tests/test_windowed_forecast.py
index b36334e..da0124f 100644
--- a/tests/test_windowed_forecast.py
+++ b/tests/test_windowed_forecast.py
@@ -138,13 +138,15 @@ def test_average_intensity_with_offset():
         # First and last element in v are interpolated intensity value.
         # e.g v[0] = 15 + 18min * (18 - 15) / 30min = 16.8
         v = [16.8, 18, 19, 17, 16, 11, 11, 11, 11]
-        data_timestep = data[1].datetime - data[0].datetime
+        data_timestep = data[1].datetime - data[0].datetime # 30 minutes
         expected = CarbonIntensityAverageEstimate(
             start=job_start + 2 * data_timestep,
             end=job_start + 2 * data_timestep + timedelta(minutes=duration),
-            value=sum(
-                [0.5 * (a + b) for a, b in zip(v[:-1], v[1:])]
-            ) / (len(v) - 1)
+            value=(
+                0.5 * (v[0] + v[1]) * 12 +
+                sum([0.5 * (a + b) * 30 for a, b in zip(v[1:-2], v[2:-1])]) +
+                0.5 * (v[7] + v[8]) * 2
+            ) / duration
         )
         assert (result == expected)
 

From 62d1255da80205bae103cdbc175dd729c993e18c Mon Sep 17 00:00:00 2001
From: Thibault Lestang <t.lestang@imperial.ac.uk>
Date: Thu, 27 Jul 2023 11:43:51 +0100
Subject: [PATCH 19/21] test: add a second test with a job spanning a few data
 points
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Co-authored-by: Loïc Lannelongue <loic.lannelongue@posteo.net>
---
 tests/test_windowed_forecast.py | 34 +++++++++++++++++++++++++++++++++
 1 file changed, 34 insertions(+)

diff --git a/tests/test_windowed_forecast.py b/tests/test_windowed_forecast.py
index da0124f..73578d6 100644
--- a/tests/test_windowed_forecast.py
+++ b/tests/test_windowed_forecast.py
@@ -114,6 +114,40 @@ def test_average_intensity_now():
 
 
 def test_average_intensity_with_offset():
+    # Case where job start and end time are not colocated with data
+    # carbon intensity data points. In this case cats interpolate the
+    # intensity value at beginning and end of each potential job
+    # duration window.
+    CI_forecast = [
+        CarbonIntensityPointEstimate(datetime(2023,1,1,8,30), value=26),
+        CarbonIntensityPointEstimate(datetime(2023,1,1,9,0), value=40),
+        CarbonIntensityPointEstimate(datetime(2023,1,1,9,30), value=50),
+        CarbonIntensityPointEstimate(datetime(2023,1,1,10,0), value=60),
+        CarbonIntensityPointEstimate(datetime(2023,1,1,10,30), value=25),
+    ]
+    duration = 70  # in minutes
+    # First available data point is for 08:00 but the job
+    # starts 15 minutes later.
+    job_start = datetime.fromisoformat("2023-01-01T08:45")
+    result = WindowedForecast(CI_forecast, duration, start=job_start)[1]
+
+    expected = CarbonIntensityAverageEstimate(
+        start=datetime(2023,1,1,9,15),
+        end=datetime(2023,1,1,10,25),
+        value=(
+            0.5 * (45 + 50) * 15 +
+            0.5 * (50 + 60) * 30 +
+            0.5 * (60 + 30.83) * 25
+        ) / duration
+    )
+    assert result.start == expected.start
+    assert result.end == result.end
+    # Allow for 1% relative tolerance between result and expected
+    # value.  Difference is probably due to truncating the
+    # second interpolated value above.
+    assert math.isclose(result.value, expected.value, rel_tol=0.01)
+
+def test_average_intensity_with_offset_long_job():
     # Case where job start and end time are not colocated with data
     # carbon intensity data points. In this case cats interpolate the
     # intensity value at beginning and end of each potential job

From 6930399120c101e90293c0a67406915bdc5b54ea Mon Sep 17 00:00:00 2001
From: Thibault Lestang <t.lestang@imperial.ac.uk>
Date: Fri, 28 Jul 2023 15:28:31 +0100
Subject: [PATCH 20/21] test: Don't truncate interpolated intensity values

---
 tests/test_windowed_forecast.py | 13 +++++--------
 1 file changed, 5 insertions(+), 8 deletions(-)

diff --git a/tests/test_windowed_forecast.py b/tests/test_windowed_forecast.py
index 73578d6..28210c6 100644
--- a/tests/test_windowed_forecast.py
+++ b/tests/test_windowed_forecast.py
@@ -131,21 +131,18 @@ def test_average_intensity_with_offset():
     job_start = datetime.fromisoformat("2023-01-01T08:45")
     result = WindowedForecast(CI_forecast, duration, start=job_start)[1]
 
+    interp1 = 40 + 15 * (50 - 40) / 30
+    interp2 = 60 + 25 * (25 - 60) / 30
     expected = CarbonIntensityAverageEstimate(
         start=datetime(2023,1,1,9,15),
         end=datetime(2023,1,1,10,25),
         value=(
-            0.5 * (45 + 50) * 15 +
+            0.5 * (interp1 + 50) * 15 +
             0.5 * (50 + 60) * 30 +
-            0.5 * (60 + 30.83) * 25
+            0.5 * (60 + interp2) * 25
         ) / duration
     )
-    assert result.start == expected.start
-    assert result.end == result.end
-    # Allow for 1% relative tolerance between result and expected
-    # value.  Difference is probably due to truncating the
-    # second interpolated value above.
-    assert math.isclose(result.value, expected.value, rel_tol=0.01)
+    assert result == expected
 
 def test_average_intensity_with_offset_long_job():
     # Case where job start and end time are not colocated with data

From db76c1968464c47b31222b998b4142437ad0fe24 Mon Sep 17 00:00:00 2001
From: Thibault Lestang <t.lestang@imperial.ac.uk>
Date: Fri, 28 Jul 2023 15:50:13 +0100
Subject: [PATCH 21/21] Dont assume start time falls withing first data
 interval

---
 cats/forecast.py                | 22 +++++++++++++++++-----
 tests/test_windowed_forecast.py | 11 +++++++++++
 2 files changed, 28 insertions(+), 5 deletions(-)

diff --git a/cats/forecast.py b/cats/forecast.py
index af40e59..70bd80e 100644
--- a/cats/forecast.py
+++ b/cats/forecast.py
@@ -42,16 +42,28 @@ def __init__(
             duration: int,  # in minutes
             start: datetime,
     ):
-        self.data = data
         self.data_stepsize = data[1].datetime - data[0].datetime
         self.start = start
         # TODO: Expect duration as a timedelta directly
         self.end = start + timedelta(minutes=duration)
 
+        # Restrict data points so that start time falls within the
+        # first data interval.  In other we don't need any data prior
+        # the closest data preceding (on the left of) the job start
+        # time.
+        def bisect_right(data, t):
+            for i, d in enumerate(data):
+                if d.datetime > t:
+                    return i - 1
+        # bisect_right(data, start) returns the index of the first
+        # data point with datetime value immediately preceding the job
+        # start time
+        self.data = data[bisect_right(data, start):]
+
         # Find number of data points in a window, by finding the index
-        # of the first data point past the job end time. Could be done
-        # with the bisect module in the stdlib for python 3.10+ ('key'
-        # parameter was introduced in 3.10).
+        # of the closest data point past the job end time. Could be
+        # done with the bisect module in the stdlib for python 3.10+
+        # ('key' parameter was introduced in 3.10).
         #
         # bisect_left(data, self.end, key=lambda x: x.datetime)
         #
@@ -59,7 +71,7 @@ def bisect_left(data, t):
             for i, d in enumerate(data):
                 if d.datetime >= t:
                     return i
-        self.ndata = bisect_left(data, self.end) + 1
+        self.ndata = bisect_left(self.data, self.end) + 1
 
     def __getitem__(self, index: int) -> CarbonIntensityAverageEstimate:
         """Return the average of timeseries data from index over the
diff --git a/tests/test_windowed_forecast.py b/tests/test_windowed_forecast.py
index 28210c6..d5d787c 100644
--- a/tests/test_windowed_forecast.py
+++ b/tests/test_windowed_forecast.py
@@ -144,6 +144,17 @@ def test_average_intensity_with_offset():
     )
     assert result == expected
 
+    # Test that the WindowedForecast is able to work with a job start
+    # beyond the first data interval.  Not technically required when
+    # working with carbonintensity.org.uk, but useful generalisation
+    # nontheless.
+
+    # When start at 09:15 the WindowedForecast's 'data' attribute
+    # should discard the first data point at 08:30.
+    job_start = datetime.fromisoformat("2023-01-01T09:15")
+    result = WindowedForecast(CI_forecast, duration, start=job_start)[0]
+    assert result == expected
+
 def test_average_intensity_with_offset_long_job():
     # Case where job start and end time are not colocated with data
     # carbon intensity data points. In this case cats interpolate the