From 35ee478d80ee5f3b6bb7a8785e790c699ad85ee0 Mon Sep 17 00:00:00 2001
From: Daniel Olsen
Date: Wed, 5 Jan 2022 14:35:32 -0800
Subject: [PATCH 01/12] chore: add URL paths to Form 860 solar/wind-specific
 data

---
 prereise/gather/griddata/hifld/const.py | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/prereise/gather/griddata/hifld/const.py b/prereise/gather/griddata/hifld/const.py
index 55e802cbe..755e1dd7e 100644
--- a/prereise/gather/griddata/hifld/const.py
+++ b/prereise/gather/griddata/hifld/const.py
@@ -107,6 +107,8 @@ blob_paths = {
     "eia_form860_2019_generator": "https://besciences.blob.core.windows.net/datasets/EIA_Form860/3_1_Generator_Y2019_Operable.csv",
     "eia_form860_2019_plant": "https://besciences.blob.core.windows.net/datasets/EIA_Form860/2___Plant_Y2019.csv",
+    "eia_form860_2019_solar": "https://besciences.blob.core.windows.net/datasets/EIA_Form860/3_3_Solar_Y2019_Operable.csv",
+    "eia_form860_2019_wind": "https://besciences.blob.core.windows.net/datasets/EIA_Form860/3_2_Wind_Y2019_Operable.csv",
     "epa_ampd": "https://besciences.blob.core.windows.net/datasets/EPA_AMPD/",
     "epa_needs": "https://besciences.blob.core.windows.net/datasets/EPA_NEEDS/needs-v620_06-30-21-2_active.csv",
     "substations": "https://besciences.blob.core.windows.net/datasets/hifld/Electric_Substations_Jul2020.csv",

From a4a9a95173f955d299395b8eb501146bd5844fd6 Mon Sep 17 00:00:00 2001
From: Daniel Olsen
Date: Thu, 6 Jan 2022 11:47:00 -0800
Subject: [PATCH 02/12] chore: update nrel-pysam requirement to version 3.0

---
 Pipfile          |  2 +-
 Pipfile.lock     | 33 ++++++++++++++++++---------------
 requirements.txt |  2 +-
 3 files changed, 20 insertions(+), 17 deletions(-)

diff --git a/Pipfile b/Pipfile
index 2f3eb2f3c..a893c698b 100644
--- a/Pipfile
+++ b/Pipfile
@@ -23,6 +23,6 @@ tqdm = "==4.29.1"
 xlrd = "==1.2.0"
 netCDF4 = "~=1.5.8"
 powersimdata = "~=0.4.4"
-nrel-pysam = "~=2.2"
+nrel-pysam = "~=3.0"
 pyproj = "~=3.0"
 pygrib = "*"

diff --git a/Pipfile.lock b/Pipfile.lock
index 96ee06d32..49a1e74bc 100644
--- a/Pipfile.lock
+++ b/Pipfile.lock
@@ -1,7 +1,7 @@
 {
     "_meta": {
         "hash": {
-            "sha256": "3776c28a392d55c14dd5f650d957a5cb86d8fd0073ea13d81dd417e6b1c993c1"
+            "sha256": "da607b969c8ee56efee681dfce43c9e652b3cf43a7e7181c781c8711068b3af2"
         },
         "pipfile-spec": 6,
         "requires": {},
@@ -520,26 +520,29 @@
                 "sha256:80b6b89c77d1dfb64a4c7854981b60aeea6360ac02c6d4e4913319e0a313abef",
                 "sha256:c0946ed31d71f1b732b5aaa6da5a0388a345019af232ce2f49c766e2d6795c51"
             ],
-            "markers": "python_version >= '3.7'",
+            "index": "pypi",
             "version": "==2.6.3"
         },
         "nrel-pysam": {
            "hashes": [
-                "sha256:40f579d546d8763ead40da22c4755f463204abb59c5b56d6756f416de5a9408d",
-                "sha256:4da7bf23749b2b9f0b603b84e21e833a92e1965078b86363de0da2781dcb8aeb",
-                "sha256:5335095823001aa893706d6fde612b129cdc97078b2107426c40c5bde72d3a2d",
-                "sha256:55012ef8c44ff32cdd4befa360bd8d9490f1bbee68958ec6dee4a4091d377df5",
-                "sha256:55825638b64d29d1c0046abc29a168a319a7d5962242cc306b6b7770791fb3d4",
-                "sha256:898c587c89a1ee25f711ef0de51a9575a029b79d221a785b4a5e4c56e1ed3d00",
-                "sha256:9b30775d20c679f2f6c7f6b51a0d07ebdb681133e021039d583c3450ffcdca6a",
-                "sha256:ae00d9f2e3b7c03ff91d0ea5c26752b33dc8a5300b86cd3ee63f8ac77c47fa63",
-                "sha256:b0464daacbf854d506323675071c6b5eb7f23bc35a36014cfba6e60babe05804",
-                "sha256:bef67975fec3340eff73dd9a16ffb662c32bb6eaa8bef109d2c8db51aa0c5273",
-                "sha256:eca2af66cf3dacbef80a8c3f1a4f602b9f1cce4189249c2e28aab62d52aa7863",
-                "sha256:f58f676c4b1fe83615721d8f864077d62f9c93022b1538351c9ecd53c31763b3"
+                "sha256:1537d0b1263eb0a179b91e2fc0d3135c7af6e20ad8519179def5df59aaf367db",
+                "sha256:192c1845c6f4216672e909bafbfbae6ec8de86e88f883d88ac18cfd169e9d4be",
+                "sha256:3dd476e832787cffd5909d8ddb0f3e59d1c1e3864a3366c093d71a5f092dd8c6",
+                "sha256:48b5ccd3db019d59aa005f573dfb6d605e27ea6cfbd0d211ddb2fd49884813a9",
+                "sha256:4b3b7e8cc77e6ccb81676655f712362cc7ab8ed1039a0756546857228a8782f5",
+                "sha256:6619d78002d0168d2adac51266d042620ebb97718c4e5dc1ea9a3c937b53516d",
+                "sha256:71768d40122536dbee683db933817eeb62674b2a6fdf8dcd11665b1d95c06370",
+                "sha256:765545f9876e2f55c4774aa45e8e09d727aad2e9d76d0a75e181cc0de48ba45f",
+                "sha256:767bec11b8b95fa2c61c9e4f4f3875a305275afdcf27480511ad84309f5f67bc",
+                "sha256:8d605a62e182142d422c05ab34b27eef24a6e218ac5dbbd28a18b5c5e8fc6a99",
+                "sha256:90cd2746b9e34ccd161576f67e93d38ec2fb745c7dccdd795975b00cc1933f57",
+                "sha256:b2fc1f68c78498c80375c805f2b872c8c6085b56224c4cf7d494912647f3ba20",
+                "sha256:b8017892e225bc4275846f6a941b3e65fd5cf1ba006ccba687c3767d73c38543",
+                "sha256:d31239e87a0e3319da17a41dd8618f988117ee1c9e408d3413092f1ca5bc66ec",
+                "sha256:d82269f42ae3cb74236c5124bd90333af5182919b8bd877db54ad860e8c49e66"
             ],
             "index": "pypi",
-            "version": "==2.2.4"
+            "version": "==3.0.0"
         },
         "nrel-pysam-stubs": {
             "hashes": [

diff --git a/requirements.txt b/requirements.txt
index f946e12f3..e47d21c11 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -12,7 +12,7 @@ requests~=2.25
 scipy~=1.7
 tqdm==4.29.1
 xlrd==1.2.0
-nrel-pysam~=2.2
+nrel-pysam~=3.0
 black
 pytest
 coverage

From 59d66aea407696a138e25b3df34a3a11b19ce594 Mon Sep 17 00:00:00 2001
From: Daniel Olsen
Date: Thu, 6 Jan 2022 11:38:26 -0800
Subject: [PATCH 03/12] chore: update PVWatts module to version 8

---
 prereise/gather/solardata/nsrdb/sam.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/prereise/gather/solardata/nsrdb/sam.py b/prereise/gather/solardata/nsrdb/sam.py
index c05574c92..67d07fb32 100644
--- a/prereise/gather/solardata/nsrdb/sam.py
+++ b/prereise/gather/solardata/nsrdb/sam.py
@@ -1,6 +1,6 @@
 import numpy as np
 import pandas as pd
-import PySAM.Pvwattsv7 as PVWatts
+import PySAM.Pvwattsv8 as PVWatts
 import PySAM.PySSC as pssc  # noqa: N813
 from tqdm import tqdm

@@ -48,7 +48,7 @@ def calculate_power(solar_data, pv_dict):
     :param dict pv_dict: solar plant attributes.
     :return: (*numpy.array*) hourly power output.
""" - pv_dat = pssc.dict_to_ssc_table(pv_dict, "pvwattsv7") + pv_dat = pssc.dict_to_ssc_table(pv_dict, "pvwattsv8") pv = PVWatts.wrap(pv_dat) pv.SolarResource.assign({"solar_resource_data": solar_data}) pv.execute() From 6b0d6a917994bd978927856682c37816d78e0beb Mon Sep 17 00:00:00 2001 From: Daniel Olsen Date: Wed, 5 Jan 2022 17:37:51 -0800 Subject: [PATCH 04/12] chore: add name to generator index --- prereise/gather/griddata/hifld/data_process/generators.py | 1 + 1 file changed, 1 insertion(+) diff --git a/prereise/gather/griddata/hifld/data_process/generators.py b/prereise/gather/griddata/hifld/data_process/generators.py index 36f1a773c..ff9818c2e 100644 --- a/prereise/gather/griddata/hifld/data_process/generators.py +++ b/prereise/gather/griddata/hifld/data_process/generators.py @@ -343,5 +343,6 @@ def build_plant(bus, substations, kwargs={}): ) generators["type"] = generators["type"].replace(const.fuel_translations) generators["GenIOD"] = 0 + generators.index.name = "plant_id" return generators From ada819e2fab43575c31a777913576323ef258a31 Mon Sep 17 00:00:00 2001 From: Daniel Olsen Date: Thu, 6 Jan 2022 16:16:46 -0800 Subject: [PATCH 05/12] ci: add python 3.10 test --- .github/workflows/test.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml index ada5f3d00..5922d7ea0 100644 --- a/.github/workflows/test.yml +++ b/.github/workflows/test.yml @@ -14,7 +14,7 @@ jobs: strategy: fail-fast: false matrix: - python-version: ['3.8', '3.9'] + python-version: ['3.8', '3.9', '3.10'] name: Python ${{ matrix.python-version }} steps: From 2f777061cbdd707742ae6aa06f233be57157b0c8 Mon Sep 17 00:00:00 2001 From: Daniel Olsen Date: Wed, 5 Jan 2022 15:47:38 -0800 Subject: [PATCH 06/12] doc: fix docstrings for solar profile functions --- prereise/gather/solardata/nsrdb/sam.py | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/prereise/gather/solardata/nsrdb/sam.py b/prereise/gather/solardata/nsrdb/sam.py index 67d07fb32..4b07ae4b0 100644 --- a/prereise/gather/solardata/nsrdb/sam.py +++ b/prereise/gather/solardata/nsrdb/sam.py @@ -80,8 +80,9 @@ def retrieve_data_blended( :param int/str year: year. :param int/float rate_limit: minimum seconds to wait between requests to NREL :param str cache_dir: directory to cache downloaded data. If None, don't cache. - :return: (*pandas.DataFrame*) -- data frame with *'Pout'*, *'plant_id'*, - *'ts'* and *'ts_id'* as columns. Values are power output for a 1MW generator. + :return: (*pandas.DataFrame*) -- data frame of normalized power profiles. The index + is hourly timestamps for the profile year, the columns are plant IDs, the values + are floats. """ xor_err_msg = ( "Either grid xor (solar_plant and interconnect_to_state_abvs) must be defined" @@ -178,9 +179,7 @@ def retrieve_data_individual( email, api_key, solar_plant, year="2016", rate_limit=0.5, cache_dir=None ): """Retrieves irradiance data from NSRDB and calculate the power output using - the System Adviser Model (SAM). Either a Grid object needs to be passed to ``grid``, - or (a data frame needs to be passed to ``solar_plant`` and a string needs to be - passed to ``grid_model``. + the System Adviser Model (SAM). :param str email: email used to`sign up `_. :param str api_key: API key. @@ -190,8 +189,9 @@ def retrieve_data_individual( :param int/str year: year. :param int/float rate_limit: minimum seconds to wait between requests to NREL :param str cache_dir: directory to cache downloaded data. 
If None, don't cache. - :return: (*pandas.DataFrame*) -- data frame with *'Pout'*, *'plant_id'*, - *'ts'* and *'ts_id'* as columns. Values are power output for a 1MW generator. + :return: (*pandas.DataFrame*) -- data frame of normalized power profiles. The index + is hourly timestamps for the profile year, the columns are plant IDs, the values + are floats. """ # Verify that each solar plant has exactly one tracking type equal to True array_type_mapping = { From 89835e1cd50d54764bf0c2e88f96e1019ad32d41 Mon Sep 17 00:00:00 2001 From: Daniel Olsen Date: Thu, 6 Jan 2022 20:26:41 -0800 Subject: [PATCH 07/12] feat: add method to translate PSM3Data back to original CSV file format --- prereise/gather/solardata/nsrdb/nrel_api.py | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/prereise/gather/solardata/nsrdb/nrel_api.py b/prereise/gather/solardata/nsrdb/nrel_api.py index e17c22905..985561507 100644 --- a/prereise/gather/solardata/nsrdb/nrel_api.py +++ b/prereise/gather/solardata/nsrdb/nrel_api.py @@ -71,6 +71,19 @@ def to_dict(self): ) return result + def to_sam_weather_file_format(self): + """Convert the data to the format expected by nrel-pysam for local files. See + https://developer.nrel.gov/docs/solar/nsrdb/psm3-download/. + + :return: (*list*) -- a list of lists which can be passed to + :meth:`csv.writer.writerows` and then loaded from disk. + """ + metadata_names = ["lat", "lon", "tz", "elevation"] + metadata_values = self.lat, self.lon, self.tz, self.elevation + data_headers = self.data_resource.columns.tolist() + data_rows = self.data_resource.to_numpy().tolist() + return [metadata_names, metadata_values, data_headers] + data_rows + class NrelApi: """Provides an interface to the NREL API for PSM3 data. It supports From d4d62fe53194cbe8fd2ac477274a45b68db9b18b Mon Sep 17 00:00:00 2001 From: Daniel Olsen Date: Fri, 7 Jan 2022 09:17:25 -0800 Subject: [PATCH 08/12] refactor: make NREL API downloads resilient to 5xx errors --- prereise/gather/solardata/nsrdb/nrel_api.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/prereise/gather/solardata/nsrdb/nrel_api.py b/prereise/gather/solardata/nsrdb/nrel_api.py index 985561507..c00223892 100644 --- a/prereise/gather/solardata/nsrdb/nrel_api.py +++ b/prereise/gather/solardata/nsrdb/nrel_api.py @@ -164,6 +164,10 @@ def get_psm3_at( ) def download(url): resp = requests.get(url) + if resp.status_code // 100 == 5: # all 5xx errors, server side + raise TransientError( + f"Server side error, retry_count={download.retry_count}" + ) if resp.status_code == 429: raise TransientError( f"Too many requests, retry_count={download.retry_count}" From 6e6a31d7acde4aa016263d5a8613e66973ba1457 Mon Sep 17 00:00:00 2001 From: Daniel Olsen Date: Wed, 5 Jan 2022 17:55:42 -0800 Subject: [PATCH 09/12] feat: add concentrating solar power profile generation function --- prereise/gather/solardata/nsrdb/sam.py | 75 +++++++++++++++++++------- 1 file changed, 55 insertions(+), 20 deletions(-) diff --git a/prereise/gather/solardata/nsrdb/sam.py b/prereise/gather/solardata/nsrdb/sam.py index 4b07ae4b0..aa23ec8e2 100644 --- a/prereise/gather/solardata/nsrdb/sam.py +++ b/prereise/gather/solardata/nsrdb/sam.py @@ -1,7 +1,12 @@ +import csv +import os +import tempfile + import numpy as np import pandas as pd import PySAM.Pvwattsv8 as PVWatts import PySAM.PySSC as pssc # noqa: N813 +from PySAM import TcsgenericSolar from tqdm import tqdm from prereise.gather.solardata.helpers import get_plant_id_unique_location @@ -18,6 +23,7 @@ "inv_eff": 94, "losses": 14, "tilt": 
+    "ilr": 1.25,  # Inverter Loading Ratio
 }


 def generate_timestamps_without_leap_day(year):
@@ -41,8 +47,8 @@
     return sam_dates, leap_day


-def calculate_power(solar_data, pv_dict):
-    """Use PVWatts to translate weather data into power.
+def calculate_power_pv(solar_data, pv_dict):
+    """Use PVWatts to translate weather data into power using a photovoltaic (PV) model.

     :param dict solar_data: weather data as returned by :meth:`Psm3Data.to_dict`.
     :param dict pv_dict: solar plant attributes.
@@ -55,6 +61,28 @@
     return np.array(pv.Outputs.gen)


+def calculate_power_csp(solar_data):
+    """Use the System Adviser Model (SAM) to translate weather data into power using a
+    concentrating solar power (CSP) model.
+
+    :param list solar_data: weather data as returned by
+        :meth:`Psm3Data.to_sam_weather_file_format`.
+    :return: (*numpy.array*) hourly normalized power output, clipped to the
+        [0, 1] interval.
+    """
+    csp = TcsgenericSolar.default("GenericCSPSystemCommercial")
+    csp.Type260.assign({"w_des": 1e-3})  # Capacity is in MW, but outputs are in kW
+    # The solar module expects weather data as a local file, so let's create one
+    with tempfile.TemporaryDirectory() as tmpdirname:
+        filename = os.path.join(tmpdirname, "weather_data.csv")
+        with open(filename, "w", newline="") as f:
+            writer = csv.writer(f)
+            writer.writerows(solar_data)
+        csp.Weather.assign({"file_name": filename})
+        csp.execute()
+    return np.clip(np.array(csp.Outputs.gen), 0, 1)  # clip values outside [0, 1]
+
+
 def retrieve_data_blended(
@@ -131,8 +159,6 @@
         states_in_interconnect = list(interconnect_to_state_abvs[interconnect])
         frac[zone] = get_pv_tracking_ratio_state(pv_info, states_in_interconnect)

-    # Inverter Loading Ratio
-    ilr = 1.25
     api = NrelApi(email, api_key, rate_limit)

     # Identify unique location
@@ -149,7 +175,7 @@
             leap_day=False,
             dates=sam_dates,
             cache_dir=cache_dir,
-        ).to_dict()
+        )

         for i, plant_id in enumerate(plants):
             if i == 0:
@@ -159,12 +185,14 @@
                 power = 0
                 for j, axis in enumerate([0, 2, 4]):
                     plant_pv_dict = {
-                        "system_capacity": ilr,
-                        "dc_ac_ratio": ilr,
+                        "system_capacity": default_pv_parameters["ilr"],
+                        "dc_ac_ratio": default_pv_parameters["ilr"],
                         "array_type": axis,
                     }
                     pv_dict = {**default_pv_parameters, **plant_pv_dict}
-                    power += tracking_ratios[j] * calculate_power(solar_data, pv_dict)
+                    power += tracking_ratios[j] * calculate_power_pv(
+                        solar_data.to_dict(), pv_dict
+                    )
                 if leap_day is not None:
                     power = np.insert(power, leap_day, power[leap_day - 24 : leap_day])
             else:
@@ -228,23 +256,30 @@
             leap_day=False,
             dates=sam_dates,
             cache_dir=cache_dir,
-        ).to_dict()
+        )

         for plant_id in plants:
             series = solar_plant.loc[plant_id]
-            ilr = series["DC Net Capacity (MW)"] / series["Nameplate Capacity (MW)"]
-            plant_pv_dict = {
-                "system_capacity": ilr,
-                "dc_ac_ratio": ilr,
-                "array_type": plant_array_types.loc[plant_id],
-            }
-            if plant_pv_dict["array_type"] == 0:
-                plant_pv_dict["tilt"] = series["Tilt Angle"]
-            pv_dict = {**default_pv_parameters, **plant_pv_dict}
-            power = calculate_power(solar_data, pv_dict)
+            # EIA codes: PV = Photovoltaic, CP = Concentrating solar, ST = steam turbine
+            if series["Prime Mover"] == "PV":
+                ilr = series["DC Net Capacity (MW)"] / series["Nameplate Capacity (MW)"]
+            else:
+                # Besides PV, other types don't seem to have DC capacity defined
+                ilr = default_pv_parameters["ilr"]
+            if series["Prime Mover"] in {"CP", "PV"}:
+                plant_pv_dict = {
+                    "system_capacity": ilr,
+                    "dc_ac_ratio": ilr,
+                    "array_type": plant_array_types.loc[plant_id],
+                }
+                if plant_pv_dict["array_type"] == 0:
+                    plant_pv_dict["tilt"] = series["Tilt Angle"]
+                pv_dict = {**default_pv_parameters, **plant_pv_dict}
+                power = calculate_power_pv(solar_data.to_dict(), pv_dict)
+            elif series["Prime Mover"] == "ST":
+                power = calculate_power_csp(solar_data.to_sam_weather_file_format())
             if leap_day is not None:
                 power = np.insert(power, leap_day, power[leap_day - 24 : leap_day])
-
             data[plant_id] = power

     return pd.DataFrame(data, index=real_dates).sort_index(axis="columns")

From 16d7427f5acb52eab8b3bb28aa55c8ea6ba745c6 Mon Sep 17 00:00:00 2001
From: Daniel Olsen
Date: Fri, 7 Jan 2022 15:37:29 -0800
Subject: [PATCH 10/12] refactor: fetch full weather year, but still manage
 non-leap output

---
 prereise/gather/solardata/nsrdb/sam.py | 61 ++++++++++----------------
 1 file changed, 23 insertions(+), 38 deletions(-)

diff --git a/prereise/gather/solardata/nsrdb/sam.py b/prereise/gather/solardata/nsrdb/sam.py
index aa23ec8e2..a6ff65e38 100644
--- a/prereise/gather/solardata/nsrdb/sam.py
+++ b/prereise/gather/solardata/nsrdb/sam.py
@@ -25,26 +25,7 @@
     "tilt": 30,
     "ilr": 1.25,  # Inverter Loading Ratio
 }
-
-
-def generate_timestamps_without_leap_day(year):
-    """For a given year, return timestamps for each non-leap-day hour, and the timestamp
-    of the beginning of the leap day (if there is one).
-
-    :param int/str year: year to generate timestamps for.
-    :return: (*tuple*) --
-        pandas.DatetimeIndex: for each non-leap-day-hour of the given year.
-        pandas.Timestamp/None: timestamp for the first hour of the leap day (if any).
-    """
-    # SAM only takes 365 days, so for a leap year: leave out the leap day.
-    try:
-        leap_day = (pd.Timestamp(f"{year}-02-29-00").dayofyear - 1) * 24
-        sam_dates = pd.date_range(start=f"{year}-01-01-00", freq="H", periods=365 * 24)
-        sam_dates = sam_dates.map(lambda t: t.replace(year=int(year)))
-    except ValueError:
-        leap_day = None
-        sam_dates = pd.date_range(start=f"{year}-01-01-00", freq="H", periods=365 * 24)
-    return sam_dates, leap_day
+leap_hour_idx = 59 * 24  # 59 days have passed already (31 in January, 28 in February)


 def calculate_power_pv(solar_data, pv_dict):
@@ -141,10 +122,8 @@
         for z in solar_plant["zone_id"].unique()
     }

-    real_dates = pd.date_range(
-        start=f"{year}-01-01-00", end=f"{year}-12-31-23", freq="H"
-    )
-    sam_dates, leap_day = generate_timestamps_without_leap_day(year)
+    dates = pd.date_range(start=f"{year}-01-01-00", end=f"{year}-12-31-23", freq="H")
+    leap_day = f"{year}-02-29" in dates

     # PV tracking ratios
     # By state and by interconnect when EIA data do not have any solar PV in the state
@@ -172,8 +151,8 @@
             lon,
             attributes="dhi,dni,wind_speed,air_temperature",
             year=year,
-            leap_day=False,
-            dates=sam_dates,
+            leap_day=leap_day,
+            dates=dates,
             cache_dir=cache_dir,
         )
@@ -193,12 +172,16 @@
                     power += tracking_ratios[j] * calculate_power_pv(
                         solar_data.to_dict(), pv_dict
                     )
-                if leap_day is not None:
-                    power = np.insert(power, leap_day, power[leap_day - 24 : leap_day])
+                if leap_day:
+                    power = np.insert(
+                        power,
+                        leap_hour_idx,
+                        power[leap_hour_idx - 24 : leap_hour_idx],
+                    )
             else:
                 # For every other plant, look up power from first plant at the location
                 power = data[first_plant_id]
             data[plant_id] = power

-    return pd.DataFrame(data, index=real_dates).sort_index(axis="columns")
+    return pd.DataFrame(data, index=dates).sort_index(axis="columns")

 def retrieve_data_individual(
@@ -236,10 +219,8 @@
         .apply(lambda x: array_type_mapping[x.idxmax()], axis=1)
     )

-    real_dates = pd.date_range(
-        start=f"{year}-01-01-00", end=f"{year}-12-31-23", freq="H"
-    )
-    sam_dates, leap_day = generate_timestamps_without_leap_day(year)
+    dates = pd.date_range(start=f"{year}-01-01-00", end=f"{year}-12-31-23", freq="H")
+    leap_day = f"{year}-02-29" in dates

     api = NrelApi(email, api_key, rate_limit)
@@ -253,8 +234,8 @@
             lon,
             attributes="dhi,dni,wind_speed,air_temperature",
             year=year,
-            leap_day=False,
-            dates=sam_dates,
+            leap_day=leap_day,
+            dates=dates,
             cache_dir=cache_dir,
         )
@@ -278,8 +259,12 @@
                 power = calculate_power_pv(solar_data.to_dict(), pv_dict)
             elif series["Prime Mover"] == "ST":
                 power = calculate_power_csp(solar_data.to_sam_weather_file_format())
-            if leap_day is not None:
-                power = np.insert(power, leap_day, power[leap_day - 24 : leap_day])
+            if leap_day:
+                power = np.insert(
+                    power,
+                    leap_hour_idx,
+                    power[leap_hour_idx - 24 : leap_hour_idx],
+                )
             data[plant_id] = power

-    return pd.DataFrame(data, index=real_dates).sort_index(axis="columns")
+    return pd.DataFrame(data, index=dates).sort_index(axis="columns")

From 62067be58aced46f3c271602b510e483069aaaa5 Mon Sep 17 00:00:00 2001
From: Daniel Olsen
Date: Wed, 5 Jan 2022 15:43:28 -0800
Subject: [PATCH 11/12] feat: add function to pass solar-specific 860 data to
 profile generation

---
 .../griddata/hifld/data_process/profiles.py | 59 +++++++++++++++++++
 1 file changed, 59 insertions(+)
 create mode 100644 prereise/gather/griddata/hifld/data_process/profiles.py

diff --git a/prereise/gather/griddata/hifld/data_process/profiles.py b/prereise/gather/griddata/hifld/data_process/profiles.py
new file mode 100644
index 000000000..a469b5e05
--- /dev/null
+++ b/prereise/gather/griddata/hifld/data_process/profiles.py
@@ -0,0 +1,59 @@
+from prereise.gather.griddata.hifld import const
+from prereise.gather.griddata.hifld.data_access.load import get_eia_form_860
+from prereise.gather.solardata.nsrdb.sam import retrieve_data_individual
+
+
+def floatify(x):
+    """Coerce an object to a float, returning a float NaN for any values which raise a
+    :exc:`ValueError` when passed to :func:`float`.
+
+    :param object x: object to coerce.
+    :return: (*float*) -- coerced value.
+    """
+    try:
+        return float(x)
+    except ValueError:
+        return float("nan")
+
+
+def build_solar(nrel_email, nrel_api_key, solar_plants, **solar_kwargs):
+    """Use plant-level data to build solar profiles.
+
+    :param str nrel_email: email used to `sign up <https://developer.nrel.gov/signup/>`_.
+    :param str nrel_api_key: API key.
+    :param pandas.DataFrame solar_plants: data frame of solar farms.
+    :param dict solar_kwargs: keyword arguments to pass to
+        :func:`prereise.gather.solardata.nsrdb.sam.retrieve_data_individual`.
+    :return: (*pandas.DataFrame*) -- data frame of normalized power profiles. The index
+        is hourly timestamps for the profile year, the columns are plant IDs, the values
+        are floats.
+    """
+    boolean_columns = ["Single-Axis Tracking?", "Dual-Axis Tracking?", "Fixed Tilt?"]
+    float_columns = ["DC Net Capacity (MW)", "Nameplate Capacity (MW)", "Tilt Angle"]
+    # Load raw 'extra' table data, join on plant & generating unit, re-establish index
+    extra_solar_data = get_eia_form_860(const.blob_paths["eia_form860_2019_solar"])
+    full_data = solar_plants.merge(
+        extra_solar_data, on=["Plant Code", "Generator ID"], suffixes=(None, "_extra")
+    )
+    full_data.index = solar_plants.index
+    # Process data to expected types for profile generation
+    for col in float_columns:
+        full_data[col] = full_data[col].map(floatify)
+    for col in boolean_columns:
+        # 'Y' becomes True, anything else ('N', blank, etc.) becomes False
+        full_data[col] = full_data[col] == "Y"
+
+    # If panel type isn't known definitively, assume 100% Fixed Tilt
+    # Solar thermal also ends up labeled as fixed tilt, but this will be ignored
+    bad_booleans = full_data.index[full_data[boolean_columns].sum(axis=1) != 1]
+    full_data.loc[bad_booleans, boolean_columns] = False
+    full_data.loc[bad_booleans, "Fixed Tilt?"] = True
+
+    full_data.index.name = "plant_id"  # needed for next step but gets lost in the merge
+    profiles = retrieve_data_individual(
+        nrel_email,
+        nrel_api_key,
+        solar_plant=full_data,
+        **solar_kwargs,
+    )
+    return profiles

From f639807edbef01e659b5e1dd9a68f1caa7faf0c2 Mon Sep 17 00:00:00 2001
From: Daniel Olsen
Date: Wed, 5 Jan 2022 15:23:32 -0800
Subject: [PATCH 12/12] feat: add solar profile generation step to
 orchestration

---
 .../gather/griddata/hifld/orchestration.py | 127 +++++++++++++-----
 1 file changed, 93 insertions(+), 34 deletions(-)

diff --git a/prereise/gather/griddata/hifld/orchestration.py b/prereise/gather/griddata/hifld/orchestration.py
index 05fd47106..74b4bf23d 100644
--- a/prereise/gather/griddata/hifld/orchestration.py
+++ b/prereise/gather/griddata/hifld/orchestration.py
@@ -6,55 +6,114 @@
 from prereise.gather.griddata.hifld.const import powersimdata_column_defaults
 from prereise.gather.griddata.hifld.data_process.demand import assign_demand_to_buses
 from prereise.gather.griddata.hifld.data_process.generators import build_plant
+from prereise.gather.griddata.hifld.data_process.profiles import build_solar
 from prereise.gather.griddata.hifld.data_process.transmission import build_transmission


-def create_csvs(output_folder):
+def create_csvs(output_folder, nrel_email, nrel_api_key, solar_kwargs={}):
     """Process HIFLD source data to CSVs compatible with PowerSimData.

     :param str output_folder: directory to write CSVs to.
+    :param str nrel_email: email used to `sign up <https://developer.nrel.gov/signup/>`_.
+    :param str nrel_api_key: API key.
+    :param dict solar_kwargs: keyword arguments to pass to
+        :func:`prereise.gather.solardata.nsrdb.sam.retrieve_data_individual`.
+    """
+    full_tables = create_grid(output_folder)
+    create_profiles(
+        full_tables["plant"], nrel_email, nrel_api_key, output_folder, solar_kwargs
+    )
+
+
+def create_grid(output_folder=None):
+    """Process HIFLD source data into grid data tables compatible with PowerSimData.
+
+    :param str output_folder: directory to write CSVs to. If None, CSVs will not be
+        written (just returned).
+    :return: (*dict*) -- keys are strings for table names, values are dataframes that
+        correspond to those tables. These dataframes have all available columns for
+        each table, even though the CSV files which are written are limited to only the
+        columns expected by powersimdata.
""" # Process grid data from original sources branch, bus, substation, dcline = build_transmission() plant = build_plant(bus, substation) assign_demand_to_buses(substation, branch, plant, bus) - outputs = {} - outputs["branch"] = branch - outputs["dcline"] = dcline - outputs["sub"] = substation + full_tables = {} + full_tables["branch"] = branch + full_tables["dcline"] = dcline + full_tables["sub"] = substation # Separate tables as necessary to match PowerSimData format # bus goes to bus and bus2sub - outputs["bus2sub"] = bus[["sub_id", "interconnect"]] - outputs["bus"] = bus.drop(["sub_id"], axis=1) + full_tables["bus2sub"] = bus[["sub_id", "interconnect"]] + full_tables["bus"] = bus.drop(["sub_id"], axis=1) # plant goes to plant and gencost - outputs["gencost"] = plant[["c0", "c1", "c2", "interconnect"]].copy() - outputs["plant"] = plant.drop(["c0", "c1", "c2"], axis=1) + full_tables["gencost"] = plant[["c0", "c1", "c2", "interconnect"]].copy() + full_tables["plant"] = plant.drop(["c0", "c1", "c2"], axis=1) # Fill in missing column values for name, defaults in powersimdata_column_defaults.items(): - outputs[name] = outputs[name].assign(**defaults) - - # Filter to only the columns expected by PowerSimData, in the expected order - for name, df in outputs.items(): - col_names = getattr(psd_const, f"col_name_{name}") - if name == "bus": - # The bus column names in PowerSimData include the index for legacy reasons - col_names = col_names[1:] - if name == "branch": - col_names += ["branch_device_type"] - if name == "plant": - col_names += ["type", "GenFuelCost", "GenIOB", "GenIOC", "GenIOD"] - if name == "dcline": - col_names += ["from_interconnect", "to_interconnect"] - else: - col_names += ["interconnect"] - outputs[name] = outputs[name][col_names] - - # Save files - os.makedirs(output_folder, exist_ok=True) - for name, df in outputs.items(): - df.to_csv(os.path.join(output_folder, f"{name}.csv")) - # The zone file gets copied directly - zone_path = os.path.join(os.path.dirname(__file__), "data", "zone.csv") - shutil.copyfile(zone_path, os.path.join(output_folder, "zone.csv")) + full_tables[name] = full_tables[name].assign(**defaults) + + if output_folder is not None: + os.makedirs(output_folder, exist_ok=True) + # Filter to only the columns expected by PowerSimData, in the expected order + powersimdata_outputs = {} + for name, df in full_tables.items(): + col_names = getattr(psd_const, f"col_name_{name}") + if name == "bus": + # The bus column names in PowerSimData include the index + col_names = col_names[1:] + if name == "branch": + col_names += ["branch_device_type"] + if name == "plant": + col_names += ["type", "GenFuelCost", "GenIOB", "GenIOC", "GenIOD"] + if name == "dcline": + col_names += ["from_interconnect", "to_interconnect"] + else: + col_names += ["interconnect"] + powersimdata_outputs[name] = full_tables[name][col_names] + + # Save files + for name, df in powersimdata_outputs.items(): + df.to_csv(os.path.join(output_folder, f"{name}.csv")) + # The zone file gets copied directly + zone_path = os.path.join(os.path.dirname(__file__), "data", "zone.csv") + shutil.copyfile(zone_path, os.path.join(output_folder, "zone.csv")) + + return full_tables + + +def create_profiles( + plants, nrel_email, nrel_api_key, output_folder=None, solar_kwargs={} +): + """Process a table of plant data to produce profile CSVs compatible with + PowerSimData. + + :param pandas.DataFrame plants: table of plant data. + :param str nrel_email: email used to`sign up `_. + :param str nrel_api_key: API key. 
+ :param str output_folder: directory to write CSVs to. If None, CSVs will not be + written (just returned). + :param dict solar_kwargs: keyword arguments to pass to + :func:`prereise.gather.solardata.nsrdb.sam.retrieve_data_individual`. + :return: (*dict*) -- keys are strings for profile names, values are dataframes, + indexed by timestamp, with plant IDs as columns. + """ + # Use plant data to build profiles + profiles = { + "solar": build_solar( + nrel_email, + nrel_api_key, + plants.query("type == 'solar'"), + **solar_kwargs, + ), + } + if output_folder is not None: + os.makedirs(output_folder, exist_ok=True) + # Write profiles + for name, df in profiles.items(): + df.to_csv(os.path.join(output_folder, f"{name}.csv")) + + return profiles
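A note on the leap-day handling in patches 09 and 10: the SAM models simulate a
365-day weather year (per the removed generate_timestamps_without_leap_day
docstring, "SAM only takes 365 days"), so their output has 8760 hourly values and
no February 29. For a leap year, both retrieve functions therefore duplicate the
24 values of February 28 in place of the missing day before indexing by the full
8784-hour calendar. A toy sketch of the numpy mechanics (the array contents below
are made up, only the shapes and indices matter):

    import numpy as np

    leap_hour_idx = 59 * 24  # hours before Feb 29 (31 days of January + 28 of February)
    power = np.linspace(0, 1, 8760)  # stand-in for a SAM output with no leap day
    padded = np.insert(power, leap_hour_idx, power[leap_hour_idx - 24 : leap_hour_idx])

    assert padded.shape == (8784,)  # a full leap year of hours
    # the synthesized Feb 29 repeats Feb 28
    assert (
        padded[leap_hour_idx : leap_hour_idx + 24]
        == padded[leap_hour_idx - 24 : leap_hour_idx]
    ).all()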
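With patches 11 and 12 applied, grid building and profile generation are driven
from a single entry point. Below is a minimal, hypothetical driver sketch; the
email, API key, folder names, and year are placeholders, and solar_kwargs accepts
the retrieve_data_individual arguments shown above (year, rate_limit, cache_dir):

    from prereise.gather.griddata.hifld.orchestration import create_csvs

    create_csvs(
        output_folder="hifld_csvs",     # placeholder; grid and profile CSVs land here
        nrel_email="user@example.com",  # placeholder NREL account email
        nrel_api_key="DEMO_KEY",        # placeholder API key
        solar_kwargs={"year": "2019", "rate_limit": 0.5, "cache_dir": "nrel_cache"},
    )

create_grid and create_profiles can also be called separately; both accept
output_folder=None to return the dataframes without writing any CSV files.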