Skip to content

Commit

Permalink
Merge pull request #256 from Breakthrough-Energy/daniel/hifld_solar_profiles
Browse files Browse the repository at this point in the history

feat: generate solar profiles during HIFLD grid-building
  • Loading branch information
danielolsen committed Mar 14, 2022
2 parents c78dc02 + 1592af7 commit f2ccac7
Show file tree
Hide file tree
Showing 10 changed files with 383 additions and 219 deletions.
2 changes: 1 addition & 1 deletion .github/workflows/test.yml
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@ jobs:
strategy:
fail-fast: false
matrix:
python-version: ['3.8', '3.9']
python-version: ['3.8', '3.9', '3.10']

name: Python ${{ matrix.python-version }}
steps:
Expand Down
2 changes: 1 addition & 1 deletion Pipfile
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,6 @@ xlrd = "==1.2.0"
openpyxl = "*"
netCDF4 = "~=1.5.8"
powersimdata = "~=0.4.4"
nrel-pysam = "~=2.2"
nrel-pysam = "~=3.0"
pyproj = "~=3.0"
pygrib = "~=2.1.4"
236 changes: 121 additions & 115 deletions Pipfile.lock

Large diffs are not rendered by default.

2 changes: 2 additions & 0 deletions prereise/gather/griddata/hifld/const.py
Original file line number Diff line number Diff line change
Expand Up @@ -107,6 +107,8 @@
blob_paths = {
"eia_form860_2019_generator": "https://besciences.blob.core.windows.net/datasets/EIA_Form860/3_1_Generator_Y2019_Operable.csv",
"eia_form860_2019_plant": "https://besciences.blob.core.windows.net/datasets/EIA_Form860/2___Plant_Y2019.csv",
"eia_form860_2019_solar": "https://besciences.blob.core.windows.net/datasets/EIA_Form860/3_3_Solar_Y2019_Operable.csv",
"eia_form860_2019_wind": "https://besciences.blob.core.windows.net/datasets/EIA_Form860/3_2_Wind_Y2019_Operable.csv",
"epa_ampd": "https://besciences.blob.core.windows.net/datasets/EPA_AMPD/",
"epa_needs": "https://besciences.blob.core.windows.net/datasets/EPA_NEEDS/needs-v620_06-30-21-2_active.csv",
"substations": "https://besciences.blob.core.windows.net/datasets/hifld/Electric_Substations_Jul2020.csv",
Expand Down
1 change: 1 addition & 0 deletions prereise/gather/griddata/hifld/data_process/generators.py
Original file line number Diff line number Diff line change
Expand Up @@ -343,5 +343,6 @@ def build_plant(bus, substations, kwargs={}):
)
generators["type"] = generators["type"].replace(const.fuel_translations)
generators["GenIOD"] = 0
generators.index.name = "plant_id"

return generators
59 changes: 59 additions & 0 deletions prereise/gather/griddata/hifld/data_process/profiles.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,59 @@
from prereise.gather.griddata.hifld import const
from prereise.gather.griddata.hifld.data_access.load import get_eia_form_860
from prereise.gather.solardata.nsrdb.sam import retrieve_data_individual


def floatify(x):
    """Coerce an object to a float, returning a float NaN for any objects which raise an
    exception when passed to :func:`float`.

    :param object x: object to coerce.
    :return: (*float*) -- coerced value.
    """
    try:
        return float(x)
    except (TypeError, ValueError):
        # ValueError covers unparseable strings (e.g. ""), TypeError covers None
        # and other non-numeric objects; both map to NaN per the docstring contract.
        return float("nan")


def build_solar(nrel_email, nrel_api_key, solar_plants, **solar_kwargs):
    """Use plant-level data to build solar profiles.

    :param str nrel_email: email used to `sign up <https://developer.nrel.gov/signup/>`_.
    :param str nrel_api_key: API key.
    :param pandas.DataFrame solar_plants: data frame of solar farms.
    :param dict solar_kwargs: keyword arguments to pass to
        :func:`prereise.gather.solardata.nsrdb.sam.retrieve_data_individual`.
    :return: (*pandas.DataFrame*) -- data frame of normalized power profiles. The index
        is hourly timestamps for the profile year, the columns are plant IDs, the values
        are floats.
    """
    tracking_columns = ["Single-Axis Tracking?", "Dual-Axis Tracking?", "Fixed Tilt?"]
    numeric_columns = ["DC Net Capacity (MW)", "Nameplate Capacity (MW)", "Tilt Angle"]
    # Join the raw 'extra' EIA table on plant & generating unit, then restore the
    # original plant index (the merge discards it).
    extra_solar_data = get_eia_form_860(const.blob_paths["eia_form860_2019_solar"])
    merged = solar_plants.merge(
        extra_solar_data, on=["Plant Code", "Generator ID"], suffixes=(None, "_extra")
    )
    merged.index = solar_plants.index
    # Coerce columns to the types expected by profile generation
    for column in numeric_columns:
        merged[column] = merged[column].map(floatify)
    for column in tracking_columns:
        # only an explicit 'Y' counts as True; 'N', blank, etc. become False
        merged[column] = merged[column] == "Y"

    # Rows without exactly one tracking flag set have no definitive panel type and
    # are assumed 100% Fixed Tilt. Solar thermal also ends up labeled as fixed
    # tilt, but this will be ignored.
    ambiguous = merged.index[merged[tracking_columns].sum(axis=1) != 1]
    merged.loc[ambiguous, tracking_columns] = False
    merged.loc[ambiguous, "Fixed Tilt?"] = True

    merged.index.name = "plant_id"  # needed for the next step; lost in the merge
    return retrieve_data_individual(
        nrel_email,
        nrel_api_key,
        solar_plant=merged,
        **solar_kwargs,
    )
127 changes: 93 additions & 34 deletions prereise/gather/griddata/hifld/orchestration.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,55 +6,114 @@
from prereise.gather.griddata.hifld.const import powersimdata_column_defaults
from prereise.gather.griddata.hifld.data_process.demand import assign_demand_to_buses
from prereise.gather.griddata.hifld.data_process.generators import build_plant
from prereise.gather.griddata.hifld.data_process.profiles import build_solar
from prereise.gather.griddata.hifld.data_process.transmission import build_transmission


def create_csvs(output_folder):
def create_csvs(output_folder, nrel_email, nrel_api_key, solar_kwargs=None):
    """Process HIFLD source data to CSVs compatible with PowerSimData.

    :param str output_folder: directory to write CSVs to.
    :param str nrel_email: email used to `sign up <https://developer.nrel.gov/signup/>`_.
    :param str nrel_api_key: API key.
    :param dict solar_kwargs: keyword arguments to pass to
        :func:`prereise.gather.solardata.nsrdb.sam.retrieve_data_individual`.
    """
    # None instead of a mutable {} default; normalize before passing downstream.
    if solar_kwargs is None:
        solar_kwargs = {}
    full_tables = create_grid(output_folder)
    create_profiles(
        full_tables["plant"], nrel_email, nrel_api_key, output_folder, solar_kwargs
    )


def create_grid(output_folder=None):
    """Process HIFLD source data to produce grid tables (and optionally CSVs)
    compatible with PowerSimData.

    :param str output_folder: directory to write CSVs to. If None, CSVs will not be
        written (just returned).
    :return: (*dict*) -- keys are strings for table names, values are dataframes that
        correspond to those tables. These dataframes have all available columns for
        each table, even though the CSV files which are written are limited to only the
        columns expected by powersimdata.
    """
    # Process grid data from original sources
    branch, bus, substation, dcline = build_transmission()
    plant = build_plant(bus, substation)
    assign_demand_to_buses(substation, branch, plant, bus)

    full_tables = {}
    full_tables["branch"] = branch
    full_tables["dcline"] = dcline
    full_tables["sub"] = substation
    # Separate tables as necessary to match PowerSimData format
    # bus goes to bus and bus2sub
    full_tables["bus2sub"] = bus[["sub_id", "interconnect"]]
    full_tables["bus"] = bus.drop(["sub_id"], axis=1)
    # plant goes to plant and gencost
    full_tables["gencost"] = plant[["c0", "c1", "c2", "interconnect"]].copy()
    full_tables["plant"] = plant.drop(["c0", "c1", "c2"], axis=1)

    # Fill in missing column values
    for name, defaults in powersimdata_column_defaults.items():
        full_tables[name] = full_tables[name].assign(**defaults)

    if output_folder is not None:
        os.makedirs(output_folder, exist_ok=True)
        # Filter to only the columns expected by PowerSimData, in the expected order
        powersimdata_outputs = {}
        for name, df in full_tables.items():
            # Copy the list: the augmented assignments below would otherwise mutate
            # the shared powersimdata constants in place, corrupting repeated calls.
            col_names = list(getattr(psd_const, f"col_name_{name}"))
            if name == "bus":
                # The bus column names in PowerSimData include the index
                col_names = col_names[1:]
            if name == "branch":
                col_names += ["branch_device_type"]
            if name == "plant":
                col_names += ["type", "GenFuelCost", "GenIOB", "GenIOC", "GenIOD"]
            if name == "dcline":
                col_names += ["from_interconnect", "to_interconnect"]
            else:
                # every non-dcline table gets a single interconnect column
                col_names += ["interconnect"]
            powersimdata_outputs[name] = full_tables[name][col_names]

        # Save files
        for name, df in powersimdata_outputs.items():
            df.to_csv(os.path.join(output_folder, f"{name}.csv"))
        # The zone file gets copied directly
        zone_path = os.path.join(os.path.dirname(__file__), "data", "zone.csv")
        shutil.copyfile(zone_path, os.path.join(output_folder, "zone.csv"))

    return full_tables


def create_profiles(
    plants, nrel_email, nrel_api_key, output_folder=None, solar_kwargs=None
):
    """Process a table of plant data to produce profile CSVs compatible with
    PowerSimData.

    :param pandas.DataFrame plants: table of plant data.
    :param str nrel_email: email used to `sign up <https://developer.nrel.gov/signup/>`_.
    :param str nrel_api_key: API key.
    :param str output_folder: directory to write CSVs to. If None, CSVs will not be
        written (just returned).
    :param dict solar_kwargs: keyword arguments to pass to
        :func:`prereise.gather.solardata.nsrdb.sam.retrieve_data_individual`.
    :return: (*dict*) -- keys are strings for profile names, values are dataframes,
        indexed by timestamp, with plant IDs as columns.
    """
    # None instead of a mutable {} default; normalize before unpacking below.
    if solar_kwargs is None:
        solar_kwargs = {}
    # Use plant data to build profiles
    profiles = {
        "solar": build_solar(
            nrel_email,
            nrel_api_key,
            plants.query("type == 'solar'"),
            **solar_kwargs,
        ),
    }
    if output_folder is not None:
        os.makedirs(output_folder, exist_ok=True)
        # Write profiles
        for name, df in profiles.items():
            df.to_csv(os.path.join(output_folder, f"{name}.csv"))

    return profiles
17 changes: 17 additions & 0 deletions prereise/gather/solardata/nsrdb/nrel_api.py
Original file line number Diff line number Diff line change
Expand Up @@ -71,6 +71,19 @@ def to_dict(self):
)
return result

def to_sam_weather_file_format(self):
    """Convert the data to the format expected by nrel-pysam for local files. See
    https://developer.nrel.gov/docs/solar/nsrdb/psm3-download/.

    :return: (*list*) -- a list of lists which can be passed to
        :meth:`csv.writer.writerows` and then loaded from disk.
    """
    # Two metadata rows (names, then values) followed by the data header row,
    # followed by one row per timestamp of resource data.
    leading_rows = [
        ["lat", "lon", "tz", "elevation"],
        (self.lat, self.lon, self.tz, self.elevation),
        self.data_resource.columns.tolist(),
    ]
    return leading_rows + self.data_resource.to_numpy().tolist()


class NrelApi:
"""Provides an interface to the NREL API for PSM3 data. It supports
Expand Down Expand Up @@ -151,6 +164,10 @@ def get_psm3_at(
)
def download(url):
resp = requests.get(url)
if resp.status_code // 100 == 5: # all 5xx errors, server side
raise TransientError(
f"Server side error, retry_count={download.retry_count}"
)
if resp.status_code == 429:
raise TransientError(
f"Too many requests, retry_count={download.retry_count}"
Expand Down
Loading

0 comments on commit f2ccac7

Please sign in to comment.