Merge pull request #236 from Breakthrough-Energy/daniel/hifld_top_level

feat: add top-level HIFLD grid orchestration function
Breakthrough-Energy · Jan 8, 2022 · 3883571 · 3883571
2 parents daa2b89 + fe0ceb5
commit 3883571
Show file tree

Hide file tree

Showing 7 changed files with 272 additions and 11 deletions.
diff --git a/prereise/gather/griddata/hifld/__init__.py b/prereise/gather/griddata/hifld/__init__.py
@@ -0,0 +1 @@
+from prereise.gather.griddata.hifld.orchestration import create_csvs  # noqa: F401
diff --git a/prereise/gather/griddata/hifld/const.py b/prereise/gather/griddata/hifld/const.py
@@ -349,6 +349,26 @@
     "WC": 2.02,  # Waste Coal
 }
 
+# Maps the fuel codes found in the generators' 'Energy Source 1' column to the
+# generator 'type' labels written to the PowerSimData-compatible plant table
+# (applied via ``Series.replace`` in ``build_plant``). Several source codes
+# intentionally collapse onto one label (e.g. all coal variants become "coal").
+# Codes absent from this mapping are left unchanged by that replacement.
+fuel_translations = {
+    "BIT": "coal",
+    "DFO": "dfo",
+    "GEO": "geothermal",
+    "JF": "dfo",
+    "KER": "dfo",
+    "LIG": "coal",
+    "NG": "ng",
+    "NUC": "nuclear",
+    "PC": "coal",
+    "PG": "ng",
+    "RC": "coal",
+    "RFO": "dfo",
+    "SUB": "coal",
+    "SUN": "solar",
+    "WAT": "hydro",
+    "WC": "coal",
+    "WND": "wind",
+}
+
 # Values from EPA's Power Sector Modeling Platform v6 - Summer 2021 Reference Case
 reasonable_heat_rates_size_cutoffs = {
     ("Natural Gas Fired Combustion Turbine", "GT"): 80,
@@ -536,3 +556,86 @@
 }
 
 interconnect_size_rank = ["Eastern", "Western", "ERCOT"]
+
+# Constant column values used by ``create_csvs`` to "fill in missing column values"
+# for the output tables. Keys of the outer dict are output table names; each inner
+# dict maps a column name to the single value given to every row of that table.
+# The values are applied with ``DataFrame.assign``, which also overwrites any
+# same-named column that is already present in the table.
+powersimdata_column_defaults = {
+    "branch": {
+        "r": 0,
+        "b": 0,
+        "ratio": 0,
+        "rateB": 0,
+        "rateC": 0,
+        "angle": 0,
+        "status": 1,
+        "angmin": 0,
+        "angmax": 0,
+        "Pf": 0,
+        "Qf": 0,
+        "Pt": 0,
+        "Qt": 0,
+        "mu_Sf": 0,
+        "mu_St": 0,
+        "mu_angmin": 0,
+        "mu_angmax": 0,
+    },
+    "bus": {
+        "type": 1,
+        "Qd": 0,
+        "Gs": 0,
+        "Bs": 0,
+        "Vm": 1,
+        "Va": 0,
+        "loss_zone": 1,
+        "Vmax": 1.1,
+        "Vmin": 0.9,
+        "lam_P": 0,
+        "lam_Q": 0,
+        "mu_Vmax": 0,
+        "mu_Vmin": 0,
+    },
+    "dcline": {
+        "status": 1,
+        "Pf": 0,
+        "Pt": 0,
+        "Qf": 0,
+        "Qt": 0,
+        "Vf": 1,
+        "Vt": 1,
+        "QminF": 0,
+        "QmaxF": 0,
+        "QminT": 0,
+        "QmaxT": 0,
+        "loss0": 0,
+        "loss1": 0,
+        "muPmin": 0,
+        "muPmax": 0,
+        "muQminF": 0,
+        "muQmaxF": 0,
+        "muQminT": 0,
+        "muQmaxT": 0,
+    },
+    "gencost": {"type": 2, "startup": 0, "shutdown": 0, "n": 3},
+    "plant": {
+        "Pg": 0,
+        "Qg": 0,
+        "Qmax": 0,
+        "Qmin": 0,
+        "Vg": 1,
+        "mBase": 1000,
+        "status": 1,
+        "Pc1": 0,
+        "Pc2": 0,
+        "Qc1min": 0,
+        "Qc1max": 0,
+        "Qc2min": 0,
+        "Qc2max": 0,
+        "ramp_agc": 0,
+        "ramp_10": 0,
+        "ramp_30": 0,
+        "ramp_q": 0,
+        "apf": 0,
+        "mu_Pmax": 0,
+        "mu_Pmin": 0,
+        "mu_Qmax": 0,
+        "mu_Qmin": 0,
+    },
+}
diff --git a/prereise/gather/griddata/hifld/data/zone.csv b/prereise/gather/griddata/hifld/data/zone.csv
@@ -45,13 +45,12 @@ zone_id,zone_name,state,interconnect,time_zone
 44,South Dakota Western,South Dakota,Western,ETC/GMT+6
 45,Tennessee,Tennessee,Eastern,ETC/GMT+6
 46,Texas Eastern,Texas,Eastern,ETC/GMT+6
-47,Texas Panhandle,Texas,Eastern,ETC/GMT+6
-48,ERCOT,Texas,Texas,ETC/GMT+6
-49,El Paso,Texas,Western,ETC/GMT+7
-50,Utah,Utah,Western,ETC/GMT+7
-51,Virginia,Virginia,Eastern,ETC/GMT+5
-52,Vermont,Vermont,Eastern,ETC/GMT+5
-53,Washington,Washington,Western,ETC/GMT+8
-54,Wisconsin,Wisconsin,Eastern,ETC/GMT+6
-55,West Virginia,West Virginia,Eastern,ETC/GMT+5
-56,Wyoming,Wyoming,Western,ETC/GMT+7
+47,ERCOT,Texas,ERCOT,ETC/GMT+6
+48,El Paso,Texas,Western,ETC/GMT+7
+49,Utah,Utah,Western,ETC/GMT+7
+50,Virginia,Virginia,Eastern,ETC/GMT+5
+51,Vermont,Vermont,Eastern,ETC/GMT+5
+52,Washington,Washington,Western,ETC/GMT+8
+53,Wisconsin,Wisconsin,Eastern,ETC/GMT+6
+54,West Virginia,West Virginia,Eastern,ETC/GMT+5
+55,Wyoming,Wyoming,Western,ETC/GMT+7
diff --git a/prereise/gather/griddata/hifld/data_process/generators.py b/prereise/gather/griddata/hifld/data_process/generators.py
@@ -66,7 +66,7 @@ def map_generator_to_sub_by_location(generator, substation_groupby):
             # If no matching subs within the given interconnection and ZIPs, give up
             return pd.NA
     distance_to_subs = matching_subs.apply(
-        lambda x: haversine((x.LATITUDE, x.LONGITUDE), (generator.lat, generator.lon)),
+        lambda x: haversine((x.lat, x.lon), (generator.lat, generator.lon)),
         axis=1,
     )
     return distance_to_subs.idxmin()
@@ -334,4 +334,14 @@ def build_plant(bus, substations, kwargs={}):
     for i in range(3):
         generators[f"c{i}"] = generators[f"h{i}"] * generators["GenFuelCost"].fillna(0)
 
+    generators = generators.loc[~generators["bus_id"].isna()].copy()
+    # Rename columns (or add as necessary) to match PowerSimData expectations
+    generators.rename(
+        {"Energy Source 1": "type", "h1": "GenIOB", "h2": "GenIOC"},
+        axis=1,
+        inplace=True,
+    )
+    generators["type"] = generators["type"].replace(const.fuel_translations)
+    generators["GenIOD"] = 0
+
     return generators
diff --git a/prereise/gather/griddata/hifld/data_process/tests/test_transmission.py b/prereise/gather/griddata/hifld/data_process/tests/test_transmission.py
@@ -4,6 +4,7 @@
 
 from prereise.gather.griddata.hifld import const
 from prereise.gather.griddata.hifld.data_process.transmission import (
+    assign_buses_to_lines,
     augment_line_voltages,
     create_buses,
     create_transformers,
@@ -102,6 +103,7 @@ def test_create_buses():
             "baseKV": [69, 345, 69, 115, 115, 230, 345, 230],
         },
     )
+    expected_return.index.name = "bus_id"
     expected_return["baseKV"] = expected_return["baseKV"].astype(float)
     bus = create_buses(lines)
     assert_frame_equal(bus, expected_return)
@@ -225,3 +227,26 @@ def test_map_lines_to_substations_using_coords():
 
     assert len(new_substations) == 2
     assert all(o is None for o in new_substations["OTHER_SUB"])
+
+
+def test_assign_buses_to_lines():
+    """AC line ends must resolve to the bus matching (substation, voltage), and DC
+    line ends to the highest-voltage bus of their substation."""
+    bus_ids = pd.Index([300, 310, 311, 320, 400, 410], name="bus_id")
+    bus = pd.DataFrame(
+        {
+            "baseKV": [115, 115, 230, 230, 345, 345],
+            "sub_id": [30, 31, 31, 32, 40, 41],
+        },
+        index=bus_ids,
+    )
+    ac_lines = pd.DataFrame(
+        {
+            "SUB_1_ID": [30, 32, 40],
+            "SUB_2_ID": [31, 31, 41],
+            "VOLTAGE": [115, 230, 345],
+        }
+    )
+    # Substation 31 hosts both a 115 kV and a 230 kV bus; the DC line should pick 311
+    dc_lines = pd.DataFrame({"SUB_1_ID": [31], "SUB_2_ID": [40]})
+
+    assign_buses_to_lines(ac_lines, dc_lines, bus)
+
+    expectations = [
+        (ac_lines, "from_bus_id", [300, 320, 400]),
+        (ac_lines, "to_bus_id", [310, 311, 410]),
+        (dc_lines, "from_bus_id", [311]),
+        (dc_lines, "to_bus_id", [400]),
+    ]
+    for table, column, expected_ids in expectations:
+        assert table[column].equals(pd.Series(expected_ids))
diff --git a/prereise/gather/griddata/hifld/data_process/transmission.py b/prereise/gather/griddata/hifld/data_process/transmission.py
@@ -502,6 +502,7 @@ def create_buses(lines):
     buses = buses.astype(float)
     buses.index.name = "sub_id"
     buses = buses.to_frame(name="baseKV").reset_index()
+    buses.index.name = "bus_id"
 
     return buses
 
@@ -654,6 +655,54 @@ def add_b2bs_to_dc_lines(dc_lines, substations, b2b_ratings):
         dc_lines.loc[first_new_id + i] = pd.Series(info)
 
 
+def assign_buses_to_lines(ac_lines, dc_lines, bus):
+    """Translate the substation endpoints of AC & DC lines into bus endpoints, adding
+    'from_bus_id' and 'to_bus_id' columns to both line tables (modified in place).
+    Each AC line end is matched to the bus at its substation whose 'baseKV' equals
+    the line's 'VOLTAGE'; each DC line end is matched to the highest-'baseKV' bus at
+    its substation.
+
+    :param pandas.DataFrame ac_lines: data frame containing at least 'SUB_1_ID',
+        'SUB_2_ID', and 'VOLTAGE' columns.
+    :param pandas.DataFrame dc_lines: data frame containing at least
+        'SUB_1_ID' and 'SUB_2_ID' columns.
+    :param pandas.DataFrame bus: data frame containing at least 'sub_id' and 'baseKV'
+        columns, with an index named 'bus_id'.
+    """
+    bus_table = bus.reset_index()
+    # (sub_id, baseKV) -> bus_id, used for the AC lines
+    bus_by_sub_and_voltage = bus_table.set_index(["sub_id", "baseKV"])["bus_id"]
+    # sub_id -> bus_id of the highest-voltage bus, used for the DC lines
+    top_bus_by_sub = (
+        bus_table.sort_values("baseKV").groupby("sub_id")["bus_id"].last()
+    )
+    endpoint_columns = {"SUB_1_ID": "from_bus_id", "SUB_2_ID": "to_bus_id"}
+
+    def lookup_ac_bus(line, sub_column):
+        return bus_by_sub_and_voltage.loc[(line[sub_column], line["VOLTAGE"])]
+
+    # AC lines are filled first, then DC lines, one endpoint at a time
+    for sub_column, bus_column in endpoint_columns.items():
+        ac_lines[bus_column] = ac_lines.apply(
+            lookup_ac_bus, axis=1, args=(sub_column,)
+        )
+    for sub_column, bus_column in endpoint_columns.items():
+        dc_lines[bus_column] = dc_lines[sub_column].map(top_bus_by_sub)
+
+
+def add_substation_info_to_buses(bus, substations, zones):
+    """Add 'interconnect' and 'zone_id' columns to the ``bus`` table (modified
+    in-place). Each bus inherits the interconnect of its substation, and its zone is
+    the one matching that substation's state together with that interconnect.
+
+    :param pandas.DataFrame bus: table of bus data, including 'sub_id' column.
+    :param pandas.DataFrame substations: table of substation data, including 'STATE' and
+        'interconnect' columns.
+    :param pandas.DataFrame zones: table of zone data, including 'state' and
+        'interconnect' columns, with an index named 'zone_id'.
+    """
+    # (state, interconnect) -> zone_id; sorted because lookups against an unsorted
+    # MultiIndex perform poorly
+    zone_by_state_and_interconnect = (
+        zones.reset_index().set_index(["state", "interconnect"])["zone_id"].sort_index()
+    )
+    sub_ids = bus["sub_id"]
+    # Substations carry state abbreviations; translate them before the zone lookup
+    bus_states = sub_ids.map(substations["STATE"]).map(const.abv2state)
+    bus["interconnect"] = sub_ids.map(substations["interconnect"])
+
+    def find_zone(bus_row):
+        key = (bus_states.loc[bus_row.name], bus_row.interconnect)
+        return zone_by_state_and_interconnect.loc[key]
+
+    bus["zone_id"] = bus.apply(find_zone, axis=1)
+
+
 def build_transmission(method="line2sub", **kwargs):
     """Build transmission network
 
@@ -734,6 +783,9 @@ def build_transmission(method="line2sub", **kwargs):
         const.line_interconnect_assumptions,
         const.interconnect_size_rank,
     )
+    # use substation interconnects to label DC lines
+    dc_lines["from_interconnect"] = dc_lines.SUB_1_ID.map(substations.interconnect)
+    dc_lines["to_interconnect"] = dc_lines.SUB_2_ID.map(substations.interconnect)
     # Now that substations are split across interconnects, we can add B2B facilities
     add_b2bs_to_dc_lines(dc_lines, substations, const.b2b_ratings)
 
@@ -742,10 +794,13 @@ def build_transmission(method="line2sub", **kwargs):
 
     # Create buses from lines
     bus = create_buses(ac_lines)
+    assign_buses_to_lines(ac_lines, dc_lines, bus)
+    add_substation_info_to_buses(bus, substations, hifld_zones)
 
     # Add transformers, and calculate rating and impedance for all branches
     transformers = create_transformers(bus)
     transformers["type"] = "Transformer"
+    transformers["interconnect"] = transformers["from_bus_id"].map(bus["interconnect"])
     first_new_id = ac_lines.index.max() + 1
     transformers.index = pd.RangeIndex(first_new_id, first_new_id + len(transformers))
     ac_lines["type"] = "Line"
@@ -758,4 +813,12 @@ def build_transmission(method="line2sub", **kwargs):
         lambda x: estimate_branch_rating(x, bus["baseKV"]), axis=1
     )
 
+    # Rename columns to match PowerSimData expectations
+    branch.rename({"type": "branch_device_type"}, axis=1, inplace=True)
+    substations.rename(
+        {"NAME": "name", "LATITUDE": "lat", "LONGITUDE": "lon"}, axis=1, inplace=True
+    )
+    substations["interconnect_sub_id"] = substations.groupby("interconnect").cumcount()
+    substations.index.name = "sub_id"
+
     return branch, bus, substations, dc_lines
diff --git a/prereise/gather/griddata/hifld/orchestration.py b/prereise/gather/griddata/hifld/orchestration.py
@@ -0,0 +1,60 @@
+import os
+import shutil
+
+from powersimdata.input import const as psd_const
+
+from prereise.gather.griddata.hifld.const import powersimdata_column_defaults
+from prereise.gather.griddata.hifld.data_process.demand import assign_demand_to_buses
+from prereise.gather.griddata.hifld.data_process.generators import build_plant
+from prereise.gather.griddata.hifld.data_process.transmission import build_transmission
+
+
+def create_csvs(output_folder):
+    """Process HIFLD source data to CSVs compatible with PowerSimData.
+
+    One CSV is written per output table (branch, dcline, sub, bus2sub, bus, gencost,
+    plant), and the packaged zone.csv is copied alongside them.
+
+    :param str output_folder: directory to write CSVs to (created if necessary).
+    """
+    # Process grid data from original sources
+    branch, bus, substation, dcline = build_transmission()
+    plant = build_plant(bus, substation)
+    assign_demand_to_buses(substation, branch, plant, bus)
+
+    # Separate tables as necessary to match PowerSimData format:
+    # bus goes to bus and bus2sub, plant goes to plant and gencost
+    cost_columns = ["c0", "c1", "c2"]
+    outputs = {
+        "branch": branch,
+        "dcline": dcline,
+        "sub": substation,
+        "bus2sub": bus[["sub_id", "interconnect"]],
+        "bus": bus.drop(["sub_id"], axis=1),
+        "gencost": plant[cost_columns + ["interconnect"]].copy(),
+        "plant": plant.drop(cost_columns, axis=1),
+    }
+
+    # Fill in missing column values (assign also overwrites same-named columns)
+    for name, defaults in powersimdata_column_defaults.items():
+        outputs[name] = outputs[name].assign(**defaults)
+
+    # Columns kept for specific tables in addition to PowerSimData's own column list
+    extra_columns = {
+        "branch": ["branch_device_type"],
+        "plant": ["type", "GenFuelCost", "GenIOB", "GenIOC", "GenIOD"],
+    }
+    # Filter to only the columns expected by PowerSimData, in the expected order
+    for name in outputs:
+        # Work on a copy: extending the list returned by getattr in place would
+        # modify PowerSimData's module-level constant for the rest of the process
+        col_names = list(getattr(psd_const, f"col_name_{name}"))
+        if name == "bus":
+            # The bus column names in PowerSimData include the index for legacy reasons
+            col_names = col_names[1:]
+        col_names += extra_columns.get(name, [])
+        if name == "dcline":
+            # DC lines may span interconnects, so each end is labeled separately
+            col_names += ["from_interconnect", "to_interconnect"]
+        else:
+            col_names += ["interconnect"]
+        outputs[name] = outputs[name][col_names]
+
+    # Save files
+    os.makedirs(output_folder, exist_ok=True)
+    for name, df in outputs.items():
+        df.to_csv(os.path.join(output_folder, f"{name}.csv"))
+    # The zone file gets copied directly
+    zone_path = os.path.join(os.path.dirname(__file__), "data", "zone.csv")
+    shutil.copyfile(zone_path, os.path.join(output_folder, "zone.csv"))
Original file line number	Diff line number	Diff line change
		@@ -0,0 +1 @@
		from prereise.gather.griddata.hifld.orchestration import create_csvs # noqa: F401