diff --git a/prereise/gather/griddata/hifld/const.py b/prereise/gather/griddata/hifld/const.py index df84def92..2fb656474 100644 --- a/prereise/gather/griddata/hifld/const.py +++ b/prereise/gather/griddata/hifld/const.py @@ -415,3 +415,124 @@ "south": 25, "west": -125, } + +seams_substations = { + "east_west": { + 202364, # 'LAMAR HVDC TIE' + 202159, # 'UNKNOWN202159', NE (effectively Sidney/Virginia Smith) + 202160, # 'VIRGINIA SMITH CONVERTER STATION' + 202177, # Sidney-adjacent + 202178, # 'STEGAL', NE + 131797, # Stegal-adjacent + 203572, # 'MILES CITY', MT (the substation that appears more likely to be the real one) + 203590, # 'RICHARDSON COULEE' (near MALTA, shouldn't be necessary?) + 303738, # 'BLACKWATER TIE', NM + 304165, # 'EDDY AC-DC-AC TIE', NM + # Rapid City Disconnections + 131171, # North of Rapid City + 131176, # North of Rapid City + 202567, # East of Rapid City + # Highline NE/CO border + 205884, # Julesburg, CO + 205888, # Holyoke, CO + 203719, # 'ALVIN' substation + }, + "east_ercot": { + 161924, # Logansport, TX + 300490, # Vernon, TX + 301314, # Valley Lake, TX connection to OK + 301729, # Hawkins, TX + 302012, # Vernon, TX + 302274, # 'COTTONWOOD', Glenn, TX + 303004, # Crowell, TX + 303646, # San Augustine, TX + 303719, # Big Sandy, TX + 304100, # Matador, TX + 304328, # Midland, TX + 304477, # Oklaunion substation (B2B) + 304825, # Dennison, TX connection to OK + 304391, # Long Branch, TX + 304994, # Welsh substation (B2B) + 306058, # Munday, TX + 306638, # Pittsburg, TX + 306738, # Henderson, TX + 307121, # Kirkland, TX + 307363, # Navasota, TX + 307539, # Mt. 
Pleasant, TX + 307735, # Shiro, TX + 308062, # Lufkin, TX + 308951, # Beckville, TX + 308976, # Dayton, TX + 309403, # Kilgore, TX + 310861, # Overton, TX + 310879, # Huntsville, TX + }, +} + +substation_interconnect_assumptions = { + "Eastern": { + 131171, + 131172, + 131853, + 161925, + 167678, + 167679, + 167681, + 167682, + 167684, + 307364, + }, + "Western": { + 201396, + 202172, + 205667, + 205889, + 205890, + }, + "ERCOT": { + 301181, + 301291, + 302826, + 303024, # Substations between East/ERCOT AC connector and Oklaunion B2B station + 303394, + 303406, + 303433, + 304994, # Welsh B2B + 309433, + 309658, + }, +} + +line_interconnect_assumptions = { + "Eastern": { + 128641, + 132264, + 135527, + 141367, + 300170, + 301858, + 303906, + 305887, + 306332, + 306885, + 310668, + 311279, + 311520, + }, + "Western": {123525, 141873}, + "ERCOT": {305330, 309428, 310121}, +} + +b2b_ratings = { # MW + "BLACKWATER TIE": 200, # a.k.a. 'Clovis'/'Roosevelt County' (Eastern/Western) + "EDDY AC-DC-AC TIE": 200, # a.k.a. 'Artesia' (Eastern/Western) + "LAMAR HVDC TIE": 210, # (Eastern/Western) + "MILES CITY": 200, # (Eastern/Western) + "NEW UNDERWOOD": 200, # representative of the Rapid City DC Tie (Eastern/Western) + "STEGALL": 110, # (Eastern/Western) + "UNKNOWN304477": 220, # Oklaunion (Eastern/ERCOT) + "UNKNOWN304994": 600, # Welsh (Eastern/ERCOT) + "VIRGINIA SMITH CONVERTER STATION": 200, # a.k.a. 
'Sidney' (Eastern/Western) +} + +interconnect_size_rank = ["Eastern", "Western", "ERCOT"] diff --git a/prereise/gather/griddata/hifld/data/zone.csv b/prereise/gather/griddata/hifld/data/zone.csv index cfc0ffd61..4eb618108 100644 --- a/prereise/gather/griddata/hifld/data/zone.csv +++ b/prereise/gather/griddata/hifld/data/zone.csv @@ -3,53 +3,55 @@ zone_id,zone_name,state,interconnect,time_zone 2,Arkansas,Arkansas,Eastern,ETC/GMT+6 3,Arizona,Arizona,Western,ETC/GMT+7 4,California,California,Western,ETC/GMT+8 -5,Colorado,Colorado,Western,ETC/GMT+7 -6,Connecticut,Connecticut,Eastern,ETC/GMT+5 -7,Delaware,Delaware,Eastern,ETC/GMT+5 -8,Florida,Florida,Eastern,ETC/GMT+6 -9,Georgia,Georgia,Eastern,ETC/GMT+5 -10,Iowa,Iowa,Eastern,ETC/GMT+6 -11,Idaho,Idaho,Western,ETC/GMT+7 -12,Illinois,Illinois,Eastern,ETC/GMT+6 -13,Indiana,Indiana,Eastern,ETC/GMT+5 -14,Kansas,Kansas,Eastern,ETC/GMT+6 -15,Kentucky,Kentucky,Eastern,ETC/GMT+5 -16,Louisiana,Louisiana,Eastern,ETC/GMT+6 -17,Massachusetts,Massachusetts,Eastern,ETC/GMT+5 -18,Maryland,Maryland,Eastern,ETC/GMT+5 -19,Maine,Maine,Eastern,ETC/GMT+5 -20,Michigan,Michigan,Eastern,ETC/GMT+5 -21,Minnesota,Minnesota,Eastern,ETC/GMT+6 -22,Missouri,Missouri,Eastern,ETC/GMT+6 -23,Mississippi,Mississippi,Eastern,ETC/GMT+6 -24,Montana Eastern,Montana,Eastern,ETC/GMT+7 -25,Montana Western,Montana,Western,ETC/GMT+7 -26,North Carolina,North Carolina,Eastern,ETC/GMT+5 -27,North Dakota,North Dakota,Eastern,ETC/GMT+6 -28,Nebraska,Nebraska,Eastern,ETC/GMT+6 -29,New Hampshire,New Hampshire,Eastern,ETC/GMT+5 -30,New Jersey,New Jersey,Eastern,ETC/GMT+5 -31,New Mexico Eastern,New Mexico,Eastern,ETC/GMT+7 -32,New Mexico Western,New Mexico,Western,ETC/GMT+7 -33,Nevada,Nevada,Western,ETC/GMT+8 -34,New York,New York,Eastern,ETC/GMT+5 -35,Ohio,Ohio,Eastern,ETC/GMT+5 -36,Oklahoma,Oklahoma,Eastern,ETC/GMT+6 -37,Oregon,Oregon,Western,ETC/GMT+8 -38,Pennsylvania,Pennsylvania,Eastern,ETC/GMT+5 -39,Rhode Island,Rhode Island,Eastern,ETC/GMT+5 -40,South Carolina,South 
Carolina,Eastern,ETC/GMT+5 -41,South Dakota Eastern,South Dakota,Eastern,ETC/GMT+6 -42,South Dakota Western,South Dakota,Western,ETC/GMT+6 -43,Tennessee,Tennessee,Eastern,ETC/GMT+6 -44,Texas Eastern,Texas,Eastern,ETC/GMT+6 -45,Texas Panhandle,Texas,Eastern,ETC/GMT+6 -46,ERCOT,Texas,Texas,ETC/GMT+6 -47,El Paso,Texas,Western,ETC/GMT+7 -48,Utah,Utah,Western,ETC/GMT+7 -49,Virginia,Virginia,Eastern,ETC/GMT+5 -50,Vermont,Vermont,Eastern,ETC/GMT+5 -51,Washington,Washington,Western,ETC/GMT+8 -52,Wisconsin,Wisconsin,Eastern,ETC/GMT+6 -53,West Virginia,West Virginia,Eastern,ETC/GMT+5 -54,Wyoming,Wyoming,Western,ETC/GMT+7 +5,Colorado Western,Colorado,Western,ETC/GMT+7 +6,Colorado Eastern,Colorado,Eastern,ETC/GMT+7 +7,Connecticut,Connecticut,Eastern,ETC/GMT+5 +8,Delaware,Delaware,Eastern,ETC/GMT+5 +9,Florida,Florida,Eastern,ETC/GMT+6 +10,Georgia,Georgia,Eastern,ETC/GMT+5 +11,Iowa,Iowa,Eastern,ETC/GMT+6 +12,Idaho,Idaho,Western,ETC/GMT+7 +13,Illinois,Illinois,Eastern,ETC/GMT+6 +14,Indiana,Indiana,Eastern,ETC/GMT+5 +15,Kansas,Kansas,Eastern,ETC/GMT+6 +16,Kentucky,Kentucky,Eastern,ETC/GMT+5 +17,Louisiana,Louisiana,Eastern,ETC/GMT+6 +18,Massachusetts,Massachusetts,Eastern,ETC/GMT+5 +19,Maryland,Maryland,Eastern,ETC/GMT+5 +20,Maine,Maine,Eastern,ETC/GMT+5 +21,Michigan,Michigan,Eastern,ETC/GMT+5 +22,Minnesota,Minnesota,Eastern,ETC/GMT+6 +23,Missouri,Missouri,Eastern,ETC/GMT+6 +24,Mississippi,Mississippi,Eastern,ETC/GMT+6 +25,Montana Eastern,Montana,Eastern,ETC/GMT+7 +26,Montana Western,Montana,Western,ETC/GMT+7 +27,North Carolina,North Carolina,Eastern,ETC/GMT+5 +28,North Dakota,North Dakota,Eastern,ETC/GMT+6 +29,Nebraska Eastern,Nebraska,Eastern,ETC/GMT+6 +30,Nebraska Western,Nebraska,Western,ETC/GMT+6 +31,New Hampshire,New Hampshire,Eastern,ETC/GMT+5 +32,New Jersey,New Jersey,Eastern,ETC/GMT+5 +33,New Mexico Eastern,New Mexico,Eastern,ETC/GMT+7 +34,New Mexico Western,New Mexico,Western,ETC/GMT+7 +35,Nevada,Nevada,Western,ETC/GMT+8 +36,New York,New York,Eastern,ETC/GMT+5 
+37,Ohio,Ohio,Eastern,ETC/GMT+5 +38,Oklahoma,Oklahoma,Eastern,ETC/GMT+6 +39,Oregon,Oregon,Western,ETC/GMT+8 +40,Pennsylvania,Pennsylvania,Eastern,ETC/GMT+5 +41,Rhode Island,Rhode Island,Eastern,ETC/GMT+5 +42,South Carolina,South Carolina,Eastern,ETC/GMT+5 +43,South Dakota Eastern,South Dakota,Eastern,ETC/GMT+6 +44,South Dakota Western,South Dakota,Western,ETC/GMT+6 +45,Tennessee,Tennessee,Eastern,ETC/GMT+6 +46,Texas Eastern,Texas,Eastern,ETC/GMT+6 +47,Texas Panhandle,Texas,Eastern,ETC/GMT+6 +48,ERCOT,Texas,Texas,ETC/GMT+6 +49,El Paso,Texas,Western,ETC/GMT+7 +50,Utah,Utah,Western,ETC/GMT+7 +51,Virginia,Virginia,Eastern,ETC/GMT+5 +52,Vermont,Vermont,Eastern,ETC/GMT+5 +53,Washington,Washington,Western,ETC/GMT+8 +54,Wisconsin,Wisconsin,Eastern,ETC/GMT+6 +55,West Virginia,West Virginia,Eastern,ETC/GMT+5 +56,Wyoming,Wyoming,Western,ETC/GMT+7 diff --git a/prereise/gather/griddata/hifld/data_access/load.py b/prereise/gather/griddata/hifld/data_access/load.py index 65048d5c0..99d2a1e5d 100644 --- a/prereise/gather/griddata/hifld/data_access/load.py +++ b/prereise/gather/griddata/hifld/data_access/load.py @@ -181,7 +181,9 @@ def get_hifld_electric_substations(path): .round({"MAX_VOLT": 3, "MIN_VOLT": 3}) ) - return data.query("STATUS == 'IN SERVICE' and STATE in @abv2state") + return data.query( + "(STATUS == 'IN SERVICE' or STATUS == 'NOT AVAILABLE') and STATE in @abv2state" + ) def get_hifld_electric_power_transmission_lines(path): diff --git a/prereise/gather/griddata/hifld/data_process/topology.py b/prereise/gather/griddata/hifld/data_process/topology.py index e4f6dbaa0..0246774ab 100644 --- a/prereise/gather/griddata/hifld/data_process/topology.py +++ b/prereise/gather/griddata/hifld/data_process/topology.py @@ -4,6 +4,7 @@ from itertools import combinations, product import networkx as nx +import pandas as pd from powersimdata.utility.distance import haversine, ll2uv from scipy.spatial import KDTree from tqdm import tqdm @@ -217,3 +218,100 @@ def get_mst_edges(lines, 
def add_interconnects_by_connected_components(
    lines,
    substations,
    seams_substations,
    substation_assumptions,
    line_assumptions,
    interconnect_size_rank,
):
    """Disconnect a large connected component using a set of connecting substations,
    label the resulting connected components, then reconnect dropped lines and
    substations, using explicit assumptions plus inference from neighboring lines.

    Both data frames are modified inplace. ``lines`` gains an 'interconnect' column,
    and any 'SUB_1_ID'/'SUB_2_ID' entry which pointed at a seam substation is
    re-mapped to a newly created substation. ``substations`` gains one new row per
    (seam substation, interconnection) pair, named ``<NAME>_<interconnect>``, gains an
    'interconnect' column, and loses the rows of the original seam substations.

    :param pandas.DataFrame lines: data frame of line information. Must contain
        'SUB_1_ID' and 'SUB_2_ID' columns.
    :param pandas.DataFrame substations: data frame of substation information,
        indexed by substation ID. Must contain a 'NAME' column.
    :param iterable seams_substations: IDs of substations to drop.
    :param dict substation_assumptions: labeling assumptions for substations. Keys are
        interconnection names, values are iterables of substation IDs.
    :param dict line_assumptions: labeling assumptions for lines. Keys are
        interconnection names, values are iterables of line IDs.
    :param iterable interconnect_size_rank: ordered iterable of interconnection names.
    :raises ValueError: if removing the seam substations leaves fewer connected
        components than there are names in ``interconnect_size_rank``, if at least one
        dropped line's interconnection isn't specified and can't be inferred from its
        neighbors, or if the final number of connected components doesn't match the
        length of the ``interconnect_size_rank``.
    """
    # Materialize the iterables once: they are traversed several times below, which
    # would silently exhaust a generator
    seams = set(seams_substations)
    ranked_names = list(interconnect_size_rank)

    # Create a graph of the network, and drop lines based on substations
    touches_seam = lines["SUB_1_ID"].isin(seams) | lines["SUB_2_ID"].isin(seams)
    g = nx.convert_matrix.from_pandas_edgelist(lines, "SUB_1_ID", "SUB_2_ID")
    g.remove_nodes_from(seams)
    # Label interconnections based on their sizes (largest component first)
    components = sorted(nx.connected_components(g), key=len)[::-1]
    if len(components) < len(ranked_names):
        raise ValueError(
            "Removing the seams substations left fewer connected components than "
            "there are interconnections to label"
        )
    labels = pd.Series("unknown", index=lines.index, dtype="string")
    for name, component in zip(ranked_names, components):
        labels.loc[lines["SUB_1_ID"].isin(component)] = name
    labels.loc[touches_seam] = "dropped"
    # Label some dropped lines and unknown interconnection lines (small islands)
    for interconnect, sub_ids in substation_assumptions.items():
        at_assumed_sub = lines["SUB_1_ID"].isin(sub_ids) | lines["SUB_2_ID"].isin(
            sub_ids
        )
        labels.loc[at_assumed_sub] = interconnect
    for interconnect, line_ids in line_assumptions.items():
        # pandas >= 2.0 refuses set indexers, so always hand ``.loc`` a list
        labels.loc[list(line_ids)] = interconnect
    # Use neighboring lines at non-dropped substations to infer line interconnections.
    # Both frames are snapshots taken before inference starts, so a line labeled in
    # this loop is never used as evidence for another one.
    dropped_lines = lines.loc[labels == "dropped"]
    non_dropped_lines = lines.loc[labels != "dropped"]
    for line_id, line in dropped_lines.iterrows():
        anchor_sub = line["SUB_1_ID"] if line["SUB_2_ID"] in seams else line["SUB_2_ID"]
        neighbors = non_dropped_lines.loc[
            (non_dropped_lines["SUB_1_ID"] == anchor_sub)
            | (non_dropped_lines["SUB_2_ID"] == anchor_sub)
        ]
        neighbor_interconnects = labels.loc[neighbors.index].unique()
        if len(neighbor_interconnects) != 1:
            raise ValueError(f"Couldn't infer interconnection for line {line_id}")
        labels.loc[line_id] = neighbor_interconnects[0]
    lines["interconnect"] = labels

    # When lines of multiple interconnections meet at a substation, split it
    for sub_id in sorted(seams):
        # Find all lines disconnected by removing this substation
        at_sub = (lines["SUB_1_ID"] == sub_id) | (lines["SUB_2_ID"] == sub_id)
        new_sub_interconnects = lines.loc[at_sub, "interconnect"].unique()
        if len(new_sub_interconnects) == 0:
            # No line touches this substation: nothing to split or re-map
            continue
        # Build one new substation per interconnection to replace the old one. The
        # interconnect -> new ID mapping is kept explicitly rather than parsed back
        # out of the new name, since the original name may itself contain '_'.
        first_new_sub_id = substations.index.max() + 1
        new_id_by_interconnect = {}
        for offset, interconnect in enumerate(new_sub_interconnects):
            new_sub_id = first_new_sub_id + offset
            new_substation = substations.loc[sub_id].copy()
            new_substation["NAME"] = new_substation["NAME"] + f"_{interconnect}"
            substations.loc[new_sub_id] = new_substation
            new_id_by_interconnect[interconnect] = new_sub_id
        # Re-map the labelled lines to the new substations
        for interconnect, new_sub_id in new_id_by_interconnect.items():
            in_interconnect = lines["interconnect"] == interconnect
            lines.loc[
                in_interconnect & (lines["SUB_1_ID"] == sub_id), "SUB_1_ID"
            ] = new_sub_id
            lines.loc[
                in_interconnect & (lines["SUB_2_ID"] == sub_id), "SUB_2_ID"
            ] = new_sub_id

    # With the seams split, the network must fall apart into exactly the expected
    # number of interconnections
    revised_g = nx.convert_matrix.from_pandas_edgelist(lines, "SUB_1_ID", "SUB_2_ID")
    components = sorted(nx.connected_components(revised_g), key=len)[::-1]
    if len(components) != len(ranked_names):
        raise ValueError("Interconnections were not separated successfully")
    for name, component in zip(ranked_names, components):
        substations.loc[list(component), "interconnect"] = name
    substations.drop(list(seams), inplace=True)
def add_b2bs_to_dc_lines(dc_lines, substations, b2b_ratings):
    """Given back-to-back (B2B) converter station ratings, add entries to the DC lines
    table (modified inplace) representing the HVDC links between interconnections.

    :param pandas.DataFrame dc_lines: table of HVDC line information, indexed by line
        ID. If it has a 'Pmin' column, new entries get ``Pmin = -Pmax``.
    :param pandas.DataFrame substations: table of substation information, indexed by
        substation ID. Must contain a 'NAME' column.
    :param dict/pandas.Series b2b_ratings: ratings of B2B HVDC facilities. Keys are
        strings which, followed by an underscore, are contained (as literal text)
        within exactly two substation 'NAME' properties (one on either 'side' of an
        interconnection seam), values are B2B facility capacity in MW.
    :raises ValueError: if a given B2B rating name does not identify exactly two
        substations. In that case ``dc_lines`` is left untouched.
    """
    has_pmin = "Pmin" in dc_lines.columns
    # Check all lines and build list of lines to be added (if validation passes)
    to_add = []
    for name, rating in b2b_ratings.items():
        # Literal (non-regex) match so names with characters such as '(' or '.' are
        # found as written; substations without a name never match
        is_match = substations["NAME"].str.contains(f"{name}_", regex=False, na=False)
        sub_ids = substations.loc[is_match].index
        if len(sub_ids) != 2:
            raise ValueError(f"Could not identify two substations for B2B: {name}")
        info = {"SUB_1_ID": sub_ids[0], "SUB_2_ID": sub_ids[1], "Pmax": rating}
        if has_pmin:
            # B2B links are treated as bidirectional, like the existing DC lines
            info["Pmin"] = -1 * rating
        to_add.append(info)

    # Now that we know all are good, loop through and append to extend DC lines inplace
    # The first new ID is calculated to not share a leading digit with existing DC lines
    prev_max = int(dc_lines.index.max()) if len(dc_lines.index) > 0 else 0
    if prev_max < 1:
        # Empty table, or no positive IDs yet: there is no leading digit to avoid
        first_new_id = 1
    else:
        order_of_magnitude = 10 ** (len(str(prev_max)) - 1)
        first_new_id = order_of_magnitude * (prev_max // order_of_magnitude + 1)
    # We need to loop through and add one-by-one to be able to append inplace
    for offset, info in enumerate(to_add):
        dc_lines.loc[first_new_id + offset] = pd.Series(info)
map_state_and_county_to_interconnect(x.STATE, x.COUNTY), axis=1 - ) - return branch, bus, substations, dc_lines