Skip to content

Commit

Permalink
refactor: map generators to substations using voltages
Browse files Browse the repository at this point in the history
  • Loading branch information
danielolsen committed Feb 18, 2022
1 parent 98d272b commit 67377d5
Showing 1 changed file with 84 additions and 50 deletions.
134 changes: 84 additions & 50 deletions prereise/gather/griddata/hifld/data_process/generators.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,5 @@
from math import asin

import numpy as np
import pandas as pd
from scipy.optimize import curve_fit
Expand All @@ -24,7 +26,9 @@ def floatify(value, default=float("nan")):
return default


def map_generators_to_sub_by_location(generators, substations, inplace=True):
def map_generators_to_sub_by_location(
generators, substations, inplace=True, report_worst=None
):
"""Determine the closest substation to each generator. For generators without
latitude and longitude, an attempt will be made to match via ZIP code, and failing
that a pandas.NA value will be returned.
Expand All @@ -37,17 +41,38 @@ def map_generators_to_sub_by_location(generators, substations, inplace=True):
'sub_id' column or to return a new one. If ``inplace`` is `True`, entries in
`generators` which have non-sensical combinations of 'state' and 'interconnect'
columns will have their 'interconnect' entries modified.
:param int report_worst: if not None, display the distances of the worst N mappings.
:return: (*pandas.DataFrame/None*) -- if ``inplace`` is `False`, return the modified
DataFrame; otherwise return nothing.
"""

def get_closest_substation(generator, state_trees, subs_state_lookup):
    """Find the substation closest to one generator, and the distance to it.

    :param pandas.Series generator: one generator row; the 'xyz', 'interconnect',
        'voltage_class' and 'Pmax' entries are read.
    :param dict state_trees: spatial trees to search. Keys are either an
        interconnect or an (interconnect, voltage class) tuple; each value must
        provide a ``query`` method returning (distance, position).
    :param dict subs_state_lookup: for each key of ``state_trees``, the substation
        IDs, indexable by the position returned from the matching tree's query.
    :return: (*pandas.Series*) -- 'dist' (great circle distance, miles) and
        'sub_id' entries. Both are pandas.NA when the generator has no coordinates,
        so that every row yields the same fields.
    :raises KeyError: if there is no tree for the generator's lookup key.
    """
    if not isinstance(generator["xyz"], list):
        # No usable coordinates: return the same fields as a successful lookup
        # rather than a bare scalar.
        return pd.Series({"dist": pd.NA, "sub_id": pd.NA})
    if pd.isnull(generator["voltage_class"]) or generator["Pmax"] < 100:
        # Unknown voltage class or Pmax below 100: search the interconnect-wide key
        grouper_key = generator["interconnect"]
    else:
        grouper_key = (generator["interconnect"], generator["voltage_class"])
    # Use the lookups passed in by the caller rather than enclosing-scope names
    chord_dist, array_index = state_trees[grouper_key].query(generator["xyz"])
    sub_id = subs_state_lookup[grouper_key][array_index]
    # Translate chord distance to great circle distance in miles, using 3963 mi as
    # earth radius (assumes the xyz coordinates lie on a unit sphere). The asin
    # argument is capped at 1 so floating point round-off cannot raise ValueError.
    dist_in_miles = 3963 * 2 * asin(min(chord_dist / 2, 1.0))
    return pd.Series({"dist": dist_in_miles, "sub_id": sub_id})

def classify_voltages(voltage, voltage_ranges):
    """Return the label of the first voltage range which contains ``voltage``.

    :param float voltage: voltage to classify.
    :param dict voltage_ranges: keys are range labels, values are dictionaries with
        'min' and 'max' entries; both bounds are inclusive.
    :return: (*object/float*) -- key of the first matching range, in the iteration
        order of ``voltage_ranges``, or NaN if no range contains the voltage.
    """
    matching_labels = (
        label
        for label, limits in voltage_ranges.items()
        if limits["min"] <= voltage <= limits["max"]
    )
    # Lazily evaluated, so the search stops at the first match
    return next(matching_labels, float("nan"))

voltage_ranges = {
"under 100": {"min": 0, "max": 99},
"100-161": {"min": 100, "max": 161},
"220-287": {"min": 220, "max": 287},
"345": {"min": 345, "max": 345},
"500": {"min": 500, "max": 500},
"735 and above": {"min": 735, "max": float("inf")},
}

# Translate lat/lon to 3D positions (assume spherical earth, origin at center)
substations_with_xyz = substations.assign(
Expand All @@ -64,68 +89,67 @@ def get_sub_id_of_closest_substation(generator, state_trees, subs_state_lookup):
)
)

# Group substations by state to build KDTrees
subs_state_lookup = substations_with_xyz.groupby(["interconnect", "STATE"]).groups
# Bin voltages into broad classes
substations_with_xyz["voltage_class"] = substations["MAX_VOLT"].map(
lambda x: classify_voltages(x, voltage_ranges)
)
generators_with_xyz["voltage_class"] = generators["Grid Voltage (kV)"].map(
lambda x: classify_voltages(x, voltage_ranges)
)

# Group substations by voltage to build KDTrees
subs_voltage_lookup = {
(interconnect, voltage_level): substations_with_xyz.query(
"interconnect == @interconnect and MAX_VOLT >= @voltage_range['min']"
).index
for interconnect in generators["interconnect"].unique()
for voltage_level, voltage_range in voltage_ranges.items()
}
# Group substations by ZIP code for a fallback for generators without coordinates
subs_zip_groupby = substations_with_xyz.groupby(["interconnect", "ZIP"])

# Create a KDTree for each combination of state and interconnect
state_trees = {
voltage_trees = {
key: KDTree(np.array(substations_with_xyz.loc[sub_ids, "xyz"].tolist()))
for key, sub_ids in subs_state_lookup.items()
for key, sub_ids in subs_voltage_lookup.items()
if len(sub_ids) > 0
}
# Ensure that we have a tree for every generator
gens_state_groupby = generators_with_xyz.groupby(["interconnect", "state"])
missing_groups = set(gens_state_groupby.groups) - set(state_trees)
if len(missing_groups) > 0:
# There are some combinations of generator (interconnect, state) without subs
allowable_border_states = {"KS", "NE", "OK"}
for interconnect, state in missing_groups:
if state in allowable_border_states:
# Assume that the interconnect and state are correct
print(
f"no substations within ({interconnect}, {state}), "
f"will map generators to substations within {interconnect} instead"
)
# Find all substations for the interconnection
new_subs = substations_with_xyz.query("interconnect == @interconnect")
# Extend the 'true' combinations of (interconnect, state) with fakes
state_trees[(interconnect, state)] = KDTree(
np.array(new_subs["xyz"].tolist())
)
subs_state_lookup[(interconnect, state)] = new_subs.index
else:
# Assume that the state is correct, the interconnect is wrong
print(
f"no substations within ({interconnect}, {state}), "
f"will map generators to substations within {state} instead"
)
(assumed,) = {
interconnect
for interconnect, state_list in const.interconnect2state.items()
if interconnect not in {"ignore", "split"} and state in state_list
}
gens_to_fix = gens_state_groupby.get_group((interconnect, state)).index
generators_with_xyz.loc[gens_to_fix, "interconnect"] = assumed
# Create a KDTree for each interconnect (all voltages)
subs_interconnect_groupby = substations_with_xyz.groupby("interconnect")
for interconnect in generators["interconnect"].unique():
tree_subs = subs_interconnect_groupby.get_group(interconnect)
voltage_trees[interconnect] = KDTree(np.array(tree_subs["xyz"].tolist()))
subs_voltage_lookup[interconnect] = tree_subs.index

# Query the appropriate tree for each generator to get the closest substation ID
sub_ids = generators_with_xyz.apply(
lambda x: get_sub_id_of_closest_substation(x, state_trees, subs_state_lookup),
mapping_results = generators_with_xyz.apply(
lambda x: get_closest_substation(x, voltage_trees, subs_voltage_lookup),
axis=1,
)
# For generators without coordinates, try to pick a substation with a matching ZIP
for g in generators.loc[sub_ids.isnull()].index:
for g in generators.loc[mapping_results["sub_id"].isnull()].index:
try:
candidates = subs_zip_groupby.get_group(generators.loc[g, "ZIP"])
sub_ids.loc[g] = candidates.index[0] # arbitrary choose the first one
# arbitrarily choose the first one
mapping_results.loc[g, "sub_id"] = candidates.index[0]
except KeyError:
continue # No coordinates, no matching ZIP, we're out of luck

if report_worst is not None:
    # Show the N mappings with the largest generator-to-substation distance.
    # The mapping results name the distance column "dist"; it is only called
    # "sub_dist" once it has been copied onto the generators table.
    print(
        mapping_results.sort_values("dist", ascending=False)
        .join(generators[["Plant Code", "Grid Voltage (kV)", "Pmax"]])
        .head(report_worst)
    )

if inplace:
generators["sub_id"] = sub_ids
generators["sub_id"] = mapping_results["sub_id"]
generators["sub_dist"] = mapping_results["dist"]
generators["interconnect"] = generators_with_xyz["interconnect"]
else:
return generators_with_xyz.drop("xyz", axis=1).assign(sub_id=sub_ids)
return generators_with_xyz.drop(["xyz", "voltage_class"], axis=1).join(
mapping_results
)


def map_generator_to_bus_by_sub(generator, bus_groupby):
Expand Down Expand Up @@ -375,7 +399,16 @@ def build_plant(bus, substations, kwargs={}):
epa_ampd_groupby = epa_ampd.groupby(["ORISPL_CODE", "UNITID"])

# Add information to generators based on Form 860 Plant table
generators = generators.merge(plants, on="Plant Code", suffixes=(None, "_860Plant"))
# Merging this way allows column-on-column merge while preserving original index
generators = (
generators.reset_index()
.merge(
plants,
on="Plant Code",
suffixes=(None, "_860Plant"),
)
.set_index("index")
)
generators.rename(
{"Latitude": "lat", "Longitude": "lon", "Zip": "ZIP"}, axis=1, inplace=True
)
Expand All @@ -385,6 +418,7 @@ def build_plant(bus, substations, kwargs={}):
.map(const.balancingauthority2interconnect)
.combine_first(generators["NERC Region"].map(const.nercregion2interconnect))
)
generators["Grid Voltage (kV)"] = generators["Grid Voltage (kV)"].map(floatify)

# Ensure we have Pmax and Pmin for each generator
generators["Pmax"] = generators[
Expand Down

0 comments on commit 67377d5

Please sign in to comment.