Skip to content

Commit

Permalink
Merge pull request #256 from Breakthrough-Energy/daniel/hifld_solar_profiles
Browse files Browse the repository at this point in the history

feat: generate solar profiles during HIFLD grid-building
  • Loading branch information
danielolsen committed Mar 14, 2022
2 parents c78dc02 + 1592af7 commit f2ccac7
Show file tree
Hide file tree
Showing 10 changed files with 383 additions and 219 deletions.
2 changes: 1 addition & 1 deletion .github/workflows/test.yml
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@ jobs:
strategy:
fail-fast: false
matrix:
python-version: ['3.8', '3.9']
python-version: ['3.8', '3.9', '3.10']

name: Python ${{ matrix.python-version }}
steps:
Expand Down
2 changes: 1 addition & 1 deletion Pipfile
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,6 @@ xlrd = "==1.2.0"
openpyxl = "*"
netCDF4 = "~=1.5.8"
powersimdata = "~=0.4.4"
nrel-pysam = "~=2.2"
nrel-pysam = "~=3.0"
pyproj = "~=3.0"
pygrib = "~=2.1.4"
236 changes: 121 additions & 115 deletions Pipfile.lock

Large diffs are not rendered by default.

2 changes: 2 additions & 0 deletions prereise/gather/griddata/hifld/const.py
Original file line number Diff line number Diff line change
Expand Up @@ -107,6 +107,8 @@
blob_paths = {
"eia_form860_2019_generator": "https://besciences.blob.core.windows.net/datasets/EIA_Form860/3_1_Generator_Y2019_Operable.csv",
"eia_form860_2019_plant": "https://besciences.blob.core.windows.net/datasets/EIA_Form860/2___Plant_Y2019.csv",
"eia_form860_2019_solar": "https://besciences.blob.core.windows.net/datasets/EIA_Form860/3_3_Solar_Y2019_Operable.csv",
"eia_form860_2019_wind": "https://besciences.blob.core.windows.net/datasets/EIA_Form860/3_2_Wind_Y2019_Operable.csv",
"epa_ampd": "https://besciences.blob.core.windows.net/datasets/EPA_AMPD/",
"epa_needs": "https://besciences.blob.core.windows.net/datasets/EPA_NEEDS/needs-v620_06-30-21-2_active.csv",
"substations": "https://besciences.blob.core.windows.net/datasets/hifld/Electric_Substations_Jul2020.csv",
Expand Down
1 change: 1 addition & 0 deletions prereise/gather/griddata/hifld/data_process/generators.py
Original file line number Diff line number Diff line change
Expand Up @@ -343,5 +343,6 @@ def build_plant(bus, substations, kwargs={}):
)
generators["type"] = generators["type"].replace(const.fuel_translations)
generators["GenIOD"] = 0
generators.index.name = "plant_id"

return generators
59 changes: 59 additions & 0 deletions prereise/gather/griddata/hifld/data_process/profiles.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,59 @@
from prereise.gather.griddata.hifld import const
from prereise.gather.griddata.hifld.data_access.load import get_eia_form_860
from prereise.gather.solardata.nsrdb.sam import retrieve_data_individual


def floatify(x):
    """Coerce an object to a float, returning a float NaN for any objects which raise an
    exception when passed to :func:`float`.

    :param object x: object to coerce.
    :return: (*float*) -- coerced value.
    """
    try:
        return float(x)
    except (TypeError, ValueError):
        # ValueError covers unparseable strings (e.g. ""), TypeError covers None
        # and other non-numeric objects; both map to NaN per the docstring contract.
        return float("nan")


def build_solar(nrel_email, nrel_api_key, solar_plants, **solar_kwargs):
    """Use plant-level data to build solar profiles.

    :param str nrel_email: email used to `sign up <https://developer.nrel.gov/signup/>`_.
    :param str nrel_api_key: API key.
    :param pandas.DataFrame solar_plants: data frame of solar farms.
    :param dict solar_kwargs: keyword arguments to pass to
        :func:`prereise.gather.solardata.nsrdb.sam.retrieve_data_individual`.
    :return: (*pandas.DataFrame*) -- data frame of normalized power profiles. The index
        is hourly timestamps for the profile year, the columns are plant IDs, the values
        are floats.
    """
    tracking_columns = ["Single-Axis Tracking?", "Dual-Axis Tracking?", "Fixed Tilt?"]
    numeric_columns = ["DC Net Capacity (MW)", "Nameplate Capacity (MW)", "Tilt Angle"]
    # Join the raw 'extra' EIA table on plant & generating unit, then restore the
    # original plant index (the merge discards it).
    extra_solar_data = get_eia_form_860(const.blob_paths["eia_form860_2019_solar"])
    merged = solar_plants.merge(
        extra_solar_data, on=["Plant Code", "Generator ID"], suffixes=(None, "_extra")
    )
    merged.index = solar_plants.index
    # Coerce columns to the types expected by profile generation
    for column in numeric_columns:
        merged[column] = merged[column].map(floatify)
    for column in tracking_columns:
        # only an explicit 'Y' counts as True; 'N', blank, etc. become False
        merged[column] = merged[column] == "Y"

    # Rows without exactly one tracking flag set have no definitive panel type and
    # are assumed 100% Fixed Tilt. Solar thermal also ends up labeled as fixed
    # tilt, but this will be ignored.
    ambiguous = merged.index[merged[tracking_columns].sum(axis=1) != 1]
    merged.loc[ambiguous, tracking_columns] = False
    merged.loc[ambiguous, "Fixed Tilt?"] = True

    merged.index.name = "plant_id"  # needed for the next step; lost in the merge
    return retrieve_data_individual(
        nrel_email,
        nrel_api_key,
        solar_plant=merged,
        **solar_kwargs,
    )
127 changes: 93 additions & 34 deletions prereise/gather/griddata/hifld/orchestration.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,55 +6,114 @@
from prereise.gather.griddata.hifld.const import powersimdata_column_defaults
from prereise.gather.griddata.hifld.data_process.demand import assign_demand_to_buses
from prereise.gather.griddata.hifld.data_process.generators import build_plant
from prereise.gather.griddata.hifld.data_process.profiles import build_solar
from prereise.gather.griddata.hifld.data_process.transmission import build_transmission


def create_csvs(output_folder):
def create_csvs(output_folder, nrel_email, nrel_api_key, solar_kwargs=None):
    """Process HIFLD source data to CSVs compatible with PowerSimData.

    :param str output_folder: directory to write CSVs to.
    :param str nrel_email: email used to `sign up <https://developer.nrel.gov/signup/>`_.
    :param str nrel_api_key: API key.
    :param dict solar_kwargs: keyword arguments to pass to
        :func:`prereise.gather.solardata.nsrdb.sam.retrieve_data_individual`.
    """
    # None instead of a mutable {} default; normalize before passing downstream.
    if solar_kwargs is None:
        solar_kwargs = {}
    full_tables = create_grid(output_folder)
    create_profiles(
        full_tables["plant"], nrel_email, nrel_api_key, output_folder, solar_kwargs
    )


def create_grid(output_folder=None):
    """Process HIFLD source data to produce grid tables (and optionally CSVs)
    compatible with PowerSimData.

    :param str output_folder: directory to write CSVs to. If None, CSVs will not be
        written (just returned).
    :return: (*dict*) -- keys are strings for table names, values are dataframes that
        correspond to those tables. These dataframes have all available columns for
        each table, even though the CSV files which are written are limited to only the
        columns expected by powersimdata.
    """
    # Process grid data from original sources
    branch, bus, substation, dcline = build_transmission()
    plant = build_plant(bus, substation)
    assign_demand_to_buses(substation, branch, plant, bus)

    full_tables = {}
    full_tables["branch"] = branch
    full_tables["dcline"] = dcline
    full_tables["sub"] = substation
    # Separate tables as necessary to match PowerSimData format
    # bus goes to bus and bus2sub
    full_tables["bus2sub"] = bus[["sub_id", "interconnect"]]
    full_tables["bus"] = bus.drop(["sub_id"], axis=1)
    # plant goes to plant and gencost
    full_tables["gencost"] = plant[["c0", "c1", "c2", "interconnect"]].copy()
    full_tables["plant"] = plant.drop(["c0", "c1", "c2"], axis=1)

    # Fill in missing column values
    for name, defaults in powersimdata_column_defaults.items():
        full_tables[name] = full_tables[name].assign(**defaults)

    if output_folder is not None:
        os.makedirs(output_folder, exist_ok=True)
        # Filter to only the columns expected by PowerSimData, in the expected order
        powersimdata_outputs = {}
        for name, df in full_tables.items():
            # Copy the list: the augmented assignments below would otherwise mutate
            # the shared powersimdata constants in place, corrupting repeated calls.
            col_names = list(getattr(psd_const, f"col_name_{name}"))
            if name == "bus":
                # The bus column names in PowerSimData include the index
                col_names = col_names[1:]
            if name == "branch":
                col_names += ["branch_device_type"]
            if name == "plant":
                col_names += ["type", "GenFuelCost", "GenIOB", "GenIOC", "GenIOD"]
            if name == "dcline":
                col_names += ["from_interconnect", "to_interconnect"]
            else:
                # every non-dcline table gets a single interconnect column
                col_names += ["interconnect"]
            powersimdata_outputs[name] = full_tables[name][col_names]

        # Save files
        for name, df in powersimdata_outputs.items():
            df.to_csv(os.path.join(output_folder, f"{name}.csv"))
        # The zone file gets copied directly
        zone_path = os.path.join(os.path.dirname(__file__), "data", "zone.csv")
        shutil.copyfile(zone_path, os.path.join(output_folder, "zone.csv"))

    return full_tables


def create_profiles(
    plants, nrel_email, nrel_api_key, output_folder=None, solar_kwargs=None
):
    """Process a table of plant data to produce profile CSVs compatible with
    PowerSimData.

    :param pandas.DataFrame plants: table of plant data.
    :param str nrel_email: email used to `sign up <https://developer.nrel.gov/signup/>`_.
    :param str nrel_api_key: API key.
    :param str output_folder: directory to write CSVs to. If None, CSVs will not be
        written (just returned).
    :param dict solar_kwargs: keyword arguments to pass to
        :func:`prereise.gather.solardata.nsrdb.sam.retrieve_data_individual`.
    :return: (*dict*) -- keys are strings for profile names, values are dataframes,
        indexed by timestamp, with plant IDs as columns.
    """
    # None instead of a mutable {} default; normalize before unpacking below.
    if solar_kwargs is None:
        solar_kwargs = {}
    # Use plant data to build profiles
    profiles = {
        "solar": build_solar(
            nrel_email,
            nrel_api_key,
            plants.query("type == 'solar'"),
            **solar_kwargs,
        ),
    }
    if output_folder is not None:
        os.makedirs(output_folder, exist_ok=True)
        # Write profiles
        for name, df in profiles.items():
            df.to_csv(os.path.join(output_folder, f"{name}.csv"))

    return profiles
17 changes: 17 additions & 0 deletions prereise/gather/solardata/nsrdb/nrel_api.py
Original file line number Diff line number Diff line change
Expand Up @@ -71,6 +71,19 @@ def to_dict(self):
)
return result

def to_sam_weather_file_format(self):
    """Convert the data to the format expected by nrel-pysam for local files. See
    https://developer.nrel.gov/docs/solar/nsrdb/psm3-download/.

    :return: (*list*) -- a list of lists which can be passed to
        :meth:`csv.writer.writerows` and then loaded from disk.
    """
    # Two metadata rows (names, then values) followed by the data header row,
    # followed by one row per timestamp of resource data.
    leading_rows = [
        ["lat", "lon", "tz", "elevation"],
        (self.lat, self.lon, self.tz, self.elevation),
        self.data_resource.columns.tolist(),
    ]
    return leading_rows + self.data_resource.to_numpy().tolist()


class NrelApi:
"""Provides an interface to the NREL API for PSM3 data. It supports
Expand Down Expand Up @@ -151,6 +164,10 @@ def get_psm3_at(
)
def download(url):
resp = requests.get(url)
if resp.status_code // 100 == 5: # all 5xx errors, server side
raise TransientError(
f"Server side error, retry_count={download.retry_count}"
)
if resp.status_code == 429:
raise TransientError(
f"Too many requests, retry_count={download.retry_count}"
Expand Down
Loading

0 comments on commit f2ccac7

Please sign in to comment.