feat: generate solar profiles during HIFLD grid-building #256

Merged · 12 commits · Feb 15, 2022
2 changes: 1 addition & 1 deletion .github/workflows/test.yml
@@ -14,7 +14,7 @@ jobs:
strategy:
fail-fast: false
matrix:
python-version: ['3.8', '3.9']
python-version: ['3.8', '3.9', '3.10']

name: Python ${{ matrix.python-version }}
steps:
2 changes: 1 addition & 1 deletion Pipfile
@@ -23,6 +23,6 @@ tqdm = "==4.29.1"
xlrd = "==1.2.0"
netCDF4 = "~=1.5.8"
powersimdata = "~=0.4.4"
nrel-pysam = "~=2.2"
nrel-pysam = "~=3.0"
pyproj = "~=3.0"
pygrib = "*"
33 changes: 18 additions & 15 deletions Pipfile.lock

Some generated files are not rendered by default.

2 changes: 2 additions & 0 deletions prereise/gather/griddata/hifld/const.py
@@ -107,6 +107,8 @@
blob_paths = {
"eia_form860_2019_generator": "https://besciences.blob.core.windows.net/datasets/EIA_Form860/3_1_Generator_Y2019_Operable.csv",
"eia_form860_2019_plant": "https://besciences.blob.core.windows.net/datasets/EIA_Form860/2___Plant_Y2019.csv",
"eia_form860_2019_solar": "https://besciences.blob.core.windows.net/datasets/EIA_Form860/3_3_Solar_Y2019_Operable.csv",
"eia_form860_2019_wind": "https://besciences.blob.core.windows.net/datasets/EIA_Form860/3_2_Wind_Y2019_Operable.csv",
"epa_ampd": "https://besciences.blob.core.windows.net/datasets/EPA_AMPD/",
"epa_needs": "https://besciences.blob.core.windows.net/datasets/EPA_NEEDS/needs-v620_06-30-21-2_active.csv",
"substations": "https://besciences.blob.core.windows.net/datasets/hifld/Electric_Substations_Jul2020.csv",
1 change: 1 addition & 0 deletions prereise/gather/griddata/hifld/data_process/generators.py
@@ -343,5 +343,6 @@ def build_plant(bus, substations, kwargs={}):
)
generators["type"] = generators["type"].replace(const.fuel_translations)
generators["GenIOD"] = 0
generators.index.name = "plant_id"

return generators
59 changes: 59 additions & 0 deletions prereise/gather/griddata/hifld/data_process/profiles.py
@@ -0,0 +1,59 @@
from prereise.gather.griddata.hifld import const
from prereise.gather.griddata.hifld.data_access.load import get_eia_form_860
from prereise.gather.solardata.nsrdb.sam import retrieve_data_individual


def floatify(x):
"""Coerce an object to a float, returning a float NaN for any objects which raise an
exception when passed to :func:`float`.
:param object x: object to coerce.
:return: (*float*) -- coerced value.
"""
try:
return float(x)
except ValueError:
return float("nan")


def build_solar(nrel_email, nrel_api_key, solar_plants, **solar_kwargs):
"""Use plant-level data to build solar profiles.
:param str nrel_email: email used to `sign up <https://developer.nrel.gov/signup/>`_.
:param str nrel_api_key: API key.
:param pandas.DataFrame solar_plants: data frame of solar farms.
:param dict solar_kwargs: keyword arguments to pass to
:func:`prereise.gather.solardata.nsrdb.sam.retrieve_data_individual`.
:return: (*pandas.DataFrame*) -- data frame of normalized power profiles. The index
is hourly timestamps for the profile year, the columns are plant IDs, the values
are floats.
"""
boolean_columns = ["Single-Axis Tracking?", "Dual-Axis Tracking?", "Fixed Tilt?"]
float_columns = ["DC Net Capacity (MW)", "Nameplate Capacity (MW)", "Tilt Angle"]
# Load raw 'extra' table data, join on plant & generating unit, re-establish index
extra_solar_data = get_eia_form_860(const.blob_paths["eia_form860_2019_solar"])
full_data = solar_plants.merge(
extra_solar_data, on=["Plant Code", "Generator ID"], suffixes=(None, "_extra")
)
full_data.index = solar_plants.index
# Process data to expected types for profile generation
for col in float_columns:
full_data[col] = full_data[col].map(floatify)
for col in boolean_columns:
# 'Y' becomes True, anything else ('N', blank, etc.) becomes False
full_data[col] = full_data[col] == "Y"

# If panel type isn't known definitively, assume 100% Fixed Tilt
# Solar thermal also ends up labeled as fixed tilt, but this will be ignored
bad_booleans = full_data.index[full_data[boolean_columns].sum(axis=1) != 1]
full_data.loc[bad_booleans, boolean_columns] = False
full_data.loc[bad_booleans, "Fixed Tilt?"] = True

full_data.index.name = "plant_id" # needed for next step but gets lost in the merge
profiles = retrieve_data_individual(
nrel_email,
nrel_api_key,
solar_plant=full_data,
**solar_kwargs,
)
return profiles
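
As a hedged illustration of the cleanup steps above, here is a minimal sketch of how `floatify` and the tracking-flag normalization behave on toy rows (the values are invented, not real EIA Form 860 data; the import path is the module added in this diff):

```python
import pandas as pd

from prereise.gather.griddata.hifld.data_process.profiles import floatify

# Invented rows mimicking the raw EIA Form 860 solar table
raw = pd.DataFrame(
    {
        "Tilt Angle": ["25", " ", "n/a"],
        "Single-Axis Tracking?": ["Y", "N", ""],
        "Dual-Axis Tracking?": ["N", "N", ""],
        "Fixed Tilt?": ["N", "Y", ""],
    }
)

raw["Tilt Angle"] = raw["Tilt Angle"].map(floatify)  # 25.0, NaN, NaN
bool_cols = ["Single-Axis Tracking?", "Dual-Axis Tracking?", "Fixed Tilt?"]
for col in bool_cols:
    raw[col] = raw[col] == "Y"  # 'Y' -> True, anything else -> False

# Rows without exactly one True flag fall back to fixed tilt
bad = raw.index[raw[bool_cols].sum(axis=1) != 1]
raw.loc[bad, bool_cols] = False
raw.loc[bad, "Fixed Tilt?"] = True
print(raw)
```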
127 changes: 93 additions & 34 deletions prereise/gather/griddata/hifld/orchestration.py
@@ -6,55 +6,114 @@
from prereise.gather.griddata.hifld.const import powersimdata_column_defaults
from prereise.gather.griddata.hifld.data_process.demand import assign_demand_to_buses
from prereise.gather.griddata.hifld.data_process.generators import build_plant
from prereise.gather.griddata.hifld.data_process.profiles import build_solar
from prereise.gather.griddata.hifld.data_process.transmission import build_transmission


def create_csvs(output_folder):
def create_csvs(output_folder, nrel_email, nrel_api_key, solar_kwargs={}):
Contributor Author: From the user perspective, do we want to enable the user to generate the grid without generating the profiles? If so, we could add a flag input, and add default None values for the profile-specific input parameters.

Contributor Author: Alternatively, we may want the user to be able to create the full Grid CSVs, and then use these for multiple years. We could break create_csvs into a grid step and a profiles step, or allow the user to pass a list of years to have everything handled automatically.

Collaborator: Agree it makes sense to be able to create the grid and profiles separately. Assuming this isn't something that would be used often, I might leave it at something like create_grid and create_profiles, or make create_csvs a wrapper around those if it's convenient.

Contributor Author (danielolsen, Feb 3, 2022): Done. EDIT: The refactor has also been tested and behaves as expected when calling the create_csvs wrapper function.

"""Process HIFLD source data to CSVs compatible with PowerSimData.
:param str output_folder: directory to write CSVs to.
:param str nrel_email: email used to `sign up <https://developer.nrel.gov/signup/>`_.
:param str nrel_api_key: API key.
:param dict solar_kwargs: keyword arguments to pass to
:func:`prereise.gather.solardata.nsrdb.sam.retrieve_data_individual`.
"""
full_tables = create_grid(output_folder)
create_profiles(
full_tables["plant"], nrel_email, nrel_api_key, output_folder, solar_kwargs
)


def create_grid(output_folder=None):
"""Process a table of plant data to produce grid CSVs compatible with PowerSimData.
:param str output_folder: directory to write CSVs to. If None, CSVs will not be
written (just returned).
:return: (*dict*) -- keys are strings for table names, values are dataframes that
correspond to those tables. These dataframes have all available columns for
each table, even though the CSV files which are written are limited to only the
columns expected by powersimdata.
"""
# Process grid data from original sources
branch, bus, substation, dcline = build_transmission()
plant = build_plant(bus, substation)
assign_demand_to_buses(substation, branch, plant, bus)

outputs = {}
outputs["branch"] = branch
outputs["dcline"] = dcline
outputs["sub"] = substation
full_tables = {}
full_tables["branch"] = branch
full_tables["dcline"] = dcline
full_tables["sub"] = substation
# Separate tables as necessary to match PowerSimData format
# bus goes to bus and bus2sub
outputs["bus2sub"] = bus[["sub_id", "interconnect"]]
outputs["bus"] = bus.drop(["sub_id"], axis=1)
full_tables["bus2sub"] = bus[["sub_id", "interconnect"]]
full_tables["bus"] = bus.drop(["sub_id"], axis=1)
# plant goes to plant and gencost
outputs["gencost"] = plant[["c0", "c1", "c2", "interconnect"]].copy()
outputs["plant"] = plant.drop(["c0", "c1", "c2"], axis=1)
full_tables["gencost"] = plant[["c0", "c1", "c2", "interconnect"]].copy()
full_tables["plant"] = plant.drop(["c0", "c1", "c2"], axis=1)

# Fill in missing column values
for name, defaults in powersimdata_column_defaults.items():
outputs[name] = outputs[name].assign(**defaults)

# Filter to only the columns expected by PowerSimData, in the expected order
for name, df in outputs.items():
col_names = getattr(psd_const, f"col_name_{name}")
if name == "bus":
# The bus column names in PowerSimData include the index for legacy reasons
col_names = col_names[1:]
if name == "branch":
col_names += ["branch_device_type"]
if name == "plant":
col_names += ["type", "GenFuelCost", "GenIOB", "GenIOC", "GenIOD"]
if name == "dcline":
col_names += ["from_interconnect", "to_interconnect"]
else:
col_names += ["interconnect"]
outputs[name] = outputs[name][col_names]

# Save files
os.makedirs(output_folder, exist_ok=True)
for name, df in outputs.items():
df.to_csv(os.path.join(output_folder, f"{name}.csv"))
# The zone file gets copied directly
zone_path = os.path.join(os.path.dirname(__file__), "data", "zone.csv")
shutil.copyfile(zone_path, os.path.join(output_folder, "zone.csv"))
full_tables[name] = full_tables[name].assign(**defaults)

if output_folder is not None:
os.makedirs(output_folder, exist_ok=True)
# Filter to only the columns expected by PowerSimData, in the expected order
powersimdata_outputs = {}
for name, df in full_tables.items():
col_names = getattr(psd_const, f"col_name_{name}")
if name == "bus":
# The bus column names in PowerSimData include the index
col_names = col_names[1:]
if name == "branch":
col_names += ["branch_device_type"]
if name == "plant":
col_names += ["type", "GenFuelCost", "GenIOB", "GenIOC", "GenIOD"]
if name == "dcline":
col_names += ["from_interconnect", "to_interconnect"]
else:
col_names += ["interconnect"]
powersimdata_outputs[name] = full_tables[name][col_names]

# Save files
for name, df in powersimdata_outputs.items():
df.to_csv(os.path.join(output_folder, f"{name}.csv"))
# The zone file gets copied directly
zone_path = os.path.join(os.path.dirname(__file__), "data", "zone.csv")
shutil.copyfile(zone_path, os.path.join(output_folder, "zone.csv"))

return full_tables


def create_profiles(
plants, nrel_email, nrel_api_key, output_folder=None, solar_kwargs={}
):
"""Process a table of plant data to produce profile CSVs compatible with
PowerSimData.
:param pandas.DataFrame plants: table of plant data.
:param str nrel_email: email used to `sign up <https://developer.nrel.gov/signup/>`_.
:param str nrel_api_key: API key.
:param str output_folder: directory to write CSVs to. If None, CSVs will not be
written (just returned).
:param dict solar_kwargs: keyword arguments to pass to
:func:`prereise.gather.solardata.nsrdb.sam.retrieve_data_individual`.
:return: (*dict*) -- keys are strings for profile names, values are dataframes,
indexed by timestamp, with plant IDs as columns.
"""
# Use plant data to build profiles
profiles = {
"solar": build_solar(
nrel_email,
nrel_api_key,
plants.query("type == 'solar'"),
**solar_kwargs,
),
}
if output_folder is not None:
os.makedirs(output_folder, exist_ok=True)
# Write profiles
for name, df in profiles.items():
df.to_csv(os.path.join(output_folder, f"{name}.csv"))

return profiles
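
A sketch of how the entry points discussed in the review thread might be called after this refactor; the email, API key, and output path are placeholders, not values from this PR:

```python
from prereise.gather.griddata.hifld.orchestration import (
    create_csvs,
    create_grid,
    create_profiles,
)

# One-shot wrapper: grid CSVs plus profiles in a single call
create_csvs(
    "/tmp/hifld_csvs",
    nrel_email="user@example.com",
    nrel_api_key="DEMO_KEY",
)

# Or split: build the grid once, then generate profiles from its plant table
full_tables = create_grid("/tmp/hifld_csvs")
profiles = create_profiles(
    full_tables["plant"],
    "user@example.com",
    "DEMO_KEY",
    output_folder="/tmp/hifld_csvs",
)
```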
17 changes: 17 additions & 0 deletions prereise/gather/solardata/nsrdb/nrel_api.py
@@ -71,6 +71,19 @@ def to_dict(self):
)
return result

def to_sam_weather_file_format(self):
"""Convert the data to the format expected by nrel-pysam for local files. See
https://developer.nrel.gov/docs/solar/nsrdb/psm3-download/.
:return: (*list*) -- a list of lists which can be passed to
:meth:`csv.writer.writerows` and then loaded from disk.
"""

Collaborator: Technically I think metadata_values is a tuple, but would expect that's fine.
metadata_names = ["lat", "lon", "tz", "elevation"]
metadata_values = self.lat, self.lon, self.tz, self.elevation
data_headers = self.data_resource.columns.tolist()
data_rows = self.data_resource.to_numpy().tolist()
return [metadata_names, metadata_values, data_headers] + data_rows
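
For context, a minimal sketch of writing these rows to disk with the standard library; `data` here stands in for an instance of the class this method belongs to, obtained from an earlier API call:

```python
import csv

rows = data.to_sam_weather_file_format()
with open("weather.csv", "w", newline="") as f:
    # writerows accepts any iterable of sequences, so the tuple of
    # metadata values noted in review is handled the same as the list rows
    csv.writer(f).writerows(rows)
```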


class NrelApi:
"""Provides an interface to the NREL API for PSM3 data. It supports
@@ -151,6 +164,10 @@ def get_psm3_at(
)
def download(url):
resp = requests.get(url)
if resp.status_code // 100 == 5: # all 5xx errors, server side
raise TransientError(
f"Server side error, retry_count={download.retry_count}"
)
if resp.status_code == 429:
raise TransientError(
f"Too many requests, retry_count={download.retry_count}"
)
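
The 5xx/429 checks above only classify responses; the actual re-invocation happens in a retry decorator elsewhere in prereise. As a heavily simplified, assumed sketch of that pattern (the real `TransientError` class and decorator may differ):

```python
import time


class TransientError(Exception):
    """Stand-in for the transient error type raised above."""


def retry(n_allowed_retries=5, delay_sec=2.0):
    """Sketch of a retry decorator: re-invoke on TransientError, then give up."""

    def decorator(func):
        def wrapper(*args, **kwargs):
            for attempt in range(n_allowed_retries + 1):
                wrapper.retry_count = attempt  # visible to the wrapped function
                try:
                    return func(*args, **kwargs)
                except TransientError:
                    if attempt == n_allowed_retries:
                        raise  # out of retries, propagate
                    time.sleep(delay_sec)

        wrapper.retry_count = 0
        return wrapper

    return decorator
```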