From 35ee478d80ee5f3b6bb7a8785e790c699ad85ee0 Mon Sep 17 00:00:00 2001
From: Daniel Olsen
Date: Wed, 5 Jan 2022 14:35:32 -0800
Subject: [PATCH 01/12] chore: add URL paths to Form 860 solar/wind-specific
 data

---
 prereise/gather/griddata/hifld/const.py | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/prereise/gather/griddata/hifld/const.py b/prereise/gather/griddata/hifld/const.py
index 55e802cbe..755e1dd7e 100644
--- a/prereise/gather/griddata/hifld/const.py
+++ b/prereise/gather/griddata/hifld/const.py
@@ -107,6 +107,8 @@ blob_paths = {
     "eia_form860_2019_generator": "https://besciences.blob.core.windows.net/datasets/EIA_Form860/3_1_Generator_Y2019_Operable.csv",
     "eia_form860_2019_plant": "https://besciences.blob.core.windows.net/datasets/EIA_Form860/2___Plant_Y2019.csv",
+    "eia_form860_2019_solar": "https://besciences.blob.core.windows.net/datasets/EIA_Form860/3_3_Solar_Y2019_Operable.csv",
+    "eia_form860_2019_wind": "https://besciences.blob.core.windows.net/datasets/EIA_Form860/3_2_Wind_Y2019_Operable.csv",
     "epa_ampd": "https://besciences.blob.core.windows.net/datasets/EPA_AMPD/",
     "epa_needs": "https://besciences.blob.core.windows.net/datasets/EPA_NEEDS/needs-v620_06-30-21-2_active.csv",
     "substations": "https://besciences.blob.core.windows.net/datasets/hifld/Electric_Substations_Jul2020.csv",

From a4a9a95173f955d299395b8eb501146bd5844fd6 Mon Sep 17 00:00:00 2001
From: Daniel Olsen
Date: Thu, 6 Jan 2022 11:47:00 -0800
Subject: [PATCH 02/12] chore: update nrel-pysam requirement to version 3.0

---
 Pipfile          |  2 +-
 Pipfile.lock     | 33 ++++++++++++++++++---------------
 requirements.txt |  2 +-
 3 files changed, 20 insertions(+), 17 deletions(-)

diff --git a/Pipfile b/Pipfile
index 2f3eb2f3c..a893c698b 100644
--- a/Pipfile
+++ b/Pipfile
@@ -23,6 +23,6 @@ tqdm = "==4.29.1"
 xlrd = "==1.2.0"
 netCDF4 = "~=1.5.8"
 powersimdata = "~=0.4.4"
-nrel-pysam = "~=2.2"
+nrel-pysam = "~=3.0"
 pyproj = "~=3.0"
 pygrib = "*"

diff --git a/Pipfile.lock b/Pipfile.lock
index 96ee06d32..49a1e74bc 100644
--- a/Pipfile.lock
+++ b/Pipfile.lock
@@ -1,7 +1,7 @@
 {
     "_meta": {
         "hash": {
-            "sha256": "3776c28a392d55c14dd5f650d957a5cb86d8fd0073ea13d81dd417e6b1c993c1"
+            "sha256": "da607b969c8ee56efee681dfce43c9e652b3cf43a7e7181c781c8711068b3af2"
         },
         "pipfile-spec": 6,
         "requires": {},
@@ -520,26 +520,29 @@
                 "sha256:80b6b89c77d1dfb64a4c7854981b60aeea6360ac02c6d4e4913319e0a313abef",
                 "sha256:c0946ed31d71f1b732b5aaa6da5a0388a345019af232ce2f49c766e2d6795c51"
             ],
-            "markers": "python_version >= '3.7'",
+            "index": "pypi",
             "version": "==2.6.3"
         },
         "nrel-pysam": {
            "hashes": [
-                "sha256:40f579d546d8763ead40da22c4755f463204abb59c5b56d6756f416de5a9408d",
-                "sha256:4da7bf23749b2b9f0b603b84e21e833a92e1965078b86363de0da2781dcb8aeb",
-                "sha256:5335095823001aa893706d6fde612b129cdc97078b2107426c40c5bde72d3a2d",
-                "sha256:55012ef8c44ff32cdd4befa360bd8d9490f1bbee68958ec6dee4a4091d377df5",
-                "sha256:55825638b64d29d1c0046abc29a168a319a7d5962242cc306b6b7770791fb3d4",
-                "sha256:898c587c89a1ee25f711ef0de51a9575a029b79d221a785b4a5e4c56e1ed3d00",
-                "sha256:9b30775d20c679f2f6c7f6b51a0d07ebdb681133e021039d583c3450ffcdca6a",
-                "sha256:ae00d9f2e3b7c03ff91d0ea5c26752b33dc8a5300b86cd3ee63f8ac77c47fa63",
-                "sha256:b0464daacbf854d506323675071c6b5eb7f23bc35a36014cfba6e60babe05804",
-                "sha256:bef67975fec3340eff73dd9a16ffb662c32bb6eaa8bef109d2c8db51aa0c5273",
-                "sha256:eca2af66cf3dacbef80a8c3f1a4f602b9f1cce4189249c2e28aab62d52aa7863",
-                "sha256:f58f676c4b1fe83615721d8f864077d62f9c93022b1538351c9ecd53c31763b3"
+                "sha256:1537d0b1263eb0a179b91e2fc0d3135c7af6e20ad8519179def5df59aaf367db",
+                "sha256:192c1845c6f4216672e909bafbfbae6ec8de86e88f883d88ac18cfd169e9d4be",
+                "sha256:3dd476e832787cffd5909d8ddb0f3e59d1c1e3864a3366c093d71a5f092dd8c6",
+                "sha256:48b5ccd3db019d59aa005f573dfb6d605e27ea6cfbd0d211ddb2fd49884813a9",
+                "sha256:4b3b7e8cc77e6ccb81676655f712362cc7ab8ed1039a0756546857228a8782f5",
+                "sha256:6619d78002d0168d2adac51266d042620ebb97718c4e5dc1ea9a3c937b53516d",
+                "sha256:71768d40122536dbee683db933817eeb62674b2a6fdf8dcd11665b1d95c06370",
+                "sha256:765545f9876e2f55c4774aa45e8e09d727aad2e9d76d0a75e181cc0de48ba45f",
+                "sha256:767bec11b8b95fa2c61c9e4f4f3875a305275afdcf27480511ad84309f5f67bc",
+                "sha256:8d605a62e182142d422c05ab34b27eef24a6e218ac5dbbd28a18b5c5e8fc6a99",
+                "sha256:90cd2746b9e34ccd161576f67e93d38ec2fb745c7dccdd795975b00cc1933f57",
+                "sha256:b2fc1f68c78498c80375c805f2b872c8c6085b56224c4cf7d494912647f3ba20",
+                "sha256:b8017892e225bc4275846f6a941b3e65fd5cf1ba006ccba687c3767d73c38543",
+                "sha256:d31239e87a0e3319da17a41dd8618f988117ee1c9e408d3413092f1ca5bc66ec",
+                "sha256:d82269f42ae3cb74236c5124bd90333af5182919b8bd877db54ad860e8c49e66"
             ],
             "index": "pypi",
-            "version": "==2.2.4"
+            "version": "==3.0.0"
         },
         "nrel-pysam-stubs": {
             "hashes": [

diff --git a/requirements.txt b/requirements.txt
index f946e12f3..e47d21c11 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -12,7 +12,7 @@ requests~=2.25
 scipy~=1.7
 tqdm==4.29.1
 xlrd==1.2.0
-nrel-pysam~=2.2
+nrel-pysam~=3.0
 black
 pytest
 coverage

From 59d66aea407696a138e25b3df34a3a11b19ce594 Mon Sep 17 00:00:00 2001
From: Daniel Olsen
Date: Thu, 6 Jan 2022 11:38:26 -0800
Subject: [PATCH 03/12] chore: update PVWatts module to version 8

---
 prereise/gather/solardata/nsrdb/sam.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/prereise/gather/solardata/nsrdb/sam.py b/prereise/gather/solardata/nsrdb/sam.py
index c05574c92..67d07fb32 100644
--- a/prereise/gather/solardata/nsrdb/sam.py
+++ b/prereise/gather/solardata/nsrdb/sam.py
@@ -1,6 +1,6 @@
 import numpy as np
 import pandas as pd
-import PySAM.Pvwattsv7 as PVWatts
+import PySAM.Pvwattsv8 as PVWatts
 import PySAM.PySSC as pssc  # noqa: N813
 from tqdm import tqdm

@@ -48,7 +48,7 @@ def calculate_power(solar_data, pv_dict):
     :param dict pv_dict: solar plant attributes.
     :return: (*numpy.array*) hourly power output.
""" - pv_dat = pssc.dict_to_ssc_table(pv_dict, "pvwattsv7") + pv_dat = pssc.dict_to_ssc_table(pv_dict, "pvwattsv8") pv = PVWatts.wrap(pv_dat) pv.SolarResource.assign({"solar_resource_data": solar_data}) pv.execute() From 6b0d6a917994bd978927856682c37816d78e0beb Mon Sep 17 00:00:00 2001 From: Daniel Olsen Date: Wed, 5 Jan 2022 17:37:51 -0800 Subject: [PATCH 04/12] chore: add name to generator index --- prereise/gather/griddata/hifld/data_process/generators.py | 1 + 1 file changed, 1 insertion(+) diff --git a/prereise/gather/griddata/hifld/data_process/generators.py b/prereise/gather/griddata/hifld/data_process/generators.py index 36f1a773c..ff9818c2e 100644 --- a/prereise/gather/griddata/hifld/data_process/generators.py +++ b/prereise/gather/griddata/hifld/data_process/generators.py @@ -343,5 +343,6 @@ def build_plant(bus, substations, kwargs={}): ) generators["type"] = generators["type"].replace(const.fuel_translations) generators["GenIOD"] = 0 + generators.index.name = "plant_id" return generators From ada819e2fab43575c31a777913576323ef258a31 Mon Sep 17 00:00:00 2001 From: Daniel Olsen Date: Thu, 6 Jan 2022 16:16:46 -0800 Subject: [PATCH 05/12] ci: add python 3.10 test --- .github/workflows/test.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml index ada5f3d00..5922d7ea0 100644 --- a/.github/workflows/test.yml +++ b/.github/workflows/test.yml @@ -14,7 +14,7 @@ jobs: strategy: fail-fast: false matrix: - python-version: ['3.8', '3.9'] + python-version: ['3.8', '3.9', '3.10'] name: Python ${{ matrix.python-version }} steps: From 2f777061cbdd707742ae6aa06f233be57157b0c8 Mon Sep 17 00:00:00 2001 From: Daniel Olsen Date: Wed, 5 Jan 2022 15:47:38 -0800 Subject: [PATCH 06/12] doc: fix docstrings for solar profile functions --- prereise/gather/solardata/nsrdb/sam.py | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/prereise/gather/solardata/nsrdb/sam.py b/prereise/gather/solardata/nsrdb/sam.py index 67d07fb32..4b07ae4b0 100644 --- a/prereise/gather/solardata/nsrdb/sam.py +++ b/prereise/gather/solardata/nsrdb/sam.py @@ -80,8 +80,9 @@ def retrieve_data_blended( :param int/str year: year. :param int/float rate_limit: minimum seconds to wait between requests to NREL :param str cache_dir: directory to cache downloaded data. If None, don't cache. - :return: (*pandas.DataFrame*) -- data frame with *'Pout'*, *'plant_id'*, - *'ts'* and *'ts_id'* as columns. Values are power output for a 1MW generator. + :return: (*pandas.DataFrame*) -- data frame of normalized power profiles. The index + is hourly timestamps for the profile year, the columns are plant IDs, the values + are floats. """ xor_err_msg = ( "Either grid xor (solar_plant and interconnect_to_state_abvs) must be defined" @@ -178,9 +179,7 @@ def retrieve_data_individual( email, api_key, solar_plant, year="2016", rate_limit=0.5, cache_dir=None ): """Retrieves irradiance data from NSRDB and calculate the power output using - the System Adviser Model (SAM). Either a Grid object needs to be passed to ``grid``, - or (a data frame needs to be passed to ``solar_plant`` and a string needs to be - passed to ``grid_model``. + the System Adviser Model (SAM). :param str email: email used to`sign up `_. :param str api_key: API key. @@ -190,8 +189,9 @@ def retrieve_data_individual( :param int/str year: year. :param int/float rate_limit: minimum seconds to wait between requests to NREL :param str cache_dir: directory to cache downloaded data. 
If None, don't cache. - :return: (*pandas.DataFrame*) -- data frame with *'Pout'*, *'plant_id'*, - *'ts'* and *'ts_id'* as columns. Values are power output for a 1MW generator. + :return: (*pandas.DataFrame*) -- data frame of normalized power profiles. The index + is hourly timestamps for the profile year, the columns are plant IDs, the values + are floats. """ # Verify that each solar plant has exactly one tracking type equal to True array_type_mapping = { From 89835e1cd50d54764bf0c2e88f96e1019ad32d41 Mon Sep 17 00:00:00 2001 From: Daniel Olsen Date: Thu, 6 Jan 2022 20:26:41 -0800 Subject: [PATCH 07/12] feat: add method to translate PSM3Data back to original CSV file format --- prereise/gather/solardata/nsrdb/nrel_api.py | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/prereise/gather/solardata/nsrdb/nrel_api.py b/prereise/gather/solardata/nsrdb/nrel_api.py index e17c22905..985561507 100644 --- a/prereise/gather/solardata/nsrdb/nrel_api.py +++ b/prereise/gather/solardata/nsrdb/nrel_api.py @@ -71,6 +71,19 @@ def to_dict(self): ) return result + def to_sam_weather_file_format(self): + """Convert the data to the format expected by nrel-pysam for local files. See + https://developer.nrel.gov/docs/solar/nsrdb/psm3-download/. + + :return: (*list*) -- a list of lists which can be passed to + :meth:`csv.writer.writerows` and then loaded from disk. + """ + metadata_names = ["lat", "lon", "tz", "elevation"] + metadata_values = self.lat, self.lon, self.tz, self.elevation + data_headers = self.data_resource.columns.tolist() + data_rows = self.data_resource.to_numpy().tolist() + return [metadata_names, metadata_values, data_headers] + data_rows + class NrelApi: """Provides an interface to the NREL API for PSM3 data. It supports From d4d62fe53194cbe8fd2ac477274a45b68db9b18b Mon Sep 17 00:00:00 2001 From: Daniel Olsen Date: Fri, 7 Jan 2022 09:17:25 -0800 Subject: [PATCH 08/12] refactor: make NREL API downloads resilient to 5xx errors --- prereise/gather/solardata/nsrdb/nrel_api.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/prereise/gather/solardata/nsrdb/nrel_api.py b/prereise/gather/solardata/nsrdb/nrel_api.py index 985561507..c00223892 100644 --- a/prereise/gather/solardata/nsrdb/nrel_api.py +++ b/prereise/gather/solardata/nsrdb/nrel_api.py @@ -164,6 +164,10 @@ def get_psm3_at( ) def download(url): resp = requests.get(url) + if resp.status_code // 100 == 5: # all 5xx errors, server side + raise TransientError( + f"Server side error, retry_count={download.retry_count}" + ) if resp.status_code == 429: raise TransientError( f"Too many requests, retry_count={download.retry_count}" From 6e6a31d7acde4aa016263d5a8613e66973ba1457 Mon Sep 17 00:00:00 2001 From: Daniel Olsen Date: Wed, 5 Jan 2022 17:55:42 -0800 Subject: [PATCH 09/12] feat: add concentrating solar power profile generation function --- prereise/gather/solardata/nsrdb/sam.py | 75 +++++++++++++++++++------- 1 file changed, 55 insertions(+), 20 deletions(-) diff --git a/prereise/gather/solardata/nsrdb/sam.py b/prereise/gather/solardata/nsrdb/sam.py index 4b07ae4b0..aa23ec8e2 100644 --- a/prereise/gather/solardata/nsrdb/sam.py +++ b/prereise/gather/solardata/nsrdb/sam.py @@ -1,7 +1,12 @@ +import csv +import os +import tempfile + import numpy as np import pandas as pd import PySAM.Pvwattsv8 as PVWatts import PySAM.PySSC as pssc # noqa: N813 +from PySAM import TcsgenericSolar from tqdm import tqdm from prereise.gather.solardata.helpers import get_plant_id_unique_location @@ -18,6 +23,7 @@ "inv_eff": 94, "losses": 14, "tilt": 
+    "ilr": 1.25,  # Inverter Loading Ratio
 }


 def generate_timestamps_without_leap_day(year):
@@ -41,8 +47,8 @@
     return sam_dates, leap_day


-def calculate_power(solar_data, pv_dict):
-    """Use PVWatts to translate weather data into power.
+def calculate_power_pv(solar_data, pv_dict):
+    """Use PVWatts to translate weather data into power using a photovoltaic (PV) model.

     :param dict solar_data: weather data as returned by :meth:`Psm3Data.to_dict`.
     :param dict pv_dict: solar plant attributes.
@@ -55,6 +61,28 @@
     return np.array(pv.Outputs.gen)


+def calculate_power_csp(solar_data):
+    """Use the System Adviser Model (SAM) to translate weather data into power using a
+    concentrating solar power (CSP) model.
+
+    :param list solar_data: weather data as returned by
+        :meth:`Psm3Data.to_sam_weather_file_format`.
+    :return: (*numpy.array*) hourly normalized power output, clipped to the
+        [0, 1] interval.
+    """
+    csp = TcsgenericSolar.default("GenericCSPSystemCommercial")
+    csp.Type260.assign({"w_des": 1e-3})  # Capacity is in MW, but outputs are in kW
+    # The solar module expects weather data as a local file, so let's create one
+    with tempfile.TemporaryDirectory() as tmpdirname:
+        filename = os.path.join(tmpdirname, "weather_data.csv")
+        with open(filename, "w", newline="") as f:
+            writer = csv.writer(f)
+            writer.writerows(solar_data)
+        csp.Weather.assign({"file_name": filename})
+        csp.execute()
+    return np.clip(np.array(csp.Outputs.gen), 0, 1)  # clip values outside [0, 1]
+
+
 def retrieve_data_blended(
@@ -131,8 +159,6 @@
         states_in_interconnect = list(interconnect_to_state_abvs[interconnect])
         frac[zone] = get_pv_tracking_ratio_state(pv_info, states_in_interconnect)

-    # Inverter Loading Ratio
-    ilr = 1.25
     api = NrelApi(email, api_key, rate_limit)

     # Identify unique location
@@ -149,7 +175,7 @@
             leap_day=False,
             dates=sam_dates,
             cache_dir=cache_dir,
-        ).to_dict()
+        )

         for i, plant_id in enumerate(plants):
             if i == 0:
@@ -159,12 +185,14 @@
                 power = 0
                 for j, axis in enumerate([0, 2, 4]):
                     plant_pv_dict = {
-                        "system_capacity": ilr,
-                        "dc_ac_ratio": ilr,
+                        "system_capacity": default_pv_parameters["ilr"],
+                        "dc_ac_ratio": default_pv_parameters["ilr"],
                         "array_type": axis,
                     }
                     pv_dict = {**default_pv_parameters, **plant_pv_dict}
-                    power += tracking_ratios[j] * calculate_power(solar_data, pv_dict)
+                    power += tracking_ratios[j] * calculate_power_pv(
+                        solar_data.to_dict(), pv_dict
+                    )
                 if leap_day is not None:
                     power = np.insert(power, leap_day, power[leap_day - 24 : leap_day])
             else:
@@ -228,23 +256,30 @@
             leap_day=False,
             dates=sam_dates,
             cache_dir=cache_dir,
-        ).to_dict()
+        )

         for plant_id in plants:
             series = solar_plant.loc[plant_id]
-            ilr = series["DC Net Capacity (MW)"] / series["Nameplate Capacity (MW)"]
-            plant_pv_dict = {
-                "system_capacity": ilr,
-                "dc_ac_ratio": ilr,
-                "array_type": plant_array_types.loc[plant_id],
-            }
-            if plant_pv_dict["array_type"] == 0:
-                plant_pv_dict["tilt"] = series["Tilt Angle"]
-            pv_dict = {**default_pv_parameters, **plant_pv_dict}
-            power = calculate_power(solar_data, pv_dict)
+            # EIA codes: PV = Photovoltaic, CP = Concentrating solar, ST = steam turbine
+            if series["Prime Mover"] == "PV":
+                ilr = series["DC Net Capacity (MW)"] / series["Nameplate Capacity (MW)"]
+            else:
+                # Besides PV, other types don't seem to have DC capacity defined
+                ilr = default_pv_parameters["ilr"]
+            if series["Prime Mover"] in {"CP", "PV"}:
+                plant_pv_dict = {
+                    "system_capacity": ilr,
+                    "dc_ac_ratio": ilr,
+                    "array_type": plant_array_types.loc[plant_id],
+                }
+                if plant_pv_dict["array_type"] == 0:
+                    plant_pv_dict["tilt"] = series["Tilt Angle"]
+                pv_dict = {**default_pv_parameters, **plant_pv_dict}
+                power = calculate_power_pv(solar_data.to_dict(), pv_dict)
+            elif series["Prime Mover"] == "ST":
+                power = calculate_power_csp(solar_data.to_sam_weather_file_format())
             if leap_day is not None:
                 power = np.insert(power, leap_day, power[leap_day - 24 : leap_day])
-
             data[plant_id] = power

     return pd.DataFrame(data, index=real_dates).sort_index(axis="columns")

From 16d7427f5acb52eab8b3bb28aa55c8ea6ba745c6 Mon Sep 17 00:00:00 2001
From: Daniel Olsen
Date: Fri, 7 Jan 2022 15:37:29 -0800
Subject: [PATCH 10/12] refactor: fetch full weather year, but still manage
 non-leap output

---
 prereise/gather/solardata/nsrdb/sam.py | 61 ++++++++++----------------
 1 file changed, 23 insertions(+), 38 deletions(-)

diff --git a/prereise/gather/solardata/nsrdb/sam.py b/prereise/gather/solardata/nsrdb/sam.py
index aa23ec8e2..a6ff65e38 100644
--- a/prereise/gather/solardata/nsrdb/sam.py
+++ b/prereise/gather/solardata/nsrdb/sam.py
@@ -25,26 +25,7 @@
     "tilt": 30,
     "ilr": 1.25,  # Inverter Loading Ratio
 }
-
-
-def generate_timestamps_without_leap_day(year):
-    """For a given year, return timestamps for each non-leap-day hour, and the timestamp
-    of the beginning of the leap day (if there is one).
-
-    :param int/str year: year to generate timestamps for.
-    :return: (*tuple*) --
-        pandas.DatetimeIndex: for each non-leap-day-hour of the given year.
-        pandas.Timestamp/None: timestamp for the first hour of the leap day (if any).
-    """
-    # SAM only takes 365 days, so for a leap year: leave out the leap day.
-    try:
-        leap_day = (pd.Timestamp(f"{year}-02-29-00").dayofyear - 1) * 24
-        sam_dates = pd.date_range(start=f"{year}-01-01-00", freq="H", periods=365 * 24)
-        sam_dates = sam_dates.map(lambda t: t.replace(year=int(year)))
-    except ValueError:
-        leap_day = None
-        sam_dates = pd.date_range(start=f"{year}-01-01-00", freq="H", periods=365 * 24)
-    return sam_dates, leap_day
+leap_hour_idx = 59 * 24  # 59 days have passed already (31 in January, 28 in February)


 def calculate_power_pv(solar_data, pv_dict):
@@ -141,10 +122,8 @@
         for z in solar_plant["zone_id"].unique()
     }

-    real_dates = pd.date_range(
-        start=f"{year}-01-01-00", end=f"{year}-12-31-23", freq="H"
-    )
-    sam_dates, leap_day = generate_timestamps_without_leap_day(year)
+    dates = pd.date_range(start=f"{year}-01-01-00", end=f"{year}-12-31-23", freq="H")
+    leap_day = f"{year}-02-29" in dates

     # PV tracking ratios
     # By state and by interconnect when EIA data do not have any solar PV in the state
@@ -172,8 +151,8 @@
             lon,
             attributes="dhi,dni,wind_speed,air_temperature",
             year=year,
-            leap_day=False,
-            dates=sam_dates,
+            leap_day=leap_day,
+            dates=dates,
             cache_dir=cache_dir,
         )
@@ -193,12 +172,16 @@
                     power += tracking_ratios[j] * calculate_power_pv(
                         solar_data.to_dict(), pv_dict
                     )
-                if leap_day is not None:
-                    power = np.insert(power, leap_day, power[leap_day - 24 : leap_day])
+                if leap_day:
+                    power = np.insert(
+                        power,
+                        leap_hour_idx,
+                        power[leap_hour_idx - 24 : leap_hour_idx],
+                    )
             else:
                 # For every other plant, look up power from first plant at the location
                 power = data[first_plant_id]
             data[plant_id] = power

-    return pd.DataFrame(data, index=real_dates).sort_index(axis="columns")
+    return pd.DataFrame(data, index=dates).sort_index(axis="columns")

 def retrieve_data_individual(
@@ -236,10 +219,8 @@
         .apply(lambda x: array_type_mapping[x.idxmax()], axis=1)
     )

-    real_dates = pd.date_range(
-        start=f"{year}-01-01-00", end=f"{year}-12-31-23", freq="H"
-    )
-    sam_dates, leap_day = generate_timestamps_without_leap_day(year)
+    dates = pd.date_range(start=f"{year}-01-01-00", end=f"{year}-12-31-23", freq="H")
+    leap_day = f"{year}-02-29" in dates

     api = NrelApi(email, api_key, rate_limit)
@@ -253,8 +234,8 @@
             lon,
             attributes="dhi,dni,wind_speed,air_temperature",
             year=year,
-            leap_day=False,
-            dates=sam_dates,
+            leap_day=leap_day,
+            dates=dates,
             cache_dir=cache_dir,
         )
@@ -278,8 +259,12 @@
                 power = calculate_power_pv(solar_data.to_dict(), pv_dict)
             elif series["Prime Mover"] == "ST":
                 power = calculate_power_csp(solar_data.to_sam_weather_file_format())
-            if leap_day is not None:
-                power = np.insert(power, leap_day, power[leap_day - 24 : leap_day])
+            if leap_day:
+                power = np.insert(
+                    power,
+                    leap_hour_idx,
+                    power[leap_hour_idx - 24 : leap_hour_idx],
+                )
             data[plant_id] = power

-    return pd.DataFrame(data, index=real_dates).sort_index(axis="columns")
+    return pd.DataFrame(data, index=dates).sort_index(axis="columns")

From 62067be58aced46f3c271602b510e483069aaaa5 Mon Sep 17 00:00:00 2001
From: Daniel Olsen
Date: Wed, 5 Jan 2022 15:43:28 -0800
Subject: [PATCH 11/12] feat: add function to pass solar-specific 860 data to
 profile generation

---
 .../griddata/hifld/data_process/profiles.py | 59 +++++++++++++++++++
 1 file changed, 59 insertions(+)
 create mode 100644 prereise/gather/griddata/hifld/data_process/profiles.py

diff --git a/prereise/gather/griddata/hifld/data_process/profiles.py b/prereise/gather/griddata/hifld/data_process/profiles.py
new file mode 100644
index 000000000..a469b5e05
--- /dev/null
+++ b/prereise/gather/griddata/hifld/data_process/profiles.py
@@ -0,0 +1,59 @@
+from prereise.gather.griddata.hifld import const
+from prereise.gather.griddata.hifld.data_access.load import get_eia_form_860
+from prereise.gather.solardata.nsrdb.sam import retrieve_data_individual
+
+
+def floatify(x):
+    """Coerce an object to a float, returning a float NaN for any values which raise a
+    :exc:`ValueError` when passed to :func:`float`.
+
+    :param object x: object to coerce.
+    :return: (*float*) -- coerced value.
+    """
+    try:
+        return float(x)
+    except ValueError:
+        return float("nan")
+
+
+def build_solar(nrel_email, nrel_api_key, solar_plants, **solar_kwargs):
+    """Use plant-level data to build solar profiles.
+
+    :param str nrel_email: email used to `sign up <https://developer.nrel.gov/signup/>`_.
+    :param str nrel_api_key: API key.
+    :param pandas.DataFrame solar_plants: data frame of solar farms.
+    :param dict solar_kwargs: keyword arguments to pass to
+        :func:`prereise.gather.solardata.nsrdb.sam.retrieve_data_individual`.
+    :return: (*pandas.DataFrame*) -- data frame of normalized power profiles. The index
+        is hourly timestamps for the profile year, the columns are plant IDs, the values
+        are floats.
+    """
+    boolean_columns = ["Single-Axis Tracking?", "Dual-Axis Tracking?", "Fixed Tilt?"]
+    float_columns = ["DC Net Capacity (MW)", "Nameplate Capacity (MW)", "Tilt Angle"]
+    # Load raw 'extra' table data, join on plant & generating unit, re-establish index
+    extra_solar_data = get_eia_form_860(const.blob_paths["eia_form860_2019_solar"])
+    full_data = solar_plants.merge(
+        extra_solar_data, on=["Plant Code", "Generator ID"], suffixes=(None, "_extra")
+    )
+    full_data.index = solar_plants.index
+    # Process data to expected types for profile generation
+    for col in float_columns:
+        full_data[col] = full_data[col].map(floatify)
+    for col in boolean_columns:
+        # 'Y' becomes True, anything else ('N', blank, etc.) becomes False
+        full_data[col] = full_data[col] == "Y"
+
+    # If panel type isn't known definitively, assume 100% Fixed Tilt
+    # Solar thermal also ends up labeled as fixed tilt, but this will be ignored
+    bad_booleans = full_data.index[full_data[boolean_columns].sum(axis=1) != 1]
+    full_data.loc[bad_booleans, boolean_columns] = False
+    full_data.loc[bad_booleans, "Fixed Tilt?"] = True
+
+    full_data.index.name = "plant_id"  # needed for next step but gets lost in the merge
+    profiles = retrieve_data_individual(
+        nrel_email,
+        nrel_api_key,
+        solar_plant=full_data,
+        **solar_kwargs,
+    )
+    return profiles

From f639807edbef01e659b5e1dd9a68f1caa7faf0c2 Mon Sep 17 00:00:00 2001
From: Daniel Olsen
Date: Wed, 5 Jan 2022 15:23:32 -0800
Subject: [PATCH 12/12] feat: add solar profile generation step to
 orchestration

---
 .../gather/griddata/hifld/orchestration.py | 127 +++++++++++++-----
 1 file changed, 93 insertions(+), 34 deletions(-)

diff --git a/prereise/gather/griddata/hifld/orchestration.py b/prereise/gather/griddata/hifld/orchestration.py
index 05fd47106..74b4bf23d 100644
--- a/prereise/gather/griddata/hifld/orchestration.py
+++ b/prereise/gather/griddata/hifld/orchestration.py
@@ -6,55 +6,114 @@
 from prereise.gather.griddata.hifld.const import powersimdata_column_defaults
 from prereise.gather.griddata.hifld.data_process.demand import assign_demand_to_buses
 from prereise.gather.griddata.hifld.data_process.generators import build_plant
+from prereise.gather.griddata.hifld.data_process.profiles import build_solar
 from prereise.gather.griddata.hifld.data_process.transmission import build_transmission


-def create_csvs(output_folder):
+def create_csvs(output_folder, nrel_email, nrel_api_key, solar_kwargs={}):
     """Process HIFLD source data to CSVs compatible with PowerSimData.

     :param str output_folder: directory to write CSVs to.
+    :param str nrel_email: email used to `sign up <https://developer.nrel.gov/signup/>`_.
+    :param str nrel_api_key: API key.
+    :param dict solar_kwargs: keyword arguments to pass to
+        :func:`prereise.gather.solardata.nsrdb.sam.retrieve_data_individual`.
+    """
+    full_tables = create_grid(output_folder)
+    create_profiles(
+        full_tables["plant"], nrel_email, nrel_api_key, output_folder, solar_kwargs
+    )
+
+
+def create_grid(output_folder=None):
+    """Process HIFLD source data into grid data tables compatible with PowerSimData.
+
+    :param str output_folder: directory to write CSVs to. If None, CSVs will not be
+        written (just returned).
+    :return: (*dict*) -- keys are strings for table names, values are dataframes that
+        correspond to those tables. These dataframes have all available columns for
+        each table, even though the CSV files which are written are limited to only the
+        columns expected by powersimdata.
""" # Process grid data from original sources branch, bus, substation, dcline = build_transmission() plant = build_plant(bus, substation) assign_demand_to_buses(substation, branch, plant, bus) - outputs = {} - outputs["branch"] = branch - outputs["dcline"] = dcline - outputs["sub"] = substation + full_tables = {} + full_tables["branch"] = branch + full_tables["dcline"] = dcline + full_tables["sub"] = substation # Separate tables as necessary to match PowerSimData format # bus goes to bus and bus2sub - outputs["bus2sub"] = bus[["sub_id", "interconnect"]] - outputs["bus"] = bus.drop(["sub_id"], axis=1) + full_tables["bus2sub"] = bus[["sub_id", "interconnect"]] + full_tables["bus"] = bus.drop(["sub_id"], axis=1) # plant goes to plant and gencost - outputs["gencost"] = plant[["c0", "c1", "c2", "interconnect"]].copy() - outputs["plant"] = plant.drop(["c0", "c1", "c2"], axis=1) + full_tables["gencost"] = plant[["c0", "c1", "c2", "interconnect"]].copy() + full_tables["plant"] = plant.drop(["c0", "c1", "c2"], axis=1) # Fill in missing column values for name, defaults in powersimdata_column_defaults.items(): - outputs[name] = outputs[name].assign(**defaults) - - # Filter to only the columns expected by PowerSimData, in the expected order - for name, df in outputs.items(): - col_names = getattr(psd_const, f"col_name_{name}") - if name == "bus": - # The bus column names in PowerSimData include the index for legacy reasons - col_names = col_names[1:] - if name == "branch": - col_names += ["branch_device_type"] - if name == "plant": - col_names += ["type", "GenFuelCost", "GenIOB", "GenIOC", "GenIOD"] - if name == "dcline": - col_names += ["from_interconnect", "to_interconnect"] - else: - col_names += ["interconnect"] - outputs[name] = outputs[name][col_names] - - # Save files - os.makedirs(output_folder, exist_ok=True) - for name, df in outputs.items(): - df.to_csv(os.path.join(output_folder, f"{name}.csv")) - # The zone file gets copied directly - zone_path = os.path.join(os.path.dirname(__file__), "data", "zone.csv") - shutil.copyfile(zone_path, os.path.join(output_folder, "zone.csv")) + full_tables[name] = full_tables[name].assign(**defaults) + + if output_folder is not None: + os.makedirs(output_folder, exist_ok=True) + # Filter to only the columns expected by PowerSimData, in the expected order + powersimdata_outputs = {} + for name, df in full_tables.items(): + col_names = getattr(psd_const, f"col_name_{name}") + if name == "bus": + # The bus column names in PowerSimData include the index + col_names = col_names[1:] + if name == "branch": + col_names += ["branch_device_type"] + if name == "plant": + col_names += ["type", "GenFuelCost", "GenIOB", "GenIOC", "GenIOD"] + if name == "dcline": + col_names += ["from_interconnect", "to_interconnect"] + else: + col_names += ["interconnect"] + powersimdata_outputs[name] = full_tables[name][col_names] + + # Save files + for name, df in powersimdata_outputs.items(): + df.to_csv(os.path.join(output_folder, f"{name}.csv")) + # The zone file gets copied directly + zone_path = os.path.join(os.path.dirname(__file__), "data", "zone.csv") + shutil.copyfile(zone_path, os.path.join(output_folder, "zone.csv")) + + return full_tables + + +def create_profiles( + plants, nrel_email, nrel_api_key, output_folder=None, solar_kwargs={} +): + """Process a table of plant data to produce profile CSVs compatible with + PowerSimData. + + :param pandas.DataFrame plants: table of plant data. + :param str nrel_email: email used to`sign up `_. + :param str nrel_api_key: API key. 
+ :param str output_folder: directory to write CSVs to. If None, CSVs will not be + written (just returned). + :param dict solar_kwargs: keyword arguments to pass to + :func:`prereise.gather.solardata.nsrdb.sam.retrieve_data_individual`. + :return: (*dict*) -- keys are strings for profile names, values are dataframes, + indexed by timestamp, with plant IDs as columns. + """ + # Use plant data to build profiles + profiles = { + "solar": build_solar( + nrel_email, + nrel_api_key, + plants.query("type == 'solar'"), + **solar_kwargs, + ), + } + if output_folder is not None: + os.makedirs(output_folder, exist_ok=True) + # Write profiles + for name, df in profiles.items(): + df.to_csv(os.path.join(output_folder, f"{name}.csv")) + + return profiles
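A note on the leap-day handling in patches 09 and 10: the SAM models simulate a
365-day weather year (per the removed generate_timestamps_without_leap_day
docstring, "SAM only takes 365 days"), so their output has 8760 hourly values and
no February 29. For a leap year, both retrieve functions therefore duplicate the
24 values of February 28 in place of the missing day before indexing by the full
8784-hour calendar. A toy sketch of the numpy mechanics (the array contents below
are made up, only the shapes and indices matter):

    import numpy as np

    leap_hour_idx = 59 * 24  # hours before Feb 29 (31 days of January + 28 of February)
    power = np.linspace(0, 1, 8760)  # stand-in for a SAM output with no leap day
    padded = np.insert(power, leap_hour_idx, power[leap_hour_idx - 24 : leap_hour_idx])

    assert padded.shape == (8784,)  # a full leap year of hours
    # the synthesized Feb 29 repeats Feb 28
    assert (
        padded[leap_hour_idx : leap_hour_idx + 24]
        == padded[leap_hour_idx - 24 : leap_hour_idx]
    ).all()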
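With patches 11 and 12 applied, grid building and profile generation are driven
from a single entry point. Below is a minimal, hypothetical driver sketch; the
email, API key, folder names, and year are placeholders, and solar_kwargs accepts
the retrieve_data_individual arguments shown above (year, rate_limit, cache_dir):

    from prereise.gather.griddata.hifld.orchestration import create_csvs

    create_csvs(
        output_folder="hifld_csvs",     # placeholder; grid and profile CSVs land here
        nrel_email="user@example.com",  # placeholder NREL account email
        nrel_api_key="DEMO_KEY",        # placeholder API key
        solar_kwargs={"year": "2019", "rate_limit": 0.5, "cache_dir": "nrel_cache"},
    )

create_grid and create_profiles can also be called separately; both accept
output_folder=None to return the dataframes without writing any CSV files.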