Skip to content

Commit

Permalink
1) added outlier events to find out outliers; 2) tested outlier event…
Browse files Browse the repository at this point in the history
…s with a list of numbers and data from station; 3) moved class function from_station to BaseEvents
  • Loading branch information
YangKehan authored and YangKehan committed Nov 7, 2023
1 parent 664878c commit a22a8b6
Show file tree
Hide file tree
Showing 3 changed files with 1,230 additions and 39 deletions.
101 changes: 62 additions & 39 deletions metevents/events.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,8 @@
from metloom.pointdata import CDECPointData, SnotelPointData, MesowestPointData
from pandas.tseries.frequencies import to_offset
from .utilities import determine_freq
import numpy as np



class BaseEvents:
Expand All @@ -12,6 +14,11 @@ def __init__(self, data):
self.data = data
self._groups = []
self._group_ids = None
self._outliers = None

@property
def outliers(self):
    """
    Records flagged as outliers.

    Returns:
        The value stored in ``_outliers``; this is ``None`` until a
        ``find()`` implementation assigns it.
    """
    return self._outliers

@property
def events(self):
Expand All @@ -38,12 +45,47 @@ def group_condition_by_time(ind):
return groups, ind_sum

@classmethod
def from_station(cls, station_id, start, stop, station_name='unknown',
                 source='NRCS'):
    """
    Build an events object from daily accumulated precipitation pulled
    through metloom.

    Args:
        station_id: string id of the station of interest
        start: Datetime object when to start looking for data
        stop: Datetime object when to stop looking for data
        station_name: String name of the station to pass to pointdata
        source: Network/datasource to search for data, matched
            case-insensitively against each point class's DATASOURCE
            (options: NRCS, mesowest, CDEC)

    Returns:
        An instance of ``cls`` constructed from the differenced
        accumulated-precipitation column, indexed by ``datetime``.

    Raises:
        ValueError: if ``source`` matches no known datasource, or if the
            data request returns ``None``.
    """
    networks = [SnotelPointData, CDECPointData, MesowestPointData]
    wanted = source.lower()

    # First point class whose datasource name matches the request
    station_cls = next(
        (c for c in networks if c.DATASOURCE.lower() == wanted), None
    )
    if station_cls is None:
        raise ValueError(f'Datasource {source} is invalid. Use '
                         f'{", ".join([c.DATASOURCE for c in networks])}')

    point = station_cls(station_id, station_name)

    # Request daily accumulated precipitation for the period
    variable = point.ALLOWED_VARIABLES.PRECIPITATIONACCUM
    daily = point.get_daily_data(start, stop, [variable])

    if daily is None:
        raise ValueError(f'The combination of pulling precip from {station_id} '
                         f'during {start}-{stop} produced no data. Check station '
                         f'is real and has precip data between specified dates.')

    # Keep only the datetime level as the index before differencing
    by_time = daily.reset_index().set_index('datetime')
    increments = by_time[variable.name].diff()
    return cls(increments)

class StormEvents(BaseEvents):

class StormEvents(BaseEvents):
def find(self, instant_mass_to_start=0.1, min_storm_total=0.5, hours_to_stop=24,
max_storm_hours=336):
"""
Expand Down Expand Up @@ -109,41 +151,22 @@ def find(self, instant_mass_to_start=0.1, min_storm_total=0.5, hours_to_stop=24,
# Update start for the next storm
start = next_start

@classmethod
def from_station(cls, station_id, start, stop, station_name='unknown',
source='NRCS'):
"""
Form storm analysis from metloom

Args:
station_id: string id of the station of interest
start: Datetime object when to start looking for data
stop: Datetime object when to stop looking for data
source: Network/datasource to search for data options: NRCS, mesowest, CDEC
station_name: String name of the station to pass to pointdata
class OutlierEvents(BaseEvents):
def find(self, threshold=3, min_records=15):
    """
    Flag individual records whose z-score magnitude exceeds a threshold.

    The mean and (population) standard deviation are computed over the
    non-NaN values of ``self.data``. Every record with ``|z| > threshold``
    is stored in ``self._outliers``, keeping its original index.

    Args:
        threshold: z-score magnitude above which a record is an outlier
        min_records: the series must hold strictly more than this many
            records for the search to run; otherwise ``self._outliers``
            is left unchanged

    Returns:
        None. The result is stored in ``self._outliers``; it is an empty
        selection when no record qualifies, or when the z-score is
        undefined (all values NaN, or zero spread).
    """
    data = self.data
    if len(data) <= min_records:
        # Too few records for a meaningful mean/sd; keep previous state
        return

    values = np.asarray(data.values, dtype=float)
    is_outlier = np.zeros(len(values), dtype=bool)

    valid = ~np.isnan(values)
    if valid.any():
        mean = values[valid].mean()
        sd = values[valid].std()
        # sd == 0 (constant series) or NaN makes the z-score undefined;
        # skip the division instead of producing NaN/inf with warnings
        if sd > 0:
            z_score = (values - mean) / sd
            # NaN z-scores compare False, so missing records are never flagged
            is_outlier = np.abs(z_score) > threshold

    # Only save outliers
    self._outliers = data[is_outlier]
Loading

0 comments on commit a22a8b6

Please sign in to comment.