From a22a8b63cd0603153512f834474beb3e3a168f52 Mon Sep 17 00:00:00 2001 From: YangKehan Date: Tue, 7 Nov 2023 09:55:26 -0500 Subject: [PATCH] 1) added outlier events to find out outliers; 2) tested outlier events with a list of numbers and data from station; 3) moved class function from_station to BaseEvents --- metevents/events.py | 101 +-- metevents/test_outlier.ipynb | 1144 ++++++++++++++++++++++++++++++++++ tests/test_events.py | 24 + 3 files changed, 1230 insertions(+), 39 deletions(-) create mode 100644 metevents/test_outlier.ipynb diff --git a/metevents/events.py b/metevents/events.py index 5b222d9..6ba4e56 100644 --- a/metevents/events.py +++ b/metevents/events.py @@ -4,6 +4,8 @@ from metloom.pointdata import CDECPointData, SnotelPointData, MesowestPointData from pandas.tseries.frequencies import to_offset from .utilities import determine_freq +import numpy as np + class BaseEvents: @@ -12,6 +14,11 @@ def __init__(self, data): self.data = data self._groups = [] self._group_ids = None + self._outliers = None + + @property + def outliers(self): + return self._outliers @property def events(self): @@ -38,12 +45,47 @@ def group_condition_by_time(ind): return groups, ind_sum @classmethod - def from_station(cls, station_id, start, end): - raise NotImplementedError('Not implemented') + def from_station(cls, station_id, start, stop, station_name='unknown', + source='NRCS'): + """ + + Form storm analysis from metloom + + Args: + station_id: string id of the station of interest + start: Datetime object when to start looking for data + stop: Datetime object when to stop looking for data + source: Network/datasource to search for data options: NRCS, mesowest, CDEC + station_name: String name of the station to pass to pointdata + """ + pnt = None + pnt_classes = [SnotelPointData, CDECPointData, MesowestPointData] + for STATION_CLASS in pnt_classes: + if STATION_CLASS.DATASOURCE.lower() == source.lower(): + pnt = STATION_CLASS(station_id, station_name) + break + + if pnt is None: + raise ValueError(f'Datasource {source} is invalid. Use ' + f'{", ".join([c.DATASOURCE for c in pnt_classes])}') + + # Pull data + variable = pnt.ALLOWED_VARIABLES.PRECIPITATIONACCUM + + df = pnt.get_daily_data(start, stop, [variable]) + + if df is None: + raise ValueError(f'The combination of pulling precip from {station_id} ' + f'during {start}-{stop} produced no data. Check station ' + f'is real and has precip data between specified dates.') + else: + df = df.reset_index().set_index('datetime') + + return cls(df[variable.name].diff()) -class StormEvents(BaseEvents): +class StormEvents(BaseEvents): def find(self, instant_mass_to_start=0.1, min_storm_total=0.5, hours_to_stop=24, max_storm_hours=336): """ @@ -109,41 +151,22 @@ def find(self, instant_mass_to_start=0.1, min_storm_total=0.5, hours_to_stop=24, # Update start for the next storm start = next_start - @classmethod - def from_station(cls, station_id, start, stop, station_name='unknown', - source='NRCS'): - """ - - Form storm analysis from metloom - Args: - station_id: string id of the station of interest - start: Datetime object when to start looking for data - stop: Datetime object when to stop looking for data - source: Network/datasource to search for data options: NRCS, mesowest, CDEC - station_name: String name of the station to pass to pointdata +class OutlierEvents(BaseEvents): + def find(self): """ - pnt = None - pnt_classes = [SnotelPointData, CDECPointData, MesowestPointData] - for STATION_CLASS in pnt_classes: - if STATION_CLASS.DATASOURCE.lower() == source.lower(): - pnt = STATION_CLASS(station_id, station_name) - break - - if pnt is None: - raise ValueError(f'Datasource {source} is invalid. Use ' - f'{", ".join([c.DATASOURCE for c in pnt_classes])}') - - # Pull data - variable = pnt.ALLOWED_VARIABLES.PRECIPITATIONACCUM - - df = pnt.get_daily_data(start, stop, [variable]) - - if df is None: - raise ValueError(f'The combination of pulling precip from {station_id} ' - f'during {start}-{stop} produced no data. Check station ' - f'is real and has precip data between specified dates.') - else: - df = df.reset_index().set_index('datetime') - - return cls(df[variable.name].diff()) + Find periods that were outliers for the given dataset using a Z-score ?? + Periods or records + """ + # read data + data = self.data + if len(data) > 15: + mean = np.nanmean(data.values) + sd = np.nanstd(data.values) + z_score = (data.values - mean) / sd + # the record is outlier when z-score is lower -3 or higher than 3 + is_outlier = (z_score > 3) | (z_score < -3) + + # only save outliers + data_outlier = data[is_outlier] + self._outliers = data_outlier diff --git a/metevents/test_outlier.ipynb b/metevents/test_outlier.ipynb new file mode 100644 index 0000000..78d1379 --- /dev/null +++ b/metevents/test_outlier.ipynb @@ -0,0 +1,1144 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": 88, + "id": "e97fcd32-d0bd-4133-9aa7-19f5f16bd3b3", + "metadata": {}, + "outputs": [], + "source": [ + "from periods import CumulativePeriod\n", + "import pandas as pd\n", + "from datetime import timedelta,datetime\n", + "from metloom.pointdata import CDECPointData, SnotelPointData, MesowestPointData\n", + "from pandas.tseries.frequencies import to_offset\n", + "from utilities import determine_freq\n", + "import numpy as np\n", + "\n", + "\n", + "class BaseEvents:\n", + " def __init__(self, data):\n", + " self._events = []\n", + " self.data = data\n", + " self._groups = []\n", + " self._group_ids = None\n", + " self._outliers = []\n", + " \n", + "\n", + " @property\n", + " def events(self):\n", + " return self._events\n", + "\n", + " @property\n", + " def outliers(self):\n", + " return self._outliers\n", + "\n", + " @property\n", + " def N(self):\n", + " return len(self.events)\n", + "\n", + " def find(self, *args, **kwargs):\n", + " \"\"\"\n", + " Function to be defined for specific events in timeseries data. Performs\n", + " the actual detection of the events. Should assign self._events\n", + " \"\"\"\n", + " raise NotImplementedError(\"find function not implemented.\")\n", + "\n", + " @staticmethod\n", + " def group_condition_by_time(ind):\n", + " ind_sum = ind.eq(False).cumsum()\n", + "\n", + " # Isolate the ind_sum by positions that are True and group them together\n", + " time_groups = ind_sum.loc[ind.eq(True)].groupby(ind_sum)\n", + " groups = time_groups.groups\n", + " return groups, ind_sum\n", + "\n", + "\n", + " @classmethod\n", + " def from_station(cls, station_id, start, stop, station_name='unknown',\n", + " source='NRCS'):\n", + " \"\"\"\n", + " \n", + " Form storm analysis from metloom\n", + " \n", + " Args:\n", + " station_id: string id of the station of interest\n", + " start: Datetime object when to start looking for data\n", + " stop: Datetime object when to stop looking for data\n", + " source: Network/datasource to search for data options: NRCS, mesowest, CDEC\n", + " station_name: String name of the station to pass to pointdata\n", + " \"\"\"\n", + " pnt = None\n", + " pnt_classes = [SnotelPointData, CDECPointData, MesowestPointData]\n", + " for STATION_CLASS in pnt_classes:\n", + " if STATION_CLASS.DATASOURCE.lower() == source.lower():\n", + " pnt = STATION_CLASS(station_id, station_name)\n", + " break\n", + " \n", + " if pnt is None:\n", + " raise ValueError(f'Datasource {source} is invalid. Use '\n", + " f'{\", \".join([c.DATASOURCE for c in pnt_classes])}')\n", + " \n", + " # Pull data\n", + " variable = pnt.ALLOWED_VARIABLES.PRECIPITATIONACCUM\n", + " \n", + " df = pnt.get_daily_data(start, stop, [variable])\n", + " \n", + " if df is None:\n", + " raise ValueError(f'The combination of pulling precip from {station_id} '\n", + " f'during {start}-{stop} produced no data. Check station '\n", + " f'is real and has precip data between specified dates.')\n", + " else:\n", + " df = df.reset_index().set_index('datetime')\n", + " \n", + " return cls(df[variable.name].diff())\n", + "\n", + "\n", + "class StormEvents(BaseEvents):\n", + "\n", + " def find(self, instant_mass_to_start=0.1, min_storm_total=0.5, hours_to_stop=24,\n", + " max_storm_hours=336):\n", + " \"\"\"\n", + " Find all the storms that are initiated by a mass greater than the\n", + " instant_mass_to_start and receive less than that threshold for at\n", + " least hours_to_stop to end it. Storm delineation is further bounded by\n", + " min_storm_total and max_storm_hours.\n", + "\n", + " Args:\n", + " instant_mass_to_start: mass per time step to consider the beginning of a\n", + " storm\n", + " min_storm_total: Total storm mass to be considered a complete storm\n", + " hours_to_stop: minimum hours of mass less than instant threshold to\n", + " end a storm\n", + " max_storm_hours: Maximum hours a storm can.\n", + " \"\"\"\n", + " # group main condition by time\n", + " ind = self.data >= instant_mass_to_start\n", + " groups, _ = self.group_condition_by_time(ind)\n", + "\n", + " freq = determine_freq(ind)\n", + " tstep = pd.to_timedelta(to_offset(freq))\n", + " dt = timedelta(hours=hours_to_stop)\n", + " max_storm = timedelta(hours=max_storm_hours)\n", + "\n", + " group_list = sorted(list(groups.items()))\n", + " N_groups = len(group_list)\n", + "\n", + " # Evaluate each group of mass conditions against the timing\n", + " for i, (event_id, curr_group) in enumerate(group_list):\n", + " curr_start = curr_group.min()\n", + " curr_stop = curr_group.max()\n", + " if i == 0:\n", + " start = curr_start\n", + "\n", + " # Grab next\n", + " nx_idx = i + 1\n", + " if nx_idx < N_groups:\n", + " next_group = group_list[nx_idx][1]\n", + " next_start = next_group.min()\n", + "\n", + " else:\n", + " next_start = curr_stop\n", + " # track storm total and no_precip_d\n", + " total = self.data.loc[start:curr_stop].sum()\n", + " duration = curr_stop - start\n", + "\n", + " # Has there been enough hours without mass\n", + " enough_hours_wo_precip = (next_start - curr_stop) > dt\n", + " # Has storm gone on too long\n", + " storm_duration_too_long = duration > max_storm\n", + " # Has enough mass accumulated to be considered a storm\n", + " enough_storm_mass = total >= min_storm_total\n", + " base_condition = (enough_hours_wo_precip or storm_duration_too_long)\n", + " condition = (base_condition and enough_storm_mass)\n", + "\n", + " if condition or nx_idx == N_groups:\n", + " # Watch out for beginning\n", + " start = start - tstep if start != self.data.index[0] else start\n", + "\n", + " event = CumulativePeriod(self.data.loc[start:curr_stop])\n", + " self._events.append(event)\n", + " # Update start for the next storm\n", + " start = next_start\n", + "\n", + " \n", + "\n", + "\n", + "class OutlierEvents(BaseEvents):\n", + " def find(self):\n", + " \"\"\"\n", + " Find periods that were outliers for the given dataset using a Z-score ??\n", + " Periods or records\n", + " \"\"\"\n", + " data = self.data\n", + " mean = np.nanmean(data.values)\n", + " sd = np.nanstd(data.values)\n", + " z_score = (data.values - mean)/sd\n", + " is_outlier = (z_score > 3) | (z_score < -1)\n", + " df = pd.DataFrame()\n", + " df['Datetime'] = data.index\n", + " df['data'] = data.values\n", + " df['is_outlier'] = is_outlier\n", + " self._outliers = df \n", + "\n", + " \n", + " \n", + " " + ] + }, + { + "cell_type": "code", + "execution_count": 124, + "id": "0a6a0107-a052-4ae5-9d7d-4958610b89aa", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "7.0013992770541735" + ] + }, + "execution_count": 124, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "mean\n", + "sd" + ] + }, + { + "cell_type": "code", + "execution_count": 130, + "id": "793212ab-4f85-487d-b1ad-a75a90ada28a", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "array([False, False, False, False, False, False, False, False, False,\n", + " False, False, False, False, False, False, False, False, False,\n", + " False, False, False, False, False, False, False, True, False,\n", + " False, True])" + ] + }, + "execution_count": 130, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "data = data\n", + "data = [2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,3,3,3,3,23,4,42,2,2,-40]\n", + "mean = np.nanmean(data)\n", + "sd = np.nanstd(data)\n", + "z_score = (data - mean)/sd\n", + "is_outlier = (z_score > 3) | (z_score < -3)\n", + "is_outlier" + ] + }, + { + "cell_type": "code", + "execution_count": 111, + "id": "ac2d9db3-e741-4f82-a3c3-f0d7b3124ed3", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
Datetimedatais_outlier
02021-10-03 08:00:00+00:00NaNFalse
12021-10-04 08:00:00+00:000.0False
22021-10-05 08:00:00+00:000.0False
32021-10-06 08:00:00+00:000.0False
42021-10-07 08:00:00+00:000.0False
............
3582022-09-26 08:00:00+00:000.0False
3592022-09-27 08:00:00+00:000.0False
3602022-09-28 08:00:00+00:000.0False
3612022-09-29 08:00:00+00:000.0False
3622022-09-30 08:00:00+00:000.0False
\n", + "

363 rows × 3 columns

\n", + "
" + ], + "text/plain": [ + " Datetime data is_outlier\n", + "0 2021-10-03 08:00:00+00:00 NaN False\n", + "1 2021-10-04 08:00:00+00:00 0.0 False\n", + "2 2021-10-05 08:00:00+00:00 0.0 False\n", + "3 2021-10-06 08:00:00+00:00 0.0 False\n", + "4 2021-10-07 08:00:00+00:00 0.0 False\n", + ".. ... ... ...\n", + "358 2022-09-26 08:00:00+00:00 0.0 False\n", + "359 2022-09-27 08:00:00+00:00 0.0 False\n", + "360 2022-09-28 08:00:00+00:00 0.0 False\n", + "361 2022-09-29 08:00:00+00:00 0.0 False\n", + "362 2022-09-30 08:00:00+00:00 0.0 False\n", + "\n", + "[363 rows x 3 columns]" + ] + }, + "execution_count": 111, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df = storms.outliers\n", + "df\n", + "# df[is_outlier]" + ] + }, + { + "cell_type": "code", + "execution_count": 136, + "id": "be940392-be6e-4713-a266-dd24fa97035e", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "array([3.34, 2.55, 2.43, 1.54, 1.14])" + ] + }, + "execution_count": 136, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "start = datetime(2021,10,1)\n", + "stop = datetime(2022,9,30)\n", + "storms = OutlierEvents.from_station(station_id, start, stop, source=source)\n", + "storms.find()\n", + "storms.outliers\n", + "data = storms.data\n", + "mean = np.nanmean(data.values)\n", + "sd = np.nanstd(data.values)\n", + "z_score = (data.values - mean) / sd\n", + "# the record is outlier when z-score is lower -3 or higher than 3\n", + "is_outlier = (z_score > 3) | (z_score < -3)\n", + "\n", + "# only save outliers\n", + "data_outlier = data[is_outlier]\n", + "data_outlier.values" + ] + }, + { + "cell_type": "code", + "execution_count": 105, + "id": "bd4d0e72-36d8-4765-8285-63a8a34d139d", + "metadata": {}, + "outputs": [ + { + "ename": "ValueError", + "evalue": "Item wrong length 46 instead of 363.", + "output_type": "error", + "traceback": [ + "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", + "\u001b[0;31mValueError\u001b[0m Traceback (most recent call last)", + "\u001b[0;32m/var/folders/ps/ym9wmm7d2gvcvbndrb8sb0mw0000gn/T/ipykernel_7736/3313206823.py\u001b[0m in \u001b[0;36m?\u001b[0;34m()\u001b[0m\n\u001b[1;32m 2\u001b[0m \u001b[0mdf\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mpd\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mDataFrame\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 3\u001b[0m \u001b[0mdf\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;34m'Datetime'\u001b[0m\u001b[0;34m]\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mdata\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mindex\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 4\u001b[0m \u001b[0mdf\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;34m'data'\u001b[0m\u001b[0;34m]\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mdata\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mvalues\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 5\u001b[0m \u001b[0;31m# only save outliers\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m----> 6\u001b[0;31m \u001b[0mdf\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mdf\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0mis_outlier\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 7\u001b[0m \u001b[0mdf\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", + "\u001b[0;32m~/projects/m3works/metevents/venv/lib/python3.11/site-packages/pandas/core/frame.py\u001b[0m in \u001b[0;36m?\u001b[0;34m(self, key)\u001b[0m\n\u001b[1;32m 3883\u001b[0m \u001b[0;32mreturn\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mwhere\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mkey\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 3884\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 3885\u001b[0m \u001b[0;31m# Do we have a (boolean) 1d indexer?\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 3886\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0mcom\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mis_bool_indexer\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mkey\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m-> 3887\u001b[0;31m \u001b[0;32mreturn\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_getitem_bool_array\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mkey\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 3888\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 3889\u001b[0m \u001b[0;31m# We are left with two options: a single key, and a collection of keys,\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 3890\u001b[0m \u001b[0;31m# We interpret tuples as collections only for non-MultiIndex\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", + "\u001b[0;32m~/projects/m3works/metevents/venv/lib/python3.11/site-packages/pandas/core/frame.py\u001b[0m in \u001b[0;36m?\u001b[0;34m(self, key)\u001b[0m\n\u001b[1;32m 3933\u001b[0m \u001b[0mUserWarning\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 3934\u001b[0m \u001b[0mstacklevel\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mfind_stack_level\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 3935\u001b[0m )\n\u001b[1;32m 3936\u001b[0m \u001b[0;32melif\u001b[0m \u001b[0mlen\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mkey\u001b[0m\u001b[0;34m)\u001b[0m \u001b[0;34m!=\u001b[0m \u001b[0mlen\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mindex\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m-> 3937\u001b[0;31m raise ValueError(\n\u001b[0m\u001b[1;32m 3938\u001b[0m \u001b[0;34mf\"Item wrong length {len(key)} instead of {len(self.index)}.\"\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 3939\u001b[0m )\n\u001b[1;32m 3940\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n", + "\u001b[0;31mValueError\u001b[0m: Item wrong length 46 instead of 363." + ] + } + ], + "source": [ + "is_outlier = (z_score > 3) | (z_score < -3)\n", + "df = pd.DataFrame()\n", + "df['Datetime'] = data.index\n", + "df['data'] = data.values\n", + "# only save outliers\n", + "df = df[is_outlier]\n", + "df" + ] + }, + { + "cell_type": "code", + "execution_count": 98, + "id": "70076f36-5e21-4b2b-ba78-da4097a0d6d0", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
Datetimedatais_outlier
02021-10-03 08:00:00+00:00NaNFalse
12021-10-04 08:00:00+00:000.0False
22021-10-05 08:00:00+00:000.0False
32021-10-06 08:00:00+00:000.0False
42021-10-07 08:00:00+00:000.0False
............
3582022-09-26 08:00:00+00:000.0False
3592022-09-27 08:00:00+00:000.0False
3602022-09-28 08:00:00+00:000.0False
3612022-09-29 08:00:00+00:000.0False
3622022-09-30 08:00:00+00:000.0False
\n", + "

363 rows × 3 columns

\n", + "
" + ], + "text/plain": [ + " Datetime data is_outlier\n", + "0 2021-10-03 08:00:00+00:00 NaN False\n", + "1 2021-10-04 08:00:00+00:00 0.0 False\n", + "2 2021-10-05 08:00:00+00:00 0.0 False\n", + "3 2021-10-06 08:00:00+00:00 0.0 False\n", + "4 2021-10-07 08:00:00+00:00 0.0 False\n", + ".. ... ... ...\n", + "358 2022-09-26 08:00:00+00:00 0.0 False\n", + "359 2022-09-27 08:00:00+00:00 0.0 False\n", + "360 2022-09-28 08:00:00+00:00 0.0 False\n", + "361 2022-09-29 08:00:00+00:00 0.0 False\n", + "362 2022-09-30 08:00:00+00:00 0.0 False\n", + "\n", + "[363 rows x 3 columns]" + ] + }, + "execution_count": 98, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [] + }, + { + "cell_type": "code", + "execution_count": 137, + "id": "c941b6b4-2ecd-498a-8aa9-3d6fb4f320f3", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "363" + ] + }, + "execution_count": 137, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "len(data)" + ] + }, + { + "cell_type": "code", + "execution_count": 91, + "id": "fdf85755-5805-4662-8126-359ab9136e55", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "datetime\n", + "2021-12-01 08:00:00+00:00 NaN\n", + "2021-12-02 08:00:00+00:00 0.00\n", + "2021-12-03 08:00:00+00:00 0.00\n", + "2021-12-04 08:00:00+00:00 0.00\n", + "2021-12-05 08:00:00+00:00 0.00\n", + "2021-12-06 08:00:00+00:00 0.00\n", + "2021-12-07 08:00:00+00:00 0.02\n", + "2021-12-08 08:00:00+00:00 0.00\n", + "2021-12-09 08:00:00+00:00 0.60\n", + "2021-12-10 08:00:00+00:00 0.00\n", + "2021-12-11 08:00:00+00:00 0.00\n", + "2021-12-12 08:00:00+00:00 0.13\n", + "2021-12-13 08:00:00+00:00 2.43\n", + "2021-12-14 08:00:00+00:00 1.54\n", + "2021-12-15 08:00:00+00:00 0.16\n", + "2021-12-16 08:00:00+00:00 0.67\n", + "2021-12-17 08:00:00+00:00 0.00\n", + "2021-12-18 08:00:00+00:00 0.01\n", + "2021-12-19 08:00:00+00:00 0.01\n", + "2021-12-20 08:00:00+00:00 0.00\n", + "2021-12-21 08:00:00+00:00 0.02\n", + "2021-12-22 08:00:00+00:00 0.68\n", + "2021-12-23 08:00:00+00:00 1.14\n", + "2021-12-24 08:00:00+00:00 0.90\n", + "2021-12-25 08:00:00+00:00 0.56\n", + "2021-12-26 08:00:00+00:00 0.26\n", + "2021-12-27 08:00:00+00:00 0.59\n", + "2021-12-28 08:00:00+00:00 0.06\n", + "2021-12-29 08:00:00+00:00 0.42\n", + "2021-12-30 08:00:00+00:00 0.20\n", + "2021-12-31 08:00:00+00:00 0.00\n", + "2022-01-01 08:00:00+00:00 0.02\n", + "2022-01-02 08:00:00+00:00 0.01\n", + "2022-01-03 08:00:00+00:00 0.00\n", + "2022-01-04 08:00:00+00:00 0.33\n", + "2022-01-05 08:00:00+00:00 0.07\n", + "2022-01-06 08:00:00+00:00 0.01\n", + "2022-01-07 08:00:00+00:00 0.01\n", + "2022-01-08 08:00:00+00:00 0.00\n", + "2022-01-09 08:00:00+00:00 0.00\n", + "2022-01-10 08:00:00+00:00 0.01\n", + "2022-01-11 08:00:00+00:00 0.02\n", + "2022-01-12 08:00:00+00:00 0.97\n", + "2022-01-13 08:00:00+00:00 0.01\n", + "2022-01-14 08:00:00+00:00 0.00\n", + "2022-01-15 08:00:00+00:00 0.00\n", + "Name: ACCUMULATED PRECIPITATION, dtype: float64" + ] + }, + "execution_count": 91, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "data = [0.1, 0.2]\n", + "baseevent = BaseEvents(data)\n", + "storms = StormEvents(data)\n", + "\n", + "start_mass = 0.1\n", + "stop_hours = 24\n", + "total_mass = 0.5\n", + "max_hours = 336\n", + "# instant_mass_to_start=0.1, min_storm_total=0.5, hours_to_stop=24,\n", + "# max_storm_hours=336\n", + "station_id = 'TUM'\n", + "start = datetime(2021,12,1)\n", + "stop = datetime(2022,1,15)\n", + "source = 'CDEC'\n", + "\n", + "\n", + "storms = StormEvents.from_station(station_id, start, stop, source=source)\n", + "storms.data" + ] + }, + { + "cell_type": "code", + "execution_count": 34, + "id": "66fc1cdd-60fa-4c43-b817-8a76045a2ec1", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "datetime\n", + "2021-12-01 08:00:00+00:00 NaN\n", + "2021-12-02 08:00:00+00:00 0.00\n", + "2021-12-03 08:00:00+00:00 0.00\n", + "2021-12-04 08:00:00+00:00 0.00\n", + "2021-12-05 08:00:00+00:00 0.00\n", + "2021-12-06 08:00:00+00:00 0.00\n", + "2021-12-07 08:00:00+00:00 0.02\n", + "2021-12-08 08:00:00+00:00 0.00\n", + "2021-12-09 08:00:00+00:00 0.60\n", + "2021-12-10 08:00:00+00:00 0.00\n", + "2021-12-11 08:00:00+00:00 0.00\n", + "2021-12-12 08:00:00+00:00 0.13\n", + "2021-12-13 08:00:00+00:00 2.43\n", + "2021-12-14 08:00:00+00:00 1.54\n", + "2021-12-15 08:00:00+00:00 0.16\n", + "2021-12-16 08:00:00+00:00 0.67\n", + "2021-12-17 08:00:00+00:00 0.00\n", + "2021-12-18 08:00:00+00:00 0.01\n", + "2021-12-19 08:00:00+00:00 0.01\n", + "2021-12-20 08:00:00+00:00 0.00\n", + "2021-12-21 08:00:00+00:00 0.02\n", + "2021-12-22 08:00:00+00:00 0.68\n", + "2021-12-23 08:00:00+00:00 1.14\n", + "2021-12-24 08:00:00+00:00 0.90\n", + "2021-12-25 08:00:00+00:00 0.56\n", + "2021-12-26 08:00:00+00:00 0.26\n", + "2021-12-27 08:00:00+00:00 0.59\n", + "2021-12-28 08:00:00+00:00 0.06\n", + "2021-12-29 08:00:00+00:00 0.42\n", + "2021-12-30 08:00:00+00:00 0.20\n", + "2021-12-31 08:00:00+00:00 0.00\n", + "2022-01-01 08:00:00+00:00 0.02\n", + "2022-01-02 08:00:00+00:00 0.01\n", + "2022-01-03 08:00:00+00:00 0.00\n", + "2022-01-04 08:00:00+00:00 0.33\n", + "2022-01-05 08:00:00+00:00 0.07\n", + "2022-01-06 08:00:00+00:00 0.01\n", + "2022-01-07 08:00:00+00:00 0.01\n", + "2022-01-08 08:00:00+00:00 0.00\n", + "2022-01-09 08:00:00+00:00 0.00\n", + "2022-01-10 08:00:00+00:00 0.01\n", + "2022-01-11 08:00:00+00:00 0.02\n", + "2022-01-12 08:00:00+00:00 0.97\n", + "2022-01-13 08:00:00+00:00 0.01\n", + "2022-01-14 08:00:00+00:00 0.00\n", + "2022-01-15 08:00:00+00:00 0.00\n", + "Name: ACCUMULATED PRECIPITATION, dtype: float64" + ] + }, + "execution_count": 34, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "storms.data" + ] + }, + { + "cell_type": "code", + "execution_count": 93, + "id": "c3b957c2-c5cc-4c69-b85b-52f00aa72717", + "metadata": {}, + "outputs": [], + "source": [ + "storms.find(instant_mass_to_start=start_mass,\n", + " hours_to_stop=24,\n", + " min_storm_total=total_mass,\n", + " max_storm_hours=max_hours)\n" + ] + }, + { + "cell_type": "code", + "execution_count": 94, + "id": "78538bdd-1079-40e5-977a-e09c90b8bc82", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "[Cumulative Period (2021-12-08T08:00:00+00:00 - 2021-12-09T08:00:00+00:00),\n", + " Cumulative Period (2021-12-11T08:00:00+00:00 - 2021-12-16T08:00:00+00:00),\n", + " Cumulative Period (2021-12-21T08:00:00+00:00 - 2021-12-27T08:00:00+00:00),\n", + " Cumulative Period (2021-12-28T08:00:00+00:00 - 2021-12-30T08:00:00+00:00),\n", + " Cumulative Period (2022-01-03T08:00:00+00:00 - 2022-01-12T08:00:00+00:00),\n", + " Cumulative Period (2021-12-08T08:00:00+00:00 - 2021-12-09T08:00:00+00:00),\n", + " Cumulative Period (2021-12-11T08:00:00+00:00 - 2021-12-16T08:00:00+00:00),\n", + " Cumulative Period (2021-12-21T08:00:00+00:00 - 2021-12-27T08:00:00+00:00),\n", + " Cumulative Period (2021-12-28T08:00:00+00:00 - 2021-12-30T08:00:00+00:00),\n", + " Cumulative Period (2022-01-03T08:00:00+00:00 - 2022-01-12T08:00:00+00:00)]" + ] + }, + "execution_count": 94, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "storms.events" + ] + }, + { + "cell_type": "code", + "execution_count": 21, + "id": "c94bd87c-9368-4ced-8434-7d1c6325b4e1", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "5" + ] + }, + "execution_count": 21, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "storms.N" + ] + }, + { + "cell_type": "code", + "execution_count": 22, + "id": "c0d9e407-7652-436e-ba33-e13153ddadc0", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "datetime\n", + "2021-12-01 08:00:00+00:00 NaN\n", + "2021-12-02 08:00:00+00:00 0.00\n", + "2021-12-03 08:00:00+00:00 0.00\n", + "2021-12-04 08:00:00+00:00 0.00\n", + "2021-12-05 08:00:00+00:00 0.00\n", + "2021-12-06 08:00:00+00:00 0.00\n", + "2021-12-07 08:00:00+00:00 0.02\n", + "2021-12-08 08:00:00+00:00 0.00\n", + "2021-12-09 08:00:00+00:00 0.60\n", + "2021-12-10 08:00:00+00:00 0.00\n", + "2021-12-11 08:00:00+00:00 0.00\n", + "2021-12-12 08:00:00+00:00 0.13\n", + "2021-12-13 08:00:00+00:00 2.43\n", + "2021-12-14 08:00:00+00:00 1.54\n", + "2021-12-15 08:00:00+00:00 0.16\n", + "2021-12-16 08:00:00+00:00 0.67\n", + "2021-12-17 08:00:00+00:00 0.00\n", + "2021-12-18 08:00:00+00:00 0.01\n", + "2021-12-19 08:00:00+00:00 0.01\n", + "2021-12-20 08:00:00+00:00 0.00\n", + "2021-12-21 08:00:00+00:00 0.02\n", + "2021-12-22 08:00:00+00:00 0.68\n", + "2021-12-23 08:00:00+00:00 1.14\n", + "2021-12-24 08:00:00+00:00 0.90\n", + "2021-12-25 08:00:00+00:00 0.56\n", + "2021-12-26 08:00:00+00:00 0.26\n", + "2021-12-27 08:00:00+00:00 0.59\n", + "2021-12-28 08:00:00+00:00 0.06\n", + "2021-12-29 08:00:00+00:00 0.42\n", + "2021-12-30 08:00:00+00:00 0.20\n", + "2021-12-31 08:00:00+00:00 0.00\n", + "2022-01-01 08:00:00+00:00 0.02\n", + "2022-01-02 08:00:00+00:00 0.01\n", + "2022-01-03 08:00:00+00:00 0.00\n", + "2022-01-04 08:00:00+00:00 0.33\n", + "2022-01-05 08:00:00+00:00 0.07\n", + "2022-01-06 08:00:00+00:00 0.01\n", + "2022-01-07 08:00:00+00:00 0.01\n", + "2022-01-08 08:00:00+00:00 0.00\n", + "2022-01-09 08:00:00+00:00 0.00\n", + "2022-01-10 08:00:00+00:00 0.01\n", + "2022-01-11 08:00:00+00:00 0.02\n", + "2022-01-12 08:00:00+00:00 0.97\n", + "2022-01-13 08:00:00+00:00 0.01\n", + "2022-01-14 08:00:00+00:00 0.00\n", + "2022-01-15 08:00:00+00:00 0.00\n", + "Name: ACCUMULATED PRECIPITATION, dtype: float64" + ] + }, + "execution_count": 22, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "storms.data" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "6ea237a1-fb2b-48b4-8005-5ddfdedc4f15", + "metadata": {}, + "outputs": [], + "source": [ + " @pytest.mark.parametrize('station_id, start, stop, source, mass, hours, n_storms', [\n", + " ('TUM', datetime(2021, 12, 1), datetime(2022, 1, 15), 'CDEC', 0.1, 48, 5),\n", + " ('637:ID:SNTL', datetime(2022, 12, 1), datetime(2022, 12, 15),\n", + " 'NRCS', 0.1, 48, 2)\n", + "\n", + " ])\n", + " def test_storm_events_from_station(self, station_id, start, stop, source, mass,\n", + " hours, n_storms):\n", + " \"\"\"\n", + " Test the number of storms identified by varying input data and thresholds.\n", + " \"\"\"\n", + " storms = StormEvents.from_station(station_id, start, stop, source=source)\n", + " storms.find(instant_mass_to_start=mass, hours_to_stop=hours,\n", + " min_storm_total=0.2)\n", + " assert storms.N == n_storms\n" + ] + }, + { + "cell_type": "code", + "execution_count": 96, + "id": "52e50905-e736-472d-879c-781db9b2901e", + "metadata": {}, + "outputs": [], + "source": [ + "data = storms.outliers" + ] + }, + { + "cell_type": "code", + "execution_count": 86, + "id": "aa644b5a-ccaf-4a98-b582-6ca5f315e39c", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
Datetimedatais_outlier
02021-10-03 08:00:00+00:00NaNFalse
12021-10-04 08:00:00+00:000.0False
22021-10-05 08:00:00+00:000.0False
32021-10-06 08:00:00+00:000.0False
42021-10-07 08:00:00+00:000.0False
............
3582022-09-26 08:00:00+00:000.0False
3592022-09-27 08:00:00+00:000.0False
3602022-09-28 08:00:00+00:000.0False
3612022-09-29 08:00:00+00:000.0False
3622022-09-30 08:00:00+00:000.0False
\n", + "

363 rows × 3 columns

\n", + "
" + ], + "text/plain": [ + " Datetime data is_outlier\n", + "0 2021-10-03 08:00:00+00:00 NaN False\n", + "1 2021-10-04 08:00:00+00:00 0.0 False\n", + "2 2021-10-05 08:00:00+00:00 0.0 False\n", + "3 2021-10-06 08:00:00+00:00 0.0 False\n", + "4 2021-10-07 08:00:00+00:00 0.0 False\n", + ".. ... ... ...\n", + "358 2022-09-26 08:00:00+00:00 0.0 False\n", + "359 2022-09-27 08:00:00+00:00 0.0 False\n", + "360 2022-09-28 08:00:00+00:00 0.0 False\n", + "361 2022-09-29 08:00:00+00:00 0.0 False\n", + "362 2022-09-30 08:00:00+00:00 0.0 False\n", + "\n", + "[363 rows x 3 columns]" + ] + }, + "execution_count": 86, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "data" + ] + }, + { + "cell_type": "code", + "execution_count": 97, + "id": "2342d2fe-fd71-4b0a-963c-817d26c4bfc7", + "metadata": {}, + "outputs": [ + { + "ename": "TypeError", + "evalue": "list indices must be integers or slices, not str", + "output_type": "error", + "traceback": [ + "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", + "\u001b[0;31mTypeError\u001b[0m Traceback (most recent call last)", + "Cell \u001b[0;32mIn[97], line 6\u001b[0m\n\u001b[1;32m 3\u001b[0m \u001b[38;5;66;03m# data['date'] = [i.split(' ')[0] for i in data[0]['Datetime']]\u001b[39;00m\n\u001b[1;32m 5\u001b[0m fig, ax \u001b[38;5;241m=\u001b[39m plt\u001b[38;5;241m.\u001b[39msubplots(figsize \u001b[38;5;241m=\u001b[39m (\u001b[38;5;241m12\u001b[39m,\u001b[38;5;241m4\u001b[39m))\n\u001b[0;32m----> 6\u001b[0m sns\u001b[38;5;241m.\u001b[39mscatterplot(x \u001b[38;5;241m=\u001b[39m \u001b[43mdata\u001b[49m\u001b[43m[\u001b[49m\u001b[38;5;124;43m'\u001b[39;49m\u001b[38;5;124;43mDatetime\u001b[39;49m\u001b[38;5;124;43m'\u001b[39;49m\u001b[43m]\u001b[49m, y \u001b[38;5;241m=\u001b[39m data[\u001b[38;5;124m'\u001b[39m\u001b[38;5;124mdata\u001b[39m\u001b[38;5;124m'\u001b[39m], data \u001b[38;5;241m=\u001b[39m data, hue \u001b[38;5;241m=\u001b[39m \u001b[38;5;124m'\u001b[39m\u001b[38;5;124mis_outlier\u001b[39m\u001b[38;5;124m'\u001b[39m)\n", + "\u001b[0;31mTypeError\u001b[0m: list indices must be integers or slices, not str" + ] + }, + { + "data": { + "image/png": "iVBORw0KGgoAAAANSUhEUgAAA+AAAAFlCAYAAABrxYI/AAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjguMCwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy81sbWrAAAACXBIWXMAAA9hAAAPYQGoP6dpAAAeGElEQVR4nO3df2zV9b348Vdb7KlmtuLl0gK3jqubc5sKDqS3OmO86WyiYeOPxQ4XIETn3LhGbXYn+IPOH6PcXTUks47I3PQfB5uZZhFS5+2VLLv2hgxoohlgHGMQsxa4u7bcurXSfr5/LOu+HQX5lPZNC49Hcv7oe+/3+bzP8ob45HN6TlGWZVkAAAAA46r4dG8AAAAAzgYCHAAAABIQ4AAAAJCAAAcAAIAEBDgAAAAkIMABAAAgAQEOAAAACQhwAAAASECAAwAAQAICHAAAABLIHeC/+MUvYuHChTFz5swoKiqKl19++UPXbN26NT7zmc9EoVCIj33sY/Hcc8+NYqsAAAAweeUO8N7e3pgzZ060tLSc1Pzf/va3cfPNN8cNN9wQHR0dcc8998Ttt98er776au7NAgAAwGRVlGVZNurFRUXx0ksvxaJFi44757777ovNmzfHW2+9NTT2pS99Kd57771obW0d7aUBAABgUpky3hdob2+Purq6YWP19fVxzz33HHdNX19f9PX1Df08ODgYf/jDH+Lv/u7voqioaLy2CgAAABERkWVZHDlyJGbOnBnFxWPz8WnjHuCdnZ1RWVk5bKyysjJ6enrij3/8Y5x77rnHrGlubo6HH354vLcGAAAAJ3TgwIH4h3/4hzF5rnEP8NFYtWpVNDY2Dv3c3d0dF110URw4cCDKy8tP484AAAA4G/T09ER1dXWcf/75Y/ac4x7gVVVV0dXVNWysq6srysvLR7z7HRFRKBSiUCgcM15eXi7AAQAASGYsfw163L8HvLa2Ntra2oaNvfbaa1FbWzvelwYAAIAJI3eA/9///V90dHRER0dHRPz5a8Y6Ojpi//79EfHnt48vXbp0aP6dd94Ze/fujW9+85uxe/fuePrpp+PHP/5x3HvvvWPzCgAAAGASyB3gv/rVr+Kqq66Kq666KiIiGhsb46qrrorVq1dHRMTvf//7oRiPiPjHf/zH2Lx5c7z22msxZ86ceOKJJ+L73/9+1NfXj9FLAAAAgInvlL4HPJWenp6oqKiI7u5uvwMOAADAuBuPDh333wEHAAAABDgAAAAkIcABAAAgAQEOAAAACQhwAAAASECAAwAAQAICHAAAABIQ4AAAAJCAAAcAAIAEBDgAAAAkIMABAAAgAQEOAAAACQhwAAAASECAAwAAQAICHAAAABIQ4AAAAJCAAAcAAIAEBDgAAAAkIMABAAAgAQEOAAAACQhwAAAASECAAwAAQAICHAAAABIQ4AAAAJCAAAcAAIAEBDgAAAAkIMABAAAgAQEOAAAACQhwAAAASECAAwAAQAICHAAAABIQ4AAAAJCAAAcAAIAEBDgAAAAkIMABAAAgAQEOAAAACQhwAAAASECAAwAAQAICHAAAABIQ4AAAAJCAAAcAAIAEBDgAAAAkIMABAAAgAQEOAAAACQhwAAAASECAAwAAQAICHAAAABIQ4AAAAJCAAAcAAIAEBDgAAAAkIMABAAAggVEFeEtLS8yePTvKysqipqYmtm3bdsL569ati0984hNx7rnnRnV1ddx7773xpz/9aVQbBgAAgMkod4Bv2rQpGhsbo6mpKXbs2BFz5syJ+vr6OHjw4IjzX3jhhVi5cmU0NTXFrl274tlnn41NmzbF/ffff8qbBwAAgMkid4A/+eST8ZWvfCWWL18en/rUp2L9+vVx3nnnxQ9+8IMR57/xxhtx7bXXxq233hqzZ8+OG2+8MRYvXvyhd80BAADgTJIrwPv7+2P79u1RV1f31ycoLo66urpob28fcc0111wT27dvHwruvXv3xpYtW+Kmm2467nX6+vqip6dn2AMAAAAmsyl5Jh8+fDgGBgaisrJy2HhlZWXs3r17xDW33nprHD58OD772c9GlmVx9OjRuPPOO0/4FvTm5uZ4+OGH82wNAAAAJrRx/xT0rVu3xpo1a+Lpp5+OHTt2xE9/+tPYvHlzPProo8dds2rVquju7h56HDhwYLy3CQAAAOMq1x3wadOmRUlJSXR1dQ0b7+rqiqqqqhHXPPTQQ7FkyZK4/fbbIyLiiiuuiN7e3rjjjjvigQceiOLiY/8NoFAoRKFQyLM1AAAAmNBy3QEvLS2NefPmRVtb29DY4OBgtLW1RW1t7Yhr3n///WMiu6SkJCIisizLu18AAACYlHLdAY+IaGxsjGXLlsX8+fNjwYIFsW7duujt7Y3ly5dHRMTSpUtj1qxZ0dzcHBERCxcujCeffDKuuuqqqKmpiXfeeSceeuihWLhw4VCIAwAAwJkud4A3NDTEoUOHYvXq1dHZ2Rlz586N1tbWoQ9m279//7A73g8++GAUFRXFgw8+GO+++278/d//fSxcuDC+/e1vj92rAAAAgAmuKJsE7wPv6emJioqK6O7ujvLy8tO9HQAAAM5w49Gh4/4p6AAAAIAABwAAgCQEOAAAACQgwAEAACABAQ4AAAAJCHAAAABIQIADAABAAgIcAAAAEhDgAAAAkIAABwAAgAQEOAAAACQgwAEAACABAQ4AAAAJCHAAAABIQIADAABAAgIcAAAAEhDgAAAAkIAABwAAgAQEOAAAACQgwAEAACABAQ4AAAAJCHAAAABIQIADAABAAgIcAAAAEhDgAAAAkIAABwAAgAQEOAAAACQgwAEAACABAQ4AAAAJCHAAAABIQIADAABAAgIcAAAAEhDgAAAAkIAABwAAgAQEOAAAACQgwAEAACABAQ4AAAAJCHAAAABIQIADAABAAgIcAAAAEhDgAAAAkIAABwAAgAQEOAAAACQgwAEAACABAQ4AAAAJCHAAAABIQIADAABAAgIcAAAAEhDgAAAAkIAABwAAgARGFeAtLS0xe/bsKCsri5qamti2bdsJ57/33nuxYsWKmDFjRhQKhbj00ktjy5Yto9owAAAATEZT8i7YtGlTNDY2xvr166OmpibWrVsX9fX1sWfPnpg+ffox8/v7++Nzn/tcTJ8+PV588cWYNWtW/O53v4sLLrhgLPYPAAAAk0JRlmVZngU1NTVx9dVXx1NPPRUREYODg1FdXR133XVXrFy58pj569evj3//93+P3bt3xznnnDOqTfb09ERFRUV0d3dHeXn5qJ4DAAAATtZ4dGiut6D39/fH9u3bo66u7q9PUFwcdXV10d7ePuKan/3sZ1FbWxsrVqyIysrKuPzyy2PNmjUxMDBw3Ov09fVFT0/PsAcAAABMZrkC/PDhwzEwMBCVlZXDxisrK6Ozs3PENXv37o0XX3wxBgYGYsuWLfHQQw/FE088EY899thxr9Pc3BwVFRVDj+rq6jzbBAAAgAln3D8FfXBwMKZPnx7PPPNMzJs3LxoaGuKBBx6I9evXH3fNqlWroru7e+hx4MCB8d4mAAAAjKtcH8I2bdq0KCkpia6urmHjXV1dUVVVNeKaGTNmxDnnnBMlJSVDY5/85Cejs7Mz+vv7o7S09Jg1hUIhCoVCnq0BAADAhJbrDnhpaWnMmzcv2trahsYGBwejra0tamtrR1xz7bXXxjvvvBODg4NDY2+//XbMmDFjxPgGAACAM1Hut6A3NjbGhg0b4vnnn49du3bF1772tejt7Y3ly5dHRMTSpUtj1apVQ/O/9rWvxR/+8Ie4++674+23347NmzfHmjVrYsWKFWP3KgAAAGCCy/094A0NDXHo0KFYvXp1dHZ2xty5c6O1tXXog9n2798fxcV/7frq6up49dVX4957740rr7wyZs2aFXfffXfcd999Y/cqAAAAYILL/T3gp4PvAQcAACCl0/494AAAAMDoCHAAAABIQIADAABAAgIcAAAAEhDgAAAAkIAABwAAgAQEOAAAACQgwAEAACABAQ4AAAAJCHAAAABIQIADAABAAgIcAAAAEhDgAAAAkIAABwAAgAQEOAAAACQgwAEAACABAQ4AAAAJCHAAAABIQIADAABAAgIcAAAAEhDgAAAAkIAABwAAgAQEOAAAACQgwAEAACABAQ4AAAAJCHAAAABIQIADAABAAgIcAAAAEhDgAAAAkIAABwAAgAQEOAAAACQgwAEAACABAQ4AAAAJCHAAAABIQIADAABAAgIcAAAAEhDgAAAAkIAABwAAgAQEOAAAACQgwAEAACABAQ4AAAAJCHAAAABIQIADAABAAgIcAAAAEhDgAAAAkIAABwAAgAQEOAAAACQgwAEAACABAQ4AAAAJCHAAAABIQIADAABAAqMK8JaWlpg9e3aUlZVFTU1NbNu27aTWbdy4MYqKimLRokWjuSwAAABMWrkDfNOmTdHY2BhNTU2xY8eOmDNnTtTX18fBgwdPuG7fvn3xjW98I6677rpRbxYAAAAmq9wB/uSTT8ZXvvKVWL58eXzqU5+K9evXx3nnnRc/+MEPjrtmYGAgvvzlL8fDDz8cF1988SltGAAAACajXAHe398f27dvj7q6ur8+QXFx1NXVRXt7+3HXPfLIIzF9+vS47bbbTuo6fX190dPTM+wBAAAAk1muAD98+HAMDAxEZWXlsPHKysro7Owccc0vf/nLePbZZ2PDhg0nfZ3m5uaoqKgYelRXV+fZJgAAAEw44/op6EeOHIklS5bEhg0bYtq0aSe9btWqVdHd3T30OHDgwDjuEgAAAMbflDyTp02bFiUlJdHV1TVsvKurK6qqqo6Z/5vf/Cb27dsXCxcuHBobHBz884WnTIk9e/bEJZdccsy6QqEQhUIhz9YAAABgQst1B7y0tDTmzZsXbW1tQ2ODg4PR1tYWtbW1x8y/7LLL4s0334yOjo6hx+c///m44YYboqOjw1vLAQAAOGvkugMeEdHY2BjLli2L+fPnx4IFC2LdunXR29sby5cvj4iIpUuXxqxZs6K5uTnKysri8ssvH7b+ggsuiIg4ZhwAAADOZLkDvKGhIQ4dOhSrV6+Ozs7OmDt3brS2tg59MNv+/fujuHhcf7UcAAAAJp2iLMuy072JD9PT0xMVFRXR3d0d5eXlp3s7AAAAnOHGo0PdqgYAAIAEBDgAAAAkIMABAAAgAQEOAAAACQhwAAAASECAAwAAQAICHAAAABIQ4AAAAJCAAAcAAIAEBDgAAAAkIMABAAAgAQEOAAAACQhwAAAASECAAwAAQAICHAAAABIQ4AAAAJCAAAcAAIAEBDgAAAAkIMABAAAgAQEOAAAACQhwAAAASECAAwAAQAICHAAAABIQ4AAAAJCAAAcAAIAEBDgAAAAkIMABAAAgAQEOAAAACQhwAAAASECAAwAAQAICHAAAABIQ4AAAAJCAAAcAAIAEBDgAAAAkIMABAAAgAQEOAAAACQhwAAAASECAAwAAQAICHAAAABIQ4AAAAJCAAAcAAIAEBDgAAAAkIMABAAAgAQEOAAAACQhwAAAASECAAwAAQAICHAAAABIQ4AAAAJCAAAcAAIAEBDgAAAAkMKoAb2lpidmzZ0dZWVnU1NTEtm3bjjt3w4YNcd1118XUqVNj6tSpUVdXd8L5AAAAcCbKHeCbNm2KxsbGaGpqih07dsScOXOivr4+Dh48OOL8rVu3xuLFi+P111+P9vb2qK6ujhtvvDHefffdU948AAAATBZFWZZleRbU1NTE1VdfHU899VRERAwODkZ1dXXcddddsXLlyg9dPzAwEFOnTo2nnnoqli5delLX7OnpiYqKiuju7o7y8vI82wUAAIDcxqNDc90B7+/vj+3bt0ddXd1fn6C4OOrq6qK9vf2knuP999+PDz74IC688MJ8OwUAAIBJbEqeyYcPH46BgYGorKwcNl5ZWRm7d+8+qee47777YubMmcMi/m/19fVFX1/f0M89PT15tgkAAAATTtJPQV+7dm1s3LgxXnrppSgrKzvuvObm5qioqBh6VFdXJ9wlAAAAjL1cAT5t2rQoKSmJrq6uYeNdXV1RVVV1wrWPP/54rF27Nn7+85/HlVdeecK5q1atiu7u7qHHgQMH8mwTAAAAJpxcAV5aWhrz5s2Ltra2obHBwcFoa2uL2tra4677zne+E48++mi0trbG/PnzP/Q6hUIhysvLhz0AAABgMsv1O+AREY2NjbFs2bKYP39+LFiwINatWxe9vb2xfPnyiIhYunRpzJo1K5qbmyMi4t/+7d9i9erV8cILL8Ts2bOjs7MzIiI+8pGPxEc+8pExfCkAAAAwceUO8IaGhjh06FCsXr06Ojs7Y+7cudHa2jr0wWz79++P4uK/3lj/3ve+F/39/fHFL35x2PM0NTXFt771rVPbPQAAAEwSub8H/HTwPeAAAACkdNq/BxwAAAAYHQEOAAAACQhwAAAASECAAwAAQAICHAAAABIQ4AAAAJCAAAcAAIAEBDgAAAAkIMABAAAgAQEOAAAACQhwAAAASECAAwAAQAICHAAAABIQ4AAAAJCAAAcAAIAEBDgAAAAkIMABAAAgAQEOAAAACQhwAAAASECAAwAAQAICHAAAABIQ4AAAAJCAAAcAAIAEBDgAAAAkIMABAAAgAQEOAAAACQhwAAAASECAAwAAQAICHAAAABIQ4AAAAJCAAAcAAIAEBDgAAAAkIMABAAAgAQEOAAAACQhwAAAASECAAwAAQAICHAAAABIQ4AAAAJCAAAcAAIAEBDgAAAAkIMABAAAgAQEOAAAACQhwAAAASECAAwAAQAICHAAAABIQ4AAAAJCAAAcAAIAEBDgAAAAkIMABAAAgAQEOAAAACQhwAAAASGBUAd7S0hKzZ8+OsrKyqKmpiW3btp1w/k9+8pO47LLLoqysLK644orYsmXLqDYLAAAAk1XuAN+0aVM0NjZGU1NT7NixI+bMmRP19fVx8ODBEee/8cYbsXjx4rjtttti586dsWjRoli0aFG89dZbp7x5AAAAmCyKsizL8iyoqamJq6++Op566qmIiBgcHIzq6uq46667YuXKlcfMb2hoiN7e3njllVeGxv7pn/4p5s6dG+vXrz+pa/b09ERFRUV0d3dHeXl5nu0CAABAbuPRoVPyTO7v74/t27fHqlWrhsaKi4ujrq4u2tvbR1zT3t4ejY2Nw8bq6+vj5ZdfPu51+vr6oq+vb+jn7u7uiPjz/wEAAAAw3v7SnznvWZ9QrgA/fPhwDAwMRGVl5bDxysrK2L1794hrOjs7R5zf2dl53Os0NzfHww8/fMx4dXV1nu0CAADAKfmf//mfqKioGJPnyhXgqaxatWrYXfP33nsvPvrRj8b+/fvH7IXDRNPT0xPV1dVx4MABv2rBGcs552zgnHM2cM45G3R3d8dFF10UF1544Zg9Z64AnzZtWpSUlERXV9ew8a6urqiqqhpxTVVVVa75ERGFQiEKhcIx4xUVFf6Ac8YrLy93zjnjOeecDZxzzgbOOWeD4uKx+/buXM9UWloa8+bNi7a2tqGxwcHBaGtri9ra2hHX1NbWDpsfEfHaa68ddz4AAACciXK/Bb2xsTGWLVsW8+fPjwULFsS6deuit7c3li9fHhERS5cujVmzZkVzc3NERNx9991x/fXXxxNPPBE333xzbNy4MX71q1/FM888M7avBAAAACaw3AHe0NAQhw4ditWrV0dnZ2fMnTs3Wltbhz5obf/+/cNu0V9zzTXxwgsvxIMPPhj3339/fPzjH4+XX345Lr/88pO+ZqFQiKamphHflg5nCuecs4FzztnAOeds4JxzNhiPc577e8ABAACA/Mbut8kBAACA4xLgAAAAkIAABwAAgAQEOAAAACQwYQK8paUlZs+eHWVlZVFTUxPbtm074fyf/OQncdlll0VZWVlcccUVsWXLlkQ7hdHLc843bNgQ1113XUydOjWmTp0adXV1H/rnAiaCvH+f/8XGjRujqKgoFi1aNL4bhDGQ95y/9957sWLFipgxY0YUCoW49NJL/bcLE17ec75u3br4xCc+Eeeee25UV1fHvffeG3/6058S7Rby+cUvfhELFy6MmTNnRlFRUbz88ssfumbr1q3xmc98JgqFQnzsYx+L5557Lvd1J0SAb9q0KRobG6OpqSl27NgRc+bMifr6+jh48OCI8994441YvHhx3HbbbbFz585YtGhRLFq0KN56663EO4eTl/ecb926NRYvXhyvv/56tLe3R3V1ddx4443x7rvvJt45nLy85/wv9u3bF9/4xjfiuuuuS7RTGL2857y/vz8+97nPxb59++LFF1+MPXv2xIYNG2LWrFmJdw4nL+85f+GFF2LlypXR1NQUu3btimeffTY2bdoU999/f+Kdw8np7e2NOXPmREtLy0nN/+1vfxs333xz3HDDDdHR0RH33HNP3H777fHqq6/mu3A2ASxYsCBbsWLF0M8DAwPZzJkzs+bm5hHn33LLLdnNN988bKympib76le/Oq77hFOR95z/raNHj2bnn39+9vzzz4/XFuGUjeacHz16NLvmmmuy73//+9myZcuyL3zhCwl2CqOX95x/73vfyy6++OKsv78/1RbhlOU95ytWrMj++Z//edhYY2Njdu21147rPmEsRET20ksvnXDON7/5zezTn/70sLGGhoasvr4+17VO+x3w/v7+2L59e9TV1Q2NFRcXR11dXbS3t4+4pr29fdj8iIj6+vrjzofTbTTn/G+9//778cEHH8SFF144XtuEUzLac/7II4/E9OnT47bbbkuxTTgloznnP/vZz6K2tjZWrFgRlZWVcfnll8eaNWtiYGAg1bYhl9Gc82uuuSa2b98+9Db1vXv3xpYtW+Kmm25KsmcYb2PVoFPGclOjcfjw4RgYGIjKysph45WVlbF79+4R13R2do44v7Ozc9z2CadiNOf8b913330xc+bMY/7gw0QxmnP+y1/+Mp599tno6OhIsEM4daM553v37o3//M//jC9/+cuxZcuWeOedd+LrX/96fPDBB9HU1JRi25DLaM75rbfeGocPH47PfvazkWVZHD16NO68805vQeeMcbwG7enpiT/+8Y9x7rnnntTznPY74MCHW7t2bWzcuDFeeumlKCsrO93bgTFx5MiRWLJkSWzYsCGmTZt2urcD42ZwcDCmT58ezzzzTMybNy8aGhrigQceiPXr15/urcGY2bp1a6xZsyaefvrp2LFjR/z0pz+NzZs3x6OPPnq6twYTymm/Az5t2rQoKSmJrq6uYeNdXV1RVVU14pqqqqpc8+F0G805/4vHH3881q5dG//xH/8RV1555XhuE05J3nP+m9/8Jvbt2xcLFy4cGhscHIyIiClTpsSePXvikksuGd9NQ06j+ft8xowZcc4550RJScnQ2Cc/+cno7OyM/v7+KC0tHdc9Q16jOecPPfRQLFmyJG6//faIiLjiiiuit7c37rjjjnjggQeiuNh9Pya34zVoeXn5Sd/9jpgAd8BLS0tj3rx50dbWNjQ2ODgYbW1tUVtbO+Ka2traYfMjIl577bXjzofTbTTnPCLiO9/5Tjz66KPR2toa8+fPT7FVGLW85/yyyy6LN998Mzo6OoYen//854c+XbS6ujrl9uGkjObv82uvvTbeeeedoX9gioh4++23Y8aMGeKbCWk05/z9998/JrL/8o9Of/6MK5jcxqxB830+3PjYuHFjVigUsueeey779a9/nd1xxx3ZBRdckHV2dmZZlmVLlizJVq5cOTT/v/7rv7IpU6Zkjz/+eLZr166sqakpO+ecc7I333zzdL0E+FB5z/natWuz0tLS7MUXX8x+//vfDz2OHDlyul4CfKi85/xv+RR0JoO853z//v3Z+eefn/3Lv/xLtmfPnuyVV17Jpk+fnj322GOn6yXAh8p7zpuamrLzzz8/+9GPfpTt3bs3+/nPf55dcskl2S233HK6XgKc0JEjR7KdO3dmO3fuzCIie/LJJ7OdO3dmv/vd77Isy7KVK1dmS5YsGZq/d+/e7Lzzzsv+9V//Ndu1a1fW0tKSlZSUZK2trbmuOyECPMuy7Lvf/W520UUXZaWlpdmCBQuy//7v/x76366//vps2bJlw+b/+Mc/zi699NKstLQ0+/SnP51t3rw58Y4hvzzn/KMf/WgWEcc8mpqa0m8ccsj79/n/T4AzWeQ952+88UZWU1OTFQqF7OKLL86+/e1vZ0ePHk28a8gnzzn/4IMPsm9961vZJZdckpWVlWXV1dXZ17/+9ex///d/028cTsLrr78+4n9r/+VcL1u2LLv++uuPWTN37tystLQ0u/jii7Mf/vCHua9blGXeEwIAAADj7bT/DjgAAACcDQQ4AAAAJCDAAQAAIAEBDgAAAAkIcAAAAEhAgAMAAEACAhwAAAASEOAAAACQgAAHAACABAQ4AAAAJCDAAQAAIAEBDgAAAAn8P827NXL1szzcAAAAAElFTkSuQmCC", + "text/plain": [ + "
" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "import matplotlib.pyplot as plt\n", + "import seaborn as sns\n", + "# data['date'] = [i.split(' ')[0] for i in data[0]['Datetime']]\n", + "\n", + "fig, ax = plt.subplots(figsize = (12,4))\n", + "sns.scatterplot(x = data['Datetime'], y = data['data'], data = data, hue = 'is_outlier')" + ] + }, + { + "cell_type": "code", + "execution_count": 71, + "id": "ce450e0a-4b90-4477-8540-a6d0bdd99eb1", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Looking in indexes: https://__token__:****@gitlab.com/api/v4/groups/7239187/-/packages/pypi/simple\n", + "Collecting seaborn\n", + " Obtaining dependency information for seaborn from https://files.pythonhosted.org/packages/7b/e5/83fcd7e9db036c179e0352bfcd20f81d728197a16f883e7b90307a88e65e/seaborn-0.13.0-py3-none-any.whl.metadata\n", + " Downloading seaborn-0.13.0-py3-none-any.whl.metadata (5.3 kB)\n", + "Requirement already satisfied: numpy!=1.24.0,>=1.20 in /Users/yangkehan/projects/m3works/metevents/venv/lib/python3.11/site-packages (from seaborn) (1.26.0)\n", + "Requirement already satisfied: pandas>=1.2 in /Users/yangkehan/projects/m3works/metevents/venv/lib/python3.11/site-packages (from seaborn) (2.1.1)\n", + "Requirement already satisfied: matplotlib!=3.6.1,>=3.3 in /Users/yangkehan/projects/m3works/metevents/venv/lib/python3.11/site-packages (from seaborn) (3.8.0)\n", + "Requirement already satisfied: contourpy>=1.0.1 in /Users/yangkehan/projects/m3works/metevents/venv/lib/python3.11/site-packages (from matplotlib!=3.6.1,>=3.3->seaborn) (1.1.1)\n", + "Requirement already satisfied: cycler>=0.10 in /Users/yangkehan/projects/m3works/metevents/venv/lib/python3.11/site-packages (from matplotlib!=3.6.1,>=3.3->seaborn) (0.12.1)\n", + "Requirement already satisfied: fonttools>=4.22.0 in /Users/yangkehan/projects/m3works/metevents/venv/lib/python3.11/site-packages (from matplotlib!=3.6.1,>=3.3->seaborn) (4.43.1)\n", + "Requirement already satisfied: kiwisolver>=1.0.1 in /Users/yangkehan/projects/m3works/metevents/venv/lib/python3.11/site-packages (from matplotlib!=3.6.1,>=3.3->seaborn) (1.4.5)\n", + "Requirement already satisfied: packaging>=20.0 in /Users/yangkehan/projects/m3works/metevents/venv/lib/python3.11/site-packages (from matplotlib!=3.6.1,>=3.3->seaborn) (23.2)\n", + "Requirement already satisfied: pillow>=6.2.0 in /Users/yangkehan/projects/m3works/metevents/venv/lib/python3.11/site-packages (from matplotlib!=3.6.1,>=3.3->seaborn) (10.0.1)\n", + "Requirement already satisfied: pyparsing>=2.3.1 in /Users/yangkehan/projects/m3works/metevents/venv/lib/python3.11/site-packages (from matplotlib!=3.6.1,>=3.3->seaborn) (3.1.1)\n", + "Requirement already satisfied: python-dateutil>=2.7 in /Users/yangkehan/projects/m3works/metevents/venv/lib/python3.11/site-packages (from matplotlib!=3.6.1,>=3.3->seaborn) (2.8.2)\n", + "Requirement already satisfied: pytz>=2020.1 in /Users/yangkehan/projects/m3works/metevents/venv/lib/python3.11/site-packages (from pandas>=1.2->seaborn) (2023.3.post1)\n", + "Requirement already satisfied: tzdata>=2022.1 in /Users/yangkehan/projects/m3works/metevents/venv/lib/python3.11/site-packages (from pandas>=1.2->seaborn) (2023.3)\n", + "Requirement already satisfied: six>=1.5 in /Users/yangkehan/projects/m3works/metevents/venv/lib/python3.11/site-packages (from python-dateutil>=2.7->matplotlib!=3.6.1,>=3.3->seaborn) (1.16.0)\n", + "Using cached seaborn-0.13.0-py3-none-any.whl (294 kB)\n", + "Installing collected packages: seaborn\n", + "Successfully installed seaborn-0.13.0\n", + "\n", + "\u001b[1m[\u001b[0m\u001b[34;49mnotice\u001b[0m\u001b[1;39;49m]\u001b[0m\u001b[39;49m A new release of pip is available: \u001b[0m\u001b[31;49m23.2.1\u001b[0m\u001b[39;49m -> \u001b[0m\u001b[32;49m23.3.1\u001b[0m\n", + "\u001b[1m[\u001b[0m\u001b[34;49mnotice\u001b[0m\u001b[1;39;49m]\u001b[0m\u001b[39;49m To update, run: \u001b[0m\u001b[32;49mpip install --upgrade pip\u001b[0m\n" + ] + } + ], + "source": [ + "!pip install seaborn\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "2316b22e-318a-4bf4-b112-364fcbe45ec1", + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.11.5" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/tests/test_events.py b/tests/test_events.py index 58ab2b8..fb66637 100644 --- a/tests/test_events.py +++ b/tests/test_events.py @@ -4,6 +4,7 @@ from pandas import DatetimeIndex from metevents.events import StormEvents +from metevents.events import OutlierEvents @pytest.fixture() @@ -87,3 +88,26 @@ def test_storm_events_from_station(self, station_id, start, stop, source, mass, storms.find(instant_mass_to_start=mass, hours_to_stop=hours, min_storm_total=0.2) assert storms.N == n_storms + + +class TestOutlierEvents: + @pytest.fixture() + def outlier_storms(self, series, data): + yield OutlierEvents(series) + + @pytest.mark.parametrize('data, outliers', [ + ([2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 3, 3, 3, 3, 23, 4, 42, 2, 2, -40], [42, -40]) + ]) + def test_outliers(self, outlier_storms, data, outliers): + outlier_storms.find() + assert outlier_storms.outliers.values.tolist() == outliers + + @pytest.mark.parametrize('station_id, start, stop, source, outliers', [ + ('TUM', datetime(2021, 10, 1), datetime(2022, 9, 30), 'CDEC', + [3.34, 2.55, 2.43, 1.54, 1.14]) + ]) + def test_outliers_from_station(self, station_id, start, stop, source, outliers): + outlier_storms = OutlierEvents.from_station(station_id=station_id, start=start, stop=stop, source=source) + outlier_storms.find() + tolerance = 1e-10 + assert outlier_storms.outliers.values.tolist() == pytest.approx(outliers, rel=tolerance, abs=tolerance)