From 736d9e0b3a876dba0bfae48a717c8c9561d24cb2 Mon Sep 17 00:00:00 2001 From: americast Date: Thu, 28 Sep 2023 20:30:48 -0400 Subject: [PATCH] added auto support; updated docs --- .../source/reference/ai/model-forecasting.rst | 22 ++++++----- evadb/executor/create_function_executor.py | 37 ++++++++++++++----- .../long/test_model_forecasting.py | 4 +- 3 files changed, 41 insertions(+), 22 deletions(-) diff --git a/docs/source/reference/ai/model-forecasting.rst b/docs/source/reference/ai/model-forecasting.rst index 5c46bae852..ff7c1803b4 100644 --- a/docs/source/reference/ai/model-forecasting.rst +++ b/docs/source/reference/ai/model-forecasting.rst @@ -47,20 +47,22 @@ EvaDB's default forecast framework is `statsforecast `_ for details. If not provided, an auto increasing ID column will be used. - * - ID - - The name of column that represents an identifier for the series. If not provided, the whole table is considered as one series of data. - * - LIBRARY + * - TIME (default: 'ds') + - The name of the column that contains the datestamp, which should be of a format expected by Pandas, ideally YYYY-MM-DD for a date or YYYY-MM-DD HH:MM:SS for a timestamp. Please visit the `pandas documentation `_ for details. If the relevant column is not found, an auto increasing ID column will be used. + * - ID (default: 'unique_id') + - The name of the column that represents an identifier for the series. If the relevant column is not found, the whole table is considered as one series of data. + * - LIBRARY (default: 'statsforecast') - We can select one of `statsforecast` (default) or `neuralforecast`. `statsforecast` provides access to statistical forecasting methods, while `neuralforecast` gives access to deep-learning based forecasting methods. - * - MODEL + * - MODEL (default: 'AutoARIMA') - If LIBRARY is `statsforecast`, we can select one of AutoARIMA, AutoCES, AutoETS, AutoTheta. The default is AutoARIMA. Check `Automatic Forecasting `_ to learn details about these models. 
If LIBRARY is `neuralforecast`, we can select one of NHITS or NBEATS. The default is NBEATS. Check `NBEATS docs `_ for details. - * - EXOGENOUS + * - AUTO (default: 'F') - The names of columns to be treated as exogenous variables, separated by comma. These columns would be considered for forecasting by the backend only for LIBRARY `neuralforecast`. - * - Frequency - - A string indicating the frequency of the data. The common used ones are D, W, M, Y, which repestively represents day-, week-, month- and year- end frequency. The default value is M. Check `pandas available frequencies `_ for all available frequencies. + * - Frequency (default: 'auto') + - A string indicating the frequency of the data. The commonly used ones are D, W, M, Y, which respectively represent day-, week-, month- and year-end frequency. Check `pandas available frequencies `_ for all available frequencies. If it is not provided or is set to 'auto', the frequency is inferred from the data automatically. + +Note: If columns other than the ones required as mentioned above are passed while creating the function, they will be treated as exogenous variables if LIBRARY is `neuralforecast` and AUTO is set to F. In other situations, they will be ignored. 
Below is an example query specifying the above parameters: diff --git a/evadb/executor/create_function_executor.py b/evadb/executor/create_function_executor.py index 3cb9065f33..82570ea4ef 100644 --- a/evadb/executor/create_function_executor.py +++ b/evadb/executor/create_function_executor.py @@ -246,7 +246,7 @@ def handle_forecasting_function(self): Set or infer data frequency """ - if "frequency" not in arg_map.keys(): + if "frequency" not in arg_map.keys() or arg_map["frequency"] == "auto": arg_map["frequency"] = pd.infer_freq(data["ds"]) frequency = arg_map["frequency"] if frequency is None: @@ -290,6 +290,12 @@ def handle_forecasting_function(self): if "model" not in arg_map.keys(): arg_map["model"] = "NBEATS" + if ( + arg_map["model"].lower()[0] == "t" + and "auto" not in arg_map["model"].lower() + ): + arg_map["model"] = "Auto" + arg_map["model"] + try: model_here = model_dict[arg_map["model"]] except Exception: @@ -298,16 +304,17 @@ def handle_forecasting_function(self): raise FunctionIODefinitionError(err_msg) model_args = {} - if len(data.columns) >= 4: - exogenous_columns = [ - x for x in list(data.columns) if x not in ["ds", "y", "unique_id"] - ] - model_args["hist_exog_list"] = exogenous_columns - if "auto" not in arg_map["model"].lower(): model_args["input_size"] = 2 * horizon + if len(data.columns) >= 4: + exogenous_columns = [ + x + for x in list(data.columns) + if x not in ["ds", "y", "unique_id"] + ] + model_args["hist_exog_list"] = exogenous_columns - model_args["early_stop_patience_steps"] = 20 + model_args["early_stop_patience_steps"] = 20 model_args["h"] = horizon @@ -333,6 +340,12 @@ def handle_forecasting_function(self): if "model" not in arg_map.keys(): arg_map["model"] = "AutoARIMA" + if ( + arg_map["model"].lower()[0] == "t" + and "auto" not in arg_map["model"].lower() + ): + arg_map["model"] = "Auto" + arg_map["model"] + try: model_here = model_dict[arg_map["model"]] except Exception: @@ -348,7 +361,11 @@ def 
handle_forecasting_function(self): data["ds"] = pd.to_datetime(data["ds"]) model_save_dir_name = library + "_" + arg_map["model"] + "_" + new_freq - if len(data.columns) >= 4: + if ( + len(data.columns) >= 4 + and "auto" not in arg_map["model"].lower() + and library == "neuralforecast" + ): model_save_dir_name += "_exogenous_" + str(sorted(exogenous_columns)) model_dir = os.path.join( @@ -373,7 +390,7 @@ def handle_forecasting_function(self): if int(x.split("horizon")[1].split(".pkl")[0]) >= horizon ] if len(existing_model_files) == 0: - print("Training") + print("Training, please wait...") if library == "neuralforecast": model.fit(df=data, val_size=horizon) else: diff --git a/test/integration_tests/long/test_model_forecasting.py b/test/integration_tests/long/test_model_forecasting.py index 1941a4c71a..f47ff328ed 100644 --- a/test/integration_tests/long/test_model_forecasting.py +++ b/test/integration_tests/long/test_model_forecasting.py @@ -104,7 +104,6 @@ def test_forecast(self): HORIZON 12 PREDICT 'y' LIBRARY 'neuralforecast' - EXOGENOUS 'trend' FREQUENCY 'M'; """ execute_query_fetch_all(self.evadb, create_predict_udf) @@ -128,6 +127,7 @@ def test_forecast_with_column_rename(self): WHERE bedrooms = 2 ) TYPE Forecasting + HORIZON 24 PREDICT 'ma' ID 'type' TIME 'saledate' @@ -136,7 +136,7 @@ def test_forecast_with_column_rename(self): execute_query_fetch_all(self.evadb, create_predict_udf) predict_query = """ - SELECT HomeForecast(12); + SELECT HomeForecast(); """ result = execute_query_fetch_all(self.evadb, predict_query) self.assertEqual(len(result), 24)