diff --git a/Tools/ML.py b/Tools/ML.py index c656fe3..95a7921 100644 --- a/Tools/ML.py +++ b/Tools/ML.py @@ -33,6 +33,7 @@ def MLmap_multidim( loocv, restvar, missVal, + dataset = [] ): check.display( "processing %s, variable %s, index %s (dim: %s)..." @@ -60,7 +61,8 @@ def MLmap_multidim( # end extract Y extracted_Y = np.reshape(pool_arr, (len(packdata.Nlat), 1)) extr_all = np.concatenate((extracted_Y, extr_var, pft_ny), axis=1) - df_data = DataFrame(extr_all, columns=[labx]) # convert the array into dataframe + df_data = DataFrame(extr_all, columns=labx) # convert the array into dataframe + dataset.append(df_data) # df_data.ix[:,22]=(df_data.ix[:,22].astype(int)).astype(str) combine_XY = df_data.dropna() # delete pft=nan combine_XY = combine_XY.drop(["pft"], axis=1) @@ -86,15 +88,15 @@ def MLmap_multidim( ( Tree_Ens, predY_train, - loocv_R2, - loocv_reMSE, - loocv_slope, - loocv_dNRMSE, - loocv_sNRMSE, - loocv_iNRMSE, - loocv_f_SB, - loocv_f_SDSD, - loocv_f_LSC, + # loocv_R2, + # loocv_reMSE, + # loocv_slope, + # loocv_dNRMSE, + # loocv_sNRMSE, + # loocv_iNRMSE, + # loocv_f_SB, + # loocv_f_SDSD, + # loocv_f_LSC, ) = train.training_BAT(combineXY, logfile, loocv) if not Tree_Ens: @@ -129,6 +131,7 @@ def MLmap_multidim( if (PFT_mask[ipft - 1] > 0).any(): return MLeval.evaluation_map(Global_Predicted_Y_map, pool_map, ipft, PFT_mask) + # evaluation R2, RMSE, slope, reMSE, dNRMSE, sNRMSE, iNRMSE, f_SB, f_SDSD, f_LSC = ( MLeval.evaluation_map(Global_Predicted_Y_map, pool_map, ipft, PFT_mask) @@ -186,14 +189,9 @@ def MLmap_multidim( ) ) plt.close("all") - else: - check.display( - "%s, variable %s, index %s (dim: %s) : NO DATA!" - % (ipool, varname, ind, ii["dim_loop"]), - logfile, - ) - if ind[-1] == ii["loops"][ii["dim_loop"][-1]][-1]: - print(varname, ind) + + raise ValueError("%s, variable %s, index %s (dim: %s) : NO DATA!" + % (ipool, varname, ind, ii["dim_loop"])) ##@param[in] packdata packaged data @@ -225,6 +223,7 @@ def MLloop( Yvar = varlist["resp"]["variables"] + comb_ds = {} frames = [] for ipool, iis in Yvar.items(): @@ -270,6 +269,7 @@ def MLloop( loocv, restvar, missVal, + comb_ds.setdefault(ipool, []) ) if res: res["var"] = varname @@ -277,10 +277,17 @@ def MLloop( res[f"dim_{i+1}"] = k res[f"ind_{i+1}"] = v result.append(res) + if len(result) > 1: + break # close&save netCDF file restnc.close() + if len(result) > 2: + break frames.append(pd.DataFrame(result).set_index("var")) + comb_ds = {k: pd.concat(v) for k, v in comb_ds.items()} + breakpoint() + return pd.concat(frames, keys=Yvar.keys(), names=["comp"]) diff --git a/Tools/train.py b/Tools/train.py index 3b8cacf..183a42a 100644 --- a/Tools/train.py +++ b/Tools/train.py @@ -44,15 +44,15 @@ def training_BAT(XY_train, logfile, loocv): return ( TreeEns, predY, - loocv_R2, - loocv_reMSE, - loocv_slope, - loocv_dNRMSE, - loocv_sNRMSE, - loocv_iNRMSE, - loocv_f_SB, - loocv_f_SDSD, - loocv_f_LSC, + # loocv_R2, + # loocv_reMSE, + # loocv_slope, + # loocv_dNRMSE, + # loocv_sNRMSE, + # loocv_iNRMSE, + # loocv_f_SB, + # loocv_f_SDSD, + # loocv_f_LSC, ) # If the length of unique target variable is not 1, @@ -116,11 +116,13 @@ def training_BAT(XY_train, logfile, loocv): # max_depth=14, min_samples_split=5) bag = BaggingRegressor( - base_estimator=tree, max_samples=0.8, n_estimators=300, random_state=1000 + base_estimator=tree, max_samples=0.8, n_estimators=1, random_state=1000 ) TreeEns = bag.fit(Xtrain, Ytrain, sample_weight=SW) # sample_weight=SW # predict predY = bag.predict(XX) + + return TreeEns, predY # leave one out cross validations loo = LeaveOneOut()