Skip to content

Commit

Permalink
combine inputs into a single dataset for each ipool
Browse files (browse the repository at this point in the history)
  • Loading branch information
tztsai committed Jun 10, 2024
1 parent 1ad99a1 commit cd84e1b
Show file tree
Hide file tree
Showing 2 changed files with 37 additions and 28 deletions.
43 changes: 25 additions & 18 deletions Tools/ML.py
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,7 @@ def MLmap_multidim(
loocv,
restvar,
missVal,
dataset = []
):
check.display(
"processing %s, variable %s, index %s (dim: %s)..."
Expand Down Expand Up @@ -60,7 +61,8 @@ def MLmap_multidim(
# end extract Y
extracted_Y = np.reshape(pool_arr, (len(packdata.Nlat), 1))
extr_all = np.concatenate((extracted_Y, extr_var, pft_ny), axis=1)
df_data = DataFrame(extr_all, columns=[labx]) # convert the array into dataframe
df_data = DataFrame(extr_all, columns=labx) # convert the array into dataframe
dataset.append(df_data)
# df_data.ix[:,22]=(df_data.ix[:,22].astype(int)).astype(str)
combine_XY = df_data.dropna() # delete pft=nan
combine_XY = combine_XY.drop(["pft"], axis=1)
Expand All @@ -86,15 +88,15 @@ def MLmap_multidim(
(
Tree_Ens,
predY_train,
loocv_R2,
loocv_reMSE,
loocv_slope,
loocv_dNRMSE,
loocv_sNRMSE,
loocv_iNRMSE,
loocv_f_SB,
loocv_f_SDSD,
loocv_f_LSC,
# loocv_R2,
# loocv_reMSE,
# loocv_slope,
# loocv_dNRMSE,
# loocv_sNRMSE,
# loocv_iNRMSE,
# loocv_f_SB,
# loocv_f_SDSD,
# loocv_f_LSC,
) = train.training_BAT(combineXY, logfile, loocv)

if not Tree_Ens:
Expand Down Expand Up @@ -129,6 +131,7 @@ def MLmap_multidim(

if (PFT_mask[ipft - 1] > 0).any():
return MLeval.evaluation_map(Global_Predicted_Y_map, pool_map, ipft, PFT_mask)

# evaluation
R2, RMSE, slope, reMSE, dNRMSE, sNRMSE, iNRMSE, f_SB, f_SDSD, f_LSC = (
MLeval.evaluation_map(Global_Predicted_Y_map, pool_map, ipft, PFT_mask)
Expand Down Expand Up @@ -186,14 +189,9 @@ def MLmap_multidim(
)
)
plt.close("all")
else:
check.display(
"%s, variable %s, index %s (dim: %s) : NO DATA!"
% (ipool, varname, ind, ii["dim_loop"]),
logfile,
)
if ind[-1] == ii["loops"][ii["dim_loop"][-1]][-1]:
print(varname, ind)

raise ValueError("%s, variable %s, index %s (dim: %s) : NO DATA!"
% (ipool, varname, ind, ii["dim_loop"]))


##@param[in] packdata packaged data
Expand Down Expand Up @@ -225,6 +223,7 @@ def MLloop(

Yvar = varlist["resp"]["variables"]

comb_ds = {}
frames = []

for ipool, iis in Yvar.items():
Expand Down Expand Up @@ -270,17 +269,25 @@ def MLloop(
loocv,
restvar,
missVal,
comb_ds.setdefault(ipool, [])
)
if res:
res["var"] = varname
for i, (k, v) in enumerate(dim_ind):
res[f"dim_{i+1}"] = k
res[f"ind_{i+1}"] = v
result.append(res)
if len(result) > 1:
break

# close&save netCDF file
restnc.close()
if len(result) > 2:
break

frames.append(pd.DataFrame(result).set_index("var"))

comb_ds = {k: pd.concat(v) for k, v in comb_ds.items()}
breakpoint()

return pd.concat(frames, keys=Yvar.keys(), names=["comp"])
22 changes: 12 additions & 10 deletions Tools/train.py
Original file line number Diff line number Diff line change
Expand Up @@ -44,15 +44,15 @@ def training_BAT(XY_train, logfile, loocv):
return (
TreeEns,
predY,
loocv_R2,
loocv_reMSE,
loocv_slope,
loocv_dNRMSE,
loocv_sNRMSE,
loocv_iNRMSE,
loocv_f_SB,
loocv_f_SDSD,
loocv_f_LSC,
# loocv_R2,
# loocv_reMSE,
# loocv_slope,
# loocv_dNRMSE,
# loocv_sNRMSE,
# loocv_iNRMSE,
# loocv_f_SB,
# loocv_f_SDSD,
# loocv_f_LSC,
)

# If the length of unique target variable is not 1,
Expand Down Expand Up @@ -116,11 +116,13 @@ def training_BAT(XY_train, logfile, loocv):
# max_depth=14, min_samples_split=5)

bag = BaggingRegressor(
base_estimator=tree, max_samples=0.8, n_estimators=300, random_state=1000
base_estimator=tree, max_samples=0.8, n_estimators=1, random_state=1000
)
TreeEns = bag.fit(Xtrain, Ytrain, sample_weight=SW) # sample_weight=SW
# predict
predY = bag.predict(XX)

return TreeEns, predY

# leave one out cross validations
loo = LeaveOneOut()
Expand Down

0 comments on commit cd84e1b

Please sign in to comment.