From 337871d656179baa992bfad95d3b28acd8d537a3 Mon Sep 17 00:00:00 2001 From: Matt Archer Date: Tue, 23 Jul 2024 11:04:35 +0100 Subject: [PATCH] apply ruff --- Tools/ML.py | 74 ++++++++++++++++++++++++++++------------------ Tools/extract_X.py | 2 +- Tools/readvar.py | 8 +++-- Tools/train.py | 19 +++++++----- 4 files changed, 63 insertions(+), 40 deletions(-) diff --git a/Tools/ML.py b/Tools/ML.py index aedec00..71cf0d0 100644 --- a/Tools/ML.py +++ b/Tools/ML.py @@ -28,7 +28,7 @@ def collect_data( # extract data extr_var = extract_X.var(packdata, ipft) - + # extract PFT map pft_ny = extract_X.pft(packdata, PFT_mask_lai, ipft) pft_ny = np.resize(pft_ny, (*extr_var.shape[:-1], 1)) @@ -56,12 +56,14 @@ def combine_data(frames, keys): raise ValueError("DataFrames have different columns") check_same = {} for col in columns: - check_same[col] = all((frame[col] == frames[0][col]).dropna().all() for frame in frames) - same_cols = [col for col, same in check_same.items() if same or col == 'pft'] + check_same[col] = all( + (frame[col] == frames[0][col]).dropna().all() for frame in frames + ) + same_cols = [col for col, same in check_same.items() if same or col == "pft"] df = pd.concat([df.drop(columns=same_cols) for df in frames], keys=keys, axis=1) df.columns = [f"{c}_{k}" for k, c in df.columns] df = pd.concat([df, frames[0][same_cols]], axis=1) - df = df.drop(columns=['pft']).dropna() + df = df.drop(columns=["pft"]).dropna() return df @@ -86,8 +88,8 @@ def MLmap_multidim( col_type = "None" type_val = "None" combineXY = combine_XY - - Y = combineXY.filter(regex='^Y_') + + Y = combineXY.filter(regex="^Y_") X = combineXY.drop(columns=Y.columns) # combine_XY=pd.get_dummies(combine_XY) # one-hot encoded @@ -142,8 +144,20 @@ def MLmap_multidim( return MLeval.evaluation_map(Global_Predicted_Y_map, Y, PFT_mask) -def plot_eval_results(Global_Predicted_Y_map, ipool, pool_map, combineXY, predY_train, varname, ind, ii, ipft, PFT_mask, resultpath, logfile): - +def plot_eval_results( + Global_Predicted_Y_map, + ipool, + pool_map, + combineXY, + predY_train, + varname, + ind, + ii, + ipft, + PFT_mask, + resultpath, + logfile, +): # evaluation R2, RMSE, slope, reMSE, dNRMSE, sNRMSE, iNRMSE, f_SB, f_SDSD, f_LSC = ( MLeval.evaluation_map(Global_Predicted_Y_map, pool_map, ipft, PFT_mask) @@ -257,29 +271,31 @@ def MLloop( if ipft in ii["skip_loop"]["pft"]: continue - dim_ind, = zip(ii["dim_loop"], ind) - - comb_ds[ipool].append(( - collect_data( - packdata, - ivar, - ipool, - PFT_mask_lai, - ipft, - varname, - ind, - ii, - labx, - varlist, - logfile, - ), - f"{varname}_{dim_ind[0]}_{dim_ind[1]}" - )) + (dim_ind,) = zip(ii["dim_loop"], ind) + + comb_ds[ipool].append( + ( + collect_data( + packdata, + ivar, + ipool, + PFT_mask_lai, + ipft, + varname, + ind, + ii, + labx, + varlist, + logfile, + ), + f"{varname}_{dim_ind[0]}_{dim_ind[1]}", + ) + ) break # close&save netCDF file restnc.close() - + if len(comb_ds[ipool]) > 3: break @@ -288,7 +304,7 @@ def MLloop( for ipool, vals in comb_ds.items(): df = combine_data(*zip(*vals)) df.to_csv(f"{resultpath}/{ipool}.csv") - + res = MLmap_multidim( packdata, df, @@ -302,5 +318,5 @@ def MLloop( missVal, ) results.append(res) - + return pd.concat(results, keys=comb_ds.keys(), names=["component"]) diff --git a/Tools/extract_X.py b/Tools/extract_X.py index dc128e3..b76679f 100644 --- a/Tools/extract_X.py +++ b/Tools/extract_X.py @@ -55,4 +55,4 @@ def var(packdata, ipft): extr_var.append(extracted_var.reshape(-1, len(packdata.Nlat), 1)) com_shape = max(map(np.shape, extr_var)) 
extr_var = [np.resize(a, com_shape) for a in extr_var] - return np.concatenate(extr_var, axis=-1) \ No newline at end of file + return np.concatenate(extr_var, axis=-1) diff --git a/Tools/readvar.py b/Tools/readvar.py index abb0ea5..350cec0 100644 --- a/Tools/readvar.py +++ b/Tools/readvar.py @@ -88,7 +88,9 @@ def readvar(varlist, config, logfile): # packdata.Tamp = packdata.Tmax - packdata.Tmin # 0.1.2 Other climatic variables (Rainf,Snowf,Qair,Psurf,SWdown,LWdown) - packdata.update((k, (["year", "month", "lat", "lon"], adict[f"MY{k}"])) for k in varname_clim) + packdata.update( + (k, (["year", "month", "lat", "lon"], adict[f"MY{k}"])) for k in varname_clim + ) # for index in range(len(varname_clim)): # if varname_clim[index] == "Tair": # continue @@ -167,12 +169,12 @@ def readvar(varlist, config, logfile): da[da == predvar[ipred]["missing_value"]] = np.nan if isinstance(da, np.ma.masked_array): da = da.filled(np.nan) - packdata[rename[ivar]] = (["veget", "lat", "lon"][-da.ndim:], da) + packdata[rename[ivar]] = (["veget", "lat", "lon"][-da.ndim :], da) ds = xarray.Dataset(packdata) # 0.3 Interactions between variables - ds['interx'] = ds.Tair * ds.Rainf + ds["interx"] = ds.Tair * ds.Rainf # packdata.interx2 = packdata.Temp_GS * packdata.Pre_GS ds.attrs.update( diff --git a/Tools/train.py b/Tools/train.py index f65eadd..b37f699 100644 --- a/Tools/train.py +++ b/Tools/train.py @@ -39,17 +39,17 @@ ##@retval predY predicted Y def training_BAT(X, Y, logfile, loocv): print("Data shapes: ", X.shape, Y.shape) - + # run the KMeans algorithm to find the cluster centers, and resample the data mod = KMeans(n_clusters=3) lab = mod.fit_predict(Y) count = Counter(lab) check.display("Counter(lab):" + str(count), logfile) over_samples = SMOTE() - over_samples_X, over_samples_y = over_samples.fit_resample(pd.concat([X, Y], axis=1), lab) - check.display( - "Counter(over_samples_y):" + str(Counter(over_samples_y)), logfile + over_samples_X, over_samples_y = over_samples.fit_resample( + pd.concat([X, Y], axis=1), lab ) + check.display("Counter(over_samples_y):" + str(Counter(over_samples_y)), logfile) X = over_samples_X[X.columns] Y = over_samples_X[Y.columns] print("Data shapes after resampling: ", X.shape, Y.shape) @@ -62,10 +62,15 @@ def training_BAT(X, Y, logfile, loocv): # optimizer=optim.Adam, # # device="cuda", # ) - model = MLPRegressor(hidden_layer_sizes=(64, 64), max_iter=100, - learning_rate='invscaling', learning_rate_init=0.1, verbose=True) + model = MLPRegressor( + hidden_layer_sizes=(64, 64), + max_iter=100, + learning_rate="invscaling", + learning_rate_init=0.1, + verbose=True, + ) model.fit(X, Y) predY = model.predict(X) - + return model, predY
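
Note for reviewers (not part of the diff above): the reformatting of Tools/train.py does not change behaviour — training_BAT still clusters the targets with KMeans, oversamples with SMOTE using the cluster labels, and then fits an MLPRegressor. Below is a minimal standalone sketch of that flow for reference, using synthetic data; the frame names, shapes, and random data are illustrative only and are not taken from the repository.

    from collections import Counter

    import numpy as np
    import pandas as pd
    from imblearn.over_sampling import SMOTE
    from sklearn.cluster import KMeans
    from sklearn.neural_network import MLPRegressor

    # Synthetic stand-ins for the predictor/target frames; shapes are illustrative.
    rng = np.random.default_rng(0)
    X = pd.DataFrame(rng.normal(size=(300, 5)), columns=[f"x{i}" for i in range(5)])
    Y = pd.DataFrame(rng.normal(size=(300, 2)), columns=["Y_a", "Y_b"])

    # KMeans turns the continuous targets into pseudo-class labels for SMOTE.
    lab = KMeans(n_clusters=3).fit_predict(Y)
    print("Counter(lab):", Counter(lab))

    # Oversample X and Y together so rows stay aligned after resampling.
    resampled, lab_res = SMOTE().fit_resample(pd.concat([X, Y], axis=1), lab)
    X_res, Y_res = resampled[X.columns], resampled[Y.columns]

    # Same MLP configuration as training_BAT after the reformat.
    model = MLPRegressor(
        hidden_layer_sizes=(64, 64),
        max_iter=100,
        learning_rate="invscaling",
        learning_rate_init=0.1,
    )
    model.fit(X_res, Y_res)
    predY = model.predict(X_res)
    print("Resampled shapes:", X_res.shape, Y_res.shape, predY.shape)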