documentation and cleanup

MartinMohrmann · Jan 29, 2021 · a6a20ae · a6a20ae
1 parent b0ca6c4
commit a6a20ae
Show file tree

Hide file tree

Showing 11 changed files with 1,758 additions and 0 deletions.
diff --git a/plt_algorithm.py b/plt_algorithm.py
diff --git a/plt_all_correlations.py b/plt_all_correlations.py
@@ -0,0 +1,136 @@
+from scipy import stats
+from matplotlib import pyplot as plt
+import numpy as np
+from sklearn.linear_model import LinearRegression
+import statsmodels.api as sm
+import statsmodels
+import numpy as np
+import numpy
+import pandas
+import utils
+
+resultsdictionary = utils.create_resultsdictionary()
+periods=['mon']
+variables = ['sivol', 'siconc']
+slopedict = {}
+rvdict = {}
+interceptdict = {}
+
+fig, axs = plt.subplots(nrows=5, ncols=5, figsize=[10,15],constrained_layout=True) #sharex=True, sharey=True, constrained_layout=True)
+#fig2, axs2 = plt.subplots(nrows=4, ncols=5, figsize=[13,10], sharex=True, sharey=True, constrained_layout=True)
+axs = axs.flat
+#axs2 = axs2.flat
+month = 8
+steplength = 12 # should be eiter 12 or 1 (yearly or monthly) # end CESM daily one step earlier!
+threshold = '30'
+variable1, period1 = 'sivol', 'mon'
+variable2, period2= 'siconc', 'mon'
+#print(threshold)
+models = list(set(resultsdictionary[variable1][period1][threshold].keys()) & 
+              set(resultsdictionary[variable2][period2][threshold].keys()))
+unused_axes = set(range(0,25)) - set(range(0,len(models)))
+for i in unused_axes:
+    fig.delaxes(axs[i])
+print('List of models:', models)
+models.sort()
+for index, model in enumerate(models):
+
+    if model == 'OBS':
+        continue
+    # print(model)
+    axs[index].set_aspect('equal', adjustable='box')
+    endstep = len(resultsdictionary['siconc']['mon']['30'][model]['pa_tot'])
+    if model == 'MRI-ESM2-0':
+        # because the MRI models submit data only after 1919 in daily resolution
+        year=0#69
+    elif model == 'SAM0-UNICON':
+        year=0#100
+    elif model in ['CESM2', 'CESM2-WACCM', 'CESM2-FV', 'CESM2-WACCM-FV']:
+        endstep = len(resultsdictionary['siconc']['mon']['30'][model]['pa_tot'])-1
+    #elif model == 'OBS':
+        #year=31
+    else:
+        year=0
+
+    x = resultsdictionary[variable1][period1][threshold][model]['pa_tot'].groupby(pandas.Grouper(freq='M')).mean()[month:endstep:steplength].values
+    X = x
+    #y = resultsdictionary['siconc']['mon'][threshold][model]['pa_tot'][12*year+month:endstep:steplength].values
+    y = resultsdictionary[variable2][period2][threshold][model]['pa_tot'][12*year+month:endstep:steplength].values
+    print(model, len(x), len(y))
+    axs[index].scatter(x, y, alpha=0.5)
+    #plt.xlabel('polynya areas computed from "sivol"')
+    #plt.ylabel('polynya areas computed from "siconc"')
+    axs[index].set_title(model, pad=15)
+
+    slope, intercept, r_value, p_value, std_err = stats.linregress(x, y)
+    # print('sterr:', std_err)
+    xmax = max(max(x), max(y))#0.5e12
+    x_coord = numpy.linspace(0,xmax,100)
+    axs[index].plot(x,intercept+x*slope+intercept, color='blue', ls=':', lw=2)
+
+    x = sm.add_constant(x)
+    res_ols = sm.OLS(y, x).fit()
+    # print(res_ols.summary())
+    slope = res_ols.params[1]
+    slopedict[model] = slope
+    rvdict[model] = r_value
+    interceptdict[model] = intercept
+
+    slopelowest = res_ols.conf_int(alpha=0.05, cols=None)[1][0]
+    slopehighest= res_ols.conf_int(alpha=0.05, cols=None)[1][1]
+    intercept = res_ols.params[0]
+    interceptlowest = res_ols.conf_int(alpha=0.05, cols=None)[0][0]
+    intercepthighest= res_ols.conf_int(alpha=0.05, cols=None)[0][1]
+
+    axs[index].plot(x_coord,intercept+x_coord*slope, color='green')
+    axs[index].fill_between(x_coord, interceptlowest+x_coord*slopelowest, intercepthighest+x_coord*slopehighest, alpha=0.2)
+
+    axs[index].set_xlim(-.1*xmax, xmax)
+    axs[index].set_ylim(-.1*xmax, xmax)
+
+    axs[index].grid()
+
+    #residual = res_ols.resid
+    residual = y-x[:,1]
+    #residual = y-x[:,1]
+    relative_residual = abs((residual/numpy.maximum(X,y))[~numpy.isnan(residual/numpy.maximum(x[:,1],y))])
+    #print(model, '90% percentile:',numpy.percentile(relative_residual, 90))
+    #print(model, mean(abs(relative_residual)))#numpy.std(relative_residual))
+
+    #axs2[index].hist(residual, bins=40)
+    #axs2[index].set_title(model)
+
+fig.suptitle('polynya areas computed from "sivol monthly" (x) vs "siconc monthly (y)" month:%s, stepl:%s'%(month, steplength))
+#fig.delaxes(axs[17])
+#fig.delaxes(axs[18])
+#fig.delaxes(axs[19])
+plt.savefig('./all_correlations/all_correlations_%s%s_%s%s_%s.png'%(variable1, period1, variable2, period2, threshold))
+
+fig, axs = plt.subplots(ncols=3, figsize=[20,6])
+df = pandas.DataFrame([interceptdict.values(), slopedict.values(), rvdict.values()], columns=interceptdict.keys(), index=['intercept', 'slope', 'r-value'])
+axi0 = df.T['intercept'].plot(kind='bar', title='intercept', ax=axs[0])
+axi1 = df.T['slope'].plot(kind='bar', title='slope', ax=axs[1])
+axi2 = df.T['r-value'].plot(kind='bar', title='r-value', ax=axs[2])
+
+print('mean slope:',df.T['slope'].mean(), 'mean r-value:',df.T['r-value'].mean())
+
+x_offset = 0.02
+y_offset = 0.04
+
+for p in axi0.patches:
+    b = p.get_bbox()
+    x = b.y1 + b.y0
+    val = f"{x:.3}"        
+    axi0.annotate(val, ((b.x0 + b.x1)/2 + x_offset, b.y1 + y_offset), rotation='90')
+
+for p in axi1.patches:
+    b = p.get_bbox()
+    val = "{:.2f}".format(b.y1 + b.y0)        
+    axi1.annotate(val, ((b.x0 + b.x1)/2 + x_offset, b.y1 + y_offset), rotation='90')
+
+for p in axi2.patches:
+    b = p.get_bbox()
+    val = "{:.2f}".format(b.y1 + b.y0)        
+    axi2.annotate(val, ((b.x0 + b.x1)/2 + x_offset, b.y1 + y_offset), rotation='90')
+
+plt.savefig('./all_correlations/all_correlations_%s%s_%s%s_%sbarchart'%(variable1, period1, variable2, period2, threshold))
diff --git a/plt_barchart.py b/plt_barchart.py
@@ -0,0 +1,86 @@
+import utils
+resultsdictionary = utils.create_resultsdictionary()
+
+# Besonders spannend ist hier das ganze mal mit max() und mal mit mean() zu aggreggieren und die Monate zu ändern
+# beachte das die ganze Statistik für die gesamte südliche Hemisphere oder alternativ für die Weddell Sea gemacht werden 
+# kann
+
+threshold = '30'
+def agg(ice_var, area_var, period, method):
+    models = resultsdictionary[ice_var][period][threshold].keys()
+    for model in models:
+        if area_var in ['pa_co']:
+            resultsdictionary[ice_var][period][threshold][model]['pa_co'] = resultsdictionary[ice_var][period][threshold][model]['pa_tot'] - resultsdictionary[ice_var][period][threshold][model]['pa_op']
+        if area_var in ['pa_we_co']:
+            resultsdictionary[ice_var][period][threshold][model]['pa_we_co'] = resultsdictionary[ice_var][period][threshold][model]['pa_we'] - resultsdictionary[ice_var][period][threshold][model]['pa_we_op']
+
+    df = pandas.concat([resultsdictionary[ice_var][period][threshold][model][area_var] for model in models], axis=1, keys=models)
+    df
+    if method=='mean':
+        df = df[(df.index.month >= 5) & (df.index.month <= 11) & (df.index.year >= 1979) & (
+            df.index.dayofyear < 320)].groupby(pandas.Grouper(freq='A')).mean()
+    elif method=='max':
+        df = df[(df.index.month >= 5) & (df.index.month <= 11) & (df.index.year >= 1979) & (
+            df.index.dayofyear < 320)].groupby(pandas.Grouper(freq='A')).max()
+    #breakpoint()
+    df = df.describe()
+    # breakpoint()
+    return df
+
+def barcharts(resultsdictionary, period, ice_var, ax, method, text):
+    complete = {}
+    #if period == 'mon' and method == 'mean' and ice_var == 'siconc':
+    #    yerr = resultsdictionary[ice_var][period]['30'][model]['pa_we'].describe()['25%']
+    yerr = agg(ice_var=ice_var, area_var='pa_we', period=period, method=method).T['50%']
+    # breakpoint()
+    # breakpoint()
+    complete['pa_we_tot'] = agg(ice_var=ice_var, area_var='pa_we', period=period, method=method).T['mean'] #+ numpy.random.rand(len(complete))
+    # ACHTUNG: HIER SITZT NOCH EIN FEHLER! Ich kann in folgender Zeile nicht die Differenz aus möglicherweise unterschiedlichen Zeitschritten ziehen
+    complete['pa_we_co'] = agg(ice_var=ice_var, area_var='pa_we_co', period=period, method=method).T['mean']#-agg(ice_var=ice_var, area_var='pa_we_op', period=period, method=method)
+    #resultsdictionary[ice_var][period][threshold]
+    complete['pa_we_op'] = agg(ice_var=ice_var, area_var='pa_we_op',  period=period, method=method).T['mean']
+    #complete['pa_tot_max'] = aggmax(ice_var=ice_var, area_var='pa_tot',  period=period)
+    complete = pandas.DataFrame([complete['pa_we_tot'], complete['pa_we_co'], complete['pa_we_op']], index=['pa_we_tot', 'pa_we_co', 'pa_we_op']).transpose()
+
+    alpha = 0.2 if method=='max' else 1
+    # complete['pa_tot'] = complete['pa_tot'] + numpy.random.rand(len(complete))
+    # complete['name'] = complete.index
+    # import pdb; pdb.set_trace()
+    # complete.sort_values(by=['name','pa_tot', ])
+    # complete['randNumCol'] = numpy.random.sample(len(complete))
+    complete = complete.sort_values(by=['pa_we_tot'], ascending=False)
+    #complete['pa_we_tot'].max()+0.2*complete['pa_we_tot'].max()
+    if method == 'mean':
+        #breakpoint()
+        (complete['pa_we_co']+complete['pa_we_op']).plot(kind='bar', yerr=yerr, ylim=(-.1e11,ymax),title=text+'%s %s'%(ice_var, period), ax=ax, alpha=0.1)#, yerr=complete['pa_tot_max'])
+    complete[['pa_we_op', 'pa_we_co']].plot(kind='bar', stacked=True, ylim=(-.1e11,ymax),title=text+'%s %s'%(ice_var, period), ax=ax, alpha=alpha)#, yerr=complete['pa_tot_max'])
+
+    #complete['pa_tot_max'].plot(kind="bar", ax=ax, alpha=0.2)
+    #ax.text(0.1, 0.9,text, ha='center', va='center', transform=ax.transAxes, fontsize=20)
+    return complete
+
+fig, axs = plt.subplots(ncols=3, nrows=2, figsize=[20,15])
+axs = axs.flat
+ymax = 2e11
+# !!!!!!!!!!!!!!add aggregate = 'full' to all the following lines!
+complete_siconc_mon = barcharts(resultsdictionary=resultsdictionary, period='mon', ice_var='siconc', ax=axs[0],text='',method='max')
+complete_siconc_mon = barcharts(resultsdictionary=resultsdictionary, period='mon', ice_var='siconc', ax=axs[0],text='(a) ',method='mean')#
+complete_siconc_day = barcharts(resultsdictionary=resultsdictionary, period='day', ice_var='siconc', ax=axs[1],text='', method='max')
+complete_siconc_day = barcharts(resultsdictionary=resultsdictionary, period='day', ice_var='siconc', ax=axs[1],text='(b) ',method='mean')
+complete_sivol_mon  = barcharts(resultsdictionary=resultsdictionary, period='mon', ice_var='sivol', ax=axs[2], text='',method='max')
+complete_sivol_mon  = barcharts(resultsdictionary=resultsdictionary, period='mon', ice_var='sivol', ax=axs[2], text='(c) ',method='mean')
+
+(complete_siconc_day-complete_siconc_mon)[['pa_we_op', 'pa_we_co']].plot(kind='bar', stacked=True, title='(d) PA diff siconc daily/monthly', ax=axs[4], ylim=(-0.5*ymax,0.5*ymax))
+(complete_sivol_mon-complete_siconc_mon)[['pa_we_op', 'pa_we_co']].plot(kind='bar', stacked=True, title='(e) PA diff siconc/sivol monthly', ax=axs[5], ylim=(-0.5*ymax,0.5*ymax))
+
+#axs[0].text(0.5, 0.5,'a',transform=ax.transAxes)
+#(complete_sivol_mon-complete_siconc_mon)[['pa_op', 'pa_co']].plot(kind='bar', stacked=True, title='PA diff siconc/sivol monthly', ax=axs[6], ylim=(-2.5e11,2.5e11))
+plt.subplots_adjust(hspace=0.5)
+fig.delaxes(axs[3])
+plt.savefig('barplot.png')
+plt.show()
+
+
+#plt.text()
+#plot_clustered_stacked([complete_siconc_mon, complete_sivol_mon, complete_siconc_day],["df1", "df2", "df3"])
+#plt.ylim(-1e11,1e11)
diff --git a/plt_hotbars.py b/plt_hotbars.py
@@ -0,0 +1,65 @@
+# this is the unaltered version for the monthly data seasonality plots "hotbars"
+from matplotlib import pyplot as plt
+from matplotlib.lines import Line2D
+from mpl_toolkits.axes_grid1 import make_axes_locatable
+from utils import colormap_alpha
+import utils
+import pandas
+import numpy
+
+plt.rcParams.update({'font.size': 18})
+plt.rcParams['xtick.bottom'] = plt.rcParams['xtick.labelbottom'] = True
+plt.rcParams['xtick.top'] = plt.rcParams['xtick.labeltop'] = False
+
+variable = 'sivol'
+freq = 'mon'
+threshold = '30'
+scenario = 'historical'
+resultsdictionary = utils.create_resultsdictionary()
+
+fig, axs = plt.subplots(figsize=[17,12], nrows=len(resultsdictionary[variable][freq][threshold].keys()), sharex=True)
+fig.subplots_adjust(hspace=0.3)
+models = list(resultsdictionary[variable][freq][threshold].keys())
+models.sort()
+models.remove('OBS')
+models.append('OBS')
+
+for index, model in enumerate(models):#resultsdictionary[threshold].keys()):
+    #if model == 'OBS':
+    #    continue
+    df = resultsdictionary[variable][freq][threshold][model]
+    df = df[(df.index.month >= 5) & (df.index.month <= 11) & (
+            df.index.dayofyear < 320)].groupby(pandas.Grouper(freq='A')).mean()
+    dfwinter_stacked = numpy.vstack((
+        df['pa_we_op'], 
+        df['pa_we_co']))
+    vmax = 1e11
+    vmin = 1e8
+    im = axs[index].imshow(dfwinter_stacked+0.2, aspect=2, cmap='hot', extent=[df.index.min().year, df.index.max().year, 0,2], vmin=vmin, vmax=vmax)
+    axs[index].set_yticklabels([])
+    axs[index].set_yticks([])
+
+    if index==-1:
+        axs[index].xaxis.set_tick_params(labeltop='on', labelbottom='off')
+        axs[index].set_xticks(ticks=range(0,165,20))
+        axs[index].set_xticklabels(range(0,165,20))
+        axs[index].set_xlabel('year')
+    else:
+        axs[index].set_xticks([])
+        axs[index].set_xticklabels([])
+        axs[index].minorticks_on()#(range(1850,2015))
+        axs[index].tick_params(axis='y',which='minor',bottom=False)    
+    cmap = plt.cm.get_cmap('hot')
+    axs[index].text(x=2022, y=0, s="%s"%model)
+
+plt.xlim(1850, 2020)
+axs[-1].set_xticks(range(1850,2020,20))
+axs[-1].set_xticklabels(range(1850,2020,20))
+
+cbar = fig.colorbar(im, ax=[axs[:]], location='left', extend='max', shrink=1, pad=0.02)
+cbar.ax.set_yticklabels(cbar.get_ticks()/1e12)
+
+fig.text(x=0.15, y=0.59, s='polynya area in million km²', va='center', rotation='vertical', fontsize=18)
+
+plt.savefig("./plt_hotbars/hotbars_%s_%s_%s.png"%(variable, freq, threshold), bbox_inches = 'tight',
+    pad_inches = 0, dpi=300)