Skip to content

Commit

Permalink
documentation and cleanup
Browse files Browse the repository at this point in the history
  • Loading branch information
MartinMohrmann committed Jan 29, 2021
1 parent b0ca6c4 commit a6a20ae
Show file tree
Hide file tree
Showing 11 changed files with 1,758 additions and 0 deletions.
Empty file added plt_algorithm.py
Empty file.
136 changes: 136 additions & 0 deletions plt_all_correlations.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,136 @@
from scipy import stats
from matplotlib import pyplot as plt
import numpy as np
from sklearn.linear_model import LinearRegression
import statsmodels.api as sm
import statsmodels
import numpy as np
import numpy
import pandas
import utils

resultsdictionary = utils.create_resultsdictionary()
periods=['mon']
variables = ['sivol', 'siconc']
slopedict = {}
rvdict = {}
interceptdict = {}

fig, axs = plt.subplots(nrows=5, ncols=5, figsize=[10,15],constrained_layout=True) #sharex=True, sharey=True, constrained_layout=True)
#fig2, axs2 = plt.subplots(nrows=4, ncols=5, figsize=[13,10], sharex=True, sharey=True, constrained_layout=True)
axs = axs.flat
#axs2 = axs2.flat
month = 8
steplength = 12 # should be eiter 12 or 1 (yearly or monthly) # end CESM daily one step earlier!
threshold = '30'
variable1, period1 = 'sivol', 'mon'
variable2, period2= 'siconc', 'mon'
#print(threshold)
models = list(set(resultsdictionary[variable1][period1][threshold].keys()) &
set(resultsdictionary[variable2][period2][threshold].keys()))
unused_axes = set(range(0,25)) - set(range(0,len(models)))
for i in unused_axes:
fig.delaxes(axs[i])
print('List of models:', models)
models.sort()
for index, model in enumerate(models):

if model == 'OBS':
continue
# print(model)
axs[index].set_aspect('equal', adjustable='box')
endstep = len(resultsdictionary['siconc']['mon']['30'][model]['pa_tot'])
if model == 'MRI-ESM2-0':
# because the MRI models submit data only after 1919 in daily resolution
year=0#69
elif model == 'SAM0-UNICON':
year=0#100
elif model in ['CESM2', 'CESM2-WACCM', 'CESM2-FV', 'CESM2-WACCM-FV']:
endstep = len(resultsdictionary['siconc']['mon']['30'][model]['pa_tot'])-1
#elif model == 'OBS':
#year=31
else:
year=0

x = resultsdictionary[variable1][period1][threshold][model]['pa_tot'].groupby(pandas.Grouper(freq='M')).mean()[month:endstep:steplength].values
X = x
#y = resultsdictionary['siconc']['mon'][threshold][model]['pa_tot'][12*year+month:endstep:steplength].values
y = resultsdictionary[variable2][period2][threshold][model]['pa_tot'][12*year+month:endstep:steplength].values
print(model, len(x), len(y))
axs[index].scatter(x, y, alpha=0.5)
#plt.xlabel('polynya areas computed from "sivol"')
#plt.ylabel('polynya areas computed from "siconc"')
axs[index].set_title(model, pad=15)

slope, intercept, r_value, p_value, std_err = stats.linregress(x, y)
# print('sterr:', std_err)
xmax = max(max(x), max(y))#0.5e12
x_coord = numpy.linspace(0,xmax,100)
axs[index].plot(x,intercept+x*slope+intercept, color='blue', ls=':', lw=2)

x = sm.add_constant(x)
res_ols = sm.OLS(y, x).fit()
# print(res_ols.summary())
slope = res_ols.params[1]
slopedict[model] = slope
rvdict[model] = r_value
interceptdict[model] = intercept

slopelowest = res_ols.conf_int(alpha=0.05, cols=None)[1][0]
slopehighest= res_ols.conf_int(alpha=0.05, cols=None)[1][1]
intercept = res_ols.params[0]
interceptlowest = res_ols.conf_int(alpha=0.05, cols=None)[0][0]
intercepthighest= res_ols.conf_int(alpha=0.05, cols=None)[0][1]

axs[index].plot(x_coord,intercept+x_coord*slope, color='green')
axs[index].fill_between(x_coord, interceptlowest+x_coord*slopelowest, intercepthighest+x_coord*slopehighest, alpha=0.2)

axs[index].set_xlim(-.1*xmax, xmax)
axs[index].set_ylim(-.1*xmax, xmax)

axs[index].grid()

#residual = res_ols.resid
residual = y-x[:,1]
#residual = y-x[:,1]
relative_residual = abs((residual/numpy.maximum(X,y))[~numpy.isnan(residual/numpy.maximum(x[:,1],y))])
#print(model, '90% percentile:',numpy.percentile(relative_residual, 90))
#print(model, mean(abs(relative_residual)))#numpy.std(relative_residual))

#axs2[index].hist(residual, bins=40)
#axs2[index].set_title(model)

fig.suptitle('polynya areas computed from "sivol monthly" (x) vs "siconc monthly (y)" month:%s, stepl:%s'%(month, steplength))
#fig.delaxes(axs[17])
#fig.delaxes(axs[18])
#fig.delaxes(axs[19])
plt.savefig('./all_correlations/all_correlations_%s%s_%s%s_%s.png'%(variable1, period1, variable2, period2, threshold))

fig, axs = plt.subplots(ncols=3, figsize=[20,6])
df = pandas.DataFrame([interceptdict.values(), slopedict.values(), rvdict.values()], columns=interceptdict.keys(), index=['intercept', 'slope', 'r-value'])
axi0 = df.T['intercept'].plot(kind='bar', title='intercept', ax=axs[0])
axi1 = df.T['slope'].plot(kind='bar', title='slope', ax=axs[1])
axi2 = df.T['r-value'].plot(kind='bar', title='r-value', ax=axs[2])

print('mean slope:',df.T['slope'].mean(), 'mean r-value:',df.T['r-value'].mean())

x_offset = 0.02
y_offset = 0.04

for p in axi0.patches:
b = p.get_bbox()
x = b.y1 + b.y0
val = f"{x:.3}"
axi0.annotate(val, ((b.x0 + b.x1)/2 + x_offset, b.y1 + y_offset), rotation='90')

for p in axi1.patches:
b = p.get_bbox()
val = "{:.2f}".format(b.y1 + b.y0)
axi1.annotate(val, ((b.x0 + b.x1)/2 + x_offset, b.y1 + y_offset), rotation='90')

for p in axi2.patches:
b = p.get_bbox()
val = "{:.2f}".format(b.y1 + b.y0)
axi2.annotate(val, ((b.x0 + b.x1)/2 + x_offset, b.y1 + y_offset), rotation='90')

plt.savefig('./all_correlations/all_correlations_%s%s_%s%s_%sbarchart'%(variable1, period1, variable2, period2, threshold))
86 changes: 86 additions & 0 deletions plt_barchart.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,86 @@
import utils
resultsdictionary = utils.create_resultsdictionary()

# Besonders spannend ist hier das ganze mal mit max() und mal mit mean() zu aggreggieren und die Monate zu ändern
# beachte das die ganze Statistik für die gesamte südliche Hemisphere oder alternativ für die Weddell Sea gemacht werden
# kann

threshold = '30'
def agg(ice_var, area_var, period, method):
models = resultsdictionary[ice_var][period][threshold].keys()
for model in models:
if area_var in ['pa_co']:
resultsdictionary[ice_var][period][threshold][model]['pa_co'] = resultsdictionary[ice_var][period][threshold][model]['pa_tot'] - resultsdictionary[ice_var][period][threshold][model]['pa_op']
if area_var in ['pa_we_co']:
resultsdictionary[ice_var][period][threshold][model]['pa_we_co'] = resultsdictionary[ice_var][period][threshold][model]['pa_we'] - resultsdictionary[ice_var][period][threshold][model]['pa_we_op']

df = pandas.concat([resultsdictionary[ice_var][period][threshold][model][area_var] for model in models], axis=1, keys=models)
df
if method=='mean':
df = df[(df.index.month >= 5) & (df.index.month <= 11) & (df.index.year >= 1979) & (
df.index.dayofyear < 320)].groupby(pandas.Grouper(freq='A')).mean()
elif method=='max':
df = df[(df.index.month >= 5) & (df.index.month <= 11) & (df.index.year >= 1979) & (
df.index.dayofyear < 320)].groupby(pandas.Grouper(freq='A')).max()
#breakpoint()
df = df.describe()
# breakpoint()
return df

def barcharts(resultsdictionary, period, ice_var, ax, method, text):
complete = {}
#if period == 'mon' and method == 'mean' and ice_var == 'siconc':
# yerr = resultsdictionary[ice_var][period]['30'][model]['pa_we'].describe()['25%']
yerr = agg(ice_var=ice_var, area_var='pa_we', period=period, method=method).T['50%']
# breakpoint()
# breakpoint()
complete['pa_we_tot'] = agg(ice_var=ice_var, area_var='pa_we', period=period, method=method).T['mean'] #+ numpy.random.rand(len(complete))
# ACHTUNG: HIER SITZT NOCH EIN FEHLER! Ich kann in folgender Zeile nicht die Differenz aus möglicherweise unterschiedlichen Zeitschritten ziehen
complete['pa_we_co'] = agg(ice_var=ice_var, area_var='pa_we_co', period=period, method=method).T['mean']#-agg(ice_var=ice_var, area_var='pa_we_op', period=period, method=method)
#resultsdictionary[ice_var][period][threshold]
complete['pa_we_op'] = agg(ice_var=ice_var, area_var='pa_we_op', period=period, method=method).T['mean']
#complete['pa_tot_max'] = aggmax(ice_var=ice_var, area_var='pa_tot', period=period)
complete = pandas.DataFrame([complete['pa_we_tot'], complete['pa_we_co'], complete['pa_we_op']], index=['pa_we_tot', 'pa_we_co', 'pa_we_op']).transpose()

alpha = 0.2 if method=='max' else 1
# complete['pa_tot'] = complete['pa_tot'] + numpy.random.rand(len(complete))
# complete['name'] = complete.index
# import pdb; pdb.set_trace()
# complete.sort_values(by=['name','pa_tot', ])
# complete['randNumCol'] = numpy.random.sample(len(complete))
complete = complete.sort_values(by=['pa_we_tot'], ascending=False)
#complete['pa_we_tot'].max()+0.2*complete['pa_we_tot'].max()
if method == 'mean':
#breakpoint()
(complete['pa_we_co']+complete['pa_we_op']).plot(kind='bar', yerr=yerr, ylim=(-.1e11,ymax),title=text+'%s %s'%(ice_var, period), ax=ax, alpha=0.1)#, yerr=complete['pa_tot_max'])
complete[['pa_we_op', 'pa_we_co']].plot(kind='bar', stacked=True, ylim=(-.1e11,ymax),title=text+'%s %s'%(ice_var, period), ax=ax, alpha=alpha)#, yerr=complete['pa_tot_max'])

#complete['pa_tot_max'].plot(kind="bar", ax=ax, alpha=0.2)
#ax.text(0.1, 0.9,text, ha='center', va='center', transform=ax.transAxes, fontsize=20)
return complete

fig, axs = plt.subplots(ncols=3, nrows=2, figsize=[20,15])
axs = axs.flat
ymax = 2e11
# !!!!!!!!!!!!!!add aggregate = 'full' to all the following lines!
complete_siconc_mon = barcharts(resultsdictionary=resultsdictionary, period='mon', ice_var='siconc', ax=axs[0],text='',method='max')
complete_siconc_mon = barcharts(resultsdictionary=resultsdictionary, period='mon', ice_var='siconc', ax=axs[0],text='(a) ',method='mean')#
complete_siconc_day = barcharts(resultsdictionary=resultsdictionary, period='day', ice_var='siconc', ax=axs[1],text='', method='max')
complete_siconc_day = barcharts(resultsdictionary=resultsdictionary, period='day', ice_var='siconc', ax=axs[1],text='(b) ',method='mean')
complete_sivol_mon = barcharts(resultsdictionary=resultsdictionary, period='mon', ice_var='sivol', ax=axs[2], text='',method='max')
complete_sivol_mon = barcharts(resultsdictionary=resultsdictionary, period='mon', ice_var='sivol', ax=axs[2], text='(c) ',method='mean')

(complete_siconc_day-complete_siconc_mon)[['pa_we_op', 'pa_we_co']].plot(kind='bar', stacked=True, title='(d) PA diff siconc daily/monthly', ax=axs[4], ylim=(-0.5*ymax,0.5*ymax))
(complete_sivol_mon-complete_siconc_mon)[['pa_we_op', 'pa_we_co']].plot(kind='bar', stacked=True, title='(e) PA diff siconc/sivol monthly', ax=axs[5], ylim=(-0.5*ymax,0.5*ymax))

#axs[0].text(0.5, 0.5,'a',transform=ax.transAxes)
#(complete_sivol_mon-complete_siconc_mon)[['pa_op', 'pa_co']].plot(kind='bar', stacked=True, title='PA diff siconc/sivol monthly', ax=axs[6], ylim=(-2.5e11,2.5e11))
plt.subplots_adjust(hspace=0.5)
fig.delaxes(axs[3])
plt.savefig('barplot.png')
plt.show()


#plt.text()
#plot_clustered_stacked([complete_siconc_mon, complete_sivol_mon, complete_siconc_day],["df1", "df2", "df3"])
#plt.ylim(-1e11,1e11)
65 changes: 65 additions & 0 deletions plt_hotbars.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,65 @@
# this is the unaltered version for the monthly data seasonality plots "hotbars"
from matplotlib import pyplot as plt
from matplotlib.lines import Line2D
from mpl_toolkits.axes_grid1 import make_axes_locatable
from utils import colormap_alpha
import utils
import pandas
import numpy

plt.rcParams.update({'font.size': 18})
plt.rcParams['xtick.bottom'] = plt.rcParams['xtick.labelbottom'] = True
plt.rcParams['xtick.top'] = plt.rcParams['xtick.labeltop'] = False

variable = 'sivol'
freq = 'mon'
threshold = '30'
scenario = 'historical'
resultsdictionary = utils.create_resultsdictionary()

fig, axs = plt.subplots(figsize=[17,12], nrows=len(resultsdictionary[variable][freq][threshold].keys()), sharex=True)
fig.subplots_adjust(hspace=0.3)
models = list(resultsdictionary[variable][freq][threshold].keys())
models.sort()
models.remove('OBS')
models.append('OBS')

for index, model in enumerate(models):#resultsdictionary[threshold].keys()):
#if model == 'OBS':
# continue
df = resultsdictionary[variable][freq][threshold][model]
df = df[(df.index.month >= 5) & (df.index.month <= 11) & (
df.index.dayofyear < 320)].groupby(pandas.Grouper(freq='A')).mean()
dfwinter_stacked = numpy.vstack((
df['pa_we_op'],
df['pa_we_co']))
vmax = 1e11
vmin = 1e8
im = axs[index].imshow(dfwinter_stacked+0.2, aspect=2, cmap='hot', extent=[df.index.min().year, df.index.max().year, 0,2], vmin=vmin, vmax=vmax)
axs[index].set_yticklabels([])
axs[index].set_yticks([])

if index==-1:
axs[index].xaxis.set_tick_params(labeltop='on', labelbottom='off')
axs[index].set_xticks(ticks=range(0,165,20))
axs[index].set_xticklabels(range(0,165,20))
axs[index].set_xlabel('year')
else:
axs[index].set_xticks([])
axs[index].set_xticklabels([])
axs[index].minorticks_on()#(range(1850,2015))
axs[index].tick_params(axis='y',which='minor',bottom=False)
cmap = plt.cm.get_cmap('hot')
axs[index].text(x=2022, y=0, s="%s"%model)

plt.xlim(1850, 2020)
axs[-1].set_xticks(range(1850,2020,20))
axs[-1].set_xticklabels(range(1850,2020,20))

cbar = fig.colorbar(im, ax=[axs[:]], location='left', extend='max', shrink=1, pad=0.02)
cbar.ax.set_yticklabels(cbar.get_ticks()/1e12)

fig.text(x=0.15, y=0.59, s='polynya area in million km²', va='center', rotation='vertical', fontsize=18)

plt.savefig("./plt_hotbars/hotbars_%s_%s_%s.png"%(variable, freq, threshold), bbox_inches = 'tight',
pad_inches = 0, dpi=300)
Loading

0 comments on commit a6a20ae

Please sign in to comment.