diff --git a/plt_algorithm.py b/plt_algorithm.py new file mode 100644 index 0000000..e69de29 diff --git a/plt_all_correlations.py b/plt_all_correlations.py new file mode 100644 index 0000000..bbe83b7 --- /dev/null +++ b/plt_all_correlations.py @@ -0,0 +1,136 @@ +from scipy import stats +from matplotlib import pyplot as plt +import numpy as np +from sklearn.linear_model import LinearRegression +import statsmodels.api as sm +import statsmodels +import numpy as np +import numpy +import pandas +import utils + +resultsdictionary = utils.create_resultsdictionary() +periods=['mon'] +variables = ['sivol', 'siconc'] +slopedict = {} +rvdict = {} +interceptdict = {} + +fig, axs = plt.subplots(nrows=5, ncols=5, figsize=[10,15],constrained_layout=True) #sharex=True, sharey=True, constrained_layout=True) +#fig2, axs2 = plt.subplots(nrows=4, ncols=5, figsize=[13,10], sharex=True, sharey=True, constrained_layout=True) +axs = axs.flat +#axs2 = axs2.flat +month = 8 +steplength = 12 # should be eiter 12 or 1 (yearly or monthly) # end CESM daily one step earlier! 
+threshold = '30' +variable1, period1 = 'sivol', 'mon' +variable2, period2= 'siconc', 'mon' +#print(threshold) +models = list(set(resultsdictionary[variable1][period1][threshold].keys()) & + set(resultsdictionary[variable2][period2][threshold].keys())) +unused_axes = set(range(0,25)) - set(range(0,len(models))) +for i in unused_axes: + fig.delaxes(axs[i]) +print('List of models:', models) +models.sort() +for index, model in enumerate(models): + + if model == 'OBS': + continue + # print(model) + axs[index].set_aspect('equal', adjustable='box') + endstep = len(resultsdictionary['siconc']['mon']['30'][model]['pa_tot']) + if model == 'MRI-ESM2-0': + # because the MRI models submit data only after 1919 in daily resolution + year=0#69 + elif model == 'SAM0-UNICON': + year=0#100 + elif model in ['CESM2', 'CESM2-WACCM', 'CESM2-FV', 'CESM2-WACCM-FV']: + endstep = len(resultsdictionary['siconc']['mon']['30'][model]['pa_tot'])-1 + #elif model == 'OBS': + #year=31 + else: + year=0 + + x = resultsdictionary[variable1][period1][threshold][model]['pa_tot'].groupby(pandas.Grouper(freq='M')).mean()[month:endstep:steplength].values + X = x + #y = resultsdictionary['siconc']['mon'][threshold][model]['pa_tot'][12*year+month:endstep:steplength].values + y = resultsdictionary[variable2][period2][threshold][model]['pa_tot'][12*year+month:endstep:steplength].values + print(model, len(x), len(y)) + axs[index].scatter(x, y, alpha=0.5) + #plt.xlabel('polynya areas computed from "sivol"') + #plt.ylabel('polynya areas computed from "siconc"') + axs[index].set_title(model, pad=15) + + slope, intercept, r_value, p_value, std_err = stats.linregress(x, y) + # print('sterr:', std_err) + xmax = max(max(x), max(y))#0.5e12 + x_coord = numpy.linspace(0,xmax,100) + axs[index].plot(x,intercept+x*slope+intercept, color='blue', ls=':', lw=2) + + x = sm.add_constant(x) + res_ols = sm.OLS(y, x).fit() + # print(res_ols.summary()) + slope = res_ols.params[1] + slopedict[model] = slope + rvdict[model] = r_value 
+ interceptdict[model] = intercept + + slopelowest = res_ols.conf_int(alpha=0.05, cols=None)[1][0] + slopehighest= res_ols.conf_int(alpha=0.05, cols=None)[1][1] + intercept = res_ols.params[0] + interceptlowest = res_ols.conf_int(alpha=0.05, cols=None)[0][0] + intercepthighest= res_ols.conf_int(alpha=0.05, cols=None)[0][1] + + axs[index].plot(x_coord,intercept+x_coord*slope, color='green') + axs[index].fill_between(x_coord, interceptlowest+x_coord*slopelowest, intercepthighest+x_coord*slopehighest, alpha=0.2) + + axs[index].set_xlim(-.1*xmax, xmax) + axs[index].set_ylim(-.1*xmax, xmax) + + axs[index].grid() + + #residual = res_ols.resid + residual = y-x[:,1] + #residual = y-x[:,1] + relative_residual = abs((residual/numpy.maximum(X,y))[~numpy.isnan(residual/numpy.maximum(x[:,1],y))]) + #print(model, '90% percentile:',numpy.percentile(relative_residual, 90)) + #print(model, mean(abs(relative_residual)))#numpy.std(relative_residual)) + + #axs2[index].hist(residual, bins=40) + #axs2[index].set_title(model) + +fig.suptitle('polynya areas computed from "sivol monthly" (x) vs "siconc monthly (y)" month:%s, stepl:%s'%(month, steplength)) +#fig.delaxes(axs[17]) +#fig.delaxes(axs[18]) +#fig.delaxes(axs[19]) +plt.savefig('./all_correlations/all_correlations_%s%s_%s%s_%s.png'%(variable1, period1, variable2, period2, threshold)) + +fig, axs = plt.subplots(ncols=3, figsize=[20,6]) +df = pandas.DataFrame([interceptdict.values(), slopedict.values(), rvdict.values()], columns=interceptdict.keys(), index=['intercept', 'slope', 'r-value']) +axi0 = df.T['intercept'].plot(kind='bar', title='intercept', ax=axs[0]) +axi1 = df.T['slope'].plot(kind='bar', title='slope', ax=axs[1]) +axi2 = df.T['r-value'].plot(kind='bar', title='r-value', ax=axs[2]) + +print('mean slope:',df.T['slope'].mean(), 'mean r-value:',df.T['r-value'].mean()) + +x_offset = 0.02 +y_offset = 0.04 + +for p in axi0.patches: + b = p.get_bbox() + x = b.y1 + b.y0 + val = f"{x:.3}" + axi0.annotate(val, ((b.x0 + b.x1)/2 + 
x_offset, b.y1 + y_offset), rotation='90') + +for p in axi1.patches: + b = p.get_bbox() + val = "{:.2f}".format(b.y1 + b.y0) + axi1.annotate(val, ((b.x0 + b.x1)/2 + x_offset, b.y1 + y_offset), rotation='90') + +for p in axi2.patches: + b = p.get_bbox() + val = "{:.2f}".format(b.y1 + b.y0) + axi2.annotate(val, ((b.x0 + b.x1)/2 + x_offset, b.y1 + y_offset), rotation='90') + +plt.savefig('./all_correlations/all_correlations_%s%s_%s%s_%sbarchart'%(variable1, period1, variable2, period2, threshold)) diff --git a/plt_barchart.py b/plt_barchart.py new file mode 100644 index 0000000..39e57a5 --- /dev/null +++ b/plt_barchart.py @@ -0,0 +1,86 @@ +import utils +resultsdictionary = utils.create_resultsdictionary() + +# Besonders spannend ist hier das ganze mal mit max() und mal mit mean() zu aggreggieren und die Monate zu ändern +# beachte das die ganze Statistik für die gesamte südliche Hemisphere oder alternativ für die Weddell Sea gemacht werden +# kann + +threshold = '30' +def agg(ice_var, area_var, period, method): + models = resultsdictionary[ice_var][period][threshold].keys() + for model in models: + if area_var in ['pa_co']: + resultsdictionary[ice_var][period][threshold][model]['pa_co'] = resultsdictionary[ice_var][period][threshold][model]['pa_tot'] - resultsdictionary[ice_var][period][threshold][model]['pa_op'] + if area_var in ['pa_we_co']: + resultsdictionary[ice_var][period][threshold][model]['pa_we_co'] = resultsdictionary[ice_var][period][threshold][model]['pa_we'] - resultsdictionary[ice_var][period][threshold][model]['pa_we_op'] + + df = pandas.concat([resultsdictionary[ice_var][period][threshold][model][area_var] for model in models], axis=1, keys=models) + df + if method=='mean': + df = df[(df.index.month >= 5) & (df.index.month <= 11) & (df.index.year >= 1979) & ( + df.index.dayofyear < 320)].groupby(pandas.Grouper(freq='A')).mean() + elif method=='max': + df = df[(df.index.month >= 5) & (df.index.month <= 11) & (df.index.year >= 1979) & ( + 
df.index.dayofyear < 320)].groupby(pandas.Grouper(freq='A')).max() + #breakpoint() + df = df.describe() + # breakpoint() + return df + +def barcharts(resultsdictionary, period, ice_var, ax, method, text): + complete = {} + #if period == 'mon' and method == 'mean' and ice_var == 'siconc': + # yerr = resultsdictionary[ice_var][period]['30'][model]['pa_we'].describe()['25%'] + yerr = agg(ice_var=ice_var, area_var='pa_we', period=period, method=method).T['50%'] + # breakpoint() + # breakpoint() + complete['pa_we_tot'] = agg(ice_var=ice_var, area_var='pa_we', period=period, method=method).T['mean'] #+ numpy.random.rand(len(complete)) + # ACHTUNG: HIER SITZT NOCH EIN FEHLER! Ich kann in folgender Zeile nicht die Differenz aus möglicherweise unterschiedlichen Zeitschritten ziehen + complete['pa_we_co'] = agg(ice_var=ice_var, area_var='pa_we_co', period=period, method=method).T['mean']#-agg(ice_var=ice_var, area_var='pa_we_op', period=period, method=method) + #resultsdictionary[ice_var][period][threshold] + complete['pa_we_op'] = agg(ice_var=ice_var, area_var='pa_we_op', period=period, method=method).T['mean'] + #complete['pa_tot_max'] = aggmax(ice_var=ice_var, area_var='pa_tot', period=period) + complete = pandas.DataFrame([complete['pa_we_tot'], complete['pa_we_co'], complete['pa_we_op']], index=['pa_we_tot', 'pa_we_co', 'pa_we_op']).transpose() + + alpha = 0.2 if method=='max' else 1 + # complete['pa_tot'] = complete['pa_tot'] + numpy.random.rand(len(complete)) + # complete['name'] = complete.index + # import pdb; pdb.set_trace() + # complete.sort_values(by=['name','pa_tot', ]) + # complete['randNumCol'] = numpy.random.sample(len(complete)) + complete = complete.sort_values(by=['pa_we_tot'], ascending=False) + #complete['pa_we_tot'].max()+0.2*complete['pa_we_tot'].max() + if method == 'mean': + #breakpoint() + (complete['pa_we_co']+complete['pa_we_op']).plot(kind='bar', yerr=yerr, ylim=(-.1e11,ymax),title=text+'%s %s'%(ice_var, period), ax=ax, alpha=0.1)#, 
yerr=complete['pa_tot_max']) + complete[['pa_we_op', 'pa_we_co']].plot(kind='bar', stacked=True, ylim=(-.1e11,ymax),title=text+'%s %s'%(ice_var, period), ax=ax, alpha=alpha)#, yerr=complete['pa_tot_max']) + + #complete['pa_tot_max'].plot(kind="bar", ax=ax, alpha=0.2) + #ax.text(0.1, 0.9,text, ha='center', va='center', transform=ax.transAxes, fontsize=20) + return complete + +fig, axs = plt.subplots(ncols=3, nrows=2, figsize=[20,15]) +axs = axs.flat +ymax = 2e11 +# !!!!!!!!!!!!!!add aggregate = 'full' to all the following lines! +complete_siconc_mon = barcharts(resultsdictionary=resultsdictionary, period='mon', ice_var='siconc', ax=axs[0],text='',method='max') +complete_siconc_mon = barcharts(resultsdictionary=resultsdictionary, period='mon', ice_var='siconc', ax=axs[0],text='(a) ',method='mean')# +complete_siconc_day = barcharts(resultsdictionary=resultsdictionary, period='day', ice_var='siconc', ax=axs[1],text='', method='max') +complete_siconc_day = barcharts(resultsdictionary=resultsdictionary, period='day', ice_var='siconc', ax=axs[1],text='(b) ',method='mean') +complete_sivol_mon = barcharts(resultsdictionary=resultsdictionary, period='mon', ice_var='sivol', ax=axs[2], text='',method='max') +complete_sivol_mon = barcharts(resultsdictionary=resultsdictionary, period='mon', ice_var='sivol', ax=axs[2], text='(c) ',method='mean') + +(complete_siconc_day-complete_siconc_mon)[['pa_we_op', 'pa_we_co']].plot(kind='bar', stacked=True, title='(d) PA diff siconc daily/monthly', ax=axs[4], ylim=(-0.5*ymax,0.5*ymax)) +(complete_sivol_mon-complete_siconc_mon)[['pa_we_op', 'pa_we_co']].plot(kind='bar', stacked=True, title='(e) PA diff siconc/sivol monthly', ax=axs[5], ylim=(-0.5*ymax,0.5*ymax)) + +#axs[0].text(0.5, 0.5,'a',transform=ax.transAxes) +#(complete_sivol_mon-complete_siconc_mon)[['pa_op', 'pa_co']].plot(kind='bar', stacked=True, title='PA diff siconc/sivol monthly', ax=axs[6], ylim=(-2.5e11,2.5e11)) +plt.subplots_adjust(hspace=0.5) +fig.delaxes(axs[3]) 
+plt.savefig('barplot.png') +plt.show() + + +#plt.text() +#plot_clustered_stacked([complete_siconc_mon, complete_sivol_mon, complete_siconc_day],["df1", "df2", "df3"]) +#plt.ylim(-1e11,1e11) \ No newline at end of file diff --git a/plt_hotbars.py b/plt_hotbars.py new file mode 100644 index 0000000..ea95b5f --- /dev/null +++ b/plt_hotbars.py @@ -0,0 +1,65 @@ +# this is the unaltered version for the monthly data seasonality plots "hotbars" +from matplotlib import pyplot as plt +from matplotlib.lines import Line2D +from mpl_toolkits.axes_grid1 import make_axes_locatable +from utils import colormap_alpha +import utils +import pandas +import numpy + +plt.rcParams.update({'font.size': 18}) +plt.rcParams['xtick.bottom'] = plt.rcParams['xtick.labelbottom'] = True +plt.rcParams['xtick.top'] = plt.rcParams['xtick.labeltop'] = False + +variable = 'sivol' +freq = 'mon' +threshold = '30' +scenario = 'historical' +resultsdictionary = utils.create_resultsdictionary() + +fig, axs = plt.subplots(figsize=[17,12], nrows=len(resultsdictionary[variable][freq][threshold].keys()), sharex=True) +fig.subplots_adjust(hspace=0.3) +models = list(resultsdictionary[variable][freq][threshold].keys()) +models.sort() +models.remove('OBS') +models.append('OBS') + +for index, model in enumerate(models):#resultsdictionary[threshold].keys()): + #if model == 'OBS': + # continue + df = resultsdictionary[variable][freq][threshold][model] + df = df[(df.index.month >= 5) & (df.index.month <= 11) & ( + df.index.dayofyear < 320)].groupby(pandas.Grouper(freq='A')).mean() + dfwinter_stacked = numpy.vstack(( + df['pa_we_op'], + df['pa_we_co'])) + vmax = 1e11 + vmin = 1e8 + im = axs[index].imshow(dfwinter_stacked+0.2, aspect=2, cmap='hot', extent=[df.index.min().year, df.index.max().year, 0,2], vmin=vmin, vmax=vmax) + axs[index].set_yticklabels([]) + axs[index].set_yticks([]) + + if index==-1: + axs[index].xaxis.set_tick_params(labeltop='on', labelbottom='off') + axs[index].set_xticks(ticks=range(0,165,20)) + 
axs[index].set_xticklabels(range(0,165,20)) + axs[index].set_xlabel('year') + else: + axs[index].set_xticks([]) + axs[index].set_xticklabels([]) + axs[index].minorticks_on()#(range(1850,2015)) + axs[index].tick_params(axis='y',which='minor',bottom=False) + cmap = plt.cm.get_cmap('hot') + axs[index].text(x=2022, y=0, s="%s"%model) + +plt.xlim(1850, 2020) +axs[-1].set_xticks(range(1850,2020,20)) +axs[-1].set_xticklabels(range(1850,2020,20)) + +cbar = fig.colorbar(im, ax=[axs[:]], location='left', extend='max', shrink=1, pad=0.02) +cbar.ax.set_yticklabels(cbar.get_ticks()/1e12) + +fig.text(x=0.15, y=0.59, s='polynya area in million km²', va='center', rotation='vertical', fontsize=18) + +plt.savefig("./plt_hotbars/hotbars_%s_%s_%s.png"%(variable, freq, threshold), bbox_inches = 'tight', + pad_inches = 0, dpi=300) diff --git a/plt_locations.py b/plt_locations.py new file mode 100644 index 0000000..bfbb1a9 --- /dev/null +++ b/plt_locations.py @@ -0,0 +1,116 @@ +# Attention: This does not work with the GISS model yet! 
+# Fully working '12month climatology' of all polynyas +import pickle +import cmocean +import utils +from matplotlib import pyplot as plt +import cartopy +import cartopy.crs as ccrs +import numpy + +monthDict={1:'Jan', 2:'Feb', 3:'Mar', 4:'Apr', 5:'May', + 6:'Jun', 7:'Jul', 8:'Aug', 9:'Sep', 10:'Oct', + 11:'Nov', 12:'Dec'} +tsDict={'monthly':{1:1, 2:2, 3:3, 4:4, 5:5, 6:6, 7:7, + 8:8, 9:9, 10:10, 11:11, 12:12}, + 'daily':{1:15, 2:45, 3:75, 4:105, 5:135, 6:165, + 7:195, 8:225, 9:255, 10:285, 11:315, 12:345}} +freq_dict = {'daily':'day', 'monthly':'mon'} + +plt.rcParams.update({'font.size': 25}) +variable = 'siconc' +freq = 'daily' +threshold = '30' +scenario = 'historical' +if freq=='daily': + divider = 30 +else: + divider = 1 +resultsdictionary = utils.create_resultsdictionary() +models = list(resultsdictionary[variable][freq_dict[freq]]['30'].keys()) +models.sort() +models.remove('OBS') +#breakpoint() +if variable == 'sithick': + models.remove('CESM2-WACCM'); models.remove('CESM2-WACCM-FV2'); models.remove('MPI-ESM1-2-LR') + models.remove('BCC-ESM1'); models.remove('MPI-ESM-1-2-HAM') +print('plotting for %s models'%len(models)) + +fig = plt.figure(figsize=[25,25]) +fig.subplots_adjust(wspace=0.1, hspace=0.1) +#print(source_id) +print('List of models:', models) +for index, source_id in enumerate(models): + print(source_id) + if source_id in ['GISS-E2-1-H', 'OBS']:#, 'OBS']: + continue + dataset_areacello = utils.open_CMIP_variable( + model=source_id, variable='areacello', scenario='historical', freq='daily') + dataset_areacello = utils.homogenize_CMIP_variable(dataset_areacello) + for i in range(9,10): + timestep = i#tsDict[freq][i] + sa_tot_file = open('../sa_tot_alltime/sa_tot_alltime_%s_%s_%s_%s_%s_%s'%( + freq, scenario,threshold,source_id,variable,timestep), "rb" ) + pa_tot_file = open('../sa_tot_alltime/pa_tot_alltime_%s_%s_%s_%s_%s_%s'%( + freq, scenario,threshold,source_id, variable, timestep), "rb" ) + print('using file 
../sa_tot_alltime/sa_tot_alltime_%s_%s_%s_%s_%s_%s'%( + freq, scenario,threshold,source_id,variable,timestep)) + #../sa_tot_alltime/sa_tot_alltime_daily_historical_60_SAM0-UNICON_siconc_97 + # the divided by 30 I have to do here because it is not implemented correctly in the algorithm yet + if source_id in ['ACCESS-CM2', 'MRI-ESM2-0']: #thos are the two that might not have to be divided! + sa_tot_alltime = pickle.load(sa_tot_file) + pa_tot_alltime = pickle.load(pa_tot_file) + else: + sa_tot_alltime = pickle.load(sa_tot_file)/divider + pa_tot_alltime = pickle.load(pa_tot_file)/divider + sa_tot_file.close() + pa_tot_file.close() + + print('index:', index, "source_id:", source_id) + + ax = plt.subplot(5,5,index+1,projection=ccrs.Orthographic(central_latitude=-90)) + ax.set_extent([-180, 180, -90, -50], ccrs.PlateCarree()) + ax.background_patch.set_facecolor('grey') + cmap=utils.colormap_alpha(cmocean.cm.ice)#plt.cm.Greys_r) + colors1 = plt.pcolormesh( + dataset_areacello.lon, + dataset_areacello.lat, + sa_tot_alltime, + transform=ccrs.PlateCarree(), + cmap=cmap, + vmin=0, + vmax=100) + + cmap = utils.colormap_alpha(plt.cm.get_cmap('autumn', 21)) + colors2 = plt.pcolormesh( + dataset_areacello.lon, + dataset_areacello.lat, + pa_tot_alltime, + transform=ccrs.PlateCarree(), + cmap=cmap, + vmin=0, + vmax=20)#20 + + ax.set_title(source_id, fontsize=25) + ax.coastlines() + ax.add_feature(cartopy.feature.LAND, edgecolor='k') + ax.gridlines() + +fig.subplots_adjust(right=0.8) +cbar_ax = fig.add_axes([0.87, 0.15, 0.025, 0.7]) +cbar_ax2 = fig.add_axes([0.90, 0.15, 0.025, 0.7]) +cbar1 = fig.colorbar(colors1, cax=cbar_ax) +cbar2 = fig.colorbar(colors2, cax=cbar_ax2, extend='max') + +#if variable == 'siconc': +cbar1.set_label('% probability of sea ice coverage > threshold',labelpad=-110) +#elif variable == 'sivol': +# cbar1.set_label('mean sea ice thickness', labelpad=-110) +#elif variable == 'sithick': +# cbar1.set_label('') +cbar2.set_label('% propability of polynya 
occurence',labelpad=0) +cbar_ax.yaxis.set_ticks_position('left') +plt.savefig('./plt_locations/%s_%s_%s_%s.png'%(variable, threshold, freq, monthDict[i]))#, dpi=300) +# plt.show() + +#open('./sa_tot_alltime/sa_tot_alltime/') \ No newline at end of file diff --git a/plt_observational_scatter.py b/plt_observational_scatter.py new file mode 100644 index 0000000..9bafb9a --- /dev/null +++ b/plt_observational_scatter.py @@ -0,0 +1,56 @@ +import utils +from scipy import stats +from matplotlib import pyplot as plt +import pandas +import numpy + +resultsdictionary = utils.create_resultsdictionary() +fig, axs = plt.subplots(ncols=3, figsize=[10,10]) + +month=11 +sivol_mon = resultsdictionary['sivol']['mon']['30']['OBS'][resultsdictionary['sivol']['mon']['30']['OBS'].date.dt.month == month] +sivol_day = resultsdictionary['sivol']['day']['30']['OBS'].groupby(pandas.Grouper(freq='M')).mean() +sivol_day = sivol_day[sivol_day.index.month==month] +nmin = -0.1e11 +nmax = 7e11 +x,y = sivol_day['pa_tot'], sivol_mon['pa_tot'] +slope, intercept, r_value, p_value, std_err = stats.linregress(x, y) +print(slope, r_value) +titlepad = 15 +titlex, titley, titlefontsize = 0.5e11, 6e11, 16 + +axs[0].scatter(x,y, color='blue'); +axs[0].set_xlabel("A(daily sivol)"); axs[0].set_ylabel("A(monthly sivol)"); +axs[0].set_aspect('equal', adjustable='box') +axs[0].set_title('daily vs monthly sivol', pad=titlepad) +axs[0].set_ylim(nmin, nmax); axs[0].set_xlim(nmin, nmax); +axs[0].text(x=titlex, y=titley, s='a', fontsize=titlefontsize) +xcoord = numpy.linspace(nmin, nmax, 100) +axs[0].plot(xcoord, xcoord*slope+intercept, alpha=0.5) + +for index, threshold in enumerate(['30','60']): + + siconc_mon = resultsdictionary['siconc']['mon'][threshold]['OBS'][resultsdictionary['siconc']['mon'][threshold]['OBS'].date.dt.month == month] + siconc_day = resultsdictionary['siconc']['day'][threshold]['OBS'].groupby(pandas.Grouper(freq='M')).mean() + siconc_day = siconc_day[siconc_day.index.month==month] + + x,y = 
siconc_day['pa_tot'], siconc_mon['pa_tot'] + slope, intercept, r_value, p_value, std_err = stats.linregress(x, y) + xcoord = numpy.linspace(nmin, nmax, 100) + axs[index+1].plot(xcoord, xcoord*slope+intercept, alpha=0.5) + print(slope, r_value) + axs[index+1].scatter(x,y, color='grey')#facecolor='white', edgecolor='blue') + axs[index+1].scatter(x[-10:],y[-10:], color='blue'); + axs[index+1].set_xlabel("A(daily siconc)"); axs[index+1].set_ylabel("A(monthly siconc)"); + axs[index+1].set_aspect('equal', adjustable='box') + axs[index+1].set_title('daily vs monthly siconc', pad=titlepad) + axs[index+1].set_ylim(nmin, nmax); axs[index+1].set_xlim(nmin, nmax); + +axs[1].text(x=titlex, y=titley, s='b', fontsize=titlefontsize) +axs[2].text(x=titlex, y=titley, s='c', fontsize=titlefontsize) + + +#plt.show() + +plt.savefig('observational_scatter.png') +#plt.show() diff --git a/plt_seasonality.py b/plt_seasonality.py new file mode 100644 index 0000000..6d3c9bf --- /dev/null +++ b/plt_seasonality.py @@ -0,0 +1,96 @@ +# Attention: This does not work with the GISS model yet! 
+# Fully working '12month climatology' of all polynyas +import pickle +import cmocean +from utils import open_CMIP_variable, homogenize_CMIP_variable#, colormap_alpha +import utils +from matplotlib import pyplot as plt +import cartopy.crs as ccrs +import cartopy + +monthDict={1:'Jan', 2:'Feb', 3:'Mar', 4:'Apr', 5:'May', 6:'Jun', 7:'Jul', 8:'Aug', 9:'Sep', 10:'Oct', 11:'Nov', 12:'Dec'} +#tsDict={'monthly':{1:1, 2:2, 3:3, 4:4, 5:5, 6:6, 7:7, 8:8, 9:9, 10:10, 11:11, 12:12}, +# 'daily':{1:15, 2:45, 3:75, 4:105, 5:135, 6:165, 7:195, 8:225, 9:255, 10:285, 11:315, 12:345}} +plt.rcParams.update({'font.size': 25}) + +freq_dict = {'daily':'day', 'monthly':'mon'} +variable = 'sivol' +freq = 'monthly' # 'monthly' +threshold = '30' +scenario = 'historical' +resultsdictionary = utils.create_resultsdictionary() +models = list(resultsdictionary[variable][freq_dict[freq]]['30'].keys()) +models.sort() +for source_id in models: + if source_id in ['GISS-E2-1-H', 'OBS']: + continue + dataset_areacello = utils.open_CMIP_variable(model=source_id, variable='areacello', scenario='historical', freq='daily') + dataset_areacello = homogenize_CMIP_variable(dataset_areacello) + fig, axs = plt.subplots(nrows=2, ncols=6,figsize=[25, 17]) + fig.subplots_adjust(wspace=0.1, hspace=0.1) + print(source_id) + for i in range(1,13): + timestep = i#tsDict[freq][i] + outfile = open('../sa_tot_alltime/sa_tot_alltime_%s_%s_%s_%s_%s_%s'%(freq, scenario,threshold,source_id,variable,timestep), "rb" ) + sa_tot_alltime = pickle.load(outfile) + outfile.close() + outfile = open('../sa_tot_alltime/pa_tot_alltime_%s_%s_%s_%s_%s_%s'%(freq, scenario,threshold,source_id,variable,timestep), "rb" ) + pa_tot_alltime = pickle.load(outfile) + #if pa_tot_alltime == 0.0: + # print('File not found for model ../sa_tot_alltime/pa_tot_alltime_%s_%s_%s_%s_%s_%s'%(freq, scenario,threshold,source_id,variable,timestep)) + # continue + + if freq == 'daily': + sa_tot_alltime = sa_tot_alltime/30 + pa_tot_alltime = pa_tot_alltime/30 + + # 
breakpoint() + + print(i) + + ax = plt.subplot(3,4,i,projection=ccrs.Orthographic(central_latitude=-90)) + ax.set_extent([-180, 180, -90, -50], ccrs.PlateCarree()) + ax.background_patch.set_facecolor('grey') + cmap=utils.colormap_alpha(cmocean.cm.ice)#plt.cm.Greys_r) + # breakpoint() + colors1 = plt.pcolormesh( + dataset_areacello.lon, + dataset_areacello.lat, + sa_tot_alltime, + transform=ccrs.PlateCarree(), + cmap=cmap, + vmin=0, + vmax=100) + + cmap = utils.colormap_alpha(plt.cm.get_cmap('autumn', 21)) + colors2 = plt.pcolormesh( + dataset_areacello.lon, + dataset_areacello.lat, + pa_tot_alltime, + transform=ccrs.PlateCarree(), + cmap=cmap, + vmin=0, + vmax=20) + + ax.set_title(monthDict[i], fontsize=25) + ax.coastlines() + ax.add_feature(cartopy.feature.LAND, edgecolor='k') + ax.gridlines() + outfile.close() + + fig.subplots_adjust(right=0.8) + cbar_ax = fig.add_axes([0.87, 0.15, 0.025, 0.7]) + cbar_ax2 = fig.add_axes([0.90, 0.15, 0.025, 0.7]) + cbar1 = fig.colorbar(colors1, cax=cbar_ax) + cbar2 = fig.colorbar(colors2, cax=cbar_ax2, extend='max') + + cbar1.set_label('% mean sea ice concentration',labelpad=-110)#horizontalalignment='left') + cbar2.set_label('% propability of polynya occurence',labelpad=0) + cbar_ax.yaxis.set_ticks_position('left') + # print('YEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEES', freq) + if freq == 'monthly': + plt.savefig('./plt_seasonality/%s/%s_%s.png'%(freq, variable, source_id)) + if freq == 'daily': + plt.savefig('./plt_seasonality/%s/%s_%s.png'%(freq, variable, source_id)) + # plt.show() + \ No newline at end of file diff --git a/plt_seasonality_curves.py b/plt_seasonality_curves.py new file mode 100644 index 0000000..71cc2a5 --- /dev/null +++ b/plt_seasonality_curves.py @@ -0,0 +1,238 @@ +import utils +from matplotlib import pyplot as plt +import numpy +import pandas + +resultsdictionary = utils.create_resultsdictionary() + +#columns +# Ein fuer alle mal symbole und farben festlegen +monthdict={1:'Jan', 2:'Feb', 3:'Mar', 4:'Apr', 5:'May', 
6:'Jun', 7:'Jul', 8:'Aug', 9:'Sep', 10:'Oct', 11:'Nov', 12:'Dec'} + +styledict = {'color': {'CanESM5': 'C0', + 'CESM2-WACCM-FV2': 'C1', + 'CESM2-WACCM': 'C2', + 'NorCPM1': 'C3', + 'EC-Earth3': 'C4', + 'BCC-CSM2-MR': 'C5', + 'MIROC6': 'C6', + 'MPI-ESM1-2-LR': 'C7', + 'UKESM1-0-LL': 'C8', + 'EC-Earth3-Veg': 'C9', + 'MPI-ESM-1-2-HAM': 'C0', + 'NorESM2-LM': 'C1', + 'CESM2': 'C2', + 'GFDL-CM4': 'C3', + 'ACCESS-CM2': 'C4', + 'MIROC-ES2L': 'C5', + 'MPI-ESM1-2-HR': 'C6', + 'MRI-ESM2-0': 'C7', + 'IPSL-CM6A-LR': 'C8', + 'GISS-E2-1-H': 'C9', + 'NorESM2-MM': 'C0', + 'BCC-ESM1': 'C1', + 'HadGEM3-GC31-LL': 'C2', + 'ACCESS-ESM1-5': 'C3', + 'CAMS-CSM1-0': 'C4', + 'CNRM-ESM2-1': 'C5', + 'CNRM-CM6-1': 'C6', + 'SAM0-UNICON': 'C7', + 'CESM2-FV2': 'C8', + 'GFDL-ESM4': 'C9', + 'OBS': 'C0'}, + 'marker': {'CanESM5': 'o', + 'CESM2-WACCM-FV2': 'v', + 'CESM2-WACCM': '^', + 'NorCPM1': '<', + 'EC-Earth3': '>', + 'BCC-CSM2-MR': '8', + 'MIROC6': 's', + 'MPI-ESM1-2-LR': 'p', + 'UKESM1-0-LL': '*', + 'EC-Earth3-Veg': 'h', + 'MPI-ESM-1-2-HAM': 'H', + 'NorESM2-LM': 'D', + 'CESM2': 'd', + 'GFDL-CM4': 'P', + 'ACCESS-CM2': 'X', + 'MIROC-ES2L': 'o', + 'MPI-ESM1-2-HR': 'v', + 'MRI-ESM2-0': '^', + 'IPSL-CM6A-LR': '<', + 'GISS-E2-1-H': '>', + 'NorESM2-MM': '8', + 'BCC-ESM1': 's', + 'HadGEM3-GC31-LL': 'p', + 'ACCESS-ESM1-5': '*', + 'CAMS-CSM1-0': 'h', + 'CNRM-ESM2-1': 'H', + 'CNRM-CM6-1': 'D', + 'SAM0-UNICON': 'd', + 'CESM2-FV2': 'P', + 'GFDL-ESM4': 'X', + 'OBS':'o'}} + +variable = 'sivol' +period = 'mon' +threshold = '30' + +models = list(resultsdictionary[variable][period][threshold].keys()) +models.sort() + +import itertools +from matplotlib.lines import Line2D +plt.rcParams.update({'font.size': 14}) +my_marker_cycle = itertools.cycle(list(Line2D.filled_markers)) +my_color_cycle = itertools.cycle(['C0', 'C1', 'C2', 'C3', 'C4', 'C5', 'C6', 'C7', 'C8', 'C9']) +my_marker_cycle_list = [] +my_color_cycle_list = [] +for i in range(0,len(models)): + my_marker_cycle_list.append(my_marker_cycle.__next__()) + 
my_color_cycle_list.append(my_color_cycle.__next__()) +#mt2['color'] = my_color_cycle_list +#mt2['marker'] = my_marker_cycle_list +# defining the coastal polynyas + + + + +fig, axs = plt.subplots(nrows=2, ncols=3, figsize=[20,20], sharex=True) + +axs = axs.flatten() +for j,col in enumerate(['sa_we', 'pa_we_co', 'pa_we_op']):#columns: + iterative_mean = 0 + + # PLOT MODEL RESULTS + for index, model in enumerate(models): + label = model if j==0 else None + ma = resultsdictionary[variable][period][threshold][model][col].groupby( + resultsdictionary[variable][period][threshold][model][col].index.month).describe() + iterative_mean += ma['mean'] + axs[j].plot( + ma['mean']/10**12, + alpha=0.2, + c=styledict['color'][model], + marker=styledict['marker'][model],) + points = axs[j].scatter( + x = ma.index, + y = ma['mean']/10**12, + c=styledict['color'][model], + marker=styledict['marker'][model], + edgecolor='k', + s=100, + label=label) + # plotting the iterative multi-model-mean + label = 'multi model mean' if j==0 else None + axs[j].plot(iterative_mean/len(models)/10**12, lw=5, color="k", ls='--', label=label) + + """ + # PLOT OSI RESULTS + if col in dfre[threshold].keys(): + label = 'OSI-450' if j==0 else None + ma = dfre[threshold][col].groupby(dfre[threshold][col].index.month).describe() + lines = axs[j].plot(range(1,13),ma['mean']/10**12, label=label, lw=5, color="k") + color = lines[0].get_color() + axs[j].fill_between(x=range(1,13),y1=numpy.array((ma['mean']-2*ma['std'])/10**12).clip(min=0), y2=(ma['mean']+2*ma['std'])/10**12, alpha=0.2, color='grey') + else: + print('dfre is missing the %s column'%col) + if col in descriptions.keys(): + axs[j].set_title('%s) '%abc[j]+descriptions[col]) + else: + axs[j].set_title(col) + """ + + #plt.legend(loc=(1.1,0)) + # plt.title(pandas_dfs[col]['description'][0]) + axs[j].set_xlabel('month') + #axs[j].set_xticks(numpy.array(list(monthdict.keys()))+1,list(monthdict.values()))#monthdict.keys, monthdict.values) + 
axs[j].set_xticks(range(1,13)) + axs[j].set_xticklabels(list(monthdict.values())) + axs[j].set_ylabel('area in mio km²') + axs[j].set_xlim(0,13) + axs[0].set_title('Sea ice area in \n the Weddell Sea') + axs[1].set_title('Area of coastal polynyas \n in the Weddell Sea') + axs[2].set_title('Area of OWP in \n the Weddell Sea') + # axs[j].set_yscale('log') + if col in ['pa_we_co', 'pa_we_op']: + axs[j].set_ylim(0,0.15) + + + + +########## +variable = 'siconc' +period = 'day' +threshold = '30' + +models = list(resultsdictionary[variable][period][threshold].keys()) +models.sort() +axs = axs.flatten() +for j,col in enumerate(['sa_we', 'pa_we_co', 'pa_we_op']):#columns: + iterative_mean = 0 + + # PLOT MODEL RESULTS + for index, model in enumerate(models): + label = model if j==0 else None + ma = resultsdictionary[variable][period][threshold][model][col].groupby( + resultsdictionary[variable][period][threshold][model][col].index.month).describe() + iterative_mean += ma['mean'] + axs[j+3].plot( + ma['mean']/10**12, + alpha=0.2, + c=styledict['color'][model], + marker=styledict['marker'][model],) + points = axs[j+3].scatter( + x = ma.index, + y = ma['mean']/10**12, + c=styledict['color'][model], + marker=styledict['marker'][model], + edgecolor='k', + s=100, + label=label) + # plotting the iterative multi-model-mean + label = 'multi model mean' if j==0 else None + axs[j+3].plot(iterative_mean/len(models)/10**12, lw=5, color="k", ls='--', label=label) + + """ + # PLOT OSI RESULTS + if col in dfre[threshold].keys(): + label = 'OSI-450' if j==0 else None + ma = dfre[threshold][col].groupby(dfre[threshold][col].index.month).describe() + lines = axs[j].plot(range(1,13),ma['mean']/10**12, label=label, lw=5, color="k") + color = lines[0].get_color() + axs[j].fill_between(x=range(1,13),y1=numpy.array((ma['mean']-2*ma['std'])/10**12).clip(min=0), y2=(ma['mean']+2*ma['std'])/10**12, alpha=0.2, color='grey') + else: + print('dfre is missing the %s column'%col) + if col in 
descriptions.keys(): + axs[j].set_title('%s) '%abc[j]+descriptions[col]) + else: + axs[j].set_title(col) + """ + + #plt.legend(loc=(1.1,0)) + # plt.title(pandas_dfs[col]['description'][0]) + axs[j+3].set_xlabel('month') + #axs[j].set_xticks(numpy.array(list(monthdict.keys()))+1,list(monthdict.values()))#monthdict.keys, monthdict.values) + axs[j+3].set_xticks(range(1,13)) + axs[j+3].set_xticklabels(list(monthdict.values())) + axs[j+3].set_ylabel('area in mio km²') + axs[j+3].set_xlim(0,13) + axs[0+3].set_title('Sea ice area in \n the Weddell Sea') + axs[1+3].set_title('Area of coastal polynyas \n in the Weddell Sea') + axs[2+3].set_title('Area of OWP in \n the Weddell Sea') + # axs[j].set_yscale('log') + if col in ['pa_we_co', 'pa_we_op']: + axs[j+3].set_ylim(0,0.15) +########## + + + + + +fig.subplots_adjust(bottom=0.2) +fig.legend(loc='lower center', bbox_to_anchor=(0.4,-0.00), + fancybox=True, shadow=True, ncol=5, fontsize=10) +plt.savefig('./plt_seasonality_curves/seasonality_%s_%s_%s_multivariable.png'%(variable, period, threshold), dpi=100) +#plt.tight_layout(rect=(0, 0.15, 1, 1), h_pad=0.2, w_pad=0.2) + #if col in ['pa_we', 'pa_we_op']: + # plt.ylim(0,1e11) \ No newline at end of file diff --git a/polynyaareas_remastered.py b/polynyaareas_remastered.py new file mode 100644 index 0000000..153ad3c --- /dev/null +++ b/polynyaareas_remastered.py @@ -0,0 +1,677 @@ +from skimage import data, filters +from skimage.segmentation import flood, flood_fill +from matplotlib import pyplot as plt +import cartopy +import xarray +import numpy +import resource +from mpl_toolkits.axes_grid1 import AxesGrid +import warnings +import datetime +import pickle +import os +import glob +import pandas +import cmocean +import pickle +import cartopy.crs as ccrs +from cartopy.util import add_cyclic_point +from matplotlib.patches import Patch +warnings.simplefilter(action='ignore', category=FutureWarning) +import matplotlib.colors as mcolors +from matplotlib.colors import ListedColormap 
from copy import copy
from mpl_toolkits.axes_grid1 import make_axes_locatable
import sys

# Days per calendar month (non-leap year).
months_length = {1:31, 2:28, 3:31, 4:30, 5:31, 6:30,
                 7:31, 8:31, 9:30, 10:31, 11:30, 12:31}

def colormap_alpha(cmap):
    """Return a copy of *cmap* whose alpha channel ramps linearly from 0 to 1.

    Used so low values of a field plot as transparent.
    """
    my_cmap = cmap(numpy.arange(cmap.N))
    my_cmap[:,-1] = numpy.linspace(0, 1, cmap.N)
    my_cmap = ListedColormap(my_cmap)
    return my_cmap

def limit_memory(maxsize):
    """Limit this process's address space to *maxsize* (MB's)."""
    maxsize = maxsize * 1024 * 1024  # resource.setrlimit works with bytes
    _soft, hard = resource.getrlimit(resource.RLIMIT_AS)
    resource.setrlimit(resource.RLIMIT_AS, (maxsize, hard))

def open_CMIP_variable(model, variable, scenario, freq):
    """Open the netCDF files for one CMIP variable as a single xarray dataset.

    Parameters: *model* and *scenario* select the ../<model>/<scenario>/ folder,
    *variable* the CMIP variable name, *freq* is 'daily'/'monthly' (mapped to
    the CMIP table prefixes 'day'/'mon').

    Returns the opened dataset. NOTE(review): on failure the sea-ice branch
    returns the tuple (0, 0) while the generic branch returns None — callers
    must handle both sentinels; unifying them would change the interface.
    """
    freq = 'day' if freq == 'daily' else 'mon'
    # for the GISS-models, areacella available only and is the sea ice grid
    if variable in ['siconc', 'sivol', 'sithick']:
        # models that only published non-r1i1p1f1 variants
        variantsdict = {'CNRM-CM6-1':'r1i1p1f2','CNRM-ESM2-1':'r1i1p1f2','UKESM1-0-LL':'r1i1p1f2', 'MIROC-ES2L':'r1i1p1f2','HadGEM3-GC31-LL':'r1i1p1f3'}
        if model in variantsdict:
            variant = variantsdict[model]
        else:
            variant = 'r1i1p1f1'
        filelist = glob.glob('../%s/%s/%s_SI%s*%s*.nc'%(model,scenario,variable,freq[0:3],variant))
        print('matching files with: ../%s/%s/%s_SI%s*%s*.nc'%(model,scenario,variable,freq[0:3],variant))
        if filelist:
            #print('progressing with the following input files:\n%', sorted(filelist))
            dataset = xarray.open_mfdataset(sorted(filelist))
        else:
            #print('no input files found with are matching ../%s/%s/%s_SI%s*%s*.nc'%(model,scenario,variable,freq[0:3],variant))
            return 0,0
    elif variable == 'areacello':
        # prefer the historical-run cell-area file, fall back to any areacello file
        try:
            dataset = xarray.open_mfdataset('../%s/areacello*historical*.nc'%(model))
        except Exception:
            dataset = xarray.open_mfdataset('../%s/areacello*.nc'%(model))
    else:
        try:
            dataset = xarray.open_mfdataset('../%s/%s/%s*.nc'%(model, scenario, variable))
        except Exception:
            print('no sourcefiles for variable %s, model %s, scenario %s'%(variable, model, scenario))
            # fixed: was `return none` (NameError at runtime)
            return None
    return dataset


def homogenize_CMIP_variable(dataset):
    """Normalize coordinate names of a CMIP dataset to lat / lon / lev.

    Also rolls longitudes into the [-180, 180) range. Returns the dataset.
    """
    # homogenize the naming schemes to lat, lon and lev
    lat_key = list({'lat', 'nav_lat', 'latitude'} & (set(dataset.data_vars) | set(dataset.coords)))[0]
    lon_key = list({'lon', 'nav_lon', 'latitude'.replace('latitude','longitude')} & (set(dataset.data_vars) | set(dataset.coords)))[0] if False else list({'lon', 'nav_lon', 'longitude'} & (set(dataset.data_vars) | set(dataset.coords)))[0]
    lev_key = list({'lev', 'olevel'} & (set(dataset.data_vars) | set(dataset.coords)))
    # the listed models keep their native (1-D) lat/lon names
    if lon_key and dataset.source_id not in ['BCC-CSM2-MR','BCC-ESM1', 'GISS-E2-1-H']:
        dataset = dataset.rename(
            {lat_key: 'lat', lon_key: 'lon'}).set_coords(
            ['lon', 'lat'])
    # is there a depth coordinate also in dataset (i.e. areacello wont have one)
    if lev_key:
        # fixed: stray `s` after set_coords( was a syntax error, and the
        # unconditional rename({'olevel': 'lev'}) raised when the coordinate
        # was already called 'lev'
        if lev_key[0] != 'lev':
            dataset = dataset.rename({lev_key[0]: 'lev'})
        dataset = dataset.set_coords(['lev'])
    try:
        # NOTE(review): result is bound to a fresh local and never used —
        # presumably this was meant to rebind `dataset`; confirm before changing
        dataset_areacello = dataset.rename(
            {'nj': 'j', 'ni': 'i'}).set_coords(
            ['i', 'j'])
    except Exception:
        pass
    # This might be a bit of a brave change, but rolling all lon coordinates to -180,180
    if dataset is not None:
        dataset['lon'] = dataset.lon.where(dataset.lon<180, dataset.lon-360)
    return dataset

def compute_CMIP_areas(
    dataset,
    dataset_areacello,
    scenario,
    threshold_conc,
    threshold_thick,
    freq,
    timelimit=1980,
    createplots=True,
    seasonalmaps=True):
    """Compute sea-ice and polynya areas per timestep for one CMIP model.

    *dataset* is a dict holding the 'seaice' dataset; *dataset_areacello*
    carries the grid-cell areas; *threshold_conc*/*threshold_thick* are the
    flood-fill tolerances for concentration resp. thickness fields.
    """
    # accumulators for the all-time climatology, keyed by month or day-of-year
    if freq == 'monthly':
        sa_tot_alltime = dict.fromkeys(range(1,13), 0)
        pa_tot_alltime = dict.fromkeys(range(1,13), 0)
    elif freq == 'daily':
        # NOTE(review): range(1,365) covers days 1..364 only — day 365 (and 366
        # in leap years) is missing; confirm whether this is intentional
        sa_tot_alltime = dict.fromkeys(range(1,365), 0)
        pa_tot_alltime = dict.fromkeys(range(1,365), 0)
    else:
        print('invalid value for freq!')

    resultsdictionary = dict(date=[])
    # area keys -> colormap used when plotting them
    areas = {'pa_tot':'Wistia', 'sa_tot':'Reds', 'pa_we':'Wistia',
             'sa_we':'Greens', 'pa_we_op':'winter', 'pa_op':'winter'}
    colorscheme = ["Wistia","Greens","Wistia","winter","winter"]
    for area in areas.keys():
        resultsdictionary[area] = []
        resultsdictionary[area+'_hf'] = []
        resultsdictionary[area+'_mlotst'] = []

    # find a spot in the open ocean (and fastland antarctica) to start the freezing over floodfill
    lat = -40; 
lon = 90;
# which sea-ice variable this dataset carries (siconc / sivol / sithick)
variable_id = dataset['seaice'].variable_id
source_id = dataset['seaice'].source_id

# GISS publishes cell areas on the atmosphere grid only
if source_id == 'GISS-E2-1-H':
    areacellvar = 'areacella'
else:
    areacellvar = 'areacello'

# nearest grid point to the open-ocean seed (lat=-40, lon=90) by squared distance
newarray = (numpy.abs(dataset['seaice']['lat']-lat)**2 + numpy.abs(dataset['seaice']['lon']-lon)**2)
j,i = numpy.unravel_index(numpy.argmin(newarray, axis=None), newarray.shape)

# flood-fill value and tolerance depend on whether the field is a
# concentration (0..100 %) or a thickness/volume (metres)
if variable_id == 'siconc':
    floodfillvalue = 100
    tolerance = threshold_conc
    tolerance_sea_ice = threshold_conc
    vmin=0; vmax=100
elif variable_id in ['sivol', 'sithick']:
    floodfillvalue = 1
    tolerance = threshold_thick
    tolerance_sea_ice = threshold_thick
    vmin=0; vmax=0.5

for t in range(0,timelimit):
    # print(t)
    # progress print
    if t%100 == 0:
        print('model:%s, timestep:%s'%(source_id, t))

    date = datetime.datetime(dataset['seaice']['time'][t].dt.year,
        dataset['seaice']['time'][t].dt.month,
        dataset['seaice']['time'][t].dt.day)
    resultsdictionary['date'].append(date)
    if createplots:
        # panel 1: raw sea-ice field on a south-polar orthographic projection
        fig = plt.figure(figsize=[20, 7.5])
        ax = plt.subplot(1,4,1,projection=ccrs.Orthographic(central_latitude=-90))
        ax.set_extent([-180, 180, -90, -50], ccrs.PlateCarree())
        ax.background_patch.set_facecolor('grey')
        plot_dataset(dataset['seaice'][variable_id][t], dataset_areacello,
            tolerance, cmap=colormap_alpha(cmocean.cm.ice), #my_cmap,#cmap=cmocean.cm.ice,
            title="sea ice plot (%s)"%variable_id,
            ax=ax,
            vmin=vmin, vmax=vmax)
        ax.add_feature(cartopy.feature.LAND, edgecolor='k')
        legend_elements = [Patch(facecolor='azure', edgecolor='k',label='sea ice covered'),
            Patch(facecolor='cornflowerblue', edgecolor='k', label='partly covered'),
            Patch(facecolor='black', edgecolor='k', label='open water')]
        ax.legend(handles=legend_elements, loc='lower left', fontsize=14)

        # panel 2: same field again (flood-fill overlays are added later)
        ax = plt.subplot(1,4,2,projection=ccrs.Orthographic(central_latitude=-90))
        ax.set_extent([-180, 180, -90, -50], ccrs.PlateCarree())
        ax.background_patch.set_facecolor('grey')
        plot_dataset(dataset['seaice'][variable_id][t], dataset_areacello,
            tolerance, cmap=colormap_alpha(cmocean.cm.ice),
            title="sea ice plot (%s)"%variable_id,
            ax=ax,
            vmin=vmin, vmax=vmax)
        ax.add_feature(cartopy.feature.LAND, edgecolor='k')

    # flood the open ocean from the seed point: afterwards everything connected
    # to the open ocean carries `floodfillvalue`
    floodedarray = flood_fill(
        dataset['seaice'][variable_id][t].values, (j, i), floodfillvalue, tolerance=tolerance)
    if source_id == 'GISS-E2-1-H':
        antarcticamask=flood_fill(image=dataset['seaice'][variable_id][9].values, seed_point=(0,0), new_value=numpy.nan)
        # Fill antartic continent with numpy.nan
        siconc2 = numpy.where(numpy.isnan(antarcticamask), antarcticamask, floodedarray)
    else:
        siconc2 = floodedarray
    # list of models with one dimensional latitude/longitude errors instead of array
    if t==0:
        if source_id in ['BCC-CSM2-MR','BCC-ESM1', 'GISS-E2-1-H']:
            latitudes, longitudes = [a.T for a in numpy.meshgrid(dataset_areacello.lat, dataset_areacello.lon)]
        else:
            longitudes, latitudes = dataset_areacello.lon, dataset_areacello.lat

    masks = {}

    with numpy.errstate(invalid='ignore'):
        numpy.seterr(divide='ignore', invalid='ignore')
        # some sea ice distributions will be cut of at 55°S, but sea ice that far north would
        # be unrealistic anyway
        # create all the different masks for polynyas and different sea ice sectors

        if t==0:
            # Weddell Sea sector: south of 55°S, between 65°W and 30°E
            if source_id in ['BCC-CSM2-MR','BCC-ESM1', 'GISS-E2-1-H']:
                weddellsection = ((latitudes<-55) & ((longitudes>-65) & (longitudes<30)))
                southernbound = (latitudes<-55)
            else:
                weddellsection = ((latitudes<-55) & ((longitudes>-65) & (longitudes<30))).values
                southernbound = (latitudes<-55).values

        masks['sa_tot'] = numpy.bitwise_and(dataset['seaice'][variable_id][t].values>tolerance_sea_ice, southernbound)
        masks['sa_we'] = (dataset['seaice'][variable_id][t].values>tolerance_sea_ice) & southernbound & weddellsection

        # NOTE(review): the next line is garbled in the source (comparison
        # operators / a numpy.where(...) call were lost in extraction) —
        # recover the original before running this file
        masks['pa_tot'] = (siconc20) & (latitudes<-25),siconc2,0)#-55),siconc2,0)
        # fill Antarctica inclusive coastal polynyas with sea ice
        # The MPI count the y-index from the north instead of from the south upwards
        if source_id in ['MPI-ESM-1-2-HAM', 'MPI-ESM1-2-HR', 'MPI-ESM1-2-LR']:
            siconc2 = flood_fill(numpy.array(siconc2), (len(siconc2[:-1,0]), 0), floodfillvalue, tolerance=tolerance)
        else:
            siconc2 = flood_fill(numpy.array(siconc2), (0, 0), floodfillvalue, tolerance=tolerance)

        # NOTE(review): from here the source is fused — the diff jumps from
        # masks['pa_op'] into a different (observation-area) routine whose
        # definition is not visible; tokens kept verbatim for recovery
        masks['pa_op'] = (siconc2= tolerance) * 1
        sa_tot_pixels = sa_tot_mask.sum()
        sa_tot = sa_tot_pixels*cellarea_siconc

        # compute sa_we
        sa_we_mask = ((siconc >= tolerance) &
            (numpy.array(dataset['lon']) < 30) &
            (numpy.array(dataset['lon']) > -65)) * 1
        sa_we_pixels = sa_we_mask.sum()
        sa_we = sa_we_pixels*cellarea_siconc

        # compute pa_we
        pa_we_mask = ((floodedarray <= tolerance) &
            (numpy.array(dataset['lon']) < 30) &
            (numpy.array(dataset['lon']) > -65) &
            (numpy.array(dataset['lat']) < minlat)) * 1
        pa_we_pixels = pa_we_mask.sum()
        pa_we = pa_we_pixels*cellarea_siconc

        # seed indices differ per source grid resolution — TODO confirm
        if variable == 'siconc':
            i, j = 216, 216
        else:
            i, j = 300, 300

        booleanarray = floodedarray>0
        siconc2 = floodedarray*booleanarray
        if variable == 'siconc':
            siconc2 = numpy.where((floodedarray>0),floodedarray,0)
        else:
            siconc2 = numpy.where(numpy.isnan(floodedarray), 0, floodedarray)
        siconc2 = flood_fill(numpy.array(siconc2), (i, j), floodfillvalue, tolerance=tolerance)

        # compute pa_we_op
        pa_we_op_mask = ((siconc2 < tolerance) &
            (numpy.array(dataset['lon']) < 30) &
            (numpy.array(dataset['lon']) > -65) &
            (numpy.array(dataset['lat']) < minlat) &
            numpy.invert(numpy.isnan(dataset[key][0]))) * 1
        pa_we_op_pixels = pa_we_op_mask.sum()
        pa_we_op = pa_we_op_pixels*cellarea_siconc

        # compute pa_op
        # NOTE(review): garbled again below — the diff fuses pa_op_mask with the
        # start of the violin-plot script (dfa/dfb winter-season means)
        pa_op_mask = ((siconc2 < tolerance) &
            (numpy.array(dataset['lat'])= 5) & (dfa.index.month <= 11) & (dfa.index.year >= startyear) & (
    dfa.index.dayofyear < 320)].groupby(pandas.Grouper(freq='A')).mean()
df2b = dfb[(dfb.index.month >= 5) & (dfb.index.month <= 11) & (dfb.index.year >= startyear) & (
dfb.index.dayofyear < 320)].groupby(pandas.Grouper(freq='A')).mean() + +df2a['seab_area'] = 'pa_we_co' +df2b['seab_area'] = 'pa_we_op' + +df2a = df2a.sort_index(axis = 1) +df2b = df2b.sort_index(axis = 1) + +dfc = pandas.concat([df2a, df2b], axis=0) +#breakpoint() +fig, axs = plt.subplots(ncols=2, nrows=2, figsize=[15,15], gridspec_kw=dict(width_ratios=[len(df2a.keys()),1]), + sharey=True) + +df2a = df2a.drop(['seab_area'], axis=1) +df2b = df2b.drop(['seab_area'], axis=1) + +yscaling = 10e9 +ymax = 3e11/yscaling +ymin = -0.01e11/yscaling + +g = sns.violinplot(data=df2a/yscaling, scale='width', inner="points", ax=axs[0][0], order=df2a.keys()[:-1])#, +g.scatter(x=range(0,len(models)), y=df2a.mean()/yscaling, color='white', zorder=100, s=50) +g.set_xticklabels(labels=df2a.keys()[:-1] ,rotation=90) +g.set_title('coastal') +g.set_ylabel('polynya areas in 10³km²') +g.set_ylim(ymin, ymax) + +g = sns.violinplot(data=df2a['OBS']/yscaling, scale='width', inner="points", ax=axs[0][1])#, +g.set_xticklabels(labels=['OBSERVATIONS\n(2010-2020)'] ,rotation=90, color="red") +g.scatter(x=0, y=df2a['OBS'].mean()/yscaling, color='white', zorder=100, s=50, edgecolor='black') +#g.set_title('coastal') +g.set_ylim(ymin, ymax) + +g = sns.violinplot(data=df2b/yscaling, scale='width', inner="points", ax=axs[1][0])#, color='white') +g.scatter(x=range(0,len(models)), y=df2b.mean()/yscaling, color='white', edgecolor='black', zorder=100, s=50) +g.set_xticklabels(labels=df2a.keys()[:-1] ,rotation=90) +g.set_title('open water') +g.set_ylim(ymin, ymax) + +g = sns.violinplot(data=df2b['OBS']/yscaling, scale='width', inner="points", ax=axs[1][1])#, +g.set_xticklabels(labels=['OBSERVATIONS\n(2010-2020)'] ,rotation=90, color="red") +g.scatter(x=0, y=df2b['OBS'].mean()/yscaling, color='white', zorder=100, s=50) +#g.set_title('coastal') +g.set_ylim(ymin, ymax) + +plt.tight_layout() +plt.savefig('violinplots_%s_%s_%s.png'%(variable, period, threshold), dpi=100) \ No newline at end of file