Added introduction, Timeout function, Probes dataset
Simardeep27 committed Aug 18, 2021
1 parent 5495061 commit 7b7230f
Showing 3 changed files with 165 additions and 70 deletions.
2 changes: 1 addition & 1 deletion apps/Analytical.py
@@ -543,7 +543,7 @@ def addInputs(n_clicks, children):
[
dbc.Label('Plot PDF'),
dbc.Checklist(
options=[{"value": "val_{}".format(n_clicks),'disabled':True}],
options=[{"value": "val_{}".format(n_clicks),'disabled':False}],
switch=True, value=[0], id={"type": "radio_pdf","index": n_clicks},
)
]
206 changes: 140 additions & 66 deletions apps/Data_driven.py
@@ -1,9 +1,11 @@
import io
import time

import dash
import dash_html_components as html
import dash_core_components as dcc
import dash_bootstrap_components as dbc
import equadratures.datasets
from dash_extensions.enrich import Dash, ServersideOutput, Output, Input, State, Trigger
from dash.exceptions import PreventUpdate
import plotly.graph_objs as go
@@ -22,6 +24,7 @@
import ast
import numexpr as ne
from utils import convert_latex
from func_timeout import func_timeout, FunctionTimedOut

from app import app

@@ -35,14 +38,15 @@
###################################################################
info_text = r'''
This app uses Equadratures to compute uncertainty in the user-defined data. In this model, the user
can define parameters, select a basis function, and create a polynomial.
can upload their data to construct polynomials and compute uncertainty.
#### Instructions
1. Click the **add parameter** button in the parameter definition card to add parameters. Choose the type of distribution and, based on the **selected distribution**, fill in the required fields.
2. To visualize a defined parameter's **probability density function**, press the toggle button to obtain the plot in the Probability density function card.
3. Select the **basis** type from the basis selection card and fill in the required fields for the chosen basis (for example, sparse-grid requires q-val, order and growth as input).
4. Use the **Set Basis** button to compute the **cardinality** and get insights into the chosen basis in the basis selection card.
5. Set the solver method for the polynomial and enter the **input function** in the parameter definition card to compute **statistical moments**; use the sobol dropdown to gain insights into **sensitivity analysis**.
1. Upload data in **.csv** format to construct parameters, and choose the output variable.
2. Select the approach for constructing parameters: **KDE** or **Canonical**.
3. The KDE approach, as the name suggests, uses kernel density estimation to compute the underlying parameter distributions (**Note**: the KDE approach is computationally expensive but highly accurate).
4. The Canonical approach uses scipy functions to fit the data and find statistical moments, which are then fed to Equadratures to create parameters (both routes are sketched below).
5. To use the Canonical approach, each input's distribution must be set manually.
6. After selecting the order and mode of computation, the workflow is nearly complete. Pressing the compute uncertainty button builds the model from the input data to compute **statistical moments**; the sobol dropdown then gives insights into **sensitivity analysis**.
'''
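
The KDE and Canonical routes described above can also be exercised outside the app. Below is a minimal sketch, assuming equadratures' `Weight` and `Parameter(distribution='analytical', ...)` API for data-driven distributions; the data column and the Gaussian choice for the Canonical route are illustrative:

```python
import numpy as np
import scipy.stats as st
import equadratures as eq

# Synthetic stand-in for one column of the uploaded dataset.
data_column = np.random.RandomState(0).normal(2.0, 0.5, size=500)

# Canonical route: fit a named scipy distribution, then hand its moments to
# an equadratures Parameter (the Gaussian takes mean and variance).
mu, sigma = st.norm.fit(data_column)
param_canonical = eq.Parameter(distribution='gaussian',
                               shape_parameter_A=mu,
                               shape_parameter_B=sigma**2,
                               order=3)

# KDE route: estimate the density, then wrap it in a Weight so the Parameter
# integrates against the data-driven distribution (assumed API).
kde = st.gaussian_kde(data_column)
weight = eq.Weight(kde.pdf,
                   support=[data_column.min(), data_column.max()],
                   pdf=True)  # the callable is already a normalised pdf
param_kde = eq.Parameter(distribution='analytical',
                         weight_function=weight, order=3)
```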

@@ -123,11 +127,11 @@
[
dbc.FormGroup(
[
dbc.Label('Parameter Definition:', html_for="mode-select",width=6),
dbc.Label('Distribution Selection:', html_for="mode-select",width=6),
dbc.Col(dcc.Dropdown(id="mode-select",options=[
{'label':'Semi','value':'semi'},
{'label': 'Automatic', 'value':'auto'},
],searchable=False),width=4)
{'label':'Canonical','value':'semi'},
{'label': 'KDE', 'value':'auto'},
],searchable=False,disabled=True,clearable=False),width=4)
], row=True
),
dbc.FormGroup(
@@ -139,7 +143,7 @@
{'label': '3', 'value': 3},
{'label': '4', 'value': 4},
{'label': '5', 'value': 5}
], searchable=False,disabled=False),width=4)
], searchable=False,disabled=True,clearable=False),width=4)
], row=True
),
]
@@ -160,10 +164,10 @@
dcc.Dropdown(
options=[
{'label': 'Upload Dataset', 'value': 'Upload_data'},
{'label': 'Dataset 1', 'value': 'Ds1'},
{'label': 'Probes', 'value': 'probes'},
],
className="m-1", id='dataset_selection',
placeholder='Select Dataset..', clearable=False),
placeholder='Select Dataset..', clearable=False,value='Upload_data'),
width=3
),
dbc.Col(Upload_dataset,width=5),
@@ -296,21 +300,24 @@
]
)

output_vars=dbc.Row([
dbc.Col(mean_form),
dbc.Col(var_form),
dbc.Col(r2_form),
])


sobol_plot = dcc.Graph(id='Sobol_plot_datadriven', style={'width': 'inherit', 'height':'35vh'})

left_side = [
dbc.Row([dbc.Col(method_dropdown,width=6)]),
dbc.Row([dbc.Col(
dbc.Button('Compute Polynomial', id='CU_button_datadriven', n_clicks=0, className='ip_buttons',color='primary',disabled=False))
]),
dbc.Button('Compute Polynomial', id='CU_button_datadriven', n_clicks=0, className='ip_buttons',color='primary',disabled=True)
)
]),
dbc.Row([dbc.Col(dbc.Alert(id='poly-warning-datadriven',color='danger',is_open=False), width=3)]),
dbc.Row(
[
dbc.Col(mean_form),
dbc.Col(var_form),
dbc.Col(r2_form),
]
),
dbc.Spinner(html.Div([],id="loading-output"),color='primary'),
output_vars,
dbc.Row(dbc.Col(sobol_form,width=6)),
dbc.Row(dbc.Col(sobol_plot,width=8))
]
@@ -338,15 +345,23 @@
], style={"height": "80vh"}
)

tooltips = html.Div(
[
dbc.Tooltip("Maximum of 5 parameters",target="AP_button"),
dbc.Tooltip("The variables should be of the form x1,x2...",target="input_func"),
# dbc.Tooltip('Set basis and Input Function first',target="CU_button"),
]
timeout_msg = dcc.Markdown(r'''
**Timeout!**
Sorry! The uncertainty computation timed out because of the 30-second request limit imposed by the Heroku server.
You can try:
- Lowering the polynomial order and/or number of rows in your dataset.
- Using the Canonical approach, which is usually faster.
- Coming back later, when the server might be less busy.
''')

timeout_warning = dbc.Modal(
dbc.ModalBody(timeout_msg, style={'background-color':'rgba(160, 10, 0,0.2)'}),
id="timeout-warning",
is_open=False,
)



layout = dbc.Container(
[
html.H2("Uncertainty quantification for data-driven model", id='main_driven_text'),
@@ -365,7 +380,9 @@
dcc.Store(id='ParamData'),
dcc.Store(id='column-headers'),
dcc.Store(id='BasisObj'),
dcc.Store(id='PolyObj')
dcc.Store(id='PolyObj'),

timeout_warning
],
fluid=True

@@ -391,23 +408,30 @@ def ParseData(content,filename):
except Exception:
return None

else:
raise PreventUpdate

@app.callback(
ServersideOutput('UploadDF','data'),
ServersideOutput('column-headers','data'),
Output('filename_append','children'),
Input('upload-data-driven','filename'),
Input('upload-data-driven','contents'),
Input('dataset_selection','value'),
prevent_initial_call=True
)
def ParsedData(filename,content):
if content is not None:
(df,columns)=ParseData(content,filename)
children=[filename]
return df,columns,children
else:
raise PreventUpdate
def ParsedData(filename,content,data_select):
if data_select=='Upload_data':
if content is not None:
(df,columns)=ParseData(content,filename)
children=[filename]
return df,columns,children
else:
raise PreventUpdate
elif data_select=='probes':
data = equadratures.datasets.load_eq_dataset('probes')
data = np.hstack([data['X'], data['y2']])
cols = ['Hole ellipse', 'Hole fwd/back', 'Hole angle', 'Kiel lip', 'Kiel outer', 'Kiel inner', 'Hole diam.',
'Recovery ratio objective']
return data,cols,['Probes']
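
The new probes branch above can be checked standalone. A quick sketch, assuming `load_eq_dataset` downloads the dataset and returns a dictionary of arrays, as the callback implies:

```python
import numpy as np
import pandas as pd
import equadratures.datasets

data = equadratures.datasets.load_eq_dataset('probes')
table = np.hstack([data['X'], data['y2']])   # seven inputs plus one objective
cols = ['Hole ellipse', 'Hole fwd/back', 'Hole angle', 'Kiel lip',
        'Kiel outer', 'Kiel inner', 'Hole diam.', 'Recovery ratio objective']
print(pd.DataFrame(table, columns=cols).head())
```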

@app.callback(
Output('upload-data-table','data'),
@@ -467,6 +491,49 @@ def InputVars(columns,select):
else:
raise PreventUpdate

@app.callback(
Output('mode-select','disabled'),
Output('order-select','disabled'),
Input('upload-data-table','data'),
)
def ModeCheck(data):
changed_id = [p['prop_id'] for p in dash.callback_context.triggered][0]
if 'upload-data-table' in changed_id:
return False,False
else:
return True,True


@app.callback(
Output('CU_button_datadriven','disabled'),
Input('mode-select','value'),
Input('order-select','value'),
Input({'type':'drop_vals','index':dash.dependencies.ALL},'value')
)
def CUDisabledCheck(mode,order,distribution):
if mode=='semi':
if distribution and order is not None:  # need both distributions and an order
return False
else:
return True
elif mode=='auto':
if order is not None:
return False
else:
return True
else:
return True

@app.callback(
Output("datadriven-info", "is_open"),
[Input("datadriven-info-open", "n_clicks"), Input("datadriven-info-close", "n_clicks")],
[State("datadriven-info", "is_open")],
)
def toggle_modal(n1, n2, is_open):
if n1 or n2:
return not is_open
return is_open


def CreateParamWeights(data,columns,order):
@@ -502,6 +569,7 @@ def CreateParamSemi(data,columns,distributions,output,order):
@app.callback(
ServersideOutput('ParamData','data'),
ServersideOutput('BasisObj','data'),
Output('timeout-warning','is_open'),
Input('upload-data-table','data'),
Input('upload-data-table', 'columns'),
Input('output-select','value'),
Expand All @@ -518,13 +586,16 @@ def ComputeParams(data,columns,output,mode,order,n_clicks,distribution_semi,col_
for i in range(len(data)):
data[i].pop('{}'.format(output))
if mode=='auto':
param_objs=CreateParamWeights(data,columns,order)
try:
param_objs=func_timeout(24,CreateParamWeights,args=(data,columns,order))
except FunctionTimedOut:
return None,None,True
mybasis=Set_Basis()
return param_objs,mybasis
return param_objs,mybasis,False
else:
param_objs=CreateParamSemi(data,col_list,distribution_semi,output,order)
mybasis=Set_Basis()
return param_objs,mybasis
return param_objs,mybasis,False
else:
raise PreventUpdate
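
The `func_timeout` guard above is the commit's headline change: `CreateParamWeights` runs under a 24-second budget (leaving headroom below Heroku's 30-second request limit), and a `FunctionTimedOut` opens the timeout modal instead of crashing the request. A self-contained sketch of the same pattern, with a sleep standing in for the expensive KDE fit:

```python
import time
from func_timeout import func_timeout, FunctionTimedOut

def slow_fit(seconds):
    time.sleep(seconds)            # stand-in for an expensive KDE fit
    return 'fitted'

try:
    result = func_timeout(24, slow_fit, args=(60,))
except FunctionTimedOut:
    result = None                  # the callback opens the timeout modal here
print(result)                      # -> None after roughly 24 seconds
```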

@@ -542,6 +613,7 @@ def Set_Polynomial(parameters, basis, method,x_data,y_data):
return mypoly

@app.callback(
Output('loading-output','children'),
ServersideOutput('PolyObj', 'data'),
Output('mean_datadriven', 'value'),
Output('variance_datadriven', 'value'),
@@ -553,40 +625,42 @@ def Set_Polynomial(parameters, basis, method,x_data,y_data):
Input('column-headers','data'),
Input('solver_method_data', 'value'),
Input('output-select','value'),
Input('timeout-warning','is_open'),
prevent_initial_call=True
)
def SetModel(params,mybasis,data,cols,method,y):
changed_id = [p['prop_id'] for p in dash.callback_context.triggered][0]
print('ch',changed_id)
if 'CU_button_datadriven' in changed_id:
print(cols)
y_data = []
x_data = [[None for y in range(len(data[0].keys()) - 1)]
def SetModel(params,mybasis,data,cols,method,y,warning):
if warning:
return None,None,None,None,None
else:
changed_id = [p['prop_id'] for p in dash.callback_context.triggered][0]
if 'CU_button_datadriven' in changed_id:
y_data = []
x_data = [[None for y in range(len(data[0].keys()) - 1)]
for x in range(len(data))]
for i in range(len(data)):
for ind, j in enumerate(cols):
if j == '{}'.format(y):
y_data.append(data[i][j])
else:
x_data[i][ind] = data[i][j]
mypoly = Set_Polynomial(params, mybasis, method,x_data,y_data)
try:
mypoly.set_model()
except KeyError:
return None,None,None,None,False,True,"Incorrect Model evaluations"
for i in range(len(data)):
for ind, j in enumerate(cols):
if j == '{}'.format(y):
y_data.append(data[i][j])
else:
x_data[i][ind] = data[i][j]
mypoly = Set_Polynomial(params, mybasis, method,x_data,y_data)
try:
mypoly.set_model()
except KeyError:
return None,None,None,None,None
# except AssertionError:
# return None,None,None,True,None,False,True,"Incorrect Data uploaded"
# except ValueError:
# return None, None, None, None, False, True, "Incorrect Model evaluations"
# except IndexError:
# return None, None, None, None, False, True, "Incorrect Model evaluations"
mean, var = mypoly.get_mean_and_variance()
DOE=mypoly.get_points()
y_pred = mypoly.get_polyfit(np.array(x_data))
r2_score = eq.datasets.score(np.array(y_data), y_pred, metric='r2')
return mypoly, mean, var, r2_score ###
else:
raise PreventUpdate
mean, var = mypoly.get_mean_and_variance()
DOE=mypoly.get_points()
y_pred = mypoly.get_polyfit(np.array(x_data))
r2_score = eq.datasets.score(np.array(y_data), y_pred, metric='r2')
return None,mypoly, mean, var, r2_score ###
else:
raise PreventUpdate
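
For reference, the `Set_Polynomial`/`SetModel` pipeline reduces to the standalone sketch below; the synthetic data and parameter choices are illustrative, while the `Poly` construction, scoring and moment calls mirror the ones in this file:

```python
import numpy as np
import equadratures as eq

rng = np.random.RandomState(0)
X = rng.uniform(-1.0, 1.0, size=(50, 2))                  # two input columns
y = X[:, 0]**2 + 0.5 * X[:, 1] + rng.normal(0, 0.01, 50)  # output column

params = [eq.Parameter(distribution='uniform', lower=-1.0, upper=1.0, order=3)
          for _ in range(2)]
poly = eq.Poly(params, eq.Basis('total-order'), method='least-squares',
               sampling_args={'mesh': 'user-defined',
                              'sample-points': X,
                              'sample-outputs': y})
poly.set_model()
mean, var = poly.get_mean_and_variance()
r2 = eq.datasets.score(y, poly.get_polyfit(X), metric='r2')
print(mean, var, r2)
```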


@app.callback(
27 changes: 24 additions & 3 deletions index.py
@@ -46,9 +46,30 @@
$$f(s_1) \approx \sum_{i=1}^n x_ip_i(s_1)$$
....
### Models
There are three models in the app: the **Analytical**, **Offline** and **Data-Driven** models. These models follow different approaches
and workflows to compute statistical moments and quantify uncertainty.
#### 1. Analytical Model
The Analytical Model is a **proof-of-concept app** where the model is an analytical function defined by the user. This
“analytical” app helps the user define a model function and its input parameters. The Analytical Model supports adding parameters (maximum of 5) and defining properties such as distribution,
statistical moments, min/max values and order. Different distributions have different attributes, e.g. the Uniform distribution has min/max and order, whereas the Gaussian distribution has mean, variance and order; the Analytical Model
handles this dynamic selection so that users can exploit the full range of available distributions.
#### 2. Offline Model
The Offline Model is quite similar to the Analytical Model in terms of workflow; the major difference between the models is the
intended application. The Offline Model is built for professional use. Parameter and basis definition are similar to the Analytical Model, but here the user is
expected to upload the **DOE evaluations** of their simulations and then quantify uncertainty.
#### 3. Data-Driven Model
The Data-Driven as the name suggests has a more **data-centric** workflow, unlike the previous models, the user uploads their data,
select their output variable and based on their selection equadratures construct input parameters. Here, two choices are provided to the user for
Distribution Selection, first is **KDE** approach, which makes use of Kernel Density Estimation to produce accurate results but taking longer time to compute,
second is the **Canonical** approach, here the users manually select the distribution of the input parameters, this method makes use of scipy fit method to compute the
statistical moments, which are then fed to Equadratures parameter. Using these methods, the user can gain insights regarding the senstivity of their dataset and the underlying statistical moments.
.
'''
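
Both model descriptions above end at sensitivity analysis, so a short sketch of that last step may help; it assumes `get_sobol_indices(1)` returns a dictionary keyed by input-index tuples:

```python
import numpy as np
import equadratures as eq

rng = np.random.RandomState(1)
X = rng.uniform(-1.0, 1.0, size=(60, 2))
y = 3.0 * X[:, 0] + 0.1 * X[:, 1]         # input 0 should dominate

params = [eq.Parameter(distribution='uniform', lower=-1.0, upper=1.0, order=2)
          for _ in range(2)]
poly = eq.Poly(params, eq.Basis('total-order'), method='least-squares',
               sampling_args={'mesh': 'user-defined',
                              'sample-points': X,
                              'sample-outputs': y})
poly.set_model()
print(poly.get_sobol_indices(1))          # expect S_0 near 1, S_1 near 0
```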

home_text=dcc.Markdown(convert_latex(home_text),dangerously_allow_html=True, style={'text-align':'justify'})
