#!/usr/bin/env python
# -*- coding: utf-8 -*-
import flask
import tensorflow as tf
import pandas as pd
import numpy as np
import time
import json
import os
import shutil
import subprocess
import stat
import keras
import keras.backend as K
import sklearn
import smtplib
import glob
import uuid
import pickle as pkl
import joblib
from sklearn.metrics import pairwise_distances
from bokeh.plotting import figure
from bokeh.resources import CDN
from bokeh.embed import file_html
from bokeh.palettes import Inferno256
from flask import request, session
# import flask_login
# from flask_login import LoginManager, UserMixin, login_required, current_user
from molSimplify.Informatics.MOF.MOF_descriptors import get_primitive, get_MOF_descriptors
from flask_cors import CORS
from datetime import datetime
from pymongo import MongoClient
from werkzeug.utils import secure_filename
from list_content.list_content import my_linkers, my_sbus, my_nets, my_MOFs
cmap_bokeh = Inferno256
MOFSIMPLIFY_PATH = os.path.abspath('.') # the main directory
MOFSIMPLIFY_PATH += '/'
USE_SPLASH_PAGE = False
app = flask.Flask(__name__)
app.config['MAX_CONTENT_LENGTH'] = 1024 * 1024 * 20 # Upload max 20 megabytes
app.config['UPLOAD_EXTENSIONS'] = ['.jpg', '.jpeg', '.png', '.pdf', '.tiff', '.tif', '.eps'] # acceptable file types
# Note: these are a superset of the extensions indicated on the form and allowed on the front end, so some of these extensions are never actually used
app.secret_key = str(json.load(open('secret_key.json','r'))['key']) # secret key
cors = CORS(app)
operation_counter = 0 # Tracks concurrent server traffic, so the user can be alerted to wait until later to make a request.
# operation_counter is only changed for the more time-consuming operations (the two predictions, and component analysis).
# operation_counter is periodically reset to zero, since if a user closes their browser in the middle of an operation, operation_counter is never properly decremented.
last_operation_counter_clear = time.time() # the current time when server is started
MAX_OPERATIONS = 4 # This variable dictates the maximum number of concurrent operations, to prevent server overload.
# The following three functions are needed for the ANN models.
def precision(y_true, y_pred):
true_positives = K.sum(K.round(K.clip(y_true * y_pred, 0, 1)))
predicted_positives = K.sum(K.round(K.clip(y_pred, 0, 1)))
precision = true_positives / (predicted_positives + K.epsilon())
return precision
def recall(y_true, y_pred):
true_positives = K.sum(K.round(K.clip(y_true * y_pred, 0, 1)))
possible_positives = K.sum(K.round(K.clip(y_true, 0, 1)))
recall = true_positives / (possible_positives + K.epsilon())
return recall
def f1(y_true, y_pred):
p = precision(y_true, y_pred)
r = recall(y_true, y_pred)
return 2 * ((p * r) / (p + r + K.epsilon()))
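# As a quick reference for the three metrics above: precision = TP / (TP + FP), recall = TP / (TP + FN),
# and f1 is their harmonic mean, 2*P*R / (P + R). K.epsilon() guards against division by zero.
# Illustrative numbers (assumed, not from this codebase): with 8 true positives, 2 false positives,
# and 2 false negatives, precision = 0.8, recall = 0.8, and f1 = 0.8.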
# global variables for the ANN models, loaded once when server first started
solvent_ANN_path = MOFSIMPLIFY_PATH + 'model/solvent/ANN/'
thermal_ANN_path = MOFSIMPLIFY_PATH + 'model/thermal/ANN/'
dependencies = {'precision':precision,'recall':recall,'f1':f1}
# loading the ANN models to save time later
tf_session = tf.Session()
from tensorflow import keras as tf_keras
tf_keras.backend.set_session(tf_session)
##### Note: the h5 models for the solvent removal stability prediction ANN and the thermal stability prediction ANN should be based on the same version of TensorFlow (here, 1.14). #####
solvent_model = keras.models.load_model(solvent_ANN_path + 'final_model_flag_few_epochs.h5',custom_objects=dependencies)
thermal_model = keras.models.load_model(thermal_ANN_path + 'final_model_T_few_epochs.h5',custom_objects=dependencies)
# These two models use scikit-learn 0.23.2
water_model = joblib.load('model/water_and_acid/models/water_model.joblib')
acid_model = joblib.load('model/water_and_acid/models/acid_model.joblib')
water_scaler = joblib.load('model/water_and_acid/models/water_scaler.joblib')
acid_scaler = joblib.load('model/water_and_acid/models/acid_scaler.joblib')
ethane_ANN_path = MOFSIMPLIFY_PATH + 'model/c2/ethane/'
ethylene_ANN_path = MOFSIMPLIFY_PATH + 'model/c2/ethylene/'
ethane_model = keras.models.load_model(ethane_ANN_path + 'ethane_model.h5')
ethylene_model = keras.models.load_model(ethylene_ANN_path + 'ethylene_model.h5')
# global variable dictionary for stable building block aliases. See https://zenodo.org/record/7091192
bb_mapping = {'XUDNAN_clean_linker_2': 'E0', 'MIFKUJ_clean_linker_0': 'E1', 'AJUNOK_clean_linker_0': 'E2', 'OJICUG_clean_linker_0': 'E3',
'FUNLAD_clean_linker_1': 'E4', 'CIFDUS_clean_linker_0': 'E5', 'EYOCIG_clean_linker_4': 'E6', 'TEMPAE_clean_linker_10': 'E7',
'KIFKEQ_clean_linker_1': 'E8', 'FUNLAD_clean_linker_0': 'E9', 'EZIPEK_clean_linker_0': 'E10', 'UKIBIB_clean_linker_0': 'E11',
'UKIBUN_clean_linker_0': 'E12', 'UVAHIK_clean_linker_0': 'E13', 'XIVTED_clean_linker_0': 'E14', 'ZUJKAS_clean_linker_0': 'E15',
'APAZEY_clean_linker_0': 'orgN0', 'AZAVOO_clean_linker_0': 'orgN1', 'BELTOD_clean_linker_0': 'orgN2', 'BETDAH_clean_linker_0': 'orgN3',
'BETDEL_clean_linker_0': 'orgN4', 'BETDIP_clean_linker_0': 'orgN5', 'CEKHIL_clean_linker_0': 'orgN6', 'CUKXOW_clean_linker_0': 'orgN7',
'CUQRIR_clean_linker_1': 'orgN8', 'ESEHIV_clean_linker_0': 'orgN9', 'GIZVER_clean_linker_0': 'orgN10', 'INOVEN_clean_linker_0': 'orgN11',
'IYUCEM_clean_linker_1': 'orgN12', 'IZENUY_clean_linker_0': 'orgN13', 'KAKCAD_clean_linker_0': 'orgN14', 'KUFVIS_clean_linker_0': 'orgN15',
'MAKGUD_clean_linker_0': 'orgN16', 'MIDCAF_clean_linker_0': 'orgN17', 'MIFKUJ_clean_linker_1': 'orgN18', 'MUDLON_clean_linker_0': 'orgN19',
'NAHDIM_clean_linker_0': 'orgN20', 'NAWXER_clean_linker_0': 'orgN21', 'PAMTOO_clean_linker_0': 'orgN22', 'QEWDON_clean_linker_0': 'orgN23',
'QUQFOY_clean_linker_2': 'orgN24', 'TATPOV_clean_linker_0': 'orgN25', 'VETTIZ_charged_linker_0': 'orgN26', 'WOSHET_clean_linker_1': 'orgN27',
'BEQFEK_clean_linker_11': 'orgN28', 'LEJCEK_clean_linker_0': 'orgN29', 'UFIREI_clean_linker_0': 'orgN30', 'UWAGAB01_clean_linker_0': 'orgN31',
'BEQFEK_clean_sbu_0': 'N0', 'BOTCEU_clean_sbu_0': 'N1', 'ENOWUB_clean_sbu_0': 'N2', 'FANWIC_clean_sbu_0': 'N3',
'ICAMEG_clean_sbu_1': 'N4', 'LIZSOE_clean_sbu_0': 'N5', 'UKALOJ_clean_sbu_0': 'N6', 'UKIBUN_clean_sbu_0': 'N7',
'ZALLEG_clean_sbu_0': 'N8', 'AJUNOK_clean_sbu_0': 'N9', 'AZAVOO_clean_sbu_0': 'N10', 'BELTOD_clean_sbu_0': 'N11',
'BETDAH_clean_sbu_0': 'N12', 'BETDEL_clean_sbu_0': 'N13', 'BETDIP_clean_sbu_0': 'N14', 'BETFAJ_clean_sbu_0': 'N15',
'BETFEN_clean_sbu_0': 'N16', 'BETGAK_clean_sbu_0': 'N17', 'CEKHIL_clean_sbu_0': 'N18', 'CIFDUS_clean_sbu_1': 'N19',
'CUKXOW_clean_sbu_0': 'N20', 'CUWYAW_clean_sbu_0': 'N21', 'EBIMEJ_clean_sbu_0': 'N22', 'EQERAU_clean_sbu_0': 'N23',
'ESEHIV_clean_sbu_0': 'N24', 'EYACOX_clean_sbu_0': 'N25', 'EYACOX_clean_sbu_1': 'N26', 'EZIPEK_clean_sbu_0': 'N27',
'FUNLAD_clean_sbu_0': 'N28', 'GALJAG_clean_sbu_0': 'N29', 'GEDQOX_clean_sbu_0': 'N30', 'GIZVER_clean_sbu_0': 'N31',
'HICVOG_clean_sbu_0': 'N32', 'HISSIN_clean_sbu_0': 'N33', 'HISSIN_clean_sbu_1': 'N34', 'ICIZOL_clean_sbu_0': 'N35',
'INOVEN_clean_sbu_0': 'N36', 'IYUCEM_clean_sbu_0': 'N37', 'IZENUY_clean_sbu_0': 'N38', 'JUFBIX_clean_sbu_0': 'N39',
'KAKCAD_clean_sbu_0': 'N40', 'KOZSID_clean_sbu_0': 'N41', 'KUFVIS_clean_sbu_0': 'N42', 'KUMBOL_clean_sbu_0': 'N43',
'KUMBOL_clean_sbu_1': 'N44', 'KUMBUR_clean_sbu_0': 'N45', 'KUMBUR_clean_sbu_1': 'N46', 'KUMJIN_clean_sbu_0': 'N47',
'KUMJIN_clean_sbu_1': 'N48', 'LEVNOQ01_clean_sbu_1': 'N49', 'MAKGOX_clean_sbu_0': 'N50', 'MAKGUD_clean_sbu_1': 'N51',
'MIDCAF_clean_sbu_0': 'N52', 'MIFKUJ_clean_sbu_0': 'N53', 'MUDLON_clean_sbu_0': 'N54', 'NAHDIM_clean_sbu_0': 'N55',
'NAHDIM_clean_sbu_2': 'N56', 'NAWXER_clean_sbu_0': 'N57', 'NUHQIS_clean_sbu_0': 'N58', 'NUHQUE_clean_sbu_2': 'N59',
'NUHRAL_clean_sbu_3': 'N60', 'OJICUG_clean_sbu_0': 'N61', 'OLANAS_clean_sbu_1': 'N62', 'OLANAS_clean_sbu_2': 'N63',
'OLANEW_clean_sbu_0': 'N64', 'OLANEW_clean_sbu_1': 'N65', 'OLANEW_clean_sbu_2': 'N66', 'OLANEW_clean_sbu_3': 'N67',
'OLANEW_clean_sbu_4': 'N68', 'PAMTOO_clean_sbu_0': 'N69', 'PAMTUU_clean_sbu_0': 'N70', 'PORLAL_clean_sbu_0': 'N71',
'QEWDON_clean_sbu_0': 'N72', 'QUQFOY_clean_sbu_1': 'N73', 'QUQFOY_clean_sbu_3': 'N74', 'SARMOO_clean_sbu_0': 'N75',
'TAGTUT_clean_sbu_1': 'N76', 'TAGTUT_clean_sbu_3': 'N77', 'TATPOV_clean_sbu_0': 'N78', 'UXUYUI_clean_sbu_0': 'N79',
'VETTIZ_charged_sbu_0': 'N80', 'VOLQOD_clean_sbu_1': 'N81', 'WAQDOJ_charged_sbu_0': 'N82', 'WUSLED_clean_sbu_0': 'N83',
'XADDAJ01_clean_sbu_3': 'N84', 'XOMCOT_clean_sbu_0': 'N85', 'XOMCOT_clean_sbu_1': 'N86', 'XUDNAN_clean_sbu_1': 'N87'}
# Loading the names of MOFs from our hypothetical database. Constructed with ultrastable building blocks.
with open('pickle_files/MOF_names/1inorganic_1edge_cifs_list.pkl', 'rb') as f:
list_1inorganic_1edge_MOFs = pkl.load(f)
with open('pickle_files/MOF_names/1inorganic_1organic_1edge_cifs_list.pkl', 'rb') as f:
list_1inorganic_1organic_1edge_MOFs = pkl.load(f)
with open('pickle_files/MOF_names/2inorganic_1edge_cifs_list.pkl', 'rb') as f:
list_2inorganic_1edge_MOFs = pkl.load(f)
# Loading the names of the ultrastable MOFs from our hypothetical database.
with open('pickle_files/ultrastable_MOFs/1inor_1edge_cifs_list_ultrastable.pkl', 'rb') as f:
list_1inorganic_1edge_MOFs_ultrastable = pkl.load(f)
with open('pickle_files/ultrastable_MOFs/1inor_1org_1edge_cifs_list_ultrastable.pkl', 'rb') as f:
list_1inorganic_1organic_1edge_MOFs_ultrastable = pkl.load(f)
with open('pickle_files/ultrastable_MOFs/2inor_1edge_cifs_list_ultrastable.pkl', 'rb') as f:
list_2inorganic_1edge_MOFs_ultrastable = pkl.load(f)
ultrastable_MOFs = list_1inorganic_1edge_MOFs_ultrastable + list_1inorganic_1organic_1edge_MOFs_ultrastable + list_2inorganic_1edge_MOFs_ultrastable
def conditional_diminish(counter):
"""
    conditional_diminish decreases the input counter by one and returns it, unless the input is already zero.
    The input is intended to be operation_counter.
    operation_counter might already be zero when conditional_diminish is called, because of the periodic zeroing of operation_counter. This is unlikely, though.
:param counter: An int, which should be operation_counter.
:return: The new value of operation_counter.
"""
if counter != 0:
counter -= 1
return counter
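# Typical call site (an illustrative sketch, per the docstring above; not a verbatim line from this file):
# operation_counter = conditional_diminish(operation_counter)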
def operation_counter_periodic_clear():
    """
    operation_counter_periodic_clear resets operation_counter to zero if more than five minutes have passed since the last reset.
    This guards against the counter drifting upward when users close their browser mid-operation (see the note above conditional_diminish).
    """
global operation_counter # global variable
global last_operation_counter_clear
if time.time() - last_operation_counter_clear > 300: # 5 minutes or more since last time operation_counter was zero'd
last_operation_counter_clear = time.time()
operation_counter = 0
# app.route takes jquery ($) requests from index.html and executes the associated function in app.py.
# Output can then be returned to index.html.
def set_ID():
"""
set_ID sets the session user ID.
This is also used to generate unique folders, so that multiple users can use the website at a time.
    The user's folder is temp_file_creation_[ID].
    Specifically, the function makes a copy of the temp_file_creation folder for the current user, so that the rest of MOFSimplify's functionality can be executed for that user in their own folder.
    This lets multiple operations be run for different users concurrently.
This function also deletes temp_file_creation copies from other users that have not been used for a while, in order to reduce clutter.
:return: The session ID for this user.
"""
session['ID'] = uuid.uuid4() # a unique ID for this session
    session['permission'] = True # keeps track of whether the user gave us permission to store the MOFs they run predictions on; defaults to Yes
# make a version of the temp_file_creation folder for this user
new_folder = MOFSIMPLIFY_PATH + '/temp_file_creation_' + str(session['ID'])
shutil.copytree('temp_file_creation', new_folder)
    os.remove(new_folder + '/temp_cif.cif') # remove this file, for the sole purpose of updating the time stamp on the new folder (copytree does not update it)
# delete all temp_file_creation clone folders that haven't been used for a while, to prevent folder accumulation
for root, dirs, files in os.walk(MOFSIMPLIFY_PATH):
for dir in dirs:
target_str = 'temp_file_creation'
            full_path = os.path.join(root, dir) # join with root, so the path is valid regardless of the current working directory
            if len(dir) > len(target_str) and target_str in dir and file_age_in_seconds(full_path) > 7200: # 7200 seconds is two hours
                # target_str in dir, since the names of all copies start with the sequence "temp_file_creation"
                # len(dir) > len(target_str), to prevent deleting the original temp_file_creation folder
                shutil.rmtree(full_path)
return str(session['ID']) # return a string
@app.route('/get_ID', methods=['GET'])
def get_ID():
"""
get_ID gets the session user ID.
This is used for getting building block generated MOFs.
:return: string, The session ID for this user.
"""
return str(session['ID']) # return a string
@app.route('/permission', methods=['POST'])
def change_permission():
"""
change_permission adjusts whether or not MOFSimplify stores information on the MOFs the user predicts on.
    If the user clicks "Yes" or "No" before get_lists() finishes running when the website first loads in the browser (~4 seconds), their input will not register for this session; it will instead register for the most recently started session.
:return: string, The boolean sent from the front end. We return this because we have to return something, but nothing is done with the returned value on the front end.
"""
# Grab data
permission = json.loads(flask.request.get_data())
session['permission'] = permission
print('Permission check')
print(permission)
return str(permission)
@app.route('/list_getter', methods=['GET'])
def get_lists():
"""
get_lists gets the dropdown lists.
    :return: dict, the dropdown lists.
"""
# Initializes the new user, since this function is called when the browser is first opened up.
set_ID()
return {'my_linkers':my_linkers, 'my_sbus':my_sbus, 'my_nets':my_nets, 'my_MOFs':my_MOFs}
# The send_from_directory functions that follow provide images from the MOFSimplify server to the website. The images are in a folder called images.
@app.route('/TGA_graphic.png')
def serve_TGA_graphic():
return flask.send_from_directory('images', 'TGA_graphic.png')
@app.route('/banner_light')
def serve_banner_light():
return flask.send_from_directory('images', 'MOF_light.webp') # Google's webp format. It is optimized for websites and loads quickly.
@app.route('/banner_dark')
def serve_banner_dark():
return flask.send_from_directory('images', 'MOF_dark.webp') # Google's webp format. It is optimized for websites and loads quickly.
@app.route('/MOF_logo.png')
def serve_MOFSimplify_logo():
return flask.send_from_directory('images', 'MOF_logo.png')
## Handle feedback
@app.route('/process_feedback', methods=['POST'])
def process_feedback():
"""
process_feedback inserts MOFSimplify form feedback into the MongoDB feedback database.
    If an uploaded file has an incorrect extension (i.e. is a disallowed file format), a 204 no-content response is returned and the feedback is not stored.
"""
client = MongoClient('18.18.63.68',27017) # connect to mongodb
# The first argument is the IP address. The second argument is the port.
db = client.feedback
collection = db.MOFSimplify # The MOFSimplify collection in the feedback database.
fields = ['feedback_form_name', 'rating', 'email', 'reason', 'comments', 'cif_file_name', 'structure', 'solvent']
    # meta_fields = ['IP', 'datetime', 'cif_file', 'MOF_name']
final_dict = {}
for field in fields:
final_dict[field] = request.form.get(field)
# Populate special fields
uploaded_file = request.files['file']
if uploaded_file.filename == '' and request.form.get('feedback_form_name') != 'upload_form':
# User did not upload the optional TGA trace
print('No TGA trace')
# if final_dict['file']==b'':
# file_ext = ''
else:
final_dict['filetype'] = uploaded_file.content_type
filename = secure_filename(uploaded_file.filename)
final_dict['filename'] = filename
final_dict['file'] = uploaded_file.read()
file_ext = os.path.splitext(filename)[1].lower()
if file_ext not in app.config['UPLOAD_EXTENSIONS']:
return ('', 204) # 204 no content response
# return flask.send_from_directory('./splash_page/', 'error.html')
# Special tasks if the form is upload_form
if request.form.get('feedback_form_name') == 'upload_form':
uploaded_cif = request.files['cif_file']
cif_filename = secure_filename(uploaded_cif.filename)
file_ext = os.path.splitext(cif_filename)[1].lower()
if file_ext != '.cif':
return ('', 204) # 204 no content response
# return flask.send_from_directory('./splash_page/', 'error.html')
final_dict['cif_file_name'] = cif_filename
final_dict['structure'] = uploaded_cif.read()
final_dict['ip'] = request.remote_addr
final_dict['timestamp'] = datetime.now().isoformat()
print(final_dict)
    collection.insert_one(final_dict) # insert the dictionary into the MongoDB collection
return ('', 204) # 204 no content response
# return flask.send_from_directory('./splash_page/', 'success.html')
## Handle feedback, for the 2-class water and acid models
@app.route('/process_feedback_water', methods=['POST'])
def process_feedback_water():
"""
process_feedback_water inserts MOFSimplify form feedback into the MongoDB feedback database.
"""
client = MongoClient('18.18.63.68',27017) # connect to mongodb
# The first argument is the IP address. The second argument is the port.
db = client.feedback
collection = db.MOFSimplify_water # The MOFSimplify_water collection in the feedback database.
fields = ['feedback_form_name', 'rating', 'email', 'reason', 'comments', 'cif_file_name', 'structure']
final_dict = {}
for field in fields:
final_dict[field] = request.form.get(field)
final_dict['ip'] = request.remote_addr
final_dict['timestamp'] = datetime.now().isoformat()
print(final_dict)
    collection.insert_one(final_dict) # insert the dictionary into the MongoDB collection
return ('', 204) # 204 no content response
## Handle removal request
@app.route('/process_removal', methods=['POST'])
def process_removal():
"""
process_removal emails [email protected] when the removal form is filled out.
"""
email = request.form.get('email')
comments = request.form.get('comments')
ip = request.remote_addr
timestamp = datetime.now().isoformat()
# grabbing environment variables (should be in .bashrc_conda or .zshrc)
EMAIL_ADDRESS = os.environ.get('EMAIL_USER')
EMAIL_PASSWORD = os.environ.get('EMAIL_PASS')
# https://www.youtube.com/watch?v=JRCJ6RtE3xU
with smtplib.SMTP('smtp.gmail.com', 587) as smtp:
smtp.ehlo()
smtp.starttls()
smtp.ehlo()
# logging in to mail server
smtp.login(EMAIL_ADDRESS, EMAIL_PASSWORD)
subject = 'MOFSimplify removal request'
body = f'email: {email}\ncomments: {comments}\nip: {ip}\ntimestamp: {timestamp}'
msg = f'Subject: {subject}\n\n{body}'
smtp.sendmail(EMAIL_ADDRESS, '[email protected]', msg) # sending to [email protected]
return ('', 204) # 204 no content response
## Splash page management. Splash page is currently disabled.
@app.route('/', methods=['GET', 'POST'])
@app.route('/<path:path>', methods=['GET', 'POST'])
def index(path='index.html'):
if request.method == 'POST':
username = 'user1'
# user = User()
# user.id = username
# flask_login.login_user(user)
# This is the section where the password is checked.
# if request.form.get('password') == users[username]['password']:
# user = User()
# user.id = 'user1'
# flask_login.login_user(user)
# print('is user authenticated?')
# print(current_user.is_authenticated)
# print('input check')
# print(request.form.get('password'))
# if current_user.is_authenticated:
# return flask.send_from_directory('.', 'index.html')
# elif request.form.get('password') == None:
# return flask.send_from_directory('./splash_page/', path)
# else:
# return flask.send_from_directory('./splash_page/', 'index_wrong_password.html')
return flask.send_from_directory('.', 'index.html')
@app.route('/mof_examples/<path:path>') # needed for fetch
def serve_example(path):
"""
serve_example returns a file to MOFSimplify.
The file is intended to be a cif file of an example MOF.
So, this function serves the example MOF.
:param path: The path to the desired example MOF in the mof_examples folder. For example, HKUST1.cif.
:return: The MOF specified in the path input.
"""
return flask.send_from_directory('mof_examples', path)
@app.route('/how_to_cite.html')
def serve_cite():
"""
serve_cite serves the how to cite page.
So the user is redirected to the how to cite page.
:return: The how to cite page.
"""
return flask.send_from_directory('.', 'how_to_cite.html')
@app.route('/water_stability_prediction.html')
def serve_water_stability_page():
"""
serve_water_stability_page serves the water stability page.
So the user is redirected to the water stability page.
:return: The water stability page.
"""
return flask.send_from_directory('.', 'water_stability_prediction.html')
@app.route('/C2_uptake_prediction.html')
def serve_C2_uptake_page():
"""
serve_C2_uptake_page serves the C2 uptake page.
So the user is redirected to the C2 uptake page.
:return: The C2 uptake page.
"""
return flask.send_from_directory('.', 'C2_uptake_prediction.html')
@app.route('/stable_MOFs.html')
def serve_stable_bb_page():
"""
serve_stable_bb_page serves the stable MOFs page.
So the user is redirected to the stable MOFs page.
:return: The stable MOFs page.
"""
return flask.send_from_directory('.', 'stable_MOFs.html')
@app.route('/libraries/<path:path>')
def serve_library_files(path):
"""
serve_library_files returns a file to MOFSimplify.
The file is intended to be a library file, either .js or .css.
:param path: The path to the desired library in the libraries folder. For example, jquery-3.4.1.min.js.
:return: The library specified in the path input.
"""
return flask.send_from_directory('libraries', path)
@app.route('/list_content/<path:path>')
def serve_list_files(path):
"""
serve_list_files returns a file to MOFSimplify.
The file is intended to be a list file, which contains information for dropdowns.
    :param path: The path to the desired list file in the list_content folder.
:return: The list file specified in the path input.
"""
return flask.send_from_directory('list_content', path)
@app.route('/bbcif/<path:path>')
def serve_bbcif(path):
"""
serve_bbcif returns a file to MOFSimplify.
The file is intended to be a cif file for a MOF that was constructed using MOFSimplify's building block functionality.
So, this function serves the building block generated MOF.
:param path: The path to the desired MOF in the user's building block folder.
:return: The cif file for the building block generated MOF.
"""
path_parts = path.split('~')
cif_name = path_parts[0]
user_ID = path_parts[1]
    return flask.send_from_directory('temp_file_creation_' + user_ID + '/tobacco_3.0/output_cifs', cif_name)
@app.route('/CoRE2019/<path:path>') # needed for fetch
def serve_CoRE_MOF(path):
"""
serve_CoRE_MOF returns a file to MOFSimplify.
The file is intended to be a cif file. It should be a CoRE MOF.
:param path: The path to the desired MOF in the CoRE2019 folder.
:return: The cif file for the neighbor MOF.
"""
return flask.send_from_directory('CoRE2019', path)
@app.route('/unoptimized_geo/<path:path>')
def serve_unoptimized_MOF(path):
"""
serve_unoptimized_MOF returns a file to MOFSimplify.
The file is intended to be a cif file. It should be a MOF.
:param path: The path to the desired MOF in the stable_building_blocks folder.
:return: The cif file for the non geometry optimized MOF.
"""
MOF_type = type_determination(path)
return flask.send_from_directory(f'stable_building_blocks/initial_structures/{MOF_type}', path)
@app.route('/optimized_geo/<path:path>')
def serve_optimized_MOF(path):
"""
serve_optimized_MOF returns a file to MOFSimplify.
The file is intended to be a cif file. It should be a MOF.
:param path: The path to the desired MOF in the stable_building_blocks folder.
:return: The cif file for the geometry optimized MOF.
"""
MOF_type = type_determination(path)
return flask.send_from_directory(f'stable_building_blocks/optimized_structures/{MOF_type}', 'optimized_' + path)
@app.route('/ris_files/MOFSimplify_citation.ris')
def serve_ris():
"""
serve_ris returns a file to MOFSimplify.
    The file is the citation file for the MOFSimplify paper.
    :return: The ris citation file.
"""
return flask.send_from_directory('ris_files', 'MOFSimplify_citation.ris')
def listdir_nohidden(path): # used for bb_generate; ignores hidden files
    """
    listdir_nohidden returns the files in the specified directory that are not hidden.
    It is used as a helper function in the bb_generate function.
    :param path: The path to be examined.
    :return: list, the non-hidden files in the specified path.
    """
    return [entry for entry in os.listdir(path) if not entry.startswith('.')] # filter out hidden files (names starting with '.')
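# Illustrative behavior (hypothetical file names): for a directory containing
# ['.DS_Store', 'MOF_out.cif'], listdir_nohidden returns ['MOF_out.cif'].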
def file_age_in_seconds(pathname):
    """
    file_age_in_seconds returns the time since the file/folder specified in pathname was last modified.
    It is used as a helper function in the set_ID function.
    :param pathname: The path of the file/folder to examine.
    :return: The age of the file/folder specified in pathname, in seconds since the last modification.
    """
    return time.time() - os.stat(pathname)[stat.ST_MTIME] # time since last modification
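# Illustrative usage (hypothetical folder name): file_age_in_seconds('temp_file_creation_1234')
# returns the number of seconds since that folder was last modified.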
def extract_info(my_data):
    """
    extract_info pulls the structure (cif file text) and the MOF name out of the data sent from the front end.
    :param my_data: dict, the data sent from the front end. Contains the keys 'structure' and 'name'.
    :return: str structure, the cif file text of the MOF.
    :return: str name, the name of the MOF, with any trailing '.cif' removed.
    """
    structure = my_data['structure']
    name = my_data['name']
    if name.endswith('.cif'):
        name = name[:-4] # remove the .cif part of the name
    return structure, name
def extract_info_C2(my_data):
    """
    extract_info_C2 is like extract_info, but also pulls the temperature and pressure for C2 uptake predictions.
    :param my_data: dict, the data sent from the front end. Contains the keys 'structure', 'name', 'T', and 'P'.
    :return: The structure, name, temperature, and pressure.
    """
    structure, name = extract_info(my_data)
    temperature = my_data['T']
    pressure = my_data['P']
    return structure, name, temperature, pressure
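# Illustrative payload shape for the two extractors above (hypothetical values for T and P):
# {'structure': '<cif file text>', 'name': 'HKUST1.cif', 'T': 298, 'P': 1}
# extract_info_C2 would return ('<cif file text>', 'HKUST1', 298, 1).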
@app.route('/curr_users', methods=['GET'])
def curr_num_users():
"""
curr_num_users returns the current number of users on MOFSimplify.
This is determined by looking at user specific folders. User specific folders that have not been used for a while are deleted (see the set_ID function).
:return: The number of extant user-specific folders on MOFSimplify.
"""
    count = 0
    for root, dirs, files in os.walk(MOFSIMPLIFY_PATH):
        for dir in dirs:
            target_str = 'temp_file_creation'
            if len(dir) > len(target_str) and target_str in dir:
                # target_str in dir, since all copies start with temp_file_creation
                # len(dir) > len(target_str), to prevent counting the original temp_file_creation folder
                count += 1
    return str(count + 1)
@app.route('/get_bb_generated_MOF', methods=['POST'])
def bb_generate():
"""
    bb_generate generates a MOF using the building blocks and net specified by the user.
    The function uses ToBaCCo code, version 3.0.
    It returns the constructed MOF's name to the front end.
    :return: A JSON string containing the name of the building block generated MOF, or the string 'FAILED' if construction fails.
"""
tobacco_folder = MOFSIMPLIFY_PATH + "temp_file_creation_" + str(session['ID']) + "/tobacco_3.0/"
# Grab data
my_data = json.loads(flask.request.get_data())
linker = my_data['linker']
sbu = my_data['sbu']
net = my_data['net']
# clear the edges, nodes, templates, and output cifs folders to start fresh
# when running python tobacco.py, it looks in these folders
shutil.rmtree(tobacco_folder + 'edges')
os.mkdir(tobacco_folder + 'edges')
shutil.rmtree(tobacco_folder + 'nodes')
os.mkdir(tobacco_folder + 'nodes')
shutil.rmtree(tobacco_folder + 'templates')
os.mkdir(tobacco_folder + 'templates')
shutil.rmtree(tobacco_folder + 'output_cifs')
os.mkdir(tobacco_folder + 'output_cifs')
# copy over the linker, sbu, and net specified by the user in the edges, nodes, and templates folders
shutil.copy(tobacco_folder + 'edges_database/' + linker + '.cif', tobacco_folder + 'edges/' + linker + '.cif')
shutil.copy(tobacco_folder + 'nodes_database/' + sbu + '.cif', tobacco_folder + 'nodes/' + sbu + '.cif')
shutil.copy(tobacco_folder + 'template_database/' + net + '.cif', tobacco_folder + 'templates/' + net + '.cif')
# run the command to construct the MOF
os.chdir(tobacco_folder)
# note: os.chdir here could cause issues if multiple users are using the website and try to make a building block generated MOF at the same time, since MOFSimplify server might chdir when someone else is in the middle of an operation
# luckily, it is a quick operation, so this is unlikely
subprocess.run(['python', 'tobacco.py'])
os.chdir(MOFSIMPLIFY_PATH)
# if successful, there will be an output cif in the folder output_cifs
if listdir_nohidden(tobacco_folder + 'output_cifs') == []: # no files in folder
print('Construction failed.')
return 'FAILED'
constructed_MOF = listdir_nohidden(tobacco_folder + 'output_cifs')
constructed_MOF = constructed_MOF[0] # getting the first, and only, element out of the list
    dictionary = {}
    dictionary['mof_name'] = constructed_MOF
    # getting the primitive cell using molSimplify
    get_primitive(tobacco_folder + 'output_cifs/' + constructed_MOF, tobacco_folder + 'output_cifs/primitive_' + constructed_MOF)
    json_object = json.dumps(dictionary, indent=4)
return json_object
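# Illustrative success response from bb_generate (hypothetical MOF name), as produced by
# json.dumps(..., indent=4): '{ "mof_name": "<constructed MOF cif name>" }'.
# On failure (no cif produced by tobacco.py), the plain string 'FAILED' is returned instead.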
### New section. The code for thermal and solvent stability. ###
def normalize_data_solvent(df_train, df_newMOF, fnames, lname, debug=False):
"""
    normalize_data_solvent takes in two DataFrames, df_train and df_newMOF: one for the training data (many rows) and one for the new MOF (one row) for which a prediction is to be generated.
    This function also takes in fnames (the feature names) and lname (the target property name).
    This function normalizes the X values from the pandas DataFrames and returns them as X_train and X_newMOF.
    It also standardizes y_train, the solvent removal stability flags in the training data DataFrame, and returns x_scaler (which scaled X_train).
    Here, standardizing means converting the values of y_train to either 0 or 1.
:param df_train: A pandas DataFrame of the training data.
:param df_newMOF: A pandas DataFrame of the new MOF being analyzed.
:param fnames: An array of column names of the descriptors.
:param lname: An array of the column name of the target.
:param debug: A boolean that determines whether extra information is printed.
:return: numpy.ndarray X_train, the descriptors of the training data. Its number of rows is the number of MOFs in the training data. Its number of columns is the number of descriptors.
:return: numpy.ndarray X_newMOF, the descriptors of the new MOF being analyzed by MOFSimplify. It contains only one row.
:return: numpy.ndarray y_train, the solvent removal stabilities of the training data.
    :return: sklearn.preprocessing._data.StandardScaler x_scaler, the scaler used to normalize the descriptor data to zero mean and unit variance.
"""
_df_train = df_train.copy().dropna(subset=fnames+lname)
_df_newMOF = df_newMOF.copy().dropna(subset=fnames)
X_train, X_newMOF = _df_train[fnames].values, _df_newMOF[fnames].values # takes care of ensuring ordering is same for both X
y_train = _df_train[lname].values
if debug:
print("training data reduced from %d -> %d because of nan." % (len(df_train), y_train.shape[0]))
x_scaler = sklearn.preprocessing.StandardScaler()
x_scaler.fit(X_train)
X_train = x_scaler.transform(X_train)
X_newMOF = x_scaler.transform(X_newMOF)
y_train = np.array([1 if x == 1 else 0 for x in y_train.reshape(-1, )])
return X_train, X_newMOF, y_train, x_scaler
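# A note on the scaling above: StandardScaler transforms each feature column x to (x - mean) / std,
# with the mean and std fit on the training rows only; the same fitted transform is then applied
# to the single new-MOF row, so the training data and the new MOF stay directly comparable.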
def standard_labels(df, key="flag"):
"""
standard_labels makes the solvent removal stability either 1 (stable upon solvent removal) or 0 (unstable upon solvent removal)
"flag" is the column under which solvent removal stability is reported in the DataFrame
:param df: A pandas DataFrame to modify.
:param key: The column in the pandas DataFrame to look at.
:return: The modified pandas DataFrame.
"""
flags = [1 if row[key] == 1 else 0 for _, row in df.iterrows()] # Look through all rows of the DataFrame df.
df[key] = flags
return df
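# Illustrative effect of standard_labels (hypothetical flag column): [1, 2, 0, 1] -> [1, 0, 0, 1],
# i.e. any value other than 1 is mapped to 0.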
def run_solvent_ANN(user_id, path, MOF_name, solvent_ANN):
"""
run_solvent_ANN runs the solvent removal stability ANN with the desired MOF as input.
It returns a prediction between zero and one. This prediction corresponds to an assessment of MOF stability upon solvent removal.
The further the prediction is from 0.5, the more sure the ANN is.
:param user_id: str, the session ID of the user
:param path: str, the server's path to the MOFSimplify folder on the server
:param MOF_name: str, the name of the MOF for which a prediction is being generated
:param solvent_ANN: keras.engine.training.Model, the ANN itself
:return: str str(new_MOF_pred[0][0]), the model solvent removal stability prediction
:return: list neighbor_names, the latent space nearest neighbor MOFs in the solvent removal stability ANN
:return: list neighbor_distances, the latent space distances of the latent space nearest neighbor MOFs in neighbor_names
"""
RACs = ['D_func-I-0-all','D_func-I-1-all','D_func-I-2-all','D_func-I-3-all',
'D_func-S-0-all', 'D_func-S-1-all', 'D_func-S-2-all', 'D_func-S-3-all',
'D_func-T-0-all', 'D_func-T-1-all', 'D_func-T-2-all', 'D_func-T-3-all',
'D_func-Z-0-all', 'D_func-Z-1-all', 'D_func-Z-2-all', 'D_func-Z-3-all',
'D_func-chi-0-all', 'D_func-chi-1-all', 'D_func-chi-2-all',
'D_func-chi-3-all', 'D_lc-I-0-all', 'D_lc-I-1-all', 'D_lc-I-2-all',
'D_lc-I-3-all', 'D_lc-S-0-all', 'D_lc-S-1-all', 'D_lc-S-2-all',
'D_lc-S-3-all', 'D_lc-T-0-all', 'D_lc-T-1-all', 'D_lc-T-2-all',
'D_lc-T-3-all', 'D_lc-Z-0-all', 'D_lc-Z-1-all', 'D_lc-Z-2-all',
'D_lc-Z-3-all', 'D_lc-chi-0-all', 'D_lc-chi-1-all', 'D_lc-chi-2-all',
'D_lc-chi-3-all', 'D_mc-I-0-all', 'D_mc-I-1-all', 'D_mc-I-2-all',
'D_mc-I-3-all', 'D_mc-S-0-all', 'D_mc-S-1-all', 'D_mc-S-2-all',
'D_mc-S-3-all', 'D_mc-T-0-all', 'D_mc-T-1-all', 'D_mc-T-2-all',
'D_mc-T-3-all', 'D_mc-Z-0-all', 'D_mc-Z-1-all', 'D_mc-Z-2-all',
'D_mc-Z-3-all', 'D_mc-chi-0-all', 'D_mc-chi-1-all', 'D_mc-chi-2-all',
'D_mc-chi-3-all', 'f-I-0-all', 'f-I-1-all', 'f-I-2-all', 'f-I-3-all',
'f-S-0-all', 'f-S-1-all', 'f-S-2-all', 'f-S-3-all', 'f-T-0-all', 'f-T-1-all',
'f-T-2-all', 'f-T-3-all', 'f-Z-0-all', 'f-Z-1-all', 'f-Z-2-all', 'f-Z-3-all',
'f-chi-0-all', 'f-chi-1-all', 'f-chi-2-all', 'f-chi-3-all', 'f-lig-I-0',
'f-lig-I-1', 'f-lig-I-2', 'f-lig-I-3', 'f-lig-S-0', 'f-lig-S-1', 'f-lig-S-2',
'f-lig-S-3', 'f-lig-T-0', 'f-lig-T-1', 'f-lig-T-2', 'f-lig-T-3', 'f-lig-Z-0',
'f-lig-Z-1', 'f-lig-Z-2', 'f-lig-Z-3', 'f-lig-chi-0', 'f-lig-chi-1',
'f-lig-chi-2', 'f-lig-chi-3', 'func-I-0-all', 'func-I-1-all',
'func-I-2-all', 'func-I-3-all', 'func-S-0-all', 'func-S-1-all',
'func-S-2-all', 'func-S-3-all', 'func-T-0-all', 'func-T-1-all',
'func-T-2-all', 'func-T-3-all', 'func-Z-0-all', 'func-Z-1-all',
'func-Z-2-all', 'func-Z-3-all', 'func-chi-0-all', 'func-chi-1-all',
'func-chi-2-all', 'func-chi-3-all', 'lc-I-0-all', 'lc-I-1-all', 'lc-I-2-all',
'lc-I-3-all', 'lc-S-0-all', 'lc-S-1-all', 'lc-S-2-all', 'lc-S-3-all',
'lc-T-0-all', 'lc-T-1-all', 'lc-T-2-all', 'lc-T-3-all', 'lc-Z-0-all',
'lc-Z-1-all', 'lc-Z-2-all', 'lc-Z-3-all', 'lc-chi-0-all', 'lc-chi-1-all',
'lc-chi-2-all', 'lc-chi-3-all', 'mc-I-0-all', 'mc-I-1-all', 'mc-I-2-all',
'mc-I-3-all', 'mc-S-0-all', 'mc-S-1-all', 'mc-S-2-all', 'mc-S-3-all',
'mc-T-0-all', 'mc-T-1-all', 'mc-T-2-all', 'mc-T-3-all', 'mc-Z-0-all',
'mc-Z-1-all', 'mc-Z-2-all', 'mc-Z-3-all', 'mc-chi-0-all', 'mc-chi-1-all',
'mc-chi-2-all', 'mc-chi-3-all']
geo = ['Df','Di', 'Dif','GPOAV','GPONAV','GPOV','GSA','POAV','POAV_vol_frac',
'PONAV','PONAV_vol_frac','VPOV','VSA','cell_v']
ANN_path = path + 'model/solvent/ANN/'
temp_file_path = path + 'temp_file_creation_' + user_id + '/'
df_train = pd.read_csv(ANN_path+'dropped_connectivity_dupes/train.csv')
df_train = df_train.loc[:, (df_train != df_train.iloc[0]).any()]
df_newMOF = pd.read_csv(temp_file_path + 'merged_descriptors/' + MOF_name + '_descriptors.csv') # assumes that temp_file_creation/ is in parent folder
features = [val for val in df_train.columns.values if val in RACs+geo]
df_train = standard_labels(df_train, key="flag")
# The normalize_data_solvent function is expecting a DataFrame with each MOF in a separate row, and features in columns
### Utilize the function below to normalize the RACs + geo features of the new MOF
# newMOF refers to the MOF that has been uploaded to MOFSimplify, for which a prediction will be generated
X_train, X_newMOF, y_train, x_scaler = normalize_data_solvent(df_train, df_newMOF, features, ["flag"], debug=False)
# Order of values in X_newMOF matters, but this is taken care of in normalize_data_solvent.
    model = solvent_ANN
    with tf_session.as_default(): # the session/graph context is needed because the model was loaded into tf_session when the server started
with tf_session.graph.as_default():
### new_MOF_pred will be a decimal value between 0 and 1, below 0.5 is unstable, above 0.5 is stable
new_MOF_pred = np.round(model.predict(X_newMOF),2) # round to 2 decimals
# Define the function for the latent space. This will depend on the model. We want the layer before the last, in this case this was the 12th one.
get_latent = K.function([model.layers[0].input],
[model.layers[12].output]) # Last layer before dense-last
# Get the latent vectors for the training data first, then the latent vectors for the test data.
training_latent = get_latent([X_train, 0])[0]
design_latent = get_latent([X_newMOF, 0])[0]
# Compute the pairwise distances between the test latent vectors and the train latent vectors to get latent distances
d1 = pairwise_distances(design_latent,training_latent,n_jobs=30)
df1 = pd.DataFrame(data=d1, columns=df_train['CoRE_name'].tolist())
df1.to_csv(temp_file_path + 'solvent_test_latent_dists.csv')
# Want to find the closest points (let's say the closest 5 points); so, smallest values in df1
neighbors = 5 # number of closest points
# will make arrays of length neighbors, where each entry is the next closest neighbor (will do this for both names and distances)
neighbors_names = []
neighbors_distances = []
df_reformat = df1.min(axis='index')
for i in range(neighbors):
name = df_reformat.idxmin() # name of next closest complex in the training data
distance = df_reformat.min() # distance of the next closest complex in the training data to the new MOF
df_reformat = df_reformat.drop(name) # dropping the next closest complex, in order to find the next-next closest complex
neighbors_names.append(name)
neighbors_distances.append(str(distance))
return str(new_MOF_pred[0][0]), neighbors_names, neighbors_distances
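# A note on the latent-space neighbors above: each new MOF is embedded with an intermediate layer
# near the output (the layer index is model-specific: 12 for the solvent model, 8 for the thermal
# model below), and the five training MOFs with the smallest pairwise latent distances (Euclidean,
# the sklearn default for pairwise_distances) are reported alongside the prediction.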
def normalize_data_thermal(df_train, df_newMOF, fnames, lname, debug=False): # Function assumes it gets pandas DataFrames with MOFs as rows and features as columns
"""
normalize_data_thermal takes in two DataFrames df_train and df_newMOF, one for the training data (many rows) and one for the new MOF (one row) for which a prediction is to be generated.
This function also takes in fnames (the feature names) and lname (the target property name).
This function normalizes the X values from the pandas DataFrames and returns them as X_train and X_newMOF.
It also normalizes y_train, which are the thermal breakdown temperatures in the training data DataFrame, and returns x_scaler (which scaled X_train) and y_scaler (which scaled y_train).
:param df_train: A pandas DataFrame of the training data.
:param df_newMOF: A pandas DataFrame of the new MOF being analyzed.
:param fnames: An array of column names of the descriptors.
:param lname: An array of the column name of the target.
:param debug: A boolean that determines whether extra information is printed.
:return: numpy.ndarray X_train, the descriptors of the training data. Its number of rows is the number of MOFs in the training data. Its number of columns is the number of descriptors.
:return: numpy.ndarray X_newMOF, the descriptors of the new MOF being analyzed by MOFSimplify. It contains only one row.
:return: numpy.ndarray y_train, the thermal stabilities of the training data.
    :return: sklearn.preprocessing._data.StandardScaler x_scaler, the scaler used to normalize the descriptor data to zero mean and unit variance.
    :return: sklearn.preprocessing._data.StandardScaler y_scaler, the scaler used to normalize the target data to zero mean and unit variance.
"""
_df_train = df_train.copy().dropna(subset=fnames+lname)
_df_newMOF = df_newMOF.copy().dropna(subset=fnames)
X_train, X_newMOF = _df_train[fnames].values, _df_newMOF[fnames].values # takes care of ensuring ordering is same for both X
y_train = _df_train[lname].values
if debug:
print("training data reduced from %d -> %d because of nan." % (len(df_train), y_train.shape[0]))
x_scaler = sklearn.preprocessing.StandardScaler()
x_scaler.fit(X_train)
X_train = x_scaler.transform(X_train)
X_newMOF = x_scaler.transform(X_newMOF)
y_scaler = sklearn.preprocessing.StandardScaler()
y_scaler.fit(y_train)
y_train = y_scaler.transform(y_train)
return X_train, X_newMOF, y_train, x_scaler, y_scaler
def run_thermal_ANN(user_id, path, MOF_name, thermal_ANN):
"""
run_thermal_ANN runs the thermal stability ANN with the desired MOF as input.
It returns a prediction for the thermal breakdown temperature of the chosen MOF.
:param user_id: str, the session ID of the user
:param path: str, the server's path to the MOFSimplify folder on the server
:param MOF_name: str, the name of the MOF for which a prediction is being generated
:param thermal_ANN: keras.engine.training.Model, the ANN itself
:return: str new_MOF_pred, the model thermal stability prediction
:return: list neighbor_names, the latent space nearest neighbor MOFs in the thermal stability ANN
:return: list neighbor_distances, the latent space distances of the latent space nearest neighbor MOFs in neighbor_names
"""
RACs = ['D_func-I-0-all','D_func-I-1-all','D_func-I-2-all','D_func-I-3-all',
'D_func-S-0-all', 'D_func-S-1-all', 'D_func-S-2-all', 'D_func-S-3-all',
'D_func-T-0-all', 'D_func-T-1-all', 'D_func-T-2-all', 'D_func-T-3-all',
'D_func-Z-0-all', 'D_func-Z-1-all', 'D_func-Z-2-all', 'D_func-Z-3-all',
'D_func-chi-0-all', 'D_func-chi-1-all', 'D_func-chi-2-all',
'D_func-chi-3-all', 'D_lc-I-0-all', 'D_lc-I-1-all', 'D_lc-I-2-all',
'D_lc-I-3-all', 'D_lc-S-0-all', 'D_lc-S-1-all', 'D_lc-S-2-all',
'D_lc-S-3-all', 'D_lc-T-0-all', 'D_lc-T-1-all', 'D_lc-T-2-all',
'D_lc-T-3-all', 'D_lc-Z-0-all', 'D_lc-Z-1-all', 'D_lc-Z-2-all',
'D_lc-Z-3-all', 'D_lc-chi-0-all', 'D_lc-chi-1-all', 'D_lc-chi-2-all',
'D_lc-chi-3-all', 'D_mc-I-0-all', 'D_mc-I-1-all', 'D_mc-I-2-all',
'D_mc-I-3-all', 'D_mc-S-0-all', 'D_mc-S-1-all', 'D_mc-S-2-all',
'D_mc-S-3-all', 'D_mc-T-0-all', 'D_mc-T-1-all', 'D_mc-T-2-all',
'D_mc-T-3-all', 'D_mc-Z-0-all', 'D_mc-Z-1-all', 'D_mc-Z-2-all',
'D_mc-Z-3-all', 'D_mc-chi-0-all', 'D_mc-chi-1-all', 'D_mc-chi-2-all',
'D_mc-chi-3-all', 'f-I-0-all', 'f-I-1-all', 'f-I-2-all', 'f-I-3-all',
'f-S-0-all', 'f-S-1-all', 'f-S-2-all', 'f-S-3-all', 'f-T-0-all', 'f-T-1-all',
'f-T-2-all', 'f-T-3-all', 'f-Z-0-all', 'f-Z-1-all', 'f-Z-2-all', 'f-Z-3-all',
'f-chi-0-all', 'f-chi-1-all', 'f-chi-2-all', 'f-chi-3-all', 'f-lig-I-0',
'f-lig-I-1', 'f-lig-I-2', 'f-lig-I-3', 'f-lig-S-0', 'f-lig-S-1', 'f-lig-S-2',
'f-lig-S-3', 'f-lig-T-0', 'f-lig-T-1', 'f-lig-T-2', 'f-lig-T-3', 'f-lig-Z-0',
'f-lig-Z-1', 'f-lig-Z-2', 'f-lig-Z-3', 'f-lig-chi-0', 'f-lig-chi-1',
'f-lig-chi-2', 'f-lig-chi-3', 'func-I-0-all', 'func-I-1-all',
'func-I-2-all', 'func-I-3-all', 'func-S-0-all', 'func-S-1-all',
'func-S-2-all', 'func-S-3-all', 'func-T-0-all', 'func-T-1-all',
'func-T-2-all', 'func-T-3-all', 'func-Z-0-all', 'func-Z-1-all',
'func-Z-2-all', 'func-Z-3-all', 'func-chi-0-all', 'func-chi-1-all',
'func-chi-2-all', 'func-chi-3-all', 'lc-I-0-all', 'lc-I-1-all', 'lc-I-2-all',
'lc-I-3-all', 'lc-S-0-all', 'lc-S-1-all', 'lc-S-2-all', 'lc-S-3-all',
'lc-T-0-all', 'lc-T-1-all', 'lc-T-2-all', 'lc-T-3-all', 'lc-Z-0-all',
'lc-Z-1-all', 'lc-Z-2-all', 'lc-Z-3-all', 'lc-chi-0-all', 'lc-chi-1-all',
'lc-chi-2-all', 'lc-chi-3-all', 'mc-I-0-all', 'mc-I-1-all', 'mc-I-2-all',
'mc-I-3-all', 'mc-S-0-all', 'mc-S-1-all', 'mc-S-2-all', 'mc-S-3-all',
'mc-T-0-all', 'mc-T-1-all', 'mc-T-2-all', 'mc-T-3-all', 'mc-Z-0-all',
'mc-Z-1-all', 'mc-Z-2-all', 'mc-Z-3-all', 'mc-chi-0-all', 'mc-chi-1-all',
'mc-chi-2-all', 'mc-chi-3-all']
geo = ['Df','Di', 'Dif','GPOAV','GPONAV','GPOV','GSA','POAV','POAV_vol_frac',
'PONAV','PONAV_vol_frac','VPOV','VSA','cell_v']
ANN_path = path + 'model/thermal/ANN/'
temp_file_path = path + 'temp_file_creation_' + user_id + '/'
    df_train_all = pd.concat([pd.read_csv(ANN_path + "train.csv"), pd.read_csv(ANN_path + "val.csv")]) # training plus validation data
df_train = pd.read_csv(ANN_path+"train.csv")
df_train = df_train.loc[:, (df_train != df_train.iloc[0]).any()]
df_newMOF = pd.read_csv(temp_file_path + 'merged_descriptors/' + MOF_name + '_descriptors.csv') # Assume temp_file_creation/ in parent directory
features = [val for val in df_train.columns.values if val in RACs+geo]
X_train, X_newMOF, y_train, x_scaler, y_scaler = normalize_data_thermal(df_train, df_newMOF, features, ["T"], debug=False)
    model = thermal_ANN
with tf_session.as_default():
with tf_session.graph.as_default():
new_MOF_pred = y_scaler.inverse_transform(model.predict(X_newMOF))
new_MOF_pred = np.round(new_MOF_pred,1) # round to 1 decimal
# isolating just the prediction, since the model spits out the prediction like [[PREDICTION]], as in, in hard brackets
new_MOF_pred = new_MOF_pred[0][0]
new_MOF_pred = str(new_MOF_pred)
            # adding units
            degree_sign = u'\N{DEGREE SIGN}'
            new_MOF_pred = new_MOF_pred + degree_sign + 'C' # degrees Celsius
# Define the function for the latent space. This will depend on the model. We want the layer before the last, in this case this was the 8th one.
get_latent = K.function([model.layers[0].input],
[model.layers[8].output]) # Last layer before dense-last
# Get the latent vectors for the training data first, then the latent vectors for the test data.
training_latent = get_latent([X_train, 0])[0]
design_latent = get_latent([X_newMOF, 0])[0]
print(training_latent.shape,design_latent.shape)
# Compute the pairwise distances between the test latent vectors and the train latent vectors to get latent distances
d1 = pairwise_distances(design_latent,training_latent,n_jobs=30)
df1 = pd.DataFrame(data=d1, columns=df_train['CoRE_name'].tolist())
            df1.to_csv(temp_file_path + 'thermal_test_latent_dists.csv') # latent distances for the thermal model
# Want to find the closest points (let's say the closest 5 points); so, smallest values in df1
neighbors = 5 # number of closest points
# will make arrays of length neighbors, where each entry is the next closest neighbor (will do this for both names and distances)
neighbors_names = []
neighbors_distances = []
df_reformat = df1.min(axis='index')
for i in range(neighbors):
name = df_reformat.idxmin() # name of next closest complex in the training data
distance = df_reformat.min() # distance of the next closest complex in the training data to the new MOF
df_reformat = df_reformat.drop(name) # dropping the next closest complex, in order to find the next-next closest complex
neighbors_names.append(name)
neighbors_distances.append(str(distance))
return new_MOF_pred, neighbors_names, neighbors_distances
def descriptor_generator(name, structure, prediction_type, is_entry):
"""
# descriptor_generator is used by both ss_predict() and ts_predict() to generate RACs and Zeo++ descriptors.
# These descriptors are subsequently used in ss_predict() and ts_predict() for the ANN models.
# Inputs are the name of the MOF and the structure (cif file text) of the MOF for which descriptors are to be generated.
# The third input indicates the type of prediction (solvent removal or thermal).
:param name: str, the name of the MOF being analyzed.
:param structure: str, the text of the cif file of the MOF being analyzed.
:param prediction_type: str, the type of prediction being run. Can either be 'solvent' or 'thermal'.
:param is_entry: boolean, indicates whether the descriptor CSV has already been written.
:return: Depends, either the string 'FAILED' if descriptor generation fails, a dictionary myDict (if the MOF being analyzed is in the training data), or an array myResult (if the MOF being analyzed is not in the training data)
"""
print('TIME CHECK 2')
timeStarted = time.time() # save start time (debugging)
temp_file_folder = MOFSIMPLIFY_PATH + "temp_file_creation_" + str(session['ID']) + '/'
cif_folder = temp_file_folder + 'cifs/'
# Write the data back to a cif file.
try:
cif_file = open(cif_folder + name + '.cif', 'w')
except FileNotFoundError:
return 'FAILED'
cif_file.write(structure)
cif_file.close()
# There can be a RACs folder for solvent predictions and a RACs folder for thermal predictions. Same for Zeo++.
RACs_folder = temp_file_folder + 'feature_generation/' + prediction_type + '_RACs/'
zeo_folder = temp_file_folder + 'feature_generation/' + prediction_type + '_zeo++/'