-
Notifications
You must be signed in to change notification settings - Fork 1
/
perform_msm
218 lines (184 loc) · 8.88 KB
/
perform_msm
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
#!/usr/bin/env bash
### ROBUST CLUSTERING #########################################################
# Reproduces the MSM analysis figures of Nagel23 and showcases how to use the
# robust clustering framework.
#
# If this script is used, please cite:
# D. Nagel, S. Sartore, and G. Stock, "Selecting Features for Markov Modeling:
# A Case Study on HP35" J. Chem. Theory Comput., submitted
#
# Author: Daniel Nagel
# Copyright (C) 2023, Daniel Nagel
#
# Arguments:
# h: show help
# v: set verbosity mode, the more consecutive 'v' the higher
# c: color_lvl - 0 none, 1 some, 2 utf8
#
###############################################################################
### CONSTANTS #################################################################
# Every value below is read-only. Option lists and commands are stored as
# plain strings and are spliced into the command strings that main hands to
# run_command.

# --- analysis parameters -----------------------------------------------------
# time-axis options shared by the msmhelper option lists below
declare -r UNIT="--frames-per-unit 5 --unit ns"
# passed as -s to the gaussian-filtering command; also part of CONS_GAUSSIAN
declare -r SIGMA="10"
# not referenced anywhere else in this script
declare -r MPPLAG="50"
# suffix appended to the -o output names of the generated figures
declare -r EXT=".svg"

# --- option lists, one per tool ----------------------------------------------
declare -r IMPL_KWARGS="--max-lagtime 210 --n-lagtimes 3 --ylog ${UNIT}"
declare -r CKTEST_KWARGS="--max-time 35000 --lagtimes 50 100 150 250 500 --grid 4 3 ${UNIT}"
declare -r WTD_KWARGS="--nsteps 100000000 --start 12 --final 1 --max-lagtime 210 ${UNIT}"
declare -r WTS_KWARGS="--nsteps 100000000 --start 12 --final 1 --lagtimes 50 100 200 ${UNIT}"
# kinetic network options: KNETC for the contact-based, KNETD for the
# dihedral-based macrostates
declare -r KNETC_KWARGS="-u 8 -u 9 -u 10 -u 11 -f 0 -f 1 -f 2 -f 3 -f 4 --tlag 50"
declare -r KNETD_KWARGS="-u 10 -u 11 -f 0 -f 1 -f 2 --tlag 50"

# --- paths and file names ----------------------------------------------------
# relative paths are used by main after it has changed into ${DIR}
declare -r MOLDYN_GIT="https://github.com/moldyn/"
declare -r DIR="create_msm_nagel23"
declare -r DESRES="../../HP35-DESRES"
declare -r CONS="${DESRES}/hp35.mindists2"
declare -r CONS_GAUSSIAN="hp35.mindists2.gaussian${SIGMA}f"
declare -r MOSAIC_CLUSTERS="../hp35.mindist2.mosaic_clusters"
declare -r QFILE="hp35.mindists2.gaussian10f.q"
# microstate trajectories (linked from ../../CLUSTERING)
declare -r DIHS_MICRO="hp35.dihs.res3-33.shifted.gaussian10f_microstates_pcs4_p153"
declare -r CONS_MICRO="hp35.mindist2.gaussian10f_microstates_pcs5_p153"
# macrostate trajectories (linked from ../../MPP)
declare -r DIHS_MACRO="hp35.dihs.res3-33.shifted.gaussian10f_microstates_pcs4_p153.mpp50_transitions.dat.renamed_by_q.pop0.001_qmin0.50.macrotraj"
declare -r CONS_MACRO="hp35.mindists2.gaussian10f_microstates_pcs5_p153.mpp50_transitions.dat.renamed_by_q.pop0.005_qmin0.50.macrotraj_lumped13"
# contact-representation options; must follow the two files they reference
declare -r CONREP_KWARGS="--grid 4 3 --contacts ${CONS_GAUSSIAN} --clusters ${MOSAIC_CLUSTERS}"

# --- commands ----------------------------------------------------------------
declare -r IMPL="python -m msmhelper implied-timescales"
declare -r CKTEST="python -m msmhelper ck-test"
declare -r WTD="python -m msmhelper waiting-time-dist"
declare -r WTS="python -m msmhelper waiting-times"
declare -r GAUSSIAN="python -m msmhelper gaussian-filtering"
declare -r CONREP="python -m msmhelper contact-rep"
declare -r KNET="python ../kinetic_network.py"
# External commands that must be available on PATH. One array element per
# command, so that main checks each of them on its own. Python's venv module is
# not a command; main verifies it separately via `python3 -m venv -h`.
declare -a requirements=("python3" "bunzip2")
### MAIN ######################################################################
#######################################
# Run the complete workflow: check requirements, set up a python venv, link
# the input files and generate all figures inside $DIR.
# Every step is handed to run_command as a (label, command string) pair;
# cleanup_if_failed is called after each section and aborts the script when
# return_val is non-zero.
# Globals (read): DIR, requirements, CONS, CONS_GAUSSIAN, SIGMA, EXT, QFILE,
#                 DIHS_MICRO, DIHS_MACRO, CONS_MICRO, CONS_MACRO,
#                 the *_KWARGS option lists and the command strings.
# Arguments: none
#######################################
main() {
  local req

  # print welcome
  print_welcome

  # print user info and warning
  print_info

  # prepare the working directory and enter it
  # NOTE(review): mkdir_ask comes from the helper library; presumably it asks
  # before replacing an existing directory - confirm.
  mkdir_ask "$DIR"
  # all relative paths below assume we are inside $DIR, so do not continue
  # in the wrong directory
  cd "$DIR" || { err "cannot enter $DIR"; exit 1; }
  printf "\n"

  pheader "CHECK REQUIREMENTS"
  for req in "${requirements[@]}"; do
    run_command "$req" "command -v $req >/dev/null 2>&1"
  done
  run_command "python venv" "python3 -m venv -h >/dev/null 2>&1"
  cleanup_if_failed

  ### SETUP PYTHON VENV #######################################################
  pheader "SETUP PYTHON VENV"
  run_command "create venv" "python3 -m venv Python"
  # NOTE(review): the activation only persists for the following steps if
  # run_command evaluates the string in the current shell - confirm.
  run_command "activate venv" ". Python/bin/activate"
  run_command "update pip" "pip install --upgrade pip"
  run_command "install dependencies" "pip install msmhelper networkx==2.8 fa2 bezier"
  cleanup_if_failed

  ### LINK FILES ##############################################################
  pheader "LINK FILES"
  run_command "link dihedral-based microstates" "ln -s ../../CLUSTERING/$DIHS_MICRO ."
  run_command "link dihedral-based macrostates" "ln -s ../../MPP/$DIHS_MACRO ."
  run_command "link contact-based microstates" "ln -s ../../CLUSTERING/$CONS_MICRO ."
  run_command "link contact-based macrostates" "ln -s ../../MPP/$CONS_MACRO ."
  run_command "link contacts file" "ln -s $CONS ."
  run_command "link fraction of native contacts" "ln -s ../../MPP/create_macrostate_nagel23/$QFILE ."
  cleanup_if_failed

  ### CREATE CONTACT REPRESENTATIONS ##########################################
  pheader "CREATE CONTACT REPRESENTATIONS"
  # extract the contacts file from its bz2 archive if it is not there yet
  if [[ ! -f "$CONS" ]]; then
    run_command "check contacts archive" "bunzip2 -t ${CONS}.bz2 &>/dev/null"
    run_command "extract contacts file" "bunzip2 -k ${CONS}.bz2"
  fi
  # ${CONS##*/} is the bare file name, i.e. the link created above
  if [[ ! -f "$CONS_GAUSSIAN" ]]; then
    run_command "smooth contacts with 2ns" "$GAUSSIAN -i ${CONS##*/} -s $SIGMA -o $CONS_GAUSSIAN"
  fi
  run_command "dihedral-based macrostates" \
    "$CONREP $CONREP_KWARGS --state $DIHS_MACRO -o ${DIHS_MACRO}.conrep$EXT"
  run_command "contact-based macrostates" \
    "$CONREP $CONREP_KWARGS --state $CONS_MACRO -o ${CONS_MACRO}.conrep$EXT"
  cleanup_if_failed

  ### ESTIMATE IMPLIED TIMESCALE ##############################################
  pheader "ESTIMATE IMPLIED TIMESCALE"
  run_command "dihedral-based macrostates" \
    "$IMPL $IMPL_KWARGS -f $DIHS_MACRO -o ${DIHS_MACRO}.impl$EXT"
  run_command "contact-based macrostates" \
    "$IMPL $IMPL_KWARGS -f $CONS_MACRO -o ${CONS_MACRO}.impl$EXT"
  run_command "dihedral-based macrostates using Hummer-Szabo" \
    "$IMPL $IMPL_KWARGS -f $DIHS_MACRO --microfilename $DIHS_MICRO -o ${DIHS_MACRO}.impl.sh$EXT"
  run_command "contact-based macrostates using Hummer-Szabo" \
    "$IMPL $IMPL_KWARGS -f $CONS_MACRO --microfilename $CONS_MICRO -o ${CONS_MACRO}.impl.sh$EXT"
  cleanup_if_failed

  ### ESTIMATE CK-TESTS #######################################################
  pheader "ESTIMATE CK-TESTS"
  run_command "dihedral-based macrostates" \
    "$CKTEST $CKTEST_KWARGS -f $DIHS_MACRO -o ${DIHS_MACRO}.cktest$EXT"
  run_command "contact-based macrostates" \
    "$CKTEST $CKTEST_KWARGS -f $CONS_MACRO -o ${CONS_MACRO}.cktest$EXT"
  # each output is named after its own input trajectory, as in all other
  # sections
  run_command "dihedral-based macrostates using Hummer-Szabo" \
    "$CKTEST $CKTEST_KWARGS -f $DIHS_MACRO --microfilename $DIHS_MICRO -o ${DIHS_MACRO}.cktest.sh$EXT"
  run_command "contact-based macrostates using Hummer-Szabo" \
    "$CKTEST $CKTEST_KWARGS -f $CONS_MACRO --microfilename $CONS_MICRO -o ${CONS_MACRO}.cktest.sh$EXT"
  cleanup_if_failed

  ### ESTIMATE WTD ############################################################
  pheader "ESTIMATE WAITING TIME DISTRIBUTIONS"
  run_command "dihedral-based macrostates" \
    "$WTD $WTD_KWARGS -f $DIHS_MACRO -o ${DIHS_MACRO}.wtd$EXT"
  run_command "contact-based macrostates" \
    "$WTD $WTD_KWARGS -f $CONS_MACRO -o ${CONS_MACRO}.wtd$EXT"
  run_command "dihedral-based macrostates using Hummer-Szabo" \
    "$WTD $WTD_KWARGS -f $DIHS_MACRO --microfilename $DIHS_MICRO -o ${DIHS_MACRO}.wtd.sh$EXT"
  run_command "contact-based macrostates using Hummer-Szabo" \
    "$WTD $WTD_KWARGS -f $CONS_MACRO --microfilename $CONS_MICRO -o ${CONS_MACRO}.wtd.sh$EXT"
  cleanup_if_failed

  ### ESTIMATE WTS ############################################################
  pheader "ESTIMATE WAITING TIMES"
  run_command "dihedral-based macrostates" \
    "$WTS $WTS_KWARGS -f $DIHS_MACRO -o ${DIHS_MACRO}.wts$EXT"
  run_command "contact-based macrostates" \
    "$WTS $WTS_KWARGS -f $CONS_MACRO -o ${CONS_MACRO}.wts$EXT"
  run_command "dihedral-based macrostates using Hummer-Szabo" \
    "$WTS $WTS_KWARGS -f $DIHS_MACRO --microfilename $DIHS_MICRO -o ${DIHS_MACRO}.wts.sh$EXT"
  run_command "contact-based macrostates using Hummer-Szabo" \
    "$WTS $WTS_KWARGS -f $CONS_MACRO --microfilename $CONS_MICRO -o ${CONS_MACRO}.wts.sh$EXT"
  cleanup_if_failed

  ### PLOT KNET ###############################################################
  pheader "PLOT KINETIC NETWORKS"
  run_command "dihedral-based kinetic network" \
    "$KNET $KNETD_KWARGS --states_traj $DIHS_MACRO --qoft $QFILE"
  run_command "contact-based kinetic network" \
    "$KNET $KNETC_KWARGS --states_traj $CONS_MACRO --qoft $QFILE"
  cleanup_if_failed
}
### DEFINE LOCAL FUNCTIONS ####################################################
#######################################
# Abort the script if the recorded status of the previous steps is non-zero.
# On failure: leave the working directory, report the abort, hand $DIR to
# rmdir_ask, reset the terminal formatting and exit with that status.
# Globals (read): return_val, DIR, reset
#   NOTE(review): return_val is never assigned in this file; presumably it is
#   maintained by run_command of the helper library - confirm.
# Returns: 0 when nothing failed; does not return otherwise.
#######################################
cleanup_if_failed() {
  # everything fine so far, nothing to clean up
  [[ "$return_val" == 0 ]] && return 0

  cd ..
  err "aborting"
  rmdir_ask $DIR
  printf "%b\n" "$reset"
  exit $return_val
}
#######################################
# Print the welcome banner: a box with the title followed by a short
# description and the copyright line.
# Globals (read): reset, bold, blue (formatting strings that are not defined
#   in this file)
# Outputs: writes the banner to stdout
#######################################
print_welcome() {
  # declared separately so the assignment does not hide pbox's exit status
  local _str

  # reset font
  printf "%b" "${reset}"
  _str=$(pbox "${bold}~~~ MSM ANALYSIS ~~~" "$blue")
  # one %b for the one argument
  printf "\n%b\n" "$_str"
  printf "generating MSM figures of Nagel23 using backbone\n"
  # no argument here: the format has no conversion that could consume one
  printf "dihedral angles and contact distances.\n"
  printf "Copyright (c) 2023, Daniel Nagel\n"
  printf "\n"
}
#######################################
# Print the runtime information section: the invoking user and host, followed
# by two warnings about runtime and download volume.
# Outputs: via pheader, pprint and warn (helpers not defined in this file)
#######################################
print_info() {
  # declared separately so the assignments do not hide the exit status of
  # whoami / hostname
  local _user _host
  _user=$(whoami)
  _host=$(hostname)

  pheader "RUNTIME INFORMATION"
  pprint "user: ${_user}@${_host}\n"
  warn "depending on the hardware it can take up to several hours.\n"
  warn "several GB of data are downloaded, be sure to use wifi.\n"
}
### SOURCE BASH HELPER FUNCTIONS ##############################################
# Load the shared helper library, passing -d followed by all command-line
# arguments of this script. The path is relative to the current working
# directory, not to the location of this script.
# NOTE(review): pheader, pprint, warn, err, pbox, run_command, mkdir_ask and
# rmdir_ask are not defined in this file and are presumably provided by this
# library - confirm against libbash.sh.
. ../bash-template/libbash.sh -d "$@"
# run main
main
# Exit with the status held in return_val. It is never assigned in this file;
# presumably the helper library maintains it - verify.
exit $return_val