-
Notifications
You must be signed in to change notification settings - Fork 0
/
generate-args
executable file
·179 lines (158 loc) · 7.71 KB
/
generate-args
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
#!/usr/bin/env python3
"""
Generate a file with arguments for a number of simulations, either using combinations of specified arguments or using Latin Hypercube sampling for sensitivity analysis.
"""
import math
import numpy as np
import pandas as pd
from pyDOE import lhs
from scipy.stats import expon
import argparse
import sys
import copy
# Name of the CSV file the generated arguments are written to
args_file = 'args.csv'
# Number of decimals that generated floating point parameter values are rounded to
param_round_decimals = 6

# Command line interface: the kind of analysis, plus the options each kind uses
parser = argparse.ArgumentParser(formatter_class=argparse.ArgumentDefaultsHelpFormatter)
parser.add_argument(
    'analysis',
    choices=[ 'sensitivity_analysis', 'parameter_combinations' ],
    help="What to generate arguments for.",
)
parser.add_argument(
    '-s', '--num-samples',
    type=int,
    default=10_000,
    help="Number of samples for sensitivity analysis.",
)
# -p and -r may be repeated; the i-th --param is paired with the i-th --value
parser.add_argument(
    '-p', '--param',
    action='append',
    help="Parameter to generate combinations for.",
)
parser.add_argument(
    '-r', '--value',
    action='append',
    help="Range of values of the parameter to generate combinations for. Formats include [min,max,step], with step being optional and 1 by default and {val1,val2,...} for specific values. Put quotes around [] and {} as they can have a specific meaning in the shell otherwise.",
)
cli_args = parser.parse_args()

# Initialize the data frame; one column per parameter is added to it later
args = pd.DataFrame()
if cli_args.analysis == 'sensitivity_analysis':
    # Each entry: [ prefix, [ min, max ] range, optional transformation (for lambda -> alpha) ]
    # The type of the range start (or of its transformed value, if a transformation is
    # present) determines whether the parameter is an int, a float or a string
    params_properties = [
        [ "--genes", [ 10, 500 ], ],
        [ "--cells", [ 1, 15 ], lambda x : x ** 3 ],
        [ "--strains", [ 1, 20 ], ],
        [ "--tissue-depth", [ 0, 0 ], ],
        # [ "--output-gene-fraction", [ 0.1, 1.0 ], ],
        [ "--output-cell-fraction", [ 1.0, 1.0 ], ],
        # [ "--output-strain-fraction", [ 0.1, 1.0 ], ],
        [ "--output-cells-random", [ 0, 0 ], lambda x : '' ],
        [ "--input-fraction", [ 0.1, 1.0 ], ],
        [ "--input-signal-depth", [ 1, 15 ], ],
        [ "--alpha-esm", [ 2.5, 30.0 ], lambda x : 5.0 / (x ** 2) ],
        [ "--esms", [ 0, 20 ], ],
        [ "--esm-threshold", [ 10.0, 100.0 ], ],
        [ "--delay", [ 0, 0 ], ],
        [ "--window-size", [ 7, 7 ], ],
        [ "--function", [ 1, 1 ], lambda f : 'median' if f == 0 else 'parity' ],
    ]

    # NumPy dtype of the generated column for each supported parameter type
    types = {
        int : np.int64,
        float : np.float64,
        np.float64 : np.float64,
        str : str
    }

    # Latin Hypercube design: one row per sample, one column per parameter.
    # Each entry is scaled below from the unit interval onto the parameter's range.
    lhd = lhs(len(params_properties), samples=cli_args.num_samples)

    for param_idx, param_properties in enumerate(params_properties):
        param_prefix = param_properties[0]
        range_start, range_end = param_properties[1]
        transformation = param_properties[2] if len(param_properties) == 3 else None
        int_range = isinstance(range_start, int)

        # Determine the parameter type once instead of once per sample
        if transformation is None:
            param_type = type(range_start)
        else:
            param_type = type(transformation(range_start))
        # np.float64 is a subclass of float, so this covers both float entries of `types`
        float_param = issubclass(param_type, float)

        range_length = range_end - range_start
        if int_range:
            range_length += 1 # Ensure the integer range is inclusive

        # Generate the whole column, then store it with a single assignment
        values = []
        for unit_sample in lhd[:, param_idx]:
            sampled_val = unit_sample * range_length + range_start
            if int_range:
                sampled_val = int(sampled_val)
            # Do the parameter transformation if available (e.g. lambda -> alpha)
            val = sampled_val if transformation is None else transformation(sampled_val)
            if param_type is int:
                val = int(val)
            elif float_param:
                val = round(val, param_round_decimals)
            values.append(val)
        args[param_prefix] = np.array(values, dtype=types[param_type])

        # Ensure generated arguments cover the provided range. Only the transformed
        # end points are compared, so a transformation is assumed to be monotonic.
        if transformation is None:
            expected_min, expected_max = range_start, range_end
        else:
            expected_min, expected_max = transformation(range_start), transformation(range_end)
            if expected_min > expected_max:
                expected_min, expected_max = expected_max, expected_min

        if param_type is int:
            minval, maxval = min(values), max(values)
            covered = minval == expected_min and maxval == expected_max
        elif float_param:
            minval, maxval = min(values), max(values)
            covered = abs(minval - expected_min) < 0.05 and abs(maxval - expected_max) < 0.05
        else:
            covered = True # No coverage requirement for string parameters

        # Explicit failure instead of `assert`, which is stripped when running with -O
        if not covered:
            sys.exit(
                f"Generated values of {param_prefix} span [{minval}, {maxval}] and do not cover "
                f"the expected range [{expected_min}, {expected_max}]; more samples may be needed."
            )
else:
    # Every --param needs exactly one matching --value
    if cli_args.param is None or cli_args.value is None or len(cli_args.param) != len(cli_args.value):
        print("Each parameter needs a value. Params provided: {}, values provided: {}".format(0 if cli_args.param is None else len(cli_args.param), 0 if cli_args.value is None else len(cli_args.value)))
        sys.exit(1)
def is_float(value):
    """Return True if ``value`` can be converted with ``float()``, False otherwise.

    Only conversion failures are treated as "not a float"; anything else
    (e.g. ``KeyboardInterrupt``) propagates to the caller.
    """
    try:
        float(value)
    except (TypeError, ValueError, OverflowError):
        return False
    return True
def is_int(value):
    """Return True if ``value`` can be converted with ``int()``, False otherwise.

    Only conversion failures are treated as "not an int"; anything else
    (e.g. ``KeyboardInterrupt``) propagates to the caller.
    """
    try:
        int(value)
    except (TypeError, ValueError, OverflowError):
        return False
    return True
def _expand_range(tokens):
    """Expand the tokens of a ``[min,max,step]`` specification into its values.

    ``tokens`` holds the two or three comma-separated strings found between the
    brackets; the step is 1 when omitted and both bounds are inclusive. The
    result contains ints when every token is an integer and floats otherwise.

    Raises ``ValueError`` when the tokens do not describe a finite, non-empty
    ascending range.
    """
    # Exact rational arithmetic: repeatedly adding a float step accumulates
    # rounding error and can miss the upper bound (e.g. [0,0.3,0.1] lost 0.3)
    from fractions import Fraction

    if len(tokens) not in (2, 3):
        raise ValueError("a range is written as [min,max] or [min,max,step]")
    try:
        numbers = [Fraction(token.strip()) for token in tokens]
    except (ValueError, ZeroDivisionError):
        raise ValueError("range bounds and step must be finite numbers") from None

    start, end = numbers[0], numbers[1]
    step = numbers[2] if len(numbers) == 3 else Fraction(1)
    if step <= 0:
        # A zero or negative step would never reach the upper bound
        raise ValueError("the step must be positive")
    if end < start:
        # An empty range would leave no combinations to generate
        raise ValueError("the maximum must not be smaller than the minimum")

    count = (end - start) // step + 1
    convert = int if all(is_int(token) for token in tokens) else float
    return [convert(start + index * step) for index in range(count)]


# These steps only apply when generating parameter combinations
if cli_args.analysis != 'sensitivity_analysis':
    # Turn parameter names into argument prefixes: lower case, spaces to dashes, leading "--"
    cli_args.param = [ '--' + name.lower().replace(' ', '-') for name in cli_args.param ]

    # Parse every value specification into the list of values its parameter takes
    arg_vals = {}
    for param, value in zip(cli_args.param, cli_args.value):
        tokens = value[1:-1].split(',')
        if value.startswith('[') and value.endswith(']'):
            try:
                arg = _expand_range(tokens)
            except ValueError as error:
                print("Wrong format for {}: {}.".format(param, error))
                sys.exit(1)
        elif value.startswith('{') and value.endswith('}'):
            # Explicit values: keep ints and floats numeric, everything else as text
            arg = []
            for token in tokens:
                if is_int(token):
                    arg.append(int(token))
                elif is_float(token):
                    arg.append(float(token))
                else:
                    arg.append(token)
        else:
            print("Wrong format.")
            sys.exit(1)
        arg_vals[param] = arg

    # Total number of combinations (rows) over all parameters
    combinations = 1
    for param in cli_args.param:
        combinations *= len(arg_vals[param])

    # Build one column per parameter so that the rows enumerate every combination:
    # the first parameter varies slowest and the last one fastest
    prev_combinations = 1
    for param in cli_args.param:
        values = arg_vals[param]
        next_combinations = combinations // (prev_combinations * len(values))
        stretched = [ val for val in values for _ in range(next_combinations) ]
        args[param] = stretched * prev_combinations
        prev_combinations *= len(values)
# Write the generated argument table to the output CSV file
args.to_csv(path_or_buf=args_file)