diff --git a/examples/runtime/configs.txt b/examples/runtime/configs.txt new file mode 100644 index 0000000..5e93571 --- /dev/null +++ b/examples/runtime/configs.txt @@ -0,0 +1,18 @@ +IsFirstColumnId = Yes +SaveFull = No +SaveRuntime = No +SequencesDir = data/pydnaepbd_things/id_seqs.txt +OutputsDir = no_outputs/ +Flanks = None +Temperature = 310 +Iterations = 100 +PreheatingSteps = 50000 +PostPreheatingSteps = 80000 +ComputingNodes = 256 +BubbleMonitor = Off +CoordinateMonitor = On +FlippingMonitorVerbose = On +FlippingMonitor = Off +EnergyMonitor = Off +MeltingAndFractionMonitor = Off +MeltingAndFractionManyMonitor = Off \ No newline at end of file diff --git a/pydna_epbd/input_reader.py b/pydna_epbd/input_reader.py index 7478325..e7fdbf6 100644 --- a/pydna_epbd/input_reader.py +++ b/pydna_epbd/input_reader.py @@ -2,9 +2,7 @@ from pydna_epbd.configs import InputConfigs -def read_sequences_from_a_file( - input_seqs_dir, filename_with_ext, is_first_col_id, flanks, outputs_dir -): +def read_sequences_from_a_file(input_seqs_dir, filename_with_ext, is_first_col_id, flanks, outputs_dir): """Read DNA sequences from one input file. This also creates output directories for saving simulation outputs. Args: @@ -56,9 +54,7 @@ def read_all_sequences(input_seqs_dir, is_first_col_id, flanks, outputs_dir): all_seqs = [] total_num_of_bps = 0 for i, filename_with_ext in enumerate(os.listdir(input_seqs_dir)): - seqs = read_sequences_from_a_file( - input_seqs_dir, filename_with_ext, is_first_col_id, flanks, outputs_dir - ) + seqs = read_sequences_from_a_file(input_seqs_dir, filename_with_ext, is_first_col_id, flanks, outputs_dir) all_seqs += seqs n_bps = len(seqs[0][2]) - (2 * len(flanks)) @@ -110,9 +106,7 @@ def read_configurations(configuration_filepath): os.environ["FLIPPING_MONITOR"] = configs["FlippingMonitor"] os.environ["ENERGY_MONITOR"] = configs["EnergyMonitor"] os.environ["MELTING_AND_FRACTION_MONITOR"] = configs["MeltingAndFractionMonitor"] - os.environ["MELTING_AND_FRACTION_MANY_MONITOR"] = configs[ - "MeltingAndFractionManyMonitor" - ] + os.environ["MELTING_AND_FRACTION_MANY_MONITOR"] = configs["MeltingAndFractionManyMonitor"] # reading sequences and creating outputs directory sequences = read_all_sequences(input_seqs_dir, is_first_col_id, flanks, outputs_dir) diff --git a/pydna_epbd/monitors/all_monitors.py b/pydna_epbd/monitors/all_monitors.py index 407e231..920d19c 100644 --- a/pydna_epbd/monitors/all_monitors.py +++ b/pydna_epbd/monitors/all_monitors.py @@ -47,15 +47,11 @@ def __init__(self, dna, n_preheating_steps, n_steps_after_preheating) -> None: self.monitors.append(self.energy_monitor) if os.environ["MELTING_AND_FRACTION_MONITOR"] == "On": - self.melting_and_fraction_monitor = MeltingAndFractionMonitor( - dna, n_steps_after_preheating - ) + self.melting_and_fraction_monitor = MeltingAndFractionMonitor(dna, n_steps_after_preheating) self.monitors.append(self.melting_and_fraction_monitor) if os.environ["MELTING_AND_FRACTION_MANY_MONITOR"] == "On": - self.melting_and_fraction_many_monitor = MeltingAndFractionManyMonitor( - dna, n_preheating_steps - ) + self.melting_and_fraction_many_monitor = MeltingAndFractionManyMonitor(dna, n_preheating_steps) self.monitors.append(self.melting_and_fraction_many_monitor) # if os.environ['COORD_VERBOSE_MONITOR'] == 'True': diff --git a/pydna_epbd/monitors/bubble_monitor.py b/pydna_epbd/monitors/bubble_monitor.py index bba14d1..56b3247 100644 --- a/pydna_epbd/monitors/bubble_monitor.py +++ b/pydna_epbd/monitors/bubble_monitor.py @@ -22,8 +22,7 @@ def __init__(self, dna) -> None: """ super(BubbleMonitor, self).__init__(dna) self.bubbles = [ - [[0] * self.TRESHOLD_SIZE for _ in range(self.MAX_BUB_ELEM)] - for _ in range(self.dna.n_nt_bases) + [[0] * self.TRESHOLD_SIZE for _ in range(self.MAX_BUB_ELEM)] for _ in range(self.dna.n_nt_bases) ] # shape=(n_nt_bases, MAX_BUB_ELEM, TRESHOLD_SIZE) def collect_at_step(self, step_no): diff --git a/pydna_epbd/monitors/coord_monitor.py b/pydna_epbd/monitors/coord_monitor.py index da000c0..0474c7f 100644 --- a/pydna_epbd/monitors/coord_monitor.py +++ b/pydna_epbd/monitors/coord_monitor.py @@ -12,7 +12,7 @@ def __init__(self, dna) -> None: """ super(CoordMonitor, self).__init__(dna) self.coord = [0.0] * self.dna.n_nt_bases - self.coord_square = [0.0] * self.dna.n_nt_bases + # self.coord_square = [0.0] * self.dna.n_nt_bases def collect_at_step(self, step_no): """Collect bps distance and squared-distance at every post-preheating step. @@ -22,4 +22,4 @@ def collect_at_step(self, step_no): """ for i in range(self.dna.n_nt_bases): self.coord[i] += self.dna.coords_dist[i] - self.coord_square[i] += self.dna.coords_dist[i] ** 2 + # self.coord_square[i] += self.dna.coords_dist[i] ** 2 diff --git a/pydna_epbd/monitors/coord_monitor_verbose.py b/pydna_epbd/monitors/coord_monitor_verbose.py index 01715ee..7e37334 100644 --- a/pydna_epbd/monitors/coord_monitor_verbose.py +++ b/pydna_epbd/monitors/coord_monitor_verbose.py @@ -1,4 +1,3 @@ - # this is deprecated, this information is already in the coord-monitor. # import sys @@ -16,10 +15,10 @@ # """ # def __init__(self, dna:DNA, input_configs:InputConfigs) -> None: # super(CoordMonitorVerbose, self).__init__(dna, input_configs) - + # def collect_at_step(self, step_no): # return super().collect_at_step(step_no) - + # def output_iter(self): # output_file = f"outputs/coord_monitor_verbose/seqidx_{self.seq_idx}_iter_{self.iter_no}.txt" -# super()._write_file(output_file, self.coord) \ No newline at end of file +# super()._write_file(output_file, self.coord) diff --git a/pydna_epbd/monitors/energy_monitor.py b/pydna_epbd/monitors/energy_monitor.py index 8d49123..37ec95f 100644 --- a/pydna_epbd/monitors/energy_monitor.py +++ b/pydna_epbd/monitors/energy_monitor.py @@ -22,9 +22,7 @@ def collect_at_step(self, step_no): Args: step_no (int): Step number. """ - temp = self.dna.total_energy / ( - self.KB * self.dna.n_nt_bases - ) # computing temperature from energy + temp = self.dna.total_energy / (self.KB * self.dna.n_nt_bases) # computing temperature from energy self.energy[step_no] += temp def collect_at_step_preheat(self, step_no): diff --git a/pydna_epbd/monitors/flipping_monitor_verbose.py b/pydna_epbd/monitors/flipping_monitor_verbose.py index a549cdb..6114701 100644 --- a/pydna_epbd/monitors/flipping_monitor_verbose.py +++ b/pydna_epbd/monitors/flipping_monitor_verbose.py @@ -16,9 +16,7 @@ def __init__(self, dna) -> None: dna (DNA): A DNA object. """ super(FlippingMonitorVerbose, self).__init__(dna) - self.flip = [ - [0.0] * self.FLIP_SIZES for i in range(self.dna.n_nt_bases) - ] # shape=(n_nt_bases, FLIP_SIZE) + self.flip = [[0.0] * self.FLIP_SIZES for i in range(self.dna.n_nt_bases)] # shape=(n_nt_bases, FLIP_SIZE) def collect_at_step(self, step_no): """Collects flipping characteristics considering different thresholds. diff --git a/pydna_epbd/monitors/melting_and_fraction_many_monitor.py b/pydna_epbd/monitors/melting_and_fraction_many_monitor.py index afc0516..1dbbf49 100644 --- a/pydna_epbd/monitors/melting_and_fraction_many_monitor.py +++ b/pydna_epbd/monitors/melting_and_fraction_many_monitor.py @@ -9,16 +9,12 @@ class MeltingAndFractionManyMonitor(Monitor): in between [0.5, 2.5) Angstrom with step size 0.1. """ - MELT_FRACTION_TRESHOLDS = [ - i / 10 for i in range(5, 25) - ] # start=0.5, end=2.5, step=0.1 + MELT_FRACTION_TRESHOLDS = [i / 10 for i in range(5, 25)] # start=0.5, end=2.5, step=0.1 MELT_FRACTION_SIZES = len(MELT_FRACTION_TRESHOLDS) # MELT_FRACTION_TIME_STEP = 800 # MELT_FRACTION_MAX_STEPS = 80000 - MELT_FRACTION_TIME_STEPS = ( - 100 # int(MELT_FRACTION_MAX_STEPS/MELT_FRACTION_TIME_STEP) # 100 - ) + MELT_FRACTION_TIME_STEPS = 100 # int(MELT_FRACTION_MAX_STEPS/MELT_FRACTION_TIME_STEP) # 100 def __init__(self, dna, n_preheating_steps) -> None: """Initialize MeltingAndFractionManyMonitor object. @@ -33,12 +29,10 @@ def __init__(self, dna, n_preheating_steps) -> None: self.melting_fraction_many = [0.0] * self.dna.n_nt_bases self.melting_many = [ - [0.0] * self.MELT_FRACTION_SIZES - for i in range(self.MELT_FRACTION_TIME_STEPS) + [0.0] * self.MELT_FRACTION_SIZES for i in range(self.MELT_FRACTION_TIME_STEPS) ] # shape=(MELT_FRACTION_TIME_STEPS, MELT_FRACTION_SIZES) self.fraction_many = [ - [0.0] * self.MELT_FRACTION_SIZES - for i in range(self.MELT_FRACTION_TIME_STEPS) + [0.0] * self.MELT_FRACTION_SIZES for i in range(self.MELT_FRACTION_TIME_STEPS) ] # shape=(MELT_FRACTION_TIME_STEPS, MELT_FRACTION_SIZES) def collect_at_step(self, step_no): @@ -61,18 +55,11 @@ def __check_melting_and_fraction_many(self, step, time_step): melting, fraction = 1.0, 0.0 for base_idx in range(self.dna.n_nt_bases): - if ( - melting == 1.0 - and self.melting_fraction_many[base_idx] / (step + 1) - < self.MELT_FRACTION_TRESHOLDS[thresh_idx] - ): + if melting == 1.0 and self.melting_fraction_many[base_idx] / (step + 1) < self.MELT_FRACTION_TRESHOLDS[thresh_idx]: melting = 0.0 break - if ( - self.melting_fraction_many[base_idx] / (step + 1) - > self.MELT_FRACTION_TRESHOLDS[thresh_idx] - ): + if self.melting_fraction_many[base_idx] / (step + 1) > self.MELT_FRACTION_TRESHOLDS[thresh_idx]: fraction += 1 # computing total fraction length for this threshold self.melting_many[time_step][thresh_idx] += melting diff --git a/pydna_epbd/monitors/melting_and_fraction_monitor.py b/pydna_epbd/monitors/melting_and_fraction_monitor.py index d4e4551..08e5c83 100644 --- a/pydna_epbd/monitors/melting_and_fraction_monitor.py +++ b/pydna_epbd/monitors/melting_and_fraction_monitor.py @@ -36,19 +36,11 @@ def collect_at_iter(self): """Melting and fraction characteristics are collected at the end of the iteration.""" melting, fraction = 1.0, 0.0 for i in range(self.dna.n_nt_bases): - if ( - melting == 1.0 - and (self.melting_fraction[i] / self.n_steps_after_preheating) - < self.MELT_FRACTION_TRESHOLD - ): + if melting == 1.0 and (self.melting_fraction[i] / self.n_steps_after_preheating) < self.MELT_FRACTION_TRESHOLD: melting = 0.0 # 0.0 means did not melt the whole DNA - if ( - self.melting_fraction[i] / self.n_steps_after_preheating - ) > self.MELT_FRACTION_TRESHOLD: + if (self.melting_fraction[i] / self.n_steps_after_preheating) > self.MELT_FRACTION_TRESHOLD: fraction += 1 # total number of base pairs whose distance is >MELT_FRACTION_TRESHOLD self.melting = melting # if melting is 1.0, then the fraction should be 1.0, because the whole DNA melted. - self.fraction = ( - fraction / self.dna.n_nt_bases - ) # fraction of the bps got melted. + self.fraction = fraction / self.dna.n_nt_bases # fraction of the bps got melted. diff --git a/pydna_epbd/run.py b/pydna_epbd/run.py index 70f65d8..2a085c7 100644 --- a/pydna_epbd/run.py +++ b/pydna_epbd/run.py @@ -32,10 +32,7 @@ def parse_args(): # dividing the input sequences to the nodes based on job-idx chunk_size = math.ceil(len(input_configs.sequences) / input_configs.n_nodes) - sequence_chunks = [ - input_configs.sequences[x : x + chunk_size] - for x in range(0, len(input_configs.sequences), chunk_size) - ] + sequence_chunks = [input_configs.sequences[x : x + chunk_size] for x in range(0, len(input_configs.sequences), chunk_size)] sequences = sequence_chunks[job_idx] print(f"job_idx:{job_idx}, n_seqs:{len(sequences)}") diff --git a/pydna_epbd/simulation/aggregate_outputs_and_write.py b/pydna_epbd/simulation/aggregate_outputs_and_write.py index fe85438..15d6fb9 100644 --- a/pydna_epbd/simulation/aggregate_outputs_and_write.py +++ b/pydna_epbd/simulation/aggregate_outputs_and_write.py @@ -30,7 +30,7 @@ def aggregate_outputs_for_single_temp(list_of_monitors, input_configs, out_filep if os.environ["COORD_MONITOR"] == "On": coord_iter_list.append(monitors.coord_monitor.coord) - coord_squared_iter_list.append(monitors.coord_monitor.coord_square) + # coord_squared_iter_list.append(monitors.coord_monitor.coord_square) if os.environ["FLIPPING_MONITOR"] == "On": flip_iter_list.append(monitors.flipping_monitor.flip) @@ -46,91 +46,51 @@ def aggregate_outputs_for_single_temp(list_of_monitors, input_configs, out_filep fraction_iter_list.append(monitors.melting_and_fraction_monitor.fraction) if os.environ["MELTING_AND_FRACTION_MANY_MONITOR"] == "True": - melting_many_iter_list.append( - monitors.melting_and_fraction_many_monitor.melting_many - ) - fraction_many_iter_list.append( - monitors.melting_and_fraction_many_monitor.fraction_many - ) + melting_many_iter_list.append(monitors.melting_and_fraction_many_monitor.melting_many) + fraction_many_iter_list.append(monitors.melting_and_fraction_many_monitor.fraction_many) # formating outputs as dictionary, all features are averaged over the number of iterations outputs = {} if os.environ["BUBBLE_MONITOR"] == "On": # (n_iters, seq_len, max_bubble_elem=20, thr_sizes=20) - outputs["bubbles"] = ( - np.array(bubble_iter_list) - if input_configs.save_full - else np.array(bubble_iter_list).mean(0) - ) + outputs["bubbles"] = np.array(bubble_iter_list) if input_configs.save_full else np.array(bubble_iter_list).mean(0) if os.environ["COORD_MONITOR"] == "On": # (n_iters, seq_len) - outputs["coord"] = ( - np.array(coord_iter_list) - if input_configs.save_full - else np.array(coord_iter_list).mean(0) - ) + outputs["coord"] = np.array(coord_iter_list) if input_configs.save_full else np.array(coord_iter_list).mean(0) # (n_iters, seq_len) - outputs["coord_squared"] = ( - np.array(coord_squared_iter_list) - if input_configs.save_full - else np.array(coord_squared_iter_list).mean(0) - ) + # outputs["coord_squared"] = ( + # np.array(coord_squared_iter_list) + # if input_configs.save_full + # else np.array(coord_squared_iter_list).mean(0) + # ) if os.environ["FLIPPING_MONITOR"] == "On": # (n_iters, seq_len) - outputs["flip"] = ( - np.array(flip_iter_list).mean(0) - if input_configs.save_full - else np.array(flip_iter_list).mean(0) - ) + outputs["flip"] = np.array(flip_iter_list).mean(0) if input_configs.save_full else np.array(flip_iter_list).mean(0) if os.environ["FLIPPING_MONITOR_VERBOSE"] == "On": # (n_iters, seq_len, flip_sizes=10) - outputs["flip_verbose"] = ( - np.array(flip_verbose_iter_list) - if input_configs.save_full - else np.array(flip_verbose_iter_list).mean(0) - ) + outputs["flip_verbose"] = np.array(flip_verbose_iter_list) if input_configs.save_full else np.array(flip_verbose_iter_list).mean(0) if os.environ["ENERGY_MONITOR"] == "On": # (n_iters, total_steps) - outputs["energy"] = ( - np.array(energy_iter_list) - if input_configs.save_full - else np.array(energy_iter_list).mean(0) - ) + outputs["energy"] = np.array(energy_iter_list) if input_configs.save_full else np.array(energy_iter_list).mean(0) if os.environ["MELTING_AND_FRACTION_MONITOR"] == "On": # (n_iters) - outputs["melting"] = ( - np.array(melting_iter_list) - if input_configs.save_full - else np.array(melting_iter_list).mean(0) - ) + outputs["melting"] = np.array(melting_iter_list) if input_configs.save_full else np.array(melting_iter_list).mean(0) # (n_iters) - outputs["fraction"] = ( - np.array(fraction_iter_list) - if input_configs.save_full - else np.array(fraction_iter_list).mean(0) - ) + outputs["fraction"] = np.array(fraction_iter_list) if input_configs.save_full else np.array(fraction_iter_list).mean(0) if os.environ["MELTING_AND_FRACTION_MANY_MONITOR"] == "On": # (n_iters, n_time_steps=100, melt_faction_sizes=20) - outputs["melting_many"] = ( - np.array(melting_many_iter_list) - if input_configs.save_full - else np.array(melting_many_iter_list).mean(0) - ) + outputs["melting_many"] = np.array(melting_many_iter_list) if input_configs.save_full else np.array(melting_many_iter_list).mean(0) # (n_iters, n_time_steps=100, melt_faction_sizes=20) - outputs["fraction_many"] = ( - np.array(fraction_many_iter_list) - if input_configs.save_full - else np.array(fraction_many_iter_list).mean(0) - ) + outputs["fraction_many"] = np.array(fraction_many_iter_list) if input_configs.save_full else np.array(fraction_many_iter_list).mean(0) utils.save_as_pickle(outputs, out_filepath) diff --git a/pydna_epbd/simulation/dna.py b/pydna_epbd/simulation/dna.py index 323764d..27fb974 100644 --- a/pydna_epbd/simulation/dna.py +++ b/pydna_epbd/simulation/dna.py @@ -29,9 +29,7 @@ def reset(self): self.DA, self.kn = self.__init_stacking_terms() self.kn_div_four = [i * 0.25 for i in self.kn] - self.DA_div_sqrt_two = [ - [j * one_div_sqrt2 for j in self.DA[i]] for i in range(self.n_nt_bases) - ] + self.DA_div_sqrt_two = [[j * one_div_sqrt2 for j in self.DA[i]] for i in range(self.n_nt_bases)] def __init_coords(self): """Private method for initializing coordinates (u (left) and v (right) bases). diff --git a/pydna_epbd/simulation/mc_simulation.py b/pydna_epbd/simulation/mc_simulation.py index 1531d62..5a361dc 100644 --- a/pydna_epbd/simulation/mc_simulation.py +++ b/pydna_epbd/simulation/mc_simulation.py @@ -49,9 +49,7 @@ def execute(self, monitors: Monitors, total_steps, preheating_steps): def __move_bp(self): """Moves a randomly selected bp and updates new DNA state and corresponding energy.""" - n = int( - self.dna.n_nt_bases * random() - ) # selecting random n-th base-pair, get_random_displacement() + n = int(self.dna.n_nt_bases * random()) # selecting random n-th base-pair, get_random_displacement() # n_next = (n + 1) % self.dna.n_nt_bases # n_previous = (n - 1 + self.dna.n_nt_bases) % self.dna.n_nt_bases @@ -66,12 +64,7 @@ def __move_bp(self): self.__do_random_displacement(n) # recalculating the energy - etotal = ( - self.dna.total_energy - - self.dna.bp_energies[n][0] - - self.dna.bp_energies[n][1] - - self.dna.bp_energies[n_next][1] - ) + etotal = self.dna.total_energy - self.dna.bp_energies[n][0] - self.dna.bp_energies[n][1] - self.dna.bp_energies[n_next][1] u2 = self.__Umors(n) w1, w3 = self.__Wstack(n_previous, n, n_next) etotal += u2 + w1 + w3 @@ -116,9 +109,7 @@ def __assign_new_state(self, n, n_next, u2, w1, w3, etotal): self.dna.bp_energies[n_next][1] = w3 # y_n - self.dna.coords_dist[n] = ( - self.dna.coords_state[n][0] - self.dna.coords_state[n][1] - ) * one_div_sqrt2 + self.dna.coords_dist[n] = (self.dna.coords_state[n][0] - self.dna.coords_state[n][1]) * one_div_sqrt2 def __revert_old_state(self, n): """Revert to old state. @@ -156,25 +147,15 @@ def __Wstack(self, n_previous, n, n_next): Returns: float, float: Stacking potentials between n- and n-previous bps, and n- and n-next bps. """ - y_n_previous = ( - self.dna.coords_state[n_previous][0] - self.dna.coords_state[n_previous][1] - ) + y_n_previous = self.dna.coords_state[n_previous][0] - self.dna.coords_state[n_previous][1] y_n = self.dna.coords_state[n][0] - self.dna.coords_state[n][1] y_n_next = self.dna.coords_state[n_next][0] - self.dna.coords_state[n_next][1] Kn_div_four = self.dna.kn_div_four[n] - w1 = ( - Kn_div_four - * (1 + ro * exp(-beta1_div_sqrt_two * (y_n_previous + y_n))) - * (y_n_previous - y_n) ** 2 - ) + w1 = Kn_div_four * (1 + ro * exp(-beta1_div_sqrt_two * (y_n_previous + y_n))) * (y_n_previous - y_n) ** 2 Kn_div_four = self.dna.kn_div_four[n_next] - w3 = ( - Kn_div_four - * (1 + ro * exp(-beta1_div_sqrt_two * (y_n_next + y_n))) - * (y_n_next - y_n) ** 2 - ) + w3 = Kn_div_four * (1 + ro * exp(-beta1_div_sqrt_two * (y_n_next + y_n))) * (y_n_next - y_n) ** 2 return w1, w3 def __do_random_displacement(self, n): @@ -187,16 +168,10 @@ def __do_random_displacement(self, n): dx = normalvariate(mu=0.0, sigma=1.0) # get_gasdev() if random() > 0.5: # LEFT, get_l_or_r() self.dna.coords_state[n][0] += dx - if ( - abs(self.dna.coords_state[n][0] - self.coords_state_saved[n][1]) - > self.ACCEPT_CUTOFF - ): + if abs(self.dna.coords_state[n][0] - self.coords_state_saved[n][1]) > self.ACCEPT_CUTOFF: self.dna.coords_state[n][0] -= dx # reverting the change else: # RIGHT self.dna.coords_state[n][1] += dx - if ( - abs(self.dna.coords_state[n][1] - self.coords_state_saved[n][0]) - > self.ACCEPT_CUTOFF - ): + if abs(self.dna.coords_state[n][1] - self.coords_state_saved[n][0]) > self.ACCEPT_CUTOFF: self.dna.coords_state[n][1] -= dx # reverting the change diff --git a/pydna_epbd/simulation/simulation_steps.py b/pydna_epbd/simulation/simulation_steps.py index b22ee08..61d9ae5 100644 --- a/pydna_epbd/simulation/simulation_steps.py +++ b/pydna_epbd/simulation/simulation_steps.py @@ -1,11 +1,13 @@ +import os, time from pydna_epbd.simulation.dna import DNA from pydna_epbd.simulation.mc_simulation import Simulation from pydna_epbd.monitors.all_monitors import Monitors +from pydna_epbd.simulation.aggregate_outputs_and_write import aggregate_outputs_for_single_temp +from joblib import delayed, Parallel +import pydna_epbd.pickle_utils as utils -def run_single_iteration( - n_preheating_steps, n_steps_after_preheating, seq_id, seq, temp, iter_no -): +def run_single_iteration(n_preheating_steps, n_steps_after_preheating, seq_id, seq, temp, iter_no): """This runs a single MCMC simulation iteration. Args: @@ -38,13 +40,6 @@ def run_single_iteration( return monitors -import os, time -from pydna_epbd.simulation.aggregate_outputs_and_write import ( - aggregate_outputs_for_single_temp, -) -from joblib import delayed, Parallel - - def run_sequences(sequences, input_configs): """Main function to run MCMC simulations for all DNA sequences. This initializes 100 or the number of available cpu cores-1 cpus to parallaly run n_iterations. @@ -59,44 +54,44 @@ def run_sequences(sequences, input_configs): runtime_write_mode = "a" if os.path.exists(runtime_filepath) else "w" runtime_out_handle = open(runtime_filepath, runtime_write_mode) - with Parallel(n_jobs=min(100, os.cpu_count() - 1), verbose=1) as parallel: + with Parallel(n_jobs=os.cpu_count() - 3, verbose=1) as parallel: # min(100, os.cpu_count() - 1) for i in range(0, len(sequences)): seq_output_dir, seq_id, seq = sequences[i] simulation_out_filepath = f"{seq_output_dir}{seq_id}.pkl" + # checking whether the simulation is run previously for this seq_id and output is correct if os.path.exists(simulation_out_filepath): - print("Already computed:", simulation_out_filepath) - continue - else: - k = 0 - # for k in range(10): # for 10 runs to do runtime analysis - - print(f"Running simulation: seq_idx:{i} | seq_id:{seq_id}") - start_time = time.time() - - list_of_monitors = parallel( - delayed(run_single_iteration)( - input_configs.n_preheating_steps, - input_configs.n_steps_after_preheating, - seq_id, - seq, - input_configs.temperature, - iter_no, - ) - for iter_no in range(input_configs.n_iterations) - ) - aggregate_outputs_for_single_temp( - list_of_monitors, input_configs, simulation_out_filepath + try: + x = utils.load_pickle(simulation_out_filepath) + print("Already computed:", simulation_out_filepath) + continue + except Exception as e: + print(f"Previously computed '{simulation_out_filepath}' had issues, computing again ... ") + # raise e + # else: + k = 0 + # for k in range(10): # for 10 runs to do runtime analysis + + print(f"Running simulation: seq_idx:{i} | seq_id:{seq_id}") + start_time = time.time() + + list_of_monitors = parallel( + delayed(run_single_iteration)( + input_configs.n_preheating_steps, input_configs.n_steps_after_preheating, seq_id, seq, input_configs.temperature, iter_no ) + for iter_no in range(input_configs.n_iterations) + ) - runtime = time.time() - start_time - print( - f"finished -> {simulation_out_filepath} -> {runtime} seconds to execute" - ) - if input_configs.save_runtime: - runtime_out_handle.write( - f"{k}:{simulation_out_filepath}:{runtime}\n" - ) + assert ( + len(list_of_monitors) == input_configs.n_iterations + ), f"Should be equal, but found {len(list_of_monitors)} != {input_configs.n_iterations}" # synchronization barrier + + aggregate_outputs_for_single_temp(list_of_monitors, input_configs, simulation_out_filepath) + + runtime = time.time() - start_time + print(f"finished -> {simulation_out_filepath} -> {runtime} seconds to execute") + if input_configs.save_runtime: + runtime_out_handle.write(f"{k}:{simulation_out_filepath}:{runtime}\n") # break # to run 1st seq, comment-out this line if input_configs.save_runtime: