From 37c017d0aa1a2ee3db13ea7d07a707a1bc1a3560 Mon Sep 17 00:00:00 2001 From: Simon Bray <32272674+simonbray@users.noreply.github.com> Date: Thu, 25 Apr 2024 20:57:05 +0100 Subject: [PATCH 1/3] [WIP] Extended Python version of rbhtfinder (#18) * extended python version of rbhtfinder * fix time calculation bug, parallelize * add max_perc and min_time cli params to rbhtfinder * update rbhtfinder select_best_filter_combination function to optimize all columns and time; run black * relax error to warning * mv rbhtfinder script to new rdock-utils location * readd original perl rbhtfinder to repo * small fixes * run isort and black * add test input and output data generated with: `rbhtfinder -i rbhtfinder_input.txt -o rbhtfinder_output.txt -t rbhtfinder_threshold.txt -f column=4,steps=3,min=-10.0,max=0.0,interval=5.0 -f column=6,steps=5,min=1.0,max=5.0,interval=5.0 --max-time 1 --min-perc 1.0 -v 5 --header` --- bin/rbhtfinder | 0 rdock-utils/rdock_utils/rbhtfinder | 489 ++++++++++++++++++ .../fixtures/rbhtfinder/rbhtfinder_input.txt | 101 ++++ .../fixtures/rbhtfinder/rbhtfinder_output.txt | 7 + .../rbhtfinder/rbhtfinder_threshold.txt | 7 + 5 files changed, 604 insertions(+) mode change 100755 => 100644 bin/rbhtfinder create mode 100755 rdock-utils/rdock_utils/rbhtfinder create mode 100644 rdock-utils/tests/fixtures/rbhtfinder/rbhtfinder_input.txt create mode 100644 rdock-utils/tests/fixtures/rbhtfinder/rbhtfinder_output.txt create mode 100644 rdock-utils/tests/fixtures/rbhtfinder/rbhtfinder_threshold.txt diff --git a/bin/rbhtfinder b/bin/rbhtfinder old mode 100755 new mode 100644 diff --git a/rdock-utils/rdock_utils/rbhtfinder b/rdock-utils/rdock_utils/rbhtfinder new file mode 100755 index 00000000..9e2123c5 --- /dev/null +++ b/rdock-utils/rdock_utils/rbhtfinder @@ -0,0 +1,489 @@ +#!/usr/bin/env python3 + +import numpy as np + +try: + import pandas as pd +except ImportError: + pd = None + +import argparse +import itertools +import multiprocessing +import os +import sys 
+from collections import Counter +from functools import partial +from pathlib import Path + + +def apply_threshold(scored_poses, column, steps, threshold): + """ + Return the indices of molecules in `scored_poses` that pass the filter, i.e. those whose minimum score (for a specified `column`) within the first `steps` poses is more negative than `threshold`; all other molecules are filtered out. + """ + # minimum score after `steps` per molecule + mins = np.min(scored_poses[:, :steps, column], axis=1) + # return those molecules where the minimum score is less than the threshold + passing_molecules = np.where(mins < threshold)[0] + return passing_molecules + + +def prepare_array(sdreport_array, name_column): + """ + Convert `sdreport_array` (read directly from the tsv) to 3D array (molecules x poses x columns) and filter out molecules with too few/many poses + """ + # find points in the array where the name_column changes (i.e. we are dealing with a new molecule) and split the array + split_array = np.split( + sdreport_array, + np.where( + sdreport_array[:, name_column] + != np.hstack( + (sdreport_array[1:, name_column], sdreport_array[0, name_column]) + ) + )[0] + + 1, + ) + modal_shape = Counter([n.shape for n in split_array]).most_common(1)[0] + number_of_poses = modal_shape[0][ + 0 + ] # find modal number of poses per molecule in the array + + split_array_clean = sum( + [ + np.array_split(n, n.shape[0] / number_of_poses) + for n in split_array + if not n.shape[0] % number_of_poses and n.shape[0] + ], + [], + ) + + if len(split_array_clean) * number_of_poses < sdreport_array.shape[0] * 0.99: + print( + f"WARNING: the number of poses provided per molecule is inconsistent. Only {len(split_array_clean)} of {int(sdreport_array.shape[0] / number_of_poses)} molecules have {number_of_poses} poses." 
+ ) + + molecule_array = np.array(split_array_clean) + # overwrite the name column (should be the only one with dtype=str) so we can force everything to float + molecule_array[:, :, name_column] = 0 + return np.array(molecule_array, dtype=float) + + +def calculate_results_for_filter_combination( + filter_combination, + molecule_array, + filters, + min_score_indices, + number_of_validation_mols, +): + """ + For a particular combination of filters, calculate the percentage of molecules that will be filtered, the percentage of top-scoring molecules that will be filtered, and the time taken relative to exhaustive docking + """ + # mols_passed_threshold is a list of indices of molecules which have passed the applied filters. As more filters are applied, it gets smaller. Before any iteration, we initialise with all molecules passing + mols_passed_threshold = list(range(molecule_array.shape[0])) + filter_percentages = [] + number_of_simulated_poses = 0 # number of poses which we calculate would be generated, we use this to calculate the TIME column in the final output + for n, threshold in enumerate(filter_combination): + if n: + # e.g. if there are 5000 mols left after 15 steps and the last filter was at 5 steps, append 5000 * (15 - 5) to number_of_simulated_poses + number_of_simulated_poses += len(mols_passed_threshold) * ( + filters[n]["steps"] - filters[n - 1]["steps"] + ) + else: + number_of_simulated_poses += ( + len(mols_passed_threshold) * filters[n]["steps"] + ) + mols_passed_threshold = [ # all mols which pass the threshold and which were already in mols_passed_threshold, i.e. 
passed all previous filters + n + for n in apply_threshold( + molecule_array, filters[n]["column"], filters[n]["steps"], threshold + ) + if n in mols_passed_threshold + ] + filter_percentages.append(len(mols_passed_threshold) / molecule_array.shape[0]) + number_of_simulated_poses += len(mols_passed_threshold) * ( + molecule_array.shape[1] - filters[-1]["steps"] + ) + perc_val = { + k: len([n for n in v if n in mols_passed_threshold]) / number_of_validation_mols + for k, v in min_score_indices.items() + } + return { + "filter_combination": filter_combination, + "perc_val": perc_val, + "filter_percentages": filter_percentages, + "time": number_of_simulated_poses / np.product(molecule_array.shape[:2]), + } + + +def write_output( + results, filters, number_of_validation_mols, output_file, column_names +): + """ + Print results as a table. The number of columns varies depending how many columns the user picked. + """ + with open(output_file, "w") as f: + # write header + for n in range(len(results[0]["filter_combination"])): + f.write(f"FILTER{n+1}\tNSTEPS{n+1}\tTHR{n+1}\tPERC{n+1}\t") + for n in results[0]["perc_val"]: + f.write(f"TOP{number_of_validation_mols}_{column_names[n]}\t") + f.write(f"ENRICH_{column_names[n]}\t") + f.write("TIME\n") + + # write results + for result in results: + for n, threshold in enumerate(result["filter_combination"]): + f.write( + f"{column_names[filters[n]['column']]}\t{filters[n]['steps']}\t{threshold:.2f}\t{result['filter_percentages'][n]*100:.2f}\t" + ) + for n in result["perc_val"]: + f.write(f"{result['perc_val'][n]*100:.2f}\t") + if result["filter_percentages"][-1]: + f.write( + f"{result['perc_val'][n]/result['filter_percentages'][-1]:.2f}\t" + ) + else: + f.write("NaN\t") + f.write(f"{result['time']:.4f}\n") + return + + +def select_best_filter_combination(results, max_time, min_perc): + """ + Very debatable how to do this... 
+ Here we exclude all combinations with TIME >= max_time or a final PERC below min_perc, sum the + min-max-normalised validation percentages and the normalised time saving of each remaining + combination, and select the combination with the highest total + """ + min_max_values = {} + for col in results[0]["perc_val"].keys(): + vals = [result["perc_val"][col] for result in results] + min_max_values[col] = {"min": min(vals), "max": max(vals)} + time_vals = [result["time"] for result in results] + min_max_values["time"] = {"min": min(time_vals), "max": max(time_vals)} + + combination_scores = [ + sum( + [ + ( + (result["perc_val"][col] - min_max_values[col]["min"]) + / (min_max_values[col]["max"] - min_max_values[col]["min"]) + ) + for col in results[0]["perc_val"].keys() + ] + + [ + (min_max_values["time"]["max"] - result["time"]) + / (min_max_values["time"]["max"] - min_max_values["time"]["min"]) + ] + ) + if result["time"] < max_time + and result["filter_percentages"][-1] >= min_perc / 100 + else 0 + for result in results + ] + return np.argmax(combination_scores) + + +def write_threshold_file( + filters, best_filter_combination, threshold_file, column_names, max_number_of_runs +): + with open(threshold_file, "w") as f: + # write number of filters to apply + f.write(f"{len(filters) + 1}\n") + # write each filter to a separate line + for n, filtr in enumerate(filters): + f.write( + f'if - {best_filter_combination[n]:.2f} {column_names[filtr["column"]]} 1.0 if - SCORE.NRUNS {filtr["steps"]} 0.0 -1.0,\n' + ) + # write filter to terminate docking when NRUNS reaches the number of runs used in the input file + f.write(f"if - SCORE.NRUNS {max_number_of_runs - 1} 0.0 -1.0\n") + + # write final filters - find strictest filters for all columns and apply them again + filters_by_column = { + col: [ + best_filter_combination[n] + for n, filtr in enumerate(filters) + if filtr["column"] == col + ] + for col in set([filtr["column"] for filtr in filters]) + } + # write number of filters (same as number of 
columns filtered on) + f.write(f"{len(filters_by_column)}\n") + # write filter + for col, values in filters_by_column.items(): + f.write(f"- {column_names[col]} {min(values)},\n") + + +def main(): + """ + Parse arguments; read in data; calculate filter combinations and apply them; print results + """ + parser = argparse.ArgumentParser( + description="""Estimate the results and computation time of an rDock high +throughput protocol. The following steps should be followed: +1) exhaustive docking of a small representative part of the entire + library. +2) Store the result of sdreport -t over that exhaustive docking run + in a file which will be the input of this script. +3) Run rbhtfinder, specifying -i and an arbitrary + number of filters specified using the -f option, for example + "-f column=6,steps=5,min=0.5,max=1.0,interval=0.1". This example + would simulate the effect of applying thresholds on column 6 after + 5 poses have been generated, for values between 0.5 and 1.0 (i.e. + 0.5, 0.6, 0.7, 0.8, 0.9, 1.0). More than one threshold can be + specified, e.g., "-f column=4,steps=5,min=-12,max=-10,interval=1 + -f column=4,steps=15,min=-16,max=-15,interval=1" will test the + following combinations of thresholds on column 4: + 5 -10 15 -15 + 5 -11 15 -15 + 5 -12 15 -15 + 5 -10 15 -16 + 5 -11 15 -16 + 5 -12 15 -16 + The number of combinations will increase very rapidly, the more + filters are used and the larger the range of values specified for + each. It may be sensible to run rbhtfinder several times to explore + the effects of various filters independently. + + The output of the program consists of the following columns. 
+ FILTER1 NSTEPS1 THR1 PERC1 TOP500_SCORE.INTER ENRICH_SCORE.INTER TIME + SCORE.INTER 5 -13.00 6.04 72.80 12.05 0.0500 + SCORE.INTER 5 -12.00 9.96 82.80 8.31 0.0500 + The four columns are repeated for each filter specified with the -f + option: name of the column on which the filter is applied + (FILTER1), number of steps at which the threshold is applied + (NSTEPS1), value of the threshold (THR1) and the percentage of + poses which pass this filter (PERC1). Additional filters (FILTER2, + FILTER3 etc.) are listed in the order that they are applied (i.e. + by NSTEPS). + + The final columns provide some overall statistics for the + combination of thresholds specified in a row. TOP500_SCORE.INTER + gives the percentage of the top-scoring 500 poses, measured by + SCORE.INTER, from the whole of the input file which are retained + after the thresholds are applied. This can be contrasted with the + final PERC column. The higher the ratio (the 'enrichment factor'), + the better the combination of thresholds. If thresholds are applied + on multiple columns, this column will be duplicated for each, e.g. + TOP500_SCORE.INTER and TOP500_SCORE.RESTR will give the percentage + of the top-scoring poses retained for both of these scoring + methods. The exact number of poses used for this validation can be + changed from the default 500 using the --validation flag. + ENRICH_SCORE.INTER gives the enrichment factor as a quick + rule-of-thumb to assess the best choice of thresholds. The final + column TIME provides an estimate of the time taken to perform + docking, as a proportion of the time taken for exhaustive docking. + This value should be below 0.1. + + After a combination of thresholds has been selected, they need to + be encoded into a threshold file which rDock can use as an input. + rbhtfinder attempts to help with this task by automatically + selecting a combination and writing a threshold file. 
The + combination chosen is that which provides the highest enrichment + factor, after all options with a TIME value over 0.1 are excluded. + This choice should not be blindly followed, so the threshold file + should be considered a template that the user modifies as needed. + + rbhtfinder requires NumPy. Installation of pandas is recommended, + but optional; if pandas is not available, loading the input file + for calculations will be considerably slower. + + """, + formatter_class=argparse.RawTextHelpFormatter, + ) + parser.add_argument( + "-i", + "--input", + help="Input from sdreport (tabular separated format).", + type=Path, + required=True, + ) + parser.add_argument( + "-o", + "--output", + help="Output file for report on threshold combinations.", + type=Path, + required=True, + ) + parser.add_argument( + "-t", + "--threshold", + help="Threshold file used by rDock as input.", + type=Path, + ) + parser.add_argument( + "-n", + "--name", + type=int, + default=2, + help="Index of column containing the molecule name. Default is 2.", + ) + parser.add_argument( + "-f", + "--filter", + nargs="+", + action="append", + type=str, + help="Filter to apply, e.g. column=4,steps=5,min=-15,max=-10,interval=1 will test applying a filter to column 4 after generation of 5 poses, with threshold values between -15 and -10 tested. The variables column, steps, min and max must all be specified; interval defaults to 1 if not given.", + ) + parser.add_argument( + "-v", + "--validation", + type=int, + default=500, + help="Top-scoring N molecules from input to use for validating threshold combinations. Default is 500.", + ) + parser.add_argument( + "--header", + action="store_true", + help="Specify if the input file from sdreport contains a header line with column names. If not, output files will describe columns using indices, e.g. 
COL4, COL5.", + ) + parser.add_argument( + "--max-time", + type=float, + default=0.1, + help="Maximum value for time to use when autogenerating a high-throughput protocol - default is 0.1, i.e. 10%% of the time exhaustive docking would take.", + ) + parser.add_argument( + "--min-perc", + type=float, + default=1.0, + help="Minimum value for the estimated final percentage of compounds to use when autogenerating a high-throughput protocol - default is 1.", + ) + + args = parser.parse_args() + args.name -= 1 # because np arrays need 0-based indices + + # create filters dictionary from args.filter passed in + filters = [ + dict([n.split("=") for n in filtr[0].split(",")]) for filtr in args.filter + ] + filters = [ + { + k: float(v) if k in ["interval", "min", "max"] else int(v) + for k, v in filtr.items() + } + for filtr in filters + ] + + for filtr in filters: + # user inputs with 1-based numbering whereas python uses 0-based + filtr["column"] -= 1 + + # sort filters by step at which they are applied + filters.sort(key=lambda n: n["steps"]) + + # generates all possible combinations from filters provided + filter_combinations = list( + itertools.product( + *( + np.arange(*n) + for n in [ + ( + filtr["min"], + filtr["max"] + filtr.get("interval", 1), + filtr.get("interval", 1), + ) + for filtr in filters + ] + ) + ) + ) + print(f"{len(filter_combinations)} combinations of filters calculated.") + + # remove redundant combinations, i.e. 
where filters for later steps are less or equally strict to earlier steps + filter_combinations = np.array(filter_combinations) + cols = [filtr["column"] for filtr in filters] + indices_per_col = { + col: [n for n, filter_col in enumerate(cols) if col == filter_col] + for col in set(cols) + } + filter_combination_indices_to_keep = range(len(filter_combinations)) + for col, indices in indices_per_col.items(): + filter_combination_indices_to_keep = [ + n + for n, comb in enumerate(filter_combinations[:, indices]) + if list(comb) == sorted(comb, reverse=True) + and len(set(comb)) == comb.shape[0] + and n in filter_combination_indices_to_keep + ] + filter_combinations = filter_combinations[filter_combination_indices_to_keep] + + if len(filter_combinations): + print( + f"{len(filter_combinations)} combinations of filters remain after removal of redundant combinations. Starting calculations..." + ) + else: + print( + "No filter combinations could be calculated - check the thresholds specified." + ) + exit(1) + + if pd: + # pandas is weird... 
i.e., skip line 0 if there's a header, else read all lines + header = 0 if args.header else None + sdreport_dataframe = pd.read_csv(args.input, sep="\t", header=header) + if args.header: + column_names = sdreport_dataframe.columns.values + else: + # use index names; add 1 to deal with zero-based numbering + column_names = [f"COL{n+1}" for n in range(len(sdreport_dataframe.columns))] + sdreport_array = sdreport_dataframe.values + else: # pd not available + np_array = np.loadtxt(args.input, dtype=str) + if args.header: + column_names = np_array[0] + sdreport_array = np_array[1:] + else: + column_names = [f"COL{n+1}" for n in range(np_array.shape[1])] + sdreport_array = np_array + print("Data read in from input file.") + + # convert to 3D array (molecules x poses x columns) + molecule_array = prepare_array(sdreport_array, args.name) + + # find the top scoring compounds for validation of the filter combinations + min_score_indices = {} + for column in set(filtr["column"] for filtr in filters): + min_scores = np.min(molecule_array[:, :, column], axis=1) + min_score_indices[column] = np.argpartition(min_scores, args.validation)[ + : args.validation + ] + + results = [] + + pool = multiprocessing.Pool(os.cpu_count()) + results = pool.map( + partial( + calculate_results_for_filter_combination, + molecule_array=molecule_array, + filters=filters, + min_score_indices=min_score_indices, + number_of_validation_mols=args.validation, + ), + filter_combinations, + ) + + write_output(results, filters, args.validation, args.output, column_names) + + best_filter_combination = select_best_filter_combination( + results, args.max_time, args.min_perc + ) + if args.threshold: + if best_filter_combination: + write_threshold_file( + filters, + filter_combinations[best_filter_combination], + args.threshold, + column_names, + molecule_array.shape[1], + ) + else: + print( + "Filter combinations defined are too strict or would take too long to run; no threshold file was written." 
+ ) + exit(1) + + +if __name__ == "__main__": + main() diff --git a/rdock-utils/tests/fixtures/rbhtfinder/rbhtfinder_input.txt b/rdock-utils/tests/fixtures/rbhtfinder/rbhtfinder_input.txt new file mode 100644 index 00000000..0d9277c6 --- /dev/null +++ b/rdock-utils/tests/fixtures/rbhtfinder/rbhtfinder_input.txt @@ -0,0 +1,101 @@ +REC _TITLE1 TOTAL INTER INTRA RESTR VDW +001 mol00 -16.905 -11.204 -6.416 0.715 -18.926 +002 mol00 2.595 -0.601 -1.152 4.347 -11.001 +003 mol00 -13.022 -12.572 -8.953 8.502 -20.443 +004 mol00 -16.128 -12.742 -8.977 5.591 -17.353 +005 mol00 -10.576 -4.606 -6.451 0.481 -16.707 +006 mol00 -18.429 -11.402 -8.179 1.152 -18.191 +007 mol00 -18.316 -12.749 -6.842 1.275 -21.002 +008 mol00 -13.123 -6.272 -9.001 2.150 -16.672 +009 mol00 -6.763 -7.234 -4.006 4.478 -15.995 +010 mol00 -16.302 -11.451 -5.042 0.192 -21.602 +011 mol01 -14.764 -12.244 -3.069 0.550 -16.362 +012 mol01 -8.102 -9.014 -2.509 3.421 -13.535 +013 mol01 -17.136 -13.983 -4.509 1.356 -15.128 +014 mol01 -10.791 -7.401 -4.334 0.944 -12.455 +015 mol01 -15.107 -11.770 -3.681 0.343 -12.760 +016 mol01 -15.348 -12.600 -3.085 0.337 -12.213 +017 mol01 -13.234 -9.356 -4.039 0.161 -13.449 +018 mol01 -12.883 -10.593 -2.692 0.401 -14.155 +019 mol01 -14.937 -12.053 -3.622 0.738 -16.503 +020 mol01 -15.504 -12.806 -3.140 0.442 -12.497 +021 mol02 -12.446 -11.333 -4.405 3.291 -15.701 +022 mol02 -13.334 -11.044 -2.708 0.418 -13.332 +023 mol02 -12.298 -8.953 -4.006 0.662 -13.422 +024 mol02 -10.855 -8.415 -3.033 0.593 -12.782 +025 mol02 -12.506 -9.802 -3.198 0.494 -14.579 +026 mol02 -13.582 -11.559 -2.422 0.399 -15.628 +027 mol02 -14.966 -11.346 -4.361 0.741 -16.671 +028 mol02 -15.302 -12.238 -3.389 0.324 -13.782 +029 mol02 -9.849 -9.111 -4.596 3.858 -14.011 +030 mol02 -13.621 -11.178 -2.870 0.427 -15.527 +031 mol03 -10.492 -8.634 -2.412 0.554 -12.702 +032 mol03 -16.369 -12.611 -3.925 0.166 -15.707 +033 mol03 -16.074 -12.018 -4.147 0.091 -14.921 +034 mol03 -6.623 -8.868 -2.337 4.582 -13.383 +035 mol03 
-4.061 -4.354 -4.135 4.428 -11.803 +036 mol03 -16.844 -13.744 -3.429 0.329 -14.531 +037 mol03 -16.759 -14.229 -2.994 0.464 -15.433 +038 mol03 -15.680 -11.976 -3.889 0.185 -15.065 +039 mol03 -11.919 -9.693 -2.623 0.398 -14.239 +040 mol03 -8.137 -7.516 -3.235 2.614 -11.614 +041 mol04 -7.776 -6.296 -2.270 0.790 -16.535 +042 mol04 6.644 5.519 -0.566 1.691 -0.734 +043 mol04 -3.363 -7.773 0.964 3.446 -13.299 +044 mol04 -4.351 -4.121 -1.905 1.675 -11.049 +045 mol04 -2.875 -5.317 0.643 1.799 -13.852 +046 mol04 -7.823 -9.622 -0.031 1.830 -14.752 +047 mol04 -2.534 -1.876 -2.013 1.354 -10.910 +048 mol04 -13.193 -11.516 -2.048 0.371 -17.047 +049 mol04 -8.574 -9.947 1.073 0.301 -18.351 +050 mol04 -9.966 -9.181 -1.811 1.027 -14.498 +051 mol05 -5.717 -12.369 -0.344 6.997 -20.154 +052 mol05 -5.265 -9.689 0.036 4.387 -16.474 +053 mol05 -11.101 -9.229 -2.354 0.483 -17.823 +054 mol05 -3.375 -5.926 -1.281 3.832 -14.547 +055 mol05 -9.546 -12.438 -1.927 4.819 -17.671 +056 mol05 -12.771 -15.095 1.703 0.621 -17.161 +057 mol05 -19.198 -19.152 -0.788 0.743 -17.933 +058 mol05 -12.564 -13.726 -0.425 1.587 -19.786 +059 mol05 -3.387 -7.638 1.574 2.678 -16.308 +060 mol05 -14.882 -17.451 -0.477 3.045 -19.050 +061 mol06 -15.764 -17.717 0.853 1.101 -21.131 +062 mol06 -2.956 -7.275 0.313 4.006 -14.833 +063 mol06 -6.103 -12.909 2.281 4.526 -17.262 +064 mol06 1.370 -1.589 -0.619 3.579 -9.989 +065 mol06 0.980 -14.709 0.605 15.084 -20.358 +066 mol06 3.784 -6.808 8.337 2.255 -14.995 +067 mol06 -5.845 -12.679 2.130 4.704 -17.065 +068 mol06 -5.255 -12.309 4.456 2.598 -17.557 +069 mol06 -5.051 -8.500 -1.065 4.515 -12.298 +070 mol06 -8.737 -13.409 3.272 1.400 -17.974 +071 mol07 -5.945 -6.564 -0.932 1.551 -15.670 +072 mol07 -11.177 -12.429 -1.525 2.777 -15.118 +073 mol07 -3.446 -1.734 -2.958 1.246 -7.623 +074 mol07 -4.229 -5.796 -0.264 1.831 -14.220 +075 mol07 -14.958 -15.847 -0.333 1.222 -18.956 +076 mol07 -8.390 -8.507 -0.927 1.045 -14.022 +077 mol07 -5.093 -5.862 -1.992 2.761 -15.437 +078 mol07 -9.813 
-12.418 -0.122 2.726 -17.489 +079 mol07 -10.936 -10.623 -1.940 1.626 -16.272 +080 mol07 -2.593 -7.660 3.906 1.162 -10.076 +081 mol08 -30.625 -10.460 -24.533 4.369 -21.331 +082 mol08 -34.896 -10.897 -28.333 4.334 -24.000 +083 mol08 -37.535 -5.959 -32.574 0.998 -17.627 +084 mol08 -24.337 -1.398 -32.330 9.391 -13.655 +085 mol08 -33.982 -6.759 -29.808 2.584 -20.003 +086 mol08 -22.908 -5.812 -32.172 15.076 -17.519 +087 mol08 -10.119 5.962 -25.259 9.178 -7.399 +088 mol08 -36.286 -7.066 -31.019 1.799 -19.466 +089 mol08 -32.439 -4.421 -28.944 0.926 -16.742 +090 mol08 -33.056 -3.138 -31.632 1.714 -16.795 +091 mol09 -37.922 -11.009 -28.015 1.102 -14.514 +092 mol09 -33.961 -11.278 -28.396 5.713 -18.027 +093 mol09 -30.177 -6.085 -27.327 3.235 -11.667 +094 mol09 -36.755 -10.942 -27.524 1.710 -16.747 +095 mol09 -27.609 -3.028 -27.462 2.881 -5.874 +096 mol09 -29.025 -10.924 -25.192 7.091 -17.062 +097 mol09 -28.521 -6.851 -28.559 6.889 -12.872 +098 mol09 -37.849 -18.828 -26.348 7.327 -18.185 +099 mol09 -33.968 -11.233 -28.349 5.614 -17.982 +100 mol09 -37.434 -10.703 -28.080 1.348 -16.012 diff --git a/rdock-utils/tests/fixtures/rbhtfinder/rbhtfinder_output.txt b/rdock-utils/tests/fixtures/rbhtfinder/rbhtfinder_output.txt new file mode 100644 index 00000000..eba0b51b --- /dev/null +++ b/rdock-utils/tests/fixtures/rbhtfinder/rbhtfinder_output.txt @@ -0,0 +1,7 @@ +FILTER1 NSTEPS1 THR1 PERC1 FILTER2 NSTEPS2 THR2 PERC2 TOP5_INTER ENRICH_INTER TOP5_RESTR ENRICH_RESTR TIME +INTER 3 -10.00 90.00 RESTR 5 1.00 60.00 40.00 0.67 80.00 1.33 0.7800 +INTER 3 -10.00 90.00 RESTR 5 6.00 90.00 100.00 1.11 80.00 0.89 0.9300 +INTER 3 -5.00 100.00 RESTR 5 1.00 70.00 40.00 0.57 100.00 1.43 0.8500 +INTER 3 -5.00 100.00 RESTR 5 6.00 100.00 100.00 1.00 100.00 1.00 1.0000 +INTER 3 0.00 100.00 RESTR 5 1.00 70.00 40.00 0.57 100.00 1.43 0.8500 +INTER 3 0.00 100.00 RESTR 5 6.00 100.00 100.00 1.00 100.00 1.00 1.0000 diff --git a/rdock-utils/tests/fixtures/rbhtfinder/rbhtfinder_threshold.txt 
b/rdock-utils/tests/fixtures/rbhtfinder/rbhtfinder_threshold.txt new file mode 100644 index 00000000..8178451c --- /dev/null +++ b/rdock-utils/tests/fixtures/rbhtfinder/rbhtfinder_threshold.txt @@ -0,0 +1,7 @@ +3 +if - -5.00 INTER 1.0 if - SCORE.NRUNS 3 0.0 -1.0, +if - 1.00 RESTR 1.0 if - SCORE.NRUNS 5 0.0 -1.0, +if - SCORE.NRUNS 9 0.0 -1.0 +2 +- INTER -5.0, +- RESTR 1.0, From e1ecff6fdebd819042bc5ba5023fd8c24b56034d Mon Sep 17 00:00:00 2001 From: ggutierrez <94693768+ggutierrez-sunbright@users.noreply.github.com> Date: Thu, 25 Apr 2024 22:44:29 +0200 Subject: [PATCH 2/3] make rbhtfinder executable (#108) --- bin/rbhtfinder | 0 1 file changed, 0 insertions(+), 0 deletions(-) mode change 100644 => 100755 bin/rbhtfinder diff --git a/bin/rbhtfinder b/bin/rbhtfinder old mode 100644 new mode 100755 From 97a05d6156fd3c9a0b1e370d6df9a55a9faedd66 Mon Sep 17 00:00:00 2001 From: Luis Carlos Hernandez <112667493+lpardey@users.noreply.github.com> Date: Fri, 3 May 2024 14:46:20 -0500 Subject: [PATCH 3/3] Issue # 77 sdsort (#109) * init commit * add molecule with progress log function * refactor sdsort parser * refactor sdsort * add sdf testing files * add helper module * add fixtures to conftest * add integration test * add sdsort script to toml file * moved SDSortConfig dataclass to sdsort module --------- Co-authored-by: ggutierrez <94693768+ggutierrez-sunbright@users.noreply.github.com> --- rdock-utils/pyproject.toml | 1 + rdock-utils/rdock_utils/common/SDFParser.py | 8 + rdock-utils/rdock_utils/common/__init__.py | 3 +- rdock-utils/rdock_utils/common/files.py | 2 +- rdock-utils/rdock_utils/sdsort/__init__.py | 0 rdock-utils/rdock_utils/sdsort/main.py | 12 + rdock-utils/rdock_utils/sdsort/parser.py | 30 ++ rdock-utils/rdock_utils/sdsort/sdsort.py | 62 +++ .../tests/fixtures/sdsort/basic_input.sdf | 270 ++++++++++ .../tests/fixtures/sdsort/fast_mode_input.sdf | 450 +++++++++++++++++ .../sdsort/fast_mode_input_with_id.sdf | 460 ++++++++++++++++++ 
rdock-utils/tests/sdsort/__init__.py | 0 rdock-utils/tests/sdsort/conftest.py | 7 + rdock-utils/tests/sdsort/test_integration.py | 102 ++++ 14 files changed, 1405 insertions(+), 2 deletions(-) create mode 100644 rdock-utils/rdock_utils/sdsort/__init__.py create mode 100644 rdock-utils/rdock_utils/sdsort/main.py create mode 100644 rdock-utils/rdock_utils/sdsort/parser.py create mode 100644 rdock-utils/rdock_utils/sdsort/sdsort.py create mode 100644 rdock-utils/tests/fixtures/sdsort/basic_input.sdf create mode 100644 rdock-utils/tests/fixtures/sdsort/fast_mode_input.sdf create mode 100644 rdock-utils/tests/fixtures/sdsort/fast_mode_input_with_id.sdf create mode 100644 rdock-utils/tests/sdsort/__init__.py create mode 100644 rdock-utils/tests/sdsort/conftest.py create mode 100644 rdock-utils/tests/sdsort/test_integration.py diff --git a/rdock-utils/pyproject.toml b/rdock-utils/pyproject.toml index 36616a93..a3d96bde 100644 --- a/rdock-utils/pyproject.toml +++ b/rdock-utils/pyproject.toml @@ -18,6 +18,7 @@ sdtether = "rdock_utils.sdtether.main:main" sdtether_old = "rdock_utils.sdtether_original:main" sdfilter = "rdock_utils.sdfilter.main:main" sdmodify = "rdock_utils.sdmodify:main" +sdsort = "rdock_utils.sdsort.main:main" [project.urls] Repository = "https://github.com/CBDD/rDock.git" diff --git a/rdock-utils/rdock_utils/common/SDFParser.py b/rdock-utils/rdock_utils/common/SDFParser.py index 5fba46bc..14d37ae3 100644 --- a/rdock-utils/rdock_utils/common/SDFParser.py +++ b/rdock-utils/rdock_utils/common/SDFParser.py @@ -107,3 +107,11 @@ def read_molecules(file: TextIO) -> Generator[FastSDMol, None, None]: def read_molecules_from_all_inputs(inputs: Iterable[TextIO]) -> Iterable[FastSDMol]: return itertools.chain.from_iterable(read_molecules(source) for source in inputs) + + +def molecules_with_progress_log(molecules: Iterable[FastSDMol], count: int = 1000) -> Iterable[FastSDMol]: + for i, molecule in enumerate(molecules): + if i % count == 0: + logger.info(f"read {i} 
molecules") + + yield molecule diff --git a/rdock-utils/rdock_utils/common/__init__.py b/rdock-utils/rdock_utils/common/__init__.py index 2aa39db3..8f9411a0 100644 --- a/rdock-utils/rdock_utils/common/__init__.py +++ b/rdock-utils/rdock_utils/common/__init__.py @@ -1,5 +1,5 @@ from .files import inputs_generator -from .SDFParser import FastSDMol, read_molecules, read_molecules_from_all_inputs +from .SDFParser import FastSDMol, molecules_with_progress_log, read_molecules, read_molecules_from_all_inputs from .superpose3d import MolAlignmentData, Superpose3D, update_coordinates from .types import ( AtomsMapping, @@ -19,6 +19,7 @@ "FastSDMol", "read_molecules", "read_molecules_from_all_inputs", + "molecules_with_progress_log", # -- superpose3d -- "update_coordinates", "MolAlignmentData", diff --git a/rdock-utils/rdock_utils/common/files.py b/rdock-utils/rdock_utils/common/files.py index d47197a7..898f930e 100644 --- a/rdock-utils/rdock_utils/common/files.py +++ b/rdock-utils/rdock_utils/common/files.py @@ -2,7 +2,7 @@ from typing import Generator, TextIO -def inputs_generator(inputs: list[str]) -> Generator[TextIO, None, None]: +def inputs_generator(inputs: list[str] | None) -> Generator[TextIO, None, None]: if not inputs: yield sys.stdin else: diff --git a/rdock-utils/rdock_utils/sdsort/__init__.py b/rdock-utils/rdock_utils/sdsort/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/rdock-utils/rdock_utils/sdsort/main.py b/rdock-utils/rdock_utils/sdsort/main.py new file mode 100644 index 00000000..be4f8ae8 --- /dev/null +++ b/rdock-utils/rdock_utils/sdsort/main.py @@ -0,0 +1,12 @@ +from .parser import get_config +from .sdsort import SDSort + + +def main(argv: list[str] | None = None) -> None: + config = get_config(argv) + sdsort = SDSort(config) + sdsort.run() + + +if __name__ == "__main__": + main() diff --git a/rdock-utils/rdock_utils/sdsort/parser.py b/rdock-utils/rdock_utils/sdsort/parser.py new file mode 100644 index 00000000..67d4c093 --- 
/dev/null +++ b/rdock-utils/rdock_utils/sdsort/parser.py @@ -0,0 +1,30 @@ +import argparse + +from .sdsort import SDSortConfig + + +def get_parser() -> argparse.ArgumentParser: + description = """Sorts SD records by a specified data field. + Notes: + - If no SD file list is provided, the script reads from standard input. + - Output is directed to standard output. + - Fast mode can be safely used for partial sorting of large SD files of raw docking hits without encountering memory issues. It will sort together consecutive molecules with the same value for a grouping key (the title, by default) instead of sorting all provided molecules together. + """ + parser = argparse.ArgumentParser(description=description) + sorting_field_help = "Specify the field for sorting" + parser.add_argument("--field", "-f", default="SCORE", metavar="DataField", type=str, help=sorting_field_help) + parser.add_argument("--reverse", "-r", action="store_true", help="Perform a descending sort (default: ascending)") + parser.add_argument("--numeric", "-n", action="store_true", help="Perform a numeric sort (default: text sort)") + fast_mode_help = "Enable fast mode: Sort records for each named compound independently (must be consecutive)" + parser.add_argument("--fast", "-s", action="store_true", help=fast_mode_help) + name_field_help = "Specify the grouping field for fast sorting mode (default: _TITLE1)" + parser.add_argument("--group-key", "-id", default="_TITLE1", metavar="NameField", type=str, help=name_field_help) + infile_help = "input file[s] to be processed. if not provided, stdin is used." 
+ parser.add_argument("files", nargs="*", type=str, help=infile_help) + return parser + + +def get_config(argv: list[str] | None = None) -> SDSortConfig: + parser = get_parser() + args = parser.parse_args(argv) + return SDSortConfig(args.field, args.reverse, args.numeric, args.fast, args.group_key, args.files) diff --git a/rdock-utils/rdock_utils/sdsort/sdsort.py b/rdock-utils/rdock_utils/sdsort/sdsort.py new file mode 100644 index 00000000..f6e635b0 --- /dev/null +++ b/rdock-utils/rdock_utils/sdsort/sdsort.py @@ -0,0 +1,62 @@ +import itertools +import logging +import math +import sys +from dataclasses import dataclass +from typing import Iterable, TextIO + +from rdock_utils.common import FastSDMol, inputs_generator, molecules_with_progress_log, read_molecules_from_all_inputs + +logger = logging.getLogger("sdsort") + + +@dataclass(frozen=True) +class SDSortConfig: + sorting_field: str + reverse_sort: bool + numeric_sort: bool + fast_mode: bool + group_key: str + files: list[str] | None + + +class SDSort: + def __init__(self, config: SDSortConfig, output: TextIO = sys.stdout) -> None: + self.config = config + self.output = output + + def run(self) -> None: + inputs = inputs_generator(self.config.files) + input_molecules = read_molecules_from_all_inputs(inputs) + molecules = molecules_with_progress_log(input_molecules) + sort_method = self.sort_records_fast_mode if self.config.fast_mode else self.sorted_records_normal + sorted_records = sort_method(molecules) + + for molecule in sorted_records: + molecule.write(self.output) + + def sorted_records_normal(self, molecules: Iterable[FastSDMol]) -> Iterable[FastSDMol]: + return sorted(molecules, key=self.get_sorting_value, reverse=self.config.reverse_sort) + + def sort_records_fast_mode(self, molecules: Iterable[FastSDMol]) -> Iterable[FastSDMol]: + grouped_molecules = itertools.groupby(molecules, key=lambda x: x.get_field(self.config.group_key)) + sorted_groups = (self.sorted_records_normal(group_records) for _, 
group_records in grouped_molecules) + return itertools.chain.from_iterable(sorted_groups) + + def get_sorting_value(self, molecule: FastSDMol) -> float | str: + field = molecule.get_field(self.config.sorting_field) + + if not self.config.numeric_sort: + return field or "" + + try: + if field is None: + raise ValueError("Field is missing") + return float(field) + + except ValueError as e: + logger.warning( + f"Field '{self.config.sorting_field}' for molecule {molecule.title}: {e} " + "Defaulted to to infinity. Consider using sdfilter to remove invalid results" + ) + return math.inf diff --git a/rdock-utils/tests/fixtures/sdsort/basic_input.sdf b/rdock-utils/tests/fixtures/sdsort/basic_input.sdf new file mode 100644 index 00000000..93f60773 --- /dev/null +++ b/rdock-utils/tests/fixtures/sdsort/basic_input.sdf @@ -0,0 +1,270 @@ +MOL2 +JME 2017-11-16 Fri Jan 26 21:43:27 GMT+100 2024 + + 16 17 0 0 0 0 0 0 0 0999 V2000 + 6.2184 0.0000 0.0000 I 0 0 0 0 0 0 0 0 0 0 0 0 + 5.6489 1.2789 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0 + 6.4718 2.4115 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0 + 5.9024 3.6906 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0 + 4.5101 3.8369 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0 + 3.9406 5.1159 0.0000 N 0 0 0 0 0 0 0 0 0 0 0 0 + 4.6406 6.3283 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0 + 6.0329 6.4746 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0 + 3.7039 7.3687 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0 + 2.4250 6.7993 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0 + 1.2124 7.4993 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0 + 4.2566 1.4253 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0 + 0.0000 6.7993 0.0000 O 0 0 0 0 0 0 0 0 0 0 0 0 + 2.5713 5.4070 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0 + 1.5308 4.4701 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0 + 3.6872 2.7043 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0 + 2 1 1 0 0 0 0 + 3 2 2 0 0 0 0 + 4 3 1 0 0 0 0 + 5 4 2 0 0 0 0 + 6 5 1 0 0 0 0 + 7 6 1 0 0 0 0 + 8 7 1 0 0 0 0 + 9 7 2 0 0 0 0 + 10 9 1 0 0 0 0 + 11 10 1 0 0 0 0 + 12 2 1 0 0 0 0 + 13 11 2 0 0 0 0 + 14 10 2 0 0 0 0 + 14 6 1 0 0 0 0 + 15 14 1 0 0 0 0 + 16 5 1 0 0 0 0 + 12 16 2 0 0 0 0 +M 
END +> +5.0 + +> +-15.3 + +$$$$ +MOL1 +JME 2017-11-16 Fri Jan 26 21:43:27 GMT+100 2024 + + 16 17 0 0 0 0 0 0 0 0999 V2000 + 6.2184 0.0000 0.0000 I 0 0 0 0 0 0 0 0 0 0 0 0 + 5.6489 1.2789 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0 + 6.4718 2.4115 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0 + 5.9024 3.6906 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0 + 4.5101 3.8369 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0 + 3.9406 5.1159 0.0000 N 0 0 0 0 0 0 0 0 0 0 0 0 + 4.6406 6.3283 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0 + 6.0329 6.4746 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0 + 3.7039 7.3687 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0 + 2.4250 6.7993 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0 + 1.2124 7.4993 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0 + 4.2566 1.4253 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0 + 0.0000 6.7993 0.0000 O 0 0 0 0 0 0 0 0 0 0 0 0 + 2.5713 5.4070 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0 + 1.5308 4.4701 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0 + 3.6872 2.7043 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0 + 2 1 1 0 0 0 0 + 3 2 2 0 0 0 0 + 4 3 1 0 0 0 0 + 5 4 2 0 0 0 0 + 6 5 1 0 0 0 0 + 7 6 1 0 0 0 0 + 8 7 1 0 0 0 0 + 9 7 2 0 0 0 0 + 10 9 1 0 0 0 0 + 11 10 1 0 0 0 0 + 12 2 1 0 0 0 0 + 13 11 2 0 0 0 0 + 14 10 2 0 0 0 0 + 14 6 1 0 0 0 0 + 15 14 1 0 0 0 0 + 16 5 1 0 0 0 0 + 12 16 2 0 0 0 0 +M END +> +-2.0 + +> +-1 + +$$$$ +MOL5 +JME 2017-11-16 Fri Jan 26 21:43:27 GMT+100 2024 + + 16 17 0 0 0 0 0 0 0 0999 V2000 + 6.2184 0.0000 0.0000 I 0 0 0 0 0 0 0 0 0 0 0 0 + 5.6489 1.2789 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0 + 6.4718 2.4115 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0 + 5.9024 3.6906 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0 + 4.5101 3.8369 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0 + 3.9406 5.1159 0.0000 N 0 0 0 0 0 0 0 0 0 0 0 0 + 4.6406 6.3283 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0 + 6.0329 6.4746 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0 + 3.7039 7.3687 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0 + 2.4250 6.7993 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0 + 1.2124 7.4993 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0 + 4.2566 1.4253 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0 + 0.0000 6.7993 0.0000 O 0 0 0 0 0 0 0 0 0 0 0 0 + 2.5713 5.4070 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 
0 + 1.5308 4.4701 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0 + 3.6872 2.7043 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0 + 2 1 1 0 0 0 0 + 3 2 2 0 0 0 0 + 4 3 1 0 0 0 0 + 5 4 2 0 0 0 0 + 6 5 1 0 0 0 0 + 7 6 1 0 0 0 0 + 8 7 1 0 0 0 0 + 9 7 2 0 0 0 0 + 10 9 1 0 0 0 0 + 11 10 1 0 0 0 0 + 12 2 1 0 0 0 0 + 13 11 2 0 0 0 0 + 14 10 2 0 0 0 0 + 14 6 1 0 0 0 0 + 15 14 1 0 0 0 0 + 16 5 1 0 0 0 0 + 12 16 2 0 0 0 0 +M END +> +4.0 + +> +25 + +$$$$ +MOL3 +JME 2017-11-16 Fri Jan 26 21:43:27 GMT+100 2024 + + 16 17 0 0 0 0 0 0 0 0999 V2000 + 6.2184 0.0000 0.0000 I 0 0 0 0 0 0 0 0 0 0 0 0 + 5.6489 1.2789 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0 + 6.4718 2.4115 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0 + 5.9024 3.6906 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0 + 4.5101 3.8369 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0 + 3.9406 5.1159 0.0000 N 0 0 0 0 0 0 0 0 0 0 0 0 + 4.6406 6.3283 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0 + 6.0329 6.4746 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0 + 3.7039 7.3687 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0 + 2.4250 6.7993 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0 + 1.2124 7.4993 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0 + 4.2566 1.4253 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0 + 0.0000 6.7993 0.0000 O 0 0 0 0 0 0 0 0 0 0 0 0 + 2.5713 5.4070 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0 + 1.5308 4.4701 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0 + 3.6872 2.7043 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0 + 2 1 1 0 0 0 0 + 3 2 2 0 0 0 0 + 4 3 1 0 0 0 0 + 5 4 2 0 0 0 0 + 6 5 1 0 0 0 0 + 7 6 1 0 0 0 0 + 8 7 1 0 0 0 0 + 9 7 2 0 0 0 0 + 10 9 1 0 0 0 0 + 11 10 1 0 0 0 0 + 12 2 1 0 0 0 0 + 13 11 2 0 0 0 0 + 14 10 2 0 0 0 0 + 14 6 1 0 0 0 0 + 15 14 1 0 0 0 0 + 16 5 1 0 0 0 0 + 12 16 2 0 0 0 0 +M END +> +2.0 + +> +8 + +$$$$ +MOL4 +JME 2017-11-16 Fri Jan 26 21:43:27 GMT+100 2024 + + 16 17 0 0 0 0 0 0 0 0999 V2000 + 6.2184 0.0000 0.0000 I 0 0 0 0 0 0 0 0 0 0 0 0 + 5.6489 1.2789 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0 + 6.4718 2.4115 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0 + 5.9024 3.6906 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0 + 4.5101 3.8369 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0 + 3.9406 5.1159 0.0000 N 0 0 0 0 0 0 0 0 0 0 0 0 + 
4.6406 6.3283 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0 + 6.0329 6.4746 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0 + 3.7039 7.3687 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0 + 2.4250 6.7993 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0 + 1.2124 7.4993 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0 + 4.2566 1.4253 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0 + 0.0000 6.7993 0.0000 O 0 0 0 0 0 0 0 0 0 0 0 0 + 2.5713 5.4070 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0 + 1.5308 4.4701 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0 + 3.6872 2.7043 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0 + 2 1 1 0 0 0 0 + 3 2 2 0 0 0 0 + 4 3 1 0 0 0 0 + 5 4 2 0 0 0 0 + 6 5 1 0 0 0 0 + 7 6 1 0 0 0 0 + 8 7 1 0 0 0 0 + 9 7 2 0 0 0 0 + 10 9 1 0 0 0 0 + 11 10 1 0 0 0 0 + 12 2 1 0 0 0 0 + 13 11 2 0 0 0 0 + 14 10 2 0 0 0 0 + 14 6 1 0 0 0 0 + 15 14 1 0 0 0 0 + 16 5 1 0 0 0 0 + 12 16 2 0 0 0 0 +M END +> +3.0 + +> +3 + +$$$$ +MOL6 +JME 2017-11-16 Fri Jan 26 21:43:27 GMT+100 2024 + + 16 17 0 0 0 0 0 0 0 0999 V2000 + 6.2184 0.0000 0.0000 I 0 0 0 0 0 0 0 0 0 0 0 0 + 5.6489 1.2789 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0 + 6.4718 2.4115 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0 + 5.9024 3.6906 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0 + 4.5101 3.8369 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0 + 3.9406 5.1159 0.0000 N 0 0 0 0 0 0 0 0 0 0 0 0 + 4.6406 6.3283 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0 + 6.0329 6.4746 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0 + 3.7039 7.3687 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0 + 2.4250 6.7993 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0 + 1.2124 7.4993 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0 + 4.2566 1.4253 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0 + 0.0000 6.7993 0.0000 O 0 0 0 0 0 0 0 0 0 0 0 0 + 2.5713 5.4070 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0 + 1.5308 4.4701 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0 + 3.6872 2.7043 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0 + 2 1 1 0 0 0 0 + 3 2 2 0 0 0 0 + 4 3 1 0 0 0 0 + 5 4 2 0 0 0 0 + 6 5 1 0 0 0 0 + 7 6 1 0 0 0 0 + 8 7 1 0 0 0 0 + 9 7 2 0 0 0 0 + 10 9 1 0 0 0 0 + 11 10 1 0 0 0 0 + 12 2 1 0 0 0 0 + 13 11 2 0 0 0 0 + 14 10 2 0 0 0 0 + 14 6 1 0 0 0 0 + 15 14 1 0 0 0 0 + 16 5 1 0 0 0 0 + 12 16 2 0 0 0 0 +M END +> +-1.0 + +> +1 + +$$$$ \ 
No newline at end of file diff --git a/rdock-utils/tests/fixtures/sdsort/fast_mode_input.sdf b/rdock-utils/tests/fixtures/sdsort/fast_mode_input.sdf new file mode 100644 index 00000000..19a339b3 --- /dev/null +++ b/rdock-utils/tests/fixtures/sdsort/fast_mode_input.sdf @@ -0,0 +1,450 @@ +MOL1 +JME 2017-11-16 Fri Jan 26 21:43:27 GMT+100 2024 + + 16 17 0 0 0 0 0 0 0 0999 V2000 + 6.2184 0.0000 0.0000 I 0 0 0 0 0 0 0 0 0 0 0 0 + 5.6489 1.2789 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0 + 6.4718 2.4115 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0 + 5.9024 3.6906 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0 + 4.5101 3.8369 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0 + 3.9406 5.1159 0.0000 N 0 0 0 0 0 0 0 0 0 0 0 0 + 4.6406 6.3283 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0 + 6.0329 6.4746 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0 + 3.7039 7.3687 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0 + 2.4250 6.7993 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0 + 1.2124 7.4993 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0 + 4.2566 1.4253 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0 + 0.0000 6.7993 0.0000 O 0 0 0 0 0 0 0 0 0 0 0 0 + 2.5713 5.4070 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0 + 1.5308 4.4701 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0 + 3.6872 2.7043 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0 + 2 1 1 0 0 0 0 + 3 2 2 0 0 0 0 + 4 3 1 0 0 0 0 + 5 4 2 0 0 0 0 + 6 5 1 0 0 0 0 + 7 6 1 0 0 0 0 + 8 7 1 0 0 0 0 + 9 7 2 0 0 0 0 + 10 9 1 0 0 0 0 + 11 10 1 0 0 0 0 + 12 2 1 0 0 0 0 + 13 11 2 0 0 0 0 + 14 10 2 0 0 0 0 + 14 6 1 0 0 0 0 + 15 14 1 0 0 0 0 + 16 5 1 0 0 0 0 + 12 16 2 0 0 0 0 +M END +> +4.0 + +> +14 + +$$$$ +MOL1 +JME 2017-11-16 Fri Jan 26 21:43:27 GMT+100 2024 + + 16 17 0 0 0 0 0 0 0 0999 V2000 + 6.2184 0.0000 0.0000 I 0 0 0 0 0 0 0 0 0 0 0 0 + 5.6489 1.2789 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0 + 6.4718 2.4115 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0 + 5.9024 3.6906 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0 + 4.5101 3.8369 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0 + 3.9406 5.1159 0.0000 N 0 0 0 0 0 0 0 0 0 0 0 0 + 4.6406 6.3283 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0 + 6.0329 6.4746 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0 + 3.7039 7.3687 0.0000 C 0 0 0 0 0 0 0 
0 0 0 0 0 + 2.4250 6.7993 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0 + 1.2124 7.4993 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0 + 4.2566 1.4253 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0 + 0.0000 6.7993 0.0000 O 0 0 0 0 0 0 0 0 0 0 0 0 + 2.5713 5.4070 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0 + 1.5308 4.4701 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0 + 3.6872 2.7043 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0 + 2 1 1 0 0 0 0 + 3 2 2 0 0 0 0 + 4 3 1 0 0 0 0 + 5 4 2 0 0 0 0 + 6 5 1 0 0 0 0 + 7 6 1 0 0 0 0 + 8 7 1 0 0 0 0 + 9 7 2 0 0 0 0 + 10 9 1 0 0 0 0 + 11 10 1 0 0 0 0 + 12 2 1 0 0 0 0 + 13 11 2 0 0 0 0 + 14 10 2 0 0 0 0 + 14 6 1 0 0 0 0 + 15 14 1 0 0 0 0 + 16 5 1 0 0 0 0 + 12 16 2 0 0 0 0 +M END +> +0.0 + +> +-1 + +$$$$ +MOL2 +JME 2017-11-16 Fri Jan 26 21:43:27 GMT+100 2024 + + 16 17 0 0 0 0 0 0 0 0999 V2000 + 6.2184 0.0000 0.0000 I 0 0 0 0 0 0 0 0 0 0 0 0 + 5.6489 1.2789 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0 + 6.4718 2.4115 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0 + 5.9024 3.6906 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0 + 4.5101 3.8369 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0 + 3.9406 5.1159 0.0000 N 0 0 0 0 0 0 0 0 0 0 0 0 + 4.6406 6.3283 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0 + 6.0329 6.4746 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0 + 3.7039 7.3687 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0 + 2.4250 6.7993 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0 + 1.2124 7.4993 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0 + 4.2566 1.4253 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0 + 0.0000 6.7993 0.0000 O 0 0 0 0 0 0 0 0 0 0 0 0 + 2.5713 5.4070 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0 + 1.5308 4.4701 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0 + 3.6872 2.7043 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0 + 2 1 1 0 0 0 0 + 3 2 2 0 0 0 0 + 4 3 1 0 0 0 0 + 5 4 2 0 0 0 0 + 6 5 1 0 0 0 0 + 7 6 1 0 0 0 0 + 8 7 1 0 0 0 0 + 9 7 2 0 0 0 0 + 10 9 1 0 0 0 0 + 11 10 1 0 0 0 0 + 12 2 1 0 0 0 0 + 13 11 2 0 0 0 0 + 14 10 2 0 0 0 0 + 14 6 1 0 0 0 0 + 15 14 1 0 0 0 0 + 16 5 1 0 0 0 0 + 12 16 2 0 0 0 0 +M END +> +0.0 + +> +-15.3 + +$$$$ +MOL2 +JME 2017-11-16 Fri Jan 26 21:43:27 GMT+100 2024 + + 16 17 0 0 0 0 0 0 0 0999 V2000 + 6.2184 0.0000 0.0000 I 0 0 0 0 0 0 0 0 0 
0 0 0 + 5.6489 1.2789 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0 + 6.4718 2.4115 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0 + 5.9024 3.6906 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0 + 4.5101 3.8369 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0 + 3.9406 5.1159 0.0000 N 0 0 0 0 0 0 0 0 0 0 0 0 + 4.6406 6.3283 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0 + 6.0329 6.4746 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0 + 3.7039 7.3687 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0 + 2.4250 6.7993 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0 + 1.2124 7.4993 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0 + 4.2566 1.4253 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0 + 0.0000 6.7993 0.0000 O 0 0 0 0 0 0 0 0 0 0 0 0 + 2.5713 5.4070 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0 + 1.5308 4.4701 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0 + 3.6872 2.7043 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0 + 2 1 1 0 0 0 0 + 3 2 2 0 0 0 0 + 4 3 1 0 0 0 0 + 5 4 2 0 0 0 0 + 6 5 1 0 0 0 0 + 7 6 1 0 0 0 0 + 8 7 1 0 0 0 0 + 9 7 2 0 0 0 0 + 10 9 1 0 0 0 0 + 11 10 1 0 0 0 0 + 12 2 1 0 0 0 0 + 13 11 2 0 0 0 0 + 14 10 2 0 0 0 0 + 14 6 1 0 0 0 0 + 15 14 1 0 0 0 0 + 16 5 1 0 0 0 0 + 12 16 2 0 0 0 0 +M END +> +0.0 + +> +-12 + +$$$$ +MOL2 +JME 2017-11-16 Fri Jan 26 21:43:27 GMT+100 2024 + + 16 17 0 0 0 0 0 0 0 0999 V2000 + 6.2184 0.0000 0.0000 I 0 0 0 0 0 0 0 0 0 0 0 0 + 5.6489 1.2789 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0 + 6.4718 2.4115 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0 + 5.9024 3.6906 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0 + 4.5101 3.8369 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0 + 3.9406 5.1159 0.0000 N 0 0 0 0 0 0 0 0 0 0 0 0 + 4.6406 6.3283 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0 + 6.0329 6.4746 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0 + 3.7039 7.3687 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0 + 2.4250 6.7993 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0 + 1.2124 7.4993 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0 + 4.2566 1.4253 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0 + 0.0000 6.7993 0.0000 O 0 0 0 0 0 0 0 0 0 0 0 0 + 2.5713 5.4070 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0 + 1.5308 4.4701 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0 + 3.6872 2.7043 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0 + 2 1 1 0 0 0 0 + 3 2 2 0 0 0 0 + 4 3 1 0 0 0 0 + 5 4 2 0 0 0 0 + 
6 5 1 0 0 0 0 + 7 6 1 0 0 0 0 + 8 7 1 0 0 0 0 + 9 7 2 0 0 0 0 + 10 9 1 0 0 0 0 + 11 10 1 0 0 0 0 + 12 2 1 0 0 0 0 + 13 11 2 0 0 0 0 + 14 10 2 0 0 0 0 + 14 6 1 0 0 0 0 + 15 14 1 0 0 0 0 + 16 5 1 0 0 0 0 + 12 16 2 0 0 0 0 +M END +> +0.0 + +> +4 + +$$$$ +MOL2 +JME 2017-11-16 Fri Jan 26 21:43:27 GMT+100 2024 + + 16 17 0 0 0 0 0 0 0 0999 V2000 + 6.2184 0.0000 0.0000 I 0 0 0 0 0 0 0 0 0 0 0 0 + 5.6489 1.2789 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0 + 6.4718 2.4115 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0 + 5.9024 3.6906 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0 + 4.5101 3.8369 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0 + 3.9406 5.1159 0.0000 N 0 0 0 0 0 0 0 0 0 0 0 0 + 4.6406 6.3283 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0 + 6.0329 6.4746 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0 + 3.7039 7.3687 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0 + 2.4250 6.7993 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0 + 1.2124 7.4993 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0 + 4.2566 1.4253 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0 + 0.0000 6.7993 0.0000 O 0 0 0 0 0 0 0 0 0 0 0 0 + 2.5713 5.4070 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0 + 1.5308 4.4701 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0 + 3.6872 2.7043 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0 + 2 1 1 0 0 0 0 + 3 2 2 0 0 0 0 + 4 3 1 0 0 0 0 + 5 4 2 0 0 0 0 + 6 5 1 0 0 0 0 + 7 6 1 0 0 0 0 + 8 7 1 0 0 0 0 + 9 7 2 0 0 0 0 + 10 9 1 0 0 0 0 + 11 10 1 0 0 0 0 + 12 2 1 0 0 0 0 + 13 11 2 0 0 0 0 + 14 10 2 0 0 0 0 + 14 6 1 0 0 0 0 + 15 14 1 0 0 0 0 + 16 5 1 0 0 0 0 + 12 16 2 0 0 0 0 +M END +> +0.0 + +> +5 + +$$$$ +MOL3 +JME 2017-11-16 Fri Jan 26 21:43:27 GMT+100 2024 + + 16 17 0 0 0 0 0 0 0 0999 V2000 + 6.2184 0.0000 0.0000 I 0 0 0 0 0 0 0 0 0 0 0 0 + 5.6489 1.2789 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0 + 6.4718 2.4115 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0 + 5.9024 3.6906 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0 + 4.5101 3.8369 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0 + 3.9406 5.1159 0.0000 N 0 0 0 0 0 0 0 0 0 0 0 0 + 4.6406 6.3283 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0 + 6.0329 6.4746 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0 + 3.7039 7.3687 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0 + 2.4250 6.7993 0.0000 C 0 
0 0 0 0 0 0 0 0 0 0 0 + 1.2124 7.4993 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0 + 4.2566 1.4253 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0 + 0.0000 6.7993 0.0000 O 0 0 0 0 0 0 0 0 0 0 0 0 + 2.5713 5.4070 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0 + 1.5308 4.4701 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0 + 3.6872 2.7043 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0 + 2 1 1 0 0 0 0 + 3 2 2 0 0 0 0 + 4 3 1 0 0 0 0 + 5 4 2 0 0 0 0 + 6 5 1 0 0 0 0 + 7 6 1 0 0 0 0 + 8 7 1 0 0 0 0 + 9 7 2 0 0 0 0 + 10 9 1 0 0 0 0 + 11 10 1 0 0 0 0 + 12 2 1 0 0 0 0 + 13 11 2 0 0 0 0 + 14 10 2 0 0 0 0 + 14 6 1 0 0 0 0 + 15 14 1 0 0 0 0 + 16 5 1 0 0 0 0 + 12 16 2 0 0 0 0 +M END +> +4.0 + +> +25 + +$$$$ +MOL3 +JME 2017-11-16 Fri Jan 26 21:43:27 GMT+100 2024 + + 16 17 0 0 0 0 0 0 0 0999 V2000 + 6.2184 0.0000 0.0000 I 0 0 0 0 0 0 0 0 0 0 0 0 + 5.6489 1.2789 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0 + 6.4718 2.4115 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0 + 5.9024 3.6906 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0 + 4.5101 3.8369 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0 + 3.9406 5.1159 0.0000 N 0 0 0 0 0 0 0 0 0 0 0 0 + 4.6406 6.3283 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0 + 6.0329 6.4746 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0 + 3.7039 7.3687 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0 + 2.4250 6.7993 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0 + 1.2124 7.4993 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0 + 4.2566 1.4253 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0 + 0.0000 6.7993 0.0000 O 0 0 0 0 0 0 0 0 0 0 0 0 + 2.5713 5.4070 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0 + 1.5308 4.4701 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0 + 3.6872 2.7043 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0 + 2 1 1 0 0 0 0 + 3 2 2 0 0 0 0 + 4 3 1 0 0 0 0 + 5 4 2 0 0 0 0 + 6 5 1 0 0 0 0 + 7 6 1 0 0 0 0 + 8 7 1 0 0 0 0 + 9 7 2 0 0 0 0 + 10 9 1 0 0 0 0 + 11 10 1 0 0 0 0 + 12 2 1 0 0 0 0 + 13 11 2 0 0 0 0 + 14 10 2 0 0 0 0 + 14 6 1 0 0 0 0 + 15 14 1 0 0 0 0 + 16 5 1 0 0 0 0 + 12 16 2 0 0 0 0 +M END +> +4.0 + +> +13 + +$$$$ +MOL3 +JME 2017-11-16 Fri Jan 26 21:43:27 GMT+100 2024 + + 16 17 0 0 0 0 0 0 0 0999 V2000 + 6.2184 0.0000 0.0000 I 0 0 0 0 0 0 0 0 0 0 0 0 + 5.6489 1.2789 0.0000 C 0 0 0 0 0 
0 0 0 0 0 0 0 + 6.4718 2.4115 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0 + 5.9024 3.6906 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0 + 4.5101 3.8369 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0 + 3.9406 5.1159 0.0000 N 0 0 0 0 0 0 0 0 0 0 0 0 + 4.6406 6.3283 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0 + 6.0329 6.4746 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0 + 3.7039 7.3687 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0 + 2.4250 6.7993 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0 + 1.2124 7.4993 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0 + 4.2566 1.4253 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0 + 0.0000 6.7993 0.0000 O 0 0 0 0 0 0 0 0 0 0 0 0 + 2.5713 5.4070 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0 + 1.5308 4.4701 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0 + 3.6872 2.7043 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0 + 2 1 1 0 0 0 0 + 3 2 2 0 0 0 0 + 4 3 1 0 0 0 0 + 5 4 2 0 0 0 0 + 6 5 1 0 0 0 0 + 7 6 1 0 0 0 0 + 8 7 1 0 0 0 0 + 9 7 2 0 0 0 0 + 10 9 1 0 0 0 0 + 11 10 1 0 0 0 0 + 12 2 1 0 0 0 0 + 13 11 2 0 0 0 0 + 14 10 2 0 0 0 0 + 14 6 1 0 0 0 0 + 15 14 1 0 0 0 0 + 16 5 1 0 0 0 0 + 12 16 2 0 0 0 0 +M END +> +4.0 + +> +2 + +$$$$ +MOL4 +JME 2017-11-16 Fri Jan 26 21:43:27 GMT+100 2024 + + 16 17 0 0 0 0 0 0 0 0999 V2000 + 6.2184 0.0000 0.0000 I 0 0 0 0 0 0 0 0 0 0 0 0 + 5.6489 1.2789 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0 + 6.4718 2.4115 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0 + 5.9024 3.6906 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0 + 4.5101 3.8369 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0 + 3.9406 5.1159 0.0000 N 0 0 0 0 0 0 0 0 0 0 0 0 + 4.6406 6.3283 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0 + 6.0329 6.4746 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0 + 3.7039 7.3687 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0 + 2.4250 6.7993 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0 + 1.2124 7.4993 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0 + 4.2566 1.4253 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0 + 0.0000 6.7993 0.0000 O 0 0 0 0 0 0 0 0 0 0 0 0 + 2.5713 5.4070 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0 + 1.5308 4.4701 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0 + 3.6872 2.7043 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0 + 2 1 1 0 0 0 0 + 3 2 2 0 0 0 0 + 4 3 1 0 0 0 0 + 5 4 2 0 0 0 0 + 6 5 1 0 0 0 0 + 7 6 1 0 0 0 0 + 8 7 1 0 0 
0 0 + 9 7 2 0 0 0 0 + 10 9 1 0 0 0 0 + 11 10 1 0 0 0 0 + 12 2 1 0 0 0 0 + 13 11 2 0 0 0 0 + 14 10 2 0 0 0 0 + 14 6 1 0 0 0 0 + 15 14 1 0 0 0 0 + 16 5 1 0 0 0 0 + 12 16 2 0 0 0 0 +M END +> +2.0 + +> +8 + +$$$$ \ No newline at end of file diff --git a/rdock-utils/tests/fixtures/sdsort/fast_mode_input_with_id.sdf b/rdock-utils/tests/fixtures/sdsort/fast_mode_input_with_id.sdf new file mode 100644 index 00000000..c1ca0126 --- /dev/null +++ b/rdock-utils/tests/fixtures/sdsort/fast_mode_input_with_id.sdf @@ -0,0 +1,460 @@ +MOL1 +id1 +JME 2017-11-16 Fri Jan 26 21:43:27 GMT+100 2024 + + 16 17 0 0 0 0 0 0 0 0999 V2000 + 6.2184 0.0000 0.0000 I 0 0 0 0 0 0 0 0 0 0 0 0 + 5.6489 1.2789 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0 + 6.4718 2.4115 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0 + 5.9024 3.6906 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0 + 4.5101 3.8369 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0 + 3.9406 5.1159 0.0000 N 0 0 0 0 0 0 0 0 0 0 0 0 + 4.6406 6.3283 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0 + 6.0329 6.4746 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0 + 3.7039 7.3687 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0 + 2.4250 6.7993 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0 + 1.2124 7.4993 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0 + 4.2566 1.4253 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0 + 0.0000 6.7993 0.0000 O 0 0 0 0 0 0 0 0 0 0 0 0 + 2.5713 5.4070 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0 + 1.5308 4.4701 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0 + 3.6872 2.7043 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0 + 2 1 1 0 0 0 0 + 3 2 2 0 0 0 0 + 4 3 1 0 0 0 0 + 5 4 2 0 0 0 0 + 6 5 1 0 0 0 0 + 7 6 1 0 0 0 0 + 8 7 1 0 0 0 0 + 9 7 2 0 0 0 0 + 10 9 1 0 0 0 0 + 11 10 1 0 0 0 0 + 12 2 1 0 0 0 0 + 13 11 2 0 0 0 0 + 14 10 2 0 0 0 0 + 14 6 1 0 0 0 0 + 15 14 1 0 0 0 0 + 16 5 1 0 0 0 0 + 12 16 2 0 0 0 0 +M END +> +4.0 + +> +14 + +$$$$ +MOL1 +id2 +JME 2017-11-16 Fri Jan 26 21:43:27 GMT+100 2024 + + 16 17 0 0 0 0 0 0 0 0999 V2000 + 6.2184 0.0000 0.0000 I 0 0 0 0 0 0 0 0 0 0 0 0 + 5.6489 1.2789 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0 + 6.4718 2.4115 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0 + 5.9024 3.6906 0.0000 C 0 0 0 0 0 0 0 
0 0 0 0 0 + 4.5101 3.8369 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0 + 3.9406 5.1159 0.0000 N 0 0 0 0 0 0 0 0 0 0 0 0 + 4.6406 6.3283 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0 + 6.0329 6.4746 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0 + 3.7039 7.3687 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0 + 2.4250 6.7993 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0 + 1.2124 7.4993 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0 + 4.2566 1.4253 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0 + 0.0000 6.7993 0.0000 O 0 0 0 0 0 0 0 0 0 0 0 0 + 2.5713 5.4070 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0 + 1.5308 4.4701 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0 + 3.6872 2.7043 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0 + 2 1 1 0 0 0 0 + 3 2 2 0 0 0 0 + 4 3 1 0 0 0 0 + 5 4 2 0 0 0 0 + 6 5 1 0 0 0 0 + 7 6 1 0 0 0 0 + 8 7 1 0 0 0 0 + 9 7 2 0 0 0 0 + 10 9 1 0 0 0 0 + 11 10 1 0 0 0 0 + 12 2 1 0 0 0 0 + 13 11 2 0 0 0 0 + 14 10 2 0 0 0 0 + 14 6 1 0 0 0 0 + 15 14 1 0 0 0 0 + 16 5 1 0 0 0 0 + 12 16 2 0 0 0 0 +M END +> +0.0 + +> +-1 + +$$$$ +MOL2 +id3 +JME 2017-11-16 Fri Jan 26 21:43:27 GMT+100 2024 + + 16 17 0 0 0 0 0 0 0 0999 V2000 + 6.2184 0.0000 0.0000 I 0 0 0 0 0 0 0 0 0 0 0 0 + 5.6489 1.2789 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0 + 6.4718 2.4115 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0 + 5.9024 3.6906 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0 + 4.5101 3.8369 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0 + 3.9406 5.1159 0.0000 N 0 0 0 0 0 0 0 0 0 0 0 0 + 4.6406 6.3283 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0 + 6.0329 6.4746 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0 + 3.7039 7.3687 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0 + 2.4250 6.7993 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0 + 1.2124 7.4993 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0 + 4.2566 1.4253 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0 + 0.0000 6.7993 0.0000 O 0 0 0 0 0 0 0 0 0 0 0 0 + 2.5713 5.4070 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0 + 1.5308 4.4701 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0 + 3.6872 2.7043 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0 + 2 1 1 0 0 0 0 + 3 2 2 0 0 0 0 + 4 3 1 0 0 0 0 + 5 4 2 0 0 0 0 + 6 5 1 0 0 0 0 + 7 6 1 0 0 0 0 + 8 7 1 0 0 0 0 + 9 7 2 0 0 0 0 + 10 9 1 0 0 0 0 + 11 10 1 0 0 0 0 + 12 2 1 0 0 0 0 + 13 11 2 0 0 0 0 + 14 
10 2 0 0 0 0 + 14 6 1 0 0 0 0 + 15 14 1 0 0 0 0 + 16 5 1 0 0 0 0 + 12 16 2 0 0 0 0 +M END +> +0.0 + +> +-15.3 + +$$$$ +MOL2 +id4 +JME 2017-11-16 Fri Jan 26 21:43:27 GMT+100 2024 + + 16 17 0 0 0 0 0 0 0 0999 V2000 + 6.2184 0.0000 0.0000 I 0 0 0 0 0 0 0 0 0 0 0 0 + 5.6489 1.2789 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0 + 6.4718 2.4115 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0 + 5.9024 3.6906 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0 + 4.5101 3.8369 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0 + 3.9406 5.1159 0.0000 N 0 0 0 0 0 0 0 0 0 0 0 0 + 4.6406 6.3283 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0 + 6.0329 6.4746 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0 + 3.7039 7.3687 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0 + 2.4250 6.7993 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0 + 1.2124 7.4993 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0 + 4.2566 1.4253 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0 + 0.0000 6.7993 0.0000 O 0 0 0 0 0 0 0 0 0 0 0 0 + 2.5713 5.4070 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0 + 1.5308 4.4701 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0 + 3.6872 2.7043 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0 + 2 1 1 0 0 0 0 + 3 2 2 0 0 0 0 + 4 3 1 0 0 0 0 + 5 4 2 0 0 0 0 + 6 5 1 0 0 0 0 + 7 6 1 0 0 0 0 + 8 7 1 0 0 0 0 + 9 7 2 0 0 0 0 + 10 9 1 0 0 0 0 + 11 10 1 0 0 0 0 + 12 2 1 0 0 0 0 + 13 11 2 0 0 0 0 + 14 10 2 0 0 0 0 + 14 6 1 0 0 0 0 + 15 14 1 0 0 0 0 + 16 5 1 0 0 0 0 + 12 16 2 0 0 0 0 +M END +> +0.0 + +> +-12 + +$$$$ +MOL2 +id5 +JME 2017-11-16 Fri Jan 26 21:43:27 GMT+100 2024 + + 16 17 0 0 0 0 0 0 0 0999 V2000 + 6.2184 0.0000 0.0000 I 0 0 0 0 0 0 0 0 0 0 0 0 + 5.6489 1.2789 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0 + 6.4718 2.4115 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0 + 5.9024 3.6906 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0 + 4.5101 3.8369 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0 + 3.9406 5.1159 0.0000 N 0 0 0 0 0 0 0 0 0 0 0 0 + 4.6406 6.3283 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0 + 6.0329 6.4746 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0 + 3.7039 7.3687 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0 + 2.4250 6.7993 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0 + 1.2124 7.4993 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0 + 4.2566 1.4253 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0 
+ 0.0000 6.7993 0.0000 O 0 0 0 0 0 0 0 0 0 0 0 0 + 2.5713 5.4070 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0 + 1.5308 4.4701 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0 + 3.6872 2.7043 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0 + 2 1 1 0 0 0 0 + 3 2 2 0 0 0 0 + 4 3 1 0 0 0 0 + 5 4 2 0 0 0 0 + 6 5 1 0 0 0 0 + 7 6 1 0 0 0 0 + 8 7 1 0 0 0 0 + 9 7 2 0 0 0 0 + 10 9 1 0 0 0 0 + 11 10 1 0 0 0 0 + 12 2 1 0 0 0 0 + 13 11 2 0 0 0 0 + 14 10 2 0 0 0 0 + 14 6 1 0 0 0 0 + 15 14 1 0 0 0 0 + 16 5 1 0 0 0 0 + 12 16 2 0 0 0 0 +M END +> +0.0 + +> +4 + +$$$$ +MOL2 +id6 +JME 2017-11-16 Fri Jan 26 21:43:27 GMT+100 2024 + + 16 17 0 0 0 0 0 0 0 0999 V2000 + 6.2184 0.0000 0.0000 I 0 0 0 0 0 0 0 0 0 0 0 0 + 5.6489 1.2789 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0 + 6.4718 2.4115 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0 + 5.9024 3.6906 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0 + 4.5101 3.8369 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0 + 3.9406 5.1159 0.0000 N 0 0 0 0 0 0 0 0 0 0 0 0 + 4.6406 6.3283 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0 + 6.0329 6.4746 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0 + 3.7039 7.3687 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0 + 2.4250 6.7993 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0 + 1.2124 7.4993 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0 + 4.2566 1.4253 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0 + 0.0000 6.7993 0.0000 O 0 0 0 0 0 0 0 0 0 0 0 0 + 2.5713 5.4070 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0 + 1.5308 4.4701 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0 + 3.6872 2.7043 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0 + 2 1 1 0 0 0 0 + 3 2 2 0 0 0 0 + 4 3 1 0 0 0 0 + 5 4 2 0 0 0 0 + 6 5 1 0 0 0 0 + 7 6 1 0 0 0 0 + 8 7 1 0 0 0 0 + 9 7 2 0 0 0 0 + 10 9 1 0 0 0 0 + 11 10 1 0 0 0 0 + 12 2 1 0 0 0 0 + 13 11 2 0 0 0 0 + 14 10 2 0 0 0 0 + 14 6 1 0 0 0 0 + 15 14 1 0 0 0 0 + 16 5 1 0 0 0 0 + 12 16 2 0 0 0 0 +M END +> +0.0 + +> +5 + +$$$$ +MOL3 +id7 +JME 2017-11-16 Fri Jan 26 21:43:27 GMT+100 2024 + + 16 17 0 0 0 0 0 0 0 0999 V2000 + 6.2184 0.0000 0.0000 I 0 0 0 0 0 0 0 0 0 0 0 0 + 5.6489 1.2789 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0 + 6.4718 2.4115 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0 + 5.9024 3.6906 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0 
+ 4.5101 3.8369 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0 + 3.9406 5.1159 0.0000 N 0 0 0 0 0 0 0 0 0 0 0 0 + 4.6406 6.3283 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0 + 6.0329 6.4746 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0 + 3.7039 7.3687 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0 + 2.4250 6.7993 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0 + 1.2124 7.4993 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0 + 4.2566 1.4253 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0 + 0.0000 6.7993 0.0000 O 0 0 0 0 0 0 0 0 0 0 0 0 + 2.5713 5.4070 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0 + 1.5308 4.4701 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0 + 3.6872 2.7043 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0 + 2 1 1 0 0 0 0 + 3 2 2 0 0 0 0 + 4 3 1 0 0 0 0 + 5 4 2 0 0 0 0 + 6 5 1 0 0 0 0 + 7 6 1 0 0 0 0 + 8 7 1 0 0 0 0 + 9 7 2 0 0 0 0 + 10 9 1 0 0 0 0 + 11 10 1 0 0 0 0 + 12 2 1 0 0 0 0 + 13 11 2 0 0 0 0 + 14 10 2 0 0 0 0 + 14 6 1 0 0 0 0 + 15 14 1 0 0 0 0 + 16 5 1 0 0 0 0 + 12 16 2 0 0 0 0 +M END +> +4.0 + +> +25 + +$$$$ +MOL3 +id8 +JME 2017-11-16 Fri Jan 26 21:43:27 GMT+100 2024 + + 16 17 0 0 0 0 0 0 0 0999 V2000 + 6.2184 0.0000 0.0000 I 0 0 0 0 0 0 0 0 0 0 0 0 + 5.6489 1.2789 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0 + 6.4718 2.4115 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0 + 5.9024 3.6906 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0 + 4.5101 3.8369 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0 + 3.9406 5.1159 0.0000 N 0 0 0 0 0 0 0 0 0 0 0 0 + 4.6406 6.3283 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0 + 6.0329 6.4746 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0 + 3.7039 7.3687 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0 + 2.4250 6.7993 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0 + 1.2124 7.4993 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0 + 4.2566 1.4253 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0 + 0.0000 6.7993 0.0000 O 0 0 0 0 0 0 0 0 0 0 0 0 + 2.5713 5.4070 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0 + 1.5308 4.4701 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0 + 3.6872 2.7043 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0 + 2 1 1 0 0 0 0 + 3 2 2 0 0 0 0 + 4 3 1 0 0 0 0 + 5 4 2 0 0 0 0 + 6 5 1 0 0 0 0 + 7 6 1 0 0 0 0 + 8 7 1 0 0 0 0 + 9 7 2 0 0 0 0 + 10 9 1 0 0 0 0 + 11 10 1 0 0 0 0 + 12 2 1 0 0 0 0 + 13 11 2 0 0 0 0 + 14 10 2 0 0 0 
0 + 14 6 1 0 0 0 0 + 15 14 1 0 0 0 0 + 16 5 1 0 0 0 0 + 12 16 2 0 0 0 0 +M END +> +4.0 + +> +13 + +$$$$ +MOL3 +id9 +JME 2017-11-16 Fri Jan 26 21:43:27 GMT+100 2024 + + 16 17 0 0 0 0 0 0 0 0999 V2000 + 6.2184 0.0000 0.0000 I 0 0 0 0 0 0 0 0 0 0 0 0 + 5.6489 1.2789 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0 + 6.4718 2.4115 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0 + 5.9024 3.6906 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0 + 4.5101 3.8369 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0 + 3.9406 5.1159 0.0000 N 0 0 0 0 0 0 0 0 0 0 0 0 + 4.6406 6.3283 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0 + 6.0329 6.4746 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0 + 3.7039 7.3687 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0 + 2.4250 6.7993 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0 + 1.2124 7.4993 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0 + 4.2566 1.4253 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0 + 0.0000 6.7993 0.0000 O 0 0 0 0 0 0 0 0 0 0 0 0 + 2.5713 5.4070 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0 + 1.5308 4.4701 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0 + 3.6872 2.7043 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0 + 2 1 1 0 0 0 0 + 3 2 2 0 0 0 0 + 4 3 1 0 0 0 0 + 5 4 2 0 0 0 0 + 6 5 1 0 0 0 0 + 7 6 1 0 0 0 0 + 8 7 1 0 0 0 0 + 9 7 2 0 0 0 0 + 10 9 1 0 0 0 0 + 11 10 1 0 0 0 0 + 12 2 1 0 0 0 0 + 13 11 2 0 0 0 0 + 14 10 2 0 0 0 0 + 14 6 1 0 0 0 0 + 15 14 1 0 0 0 0 + 16 5 1 0 0 0 0 + 12 16 2 0 0 0 0 +M END +> +4.0 + +> +2 + +$$$$ +MOL4 +id10 +JME 2017-11-16 Fri Jan 26 21:43:27 GMT+100 2024 + + 16 17 0 0 0 0 0 0 0 0999 V2000 + 6.2184 0.0000 0.0000 I 0 0 0 0 0 0 0 0 0 0 0 0 + 5.6489 1.2789 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0 + 6.4718 2.4115 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0 + 5.9024 3.6906 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0 + 4.5101 3.8369 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0 + 3.9406 5.1159 0.0000 N 0 0 0 0 0 0 0 0 0 0 0 0 + 4.6406 6.3283 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0 + 6.0329 6.4746 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0 + 3.7039 7.3687 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0 + 2.4250 6.7993 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0 + 1.2124 7.4993 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0 + 4.2566 1.4253 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0 + 0.0000 6.7993 
0.0000 O 0 0 0 0 0 0 0 0 0 0 0 0 + 2.5713 5.4070 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0 + 1.5308 4.4701 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0 + 3.6872 2.7043 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0 + 2 1 1 0 0 0 0 + 3 2 2 0 0 0 0 + 4 3 1 0 0 0 0 + 5 4 2 0 0 0 0 + 6 5 1 0 0 0 0 + 7 6 1 0 0 0 0 + 8 7 1 0 0 0 0 + 9 7 2 0 0 0 0 + 10 9 1 0 0 0 0 + 11 10 1 0 0 0 0 + 12 2 1 0 0 0 0 + 13 11 2 0 0 0 0 + 14 10 2 0 0 0 0 + 14 6 1 0 0 0 0 + 15 14 1 0 0 0 0 + 16 5 1 0 0 0 0 + 12 16 2 0 0 0 0 +M END +> +2.0 + +> +8 + +$$$$ \ No newline at end of file diff --git a/rdock-utils/tests/sdsort/__init__.py b/rdock-utils/tests/sdsort/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/rdock-utils/tests/sdsort/conftest.py b/rdock-utils/tests/sdsort/conftest.py new file mode 100644 index 00000000..26178eb1 --- /dev/null +++ b/rdock-utils/tests/sdsort/conftest.py @@ -0,0 +1,7 @@ +from ..conftest import FIXTURES_FOLDER + +SDSORT_FIXTURES_FOLDER = FIXTURES_FOLDER / "sdsort" + +BASIC_INPUT_FILE = str(SDSORT_FIXTURES_FOLDER / "basic_input.sdf") +FAST_MODE_INPUT_FILE = str(SDSORT_FIXTURES_FOLDER / "fast_mode_input.sdf") +FAST_MODE_INPUT_WITH_ID_FILE = str(SDSORT_FIXTURES_FOLDER / "fast_mode_input_with_id.sdf") diff --git a/rdock-utils/tests/sdsort/test_integration.py b/rdock-utils/tests/sdsort/test_integration.py new file mode 100644 index 00000000..3c90ebfb --- /dev/null +++ b/rdock-utils/tests/sdsort/test_integration.py @@ -0,0 +1,102 @@ +import itertools +from io import StringIO +from typing import Iterable + +import pytest + +from rdock_utils.common import read_molecules +from rdock_utils.sdsort.parser import get_config +from rdock_utils.sdsort.sdsort import SDSort + +from .conftest import BASIC_INPUT_FILE, FAST_MODE_INPUT_FILE, FAST_MODE_INPUT_WITH_ID_FILE + + +def get_data(path: str, key_field: str = "_TITLE1", value_field: str = "SCORE") -> list[tuple[str, str]]: + with open(path, "r") as f: + molecules = read_molecules(f) + return [(m.get_field(key_field), m.get_field(value_field)) for 
m in molecules] + + +def sorted_grouped_values( + values: Iterable[str], + reverse: bool = False, + numeric: bool = False, +) -> list[tuple[str, str]]: + convert = float if numeric else str + return list( + itertools.chain.from_iterable( + sorted(values, key=lambda x: convert(x[1]), reverse=reverse) + for _, values in itertools.groupby(values, key=lambda x: x[0]) + ) + ) + + +@pytest.mark.parametrize("numeric", [pytest.param(False, id="lexicographic"), pytest.param(True, id="numeric")]) +@pytest.mark.parametrize("reverse", [pytest.param(False, id="ascending"), pytest.param(True, id="descending")]) +@pytest.mark.parametrize( + "sorting_field", + [pytest.param(["-f", "test_field"], id="custom field"), pytest.param([], id="default field")], +) +def test_sdsort_basic(numeric: bool, reverse: bool, sorting_field: list[str]): + args = sorting_field + (["-n"] if numeric else []) + (["-r"] if reverse else []) + [BASIC_INPUT_FILE] + config = get_config(args) + output = StringIO() + sdsort = SDSort(config, output) + data = get_data(BASIC_INPUT_FILE, value_field=config.sorting_field) + convert = float if numeric else str + expected_result = sorted(data, key=lambda x: convert(x[1]), reverse=reverse) + + sdsort.run() + + output.seek(0) + molecules = read_molecules(output) + result = [(m.get_field(config.group_key), m.get_field(config.sorting_field)) for m in molecules] + + assert result == expected_result + + +@pytest.mark.parametrize("numeric", [pytest.param(False, id="lexicographic"), pytest.param(True, id="numeric")]) +@pytest.mark.parametrize("reverse", [pytest.param(False, id="ascending"), pytest.param(True, id="descending")]) +@pytest.mark.parametrize( + "sorting_field", + [pytest.param(["-f", "test_field"], id="custom field"), pytest.param([], id="default field")], +) +def test_sdsort_fast_mode(numeric: bool, reverse: bool, sorting_field: list[str]): + args = ["-s"] + sorting_field + (["-n"] if numeric else []) + (["-r"] if reverse else []) + [FAST_MODE_INPUT_FILE] + 
config = get_config(args) + output = StringIO() + sdsort = SDSort(config, output) + data = get_data(FAST_MODE_INPUT_FILE, value_field=config.sorting_field) + expected_result = sorted_grouped_values(data, reverse=reverse, numeric=numeric) + + sdsort.run() + + output.seek(0) + molecules = read_molecules(output) + result = [(m.get_field(config.group_key), m.get_field(config.sorting_field)) for m in molecules] + + assert result == expected_result + + +@pytest.mark.parametrize("numeric", [pytest.param(False, id="lexicographic"), pytest.param(True, id="numeric")]) +@pytest.mark.parametrize("reverse", [pytest.param(False, id="ascending"), pytest.param(True, id="descending")]) +@pytest.mark.parametrize( + "group_key", + [pytest.param(["--group-key", "_TITLE2"], id="custom group field"), pytest.param([], id="default group field")], +) +def test_sdsort_by_group_key(numeric: bool, reverse: bool, group_key: list[str]): + filename = FAST_MODE_INPUT_WITH_ID_FILE + args = ["-s"] + group_key + (["-n"] if numeric else []) + (["-r"] if reverse else []) + [filename] + config = get_config(args) + output = StringIO() + sdsort = SDSort(config, output) + data = get_data(filename, key_field=config.group_key, value_field=config.sorting_field) + expected_result = sorted_grouped_values(data, reverse=reverse, numeric=numeric) + + sdsort.run() + + output.seek(0) + molecules = read_molecules(output) + result = [(m.get_field(config.group_key), m.get_field(config.sorting_field)) for m in molecules] + + assert result == expected_result