From 102206bdb20bc0bc4fcfc43207b0a3fdd9240836 Mon Sep 17 00:00:00 2001 From: hyunp2 <42776897+hyunp2@users.noreply.github.com> Date: Tue, 26 Sep 2023 13:06:49 -0500 Subject: [PATCH] Update 2_generate_frag_sdf.py --- 2_generate_frag_sdf.py | 24 +++++++++++++++++------- 1 file changed, 17 insertions(+), 7 deletions(-) diff --git a/2_generate_frag_sdf.py b/2_generate_frag_sdf.py index 76e1a671..01f65120 100644 --- a/2_generate_frag_sdf.py +++ b/2_generate_frag_sdf.py @@ -3,6 +3,8 @@ import shutil import subprocess from subprocess import PIPE +from utils.rdkit_conf_parallel import compute_confs_worker +import prepare_dataset as prep nodes = ['CuCu'] # change to the line below to reproduce paper result @@ -15,22 +17,30 @@ print(f'Now on node {node}') TARGET_DIR = f'data/sdf/{node}/' INPUT_SMILES=f'data/fragments_smi/frag_{node}.txt' - OUTPUT_TEMPLATE=f'hMOF' + OUTPUT_TEMPLATE=f'hMOF.sdf' OUT_DIR=f'data/fragments_all/{node}/' - CORES='18' + CORES='0' # generate sdf of molecular fragments print('Generating molecule sdf files...') os.makedirs(TARGET_DIR,exist_ok=True) - subprocess.run([f'python -W ignore utils/rdkit_conf_parallel.py {INPUT_SMILES} {OUTPUT_TEMPLATE} --cores {CORES}'],shell=True,stdout=PIPE,stderr=PIPE) + + smiles = [] + with open(INPUT_SMILES, 'r') as f: + for line in f: + smiles.append(line.strip()) + + # subprocess.run([f'python -W ignore utils/rdkit_conf_parallel.py {INPUT_SMILES} {OUTPUT_TEMPLATE} --cores {CORES}'], shell=True, stdout=PIPE, stderr=PIPE) + compute_confs_worker(smifile=smiles, sdffile={OUTPUT_TEMPLATE}, pid={CORES}) for sdf in glob('*.sdf'): - shutil.move(sdf,TARGET_DIR) + shutil.move(sdf, TARGET_DIR) # generate sdf for fragment and connection atom print(f'Generating fragment and connection atom sdf files...') os.makedirs(OUT_DIR,exist_ok=True) - subprocess.run(f'python -W ignore utils/prepare_dataset_parallel.py --table {INPUT_SMILES} --sdf-dir {TARGET_DIR} --out-dir {OUT_DIR} --template {OUTPUT_TEMPLATE} --cores {CORES}',shell=True) - + # subprocess.run(f'python -W ignore utils/prepare_dataset_parallel.py --table {INPUT_SMILES} --sdf-dir {TARGET_DIR} --out-dir {OUT_DIR} --template {OUTPUT_TEMPLATE} --cores {CORES}',shell=True) + prep.run(table_path={INPUT_SMILES}, sdf_path={TARGET_DIR} --out-dir {OUT_DIR} --template {OUTPUT_TEMPLATE} --cores {CORES}) + # filter and merge print(f'Filtering and merging ...') - subprocess.run(f'python -W ignore utils/filter_and_merge.py --in-dir {OUT_DIR} --out-dir {OUT_DIR} --template {OUTPUT_TEMPLATE} --number-of-files {CORES}',shell=True) \ No newline at end of file + subprocess.run(f'python -W ignore utils/filter_and_merge.py --in-dir {OUT_DIR} --out-dir {OUT_DIR} --template {OUTPUT_TEMPLATE} --number-of-files {CORES}',shell=True)