diff --git a/.github/workflows/code-quality-main.yaml b/.github/workflows/code-quality-main.yaml index f361f692..d8670afb 100644 --- a/.github/workflows/code-quality-main.yaml +++ b/.github/workflows/code-quality-main.yaml @@ -21,4 +21,4 @@ jobs: python-version: "3.10" - name: Run pre-commits - uses: pre-commit/action@v2.0.3 + uses: pre-commit/action@v3.0.1 diff --git a/.github/workflows/code-quality-pr.yaml b/.github/workflows/code-quality-pr.yaml index 992f92f5..3e9e85b5 100644 --- a/.github/workflows/code-quality-pr.yaml +++ b/.github/workflows/code-quality-pr.yaml @@ -33,6 +33,6 @@ jobs: run: echo '${{ steps.file_changes.outputs.files}}' - name: Run pre-commits - uses: pre-commit/action@v2.0.3 + uses: pre-commit/action@v3.0.1 with: extra_args: --files ${{ steps.file_changes.outputs.files}} diff --git a/.github/workflows/release-drafter.yml b/.github/workflows/release-drafter.yml index 6a45e15e..5634609f 100644 --- a/.github/workflows/release-drafter.yml +++ b/.github/workflows/release-drafter.yml @@ -22,6 +22,6 @@ jobs: steps: # Drafts your next Release notes as Pull Requests are merged into "master" - - uses: release-drafter/release-drafter@v5 + - uses: release-drafter/release-drafter@v6 env: GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} diff --git a/.gitignore b/.gitignore index 9a0a239b..b83ed579 100644 --- a/.gitignore +++ b/.gitignore @@ -163,6 +163,9 @@ configs/local/default.yaml # Forks /workdir/ +/forks/chai-lab/chai-lab/ +/forks/chai-lab/prediction_inputs/ +/forks/chai-lab/prediction_outputs/ /forks/DiffDock1.0/ /forks/DiffDock/DiffDock/ /forks/DynamicBind/*.npy @@ -189,4 +192,4 @@ configs/local/default.yaml /forks/RoseTTAFold-All-Atom/psipred/ /forks/TULIP/outputs/ /forks/Vina/ADFR/ -scripts/inference/ +scripts/*inference*/ diff --git a/CHANGELOG.md b/CHANGELOG.md index 90f1823c..35f9ee27 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,3 +1,12 @@ +### 0.5.0 - 09/30/2024 + +- Added results with AlphaFold 3 predicted structures (now the default) +- Added 
results for the new Chai-1 model from Chai Discovery +- Added a new inference sweep pipeline for HPC clusters to allow users to quickly run an exhaustive sweep of all baseline methods, datasets, and tasks, e.g., using generated batch scripts and a SLURM scheduler +- Updated Zenodo links to point to the latest version of the project's Zenodo record, which now includes the above-mentioned AlphaFold 3 predicted structures and baseline method results using them +- Updated documentation project-wide according to the additions listed above +- Fixed some CI testing issues + ### 0.4.0 - 08/12/2024 - Renamed `src` root directory to `posebench` to support `pip` packaging diff --git a/README.md b/README.md index d6ca81d4..3e9eaa65 100644 --- a/README.md +++ b/README.md @@ -3,7 +3,7 @@ # PoseBench [![Paper](http://img.shields.io/badge/arXiv-2405.14108-B31B1B.svg)](https://arxiv.org/abs/2405.14108) -[![DOI](https://zenodo.org/badge/DOI/10.5281/zenodo.11477766.svg)](https://doi.org/10.5281/zenodo.11477766) +[![DOI](https://zenodo.org/badge/DOI/10.5281/zenodo.13858866.svg)](https://doi.org/10.5281/zenodo.13858866) [![PyPI version](https://badge.fury.io/py/posebench.svg)](https://badge.fury.io/py/posebench) [![Project Status: Active – The project has reached a stable, usable state and is being actively developed.](https://www.repostatus.org/badges/latest/active.svg)](https://www.repostatus.org/#active) [![Docs](https://assets.readthedocs.org/static/projects/badges/passing-flat.svg)](https://bioinfomachinelearning.github.io/PoseBench/) @@ -27,6 +27,7 @@ Comprehensive benchmarking of protein-ligand structure generation methods - [Tutorials](#tutorials) - [How to prepare PoseBench data](#how-to-prepare-posebench-data) - [Available inference methods](#available-inference-methods) +- [How to run a sweep of benchmarking experiments](#how-to-run-a-sweep-of-benchmarking-experiments) - [How to run inference with individual methods](#how-to-run-inference-with-individual-methods) - [How to 
run inference with a method ensemble](#how-to-run-inference-with-a-method-ensemble) - [How to create comparative plots of inference results](#how-to-create-comparative-plots-of-inference-results) @@ -95,6 +96,10 @@ cd forks/NeuralPLexer/ && pip3 install -e . && cd ../../ mamba env create -f environments/rfaa_environment.yaml --prefix forks/RoseTTAFold-All-Atom/RFAA/ conda activate forks/RoseTTAFold-All-Atom/RFAA/ # NOTE: one still needs to use `conda` to (de)activate environments cd forks/RoseTTAFold-All-Atom/rf2aa/SE3Transformer/ && pip3 install --no-cache-dir -r requirements.txt && python3 setup.py install && cd ../../../../ +# - Chai-1 environment (~6 GB) +mamba env create -f environments/chai_lab_environment.yaml --prefix forks/chai-lab/chai-lab/ +conda activate forks/chai-lab/chai-lab/ # NOTE: one still needs to use `conda` to (de)activate environments +pip3 install forks/chai-lab/ # - AutoDock Vina Tools environment (~1 GB) mamba env create -f environments/adfr_environment.yaml --prefix forks/Vina/ADFR/ conda activate forks/Vina/ADFR/ # NOTE: one still needs to use `conda` to (de)activate environments @@ -149,10 +154,10 @@ of how to extend `PoseBench`, as outlined below. 
```bash # fetch, extract, and clean-up preprocessed Astex Diverse, PoseBusters Benchmark, DockGen, and CASP15 data (~3 GB) # -wget https://zenodo.org/records/11477766/files/astex_diverse_set.tar.gz -wget https://zenodo.org/records/11477766/files/posebusters_benchmark_set.tar.gz -wget https://zenodo.org/records/11477766/files/dockgen_set.tar.gz -wget https://zenodo.org/records/11477766/files/casp15_set.tar.gz +wget https://zenodo.org/records/13858866/files/astex_diverse_set.tar.gz +wget https://zenodo.org/records/13858866/files/posebusters_benchmark_set.tar.gz +wget https://zenodo.org/records/13858866/files/dockgen_set.tar.gz +wget https://zenodo.org/records/13858866/files/casp15_set.tar.gz tar -xzf astex_diverse_set.tar.gz tar -xzf posebusters_benchmark_set.tar.gz tar -xzf dockgen_set.tar.gz @@ -168,38 +173,42 @@ rm casp15_set.tar.gz ```bash # fetch, extract, and clean-up benchmark method predictions to reproduce paper results (~19 GB) # # DiffDock predictions and results -wget https://zenodo.org/records/11477766/files/diffdock_benchmark_method_predictions.tar.gz +wget https://zenodo.org/records/13858866/files/diffdock_benchmark_method_predictions.tar.gz tar -xzf diffdock_benchmark_method_predictions.tar.gz rm diffdock_benchmark_method_predictions.tar.gz # FABind predictions and results -wget https://zenodo.org/records/11477766/files/fabind_benchmark_method_predictions.tar.gz +wget https://zenodo.org/records/13858866/files/fabind_benchmark_method_predictions.tar.gz tar -xzf fabind_benchmark_method_predictions.tar.gz rm fabind_benchmark_method_predictions.tar.gz # DynamicBind predictions and results -wget https://zenodo.org/records/11477766/files/dynamicbind_benchmark_method_predictions.tar.gz +wget https://zenodo.org/records/13858866/files/dynamicbind_benchmark_method_predictions.tar.gz tar -xzf dynamicbind_benchmark_method_predictions.tar.gz rm dynamicbind_benchmark_method_predictions.tar.gz # NeuralPLexer predictions and results -wget 
https://zenodo.org/records/11477766/files/neuralplexer_benchmark_method_predictions.tar.gz +wget https://zenodo.org/records/13858866/files/neuralplexer_benchmark_method_predictions.tar.gz tar -xzf neuralplexer_benchmark_method_predictions.tar.gz rm neuralplexer_benchmark_method_predictions.tar.gz # RoseTTAFold-All-Atom predictions and results -wget https://zenodo.org/records/11477766/files/rfaa_benchmark_method_predictions.tar.gz +wget https://zenodo.org/records/13858866/files/rfaa_benchmark_method_predictions.tar.gz tar -xzf rfaa_benchmark_method_predictions.tar.gz rm rfaa_benchmark_method_predictions.tar.gz +# Chai-1 predictions and results +wget https://zenodo.org/records/13858866/files/chai_benchmark_method_predictions.tar.gz +tar -xzf chai_benchmark_method_predictions.tar.gz +rm chai_benchmark_method_predictions.tar.gz # TULIP predictions and results -wget https://zenodo.org/records/11477766/files/tulip_benchmark_method_predictions.tar.gz +wget https://zenodo.org/records/13858866/files/tulip_benchmark_method_predictions.tar.gz tar -xzf tulip_benchmark_method_predictions.tar.gz rm tulip_benchmark_method_predictions.tar.gz # AutoDock Vina predictions and results -wget https://zenodo.org/records/11477766/files/vina_benchmark_method_predictions.tar.gz +wget https://zenodo.org/records/13858866/files/vina_benchmark_method_predictions.tar.gz tar -xzf vina_benchmark_method_predictions.tar.gz rm vina_benchmark_method_predictions.tar.gz # Astex Diverse, PoseBusters Benchmark (w/ pocket-only results), DockGen, and CASP15 consensus ensemble predictions and results -wget https://zenodo.org/records/11477766/files/astex_diverse_ensemble_benchmark_method_predictions.tar.gz -wget https://zenodo.org/records/11477766/files/posebusters_benchmark_ensemble_benchmark_method_predictions.tar.gz -wget https://zenodo.org/records/11477766/files/dockgen_ensemble_benchmark_method_predictions.tar.gz -wget 
https://zenodo.org/records/11477766/files/casp15_ensemble_benchmark_method_predictions.tar.gz +wget https://zenodo.org/records/13858866/files/astex_diverse_ensemble_benchmark_method_predictions.tar.gz +wget https://zenodo.org/records/13858866/files/posebusters_benchmark_ensemble_benchmark_method_predictions.tar.gz +wget https://zenodo.org/records/13858866/files/dockgen_ensemble_benchmark_method_predictions.tar.gz +wget https://zenodo.org/records/13858866/files/casp15_ensemble_benchmark_method_predictions.tar.gz tar -xzf astex_diverse_ensemble_benchmark_method_predictions.tar.gz tar -xzf posebusters_benchmark_ensemble_benchmark_method_predictions.tar.gz tar -xzf dockgen_ensemble_benchmark_method_predictions.tar.gz @@ -210,7 +219,7 @@ rm dockgen_ensemble_benchmark_method_predictions.tar.gz rm casp15_ensemble_benchmark_method_predictions.tar.gz ``` -**NOTE:** One can reproduce the *pocket-only* experiments with the PoseBusters Benchmark set by adding the argument `pocket_only_baseline=true` to each command below used to run PoseBusters Benchmark dataset inference with all the baseline methods, since the pocket-only versions of the dataset's holo-aligned predicted protein structures have also been included in the downloadable Zenodo archive `posebusters_benchmark_set.tar.gz` referenced above. However, be aware that one then needs to *rename* any existing directories containing PoseBusters Benchmark dataset inference results for each baseline method, to prevent these existing inference directories from being merged with new pocket-only results. Please see the config files within `configs/data/`, `configs/model/`, and `configs/analysis/` for more details. 
+**NOTE:** One can reproduce the *pocket-only* experiments with the PoseBusters Benchmark set by adding the argument `pocket_only_baseline=true` to each command below used to run PoseBusters Benchmark dataset inference with all the baseline methods (except `tulip`, which does not currently support pocket-level docking), since the pocket-only versions of the dataset's holo-aligned predicted protein structures have also been included in the downloadable Zenodo archive `posebusters_benchmark_set.tar.gz` referenced above. Similarly, one can reproduce the *NeuralPLexer w/o inter-ligand clash loss (ILCL)* experiments with the CASP15 set by adding the argument `no_ilcl=true` to the commands `python3 posebench/models/neuralplexer_inference.py dataset=casp15 ...` and `python3 posebench/analysis/inference_analysis_casp.py dataset=casp15 ...` below (or the argument `neuralplexer_no_ilcl=true` to the command `python3 posebench/models/ensemble_generation.py ensemble_benchmarking_dataset=casp15 ...`) used to run CASP15 dataset inference with NeuralPLexer. Lastly, one can reproduce the *DiffDock w/o structural cluster training (SCT)* experiments by adding the argument `v1_baseline=true` to the DiffDock inference commands below. Please see the config files within `configs/data/`, `configs/model/`, and `configs/analysis/` for more details. 
### Downloading sequence databases (required only for RoseTTAFold-All-Atom inference) @@ -228,7 +237,7 @@ wget https://bfd.mmseqs.com/bfd_metaclust_clu_complete_id30_c90_final_seq.sorted mkdir -p bfd tar xfz bfd_metaclust_clu_complete_id30_c90_final_seq.sorted_opt.tar.gz -C ./bfd -# structure templates (including *_a3m.ffdata, *_a3m.ffindex) +# structure templates [81G] (including *_a3m.ffdata, *_a3m.ffindex) wget https://files.ipd.uw.edu/pub/RoseTTAFold/pdb100_2021Mar03.tar.gz tar xfz pdb100_2021Mar03.tar.gz @@ -258,8 +267,8 @@ Then, predict each apo protein structure using ESMFold's batch inference script ```bash -python3 posebench/data/components/esmfold_batch_structure_prediction.py -i data/posebusters_benchmark_set/posebusters_benchmark_esmfold_sequences.fasta -o data/posebusters_benchmark_set/posebusters_benchmark_esmfold_structures --skip-existing -python3 posebench/data/components/esmfold_batch_structure_prediction.py -i data/astex_diverse_set/astex_diverse_esmfold_sequences.fasta -o data/astex_diverse_set/astex_diverse_esmfold_structures --skip-existing +python3 posebench/data/components/esmfold_batch_structure_prediction.py -i data/posebusters_benchmark_set/posebusters_benchmark_esmfold_sequences.fasta -o data/posebusters_benchmark_set/posebusters_benchmark_predicted_structures --skip-existing +python3 posebench/data/components/esmfold_batch_structure_prediction.py -i data/astex_diverse_set/astex_diverse_esmfold_sequences.fasta -o data/astex_diverse_set/astex_diverse_predicted_structures --skip-existing ``` **NOTE:** Having a CUDA-enabled device available when running ESMFold is highly recommended @@ -276,7 +285,7 @@ python3 posebench/data/components/protein_apo_to_holo_alignment.py dataset=poseb python3 posebench/data/components/protein_apo_to_holo_alignment.py dataset=astex_diverse num_workers=1 ``` -**NOTE:** The preprocessed Astex Diverse, PoseBusters Benchmark, DockGen, and CASP15 data available via [Zenodo](https://doi.org/10.5281/zenodo.11477766) 
provide pre-holo-aligned predicted protein structures for these respective datasets. +**NOTE:** The preprocessed Astex Diverse, PoseBusters Benchmark, DockGen, and CASP15 data available via [Zenodo](https://doi.org/10.5281/zenodo.13858866) provide pre-holo-aligned protein structures predicted by AlphaFold 3 for these respective datasets. Accordingly, users must ensure their usage of such predicted protein structures aligns with the AlphaFold Server's [Terms of Service](https://alphafoldserver.com/terms). @@ -297,11 +306,12 @@ python3 posebench/data/components/protein_apo_to_holo_alignment.py dataset=astex #### Flexible Protein Methods -| Name | Source | Astex Benchmarked | PoseBusters Benchmarked | DockGen Benchmarked | CASP Benchmarked | -| ---------------------- | --------------------------------------------------------------------- | ----------------- | ----------------------- | ------------------- | ---------------- | -| `DynamicBind` | [Lu et al.](https://www.nature.com/articles/s41467-024-45461-2) | ✓ | ✓ | ✓ | ✓ | -| `NeuralPLexer` | [Qiao et al.](https://www.nature.com/articles/s42256-024-00792-z) | ✓ | ✓ | ✓ | ✓ | -| `RoseTTAFold-All-Atom` | [Krishna et al.](https://www.science.org/doi/10.1126/science.adl2528) | ✓ | ✓ | ✓ | ✓ | +| Name | Source | Astex Benchmarked | PoseBusters Benchmarked | DockGen Benchmarked | CASP Benchmarked | +| ---------------------- | ----------------------------------------------------------------------------- | ----------------- | ----------------------- | ------------------- | ---------------- | +| `DynamicBind` | [Lu et al.](https://www.nature.com/articles/s41467-024-45461-2) | ✓ | ✓ | ✓ | ✓ | +| `NeuralPLexer` | [Qiao et al.](https://www.nature.com/articles/s42256-024-00792-z) | ✓ | ✓ | ✓ | ✓ | +| `RoseTTAFold-All-Atom` | [Krishna et al.](https://www.science.org/doi/10.1126/science.adl2528) | ✓ | ✓ | ✓ | ✓ | +| `Chai-1` | [Chai Discovery](https://chaiassets.com/chai-1/paper/technical_report_v1.pdf) | ✓ | ✓ | ✓ | ✓ | ### 
Methods available for ensembling @@ -315,16 +325,37 @@ python3 posebench/data/components/protein_apo_to_holo_alignment.py dataset=astex #### Flexible Protein Methods -| Name | Source | Astex Benchmarked | PoseBusters Benchmarked | DockGen Benchmarked | CASP Benchmarked | -| ---------------------- | --------------------------------------------------------------------- | ----------------- | ----------------------- | ------------------- | ---------------- | -| `DynamicBind` | [Lu et al.](https://www.nature.com/articles/s41467-024-45461-2) | ✓ | ✓ | ✓ | ✓ | -| `NeuralPLexer` | [Qiao et al.](https://www.nature.com/articles/s42256-024-00792-z) | ✓ | ✓ | ✓ | ✓ | -| `RoseTTAFold-All-Atom` | [Krishna et al.](https://www.science.org/doi/10.1126/science.adl2528) | ✓ | ✓ | ✓ | ✓ | +| Name | Source | Astex Benchmarked | PoseBusters Benchmarked | DockGen Benchmarked | CASP Benchmarked | +| ---------------------- | ----------------------------------------------------------------------------- | ----------------- | ----------------------- | ------------------- | ---------------- | +| `DynamicBind` | [Lu et al.](https://www.nature.com/articles/s41467-024-45461-2) | ✓ | ✓ | ✓ | ✓ | +| `NeuralPLexer` | [Qiao et al.](https://www.nature.com/articles/s42256-024-00792-z) | ✓ | ✓ | ✓ | ✓ | +| `RoseTTAFold-All-Atom` | [Krishna et al.](https://www.science.org/doi/10.1126/science.adl2528) | ✓ | ✓ | ✓ | ✓ | +| `Chai-1` | [Chai Discovery](https://chaiassets.com/chai-1/paper/technical_report_v1.pdf) | ✓ | ✓ | ✓ | ✓ | **NOTE**: Have a new method to add? Please let us know by creating a pull request. We would be happy to work with you to integrate new methodology into this benchmark! +## How to run a sweep of benchmarking experiments + +
+ +Build inference scripts for one's desired sweep + +```bash +python3 scripts/build_inference_script.py sweep=true export_hpc_headers=true +``` + +Submit the inference scripts for job scheduling + +```bash +for script in scripts/inference/*_inference_*.sh; do sbatch "$script"; done +``` + +**NOTE**: See the config file `configs/scripts/build_inference_script.yaml` for more details. + +
+ ## How to run inference with individual methods
@@ -337,7 +368,7 @@ Prepare CSV input files python3 posebench/data/diffdock_input_preparation.py dataset=posebusters_benchmark python3 posebench/data/diffdock_input_preparation.py dataset=astex_diverse python3 posebench/data/diffdock_input_preparation.py dataset=dockgen -python3 posebench/data/diffdock_input_preparation.py dataset=casp15 input_data_dir="$PWD"/data/casp15_set/targets input_protein_structure_dir="$PWD"/data/casp15_set/predicted_structures +python3 posebench/data/diffdock_input_preparation.py dataset=casp15 input_data_dir=data/casp15_set/targets input_protein_structure_dir=data/casp15_set/casp15_holo_aligned_predicted_structures ``` Run inference on each dataset @@ -356,16 +387,14 @@ python3 posebench/models/diffdock_inference.py dataset=casp15 batch_size=1 repea Relax the generated ligand structures inside of their respective protein pockets ```bash -python3 posebench/models/inference_relaxation.py method=diffdock dataset=posebusters_benchmark remove_initial_protein_hydrogens=true assign_partial_charges_manually=true num_processes=1 repeat_index=1 +python3 posebench/models/inference_relaxation.py method=diffdock dataset=posebusters_benchmark remove_initial_protein_hydrogens=true assign_partial_charges_manually=true repeat_index=1 ... -python3 posebench/models/inference_relaxation.py method=diffdock dataset=astex_diverse remove_initial_protein_hydrogens=true assign_partial_charges_manually=true num_processes=1 repeat_index=1 +python3 posebench/models/inference_relaxation.py method=diffdock dataset=astex_diverse remove_initial_protein_hydrogens=true assign_partial_charges_manually=true repeat_index=1 ... 
-python3 posebench/models/inference_relaxation.py method=diffdock dataset=dockgen remove_initial_protein_hydrogens=true assign_partial_charges_manually=true num_processes=1 repeat_index=1 +python3 posebench/models/inference_relaxation.py method=diffdock dataset=dockgen remove_initial_protein_hydrogens=true assign_partial_charges_manually=true repeat_index=1 ... ``` -**NOTE**: Increase `num_processes` according to your available CPU/GPU resources to improve throughput - Analyze inference results for each dataset ```bash @@ -381,8 +410,8 @@ Analyze inference results for the CASP15 dataset ```bash # first assemble (unrelaxed and post ranking-relaxed) CASP15-compliant prediction submission files for scoring -python3 posebench/models/ensemble_generation.py ensemble_methods=\[diffdock\] input_csv_filepath=data/test_cases/casp15/ensemble_inputs.csv output_dir=data/test_cases/casp15/top_diffdock_ensemble_predictions_1 skip_existing=true relax_method_ligands_post_ranking=false export_file_format=casp15 export_top_n=5 combine_casp_output_files=true max_method_predictions=40 method_top_n_to_select=40 resume=true ensemble_benchmarking=true ensemble_benchmarking_dataset=casp15 cuda_device_index=0 ensemble_benchmarking_repeat_index=1 -python3 posebench/models/ensemble_generation.py ensemble_methods=\[diffdock\] input_csv_filepath=data/test_cases/casp15/ensemble_inputs.csv output_dir=data/test_cases/casp15/top_diffdock_ensemble_predictions_1 skip_existing=true relax_method_ligands_post_ranking=true export_file_format=casp15 export_top_n=5 combine_casp_output_files=true max_method_predictions=40 method_top_n_to_select=40 resume=true ensemble_benchmarking=true ensemble_benchmarking_dataset=casp15 cuda_device_index=0 ensemble_benchmarking_repeat_index=1 +python3 posebench/models/ensemble_generation.py ensemble_methods=\[diffdock\] input_csv_filepath=data/test_cases/casp15/ensemble_inputs.csv output_dir=data/test_cases/casp15/top_diffdock_ensemble_predictions_1 skip_existing=true 
relax_method_ligands_post_ranking=false export_file_format=casp15 export_top_n=5 combine_casp_output_files=true max_method_predictions=40 method_top_n_to_select=5 resume=true ensemble_benchmarking=true ensemble_benchmarking_dataset=casp15 cuda_device_index=0 ensemble_benchmarking_repeat_index=1 +python3 posebench/models/ensemble_generation.py ensemble_methods=\[diffdock\] input_csv_filepath=data/test_cases/casp15/ensemble_inputs.csv output_dir=data/test_cases/casp15/top_diffdock_ensemble_predictions_1 skip_existing=true relax_method_ligands_post_ranking=true export_file_format=casp15 export_top_n=5 combine_casp_output_files=true max_method_predictions=40 method_top_n_to_select=5 resume=true ensemble_benchmarking=true ensemble_benchmarking_dataset=casp15 cuda_device_index=0 ensemble_benchmarking_repeat_index=1 # NOTE: the suffixes for both `output_dir` and `ensemble_benchmarking_repeat_index` should be modified to e.g., 2, 3, ... ... # now score the CASP15-compliant submissions using the official CASP scoring pipeline @@ -414,16 +443,14 @@ python3 posebench/models/fabind_inference.py dataset=dockgen repeat_index=1 Relax the generated ligand structures inside of their respective protein pockets ```bash -python3 posebench/models/inference_relaxation.py method=fabind dataset=posebusters_benchmark remove_initial_protein_hydrogens=true assign_partial_charges_manually=true num_processes=1 repeat_index=1 +python3 posebench/models/inference_relaxation.py method=fabind dataset=posebusters_benchmark remove_initial_protein_hydrogens=true assign_partial_charges_manually=true repeat_index=1 ... -python3 posebench/models/inference_relaxation.py method=fabind dataset=astex_diverse remove_initial_protein_hydrogens=true assign_partial_charges_manually=true num_processes=1 repeat_index=1 +python3 posebench/models/inference_relaxation.py method=fabind dataset=astex_diverse remove_initial_protein_hydrogens=true assign_partial_charges_manually=true repeat_index=1 ... 
-python3 posebench/models/inference_relaxation.py method=fabind dataset=dockgen remove_initial_protein_hydrogens=true assign_partial_charges_manually=true num_processes=1 repeat_index=1 +python3 posebench/models/inference_relaxation.py method=fabind dataset=dockgen remove_initial_protein_hydrogens=true assign_partial_charges_manually=true repeat_index=1 ... ``` -**NOTE**: Increase `num_processes` according to your available CPU/GPU resources to improve throughput - Analyze inference results for each dataset ```bash @@ -443,7 +470,7 @@ Prepare CSV input files python3 posebench/data/dynamicbind_input_preparation.py dataset=posebusters_benchmark python3 posebench/data/dynamicbind_input_preparation.py dataset=astex_diverse python3 posebench/data/dynamicbind_input_preparation.py dataset=dockgen -python3 posebench/data/dynamicbind_input_preparation.py dataset=casp15 input_data_dir="$PWD"/data/casp15_set/targets +python3 posebench/data/dynamicbind_input_preparation.py dataset=casp15 input_data_dir=data/casp15_set/targets ``` Run inference on each dataset @@ -455,23 +482,21 @@ python3 posebench/models/dynamicbind_inference.py dataset=astex_diverse repeat_i ... python3 posebench/models/dynamicbind_inference.py dataset=dockgen repeat_index=1 ... -python3 posebench/models/dynamicbind_inference.py dataset=casp15 batch_size=1 input_data_dir="$PWD"/data/casp15_set/predicted_structures repeat_index=1 +python3 posebench/models/dynamicbind_inference.py dataset=casp15 batch_size=1 input_data_dir=data/casp15_set/casp15_holo_aligned_predicted_structures repeat_index=1 ... 
``` Relax the generated ligand structures inside of their respective protein pockets ```bash -python3 posebench/models/inference_relaxation.py method=dynamicbind dataset=posebusters_benchmark remove_initial_protein_hydrogens=true assign_partial_charges_manually=true num_processes=1 repeat_index=1 +python3 posebench/models/inference_relaxation.py method=dynamicbind dataset=posebusters_benchmark remove_initial_protein_hydrogens=true assign_partial_charges_manually=true repeat_index=1 ... -python3 posebench/models/inference_relaxation.py method=dynamicbind dataset=astex_diverse remove_initial_protein_hydrogens=true assign_partial_charges_manually=true num_processes=1 repeat_index=1 +python3 posebench/models/inference_relaxation.py method=dynamicbind dataset=astex_diverse remove_initial_protein_hydrogens=true assign_partial_charges_manually=true repeat_index=1 ... -python3 posebench/models/inference_relaxation.py method=dynamicbind dataset=dockgen remove_initial_protein_hydrogens=true assign_partial_charges_manually=true num_processes=1 repeat_index=1 +python3 posebench/models/inference_relaxation.py method=dynamicbind dataset=dockgen remove_initial_protein_hydrogens=true assign_partial_charges_manually=true repeat_index=1 ... 
``` -**NOTE**: Increase `num_processes` according to your available CPU/GPU resources to improve throughput - Analyze inference results for each dataset ```bash @@ -487,8 +512,8 @@ Analyze inference results for the CASP15 dataset ```bash # first assemble (unrelaxed and post ranking-relaxed) CASP15-compliant prediction submission files for scoring -python3 posebench/models/ensemble_generation.py ensemble_methods=\[dynamicbind\] input_csv_filepath=data/test_cases/casp15/ensemble_inputs.csv output_dir=data/test_cases/casp15/top_dynamicbind_ensemble_predictions_1 skip_existing=true relax_method_ligands_post_ranking=false export_file_format=casp15 export_top_n=5 combine_casp_output_files=true max_method_predictions=40 method_top_n_to_select=40 resume=true ensemble_benchmarking=true ensemble_benchmarking_dataset=casp15 cuda_device_index=0 ensemble_benchmarking_repeat_index=1 -python3 posebench/models/ensemble_generation.py ensemble_methods=\[dynamicbind\] input_csv_filepath=data/test_cases/casp15/ensemble_inputs.csv output_dir=data/test_cases/casp15/top_dynamicbind_ensemble_predictions_1 skip_existing=true relax_method_ligands_post_ranking=true export_file_format=casp15 export_top_n=5 combine_casp_output_files=true max_method_predictions=40 method_top_n_to_select=40 resume=true ensemble_benchmarking=true ensemble_benchmarking_dataset=casp15 cuda_device_index=0 ensemble_benchmarking_repeat_index=1 +python3 posebench/models/ensemble_generation.py ensemble_methods=\[dynamicbind\] input_csv_filepath=data/test_cases/casp15/ensemble_inputs.csv output_dir=data/test_cases/casp15/top_dynamicbind_ensemble_predictions_1 skip_existing=true relax_method_ligands_post_ranking=false export_file_format=casp15 export_top_n=5 combine_casp_output_files=true max_method_predictions=40 method_top_n_to_select=5 resume=true ensemble_benchmarking=true ensemble_benchmarking_dataset=casp15 cuda_device_index=0 ensemble_benchmarking_repeat_index=1 +python3 posebench/models/ensemble_generation.py 
ensemble_methods=\[dynamicbind\] input_csv_filepath=data/test_cases/casp15/ensemble_inputs.csv output_dir=data/test_cases/casp15/top_dynamicbind_ensemble_predictions_1 skip_existing=true relax_method_ligands_post_ranking=true export_file_format=casp15 export_top_n=5 combine_casp_output_files=true max_method_predictions=40 method_top_n_to_select=5 resume=true ensemble_benchmarking=true ensemble_benchmarking_dataset=casp15 cuda_device_index=0 ensemble_benchmarking_repeat_index=1 # NOTE: the suffixes for both `output_dir` and `ensemble_benchmarking_repeat_index` should be modified to e.g., 2, 3, ... ... # now score the CASP15-compliant submissions using the official CASP scoring pipeline @@ -504,7 +529,7 @@ Prepare CSV input files python3 posebench/data/neuralplexer_input_preparation.py dataset=posebusters_benchmark python3 posebench/data/neuralplexer_input_preparation.py dataset=astex_diverse python3 posebench/data/neuralplexer_input_preparation.py dataset=dockgen -python3 posebench/data/neuralplexer_input_preparation.py dataset=casp15 input_data_dir="$PWD"/data/casp15_set/targets input_receptor_structure_dir="$PWD"/data/casp15_set/predicted_structures +python3 posebench/data/neuralplexer_input_preparation.py dataset=casp15 input_data_dir=data/casp15_set/targets input_receptor_structure_dir=data/casp15_set/casp15_holo_aligned_predicted_structures ``` Run inference on each dataset @@ -516,23 +541,21 @@ python3 posebench/models/neuralplexer_inference.py dataset=astex_diverse repeat_ ... python3 posebench/models/neuralplexer_inference.py dataset=dockgen repeat_index=1 ... -python3 posebench/models/neuralplexer_inference.py dataset=casp15 repeat_index=1 +python3 posebench/models/neuralplexer_inference.py dataset=casp15 chunk_size=4 repeat_index=1 ... 
``` Relax the generated ligand structures inside of their respective protein pockets ```bash -python3 posebench/models/inference_relaxation.py method=neuralplexer dataset=posebusters_benchmark num_processes=1 remove_initial_protein_hydrogens=true assign_partial_charges_manually=true cache_files=false repeat_index=1 +python3 posebench/models/inference_relaxation.py method=neuralplexer dataset=posebusters_benchmark remove_initial_protein_hydrogens=true assign_partial_charges_manually=true repeat_index=1 ... -python3 posebench/models/inference_relaxation.py method=neuralplexer dataset=astex_diverse num_processes=1 remove_initial_protein_hydrogens=true assign_partial_charges_manually=true cache_files=false repeat_index=1 +python3 posebench/models/inference_relaxation.py method=neuralplexer dataset=astex_diverse remove_initial_protein_hydrogens=true assign_partial_charges_manually=true repeat_index=1 ... -python3 posebench/models/inference_relaxation.py method=neuralplexer dataset=dockgen num_processes=1 remove_initial_protein_hydrogens=true assign_partial_charges_manually=true cache_files=false repeat_index=1 +python3 posebench/models/inference_relaxation.py method=neuralplexer dataset=dockgen remove_initial_protein_hydrogens=true assign_partial_charges_manually=true repeat_index=1 ... 
``` -**NOTE**: Increase `num_processes` according to your available CPU/GPU resources to improve throughput - Align predicted protein-ligand structures to ground-truth complex structures ```bash @@ -559,8 +582,8 @@ Analyze inference results for the CASP15 dataset ```bash # first assemble (unrelaxed and post ranking-relaxed) CASP15-compliant prediction submission files for scoring -python3 posebench/models/ensemble_generation.py ensemble_methods=\[neuralplexer\] input_csv_filepath=data/test_cases/casp15/ensemble_inputs.csv output_dir=data/test_cases/casp15/top_neuralplexer_ensemble_predictions_1 skip_existing=true relax_method_ligands_post_ranking=false export_file_format=casp15 export_top_n=5 combine_casp_output_files=true max_method_predictions=40 method_top_n_to_select=40 resume=true ensemble_benchmarking=true ensemble_benchmarking_dataset=casp15 cuda_device_index=0 ensemble_benchmarking_repeat_index=1 -python3 posebench/models/ensemble_generation.py ensemble_methods=\[neuralplexer\] input_csv_filepath=data/test_cases/casp15/ensemble_inputs.csv output_dir=data/test_cases/casp15/top_neuralplexer_ensemble_predictions_1 skip_existing=true relax_method_ligands_post_ranking=true export_file_format=casp15 export_top_n=5 combine_casp_output_files=true max_method_predictions=40 method_top_n_to_select=40 resume=true ensemble_benchmarking=true ensemble_benchmarking_dataset=casp15 cuda_device_index=0 ensemble_benchmarking_repeat_index=1 +python3 posebench/models/ensemble_generation.py ensemble_methods=\[neuralplexer\] input_csv_filepath=data/test_cases/casp15/ensemble_inputs.csv output_dir=data/test_cases/casp15/top_neuralplexer_ensemble_predictions_1 skip_existing=true relax_method_ligands_post_ranking=false export_file_format=casp15 export_top_n=5 combine_casp_output_files=true max_method_predictions=40 method_top_n_to_select=5 resume=true ensemble_benchmarking=true ensemble_benchmarking_dataset=casp15 cuda_device_index=0 ensemble_benchmarking_repeat_index=1 +python3 
posebench/models/ensemble_generation.py ensemble_methods=\[neuralplexer\] input_csv_filepath=data/test_cases/casp15/ensemble_inputs.csv output_dir=data/test_cases/casp15/top_neuralplexer_ensemble_predictions_1 skip_existing=true relax_method_ligands_post_ranking=true export_file_format=casp15 export_top_n=5 combine_casp_output_files=true max_method_predictions=40 method_top_n_to_select=5 resume=true ensemble_benchmarking=true ensemble_benchmarking_dataset=casp15 cuda_device_index=0 ensemble_benchmarking_repeat_index=1 # NOTE: the suffixes for both `output_dir` and `ensemble_benchmarking_repeat_index` should be modified to e.g., 2, 3, ... ... # now score the CASP15-compliant submissions using the official CASP scoring pipeline @@ -576,7 +599,7 @@ Prepare CSV input files python3 posebench/data/rfaa_input_preparation.py dataset=posebusters_benchmark python3 posebench/data/rfaa_input_preparation.py dataset=astex_diverse python3 posebench/data/rfaa_input_preparation.py dataset=dockgen -python3 posebench/data/rfaa_input_preparation.py dataset=casp15 input_data_dir="$PWD"/data/casp15_set/targets +python3 posebench/data/rfaa_input_preparation.py dataset=casp15 input_data_dir=data/casp15_set/targets ``` Run inference on each dataset @@ -602,13 +625,11 @@ python3 posebench/data/rfaa_output_extraction.py dataset=casp15 Relax the generated ligand structures inside of their respective protein pockets ```bash -python3 posebench/models/inference_relaxation.py method=rfaa dataset=posebusters_benchmark num_processes=1 remove_initial_protein_hydrogens=true -python3 posebench/models/inference_relaxation.py method=rfaa dataset=astex_diverse num_processes=1 remove_initial_protein_hydrogens=true -python3 posebench/models/inference_relaxation.py method=rfaa dataset=dockgen num_processes=1 remove_initial_protein_hydrogens=true +python3 posebench/models/inference_relaxation.py method=rfaa dataset=posebusters_benchmark remove_initial_protein_hydrogens=true +python3 
posebench/models/inference_relaxation.py method=rfaa dataset=astex_diverse remove_initial_protein_hydrogens=true +python3 posebench/models/inference_relaxation.py method=rfaa dataset=dockgen remove_initial_protein_hydrogens=true ``` -**NOTE**: Increase `num_processes` according to your available CPU/GPU resources to improve throughput - Align predicted protein-ligand structures to ground-truth complex structures ```bash @@ -629,12 +650,97 @@ Analyze inference results for the CASP15 dataset ```bash # first assemble (unrelaxed and post ranking-relaxed) CASP15-compliant prediction submission files for scoring -python3 posebench/models/ensemble_generation.py ensemble_methods=\[rfaa\] input_csv_filepath=data/test_cases/casp15/ensemble_inputs.csv output_dir=data/test_cases/casp15/top_rfaa_ensemble_predictions_1 skip_existing=true relax_method_ligands_post_ranking=false export_file_format=casp15 export_top_n=5 combine_casp_output_files=true max_method_predictions=40 method_top_n_to_select=40 resume=true ensemble_benchmarking=true ensemble_benchmarking_dataset=casp15 cuda_device_index=0 ensemble_benchmarking_repeat_index=1 -python3 posebench/models/ensemble_generation.py ensemble_methods=\[rfaa\] input_csv_filepath=data/test_cases/casp15/ensemble_inputs.csv output_dir=data/test_cases/casp15/top_rfaa_ensemble_predictions_1 skip_existing=true relax_method_ligands_post_ranking=true export_file_format=casp15 export_top_n=5 combine_casp_output_files=true max_method_predictions=40 method_top_n_to_select=40 resume=true ensemble_benchmarking=true ensemble_benchmarking_dataset=casp15 cuda_device_index=0 ensemble_benchmarking_repeat_index=1 +python3 posebench/models/ensemble_generation.py ensemble_methods=\[rfaa\] input_csv_filepath=data/test_cases/casp15/ensemble_inputs.csv output_dir=data/test_cases/casp15/top_rfaa_ensemble_predictions_1 skip_existing=true relax_method_ligands_post_ranking=false export_file_format=casp15 export_top_n=5 combine_casp_output_files=true 
max_method_predictions=40 method_top_n_to_select=5 resume=true ensemble_benchmarking=true ensemble_benchmarking_dataset=casp15 cuda_device_index=0 ensemble_benchmarking_repeat_index=1 +python3 posebench/models/ensemble_generation.py ensemble_methods=\[rfaa\] input_csv_filepath=data/test_cases/casp15/ensemble_inputs.csv output_dir=data/test_cases/casp15/top_rfaa_ensemble_predictions_1 skip_existing=true relax_method_ligands_post_ranking=true export_file_format=casp15 export_top_n=5 combine_casp_output_files=true max_method_predictions=40 method_top_n_to_select=5 resume=true ensemble_benchmarking=true ensemble_benchmarking_dataset=casp15 cuda_device_index=0 ensemble_benchmarking_repeat_index=1 +# NOTE: the suffixes for both `output_dir` and `ensemble_benchmarking_repeat_index` should be modified to e.g., 2, 3, ... +... +# now score the CASP15-compliant submissions using the official CASP scoring pipeline +python3 posebench/analysis/inference_analysis_casp.py method=rfaa dataset=casp15 targets='[T1124, T1127v2, T1146, T1152, T1158v1, T1158v2, T1158v3, T1158v4, T1186, T1187, T1188]' repeat_index=1 +... +``` + +### How to run inference with `Chai-1` + +Prepare CSV input files + +```bash +python3 posebench/data/chai_input_preparation.py dataset=posebusters_benchmark +python3 posebench/data/chai_input_preparation.py dataset=astex_diverse +python3 posebench/data/chai_input_preparation.py dataset=dockgen +python3 posebench/data/chai_input_preparation.py dataset=casp15 input_data_dir=data/casp15_set/targets +``` + +Run inference on each dataset + +```bash +conda activate forks/chai-lab/chai-lab/ +python3 posebench/models/chai_inference.py dataset=posebusters_benchmark repeat_index=1 +... +python3 posebench/models/chai_inference.py dataset=astex_diverse repeat_index=1 +... +python3 posebench/models/chai_inference.py dataset=dockgen repeat_index=1 +... +python3 posebench/models/chai_inference.py dataset=casp15 repeat_index=1 +... 
+conda deactivate +``` + +Extract predictions into separate files for proteins and ligands + +```bash +python3 posebench/data/chai_output_extraction.py dataset=posebusters_benchmark repeat_index=1 +... +python3 posebench/data/chai_output_extraction.py dataset=astex_diverse repeat_index=1 +... +python3 posebench/data/chai_output_extraction.py dataset=dockgen repeat_index=1 +... +python3 posebench/data/chai_output_extraction.py dataset=casp15 repeat_index=1 +... +``` + +Relax the generated ligand structures inside of their respective protein pockets + +```bash +python3 posebench/models/inference_relaxation.py method=chai-lab dataset=posebusters_benchmark remove_initial_protein_hydrogens=true repeat_index=1 +... +python3 posebench/models/inference_relaxation.py method=chai-lab dataset=astex_diverse remove_initial_protein_hydrogens=true repeat_index=1 +... +python3 posebench/models/inference_relaxation.py method=chai-lab dataset=dockgen remove_initial_protein_hydrogens=true repeat_index=1 +... +``` + +Align predicted protein-ligand structures to ground-truth complex structures + +```bash +python3 posebench/analysis/complex_alignment.py method=chai-lab dataset=posebusters_benchmark repeat_index=1 +... +python3 posebench/analysis/complex_alignment.py method=chai-lab dataset=astex_diverse repeat_index=1 +... +python3 posebench/analysis/complex_alignment.py method=chai-lab dataset=dockgen repeat_index=1 +... +``` + +Analyze inference results for each dataset + +```bash +python3 posebench/analysis/inference_analysis.py method=chai-lab dataset=posebusters_benchmark repeat_index=1 +... +python3 posebench/analysis/inference_analysis.py method=chai-lab dataset=astex_diverse repeat_index=1 +... +python3 posebench/analysis/inference_analysis.py method=chai-lab dataset=dockgen repeat_index=1 +... 
+``` + +Analyze inference results for the CASP15 dataset + +```bash +# first assemble (unrelaxed and post ranking-relaxed) CASP15-compliant prediction submission files for scoring +python3 posebench/models/ensemble_generation.py ensemble_methods=\[chai-lab\] input_csv_filepath=data/test_cases/casp15/ensemble_inputs.csv output_dir=data/test_cases/casp15/top_chai-lab_ensemble_predictions_1 skip_existing=true relax_method_ligands_post_ranking=false export_file_format=casp15 export_top_n=5 combine_casp_output_files=true max_method_predictions=40 method_top_n_to_select=5 resume=true ensemble_benchmarking=true ensemble_benchmarking_dataset=casp15 cuda_device_index=0 ensemble_benchmarking_repeat_index=1 +python3 posebench/models/ensemble_generation.py ensemble_methods=\[chai-lab\] input_csv_filepath=data/test_cases/casp15/ensemble_inputs.csv output_dir=data/test_cases/casp15/top_chai-lab_ensemble_predictions_1 skip_existing=true relax_method_ligands_post_ranking=true export_file_format=casp15 export_top_n=5 combine_casp_output_files=true max_method_predictions=40 method_top_n_to_select=5 resume=true ensemble_benchmarking=true ensemble_benchmarking_dataset=casp15 cuda_device_index=0 ensemble_benchmarking_repeat_index=1 # NOTE: the suffixes for both `output_dir` and `ensemble_benchmarking_repeat_index` should be modified to e.g., 2, 3, ... ... # now score the CASP15-compliant submissions using the official CASP scoring pipeline -python3 posebench/analysis/inference_analysis_casp.py method=rfaa dataset=casp15 targets="[T1124, T1127v2, T1146, T1152, T1158v1, T1158v2, T1158v3, T1158v4, T1186, T1187, T1188]" repeat_index=1 +python3 posebench/analysis/inference_analysis_casp.py method=chai-lab dataset=casp15 repeat_index=1 targets='[H1135, T1127v2, T1146, T1152, T1158v1, T1158v2, T1158v3, T1158v4, T1186, T1187, T1188]' ... 
``` @@ -678,16 +784,14 @@ mkdir -p forks/Vina/inference/vina_diffdock_casp15_outputs_1 && cp -r data/test_ Relax the generated ligand structures inside of their respective protein pockets ```bash -python3 posebench/models/inference_relaxation.py method=vina vina_binding_site_method=diffdock dataset=posebusters_benchmark remove_initial_protein_hydrogens=true assign_partial_charges_manually=true num_processes=1 repeat_index=1 +python3 posebench/models/inference_relaxation.py method=vina vina_binding_site_method=diffdock dataset=posebusters_benchmark remove_initial_protein_hydrogens=true assign_partial_charges_manually=true repeat_index=1 ... -python3 posebench/models/inference_relaxation.py method=vina vina_binding_site_method=diffdock dataset=astex_diverse remove_initial_protein_hydrogens=true assign_partial_charges_manually=true num_processes=1 repeat_index=1 +python3 posebench/models/inference_relaxation.py method=vina vina_binding_site_method=diffdock dataset=astex_diverse remove_initial_protein_hydrogens=true assign_partial_charges_manually=true repeat_index=1 ... -python3 posebench/models/inference_relaxation.py method=vina vina_binding_site_method=diffdock dataset=dockgen remove_initial_protein_hydrogens=true assign_partial_charges_manually=true num_processes=1 repeat_index=1 +python3 posebench/models/inference_relaxation.py method=vina vina_binding_site_method=diffdock dataset=dockgen remove_initial_protein_hydrogens=true assign_partial_charges_manually=true repeat_index=1 ... 
``` -**NOTE**: Increase `num_processes` according to your available CPU/GPU resources to improve throughput - Analyze inference results for each dataset ```bash @@ -703,8 +807,8 @@ Analyze inference results for the CASP15 dataset ```bash # assemble (unrelaxed and post ranking-relaxed) CASP15-compliant prediction submission files for scoring -python3 posebench/models/ensemble_generation.py ensemble_methods=\[vina\] vina_binding_site_methods=\[diffdock\] input_csv_filepath=data/test_cases/casp15/ensemble_inputs.csv output_dir=data/test_cases/casp15/top_vina_diffdock_ensemble_predictions_1 skip_existing=true relax_method_ligands_post_ranking=false export_file_format=casp15 export_top_n=5 combine_casp_output_files=true max_method_predictions=40 method_top_n_to_select=40 resume=true ensemble_benchmarking=true ensemble_benchmarking_dataset=casp15 cuda_device_index=0 ensemble_benchmarking_repeat_index=1 -python3 posebench/models/ensemble_generation.py ensemble_methods=\[vina\] vina_binding_site_methods=\[diffdock\] input_csv_filepath=data/test_cases/casp15/ensemble_inputs.csv output_dir=data/test_cases/casp15/top_vina_diffdock_ensemble_predictions_1 skip_existing=true relax_method_ligands_post_ranking=true export_file_format=casp15 export_top_n=5 combine_casp_output_files=true max_method_predictions=40 method_top_n_to_select=40 resume=true ensemble_benchmarking=true ensemble_benchmarking_dataset=casp15 cuda_device_index=0 ensemble_benchmarking_repeat_index=1 +python3 posebench/models/ensemble_generation.py ensemble_methods=\[vina\] vina_binding_site_methods=\[diffdock\] input_csv_filepath=data/test_cases/casp15/ensemble_inputs.csv output_dir=data/test_cases/casp15/top_vina_diffdock_ensemble_predictions_1 skip_existing=true relax_method_ligands_post_ranking=false export_file_format=casp15 export_top_n=5 combine_casp_output_files=true max_method_predictions=40 method_top_n_to_select=5 resume=true ensemble_benchmarking=true ensemble_benchmarking_dataset=casp15 
cuda_device_index=0 ensemble_benchmarking_repeat_index=1 +python3 posebench/models/ensemble_generation.py ensemble_methods=\[vina\] vina_binding_site_methods=\[diffdock\] input_csv_filepath=data/test_cases/casp15/ensemble_inputs.csv output_dir=data/test_cases/casp15/top_vina_diffdock_ensemble_predictions_1 skip_existing=true relax_method_ligands_post_ranking=true export_file_format=casp15 export_top_n=5 combine_casp_output_files=true max_method_predictions=40 method_top_n_to_select=5 resume=true ensemble_benchmarking=true ensemble_benchmarking_dataset=casp15 cuda_device_index=0 ensemble_benchmarking_repeat_index=1 # NOTE: the suffixes for both `output_dir` and `ensemble_benchmarking_repeat_index` should be modified to e.g., 2, 3, ... ... # now score the CASP15-compliant submissions using the official CASP scoring pipeline @@ -719,20 +823,21 @@ Gather all template ligands generated by `TULIP` via its dedicated [GitHub repos ```bash python3 posebench/data/tulip_output_extraction.py dataset=posebusters_benchmark python3 posebench/data/tulip_output_extraction.py dataset=astex_diverse +python3 posebench/data/tulip_output_extraction.py dataset=dockgen python3 posebench/data/tulip_output_extraction.py dataset=casp15 ``` Relax the generated ligand structures inside of their respective protein pockets ```bash -python3 posebench/models/inference_relaxation.py method=tulip dataset=posebusters_benchmark remove_initial_protein_hydrogens=true assign_partial_charges_manually=true num_processes=1 +python3 posebench/models/inference_relaxation.py method=tulip dataset=posebusters_benchmark remove_initial_protein_hydrogens=true assign_partial_charges_manually=true +... +python3 posebench/models/inference_relaxation.py method=tulip dataset=astex_diverse remove_initial_protein_hydrogens=true assign_partial_charges_manually=true ... 
-python3 posebench/models/inference_relaxation.py method=tulip dataset=astex_diverse remove_initial_protein_hydrogens=true assign_partial_charges_manually=true num_processes=1 +python3 posebench/models/inference_relaxation.py method=tulip dataset=dockgen remove_initial_protein_hydrogens=true assign_partial_charges_manually=true ... ``` -**NOTE**: Increase `num_processes` according to your available CPU/GPU resources to improve throughput - Analyze inference results for each dataset ```bash @@ -740,18 +845,20 @@ python3 posebench/analysis/inference_analysis.py method=tulip dataset=posebuster ... python3 posebench/analysis/inference_analysis.py method=tulip dataset=astex_diverse ... +python3 posebench/analysis/inference_analysis.py method=tulip dataset=dockgen +... ``` Analyze inference results for the CASP15 dataset ```bash # then assemble (unrelaxed and post ranking-relaxed) CASP15-compliant prediction submission files for scoring -python3 posebench/models/ensemble_generation.py ensemble_methods=\[tulip\] input_csv_filepath=data/test_cases/casp15/ensemble_inputs.csv output_dir=data/test_cases/casp15/top_tulip_ensemble_predictions_1 skip_existing=true relax_method_ligands_post_ranking=false export_file_format=casp15 export_top_n=5 combine_casp_output_files=true max_method_predictions=40 method_top_n_to_select=40 resume=true ensemble_benchmarking=true ensemble_benchmarking_dataset=casp15 cuda_device_index=0 ensemble_benchmarking_repeat_index=1 -python3 posebench/models/ensemble_generation.py ensemble_methods=\[tulip\] input_csv_filepath=data/test_cases/casp15/ensemble_inputs.csv output_dir=data/test_cases/casp15/top_tulip_ensemble_predictions_1 skip_existing=true relax_method_ligands_post_ranking=true export_file_format=casp15 export_top_n=5 combine_casp_output_files=true max_method_predictions=40 method_top_n_to_select=40 resume=true ensemble_benchmarking=true ensemble_benchmarking_dataset=casp15 cuda_device_index=0 ensemble_benchmarking_repeat_index=1 +python3 
posebench/models/ensemble_generation.py ensemble_methods=\[tulip\] input_csv_filepath=data/test_cases/casp15/ensemble_inputs.csv output_dir=data/test_cases/casp15/top_tulip_ensemble_predictions_1 skip_existing=true relax_method_ligands_post_ranking=false export_file_format=casp15 export_top_n=5 combine_casp_output_files=true max_method_predictions=40 method_top_n_to_select=5 resume=true ensemble_benchmarking=true ensemble_benchmarking_dataset=casp15 cuda_device_index=0 ensemble_benchmarking_repeat_index=1 +python3 posebench/models/ensemble_generation.py ensemble_methods=\[tulip\] input_csv_filepath=data/test_cases/casp15/ensemble_inputs.csv output_dir=data/test_cases/casp15/top_tulip_ensemble_predictions_1 skip_existing=true relax_method_ligands_post_ranking=true export_file_format=casp15 export_top_n=5 combine_casp_output_files=true max_method_predictions=40 method_top_n_to_select=5 resume=true ensemble_benchmarking=true ensemble_benchmarking_dataset=casp15 cuda_device_index=0 ensemble_benchmarking_repeat_index=1 # NOTE: the suffixes for both `output_dir` and `ensemble_benchmarking_repeat_index` should be modified to e.g., 2, 3, ... ... # now score the CASP15-compliant submissions using the official CASP scoring pipeline -python3 posebench/analysis/inference_analysis_casp.py method=tulip dataset=casp15 targets='[H1135, H1171v1, H1171v2, H1172v1, H1172v2, H1172v3, H1172v4, T1124, T1127v2, T1152, T1158v1, T1158v2, T1158v3, T1158v4, T1186, T1187]' +python3 posebench/analysis/inference_analysis_casp.py method=tulip dataset=casp15 targets='[H1171v1, H1171v2, H1172v1, H1172v2, H1172v3, H1172v4, T1124, T1127v2, T1152, T1158v1, T1158v2, T1158v3, T1181, T1186, T1187, T1188]' ... 
``` @@ -766,35 +873,35 @@ Using an `ensemble` of methods, generate predictions for a new protein target us ```bash # generate each method's prediction script for a target # NOTE: to predict input ESMFold protein structures when they are not already locally available in `data/ensemble_proteins/`, e.g., on a SLURM cluster first run e.g., `srun --partition=gpu --gres=gpu:A100:1 --mem=59G --time=01:00:00 --pty bash` to ensure a GPU is available for inference -python3 posebench/models/ensemble_generation.py input_csv_filepath=data/test_cases/5S8I_2LY/ensemble_inputs.csv output_dir=data/test_cases/5S8I_2LY/top_consensus_ensemble_predictions_1 max_method_predictions=40 ensemble_ranking_method=consensus resume=false ensemble_methods='[diffdock, dynamicbind, neuralplexer, rfaa]' +python3 posebench/models/ensemble_generation.py input_csv_filepath=data/test_cases/5S8I_2LY/ensemble_inputs.csv output_dir=data/test_cases/5S8I_2LY/top_consensus_ensemble_predictions_1 max_method_predictions=40 method_top_n_to_select=3 ensemble_ranking_method=consensus resume=false ensemble_methods='[diffdock, dynamicbind, neuralplexer, rfaa]' # ... # now, manually run each desired method's generated prediction script, with the exception of AutoDock Vina which uses other methods' predictions # ... 
-python3 posebench/models/ensemble_generation.py input_csv_filepath=data/test_cases/5S8I_2LY/ensemble_inputs.csv output_dir=data/test_cases/5S8I_2LY/top_consensus_ensemble_predictions_1 max_method_predictions=40 ensemble_ranking_method=consensus resume=true generate_vina_scripts=true vina_binding_site_methods=[diffdock] +python3 posebench/models/ensemble_generation.py input_csv_filepath=data/test_cases/5S8I_2LY/ensemble_inputs.csv output_dir=data/test_cases/5S8I_2LY/top_consensus_ensemble_predictions_1 max_method_predictions=40 method_top_n_to_select=3 ensemble_ranking_method=consensus resume=true generate_vina_scripts=true vina_binding_site_methods=\[diffdock\] # now, manually run AutoDock Vina's generated prediction script for each binding site prediction method #... # lastly, organize each method's predictions together -python3 posebench/models/ensemble_generation.py input_csv_filepath=data/test_cases/5S8I_2LY/ensemble_inputs.csv output_dir=data/test_cases/5S8I_2LY/top_consensus_ensemble_predictions_1 max_method_predictions=40 ensemble_ranking_method=consensus resume=true generate_vina_scripts=false vina_binding_site_methods=[diffdock] +python3 posebench/models/ensemble_generation.py input_csv_filepath=data/test_cases/5S8I_2LY/ensemble_inputs.csv output_dir=data/test_cases/5S8I_2LY/top_consensus_ensemble_predictions_1 max_method_predictions=40 method_top_n_to_select=3 ensemble_ranking_method=consensus resume=true generate_vina_scripts=false vina_binding_site_methods=\[diffdock\] ``` Benchmark (ensemble-)ranked predictions across each test dataset ```bash # benchmark using the PoseBusters Benchmark dataset e.g., after generating 40 complexes per target with each method -python3 posebench/models/ensemble_generation.py input_csv_filepath=data/test_cases/posebusters_benchmark/ensemble_inputs.csv output_dir=data/test_cases/posebusters_benchmark/top_consensus_ensemble_predictions_1 max_method_predictions=40 export_top_n=1 export_file_format=null skip_existing=true 
relax_method_ligands_post_ranking=false resume=true cuda_device_index=0 ensemble_methods='[diffdock, dynamicbind, neuralplexer, rfaa, tulip, vina]' ensemble_benchmarking=true ensemble_benchmarking_dataset=posebusters_benchmark ensemble_ranking_method=consensus ensemble_benchmarking_repeat_index=1 -python3 posebench/models/ensemble_generation.py input_csv_filepath=data/test_cases/posebusters_benchmark/ensemble_inputs.csv output_dir=data/test_cases/posebusters_benchmark/top_consensus_ensemble_predictions_1 max_method_predictions=40 export_top_n=1 export_file_format=null skip_existing=true relax_method_ligands_post_ranking=true resume=true cuda_device_index=0 ensemble_methods='[diffdock, dynamicbind, neuralplexer, rfaa, tulip, vina]' ensemble_benchmarking=true ensemble_benchmarking_dataset=posebusters_benchmark ensemble_ranking_method=consensus ensemble_benchmarking_repeat_index=1 +python3 posebench/models/ensemble_generation.py input_csv_filepath=data/test_cases/posebusters_benchmark/ensemble_inputs.csv output_dir=data/test_cases/posebusters_benchmark/top_consensus_ensemble_predictions_1 max_method_predictions=40 method_top_n_to_select=3 export_top_n=1 export_file_format=null skip_existing=true relax_method_ligands_post_ranking=false resume=true cuda_device_index=0 ensemble_methods='[diffdock, dynamicbind, neuralplexer, rfaa]' ensemble_benchmarking=true ensemble_benchmarking_dataset=posebusters_benchmark ensemble_ranking_method=consensus ensemble_benchmarking_repeat_index=1 +python3 posebench/models/ensemble_generation.py input_csv_filepath=data/test_cases/posebusters_benchmark/ensemble_inputs.csv output_dir=data/test_cases/posebusters_benchmark/top_consensus_ensemble_predictions_1 max_method_predictions=40 method_top_n_to_select=3 export_top_n=1 export_file_format=null skip_existing=true relax_method_ligands_post_ranking=true resume=true cuda_device_index=0 ensemble_methods='[diffdock, dynamicbind, neuralplexer, rfaa]' ensemble_benchmarking=true 
ensemble_benchmarking_dataset=posebusters_benchmark ensemble_ranking_method=consensus ensemble_benchmarking_repeat_index=1 ... # benchmark using the Astex Diverse dataset e.g., after generating 40 complexes per target with each method -python3 posebench/models/ensemble_generation.py input_csv_filepath=data/test_cases/astex_diverse/ensemble_inputs.csv output_dir=data/test_cases/astex_diverse/top_consensus_ensemble_predictions_1 max_method_predictions=40 export_top_n=1 export_file_format=null skip_existing=true relax_method_ligands_post_ranking=false resume=true cuda_device_index=0 ensemble_methods='[diffdock, dynamicbind, neuralplexer, rfaa, tulip, vina]' ensemble_benchmarking=true ensemble_benchmarking_dataset=astex_diverse ensemble_ranking_method=consensus ensemble_benchmarking_repeat_index=1 -python3 posebench/models/ensemble_generation.py input_csv_filepath=data/test_cases/astex_diverse/ensemble_inputs.csv output_dir=data/test_cases/astex_diverse/top_consensus_ensemble_predictions_1 max_method_predictions=40 export_top_n=1 export_file_format=null skip_existing=true relax_method_ligands_post_ranking=true resume=true cuda_device_index=0 ensemble_methods='[diffdock, dynamicbind, neuralplexer, rfaa, tulip, vina]' ensemble_benchmarking=true ensemble_benchmarking_dataset=astex_diverse ensemble_ranking_method=consensus ensemble_benchmarking_repeat_index=1 +python3 posebench/models/ensemble_generation.py input_csv_filepath=data/test_cases/astex_diverse/ensemble_inputs.csv output_dir=data/test_cases/astex_diverse/top_consensus_ensemble_predictions_1 max_method_predictions=40 method_top_n_to_select=3 export_top_n=1 export_file_format=null skip_existing=true relax_method_ligands_post_ranking=false resume=true cuda_device_index=0 ensemble_methods='[diffdock, dynamicbind, neuralplexer, rfaa]' ensemble_benchmarking=true ensemble_benchmarking_dataset=astex_diverse ensemble_ranking_method=consensus ensemble_benchmarking_repeat_index=1 +python3 
posebench/models/ensemble_generation.py input_csv_filepath=data/test_cases/astex_diverse/ensemble_inputs.csv output_dir=data/test_cases/astex_diverse/top_consensus_ensemble_predictions_1 max_method_predictions=40 method_top_n_to_select=3 export_top_n=1 export_file_format=null skip_existing=true relax_method_ligands_post_ranking=true resume=true cuda_device_index=0 ensemble_methods='[diffdock, dynamicbind, neuralplexer, rfaa]' ensemble_benchmarking=true ensemble_benchmarking_dataset=astex_diverse ensemble_ranking_method=consensus ensemble_benchmarking_repeat_index=1 ... # benchmark using the DockGen dataset e.g., after generating 40 complexes per target with each method -python3 posebench/models/ensemble_generation.py input_csv_filepath=data/test_cases/dockgen/ensemble_inputs.csv output_dir=data/test_cases/dockgen/top_consensus_ensemble_predictions_1 max_method_predictions=40 export_top_n=1 export_file_format=null skip_existing=true relax_method_ligands_post_ranking=false resume=true cuda_device_index=0 ensemble_methods='[diffdock, dynamicbind, neuralplexer, rfaa, vina]' ensemble_benchmarking=true ensemble_benchmarking_dataset=dockgen ensemble_ranking_method=consensus ensemble_benchmarking_repeat_index=1 -python3 posebench/models/ensemble_generation.py input_csv_filepath=data/test_cases/dockgen/ensemble_inputs.csv output_dir=data/test_cases/dockgen/top_consensus_ensemble_predictions_1 max_method_predictions=40 export_top_n=1 export_file_format=null skip_existing=true relax_method_ligands_post_ranking=true resume=true cuda_device_index=0 ensemble_methods='[diffdock, dynamicbind, neuralplexer, rfaa, vina]' ensemble_benchmarking=true ensemble_benchmarking_dataset=dockgen ensemble_ranking_method=consensus ensemble_benchmarking_repeat_index=1 +python3 posebench/models/ensemble_generation.py input_csv_filepath=data/test_cases/dockgen/ensemble_inputs.csv output_dir=data/test_cases/dockgen/top_consensus_ensemble_predictions_1 max_method_predictions=40 
method_top_n_to_select=3 export_top_n=1 export_file_format=null skip_existing=true relax_method_ligands_post_ranking=false resume=true cuda_device_index=0 ensemble_methods='[diffdock, dynamicbind, neuralplexer, rfaa]' ensemble_benchmarking=true ensemble_benchmarking_dataset=dockgen ensemble_ranking_method=consensus ensemble_benchmarking_repeat_index=1 +python3 posebench/models/ensemble_generation.py input_csv_filepath=data/test_cases/dockgen/ensemble_inputs.csv output_dir=data/test_cases/dockgen/top_consensus_ensemble_predictions_1 max_method_predictions=40 method_top_n_to_select=3 export_top_n=1 export_file_format=null skip_existing=true relax_method_ligands_post_ranking=true resume=true cuda_device_index=0 ensemble_methods='[diffdock, dynamicbind, neuralplexer, rfaa]' ensemble_benchmarking=true ensemble_benchmarking_dataset=dockgen ensemble_ranking_method=consensus ensemble_benchmarking_repeat_index=1 ... # benchmark using the CASP15 dataset e.g., after generating 40 complexes per target with each method -python3 posebench/models/ensemble_generation.py input_csv_filepath=data/test_cases/casp15/ensemble_inputs.csv output_dir=data/test_cases/casp15/top_consensus_ensemble_predictions_1 combine_casp_output_files=true max_method_predictions=40 export_top_n=5 export_file_format=casp15 skip_existing=true relax_method_ligands_post_ranking=false resume=true cuda_device_index=0 ensemble_methods='[diffdock, dynamicbind, neuralplexer, rfaa, tulip, vina]' ensemble_benchmarking=true ensemble_benchmarking_dataset=casp15 ensemble_ranking_method=consensus ensemble_benchmarking_repeat_index=1 -python3 posebench/models/ensemble_generation.py input_csv_filepath=data/test_cases/casp15/ensemble_inputs.csv output_dir=data/test_cases/casp15/top_consensus_ensemble_predictions_1 combine_casp_output_files=true max_method_predictions=40 export_top_n=5 export_file_format=casp15 skip_existing=true relax_method_ligands_post_ranking=true resume=true cuda_device_index=0 
ensemble_methods='[diffdock, dynamicbind, neuralplexer, rfaa, tulip, vina]' ensemble_benchmarking=true ensemble_benchmarking_dataset=casp15 ensemble_ranking_method=consensus ensemble_benchmarking_repeat_index=1 +python3 posebench/models/ensemble_generation.py input_csv_filepath=data/test_cases/casp15/ensemble_inputs.csv output_dir=data/test_cases/casp15/top_consensus_ensemble_predictions_1 combine_casp_output_files=true max_method_predictions=40 method_top_n_to_select=3 export_top_n=5 export_file_format=casp15 skip_existing=true relax_method_ligands_post_ranking=false resume=true cuda_device_index=0 ensemble_methods='[diffdock, dynamicbind, neuralplexer, rfaa]' ensemble_benchmarking=true ensemble_benchmarking_dataset=casp15 ensemble_ranking_method=consensus ensemble_benchmarking_repeat_index=1 +python3 posebench/models/ensemble_generation.py input_csv_filepath=data/test_cases/casp15/ensemble_inputs.csv output_dir=data/test_cases/casp15/top_consensus_ensemble_predictions_1 combine_casp_output_files=true max_method_predictions=40 method_top_n_to_select=3 export_top_n=5 export_file_format=casp15 skip_existing=true relax_method_ligands_post_ranking=true resume=true cuda_device_index=0 ensemble_methods='[diffdock, dynamicbind, neuralplexer, rfaa]' ensemble_benchmarking=true ensemble_benchmarking_dataset=casp15 ensemble_ranking_method=consensus ensemble_benchmarking_repeat_index=1 ... # analyze benchmarking results for the PoseBusters Benchmark dataset python3 posebench/analysis/inference_analysis.py method=ensemble dataset=posebusters_benchmark repeat_index=1 @@ -806,7 +913,7 @@ python3 posebench/analysis/inference_analysis.py method=ensemble dataset=astex_d python3 posebench/analysis/inference_analysis.py method=ensemble dataset=dockgen repeat_index=1 ... 
# analyze benchmarking results for the CASP15 dataset -python3 posebench/analysis/inference_analysis_casp.py method=ensemble dataset=casp15 ensemble_ranking_method=consensus repeat_index=1 +python3 posebench/analysis/inference_analysis_casp.py method=ensemble dataset=casp15 repeat_index=1 ... ``` @@ -877,6 +984,7 @@ rm -rf docs/build/ && sphinx-build docs/source/ docs/build/ # NOTE: errors can s - [AutoDock-Vina](https://github.com/ccsb-scripps/AutoDock-Vina) - [casp15_ligand](https://git.scicore.unibas.ch/schwede/casp15_ligand) +- [chai-lab](https://github.com/chaidiscovery/chai-lab) - [DiffDock](https://github.com/gcorso/DiffDock) - [FABind](https://github.com/QizhiPei/FABind) - [DynamicBind](https://github.com/luwei0917/DynamicBind) diff --git a/configs/analysis/complex_alignment.yaml b/configs/analysis/complex_alignment.yaml index 21cad23f..555aa259 100644 --- a/configs/analysis/complex_alignment.yaml +++ b/configs/analysis/complex_alignment.yaml @@ -1,11 +1,12 @@ -method: neuralplexer # the method for which to align predictions - NOTE: must be one of (`diffdock`, `fabind`, `dynamicbind`, `neuralplexer`, `rfaa`) -vina_binding_site_method: diffdock # the method to use for Vina binding site prediction - NOTE: must be one of (`diffdock`, `fabind`, `dynamicbind`, `neuralplexer`, `rfaa`) +method: neuralplexer # the method for which to align predictions - NOTE: must be one of (`diffdock`, `fabind`, `dynamicbind`, `neuralplexer`, `rfaa`, `chai-lab`) +vina_binding_site_method: diffdock # the method to use for Vina binding site prediction - NOTE: must be one of (`diffdock`, `fabind`, `dynamicbind`, `neuralplexer`, `rfaa`, `chai-lab`) dataset: posebusters_benchmark # the dataset to use - NOTE: must be one of (`posebusters_benchmark`, `astex_diverse`, `dockgen`, `casp15`) ensemble_ranking_method: consensus # the method with which to rank-order and select the top ensemble prediction for each target - NOTE: must be one of (`consensus`, `ff`) input_data_dir: 
${oc.env:PROJECT_ROOT}/data/${dataset}_set # the input protein-ligand complex directory to recursively parse -output_dir: ${resolve_method_output_dir:${method},${dataset},${vina_binding_site_method},${ensemble_ranking_method},${repeat_index}} # the output directory to which to save the relaxed predictions +output_dir: ${resolve_method_output_dir:${method},${dataset},${vina_binding_site_method},${ensemble_ranking_method},${repeat_index},${pocket_only_baseline},${v1_baseline}} # the output directory to which to save the relaxed predictions rank_to_align: 1 # the pose rank to align -aligned_filename_postfix: "_aligned" # the postfix to append to each aligned complex filename +aligned_filename_suffix: "_aligned" # the suffix to append to each aligned complex filename force_process: false # whether to force processing of all complexes, even if they have already been processed repeat_index: 1 # the repeat index which was used for inference pocket_only_baseline: false # whether to prepare the pocket-only baseline +v1_baseline: false # whether to prepare the v1 baseline diff --git a/configs/analysis/inference_analysis.yaml b/configs/analysis/inference_analysis.yaml index 64b71f1e..9ca874fd 100644 --- a/configs/analysis/inference_analysis.yaml +++ b/configs/analysis/inference_analysis.yaml @@ -1,13 +1,14 @@ full_report: true # whether to generate a full PoseBusters report (i.e. with all metrics) or a summary report (i.e. 
with only the most important metrics) -method: diffdock # the method for which to score predictions - NOTE: must be one of (`diffdock`, `fabind`, `dynamicbind`, `neuralplexer`, `rfaa`, `vina`, `ensemble`) -vina_binding_site_method: diffdock # the method to use for Vina binding site prediction - NOTE: must be one of (`diffdock`, `fabind`, `dynamicbind`, `neuralplexer`, `rfaa`, `p2rank`) +method: diffdock # the method for which to score predictions - NOTE: must be one of (`diffdock`, `fabind`, `dynamicbind`, `neuralplexer`, `rfaa`, `chai-lab`, `vina`, `ensemble`) +vina_binding_site_method: diffdock # the method to use for Vina binding site prediction - NOTE: must be one of (`diffdock`, `fabind`, `dynamicbind`, `neuralplexer`, `rfaa`, `chai-lab`, `p2rank`) dataset: posebusters_benchmark # the dataset to use - NOTE: must be one of (`posebusters_benchmark`, `astex_diverse`, `dockgen`, `casp15`) ensemble_ranking_method: consensus # the method with which to rank-order and select the top ensemble prediction for each target - NOTE: must be one of (`consensus`, `ff`) -input_csv_path: ${resolve_method_input_csv_path:${method},${dataset}} # the input CSV filepath with which to run inference +input_csv_path: ${resolve_method_input_csv_path:${method},${dataset},${pocket_only_baseline}} # the input CSV filepath with which to run inference input_data_dir: ${oc.env:PROJECT_ROOT}/data/${dataset}_set # the input protein-ligand complex directory to recursively parse posebusters_ccd_ids_filepath: ${oc.env:PROJECT_ROOT}/data/posebusters_pdb_ccd_ids.txt # the path to the PoseBusters PDB CCD IDs file that lists the targets that do not contain any crystal contacts -dockgen_test_ids_filepath: ${oc.env:PROJECT_ROOT}/data/dockgen_set/split_test_rmsd_filtered.txt # the path to the (ESMFold RMSD-filtered) DockGen test set IDs file -output_dir: ${resolve_method_output_dir:${method},${dataset},${vina_binding_site_method},${ensemble_ranking_method},${repeat_index}} # the output directory to which 
to save the relaxed predictions +dockgen_test_ids_filepath: ${oc.env:PROJECT_ROOT}/data/dockgen_set/split_test_rmsd_filtered.txt # the path to the (predicted RMSD-filtered) DockGen test set IDs file +output_dir: ${resolve_method_output_dir:${method},${dataset},${vina_binding_site_method},${ensemble_ranking_method},${repeat_index},${pocket_only_baseline},${v1_baseline}} # the output directory to which to save the relaxed predictions repeat_index: 1 # the repeat index which was used for inference pocket_only_baseline: false # whether to analyze the pocket-only baseline +v1_baseline: false # whether to analyze the v1 baseline relax_protein: false # whether to relax the protein - NOTE: currently periodically yields unpredictable protein-ligand separation diff --git a/configs/analysis/inference_analysis_casp.yaml b/configs/analysis/inference_analysis_casp.yaml index 7c046e2f..b8413789 100644 --- a/configs/analysis/inference_analysis_casp.yaml +++ b/configs/analysis/inference_analysis_casp.yaml @@ -1,8 +1,8 @@ full_report: true # whether to generate a full PoseBusters report (i.e. with all metrics) or a summary report (i.e. 
with only the most important metrics) python_exec_path: ${oc.env:HOME}/mambaforge/envs/casp15_ligand_scoring/bin/python3 # the Python executable to use scoring_script_path: ${oc.env:PROJECT_ROOT}/posebench/analysis/casp15_ligand_scoring/score_predictions.py # the path to the script to use for scoring CASP predictions -method: diffdock # the method for which to score predictions - NOTE: must be one of (`diffdock`, `fabind`, `dynamicbind`, `neuralplexer`, `rfaa`, `vina`, `ensemble`, `tulip`) -vina_binding_site_method: diffdock # the method to use for Vina binding site prediction - NOTE: must be one of (`diffdock`, `fabind`, `dynamicbind`, `neuralplexer`, `rfaa`) +method: diffdock # the method for which to score predictions - NOTE: must be one of (`diffdock`, `fabind`, `dynamicbind`, `neuralplexer`, `rfaa`, `chai-lab`, `vina`, `ensemble`, `tulip`) +vina_binding_site_method: diffdock # the method to use for Vina binding site prediction - NOTE: must be one of (`diffdock`, `fabind`, `dynamicbind`, `neuralplexer`, `rfaa`, `chai-lab`) dataset: casp15 # the dataset to use - NOTE: must be one of (`casp15`) ensemble_ranking_method: consensus # the method with which to rank-order and select the top ensemble prediction for each target - NOTE: must be one of (`consensus`, `ff`) predictions_dir: ${oc.env:PROJECT_ROOT}/data/test_cases/${dataset}/top_${method}_ensemble_predictions_${repeat_index} # the directory containing the predictions to analyze @@ -12,4 +12,6 @@ fault_tolerant: true # whether to continue processing targets if an error occurs skip_existing: true # whether to skip processing targets for which output already exists score_relaxed_structures: true # whether to score relaxed structures in addition to the original (unrelaxed) structures repeat_index: 1 # the run index to use for scoring predictions -no_pretraining: false # whether to score a model without pretraining +no_ilcl: false # whether to score a model trained without an inter-ligand clash loss (ILCL) - NOTE: 
only applicable to the `neuralplexer` method +relax_protein: false # whether to relax the protein - NOTE: currently periodically yields unpredictable protein-ligand separation +v1_baseline: false # whether to score the v1 baseline predictions diff --git a/configs/data/chai_input_preparation.yaml b/configs/data/chai_input_preparation.yaml new file mode 100644 index 00000000..e8a5c2d7 --- /dev/null +++ b/configs/data/chai_input_preparation.yaml @@ -0,0 +1,9 @@ +dataset: posebusters_benchmark # the dataset to use - NOTE: must be one of (`posebusters_benchmark`, `astex_diverse`, `dockgen`, `casp15`) +input_data_dir: ${oc.env:PROJECT_ROOT}/data/${dataset}_set # the input protein-ligand complex directory to recursively parse +output_scripts_path: ${oc.env:PROJECT_ROOT}/forks/chai-lab/prediction_inputs/${dataset} # the output directory in which to save the input files +posebusters_ccd_ids_filepath: ${oc.env:PROJECT_ROOT}/data/posebusters_pdb_ccd_ids.txt # the path to the PoseBusters PDB CCD IDs file that lists the targets that do not contain any crystal contacts +dockgen_test_ids_filepath: ${oc.env:PROJECT_ROOT}/data/dockgen_set/split_test.txt # the path to the DockGen test set IDs file +protein_filepath: null # the path to the protein structure file to use +ligand_smiles: null # the ligand SMILES string for which to predict the binding pose +input_id: null # the input ID to use for inference +pocket_only_baseline: false # whether to prepare the pocket-only baseline diff --git a/configs/data/chai_output_extraction.yaml b/configs/data/chai_output_extraction.yaml new file mode 100644 index 00000000..853005d3 --- /dev/null +++ b/configs/data/chai_output_extraction.yaml @@ -0,0 +1,13 @@ +dataset: posebusters_benchmark # the dataset to use - NOTE: must be one of (`posebusters_benchmark`, `astex_diverse`, `dockgen`, `casp15`) +prediction_inputs_dir: ${oc.env:PROJECT_ROOT}/forks/chai-lab/prediction_inputs/${dataset} +prediction_outputs_dir: 
${oc.env:PROJECT_ROOT}/forks/chai-lab/prediction_outputs/${dataset}_${repeat_index} +inference_outputs_dir: ${oc.env:PROJECT_ROOT}/forks/chai-lab/inference/chai-lab_${dataset}_outputs_${repeat_index} +input_data_dir: ${oc.env:PROJECT_ROOT}/data/${dataset}_set # the input protein-ligand complex directory to recursively parse +posebusters_ccd_ids_filepath: ${oc.env:PROJECT_ROOT}/data/posebusters_pdb_ccd_ids.txt # the path to the PoseBusters PDB CCD IDs file that lists the targets that do not contain any crystal contacts +dockgen_test_ids_filepath: ${oc.env:PROJECT_ROOT}/data/dockgen_set/split_test.txt # the path to the DockGen test set IDs file +complex_filepath: null # if not `null`, this should be the path to the complex PDB file for which to extract outputs +complex_id: null # if not `null`, this should be the complex ID of the single complex for which to extract outputs +ligand_smiles: null # if not `null`, this should be the (i.e., `.` fragment-separated) complex ligand SMILES string of the single complex for which to extract outputs +output_dir: null # if not `null`, this should be the path to the output file to which to write the extracted outputs +repeat_index: 1 # the repeat index with which inference was run +pocket_only_baseline: false # whether to prepare the pocket-only baseline diff --git a/configs/data/diffdock_input_preparation.yaml b/configs/data/diffdock_input_preparation.yaml index 62e0df6f..100f429b 100644 --- a/configs/data/diffdock_input_preparation.yaml +++ b/configs/data/diffdock_input_preparation.yaml @@ -1,6 +1,6 @@ dataset: posebusters_benchmark # the dataset to use - NOTE: must be one of (`posebusters_benchmark`, `astex_diverse`, `dockgen`, `casp15`) input_data_dir: ${oc.env:PROJECT_ROOT}/data/${dataset}_set # the input protein-ligand complex directory to recursively parse -input_protein_structure_dir: ${oc.env:PROJECT_ROOT}/data/${dataset}_set/${dataset}_holo_aligned_esmfold_structures # the input protein structure directory to parse 
+input_protein_structure_dir: ${oc.env:PROJECT_ROOT}/data/${dataset}_set/${dataset}_holo_aligned_predicted_structures # the input protein structure directory to parse output_csv_path: ${oc.env:PROJECT_ROOT}/forks/DiffDock/inference/diffdock_${dataset}_inputs.csv # the output CSV filepath to which to write the parsed input data posebusters_ccd_ids_filepath: ${oc.env:PROJECT_ROOT}/data/posebusters_pdb_ccd_ids.txt # the path to the PoseBusters PDB CCD IDs file that lists the targets that do not contain any crystal contacts dockgen_test_ids_filepath: ${oc.env:PROJECT_ROOT}/data/dockgen_set/split_test.txt # the path to the DockGen test set IDs file diff --git a/configs/data/dynamicbind_input_preparation.yaml b/configs/data/dynamicbind_input_preparation.yaml index b55208a2..bdb6cb19 100644 --- a/configs/data/dynamicbind_input_preparation.yaml +++ b/configs/data/dynamicbind_input_preparation.yaml @@ -6,3 +6,4 @@ posebusters_ccd_ids_filepath: ${oc.env:PROJECT_ROOT}/data/posebusters_pdb_ccd_id dockgen_test_ids_filepath: ${oc.env:PROJECT_ROOT}/data/dockgen_set/split_test.txt # the path to the DockGen test set IDs file protein_filepath: null # the path to the protein structure file to use ligand_smiles: null # the ligand SMILES string for which to predict the binding pose +pocket_only_baseline: false # whether to prepare the pocket-only baseline diff --git a/configs/data/fabind_input_preparation.yaml b/configs/data/fabind_input_preparation.yaml index 7267d06f..24016784 100644 --- a/configs/data/fabind_input_preparation.yaml +++ b/configs/data/fabind_input_preparation.yaml @@ -3,3 +3,4 @@ input_data_dir: ${oc.env:PROJECT_ROOT}/data/${dataset}_set # the input protein-l output_csv_path: ${oc.env:PROJECT_ROOT}/forks/FABind/inference/fabind_${dataset}_inputs.csv # the output CSV filepath to which to write the parsed input data posebusters_ccd_ids_filepath: ${oc.env:PROJECT_ROOT}/data/posebusters_pdb_ccd_ids.txt # the path to the PoseBusters PDB CCD IDs file that lists the targets 
that do not contain any crystal contacts dockgen_test_ids_filepath: ${oc.env:PROJECT_ROOT}/data/dockgen_set/split_test.txt # the path to the DockGen test set IDs file +pocket_only_baseline: false # whether to prepare the pocket-only baseline diff --git a/configs/data/neuralplexer_input_preparation.yaml b/configs/data/neuralplexer_input_preparation.yaml index 4a03afb6..8ea57215 100644 --- a/configs/data/neuralplexer_input_preparation.yaml +++ b/configs/data/neuralplexer_input_preparation.yaml @@ -1,6 +1,6 @@ dataset: posebusters_benchmark # the dataset to use - NOTE: must be one of (`posebusters_benchmark`, `astex_diverse`, `dockgen`, `casp15`) input_data_dir: ${oc.env:PROJECT_ROOT}/data/${dataset}_set # the input protein-ligand complex directory to recursively parse -input_receptor_structure_dir: ${oc.env:PROJECT_ROOT}/data/${dataset}_set/${dataset}_holo_aligned_esmfold_structures # if not `null`, the input template protein structure directory to parse +input_receptor_structure_dir: ${oc.env:PROJECT_ROOT}/data/${dataset}_set/${dataset}_holo_aligned_predicted_structures # if not `null`, the input template protein structure directory to parse output_csv_path: ${oc.env:PROJECT_ROOT}/forks/NeuralPLexer/inference/neuralplexer_${dataset}_inputs.csv # the output CSV filepath to which to write the parsed input data posebusters_ccd_ids_filepath: ${oc.env:PROJECT_ROOT}/data/posebusters_pdb_ccd_ids.txt # the path to the PoseBusters PDB CCD IDs file that lists the targets that do not contain any crystal contacts dockgen_test_ids_filepath: ${oc.env:PROJECT_ROOT}/data/dockgen_set/split_test.txt # the path to the DockGen test set IDs file diff --git a/configs/data/rfaa_output_extraction.yaml b/configs/data/rfaa_output_extraction.yaml index fc3639b3..cb989e9e 100644 --- a/configs/data/rfaa_output_extraction.yaml +++ b/configs/data/rfaa_output_extraction.yaml @@ -7,3 +7,4 @@ complex_id: null # if not `null`, this should be the complex ID of the single co ligand_smiles: null # if 
not `null`, this should be the (i.e., `.` fragment-separated) complex ligand SMILES string of the single complex for which to extract outputs output_dir: null # if not `null`, this should be the path to the output file to which to write the extracted outputs repeat_index: 1 # the repeat index with which inference was run +pocket_only_baseline: false # whether to prepare the pocket-only baseline diff --git a/configs/data/tulip_output_extraction.yaml b/configs/data/tulip_output_extraction.yaml index 503f9152..8786dfc5 100644 --- a/configs/data/tulip_output_extraction.yaml +++ b/configs/data/tulip_output_extraction.yaml @@ -2,5 +2,6 @@ dataset: posebusters_benchmark # the dataset to use - NOTE: must be one of (`pos prediction_outputs_dir: ${oc.env:PROJECT_ROOT}/forks/TULIP/outputs/${dataset}_${repeat_index} inference_outputs_dir: ${oc.env:PROJECT_ROOT}/forks/TULIP/inference/tulip_${dataset}_outputs_${repeat_index} posebusters_ccd_ids_filepath: ${oc.env:PROJECT_ROOT}/data/posebusters_pdb_ccd_ids.txt # the path to the PoseBusters PDB CCD IDs file that lists the targets that do not contain any crystal contacts +dockgen_test_ids_filepath: ${oc.env:PROJECT_ROOT}/data/dockgen_set/split_test.txt # the path to the DockGen test set IDs file method_top_n_to_select: 5 # the number of top models for each target to select for analysis repeat_index: 1 # the repeat index to use diff --git a/configs/model/chai_inference.yaml b/configs/model/chai_inference.yaml new file mode 100644 index 00000000..6f94ce9b --- /dev/null +++ b/configs/model/chai_inference.yaml @@ -0,0 +1,8 @@ +dataset: posebusters_benchmark # the dataset to use - NOTE: must be one of (`posebusters_benchmark`, `astex_diverse`, `dockgen`, `casp15`) +input_dir: ${oc.env:PROJECT_ROOT}/forks/chai-lab/prediction_inputs/${dataset} # the input directory with which to run inference +output_dir: ${oc.env:PROJECT_ROOT}/forks/chai-lab/prediction_outputs/${dataset}_${repeat_index} # the output directory to which to save the 
inference results +cuda_device_index: 0 # the CUDA device to use for inference, or `null` to use CPU +repeat_index: 1 # the repeat index to use for inference +skip_existing: true # whether to skip running inference if the prediction for a target already exists +pocket_only_baseline: false # whether to run the pocket-only baseline +max_num_inputs: null # if provided, the number of (dataset subset) inputs over which to run inference diff --git a/configs/model/diffdock_inference.yaml b/configs/model/diffdock_inference.yaml index acb96b73..8188ea7a 100644 --- a/configs/model/diffdock_inference.yaml +++ b/configs/model/diffdock_inference.yaml @@ -14,4 +14,6 @@ actual_steps: 19 # the actual number of inference steps to run (i.e., after how no_final_step_noise: true # whether to disable the final inference step's noise from being added repeat_index: 1 # the repeat index to use for inference skip_existing: true # whether to skip inference for existing output directories +pocket_only_baseline: false # whether to run the pocket-only baseline max_num_inputs: null # if provided, the number of (dataset subset) inputs over which to run inference +v1_baseline: false # whether to run the v1 baseline diff --git a/configs/model/dynamicbind_inference.yaml b/configs/model/dynamicbind_inference.yaml index 062eca5c..c289e498 100644 --- a/configs/model/dynamicbind_inference.yaml +++ b/configs/model/dynamicbind_inference.yaml @@ -2,12 +2,13 @@ cuda_device_index: 0 # the CUDA device to use for inference, or `null` to use CP python_exec_path: ${oc.env:PROJECT_ROOT}/forks/DynamicBind/DynamicBind/bin/python3 # the Python executable to use dynamicbind_exec_dir: ${oc.env:PROJECT_ROOT}/forks/DynamicBind # the DynamicBind directory in which to execute the inference scripts dataset: posebusters_benchmark # the dataset to use - NOTE: must be one of (`posebusters_benchmark`, `astex_diverse`, `dockgen`, `casp15`) -input_data_dir: 
${oc.env:PROJECT_ROOT}/data/${dataset}_set/${dataset}_holo_aligned_esmfold_structures # the input protein-ligand complex directory to recursively parse for protein inputs +input_data_dir: ${oc.env:PROJECT_ROOT}/data/${dataset}_set/${dataset}_holo_aligned_predicted_structures # the input protein-ligand complex directory to recursively parse for protein inputs input_ligand_csv_dir: ${oc.env:PROJECT_ROOT}/forks/DynamicBind/inference/dynamicbind_${dataset}_inputs # the input CSV directory with which to run inference samples_per_complex: 40 # the number of samples to generate per complex savings_per_complex: 1 # the (top-N) number of sample visualizations to save per complex inference_steps: 20 # the number of inference steps to run for each complex batch_size: 5 # the batch size to use for inference +cache_path: ${oc.env:PROJECT_ROOT}/data/dynamicbind_cache/cache # the cache directory to use for storing intermediate data files header: ${dataset} # name of the results directory to create num_workers: 1 # the number of workers to use for native relaxation during inference skip_existing: true # whether to skip existing predictions diff --git a/configs/model/ensemble_generation.yaml b/configs/model/ensemble_generation.yaml index 5b07176b..67fb385d 100644 --- a/configs/model/ensemble_generation.yaml +++ b/configs/model/ensemble_generation.yaml @@ -1,4 +1,5 @@ -ensemble_methods: [diffdock, dynamicbind, neuralplexer, rfaa, vina, tulip] # the methods from which to gather predictions for ensembling - NOTE: must be one of (`diffdock`, `dynamicbind`, `neuralplexer`, `rfaa`, `vina`, `tulip`) +# General inference arguments: +ensemble_methods: [diffdock, dynamicbind, neuralplexer, rfaa] # the methods from which to gather predictions for ensembling - NOTE: must be one of (`diffdock`, `dynamicbind`, `neuralplexer`, `rfaa`, `chai-lab`, `vina`, `tulip`) generate_vina_scripts: false # whether to generate Vina scripts using other methods' binding site predictions - NOTE: `resume` must 
also be `true` when this is `true`, meaning other methods' predictions must have already been generated locally rank_single_method_intrinsically: true # whether to rank single-method predictions using either `consensus` or `vina` ranking (false) or instead using their intrinsic (explicit) rank assignment (true) output_bash_file_dir: ensemble_generation_scripts # the directory in which to save the generated Bash scripts @@ -10,7 +11,7 @@ structure_prediction_chunk_size: null # optional chunk size to use during ESMFol structure_prediction_cpu_only: false # whether to only use CPU for structure prediction structure_prediction_cpu_offload: false # whether to offload structure prediction to CPU max_method_predictions: 40 # maximum number of predictions to make with each method -method_top_n_to_select: ${max_method_predictions} # number of top-ranked predictions to select from each method for subsequent ranking +method_top_n_to_select: 3 # number of top-ranked predictions to select from each method for subsequent ranking skip_existing: false # whether to skip existing ensemble predictions relax_method_ligands_pre_ranking: false # whether to relax the predicted ligands (method-specifically) before ranking relax_method_ligands_post_ranking: true # whether to relax the predicted ligands (method-agnostically) after ranking @@ -31,7 +32,7 @@ relax_skip_existing: false # whether to skip existing relaxation results resume: false # whether to resume from a previous run after generating and manually running each method's prediction script input_dir: null # optional path to the directory from which to load the ensemble predictions to rank - NOTE: currently, only `neuralplexer` makes use of this for inference output parsing output_dir: ${oc.env:PROJECT_ROOT}/data/test_cases/5S8I_2LY/top_${ensemble_ranking_method}_ensemble_predictions # path to the directory to save the top-ranked ensemble predictions -export_file_format: casp15 # if not `null`, the CASP format (i.e., `casp15`) in 
which to export top-ranked predictions +export_file_format: casp15 # the (optional) format (i.e., `casp15`) in which to export top-ranked predictions export_top_n: 5 # number of top-ranked predictions to export in CASP format casp_author: "001" # group number to report in CASP format casp_method: "Ligand_Predictor" # the method name to report in CASP format @@ -42,7 +43,7 @@ ensemble_benchmarking: false # whether to run ensemble benchmarking ensemble_benchmarking_dataset: posebusters_benchmark # the dataset to use for ensemble benchmarking - NOTE: must be one of (`posebusters_benchmark`, `astex_diverse`, `dockgen`, `casp15`) ensemble_benchmarking_repeat_index: 1 # the repeat index to use for ensemble benchmarking ensemble_ranking_method: consensus # the method with which to rank-order and select the top ensemble prediction for each target - NOTE: must be one of (`consensus`, `ff`) -ensemble_benchmarking_apo_protein_dir: ${oc.env:PROJECT_ROOT}/data/${ensemble_benchmarking_dataset}_set/${ensemble_benchmarking_dataset}_holo_aligned_esmfold_structures # the directory containing the apo proteins to use for ensemble benchmarking +ensemble_benchmarking_apo_protein_dir: ${oc.env:PROJECT_ROOT}/data/${ensemble_benchmarking_dataset}_set/${ensemble_benchmarking_dataset}_holo_aligned_predicted_structures # the directory containing the apo proteins to use for ensemble benchmarking # DiffDock inference arguments: diffdock_python_exec_path: ${oc.env:PROJECT_ROOT}/forks/DiffDock/DiffDock/bin/python3 # the Python executable to use diffdock_exec_dir: ${oc.env:PROJECT_ROOT}/forks/DiffDock # the DiffDock directory in which to execute the inference scripts @@ -57,11 +58,12 @@ diffdock_batch_size: 10 # the batch size to use for inference diffdock_actual_steps: 19 # the actual number of inference steps to run (i.e., after how many steps to halt the reverse diffusion process) diffdock_no_final_step_noise: true # whether to disable the final inference step's noise from being added 
diffdock_skip_existing: true # whether to skip existing predictions +diffdock_v1_baseline: false # whether to run the v1 baseline # DynamicBind inference arguments: dynamicbind_python_exec_path: ${oc.env:PROJECT_ROOT}/forks/DynamicBind/DynamicBind/bin/python3 # the Python executable to use dynamicbind_exec_dir: ${oc.env:PROJECT_ROOT}/forks/DynamicBind # the DynamicBind directory in which to execute the inference scripts dynamicbind_dataset: ensemble # the dataset to use for inference - NOTE: must be one of (`ensemble`) -dynamicbind_input_protein_data_dir: ${oc.env:PROJECT_ROOT}/forks/DynamicBind/inference/ensemble_esmfold_structures # the input protein-ligand complex directory to recursively parse for protein inputs +dynamicbind_input_protein_data_dir: ${oc.env:PROJECT_ROOT}/forks/DynamicBind/inference/ensemble_predicted_structures # the input protein-ligand complex directory to recursively parse for protein inputs dynamicbind_input_ligand_csv_dir: ${oc.env:PROJECT_ROOT}/forks/DynamicBind/inference/dynamicbind_ensemble_inputs # the input CSV directory with which to run inference dynamicbind_samples_per_complex: 40 # the number of samples to generate per complex dynamicbind_savings_per_complex: 1 # the (top-N) number of sample visualizations to save per complex @@ -96,7 +98,7 @@ neuralplexer_use_template: true # whether to use the input template protein stru neuralplexer_separate_pdb: true # whether to separate the predicted protein structures into dedicated PDB files neuralplexer_rank_outputs_by_confidence: true # whether to rank the output conformations, by default, by ligand confidence (if available) and by protein confidence otherwise neuralplexer_plddt_ranking_type: ligand # the type of plDDT ranking to apply to generated samples - NOTE: must be one of (`protein`, `ligand`, `protein_ligand`) -neuralplexer_no_pretraining: false # whether to avoid loading pretrained weights +neuralplexer_no_ilcl: false # whether to score the NeuralPLexer weights trained without 
an inter-ligand clash loss (ILCL) # RoseTTAFold-All-Atom inference arguments: rfaa_python_exec_path: ${oc.env:PROJECT_ROOT}/forks/RoseTTAFold-All-Atom/RFAA/bin/python3 # the Python executable to use rfaa_exec_dir: ${oc.env:PROJECT_ROOT}/forks/RoseTTAFold-All-Atom # the RoseTTAFold-All-Atom directory in which to execute the inference scripts @@ -105,8 +107,11 @@ rfaa_output_dir: ${oc.env:PROJECT_ROOT}/forks/RoseTTAFold-All-Atom/inference/rfa rfaa_max_cycles: 10 # the maximum number recycling iterations to run rfaa_inference_config_name: null # the name of the inference config to use - NOTE: if `run_inference_directly` is true, this must reference a valid YAML config file name e.g., that was generated by `python posebench/models/rfaa_inference.py` with `run_inference_directly=false` rfaa_inference_dir_name: null # the name of the inference output directory to use +# Chai-1 inference arguments: +chai_out_path: ${oc.env:PROJECT_ROOT}/forks/chai-lab/inference/chai-lab_ensemble_outputs # the output directory to which to write the predictions +chai_skip_existing: true # whether to skip running inference if the prediction for a target already exists # Vina inference arguments: -vina_binding_site_methods: [diffdock] # the methods to use for Vina binding site prediction - NOTE: must be one of (`diffdock`, `fabind`, `dynamicbind`, `neuralplexer`, `rfaa`) +vina_binding_site_methods: [diffdock, p2rank] # the methods to use for Vina binding site prediction - NOTE: must be one of (`diffdock`, `dynamicbind`, `neuralplexer`, `p2rank`) vina_python2_exec_path: ${oc.env:PROJECT_ROOT}/forks/Vina/ADFR/bin/python # the path to the Python 2 executable vina_prepare_receptor_script_path: ${oc.env:PROJECT_ROOT}/forks/Vina/ADFR/CCSBpckgs/AutoDockTools/Utilities24/prepare_receptor4.py # the path to the prepare_receptor.py script vina_output_dir: ${oc.env:PROJECT_ROOT}/forks/Vina/inference/vina_ensemble_outputs # the output directory to which to save the inference results @@ -121,5 +126,8 @@ 
vina_binding_site_size_z: 25.0 # the z-axis size of the binding site box to use vina_binding_site_spacing: 1.0 # the spacing of the binding site box (in Angstrom) to use with AutoDock Vina vina_num_modes: 40 # the number of binding modes (i.e., poses) to generate with AutoDock Vina vina_skip_existing: true # whether to skip existing output files +vina_p2rank_exec_utility: predict # the P2Rank executable utility to use for inference +vina_p2rank_config: alphafold # the P2Rank configuration to use for inference +vina_p2rank_enable_pymol_visualizations: false # whether to enable P2Rank's PyMOL visualizations # TULIP inference arguments: tulip_output_dir: ${oc.env:PROJECT_ROOT}/forks/TULIP/inference/tulip_ensemble_outputs # the output directory to which to save the inference results diff --git a/configs/model/fabind_inference.yaml b/configs/model/fabind_inference.yaml index 9a8fa0f0..8635ba5a 100644 --- a/configs/model/fabind_inference.yaml +++ b/configs/model/fabind_inference.yaml @@ -3,10 +3,10 @@ python_exec_path: ${oc.env:PROJECT_ROOT}/forks/FABind/FABind/bin/python3 # the P fabind_exec_dir: ${oc.env:PROJECT_ROOT}/forks/FABind/fabind # the FABind directory in which to execute the inference scripts dataset: posebusters_benchmark # the dataset to use - NOTE: must be one of (`posebusters_benchmark`, `astex_diverse`, `dockgen`, `casp15`) input_csv_path: ${oc.env:PROJECT_ROOT}/forks/FABind/inference/fabind_${dataset}_inputs.csv # the input CSV filepath with which to run inference -input_data_dir: ${oc.env:PROJECT_ROOT}/data/${dataset}_set/${dataset}_holo_aligned_esmfold_structures # the input protein-ligand complex directory to recursively parse +input_data_dir: ${oc.env:PROJECT_ROOT}/data/${dataset}_set/${dataset}_holo_aligned_predicted_structures # the input protein-ligand complex directory to recursively parse num_threads: 1 # the number of threads to use for inference -save_pt_dir: ${oc.env:PROJECT_ROOT}/forks/FABind/inference/fabind_${dataset}_temp_files # a 
temporary directory in which to save the intermediate PyTorch tensors save_mols_dir: ${oc.env:PROJECT_ROOT}/forks/FABind/inference/fabind_${dataset}_temp_files/mol # a temporary directory in which to save the intermediate RDKit molecules +save_pt_dir: ${oc.env:PROJECT_ROOT}/forks/FABind/inference/fabind_${dataset}_temp_files # a temporary directory in which to save the intermediate PyTorch tensors ckpt_path: ${oc.env:PROJECT_ROOT}/forks/FABind/ckpt/best_model.bin # the checkpoint path to use for inference output_dir: ${oc.env:PROJECT_ROOT}/forks/FABind/inference/fabind_${dataset}_output_${repeat_index} # the output directory to which to save the inference results repeat_index: 1 # the repeat index to use for inference diff --git a/configs/model/inference_relaxation.yaml b/configs/model/inference_relaxation.yaml index 0cffd798..ce8960e6 100644 --- a/configs/model/inference_relaxation.yaml +++ b/configs/model/inference_relaxation.yaml @@ -1,5 +1,5 @@ -method: diffdock # the method for which to relax predictions - NOTE: must be one of (`diffdock`, `fabind`, `dynamicbind`, `neuralplexer`, `rfaa`, `vina`, `tulip`) -vina_binding_site_method: diffdock # the method to use for Vina binding site prediction - NOTE: must be one of (`diffdock`, `fabind`, `dynamicbind`, `neuralplexer`, `rfaa`, `p2rank`) +method: diffdock # the method for which to relax predictions - NOTE: must be one of (`diffdock`, `fabind`, `dynamicbind`, `neuralplexer`, `rfaa`, `chai-lab`, `vina`, `tulip`) +vina_binding_site_method: diffdock # the method to use for Vina binding site prediction - NOTE: must be one of (`diffdock`, `fabind`, `dynamicbind`, `neuralplexer`, `rfaa`, `chai-lab`, `p2rank`) dataset: posebusters_benchmark # the dataset for which to relax predictions - NOTE: must be one of (`posebusters_benchmark`, `astex_diverse`, `dockgen`, `casp15`) ensemble_ranking_method: consensus # the method with which to rank-order and select the top ensemble prediction for each target - NOTE: must be one of 
(`consensus`, `ff`) num_processes: 1 # the number of parallel processes to use for relaxation @@ -11,8 +11,8 @@ platform: "fastest" # platform on which to run relaxation cuda_device_index: 0 # CUDA device index log_level: "INFO" # logging level protein_dir: ${resolve_method_protein_dir:${method},${dataset},${repeat_index},${pocket_only_baseline}} # the directory from which to load (potentially inferred) proteins -ligand_dir: ${resolve_method_ligand_dir:${method},${dataset},${vina_binding_site_method},${repeat_index}} # the directory from which to load inferred ligands -output_dir: ${resolve_method_output_dir:${method},${dataset},${vina_binding_site_method},${ensemble_ranking_method},${repeat_index}} # the output directory to which to save the relaxed predictions +ligand_dir: ${resolve_method_ligand_dir:${method},${dataset},${vina_binding_site_method},${repeat_index},${pocket_only_baseline},${v1_baseline}} # the directory from which to load inferred ligands +output_dir: ${resolve_method_output_dir:${method},${dataset},${vina_binding_site_method},${ensemble_ranking_method},${repeat_index},${pocket_only_baseline},${v1_baseline}} # the output directory to which to save the relaxed predictions relax_protein: false # whether to relax the protein - NOTE: currently periodically yields unpredictable protein-ligand separation remove_initial_protein_hydrogens: false # whether to remove hydrogens from the initial protein assign_each_ligand_unique_force: false # when relaxing the protein, whether to assign each ligand a unique force constant @@ -25,3 +25,4 @@ max_num_attempts: 5 # when relaxing the protein, maximum number of relaxation at skip_existing: true # whether to skip existing relaxed predictions repeat_index: 1 # the repeat index which was used for inference pocket_only_baseline: false # whether to prepare the pocket-only baseline +v1_baseline: false # whether to prepare the v1 baseline diff --git a/configs/model/neuralplexer_inference.yaml 
b/configs/model/neuralplexer_inference.yaml index 481266ae..e55f1f10 100644 --- a/configs/model/neuralplexer_inference.yaml +++ b/configs/model/neuralplexer_inference.yaml @@ -25,7 +25,8 @@ separate_pdb: true # whether to separate the predicted protein structures into d rank_outputs_by_confidence: true # whether to rank the output conformations, by default, by ligand confidence (if available) and by protein confidence otherwise frozen_prot: false # whether to freeze the protein structure's geometry updates plddt_ranking_type: ligand # the type of plDDT ranking to apply to generated samples - NOTE: must be one of (`protein`, `ligand`, `protein_ligand`) -no_pretraining: false # whether to avoid loading pretrained weights +no_ilcl: false # whether to score the NeuralPLexer weights trained without an inter-ligand clash loss (ILCL) csv_path: null # the CSV filepath from which to parse benchmarking input data repeat_index: 1 # the repeat index to use for inference +pocket_only_baseline: false # whether to run the pocket-only baseline max_num_inputs: null # if provided, the number of (dataset subset) inputs over which to run inference diff --git a/configs/model/rfaa_inference.yaml b/configs/model/rfaa_inference.yaml index 7526c80f..41c73ed0 100644 --- a/configs/model/rfaa_inference.yaml +++ b/configs/model/rfaa_inference.yaml @@ -11,4 +11,5 @@ inference_config_name: null # the name of the inference config to use - NOTE: if inference_dir_name: null # the name of the inference output directory to use repeat_index: 1 # the repeat index to use for inference skip_existing: true # whether to skip running inference if the prediction for a target already exists +pocket_only_baseline: false # whether to run the pocket-only baseline max_num_inputs: null # if provided, the number of (dataset subset) inputs over which to run inference diff --git a/configs/model/vina_inference.yaml b/configs/model/vina_inference.yaml index c3e36bc4..cc35ab7a 100644 --- 
a/configs/model/vina_inference.yaml +++ b/configs/model/vina_inference.yaml @@ -1,11 +1,12 @@ +# General inference arguments: dataset: posebusters_benchmark # the dataset to use - NOTE: must be one of (`posebusters_benchmark`, `astex_diverse`, `dockgen`, `casp15`) -method: diffdock # the method from which to use binding site predictions - NOTE: must be one of (`diffdock`, `dynamicbind`, `neuralplexer`, `p2rank`, `ensemble`) - NOTE: `p2rank` is not included in `ensemble` +method: diffdock # the method from which to derive binding site predictions - NOTE: must be one of (`diffdock`, `dynamicbind`, `neuralplexer`, `p2rank`, `ensemble`) ensemble_ranking_method: consensus # the method with which to rank-order and select the top ensemble prediction for each target - NOTE: must be one of (`consensus`, `ff`) python2_exec_path: ${oc.env:PROJECT_ROOT}/forks/Vina/ADFR/bin/python # the path to the Python 2 executable p2rank_exec_path: ${oc.env:PROJECT_ROOT}/forks/P2Rank/p2rank_2.4.2/prank # the path to the P2Rank executable prepare_receptor_script_path: ${oc.env:PROJECT_ROOT}/forks/Vina/ADFR/CCSBpckgs/AutoDockTools/Utilities24/prepare_receptor4.py # the path to the prepare_receptor.py script -input_dir: ${resolve_method_output_dir:${method},${dataset},${method},${ensemble_ranking_method},${repeat_index}} # the input directory with which to run inference -input_protein_structure_dir: ${oc.env:PROJECT_ROOT}/data/${dataset}_set/${dataset}_holo_aligned_esmfold_structures # the input protein structure directory to parse +input_dir: ${resolve_method_output_dir:${method},${dataset},${method},${ensemble_ranking_method},${repeat_index},${pocket_only_baseline},${v1_baseline}} # the input directory with which to run inference +input_protein_structure_dir: ${oc.env:PROJECT_ROOT}/data/${dataset}_set/${dataset}_holo_aligned_predicted_structures # the input protein structure directory to parse output_dir: 
${oc.env:PROJECT_ROOT}/data/test_cases/${dataset}/vina_${method}_${dataset}_outputs_${repeat_index} # the output directory to which to save the inference results cpu: 0 # the number of CPU workers to use with AutoDock Vina for parallel processing, 0 for all available seed: null # the random seed to use with AutoDock Vina @@ -24,7 +25,9 @@ apo_protein_filepath: null # the apo protein file path to use for inference input_id: null # the input ID to use for inference repeat_index: 1 # the repeat index to use for inference pocket_only_baseline: false # whether to run the pocket-only baseline +v1_baseline: false # whether to run the v1 baseline +max_num_inputs: null # if provided, the number of (dataset subset) inputs over which to run inference +# p2rank inference arguments: p2rank_exec_utility: predict # the P2Rank executable utility to use for inference p2rank_config: alphafold # the P2Rank configuration to use for inference p2rank_enable_pymol_visualizations: false # whether to enable P2Rank's PyMOL visualizations -max_num_inputs: null # if provided, the number of (dataset subset) inputs over which to run inference diff --git a/configs/scripts/benchmark_baseline_compute_resources.yaml b/configs/scripts/benchmark_baseline_compute_resources.yaml index c51cbb24..954d29b5 100644 --- a/configs/scripts/benchmark_baseline_compute_resources.yaml +++ b/configs/scripts/benchmark_baseline_compute_resources.yaml @@ -1,5 +1,5 @@ -method: diffdock # the method for which to score predictions - NOTE: must be one of (`diffdock`, `fabind`, `dynamicbind`, `neuralplexer`, `rfaa`, `vina`, `ensemble`) -vina_binding_site_method: diffdock # the method to use for Vina binding site prediction - NOTE: must be one of (`diffdock`, `fabind`, `dynamicbind`, `neuralplexer`, `rfaa`, `p2rank`) +method: diffdock # the method for which to score predictions - NOTE: must be one of (`diffdock`, `fabind`, `dynamicbind`, `neuralplexer`, `rfaa`, `chai-lab`, `vina`, `ensemble`) +vina_binding_site_method: 
diffdock # the method to use for Vina binding site prediction - NOTE: must be one of (`diffdock`, `fabind`, `dynamicbind`, `neuralplexer`, `rfaa`, `chai-lab`, `p2rank`) dataset: astex_diverse # the dataset to use - NOTE: must be one of (`posebusters_benchmark`, `astex_diverse`, `dockgen`, `casp15`) repeat_index: 1 # the repeat index which was used for inference max_num_inputs: 20 # the number of (dataset subset) inputs over which to benchmark each baseline method's compute resource usage diff --git a/configs/scripts/build_inference_script.yaml b/configs/scripts/build_inference_script.yaml index a78b40f3..c5751c06 100644 --- a/configs/scripts/build_inference_script.yaml +++ b/configs/scripts/build_inference_script.yaml @@ -1,9 +1,35 @@ -method: diffdock # the method for which to score predictions - NOTE: must be one of (`diffdock`, `fabind`, `dynamicbind`, `neuralplexer`, `rfaa`, `vina`, `ensemble`) -vina_binding_site_method: diffdock # the method to use for Vina binding site prediction - NOTE: must be one of (`diffdock`, `fabind`, `dynamicbind`, `neuralplexer`, `rfaa`, `p2rank`) +# run arguments: +method: diffdock # the method for which to score predictions - NOTE: must be one of (`diffdock`, `fabind`, `dynamicbind`, `neuralplexer`, `rfaa`, `chai-lab`, `vina`, `ensemble`) +vina_binding_site_method: diffdock # the method to use for Vina binding site prediction - NOTE: must be one of (`diffdock`, `fabind`, `dynamicbind`, `neuralplexer`, `rfaa`, `chai-lab`, `p2rank`) +ensemble_ranking_method: consensus # the method to use for ensemble ranking - NOTE: must be one of (`consensus`, `ff`) dataset: astex_diverse # the dataset to use - NOTE: must be one of (`posebusters_benchmark`, `astex_diverse`, `dockgen`, `casp15`) repeat_index: 1 # the repeat index which was used for inference -num_repeats: 3 # the number of repeats to be used when building all combinations of scripts cuda_device_index: 0 # the CUDA device index to use for inference (for all methods except 
AutoDock-Vina) output_script_dir: ${oc.env:PROJECT_ROOT}/scripts/inference # the directory in which to save the output script -export_hpc_headers: false # whether to insert HPC headers into the output script -build_all_scripts: false # whether to build all combinations of scripts +pocket_only_baseline: null # whether to perform a pocket-only baseline for the PoseBusters Benchmark set - NOTE: not applicable to `tulip` +v1_baseline: false # whether to perform the V1 baseline for DiffDock +no_ilcl: null # whether to use model weights trained without an inter-ligand clash loss (ILCL) for the CASP15 set - NOTE: only applicable to `neuralplexer` +relax_protein: null # whether to relax the protein structure before scoring - NOTE: currently in an experimental state +export_hpc_headers: true # whether to insert high-performance computing (by default, SLURM) headers into the output script +verbose: false # whether to print verbose (e.g., invalid configuration) output +# sweep arguments: +sweep: false # whether to build all combinations of method-dataset run scripts +methods_to_sweep: [ + "diffdock", + "fabind", + "dynamicbind", + "neuralplexer", + "rfaa", + "chai-lab", + "vina", + "ensemble", + ] # the methods to sweep +vina_binding_site_methods_to_sweep: ["diffdock", "p2rank"] # the Vina binding site prediction methods to sweep +ensemble_ranking_methods_to_sweep: ["consensus"] # the ensemble ranking methods to sweep - NOTE: must be one of (`consensus`, `ff`) +datasets_to_sweep: [ + "posebusters_benchmark", + "astex_diverse", + "dockgen", + "casp15", + ] # the datasets to sweep +num_sweep_repeats: 3 # the number of repeats to run for each method-dataset sweep (if the method is a generative method) diff --git a/data/test_cases/astex_diverse/ensemble_inputs.csv b/data/test_cases/astex_diverse/ensemble_inputs.csv index bf38cdf6..f7d68555 100644 --- a/data/test_cases/astex_diverse/ensemble_inputs.csv +++ b/data/test_cases/astex_diverse/ensemble_inputs.csv @@ -1,86 +1,86 @@
protein_input,ligand_smiles,name -data/astex_diverse_set/1G9V_RQ3/1G9V_RQ3_protein.pdb,,1G9V_RQ3 -data/astex_diverse_set/1GKC_NFH/1GKC_NFH_protein.pdb,,1GKC_NFH -data/astex_diverse_set/1GM8_SOX/1GM8_SOX_protein.pdb,,1GM8_SOX -data/astex_diverse_set/1GPK_HUP/1GPK_HUP_protein.pdb,,1GPK_HUP -data/astex_diverse_set/1HNN_SKF/1HNN_SKF_protein.pdb,,1HNN_SKF -data/astex_diverse_set/1HP0_AD3/1HP0_AD3_protein.pdb,,1HP0_AD3 -data/astex_diverse_set/1HQ2_PH2/1HQ2_PH2_protein.pdb,,1HQ2_PH2 -data/astex_diverse_set/1HVY_D16/1HVY_D16_protein.pdb,,1HVY_D16 -data/astex_diverse_set/1HWI_115/1HWI_115_protein.pdb,,1HWI_115 -data/astex_diverse_set/1HWW_SWA/1HWW_SWA_protein.pdb,,1HWW_SWA -data/astex_diverse_set/1IA1_TQ3/1IA1_TQ3_protein.pdb,,1IA1_TQ3 -data/astex_diverse_set/1IG3_VIB/1IG3_VIB_protein.pdb,,1IG3_VIB -data/astex_diverse_set/1J3J_CP6/1J3J_CP6_protein.pdb,,1J3J_CP6 -data/astex_diverse_set/1JD0_AZM/1JD0_AZM_protein.pdb,,1JD0_AZM -data/astex_diverse_set/1JJE_BYS/1JJE_BYS_protein.pdb,,1JJE_BYS -data/astex_diverse_set/1JLA_TNK/1JLA_TNK_protein.pdb,,1JLA_TNK -data/astex_diverse_set/1K3U_IAD/1K3U_IAD_protein.pdb,,1K3U_IAD -data/astex_diverse_set/1KE5_LS1/1KE5_LS1_protein.pdb,,1KE5_LS1 -data/astex_diverse_set/1KZK_JE2/1KZK_JE2_protein.pdb,,1KZK_JE2 -data/astex_diverse_set/1L2S_STC/1L2S_STC_protein.pdb,,1L2S_STC -data/astex_diverse_set/1L7F_BCZ/1L7F_BCZ_protein.pdb,,1L7F_BCZ -data/astex_diverse_set/1LPZ_CMB/1LPZ_CMB_protein.pdb,,1LPZ_CMB -data/astex_diverse_set/1LRH_NLA/1LRH_NLA_protein.pdb,,1LRH_NLA -data/astex_diverse_set/1M2Z_DEX/1M2Z_DEX_protein.pdb,,1M2Z_DEX -data/astex_diverse_set/1MEH_MOA/1MEH_MOA_protein.pdb,,1MEH_MOA -data/astex_diverse_set/1MMV_3AR/1MMV_3AR_protein.pdb,,1MMV_3AR -data/astex_diverse_set/1MZC_BNE/1MZC_BNE_protein.pdb,,1MZC_BNE -data/astex_diverse_set/1N1M_A3M/1N1M_A3M_protein.pdb,,1N1M_A3M -data/astex_diverse_set/1N2J_PAF/1N2J_PAF_protein.pdb,,1N2J_PAF -data/astex_diverse_set/1N2V_BDI/1N2V_BDI_protein.pdb,,1N2V_BDI 
-data/astex_diverse_set/1N46_PFA/1N46_PFA_protein.pdb,,1N46_PFA -data/astex_diverse_set/1NAV_IH5/1NAV_IH5_protein.pdb,,1NAV_IH5 -data/astex_diverse_set/1OF1_SCT/1OF1_SCT_protein.pdb,,1OF1_SCT -data/astex_diverse_set/1OF6_DTY/1OF6_DTY_protein.pdb,,1OF6_DTY -data/astex_diverse_set/1OPK_P16/1OPK_P16_protein.pdb,,1OPK_P16 -data/astex_diverse_set/1OQ5_CEL/1OQ5_CEL_protein.pdb,,1OQ5_CEL -data/astex_diverse_set/1OWE_675/1OWE_675_protein.pdb,,1OWE_675 -data/astex_diverse_set/1OYT_FSN/1OYT_FSN_protein.pdb,,1OYT_FSN -data/astex_diverse_set/1P2Y_NCT/1P2Y_NCT_protein.pdb,,1P2Y_NCT -data/astex_diverse_set/1P62_GEO/1P62_GEO_protein.pdb,,1P62_GEO -data/astex_diverse_set/1PMN_984/1PMN_984_protein.pdb,,1PMN_984 -data/astex_diverse_set/1Q1G_MTI/1Q1G_MTI_protein.pdb,,1Q1G_MTI -data/astex_diverse_set/1Q41_IXM/1Q41_IXM_protein.pdb,,1Q41_IXM -data/astex_diverse_set/1Q4G_BFL/1Q4G_BFL_protein.pdb,,1Q4G_BFL -data/astex_diverse_set/1R1H_BIR/1R1H_BIR_protein.pdb,,1R1H_BIR -data/astex_diverse_set/1R55_097/1R55_097_protein.pdb,,1R55_097 -data/astex_diverse_set/1R58_AO5/1R58_AO5_protein.pdb,,1R58_AO5 -data/astex_diverse_set/1R9O_FLP/1R9O_FLP_protein.pdb,,1R9O_FLP -data/astex_diverse_set/1S19_MC9/1S19_MC9_protein.pdb,,1S19_MC9 -data/astex_diverse_set/1S3V_TQD/1S3V_TQD_protein.pdb,,1S3V_TQD -data/astex_diverse_set/1SG0_STL/1SG0_STL_protein.pdb,,1SG0_STL -data/astex_diverse_set/1SJ0_E4D/1SJ0_E4D_protein.pdb,,1SJ0_E4D -data/astex_diverse_set/1SQ5_PAU/1SQ5_PAU_protein.pdb,,1SQ5_PAU -data/astex_diverse_set/1SQN_NDR/1SQN_NDR_protein.pdb,,1SQN_NDR -data/astex_diverse_set/1T40_ID5/1T40_ID5_protein.pdb,,1T40_ID5 -data/astex_diverse_set/1T46_STI/1T46_STI_protein.pdb,,1T46_STI -data/astex_diverse_set/1T9B_1CS/1T9B_1CS_protein.pdb,,1T9B_1CS -data/astex_diverse_set/1TOW_CRZ/1TOW_CRZ_protein.pdb,,1TOW_CRZ -data/astex_diverse_set/1TT1_KAI/1TT1_KAI_protein.pdb,,1TT1_KAI -data/astex_diverse_set/1TZ8_DES/1TZ8_DES_protein.pdb,,1TZ8_DES -data/astex_diverse_set/1U1C_BAU/1U1C_BAU_protein.pdb,,1U1C_BAU 
-data/astex_diverse_set/1U4D_DBQ/1U4D_DBQ_protein.pdb,,1U4D_DBQ -data/astex_diverse_set/1UML_FR4/1UML_FR4_protein.pdb,,1UML_FR4 -data/astex_diverse_set/1UNL_RRC/1UNL_RRC_protein.pdb,,1UNL_RRC -data/astex_diverse_set/1UOU_CMU/1UOU_CMU_protein.pdb,,1UOU_CMU -data/astex_diverse_set/1V0P_PVB/1V0P_PVB_protein.pdb,,1V0P_PVB -data/astex_diverse_set/1V48_HA1/1V48_HA1_protein.pdb,,1V48_HA1 -data/astex_diverse_set/1V4S_MRK/1V4S_MRK_protein.pdb,,1V4S_MRK -data/astex_diverse_set/1VCJ_IBA/1VCJ_IBA_protein.pdb,,1VCJ_IBA -data/astex_diverse_set/1W1P_GIO/1W1P_GIO_protein.pdb,,1W1P_GIO -data/astex_diverse_set/1W2G_THM/1W2G_THM_protein.pdb,,1W2G_THM -data/astex_diverse_set/1X8X_TYR/1X8X_TYR_protein.pdb,,1X8X_TYR -data/astex_diverse_set/1XM6_5RM/1XM6_5RM_protein.pdb,,1XM6_5RM -data/astex_diverse_set/1XOQ_ROF/1XOQ_ROF_protein.pdb,,1XOQ_ROF -data/astex_diverse_set/1XOZ_CIA/1XOZ_CIA_protein.pdb,,1XOZ_CIA -data/astex_diverse_set/1Y6B_AAX/1Y6B_AAX_protein.pdb,,1Y6B_AAX -data/astex_diverse_set/1YGC_905/1YGC_905_protein.pdb,,1YGC_905 -data/astex_diverse_set/1YQY_915/1YQY_915_protein.pdb,,1YQY_915 -data/astex_diverse_set/1YV3_BIT/1YV3_BIT_protein.pdb,,1YV3_BIT -data/astex_diverse_set/1YVF_PH7/1YVF_PH7_protein.pdb,,1YVF_PH7 -data/astex_diverse_set/1YWR_LI9/1YWR_LI9_protein.pdb,,1YWR_LI9 -data/astex_diverse_set/1Z95_198/1Z95_198_protein.pdb,,1Z95_198 -data/astex_diverse_set/2BM2_PM2/2BM2_PM2_protein.pdb,,2BM2_PM2 -data/astex_diverse_set/2BR1_PFP/2BR1_PFP_protein.pdb,,2BR1_PFP -data/astex_diverse_set/2BSM_BSM/2BSM_BSM_protein.pdb,,2BSM_BSM +data/astex_diverse_set/astex_diverse_holo_aligned_predicted_structures/1G9V_RQ3_holo_aligned_predicted_protein.pdb,,1G9V_RQ3 +data/astex_diverse_set/astex_diverse_holo_aligned_predicted_structures/1GKC_NFH_holo_aligned_predicted_protein.pdb,,1GKC_NFH +data/astex_diverse_set/astex_diverse_holo_aligned_predicted_structures/1GM8_SOX_holo_aligned_predicted_protein.pdb,,1GM8_SOX 
+data/astex_diverse_set/astex_diverse_holo_aligned_predicted_structures/1GPK_HUP_holo_aligned_predicted_protein.pdb,,1GPK_HUP +data/astex_diverse_set/astex_diverse_holo_aligned_predicted_structures/1HNN_SKF_holo_aligned_predicted_protein.pdb,,1HNN_SKF +data/astex_diverse_set/astex_diverse_holo_aligned_predicted_structures/1HP0_AD3_holo_aligned_predicted_protein.pdb,,1HP0_AD3 +data/astex_diverse_set/astex_diverse_holo_aligned_predicted_structures/1HQ2_PH2_holo_aligned_predicted_protein.pdb,,1HQ2_PH2 +data/astex_diverse_set/astex_diverse_holo_aligned_predicted_structures/1HVY_D16_holo_aligned_predicted_protein.pdb,,1HVY_D16 +data/astex_diverse_set/astex_diverse_holo_aligned_predicted_structures/1HWI_115_holo_aligned_predicted_protein.pdb,,1HWI_115 +data/astex_diverse_set/astex_diverse_holo_aligned_predicted_structures/1HWW_SWA_holo_aligned_predicted_protein.pdb,,1HWW_SWA +data/astex_diverse_set/astex_diverse_holo_aligned_predicted_structures/1IA1_TQ3_holo_aligned_predicted_protein.pdb,,1IA1_TQ3 +data/astex_diverse_set/astex_diverse_holo_aligned_predicted_structures/1IG3_VIB_holo_aligned_predicted_protein.pdb,,1IG3_VIB +data/astex_diverse_set/astex_diverse_holo_aligned_predicted_structures/1J3J_CP6_holo_aligned_predicted_protein.pdb,,1J3J_CP6 +data/astex_diverse_set/astex_diverse_holo_aligned_predicted_structures/1JD0_AZM_holo_aligned_predicted_protein.pdb,,1JD0_AZM +data/astex_diverse_set/astex_diverse_holo_aligned_predicted_structures/1JJE_BYS_holo_aligned_predicted_protein.pdb,,1JJE_BYS +data/astex_diverse_set/astex_diverse_holo_aligned_predicted_structures/1JLA_TNK_holo_aligned_predicted_protein.pdb,,1JLA_TNK +data/astex_diverse_set/astex_diverse_holo_aligned_predicted_structures/1K3U_IAD_holo_aligned_predicted_protein.pdb,,1K3U_IAD +data/astex_diverse_set/astex_diverse_holo_aligned_predicted_structures/1KE5_LS1_holo_aligned_predicted_protein.pdb,,1KE5_LS1 
+data/astex_diverse_set/astex_diverse_holo_aligned_predicted_structures/1KZK_JE2_holo_aligned_predicted_protein.pdb,,1KZK_JE2 +data/astex_diverse_set/astex_diverse_holo_aligned_predicted_structures/1L2S_STC_holo_aligned_predicted_protein.pdb,,1L2S_STC +data/astex_diverse_set/astex_diverse_holo_aligned_predicted_structures/1L7F_BCZ_holo_aligned_predicted_protein.pdb,,1L7F_BCZ +data/astex_diverse_set/astex_diverse_holo_aligned_predicted_structures/1LPZ_CMB_holo_aligned_predicted_protein.pdb,,1LPZ_CMB +data/astex_diverse_set/astex_diverse_holo_aligned_predicted_structures/1LRH_NLA_holo_aligned_predicted_protein.pdb,,1LRH_NLA +data/astex_diverse_set/astex_diverse_holo_aligned_predicted_structures/1M2Z_DEX_holo_aligned_predicted_protein.pdb,,1M2Z_DEX +data/astex_diverse_set/astex_diverse_holo_aligned_predicted_structures/1MEH_MOA_holo_aligned_predicted_protein.pdb,,1MEH_MOA +data/astex_diverse_set/astex_diverse_holo_aligned_predicted_structures/1MMV_3AR_holo_aligned_predicted_protein.pdb,,1MMV_3AR +data/astex_diverse_set/astex_diverse_holo_aligned_predicted_structures/1MZC_BNE_holo_aligned_predicted_protein.pdb,,1MZC_BNE +data/astex_diverse_set/astex_diverse_holo_aligned_predicted_structures/1N1M_A3M_holo_aligned_predicted_protein.pdb,,1N1M_A3M +data/astex_diverse_set/astex_diverse_holo_aligned_predicted_structures/1N2J_PAF_holo_aligned_predicted_protein.pdb,,1N2J_PAF +data/astex_diverse_set/astex_diverse_holo_aligned_predicted_structures/1N2V_BDI_holo_aligned_predicted_protein.pdb,,1N2V_BDI +data/astex_diverse_set/astex_diverse_holo_aligned_predicted_structures/1N46_PFA_holo_aligned_predicted_protein.pdb,,1N46_PFA +data/astex_diverse_set/astex_diverse_holo_aligned_predicted_structures/1NAV_IH5_holo_aligned_predicted_protein.pdb,,1NAV_IH5 +data/astex_diverse_set/astex_diverse_holo_aligned_predicted_structures/1OF1_SCT_holo_aligned_predicted_protein.pdb,,1OF1_SCT 
+data/astex_diverse_set/astex_diverse_holo_aligned_predicted_structures/1OF6_DTY_holo_aligned_predicted_protein.pdb,,1OF6_DTY +data/astex_diverse_set/astex_diverse_holo_aligned_predicted_structures/1OPK_P16_holo_aligned_predicted_protein.pdb,,1OPK_P16 +data/astex_diverse_set/astex_diverse_holo_aligned_predicted_structures/1OQ5_CEL_holo_aligned_predicted_protein.pdb,,1OQ5_CEL +data/astex_diverse_set/astex_diverse_holo_aligned_predicted_structures/1OWE_675_holo_aligned_predicted_protein.pdb,,1OWE_675 +data/astex_diverse_set/astex_diverse_holo_aligned_predicted_structures/1OYT_FSN_holo_aligned_predicted_protein.pdb,,1OYT_FSN +data/astex_diverse_set/astex_diverse_holo_aligned_predicted_structures/1P2Y_NCT_holo_aligned_predicted_protein.pdb,,1P2Y_NCT +data/astex_diverse_set/astex_diverse_holo_aligned_predicted_structures/1P62_GEO_holo_aligned_predicted_protein.pdb,,1P62_GEO +data/astex_diverse_set/astex_diverse_holo_aligned_predicted_structures/1PMN_984_holo_aligned_predicted_protein.pdb,,1PMN_984 +data/astex_diverse_set/astex_diverse_holo_aligned_predicted_structures/1Q1G_MTI_holo_aligned_predicted_protein.pdb,,1Q1G_MTI +data/astex_diverse_set/astex_diverse_holo_aligned_predicted_structures/1Q41_IXM_holo_aligned_predicted_protein.pdb,,1Q41_IXM +data/astex_diverse_set/astex_diverse_holo_aligned_predicted_structures/1Q4G_BFL_holo_aligned_predicted_protein.pdb,,1Q4G_BFL +data/astex_diverse_set/astex_diverse_holo_aligned_predicted_structures/1R1H_BIR_holo_aligned_predicted_protein.pdb,,1R1H_BIR +data/astex_diverse_set/astex_diverse_holo_aligned_predicted_structures/1R55_097_holo_aligned_predicted_protein.pdb,,1R55_097 +data/astex_diverse_set/astex_diverse_holo_aligned_predicted_structures/1R58_AO5_holo_aligned_predicted_protein.pdb,,1R58_AO5 +data/astex_diverse_set/astex_diverse_holo_aligned_predicted_structures/1R9O_FLP_holo_aligned_predicted_protein.pdb,,1R9O_FLP 
+data/astex_diverse_set/astex_diverse_holo_aligned_predicted_structures/1S19_MC9_holo_aligned_predicted_protein.pdb,,1S19_MC9 +data/astex_diverse_set/astex_diverse_holo_aligned_predicted_structures/1S3V_TQD_holo_aligned_predicted_protein.pdb,,1S3V_TQD +data/astex_diverse_set/astex_diverse_holo_aligned_predicted_structures/1SG0_STL_holo_aligned_predicted_protein.pdb,,1SG0_STL +data/astex_diverse_set/astex_diverse_holo_aligned_predicted_structures/1SJ0_E4D_holo_aligned_predicted_protein.pdb,,1SJ0_E4D +data/astex_diverse_set/astex_diverse_holo_aligned_predicted_structures/1SQ5_PAU_holo_aligned_predicted_protein.pdb,,1SQ5_PAU +data/astex_diverse_set/astex_diverse_holo_aligned_predicted_structures/1SQN_NDR_holo_aligned_predicted_protein.pdb,,1SQN_NDR +data/astex_diverse_set/astex_diverse_holo_aligned_predicted_structures/1T40_ID5_holo_aligned_predicted_protein.pdb,,1T40_ID5 +data/astex_diverse_set/astex_diverse_holo_aligned_predicted_structures/1T46_STI_holo_aligned_predicted_protein.pdb,,1T46_STI +data/astex_diverse_set/astex_diverse_holo_aligned_predicted_structures/1T9B_1CS_holo_aligned_predicted_protein.pdb,,1T9B_1CS +data/astex_diverse_set/astex_diverse_holo_aligned_predicted_structures/1TOW_CRZ_holo_aligned_predicted_protein.pdb,,1TOW_CRZ +data/astex_diverse_set/astex_diverse_holo_aligned_predicted_structures/1TT1_KAI_holo_aligned_predicted_protein.pdb,,1TT1_KAI +data/astex_diverse_set/astex_diverse_holo_aligned_predicted_structures/1TZ8_DES_holo_aligned_predicted_protein.pdb,,1TZ8_DES +data/astex_diverse_set/astex_diverse_holo_aligned_predicted_structures/1U1C_BAU_holo_aligned_predicted_protein.pdb,,1U1C_BAU +data/astex_diverse_set/astex_diverse_holo_aligned_predicted_structures/1U4D_DBQ_holo_aligned_predicted_protein.pdb,,1U4D_DBQ +data/astex_diverse_set/astex_diverse_holo_aligned_predicted_structures/1UML_FR4_holo_aligned_predicted_protein.pdb,,1UML_FR4 
+data/astex_diverse_set/astex_diverse_holo_aligned_predicted_structures/1UNL_RRC_holo_aligned_predicted_protein.pdb,,1UNL_RRC +data/astex_diverse_set/astex_diverse_holo_aligned_predicted_structures/1UOU_CMU_holo_aligned_predicted_protein.pdb,,1UOU_CMU +data/astex_diverse_set/astex_diverse_holo_aligned_predicted_structures/1V0P_PVB_holo_aligned_predicted_protein.pdb,,1V0P_PVB +data/astex_diverse_set/astex_diverse_holo_aligned_predicted_structures/1V48_HA1_holo_aligned_predicted_protein.pdb,,1V48_HA1 +data/astex_diverse_set/astex_diverse_holo_aligned_predicted_structures/1V4S_MRK_holo_aligned_predicted_protein.pdb,,1V4S_MRK +data/astex_diverse_set/astex_diverse_holo_aligned_predicted_structures/1VCJ_IBA_holo_aligned_predicted_protein.pdb,,1VCJ_IBA +data/astex_diverse_set/astex_diverse_holo_aligned_predicted_structures/1W1P_GIO_holo_aligned_predicted_protein.pdb,,1W1P_GIO +data/astex_diverse_set/astex_diverse_holo_aligned_predicted_structures/1W2G_THM_holo_aligned_predicted_protein.pdb,,1W2G_THM +data/astex_diverse_set/astex_diverse_holo_aligned_predicted_structures/1X8X_TYR_holo_aligned_predicted_protein.pdb,,1X8X_TYR +data/astex_diverse_set/astex_diverse_holo_aligned_predicted_structures/1XM6_5RM_holo_aligned_predicted_protein.pdb,,1XM6_5RM +data/astex_diverse_set/astex_diverse_holo_aligned_predicted_structures/1XOQ_ROF_holo_aligned_predicted_protein.pdb,,1XOQ_ROF +data/astex_diverse_set/astex_diverse_holo_aligned_predicted_structures/1XOZ_CIA_holo_aligned_predicted_protein.pdb,,1XOZ_CIA +data/astex_diverse_set/astex_diverse_holo_aligned_predicted_structures/1Y6B_AAX_holo_aligned_predicted_protein.pdb,,1Y6B_AAX +data/astex_diverse_set/astex_diverse_holo_aligned_predicted_structures/1YGC_905_holo_aligned_predicted_protein.pdb,,1YGC_905 +data/astex_diverse_set/astex_diverse_holo_aligned_predicted_structures/1YQY_915_holo_aligned_predicted_protein.pdb,,1YQY_915 
+data/astex_diverse_set/astex_diverse_holo_aligned_predicted_structures/1YV3_BIT_holo_aligned_predicted_protein.pdb,,1YV3_BIT +data/astex_diverse_set/astex_diverse_holo_aligned_predicted_structures/1YVF_PH7_holo_aligned_predicted_protein.pdb,,1YVF_PH7 +data/astex_diverse_set/astex_diverse_holo_aligned_predicted_structures/1YWR_LI9_holo_aligned_predicted_protein.pdb,,1YWR_LI9 +data/astex_diverse_set/astex_diverse_holo_aligned_predicted_structures/1Z95_198_holo_aligned_predicted_protein.pdb,,1Z95_198 +data/astex_diverse_set/astex_diverse_holo_aligned_predicted_structures/2BM2_PM2_holo_aligned_predicted_protein.pdb,,2BM2_PM2 +data/astex_diverse_set/astex_diverse_holo_aligned_predicted_structures/2BR1_PFP_holo_aligned_predicted_protein.pdb,,2BR1_PFP +data/astex_diverse_set/astex_diverse_holo_aligned_predicted_structures/2BSM_BSM_holo_aligned_predicted_protein.pdb,,2BSM_BSM diff --git a/data/test_cases/astex_diverse/ensemble_pocket_only_inputs.csv b/data/test_cases/astex_diverse/ensemble_pocket_only_inputs.csv new file mode 100644 index 00000000..ab556568 --- /dev/null +++ b/data/test_cases/astex_diverse/ensemble_pocket_only_inputs.csv @@ -0,0 +1,86 @@ +protein_input,ligand_smiles,name +data/astex_diverse_set/astex_diverse_holo_aligned_predicted_structures_bs_cropped/1G9V_RQ3_holo_aligned_predicted_protein.pdb,,1G9V_RQ3 +data/astex_diverse_set/astex_diverse_holo_aligned_predicted_structures_bs_cropped/1GKC_NFH_holo_aligned_predicted_protein.pdb,,1GKC_NFH +data/astex_diverse_set/astex_diverse_holo_aligned_predicted_structures_bs_cropped/1GM8_SOX_holo_aligned_predicted_protein.pdb,,1GM8_SOX +data/astex_diverse_set/astex_diverse_holo_aligned_predicted_structures_bs_cropped/1GPK_HUP_holo_aligned_predicted_protein.pdb,,1GPK_HUP +data/astex_diverse_set/astex_diverse_holo_aligned_predicted_structures_bs_cropped/1HNN_SKF_holo_aligned_predicted_protein.pdb,,1HNN_SKF 
+data/astex_diverse_set/astex_diverse_holo_aligned_predicted_structures_bs_cropped/1HP0_AD3_holo_aligned_predicted_protein.pdb,,1HP0_AD3 +data/astex_diverse_set/astex_diverse_holo_aligned_predicted_structures_bs_cropped/1HQ2_PH2_holo_aligned_predicted_protein.pdb,,1HQ2_PH2 +data/astex_diverse_set/astex_diverse_holo_aligned_predicted_structures_bs_cropped/1HVY_D16_holo_aligned_predicted_protein.pdb,,1HVY_D16 +data/astex_diverse_set/astex_diverse_holo_aligned_predicted_structures_bs_cropped/1HWI_115_holo_aligned_predicted_protein.pdb,,1HWI_115 +data/astex_diverse_set/astex_diverse_holo_aligned_predicted_structures_bs_cropped/1HWW_SWA_holo_aligned_predicted_protein.pdb,,1HWW_SWA +data/astex_diverse_set/astex_diverse_holo_aligned_predicted_structures_bs_cropped/1IA1_TQ3_holo_aligned_predicted_protein.pdb,,1IA1_TQ3 +data/astex_diverse_set/astex_diverse_holo_aligned_predicted_structures_bs_cropped/1IG3_VIB_holo_aligned_predicted_protein.pdb,,1IG3_VIB +data/astex_diverse_set/astex_diverse_holo_aligned_predicted_structures_bs_cropped/1J3J_CP6_holo_aligned_predicted_protein.pdb,,1J3J_CP6 +data/astex_diverse_set/astex_diverse_holo_aligned_predicted_structures_bs_cropped/1JD0_AZM_holo_aligned_predicted_protein.pdb,,1JD0_AZM +data/astex_diverse_set/astex_diverse_holo_aligned_predicted_structures_bs_cropped/1JJE_BYS_holo_aligned_predicted_protein.pdb,,1JJE_BYS +data/astex_diverse_set/astex_diverse_holo_aligned_predicted_structures_bs_cropped/1JLA_TNK_holo_aligned_predicted_protein.pdb,,1JLA_TNK +data/astex_diverse_set/astex_diverse_holo_aligned_predicted_structures_bs_cropped/1K3U_IAD_holo_aligned_predicted_protein.pdb,,1K3U_IAD +data/astex_diverse_set/astex_diverse_holo_aligned_predicted_structures_bs_cropped/1KE5_LS1_holo_aligned_predicted_protein.pdb,,1KE5_LS1 +data/astex_diverse_set/astex_diverse_holo_aligned_predicted_structures_bs_cropped/1KZK_JE2_holo_aligned_predicted_protein.pdb,,1KZK_JE2 
+data/astex_diverse_set/astex_diverse_holo_aligned_predicted_structures_bs_cropped/1L2S_STC_holo_aligned_predicted_protein.pdb,,1L2S_STC +data/astex_diverse_set/astex_diverse_holo_aligned_predicted_structures_bs_cropped/1L7F_BCZ_holo_aligned_predicted_protein.pdb,,1L7F_BCZ +data/astex_diverse_set/astex_diverse_holo_aligned_predicted_structures_bs_cropped/1LPZ_CMB_holo_aligned_predicted_protein.pdb,,1LPZ_CMB +data/astex_diverse_set/astex_diverse_holo_aligned_predicted_structures_bs_cropped/1LRH_NLA_holo_aligned_predicted_protein.pdb,,1LRH_NLA +data/astex_diverse_set/astex_diverse_holo_aligned_predicted_structures_bs_cropped/1M2Z_DEX_holo_aligned_predicted_protein.pdb,,1M2Z_DEX +data/astex_diverse_set/astex_diverse_holo_aligned_predicted_structures_bs_cropped/1MEH_MOA_holo_aligned_predicted_protein.pdb,,1MEH_MOA +data/astex_diverse_set/astex_diverse_holo_aligned_predicted_structures_bs_cropped/1MMV_3AR_holo_aligned_predicted_protein.pdb,,1MMV_3AR +data/astex_diverse_set/astex_diverse_holo_aligned_predicted_structures_bs_cropped/1MZC_BNE_holo_aligned_predicted_protein.pdb,,1MZC_BNE +data/astex_diverse_set/astex_diverse_holo_aligned_predicted_structures_bs_cropped/1N1M_A3M_holo_aligned_predicted_protein.pdb,,1N1M_A3M +data/astex_diverse_set/astex_diverse_holo_aligned_predicted_structures_bs_cropped/1N2J_PAF_holo_aligned_predicted_protein.pdb,,1N2J_PAF +data/astex_diverse_set/astex_diverse_holo_aligned_predicted_structures_bs_cropped/1N2V_BDI_holo_aligned_predicted_protein.pdb,,1N2V_BDI +data/astex_diverse_set/astex_diverse_holo_aligned_predicted_structures_bs_cropped/1N46_PFA_holo_aligned_predicted_protein.pdb,,1N46_PFA +data/astex_diverse_set/astex_diverse_holo_aligned_predicted_structures_bs_cropped/1NAV_IH5_holo_aligned_predicted_protein.pdb,,1NAV_IH5 +data/astex_diverse_set/astex_diverse_holo_aligned_predicted_structures_bs_cropped/1OF1_SCT_holo_aligned_predicted_protein.pdb,,1OF1_SCT 
+data/astex_diverse_set/astex_diverse_holo_aligned_predicted_structures_bs_cropped/1OF6_DTY_holo_aligned_predicted_protein.pdb,,1OF6_DTY +data/astex_diverse_set/astex_diverse_holo_aligned_predicted_structures_bs_cropped/1OPK_P16_holo_aligned_predicted_protein.pdb,,1OPK_P16 +data/astex_diverse_set/astex_diverse_holo_aligned_predicted_structures_bs_cropped/1OQ5_CEL_holo_aligned_predicted_protein.pdb,,1OQ5_CEL +data/astex_diverse_set/astex_diverse_holo_aligned_predicted_structures_bs_cropped/1OWE_675_holo_aligned_predicted_protein.pdb,,1OWE_675 +data/astex_diverse_set/astex_diverse_holo_aligned_predicted_structures_bs_cropped/1OYT_FSN_holo_aligned_predicted_protein.pdb,,1OYT_FSN +data/astex_diverse_set/astex_diverse_holo_aligned_predicted_structures_bs_cropped/1P2Y_NCT_holo_aligned_predicted_protein.pdb,,1P2Y_NCT +data/astex_diverse_set/astex_diverse_holo_aligned_predicted_structures_bs_cropped/1P62_GEO_holo_aligned_predicted_protein.pdb,,1P62_GEO +data/astex_diverse_set/astex_diverse_holo_aligned_predicted_structures_bs_cropped/1PMN_984_holo_aligned_predicted_protein.pdb,,1PMN_984 +data/astex_diverse_set/astex_diverse_holo_aligned_predicted_structures_bs_cropped/1Q1G_MTI_holo_aligned_predicted_protein.pdb,,1Q1G_MTI +data/astex_diverse_set/astex_diverse_holo_aligned_predicted_structures_bs_cropped/1Q41_IXM_holo_aligned_predicted_protein.pdb,,1Q41_IXM +data/astex_diverse_set/astex_diverse_holo_aligned_predicted_structures_bs_cropped/1Q4G_BFL_holo_aligned_predicted_protein.pdb,,1Q4G_BFL +data/astex_diverse_set/astex_diverse_holo_aligned_predicted_structures_bs_cropped/1R1H_BIR_holo_aligned_predicted_protein.pdb,,1R1H_BIR +data/astex_diverse_set/astex_diverse_holo_aligned_predicted_structures_bs_cropped/1R55_097_holo_aligned_predicted_protein.pdb,,1R55_097 +data/astex_diverse_set/astex_diverse_holo_aligned_predicted_structures_bs_cropped/1R58_AO5_holo_aligned_predicted_protein.pdb,,1R58_AO5 
+data/astex_diverse_set/astex_diverse_holo_aligned_predicted_structures_bs_cropped/1R9O_FLP_holo_aligned_predicted_protein.pdb,,1R9O_FLP +data/astex_diverse_set/astex_diverse_holo_aligned_predicted_structures_bs_cropped/1S19_MC9_holo_aligned_predicted_protein.pdb,,1S19_MC9 +data/astex_diverse_set/astex_diverse_holo_aligned_predicted_structures_bs_cropped/1S3V_TQD_holo_aligned_predicted_protein.pdb,,1S3V_TQD +data/astex_diverse_set/astex_diverse_holo_aligned_predicted_structures_bs_cropped/1SG0_STL_holo_aligned_predicted_protein.pdb,,1SG0_STL +data/astex_diverse_set/astex_diverse_holo_aligned_predicted_structures_bs_cropped/1SJ0_E4D_holo_aligned_predicted_protein.pdb,,1SJ0_E4D +data/astex_diverse_set/astex_diverse_holo_aligned_predicted_structures_bs_cropped/1SQ5_PAU_holo_aligned_predicted_protein.pdb,,1SQ5_PAU +data/astex_diverse_set/astex_diverse_holo_aligned_predicted_structures_bs_cropped/1SQN_NDR_holo_aligned_predicted_protein.pdb,,1SQN_NDR +data/astex_diverse_set/astex_diverse_holo_aligned_predicted_structures_bs_cropped/1T40_ID5_holo_aligned_predicted_protein.pdb,,1T40_ID5 +data/astex_diverse_set/astex_diverse_holo_aligned_predicted_structures_bs_cropped/1T46_STI_holo_aligned_predicted_protein.pdb,,1T46_STI +data/astex_diverse_set/astex_diverse_holo_aligned_predicted_structures_bs_cropped/1T9B_1CS_holo_aligned_predicted_protein.pdb,,1T9B_1CS +data/astex_diverse_set/astex_diverse_holo_aligned_predicted_structures_bs_cropped/1TOW_CRZ_holo_aligned_predicted_protein.pdb,,1TOW_CRZ +data/astex_diverse_set/astex_diverse_holo_aligned_predicted_structures_bs_cropped/1TT1_KAI_holo_aligned_predicted_protein.pdb,,1TT1_KAI +data/astex_diverse_set/astex_diverse_holo_aligned_predicted_structures_bs_cropped/1TZ8_DES_holo_aligned_predicted_protein.pdb,,1TZ8_DES +data/astex_diverse_set/astex_diverse_holo_aligned_predicted_structures_bs_cropped/1U1C_BAU_holo_aligned_predicted_protein.pdb,,1U1C_BAU 
+data/astex_diverse_set/astex_diverse_holo_aligned_predicted_structures_bs_cropped/1U4D_DBQ_holo_aligned_predicted_protein.pdb,,1U4D_DBQ +data/astex_diverse_set/astex_diverse_holo_aligned_predicted_structures_bs_cropped/1UML_FR4_holo_aligned_predicted_protein.pdb,,1UML_FR4 +data/astex_diverse_set/astex_diverse_holo_aligned_predicted_structures_bs_cropped/1UNL_RRC_holo_aligned_predicted_protein.pdb,,1UNL_RRC +data/astex_diverse_set/astex_diverse_holo_aligned_predicted_structures_bs_cropped/1UOU_CMU_holo_aligned_predicted_protein.pdb,,1UOU_CMU +data/astex_diverse_set/astex_diverse_holo_aligned_predicted_structures_bs_cropped/1V0P_PVB_holo_aligned_predicted_protein.pdb,,1V0P_PVB +data/astex_diverse_set/astex_diverse_holo_aligned_predicted_structures_bs_cropped/1V48_HA1_holo_aligned_predicted_protein.pdb,,1V48_HA1 +data/astex_diverse_set/astex_diverse_holo_aligned_predicted_structures_bs_cropped/1V4S_MRK_holo_aligned_predicted_protein.pdb,,1V4S_MRK +data/astex_diverse_set/astex_diverse_holo_aligned_predicted_structures_bs_cropped/1VCJ_IBA_holo_aligned_predicted_protein.pdb,,1VCJ_IBA +data/astex_diverse_set/astex_diverse_holo_aligned_predicted_structures_bs_cropped/1W1P_GIO_holo_aligned_predicted_protein.pdb,,1W1P_GIO +data/astex_diverse_set/astex_diverse_holo_aligned_predicted_structures_bs_cropped/1W2G_THM_holo_aligned_predicted_protein.pdb,,1W2G_THM +data/astex_diverse_set/astex_diverse_holo_aligned_predicted_structures_bs_cropped/1X8X_TYR_holo_aligned_predicted_protein.pdb,,1X8X_TYR +data/astex_diverse_set/astex_diverse_holo_aligned_predicted_structures_bs_cropped/1XM6_5RM_holo_aligned_predicted_protein.pdb,,1XM6_5RM +data/astex_diverse_set/astex_diverse_holo_aligned_predicted_structures_bs_cropped/1XOQ_ROF_holo_aligned_predicted_protein.pdb,,1XOQ_ROF +data/astex_diverse_set/astex_diverse_holo_aligned_predicted_structures_bs_cropped/1XOZ_CIA_holo_aligned_predicted_protein.pdb,,1XOZ_CIA 
+data/astex_diverse_set/astex_diverse_holo_aligned_predicted_structures_bs_cropped/1Y6B_AAX_holo_aligned_predicted_protein.pdb,,1Y6B_AAX +data/astex_diverse_set/astex_diverse_holo_aligned_predicted_structures_bs_cropped/1YGC_905_holo_aligned_predicted_protein.pdb,,1YGC_905 +data/astex_diverse_set/astex_diverse_holo_aligned_predicted_structures_bs_cropped/1YQY_915_holo_aligned_predicted_protein.pdb,,1YQY_915 +data/astex_diverse_set/astex_diverse_holo_aligned_predicted_structures_bs_cropped/1YV3_BIT_holo_aligned_predicted_protein.pdb,,1YV3_BIT +data/astex_diverse_set/astex_diverse_holo_aligned_predicted_structures_bs_cropped/1YVF_PH7_holo_aligned_predicted_protein.pdb,,1YVF_PH7 +data/astex_diverse_set/astex_diverse_holo_aligned_predicted_structures_bs_cropped/1YWR_LI9_holo_aligned_predicted_protein.pdb,,1YWR_LI9 +data/astex_diverse_set/astex_diverse_holo_aligned_predicted_structures_bs_cropped/1Z95_198_holo_aligned_predicted_protein.pdb,,1Z95_198 +data/astex_diverse_set/astex_diverse_holo_aligned_predicted_structures_bs_cropped/2BM2_PM2_holo_aligned_predicted_protein.pdb,,2BM2_PM2 +data/astex_diverse_set/astex_diverse_holo_aligned_predicted_structures_bs_cropped/2BR1_PFP_holo_aligned_predicted_protein.pdb,,2BR1_PFP +data/astex_diverse_set/astex_diverse_holo_aligned_predicted_structures_bs_cropped/2BSM_BSM_holo_aligned_predicted_protein.pdb,,2BSM_BSM diff --git a/data/test_cases/casp15/ensemble_evaluation_inputs.csv b/data/test_cases/casp15/ensemble_evaluation_inputs.csv index b928f707..3bb02962 100644 --- a/data/test_cases/casp15/ensemble_evaluation_inputs.csv +++ b/data/test_cases/casp15/ensemble_evaluation_inputs.csv @@ -1,21 +1,20 @@ protein_input,ligand_smiles,name,ligand_numbers,ligand_names,ligand_tasks -data/casp15_set/predicted_structures/H1135.pdb,,H1135,,,P -data/casp15_set/predicted_structures/H1171v1.pdb,,H1171v1,,,P -data/casp15_set/predicted_structures/H1171v2.pdb,,H1171v2,,,P -data/casp15_set/predicted_structures/H1172v1.pdb,,H1172v1,,,P 
-data/casp15_set/predicted_structures/H1172v2.pdb,,H1172v2,,,P -data/casp15_set/predicted_structures/H1172v3.pdb,,H1172v3,,,P -data/casp15_set/predicted_structures/H1172v4.pdb,,H1172v4,,,P -data/casp15_set/predicted_structures/T1124.pdb,,T1124,,,P -data/casp15_set/predicted_structures/T1127v2.pdb,,T1127v2,,,P -data/casp15_set/predicted_structures/T1146.pdb,,T1146,,,P -data/casp15_set/predicted_structures/T1152.pdb,,T1152,,,P -data/casp15_set/predicted_structures/T1158v1.pdb,,T1158v1,,,P -data/casp15_set/predicted_structures/T1158v2.pdb,,T1158v2,,,P -data/casp15_set/predicted_structures/T1158v3.pdb,,T1158v3,,,P -data/casp15_set/predicted_structures/T1158v4.pdb,,T1158v4,,,P -data/casp15_set/predicted_structures/T1170.pdb,,T1170,,,P -data/casp15_set/predicted_structures/T1181.pdb,,T1181,,,P -data/casp15_set/predicted_structures/T1186.pdb,,T1186,,,P -data/casp15_set/predicted_structures/T1187.pdb,,T1187,,,P -data/casp15_set/predicted_structures/T1188.pdb,,T1188,,,P +data/casp15_set/casp15_holo_aligned_predicted_structures/H1135.pdb,,H1135,,,P +data/casp15_set/casp15_holo_aligned_predicted_structures/H1171v1.pdb,,H1171v1,,,P +data/casp15_set/casp15_holo_aligned_predicted_structures/H1171v2.pdb,,H1171v2,,,P +data/casp15_set/casp15_holo_aligned_predicted_structures/H1172v1.pdb,,H1172v1,,,P +data/casp15_set/casp15_holo_aligned_predicted_structures/H1172v2.pdb,,H1172v2,,,P +data/casp15_set/casp15_holo_aligned_predicted_structures/H1172v3.pdb,,H1172v3,,,P +data/casp15_set/casp15_holo_aligned_predicted_structures/H1172v4.pdb,,H1172v4,,,P +data/casp15_set/casp15_holo_aligned_predicted_structures/T1124.pdb,,T1124,,,P +data/casp15_set/casp15_holo_aligned_predicted_structures/T1127v2.pdb,,T1127v2,,,P +data/casp15_set/casp15_holo_aligned_predicted_structures/T1146.pdb,,T1146,,,P +data/casp15_set/casp15_holo_aligned_predicted_structures/T1152.pdb,,T1152,,,P +data/casp15_set/casp15_holo_aligned_predicted_structures/T1158v1.pdb,,T1158v1,,,P 
+data/casp15_set/casp15_holo_aligned_predicted_structures/T1158v2.pdb,,T1158v2,,,P +data/casp15_set/casp15_holo_aligned_predicted_structures/T1158v3.pdb,,T1158v3,,,P +data/casp15_set/casp15_holo_aligned_predicted_structures/T1158v4.pdb,,T1158v4,,,P +data/casp15_set/casp15_holo_aligned_predicted_structures/T1181.pdb,,T1181,,,P +data/casp15_set/casp15_holo_aligned_predicted_structures/T1186.pdb,,T1186,,,P +data/casp15_set/casp15_holo_aligned_predicted_structures/T1187.pdb,,T1187,,,P +data/casp15_set/casp15_holo_aligned_predicted_structures/T1188.pdb,,T1188,,,P diff --git a/data/test_cases/casp15/ensemble_inputs.csv b/data/test_cases/casp15/ensemble_inputs.csv index e0794dea..a99f5bfc 100644 --- a/data/test_cases/casp15/ensemble_inputs.csv +++ b/data/test_cases/casp15/ensemble_inputs.csv @@ -1,21 +1,20 @@ protein_input,ligand_smiles,name,ligand_numbers,ligand_names,ligand_tasks -data/casp15_set/predicted_structures/H1135.pdb,[Cl-]:[Cl-]:[Cl-]:[K+]:[K+]:[K+]:[K+]:[K+]:[K+]:[K+]:[K+]:[K+],H1135,"[1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12]","['CL', 'CL', 'CL', 'K', 'K', 'K', 'K', 'K', 'K', 'K', 'K', 'K']",P -data/casp15_set/predicted_structures/H1171v1.pdb,Nc1ncnc2c1ncn2[C@@H]1O[C@H](CO[P@](=O)([O-])OP(=O)([O-])[O-])[C@@H](O)[C@H]1O:Nc1ncnc2c1ncn2[C@@H]1O[C@H](COP(=O)([O-])OP(=O)([O-])OP([O-])([O-])=S)[C@@H](O)[C@H]1O:Nc1ncnc2c1ncn2[C@@H]1O[C@H](COP(=O)([O-])OP(=O)([O-])OP([O-])([O-])=S)[C@@H](O)[C@H]1O:Nc1ncnc2c1ncn2[C@@H]1O[C@H](COP(=O)([O-])OP(=O)([O-])OP([O-])([O-])=S)[C@@H](O)[C@H]1O:Nc1ncnc2c1ncn2[C@@H]1O[C@H](COP(=O)([O-])OP(=O)([O-])OP([O-])([O-])=S)[C@@H](O)[C@H]1O:Nc1ncnc2c1ncn2[C@@H]1O[C@H](COP(=O)([O-])OP(=O)([O-])OP([O-])([O-])=S)[C@@H](O)[C@H]1O:[Mg+2]:[Mg+2]:[Mg+2]:[Mg+2]:[Mg+2],H1171v1,"[1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11]","['ADP', 'AGS', 'AGS', 'AGS', 'AGS', 'AGS', 'MG', 'MG', 'MG', 'MG', 'MG']",P 
-data/casp15_set/predicted_structures/H1171v2.pdb,Nc1ncnc2c1ncn2[C@@H]1O[C@H](CO[P@](=O)([O-])OP(=O)([O-])[O-])[C@@H](O)[C@H]1O:Nc1ncnc2c1ncn2[C@@H]1O[C@H](CO[P@](=O)([O-])OP(=O)([O-])[O-])[C@@H](O)[C@H]1O:Nc1ncnc2c1ncn2[C@@H]1O[C@H](CO[P@](=O)([O-])OP(=O)([O-])[O-])[C@@H](O)[C@H]1O:Nc1ncnc2c1ncn2[C@@H]1O[C@H](COP(=O)([O-])OP(=O)([O-])OP([O-])([O-])=S)[C@@H](O)[C@H]1O:Nc1ncnc2c1ncn2[C@@H]1O[C@H](COP(=O)([O-])OP(=O)([O-])OP([O-])([O-])=S)[C@@H](O)[C@H]1O:Nc1ncnc2c1ncn2[C@@H]1O[C@H](COP(=O)([O-])OP(=O)([O-])OP([O-])([O-])=S)[C@@H](O)[C@H]1O:[Mg+2]:[Mg+2]:[Mg+2],H1171v2,"[1, 2, 3, 4, 5, 6, 7, 8, 9]","['ADP', 'ADP', 'ADP', 'AGS', 'AGS', 'AGS', 'MG', 'MG', 'MG']",P -data/casp15_set/predicted_structures/H1172v1.pdb,Nc1ncnc2c1ncn2[C@@H]1O[C@H](CO[P@](=O)([O-])OP(=O)([O-])[O-])[C@@H](O)[C@H]1O:Nc1ncnc2c1ncn2[C@@H]1O[C@H](CO[P@](=O)([O-])OP(=O)([O-])[O-])[C@@H](O)[C@H]1O:Nc1ncnc2c1ncn2[C@@H]1O[C@H](CO[P@](=O)([O-])OP(=O)([O-])[O-])[C@@H](O)[C@H]1O:Nc1ncnc2c1ncn2[C@@H]1O[C@H](COP(=O)([O-])OP(=O)([O-])OP([O-])([O-])=S)[C@@H](O)[C@H]1O:Nc1ncnc2c1ncn2[C@@H]1O[C@H](COP(=O)([O-])OP(=O)([O-])OP([O-])([O-])=S)[C@@H](O)[C@H]1O:Nc1ncnc2c1ncn2[C@@H]1O[C@H](COP(=O)([O-])OP(=O)([O-])OP([O-])([O-])=S)[C@@H](O)[C@H]1O:[Mg+2]:[Mg+2]:[Mg+2],H1172v1,"[1, 2, 3, 4, 5, 6, 7, 8, 9]","['ADP', 'ADP', 'ADP', 'AGS', 'AGS', 'AGS', 'MG', 'MG', 'MG']",P -data/casp15_set/predicted_structures/H1172v2.pdb,Nc1ncnc2c1ncn2[C@@H]1O[C@H](CO[P@](=O)([O-])OP(=O)([O-])[O-])[C@@H](O)[C@H]1O:Nc1ncnc2c1ncn2[C@@H]1O[C@H](CO[P@](=O)([O-])OP(=O)([O-])[O-])[C@@H](O)[C@H]1O:Nc1ncnc2c1ncn2[C@@H]1O[C@H](CO[P@](=O)([O-])OP(=O)([O-])[O-])[C@@H](O)[C@H]1O:Nc1ncnc2c1ncn2[C@@H]1O[C@H](CO[P@](=O)([O-])OP(=O)([O-])[O-])[C@@H](O)[C@H]1O:Nc1ncnc2c1ncn2[C@@H]1O[C@H](COP(=O)([O-])OP(=O)([O-])OP([O-])([O-])=S)[C@@H](O)[C@H]1O:Nc1ncnc2c1ncn2[C@@H]1O[C@H](COP(=O)([O-])OP(=O)([O-])OP([O-])([O-])=S)[C@@H](O)[C@H]1O:[Mg+2]:[Mg+2]:[Mg+2],H1172v2,"[1, 2, 3, 4, 5, 6, 7, 8, 9]","['ADP', 'ADP', 'ADP', 'ADP', 'AGS', 'AGS', 'MG', 'MG', 'MG']",P 
-data/casp15_set/predicted_structures/H1172v3.pdb,Nc1ncnc2c1ncn2[C@@H]1O[C@H](CO[P@](=O)([O-])OP(=O)([O-])[O-])[C@@H](O)[C@H]1O:Nc1ncnc2c1ncn2[C@@H]1O[C@H](CO[P@](=O)([O-])OP(=O)([O-])[O-])[C@@H](O)[C@H]1O:Nc1ncnc2c1ncn2[C@@H]1O[C@H](CO[P@](=O)([O-])OP(=O)([O-])[O-])[C@@H](O)[C@H]1O:Nc1ncnc2c1ncn2[C@@H]1O[C@H](CO[P@](=O)([O-])OP(=O)([O-])[O-])[C@@H](O)[C@H]1O:Nc1ncnc2c1ncn2[C@@H]1O[C@H](COP(=O)([O-])OP(=O)([O-])OP([O-])([O-])=S)[C@@H](O)[C@H]1O:Nc1ncnc2c1ncn2[C@@H]1O[C@H](COP(=O)([O-])OP(=O)([O-])OP([O-])([O-])=S)[C@@H](O)[C@H]1O:[Mg+2]:[Mg+2],H1172v3,"[1, 2, 3, 4, 5, 6, 7, 8]","['ADP', 'ADP', 'ADP', 'ADP', 'AGS', 'AGS', 'MG', 'MG']",P -data/casp15_set/predicted_structures/H1172v4.pdb,Nc1ncnc2c1ncn2[C@@H]1O[C@H](CO[P@](=O)([O-])OP(=O)([O-])[O-])[C@@H](O)[C@H]1O:Nc1ncnc2c1ncn2[C@@H]1O[C@H](CO[P@](=O)([O-])OP(=O)([O-])[O-])[C@@H](O)[C@H]1O:Nc1ncnc2c1ncn2[C@@H]1O[C@H](CO[P@](=O)([O-])OP(=O)([O-])[O-])[C@@H](O)[C@H]1O:Nc1ncnc2c1ncn2[C@@H]1O[C@H](COP(=O)([O-])OP(=O)([O-])OP([O-])([O-])=S)[C@@H](O)[C@H]1O:Nc1ncnc2c1ncn2[C@@H]1O[C@H](COP(=O)([O-])OP(=O)([O-])OP([O-])([O-])=S)[C@@H](O)[C@H]1O:Nc1ncnc2c1ncn2[C@@H]1O[C@H](COP(=O)([O-])OP(=O)([O-])OP([O-])([O-])=S)[C@@H](O)[C@H]1O:[Mg+2]:[Mg+2]:[Mg+2],H1172v4,"[1, 2, 3, 4, 5, 6, 7, 8, 9]","['ADP', 'ADP', 'ADP', 'AGS', 'AGS', 'AGS', 'MG', 'MG', 'MG']",P -data/casp15_set/predicted_structures/T1127v2.pdb,CC(C)(CO[P@](=O)(O)O[P@@](=O)(O)OC[C@H]1O[C@@H](n2cnc3c(N)ncnc32)[C@H](O)[C@@H]1OP(=O)(O)O)[C@@H](O)C(=O)NCCC(=O)NCCS:CC(C)(CO[P@](=O)(O)O[P@@](=O)(O)OC[C@H]1O[C@@H](n2cnc3c(N)ncnc32)[C@H](O)[C@@H]1OP(=O)(O)O)[C@@H](O)C(=O)NCCC(=O)NCCS:O=S(=O)(O)CCN1CCN(CCO)CC1:O=S(=O)(O)CCN1CCN(CCO)CC1:C[C@H](O)CC(C)(C)O,T1127v2,"[1, 2, 3, 4, 5]","['COA', 'COA', 'EPE', 'EPE', 'MPD']",P -data/casp15_set/predicted_structures/T1146.pdb,CC(=O)N[C@H]1[C@H](O)O[C@H](CO)[C@@H](O)[C@@H]1O,T1146,[1],['NAG'],P -data/casp15_set/predicted_structures/T1152.pdb,CC(=O)NC1C(O)OC(CO)C(OC2OC(CO)C(OC3OC(CO)C(O)C(O)C3NC(C)=O)C(O)C2NC(C)=O)C1O,T1152,[1],['NAG'],P 
-data/casp15_set/predicted_structures/T1170.pdb,Nc1ncnc2c1ncn2[C@@H]1O[C@H](CO[P@](=O)([O-])OP(=O)([O-])[O-])[C@@H](O)[C@H]1O:Nc1ncnc2c1ncn2[C@@H]1O[C@H](CO[P@](=O)([O-])OP(=O)([O-])[O-])[C@@H](O)[C@H]1O:Nc1ncnc2c1ncn2[C@@H]1O[C@H](COP(=O)([O-])OP(=O)([O-])OP([O-])([O-])=S)[C@@H](O)[C@H]1O:Nc1ncnc2c1ncn2[C@@H]1O[C@H](COP(=O)([O-])OP(=O)([O-])OP([O-])([O-])=S)[C@@H](O)[C@H]1O:Nc1ncnc2c1ncn2[C@@H]1O[C@H](COP(=O)([O-])OP(=O)([O-])OP([O-])([O-])=S)[C@@H](O)[C@H]1O:[Mg+2]:[Mg+2]:[Mg+2]:Nc1ncnc2c1ncn2[C@@H]1O[C@H](COP(=O)([O-])OP(=O)([O-])OP([O-])([O-])=S)[C@@H](O)[C@H]1O:[Mg+2],T1170,"[1, 2, 4, 5, 6, 7, 8, 9, 10, 9]","['ADP', 'ADP', 'AGS', 'AGS', 'AGS', 'MG', 'MG', 'MG', 'AGS', 'MG']",P -data/casp15_set/predicted_structures/T1181.pdb,CC(=O)N[C@H]1[C@@H](O)O[C@H](CO[C@H]2O[C@H](CO)[C@@H](O[C@@H]3O[C@H](C(=O)N[C@@H](CO)C([O-])=O)[C@@H](O[C@H]4O[C@H](CO[C@H]5O[C@H](CO)[C@@H](O[C@@H]6OC(C(=O)N[C@@H](CO)C([O-])=O)=C[C@H](O)[C@H]6O)[C@H](O)[C@H]5NC(C)=O)[C@@H](O)[C@H](O)[C@H]4NC(C)=O)[C@H](O)[C@H]3O)[C@H](O)[C@H]2NC(C)=O)[C@@H](O)[C@@H]1O:CC(=O)N[C@H]1[C@@H](O)O[C@H](CO[C@H]2O[C@H](CO)[C@@H](O[C@@H]3O[C@H](C(=O)N[C@@H](CO)C([O-])=O)[C@@H](O[C@H]4O[C@H](CO[C@H]5O[C@H](CO)[C@@H](O[C@@H]6OC(C(=O)N[C@@H](CO)C([O-])=O)=C[C@H](O)[C@H]6O)[C@H](O)[C@H]5NC(C)=O)[C@@H](O)[C@H](O)[C@H]4NC(C)=O)[C@H](O)[C@H]3O)[C@H](O)[C@H]2NC(C)=O)[C@@H](O)[C@@H]1O:CC(=O)N[C@H]1[C@@H](O)O[C@H](CO[C@H]2O[C@H](CO)[C@@H](O[C@@H]3O[C@H](C(=O)N[C@@H](CO)C([O-])=O)[C@@H](O[C@H]4O[C@H](CO[C@H]5O[C@H](CO)[C@@H](O[C@@H]6OC(C(=O)N[C@@H](CO)C([O-])=O)=C[C@H](O)[C@H]6O)[C@H](O)[C@H]5NC(C)=O)[C@@H](O)[C@H](O)[C@H]4NC(C)=O)[C@H](O)[C@H]3O)[C@H](O)[C@H]2NC(C)=O)[C@@H](O)[C@@H]1O:CC(=O)N[C@H]1[C@@H](O)O[C@H](CO[C@H]2O[C@H](CO)[C@@H](O[C@@H]3O[C@H](C(=O)N[C@@H](CO)C([O-])=O)[C@@H](O[C@H]4O[C@H](CO[C@H]5O[C@H](CO)[C@@H](O[C@@H]6OC(C(=O)N[C@@H](CO)C([O-])=O)=C[C@H](O)[C@H]6O)[C@H](O)[C@H]5NC(C)=O)[C@@H](O)[C@H](O)[C@H]4NC(C)=O)[C@H](O)[C@H]3O)[C@H](O)[C@H]2NC(C)=O)[C@@H](O)[C@@H]1O:[Zn+2]:[Zn+2]:[Zn+2]:[Ca+2]:CC(=O)N[C@H]1
[C@@H](O)O[C@H](CO[C@H]2O[C@H](CO)[C@@H](O[C@@H]3O[C@H](C(=O)N[C@@H](CO)C([O-])=O)[C@@H](O[C@H]4O[C@H](CO[C@H]5O[C@H](CO)[C@@H](O[C@@H]6OC(C(=O)N[C@@H](CO)C([O-])=O)=C[C@H](O)[C@H]6O)[C@H](O)[C@H]5NC(C)=O)[C@@H](O)[C@H](O)[C@H]4NC(C)=O)[C@H](O)[C@H]3O)[C@H](O)[C@H]2NC(C)=O)[C@@H](O)[C@@H]1O,T1181,"[1, 2, 3, 4, 5, 6, 7, 8, 9]","['OAA', 'OAA', 'OAA', 'OAA', 'ZN', 'ZN', 'ZN', 'CA', 'OAA']",P -data/casp15_set/predicted_structures/T1187.pdb,CC(=O)N[C@H]1[C@H](O[C@@H]2[C@@H](CO)O[C@@H](O[C@@H]3[C@@H](CO)O[C@@H](O)[C@H](NC(C)=O)[C@H]3O)[C@H](NC(C)=O)[C@H]2O)O[C@H](CO)[C@@H](O)[C@@H]1O:CC(=O)N[C@H]1[C@H](O[C@@H]2[C@@H](CO)O[C@@H](O[C@@H]3[C@@H](CO)O[C@@H](O)[C@H](NC(C)=O)[C@H]3O)[C@H](NC(C)=O)[C@H]2O)O[C@H](CO)[C@@H](O)[C@@H]1O,T1187,"[1, 2]","['NAG', 'NAG']",P -data/casp15_set/predicted_structures/T1188.pdb,Cn1cnc2c1c(=O)n(CCCn1c(=O)c3c(ncn3C)n(C)c1=O)c(=O)n2C:Cn1cnc2c1c(=O)n(CCCn1c(=O)c3c(ncn3C)n(C)c1=O)c(=O)n2C:[Cd+2]:[Cd+2]:[Co+2],T1188,"[1, 2, 3, 4, 5]","['DW0', 'DW0', 'CD', 'CD', 'CO']",P -data/casp15_set/predicted_structures/T1124.pdb,Nc1ncnc2c1ncn2[C@@H]1O[C@H](CSCC[C@H](N)C(=O)O)[C@@H](O)[C@H]1O:Nc1ncnc2c1ncn2[C@@H]1O[C@H](CSCC[C@H](N)C(=O)O)[C@@H](O)[C@H]1O:N[C@@H](Cc1ccc(O)cc1)C(=O)O:N[C@@H](Cc1ccc(O)cc1)C(=O)O,T1124,"[1, 2, 3, 4]","['SAH', 'SAH', 'TYR', 'TYR']",P -data/casp15_set/predicted_structures/T1186.pdb,Cc1onc(c1C(=O)N[C@H](C=O)[C@@H]1N[C@@H](C(O)=O)C(C)(C)S1)-c1c(Cl)cccc1Cl,T1186,[1],['LIG'],P -data/casp15_set/predicted_structures/T1158v1.pdb,CCCCC[C@@H](/C=C/[C@H]1[C@@H](CC(=O)[C@@H]1CCCCCCC(=O)O)O)O,T1158v1,[1],['XPG'],P -data/casp15_set/predicted_structures/T1158v2.pdb,CCCCC[C@@H](/C=C/[C@H]1[C@@H](CC(=O)[C@@H]1C/C=C\CCCC(=O)O)O)O,T1158v2,[1],['P2E'],P -data/casp15_set/predicted_structures/T1158v3.pdb,C[C@]12CC[C@H]3[C@H]([C@@H]1CCC2=O)CC=C4[C@@]3(CC[C@@H](C4)OS(=O)(=O)O)C,T1158v3,[1],['XH0'],P 
-data/casp15_set/predicted_structures/T1158v4.pdb,C1=NC(=C2C(=N1)N(C=N2)[C@H]3[C@@H]([C@@H]([C@H](O3)COP(=O)(O)OP(=O)(O)OP(=O)(O)O)O)O)N:C1=NC(=C2C(=N1)N(C=N2)[C@H]3[C@@H]([C@@H]([C@H](O3)COP(=O)(O)OP(=O)(O)OP(=O)(O)O)O)O)N:[Mg+2]:[Mg+2],T1158v4,"[1, 2, 3, 4]","['ATP', 'ATP', 'MG', 'MG']",P +data/casp15_set/casp15_holo_aligned_predicted_structures/H1135.pdb,[Cl-]:[Cl-]:[Cl-]:[K+]:[K+]:[K+]:[K+]:[K+]:[K+]:[K+]:[K+]:[K+],H1135,"[1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12]","['CL', 'CL', 'CL', 'K', 'K', 'K', 'K', 'K', 'K', 'K', 'K', 'K']",P +data/casp15_set/casp15_holo_aligned_predicted_structures/H1171v1.pdb,Nc1ncnc2c1ncn2[C@@H]1O[C@H](CO[P@](=O)([O-])OP(=O)([O-])[O-])[C@@H](O)[C@H]1O:Nc1ncnc2c1ncn2[C@@H]1O[C@H](COP(=O)([O-])OP(=O)([O-])OP([O-])([O-])=S)[C@@H](O)[C@H]1O:Nc1ncnc2c1ncn2[C@@H]1O[C@H](COP(=O)([O-])OP(=O)([O-])OP([O-])([O-])=S)[C@@H](O)[C@H]1O:Nc1ncnc2c1ncn2[C@@H]1O[C@H](COP(=O)([O-])OP(=O)([O-])OP([O-])([O-])=S)[C@@H](O)[C@H]1O:Nc1ncnc2c1ncn2[C@@H]1O[C@H](COP(=O)([O-])OP(=O)([O-])OP([O-])([O-])=S)[C@@H](O)[C@H]1O:Nc1ncnc2c1ncn2[C@@H]1O[C@H](COP(=O)([O-])OP(=O)([O-])OP([O-])([O-])=S)[C@@H](O)[C@H]1O:[Mg+2]:[Mg+2]:[Mg+2]:[Mg+2]:[Mg+2],H1171v1,"[1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11]","['ADP', 'AGS', 'AGS', 'AGS', 'AGS', 'AGS', 'MG', 'MG', 'MG', 'MG', 'MG']",P +data/casp15_set/casp15_holo_aligned_predicted_structures/H1171v2.pdb,Nc1ncnc2c1ncn2[C@@H]1O[C@H](CO[P@](=O)([O-])OP(=O)([O-])[O-])[C@@H](O)[C@H]1O:Nc1ncnc2c1ncn2[C@@H]1O[C@H](CO[P@](=O)([O-])OP(=O)([O-])[O-])[C@@H](O)[C@H]1O:Nc1ncnc2c1ncn2[C@@H]1O[C@H](CO[P@](=O)([O-])OP(=O)([O-])[O-])[C@@H](O)[C@H]1O:Nc1ncnc2c1ncn2[C@@H]1O[C@H](COP(=O)([O-])OP(=O)([O-])OP([O-])([O-])=S)[C@@H](O)[C@H]1O:Nc1ncnc2c1ncn2[C@@H]1O[C@H](COP(=O)([O-])OP(=O)([O-])OP([O-])([O-])=S)[C@@H](O)[C@H]1O:Nc1ncnc2c1ncn2[C@@H]1O[C@H](COP(=O)([O-])OP(=O)([O-])OP([O-])([O-])=S)[C@@H](O)[C@H]1O:[Mg+2]:[Mg+2]:[Mg+2],H1171v2,"[1, 2, 3, 4, 5, 6, 7, 8, 9]","['ADP', 'ADP', 'ADP', 'AGS', 'AGS', 'AGS', 'MG', 'MG', 'MG']",P 
+data/casp15_set/casp15_holo_aligned_predicted_structures/H1172v1.pdb,Nc1ncnc2c1ncn2[C@@H]1O[C@H](CO[P@](=O)([O-])OP(=O)([O-])[O-])[C@@H](O)[C@H]1O:Nc1ncnc2c1ncn2[C@@H]1O[C@H](CO[P@](=O)([O-])OP(=O)([O-])[O-])[C@@H](O)[C@H]1O:Nc1ncnc2c1ncn2[C@@H]1O[C@H](CO[P@](=O)([O-])OP(=O)([O-])[O-])[C@@H](O)[C@H]1O:Nc1ncnc2c1ncn2[C@@H]1O[C@H](COP(=O)([O-])OP(=O)([O-])OP([O-])([O-])=S)[C@@H](O)[C@H]1O:Nc1ncnc2c1ncn2[C@@H]1O[C@H](COP(=O)([O-])OP(=O)([O-])OP([O-])([O-])=S)[C@@H](O)[C@H]1O:Nc1ncnc2c1ncn2[C@@H]1O[C@H](COP(=O)([O-])OP(=O)([O-])OP([O-])([O-])=S)[C@@H](O)[C@H]1O:[Mg+2]:[Mg+2]:[Mg+2],H1172v1,"[1, 2, 3, 4, 5, 6, 7, 8, 9]","['ADP', 'ADP', 'ADP', 'AGS', 'AGS', 'AGS', 'MG', 'MG', 'MG']",P +data/casp15_set/casp15_holo_aligned_predicted_structures/H1172v2.pdb,Nc1ncnc2c1ncn2[C@@H]1O[C@H](CO[P@](=O)([O-])OP(=O)([O-])[O-])[C@@H](O)[C@H]1O:Nc1ncnc2c1ncn2[C@@H]1O[C@H](CO[P@](=O)([O-])OP(=O)([O-])[O-])[C@@H](O)[C@H]1O:Nc1ncnc2c1ncn2[C@@H]1O[C@H](CO[P@](=O)([O-])OP(=O)([O-])[O-])[C@@H](O)[C@H]1O:Nc1ncnc2c1ncn2[C@@H]1O[C@H](CO[P@](=O)([O-])OP(=O)([O-])[O-])[C@@H](O)[C@H]1O:Nc1ncnc2c1ncn2[C@@H]1O[C@H](COP(=O)([O-])OP(=O)([O-])OP([O-])([O-])=S)[C@@H](O)[C@H]1O:Nc1ncnc2c1ncn2[C@@H]1O[C@H](COP(=O)([O-])OP(=O)([O-])OP([O-])([O-])=S)[C@@H](O)[C@H]1O:[Mg+2]:[Mg+2]:[Mg+2],H1172v2,"[1, 2, 3, 4, 5, 6, 7, 8, 9]","['ADP', 'ADP', 'ADP', 'ADP', 'AGS', 'AGS', 'MG', 'MG', 'MG']",P +data/casp15_set/casp15_holo_aligned_predicted_structures/H1172v3.pdb,Nc1ncnc2c1ncn2[C@@H]1O[C@H](CO[P@](=O)([O-])OP(=O)([O-])[O-])[C@@H](O)[C@H]1O:Nc1ncnc2c1ncn2[C@@H]1O[C@H](CO[P@](=O)([O-])OP(=O)([O-])[O-])[C@@H](O)[C@H]1O:Nc1ncnc2c1ncn2[C@@H]1O[C@H](CO[P@](=O)([O-])OP(=O)([O-])[O-])[C@@H](O)[C@H]1O:Nc1ncnc2c1ncn2[C@@H]1O[C@H](CO[P@](=O)([O-])OP(=O)([O-])[O-])[C@@H](O)[C@H]1O:Nc1ncnc2c1ncn2[C@@H]1O[C@H](COP(=O)([O-])OP(=O)([O-])OP([O-])([O-])=S)[C@@H](O)[C@H]1O:Nc1ncnc2c1ncn2[C@@H]1O[C@H](COP(=O)([O-])OP(=O)([O-])OP([O-])([O-])=S)[C@@H](O)[C@H]1O:[Mg+2]:[Mg+2],H1172v3,"[1, 2, 3, 4, 5, 6, 7, 8]","['ADP', 'ADP', 'ADP', 
'ADP', 'AGS', 'AGS', 'MG', 'MG']",P +data/casp15_set/casp15_holo_aligned_predicted_structures/H1172v4.pdb,Nc1ncnc2c1ncn2[C@@H]1O[C@H](CO[P@](=O)([O-])OP(=O)([O-])[O-])[C@@H](O)[C@H]1O:Nc1ncnc2c1ncn2[C@@H]1O[C@H](CO[P@](=O)([O-])OP(=O)([O-])[O-])[C@@H](O)[C@H]1O:Nc1ncnc2c1ncn2[C@@H]1O[C@H](CO[P@](=O)([O-])OP(=O)([O-])[O-])[C@@H](O)[C@H]1O:Nc1ncnc2c1ncn2[C@@H]1O[C@H](COP(=O)([O-])OP(=O)([O-])OP([O-])([O-])=S)[C@@H](O)[C@H]1O:Nc1ncnc2c1ncn2[C@@H]1O[C@H](COP(=O)([O-])OP(=O)([O-])OP([O-])([O-])=S)[C@@H](O)[C@H]1O:Nc1ncnc2c1ncn2[C@@H]1O[C@H](COP(=O)([O-])OP(=O)([O-])OP([O-])([O-])=S)[C@@H](O)[C@H]1O:[Mg+2]:[Mg+2]:[Mg+2],H1172v4,"[1, 2, 3, 4, 5, 6, 7, 8, 9]","['ADP', 'ADP', 'ADP', 'AGS', 'AGS', 'AGS', 'MG', 'MG', 'MG']",P +data/casp15_set/casp15_holo_aligned_predicted_structures/T1127v2.pdb,CC(C)(CO[P@](=O)(O)O[P@@](=O)(O)OC[C@H]1O[C@@H](n2cnc3c(N)ncnc32)[C@H](O)[C@@H]1OP(=O)(O)O)[C@@H](O)C(=O)NCCC(=O)NCCS:CC(C)(CO[P@](=O)(O)O[P@@](=O)(O)OC[C@H]1O[C@@H](n2cnc3c(N)ncnc32)[C@H](O)[C@@H]1OP(=O)(O)O)[C@@H](O)C(=O)NCCC(=O)NCCS:O=S(=O)(O)CCN1CCN(CCO)CC1:O=S(=O)(O)CCN1CCN(CCO)CC1:C[C@H](O)CC(C)(C)O,T1127v2,"[1, 2, 3, 4, 5]","['COA', 'COA', 'EPE', 'EPE', 'MPD']",P +data/casp15_set/casp15_holo_aligned_predicted_structures/T1146.pdb,CC(=O)N[C@H]1[C@H](O)O[C@H](CO)[C@@H](O)[C@@H]1O,T1146,[1],['NAG'],P +data/casp15_set/casp15_holo_aligned_predicted_structures/T1152.pdb,CC(=O)NC1C(O)OC(CO)C(OC2OC(CO)C(OC3OC(CO)C(O)C(O)C3NC(C)=O)C(O)C2NC(C)=O)C1O,T1152,[1],['NAG'],P 
+data/casp15_set/casp15_holo_aligned_predicted_structures/T1181.pdb,CC(=O)N[C@H]1[C@@H](O)O[C@H](CO[C@H]2O[C@H](CO)[C@@H](O[C@@H]3O[C@H](C(=O)N[C@@H](CO)C([O-])=O)[C@@H](O[C@H]4O[C@H](CO[C@H]5O[C@H](CO)[C@@H](O[C@@H]6OC(C(=O)N[C@@H](CO)C([O-])=O)=C[C@H](O)[C@H]6O)[C@H](O)[C@H]5NC(C)=O)[C@@H](O)[C@H](O)[C@H]4NC(C)=O)[C@H](O)[C@H]3O)[C@H](O)[C@H]2NC(C)=O)[C@@H](O)[C@@H]1O:CC(=O)N[C@H]1[C@@H](O)O[C@H](CO[C@H]2O[C@H](CO)[C@@H](O[C@@H]3O[C@H](C(=O)N[C@@H](CO)C([O-])=O)[C@@H](O[C@H]4O[C@H](CO[C@H]5O[C@H](CO)[C@@H](O[C@@H]6OC(C(=O)N[C@@H](CO)C([O-])=O)=C[C@H](O)[C@H]6O)[C@H](O)[C@H]5NC(C)=O)[C@@H](O)[C@H](O)[C@H]4NC(C)=O)[C@H](O)[C@H]3O)[C@H](O)[C@H]2NC(C)=O)[C@@H](O)[C@@H]1O:CC(=O)N[C@H]1[C@@H](O)O[C@H](CO[C@H]2O[C@H](CO)[C@@H](O[C@@H]3O[C@H](C(=O)N[C@@H](CO)C([O-])=O)[C@@H](O[C@H]4O[C@H](CO[C@H]5O[C@H](CO)[C@@H](O[C@@H]6OC(C(=O)N[C@@H](CO)C([O-])=O)=C[C@H](O)[C@H]6O)[C@H](O)[C@H]5NC(C)=O)[C@@H](O)[C@H](O)[C@H]4NC(C)=O)[C@H](O)[C@H]3O)[C@H](O)[C@H]2NC(C)=O)[C@@H](O)[C@@H]1O:CC(=O)N[C@H]1[C@@H](O)O[C@H](CO[C@H]2O[C@H](CO)[C@@H](O[C@@H]3O[C@H](C(=O)N[C@@H](CO)C([O-])=O)[C@@H](O[C@H]4O[C@H](CO[C@H]5O[C@H](CO)[C@@H](O[C@@H]6OC(C(=O)N[C@@H](CO)C([O-])=O)=C[C@H](O)[C@H]6O)[C@H](O)[C@H]5NC(C)=O)[C@@H](O)[C@H](O)[C@H]4NC(C)=O)[C@H](O)[C@H]3O)[C@H](O)[C@H]2NC(C)=O)[C@@H](O)[C@@H]1O:[Zn+2]:[Zn+2]:[Zn+2]:[Ca+2]:CC(=O)N[C@H]1[C@@H](O)O[C@H](CO[C@H]2O[C@H](CO)[C@@H](O[C@@H]3O[C@H](C(=O)N[C@@H](CO)C([O-])=O)[C@@H](O[C@H]4O[C@H](CO[C@H]5O[C@H](CO)[C@@H](O[C@@H]6OC(C(=O)N[C@@H](CO)C([O-])=O)=C[C@H](O)[C@H]6O)[C@H](O)[C@H]5NC(C)=O)[C@@H](O)[C@H](O)[C@H]4NC(C)=O)[C@H](O)[C@H]3O)[C@H](O)[C@H]2NC(C)=O)[C@@H](O)[C@@H]1O,T1181,"[1, 2, 3, 4, 5, 6, 7, 8, 9]","['OAA', 'OAA', 'OAA', 'OAA', 'ZN', 'ZN', 'ZN', 'CA', 'OAA']",P 
+data/casp15_set/casp15_holo_aligned_predicted_structures/T1187.pdb,CC(=O)N[C@H]1[C@H](O[C@@H]2[C@@H](CO)O[C@@H](O[C@@H]3[C@@H](CO)O[C@@H](O)[C@H](NC(C)=O)[C@H]3O)[C@H](NC(C)=O)[C@H]2O)O[C@H](CO)[C@@H](O)[C@@H]1O:CC(=O)N[C@H]1[C@H](O[C@@H]2[C@@H](CO)O[C@@H](O[C@@H]3[C@@H](CO)O[C@@H](O)[C@H](NC(C)=O)[C@H]3O)[C@H](NC(C)=O)[C@H]2O)O[C@H](CO)[C@@H](O)[C@@H]1O,T1187,"[1, 2]","['NAG', 'NAG']",P +data/casp15_set/casp15_holo_aligned_predicted_structures/T1188.pdb,Cn1cnc2c1c(=O)n(CCCn1c(=O)c3c(ncn3C)n(C)c1=O)c(=O)n2C:Cn1cnc2c1c(=O)n(CCCn1c(=O)c3c(ncn3C)n(C)c1=O)c(=O)n2C:[Cd+2]:[Cd+2]:[Co+2],T1188,"[1, 2, 3, 4, 5]","['DW0', 'DW0', 'CD', 'CD', 'CO']",P +data/casp15_set/casp15_holo_aligned_predicted_structures/T1124.pdb,Nc1ncnc2c1ncn2[C@@H]1O[C@H](CSCC[C@H](N)C(=O)O)[C@@H](O)[C@H]1O:Nc1ncnc2c1ncn2[C@@H]1O[C@H](CSCC[C@H](N)C(=O)O)[C@@H](O)[C@H]1O:N[C@@H](Cc1ccc(O)cc1)C(=O)O:N[C@@H](Cc1ccc(O)cc1)C(=O)O,T1124,"[1, 2, 3, 4]","['SAH', 'SAH', 'TYR', 'TYR']",P +data/casp15_set/casp15_holo_aligned_predicted_structures/T1186.pdb,Cc1onc(c1C(=O)N[C@H](C=O)[C@@H]1N[C@@H](C(O)=O)C(C)(C)S1)-c1c(Cl)cccc1Cl,T1186,[1],['LIG'],P +data/casp15_set/casp15_holo_aligned_predicted_structures/T1158v1.pdb,CCCCC[C@@H](/C=C/[C@H]1[C@@H](CC(=O)[C@@H]1CCCCCCC(=O)O)O)O,T1158v1,[1],['XPG'],P +data/casp15_set/casp15_holo_aligned_predicted_structures/T1158v2.pdb,CCCCC[C@@H](/C=C/[C@H]1[C@@H](CC(=O)[C@@H]1C/C=C\CCCC(=O)O)O)O,T1158v2,[1],['P2E'],P +data/casp15_set/casp15_holo_aligned_predicted_structures/T1158v3.pdb,C[C@]12CC[C@H]3[C@H]([C@@H]1CCC2=O)CC=C4[C@@]3(CC[C@@H](C4)OS(=O)(=O)O)C,T1158v3,[1],['XH0'],P +data/casp15_set/casp15_holo_aligned_predicted_structures/T1158v4.pdb,C1=NC(=C2C(=N1)N(C=N2)[C@H]3[C@@H]([C@@H]([C@H](O3)COP(=O)(O)OP(=O)(O)OP(=O)(O)O)O)O)N:C1=NC(=C2C(=N1)N(C=N2)[C@H]3[C@@H]([C@@H]([C@H](O3)COP(=O)(O)OP(=O)(O)OP(=O)(O)O)O)O)N:[Mg+2]:[Mg+2],T1158v4,"[1, 2, 3, 4]","['ATP', 'ATP', 'MG', 'MG']",P diff --git a/data/test_cases/casp15/ensemble_pocket_only_inputs.csv 
b/data/test_cases/casp15/ensemble_pocket_only_inputs.csv new file mode 100644 index 00000000..9215f85f --- /dev/null +++ b/data/test_cases/casp15/ensemble_pocket_only_inputs.csv @@ -0,0 +1,20 @@ +protein_input,ligand_smiles,name,ligand_numbers,ligand_names,ligand_tasks +data/casp15_set/casp15_holo_aligned_predicted_structures_bs_cropped/H1135.pdb,[Cl-]:[Cl-]:[Cl-]:[K+]:[K+]:[K+]:[K+]:[K+]:[K+]:[K+]:[K+]:[K+],H1135,"[1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12]","['CL', 'CL', 'CL', 'K', 'K', 'K', 'K', 'K', 'K', 'K', 'K', 'K']",P +data/casp15_set/casp15_holo_aligned_predicted_structures_bs_cropped/H1171v1.pdb,Nc1ncnc2c1ncn2[C@@H]1O[C@H](CO[P@](=O)([O-])OP(=O)([O-])[O-])[C@@H](O)[C@H]1O:Nc1ncnc2c1ncn2[C@@H]1O[C@H](COP(=O)([O-])OP(=O)([O-])OP([O-])([O-])=S)[C@@H](O)[C@H]1O:Nc1ncnc2c1ncn2[C@@H]1O[C@H](COP(=O)([O-])OP(=O)([O-])OP([O-])([O-])=S)[C@@H](O)[C@H]1O:Nc1ncnc2c1ncn2[C@@H]1O[C@H](COP(=O)([O-])OP(=O)([O-])OP([O-])([O-])=S)[C@@H](O)[C@H]1O:Nc1ncnc2c1ncn2[C@@H]1O[C@H](COP(=O)([O-])OP(=O)([O-])OP([O-])([O-])=S)[C@@H](O)[C@H]1O:Nc1ncnc2c1ncn2[C@@H]1O[C@H](COP(=O)([O-])OP(=O)([O-])OP([O-])([O-])=S)[C@@H](O)[C@H]1O:[Mg+2]:[Mg+2]:[Mg+2]:[Mg+2]:[Mg+2],H1171v1,"[1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11]","['ADP', 'AGS', 'AGS', 'AGS', 'AGS', 'AGS', 'MG', 'MG', 'MG', 'MG', 'MG']",P +data/casp15_set/casp15_holo_aligned_predicted_structures_bs_cropped/H1171v2.pdb,Nc1ncnc2c1ncn2[C@@H]1O[C@H](CO[P@](=O)([O-])OP(=O)([O-])[O-])[C@@H](O)[C@H]1O:Nc1ncnc2c1ncn2[C@@H]1O[C@H](CO[P@](=O)([O-])OP(=O)([O-])[O-])[C@@H](O)[C@H]1O:Nc1ncnc2c1ncn2[C@@H]1O[C@H](CO[P@](=O)([O-])OP(=O)([O-])[O-])[C@@H](O)[C@H]1O:Nc1ncnc2c1ncn2[C@@H]1O[C@H](COP(=O)([O-])OP(=O)([O-])OP([O-])([O-])=S)[C@@H](O)[C@H]1O:Nc1ncnc2c1ncn2[C@@H]1O[C@H](COP(=O)([O-])OP(=O)([O-])OP([O-])([O-])=S)[C@@H](O)[C@H]1O:Nc1ncnc2c1ncn2[C@@H]1O[C@H](COP(=O)([O-])OP(=O)([O-])OP([O-])([O-])=S)[C@@H](O)[C@H]1O:[Mg+2]:[Mg+2]:[Mg+2],H1171v2,"[1, 2, 3, 4, 5, 6, 7, 8, 9]","['ADP', 'ADP', 'ADP', 'AGS', 'AGS', 'AGS', 'MG', 'MG', 'MG']",P 
+data/casp15_set/casp15_holo_aligned_predicted_structures_bs_cropped/H1172v1.pdb,Nc1ncnc2c1ncn2[C@@H]1O[C@H](CO[P@](=O)([O-])OP(=O)([O-])[O-])[C@@H](O)[C@H]1O:Nc1ncnc2c1ncn2[C@@H]1O[C@H](CO[P@](=O)([O-])OP(=O)([O-])[O-])[C@@H](O)[C@H]1O:Nc1ncnc2c1ncn2[C@@H]1O[C@H](CO[P@](=O)([O-])OP(=O)([O-])[O-])[C@@H](O)[C@H]1O:Nc1ncnc2c1ncn2[C@@H]1O[C@H](COP(=O)([O-])OP(=O)([O-])OP([O-])([O-])=S)[C@@H](O)[C@H]1O:Nc1ncnc2c1ncn2[C@@H]1O[C@H](COP(=O)([O-])OP(=O)([O-])OP([O-])([O-])=S)[C@@H](O)[C@H]1O:Nc1ncnc2c1ncn2[C@@H]1O[C@H](COP(=O)([O-])OP(=O)([O-])OP([O-])([O-])=S)[C@@H](O)[C@H]1O:[Mg+2]:[Mg+2]:[Mg+2],H1172v1,"[1, 2, 3, 4, 5, 6, 7, 8, 9]","['ADP', 'ADP', 'ADP', 'AGS', 'AGS', 'AGS', 'MG', 'MG', 'MG']",P +data/casp15_set/casp15_holo_aligned_predicted_structures_bs_cropped/H1172v2.pdb,Nc1ncnc2c1ncn2[C@@H]1O[C@H](CO[P@](=O)([O-])OP(=O)([O-])[O-])[C@@H](O)[C@H]1O:Nc1ncnc2c1ncn2[C@@H]1O[C@H](CO[P@](=O)([O-])OP(=O)([O-])[O-])[C@@H](O)[C@H]1O:Nc1ncnc2c1ncn2[C@@H]1O[C@H](CO[P@](=O)([O-])OP(=O)([O-])[O-])[C@@H](O)[C@H]1O:Nc1ncnc2c1ncn2[C@@H]1O[C@H](CO[P@](=O)([O-])OP(=O)([O-])[O-])[C@@H](O)[C@H]1O:Nc1ncnc2c1ncn2[C@@H]1O[C@H](COP(=O)([O-])OP(=O)([O-])OP([O-])([O-])=S)[C@@H](O)[C@H]1O:Nc1ncnc2c1ncn2[C@@H]1O[C@H](COP(=O)([O-])OP(=O)([O-])OP([O-])([O-])=S)[C@@H](O)[C@H]1O:[Mg+2]:[Mg+2]:[Mg+2],H1172v2,"[1, 2, 3, 4, 5, 6, 7, 8, 9]","['ADP', 'ADP', 'ADP', 'ADP', 'AGS', 'AGS', 'MG', 'MG', 'MG']",P +data/casp15_set/casp15_holo_aligned_predicted_structures_bs_cropped/H1172v3.pdb,Nc1ncnc2c1ncn2[C@@H]1O[C@H](CO[P@](=O)([O-])OP(=O)([O-])[O-])[C@@H](O)[C@H]1O:Nc1ncnc2c1ncn2[C@@H]1O[C@H](CO[P@](=O)([O-])OP(=O)([O-])[O-])[C@@H](O)[C@H]1O:Nc1ncnc2c1ncn2[C@@H]1O[C@H](CO[P@](=O)([O-])OP(=O)([O-])[O-])[C@@H](O)[C@H]1O:Nc1ncnc2c1ncn2[C@@H]1O[C@H](CO[P@](=O)([O-])OP(=O)([O-])[O-])[C@@H](O)[C@H]1O:Nc1ncnc2c1ncn2[C@@H]1O[C@H](COP(=O)([O-])OP(=O)([O-])OP([O-])([O-])=S)[C@@H](O)[C@H]1O:Nc1ncnc2c1ncn2[C@@H]1O[C@H](COP(=O)([O-])OP(=O)([O-])OP([O-])([O-])=S)[C@@H](O)[C@H]1O:[Mg+2]:[Mg+2],H1172v3,"[1, 2, 3, 4, 5, 6, 
7, 8]","['ADP', 'ADP', 'ADP', 'ADP', 'AGS', 'AGS', 'MG', 'MG']",P +data/casp15_set/casp15_holo_aligned_predicted_structures_bs_cropped/H1172v4.pdb,Nc1ncnc2c1ncn2[C@@H]1O[C@H](CO[P@](=O)([O-])OP(=O)([O-])[O-])[C@@H](O)[C@H]1O:Nc1ncnc2c1ncn2[C@@H]1O[C@H](CO[P@](=O)([O-])OP(=O)([O-])[O-])[C@@H](O)[C@H]1O:Nc1ncnc2c1ncn2[C@@H]1O[C@H](CO[P@](=O)([O-])OP(=O)([O-])[O-])[C@@H](O)[C@H]1O:Nc1ncnc2c1ncn2[C@@H]1O[C@H](COP(=O)([O-])OP(=O)([O-])OP([O-])([O-])=S)[C@@H](O)[C@H]1O:Nc1ncnc2c1ncn2[C@@H]1O[C@H](COP(=O)([O-])OP(=O)([O-])OP([O-])([O-])=S)[C@@H](O)[C@H]1O:Nc1ncnc2c1ncn2[C@@H]1O[C@H](COP(=O)([O-])OP(=O)([O-])OP([O-])([O-])=S)[C@@H](O)[C@H]1O:[Mg+2]:[Mg+2]:[Mg+2],H1172v4,"[1, 2, 3, 4, 5, 6, 7, 8, 9]","['ADP', 'ADP', 'ADP', 'AGS', 'AGS', 'AGS', 'MG', 'MG', 'MG']",P +data/casp15_set/casp15_holo_aligned_predicted_structures_bs_cropped/T1127v2.pdb,CC(C)(CO[P@](=O)(O)O[P@@](=O)(O)OC[C@H]1O[C@@H](n2cnc3c(N)ncnc32)[C@H](O)[C@@H]1OP(=O)(O)O)[C@@H](O)C(=O)NCCC(=O)NCCS:CC(C)(CO[P@](=O)(O)O[P@@](=O)(O)OC[C@H]1O[C@@H](n2cnc3c(N)ncnc32)[C@H](O)[C@@H]1OP(=O)(O)O)[C@@H](O)C(=O)NCCC(=O)NCCS:O=S(=O)(O)CCN1CCN(CCO)CC1:O=S(=O)(O)CCN1CCN(CCO)CC1:C[C@H](O)CC(C)(C)O,T1127v2,"[1, 2, 3, 4, 5]","['COA', 'COA', 'EPE', 'EPE', 'MPD']",P +data/casp15_set/casp15_holo_aligned_predicted_structures_bs_cropped/T1146.pdb,CC(=O)N[C@H]1[C@H](O)O[C@H](CO)[C@@H](O)[C@@H]1O,T1146,[1],['NAG'],P +data/casp15_set/casp15_holo_aligned_predicted_structures_bs_cropped/T1152.pdb,CC(=O)NC1C(O)OC(CO)C(OC2OC(CO)C(OC3OC(CO)C(O)C(O)C3NC(C)=O)C(O)C2NC(C)=O)C1O,T1152,[1],['NAG'],P 
+data/casp15_set/casp15_holo_aligned_predicted_structures_bs_cropped/T1181.pdb,CC(=O)N[C@H]1[C@@H](O)O[C@H](CO[C@H]2O[C@H](CO)[C@@H](O[C@@H]3O[C@H](C(=O)N[C@@H](CO)C([O-])=O)[C@@H](O[C@H]4O[C@H](CO[C@H]5O[C@H](CO)[C@@H](O[C@@H]6OC(C(=O)N[C@@H](CO)C([O-])=O)=C[C@H](O)[C@H]6O)[C@H](O)[C@H]5NC(C)=O)[C@@H](O)[C@H](O)[C@H]4NC(C)=O)[C@H](O)[C@H]3O)[C@H](O)[C@H]2NC(C)=O)[C@@H](O)[C@@H]1O:CC(=O)N[C@H]1[C@@H](O)O[C@H](CO[C@H]2O[C@H](CO)[C@@H](O[C@@H]3O[C@H](C(=O)N[C@@H](CO)C([O-])=O)[C@@H](O[C@H]4O[C@H](CO[C@H]5O[C@H](CO)[C@@H](O[C@@H]6OC(C(=O)N[C@@H](CO)C([O-])=O)=C[C@H](O)[C@H]6O)[C@H](O)[C@H]5NC(C)=O)[C@@H](O)[C@H](O)[C@H]4NC(C)=O)[C@H](O)[C@H]3O)[C@H](O)[C@H]2NC(C)=O)[C@@H](O)[C@@H]1O:CC(=O)N[C@H]1[C@@H](O)O[C@H](CO[C@H]2O[C@H](CO)[C@@H](O[C@@H]3O[C@H](C(=O)N[C@@H](CO)C([O-])=O)[C@@H](O[C@H]4O[C@H](CO[C@H]5O[C@H](CO)[C@@H](O[C@@H]6OC(C(=O)N[C@@H](CO)C([O-])=O)=C[C@H](O)[C@H]6O)[C@H](O)[C@H]5NC(C)=O)[C@@H](O)[C@H](O)[C@H]4NC(C)=O)[C@H](O)[C@H]3O)[C@H](O)[C@H]2NC(C)=O)[C@@H](O)[C@@H]1O:CC(=O)N[C@H]1[C@@H](O)O[C@H](CO[C@H]2O[C@H](CO)[C@@H](O[C@@H]3O[C@H](C(=O)N[C@@H](CO)C([O-])=O)[C@@H](O[C@H]4O[C@H](CO[C@H]5O[C@H](CO)[C@@H](O[C@@H]6OC(C(=O)N[C@@H](CO)C([O-])=O)=C[C@H](O)[C@H]6O)[C@H](O)[C@H]5NC(C)=O)[C@@H](O)[C@H](O)[C@H]4NC(C)=O)[C@H](O)[C@H]3O)[C@H](O)[C@H]2NC(C)=O)[C@@H](O)[C@@H]1O:[Zn+2]:[Zn+2]:[Zn+2]:[Ca+2]:CC(=O)N[C@H]1[C@@H](O)O[C@H](CO[C@H]2O[C@H](CO)[C@@H](O[C@@H]3O[C@H](C(=O)N[C@@H](CO)C([O-])=O)[C@@H](O[C@H]4O[C@H](CO[C@H]5O[C@H](CO)[C@@H](O[C@@H]6OC(C(=O)N[C@@H](CO)C([O-])=O)=C[C@H](O)[C@H]6O)[C@H](O)[C@H]5NC(C)=O)[C@@H](O)[C@H](O)[C@H]4NC(C)=O)[C@H](O)[C@H]3O)[C@H](O)[C@H]2NC(C)=O)[C@@H](O)[C@@H]1O,T1181,"[1, 2, 3, 4, 5, 6, 7, 8, 9]","['OAA', 'OAA', 'OAA', 'OAA', 'ZN', 'ZN', 'ZN', 'CA', 'OAA']",P 
+data/casp15_set/casp15_holo_aligned_predicted_structures_bs_cropped/T1187.pdb,CC(=O)N[C@H]1[C@H](O[C@@H]2[C@@H](CO)O[C@@H](O[C@@H]3[C@@H](CO)O[C@@H](O)[C@H](NC(C)=O)[C@H]3O)[C@H](NC(C)=O)[C@H]2O)O[C@H](CO)[C@@H](O)[C@@H]1O:CC(=O)N[C@H]1[C@H](O[C@@H]2[C@@H](CO)O[C@@H](O[C@@H]3[C@@H](CO)O[C@@H](O)[C@H](NC(C)=O)[C@H]3O)[C@H](NC(C)=O)[C@H]2O)O[C@H](CO)[C@@H](O)[C@@H]1O,T1187,"[1, 2]","['NAG', 'NAG']",P +data/casp15_set/casp15_holo_aligned_predicted_structures_bs_cropped/T1188.pdb,Cn1cnc2c1c(=O)n(CCCn1c(=O)c3c(ncn3C)n(C)c1=O)c(=O)n2C:Cn1cnc2c1c(=O)n(CCCn1c(=O)c3c(ncn3C)n(C)c1=O)c(=O)n2C:[Cd+2]:[Cd+2]:[Co+2],T1188,"[1, 2, 3, 4, 5]","['DW0', 'DW0', 'CD', 'CD', 'CO']",P +data/casp15_set/casp15_holo_aligned_predicted_structures_bs_cropped/T1124.pdb,Nc1ncnc2c1ncn2[C@@H]1O[C@H](CSCC[C@H](N)C(=O)O)[C@@H](O)[C@H]1O:Nc1ncnc2c1ncn2[C@@H]1O[C@H](CSCC[C@H](N)C(=O)O)[C@@H](O)[C@H]1O:N[C@@H](Cc1ccc(O)cc1)C(=O)O:N[C@@H](Cc1ccc(O)cc1)C(=O)O,T1124,"[1, 2, 3, 4]","['SAH', 'SAH', 'TYR', 'TYR']",P +data/casp15_set/casp15_holo_aligned_predicted_structures_bs_cropped/T1186.pdb,Cc1onc(c1C(=O)N[C@H](C=O)[C@@H]1N[C@@H](C(O)=O)C(C)(C)S1)-c1c(Cl)cccc1Cl,T1186,[1],['LIG'],P +data/casp15_set/casp15_holo_aligned_predicted_structures_bs_cropped/T1158v1.pdb,CCCCC[C@@H](/C=C/[C@H]1[C@@H](CC(=O)[C@@H]1CCCCCCC(=O)O)O)O,T1158v1,[1],['XPG'],P +data/casp15_set/casp15_holo_aligned_predicted_structures_bs_cropped/T1158v2.pdb,CCCCC[C@@H](/C=C/[C@H]1[C@@H](CC(=O)[C@@H]1C/C=C\CCCC(=O)O)O)O,T1158v2,[1],['P2E'],P +data/casp15_set/casp15_holo_aligned_predicted_structures_bs_cropped/T1158v3.pdb,C[C@]12CC[C@H]3[C@H]([C@@H]1CCC2=O)CC=C4[C@@]3(CC[C@@H](C4)OS(=O)(=O)O)C,T1158v3,[1],['XH0'],P +data/casp15_set/casp15_holo_aligned_predicted_structures_bs_cropped/T1158v4.pdb,C1=NC(=C2C(=N1)N(C=N2)[C@H]3[C@@H]([C@@H]([C@H](O3)COP(=O)(O)OP(=O)(O)OP(=O)(O)O)O)O)N:C1=NC(=C2C(=N1)N(C=N2)[C@H]3[C@@H]([C@@H]([C@H](O3)COP(=O)(O)OP(=O)(O)OP(=O)(O)O)O)O)N:[Mg+2]:[Mg+2],T1158v4,"[1, 2, 3, 4]","['ATP', 'ATP', 'MG', 'MG']",P diff --git 
a/data/test_cases/casp15/ensemble_prediction_inputs.csv b/data/test_cases/casp15/ensemble_prediction_inputs.csv index a90af3e2..3fb6d715 100644 --- a/data/test_cases/casp15/ensemble_prediction_inputs.csv +++ b/data/test_cases/casp15/ensemble_prediction_inputs.csv @@ -9,7 +9,6 @@ TLRPQYFKEYIGQDKVKDQLKIFIEAAKLRDEALDHTLLFGPPGLGKTTMAFVIANEMGVNLKQTSGPAIEKAGDLVAIL MFSRIRLATPTDVPFIHKLIHQMAVFERLTHLFVATESGLASTLFNSRPFQAVTVFLLEISPSPFPTTHDASSPDFTPFLETHKVDLPIEDPDREKFLPDKLNDVVVAGFVLFFPNYPSFLAKQGFYIEDIFMREPYRRKGFGKLLLTAVAKQAVKLGVGRVEWIVIDWNVNAINFYEQMGAQVFKEWRLCRLTGDALQAIDKLN:MFSRIRLATPTDVPFIHKLIHQMAVFERLTHLFVATESGLASTLFNSRPFQAVTVFLLEISPSPFPTTHDASSPDFTPFLETHKVDLPIEDPDREKFLPDKLNDVVVAGFVLFFPNYPSFLAKQGFYIEDIFMREPYRRKGFGKLLLTAVAKQAVKLGVGRVEWIVIDWNVNAINFYEQMGAQVFKEWRLCRLTGDALQAIDKLNI,CC(C)(CO[P@](=O)(O)O[P@@](=O)(O)OC[C@H]1O[C@@H](n2cnc3c(N)ncnc32)[C@H](O)[C@@H]1OP(=O)(O)O)[C@@H](O)C(=O)NCCC(=O)NCCS:CC(C)(CO[P@](=O)(O)O[P@@](=O)(O)OC[C@H]1O[C@@H](n2cnc3c(N)ncnc32)[C@H](O)[C@@H]1OP(=O)(O)O)[C@@H](O)C(=O)NCCC(=O)NCCS:O=S(=O)(O)CCN1CCN(CCO)CC1:O=S(=O)(O)CCN1CCN(CCO)CC1:C[C@H](O)CC(C)(C)O,T1127v2,"[1, 2, 3, 4, 5]","['COA', 'COA', 'EPE', 'EPE', 'MPD']",P DQAYLYLQNIYSRDNVSKLHVLSLQQRLEYFSRIFLGKPYLGGALGEGANSAYDNDPLYRFDAFDATTYVETVAALTLSYGEAEFQKNMNVIRYQDGVVSLITRNHFTSVDWNPNVEKLGILRDVTAEIGLADVSTLQTLIDKKEWYKKQASAMVKVKDNEAKKVGGIIARTQAIRPQISALNFLSKEILTAKPDLLLRFPKAGIVNIVRKNWNVRDAIGTNLDVSHQGIIFERDGEIIFRHASYKKSSQYVVEVPLLEYVKKNFGDQTFAGLNVLSFV,CC(=O)N[C@H]1[C@H](O)O[C@H](CO)[C@@H](O)[C@@H]1O,T1146,[1],['NAG'],P MYTVKPGDTMWKIAVKYQIGISEIIAANPQIKNPNLIYPGQKINIP:MYTVKPGDTMWKIAVKYQIGISEIIAANPQIKNPNLIYPGQKINIP:MYTVKPGDTMWKIAVKYQIGISEIIAANPQIKNPNLIYPGQKINIPN,CC(=O)NC1C(O)OC(CO)C(OC2OC(CO)C(OC3OC(CO)C(O)C(O)C3NC(C)=O)C(O)C2NC(C)=O)C1O,T1152,[1],['NAG'],P 
-TLRPQYFKEYIGQDKVKDQLKIFIEAAKLRDEALDHTLLFGPPGLGKTTMAFVIANEMGVNLKQTSGPAIEKAGDLVAILNDLEPGDILFIDEIHRMPMAVEEVLYSAMEDYYIDIMIGAGETSRSVHLDLPPFTLVGATTRAGMLSNPLRARFGINGHMEYYELPDLTEIVERTSEIFEMTITPEAALELARRSRGTPRIANRLLKRVRDYAQIMGDGVIDDKIADQALTMLDVDHEGLDYVDQKILRTMIEMYGGGPVGLGTLSVNIAEERETVEDMYEPYLIQKGFIMRTRTGRVATAKAYEHMGYDYT:TLRPQYFKEYIGQDKVKDQLKIFIEAAKLRDEALDHTLLFGPPGLGKTTMAFVIANEMGVNLKQTSGPAIEKAGDLVAILNDLEPGDILFIDEIHRMPMAVEEVLYSAMEDYYIDIMIGAGETSRSVHLDLPPFTLVGATTRAGMLSNPLRARFGINGHMEYYELPDLTEIVERTSEIFEMTITPEAALELARRSRGTPRIANRLLKRVRDYAQIMGDGVIDDKIADQALTMLDVDHEGLDYVDQKILRTMIEMYGGGPVGLGTLSVNIAEERETVEDMYEPYLIQKGFIMRTRTGRVATAKAYEHMGYDYT:TLRPQYFKEYIGQDKVKDQLKIFIEAAKLRDEALDHTLLFGPPGLGKTTMAFVIANEMGVNLKQTSGPAIEKAGDLVAILNDLEPGDILFIDEIHRMPMAVEEVLYSAMEDYYIDIMITSRSVHLDLPPFTLVGATTRAGMLSNPLRARFGINGHMEYYELPDLTEIVERTSEIFEMTITPEAALELARRSRGTPRIANRLLKRVRDYAQIMGDGVIDDKIADQALTMLDVDHEGLDYVDQKILRTMIEMYGGGPVGLGTLSVNIAEERETVEDMYEPYLIQKGFIMRTRTGRVATAKAYEHMGYDYTR:TLRPQYFKEYIGQDKVKDQLKIFIEAAKLRDEALDHTLLFGPPGLGKTTMAFVIANEMGVNLKQTSGPAIEKAGDLVAILNDLEPGDILFIDEIHRMPMAVEEVLYSAMEDYYIDIMIGAGETSRSVHLDLPPFTLVGATTRAGMLSNPLRARFGINGHMEYYELPDLTEIVERTSEIFEMTITPEAALELARRSRGTPRIANRLLKRVRDYAQIMGDGVIDDKIADQALTMLDVDHEGLDYVDQKILRTMIEMYGGGPVGLGTLSVNIAEERETVEDMYEPYLIQKGFIMRTRTGRVATAKAYEHMGYDYTR:TLRPQYFKEYIGQDKVKDQLKIFIEAAKLRDEALDHTLLFGPPGLGKTTMAFVIANEMGVNLKQTSGPAIEKAGDLVAILNDLEPGDILFIDEIHRMPMAVEEVLYSAMEDYYIDIMIGAGETSRSVHLDLPPFTLVGATTRAGMLSNPLRARFGINGHMEYYELPDLTEIVERTSEIFEMTITPEAALELARRSRGTPRIANRLLKRVRDYAQIMGDGVIDDKIADQALTMLDVDHEGLDYVDQKILRTMIEMYGGGPVGLGTLSVNIAEERETVEDMYEPYLIQKGFIMRTRTGRVATAKAYEHMGYDYT:TLRPQYFKEYIGQDKVKDQLKIFIEAAKLRDEALDHTLLFGPPGLGKTTMAFVIANEMGVNLKQTSGPAIEKAGDLVAILNDLEPGDILFIDEIHRMPMAVEEVLYSAMEDYYIDIMIGAGETSRSVHLDLPPFTLVGATTRAGMLSNPLRARFGINGHMEYYELPDLTEIVERTSEIFEMTITPEAALELARRSRGTPRIANRLLKRVRDYAQIMGDGVIDDKIADQALTMLDVDHEGLDYVDQKILRTMIEMYGGGPVGLGTLSVNIAEERETVEDMYEPYLIQKGFIMRTRTGRVATAKAYEHMGYDYT,Nc1ncnc2c1ncn2[C@@H]1O[C@H](CO[P@](=O)([O-])OP(=O)([O-])[O-])[C@@H](O)[C@H]1O:Nc1ncnc2c1ncn2[C@@H]1O[C@H](CO[P@](=O)([O-])O
P(=O)([O-])[O-])[C@@H](O)[C@H]1O:Nc1ncnc2c1ncn2[C@@H]1O[C@H](COP(=O)([O-])OP(=O)([O-])OP([O-])([O-])=S)[C@@H](O)[C@H]1O:Nc1ncnc2c1ncn2[C@@H]1O[C@H](COP(=O)([O-])OP(=O)([O-])OP([O-])([O-])=S)[C@@H](O)[C@H]1O:Nc1ncnc2c1ncn2[C@@H]1O[C@H](COP(=O)([O-])OP(=O)([O-])OP([O-])([O-])=S)[C@@H](O)[C@H]1O:[Mg+2]:[Mg+2]:[Mg+2]:Nc1ncnc2c1ncn2[C@@H]1O[C@H](COP(=O)([O-])OP(=O)([O-])OP([O-])([O-])=S)[C@@H](O)[C@H]1O:[Mg+2],T1170,"[1, 2, 4, 5, 6, 7, 8, 9, 10, 9]","['ADP', 'ADP', 'AGS', 'AGS', 'AGS', 'MG', 'MG', 'MG', 'AGS', 'MG']",P DSYQQRNTRELWRRSLAEAGLTLVDGSFEEGATVTSKLDVVWHVAAGQCYAWGGTLPKTVGANSTPINTGGVSANAWVEYNSFTTNIPSGFYGGSCFPTENNRSLSVGDVIPSGVLFVRVSGNLMMLGSALTSSVTVTSFDQTVINGAHELYPASFFNEVITGWKIAQNDAQLSRLLPKAGNIYIGYSPVTVEGIFTIQGDIHLKPLLPKVTVDSRQFWLRSPRTWNDTNNEYDYTPYDIRIVGNFLFDFYRQDASFDPGVGLAMQSAGTLELSGCELQGAWNSNVTMDYGRWALANNLYSHDCGRGEIQQPDGSNRQGMAIIVGNATEAHIHHIRTRDTWASSVFMTSARPGRTLNVMIHDISINGSGGNGLRLQSEDLVTGVGGGNGTAITRVNISNVTIKNCESHGIRANFTNGSVTGAYIEACNAGIAIEGSSDVTYDNITIKNCSQPILCRYYPVVCTRLRFSNILITGHTDWAVFFLRKDGSSHTVNLGDIEFDGLTIHCTQSGSKAVMINCGLNATATSDITMKNVRIVGAFPDADGQAICQIHNARHIEVDNWNIRGVNGQPSSYLYAQAAQSLNVHRLVGVQPFGTVDRPIECVGIAGHVNIVNCSVPATTKGILYTSTPSSRYEEGNQFYNSAQPQQFPFTNAGQLRGGDVNTLTTTQLANIVATLINDMSLGKFIIR:DSYQQRNTRELWRRSLAEAGLTLVDGSFEEGATVTSKLDVVWHVAAGQCYAWGGTLPKTVGANSTPINTGGVSANAWVEYNSFTTNIPSGFYGGSCFPTENNRSLSVGDVIPSGVLFVRVSGNLMMLGSALTSSVTVTSFDQTVINGAHELYPASFFNEVITGWKIAQNDAQLSRLLPKAGNIYIGYSPVTVEGIFTIQGDIHLKPLLPKVTVDSRQFWLRSPRTWNDTNNEYDYTPYDIRIVGNFLFDFYRQDASFDPGVGLAMQSAGTLELSGCELQGAWNSNVTMDYGRWALANNLYSHDCGRGEIQQPDGSNRQGMAIIVGNATEAHIHHIRTRDTWASSVFMTSARPGRTLNVMIHDISINGSGGNGLRLQSEDLVTGVGGGNGTAITRVNISNVTIKNCESHGIRANFTNGSVTGAYIEACNAGIAIEGSSDVTYDNITIKNCSQPILCRYYPVVCTRLRFSNILITGHTDWAVFFLRKDGSSHTVNLGDIEFDGLTIHCTQSGSKAVMINCGLNATATSDITMKNVRIVGAFPDADGQAICQIHNARHIEVDNWNIRGVNGQPSSYLYAQAAQSLNVHRLVGVQPFGTVDRPIECVGIAGHVNIVNCSVPATTKGILYTSTPSSRYEEGNQFYNSAQPQQFPFTNAGQLRGGDVNTLTTTQLANIVATLINDMSLGKFIIR:DSYQQRNTRELWRRSLAEAGLTLVDGSFEEGATVTSKLDVVWHVAAGQCYAWGGTLPKTVGANSTPINTGGVSANAWVEYNSFTTNIPSGFYGGSCFPTENN
RSLSVGDVIPSGVLFVRVSGNLMMLGSALTSSVTVTSFDQTVINGAHELYPASFFNEVITGWKIAQNDAQLSRLLPKAGNIYIGYSPVTVEGIFTIQGDIHLKPLLPKVTVDSRQFWLRSPRTWNDTNNEYDYTPYDIRIVGNFLFDFYRQDASFDPGVGLAMQSAGTLELSGCELQGAWNSNVTMDYGRWALANNLYSHDCGRGEIQQPDGSNRQGMAIIVGNATEAHIHHIRTRDTWASSVFMTSARPGRTLNVMIHDISINGSGGNGLRLQSEDLVTGVGGGNGTAITRVNISNVTIKNCESHGIRANFTNGSVTGAYIEACNAGIAIEGSSDVTYDNITIKNCSQPILCRYYPVVCTRLRFSNILITGHTDWAVFFLRKDGSSHTVNLGDIEFDGLTIHCTQSGSKAVMINCGLNATATSDITMKNVRIVGAFPDADGQAICQIHNARHIEVDNWNIRGVNGQPSSYLYAQAAQSLNVHRLVGVQPFGTVDRPIECVGIAGHVNIVNCSVPATTKGILYTSTPSSRYEEGNQFYNSAQPQQFPFTNAGQLRGGDVNTLTTTQLANIVATLINDMSLGKFIIR,CC(=O)N[C@H]1[C@@H](O)O[C@H](CO[C@H]2O[C@H](CO)[C@@H](O[C@@H]3O[C@H](C(=O)N[C@@H](CO)C([O-])=O)[C@@H](O[C@H]4O[C@H](CO[C@H]5O[C@H](CO)[C@@H](O[C@@H]6OC(C(=O)N[C@@H](CO)C([O-])=O)=C[C@H](O)[C@H]6O)[C@H](O)[C@H]5NC(C)=O)[C@@H](O)[C@H](O)[C@H]4NC(C)=O)[C@H](O)[C@H]3O)[C@H](O)[C@H]2NC(C)=O)[C@@H](O)[C@@H]1O:CC(=O)N[C@H]1[C@@H](O)O[C@H](CO[C@H]2O[C@H](CO)[C@@H](O[C@@H]3O[C@H](C(=O)N[C@@H](CO)C([O-])=O)[C@@H](O[C@H]4O[C@H](CO[C@H]5O[C@H](CO)[C@@H](O[C@@H]6OC(C(=O)N[C@@H](CO)C([O-])=O)=C[C@H](O)[C@H]6O)[C@H](O)[C@H]5NC(C)=O)[C@@H](O)[C@H](O)[C@H]4NC(C)=O)[C@H](O)[C@H]3O)[C@H](O)[C@H]2NC(C)=O)[C@@H](O)[C@@H]1O:CC(=O)N[C@H]1[C@@H](O)O[C@H](CO[C@H]2O[C@H](CO)[C@@H](O[C@@H]3O[C@H](C(=O)N[C@@H](CO)C([O-])=O)[C@@H](O[C@H]4O[C@H](CO[C@H]5O[C@H](CO)[C@@H](O[C@@H]6OC(C(=O)N[C@@H](CO)C([O-])=O)=C[C@H](O)[C@H]6O)[C@H](O)[C@H]5NC(C)=O)[C@@H](O)[C@H](O)[C@H]4NC(C)=O)[C@H](O)[C@H]3O)[C@H](O)[C@H]2NC(C)=O)[C@@H](O)[C@@H]1O:CC(=O)N[C@H]1[C@@H](O)O[C@H](CO[C@H]2O[C@H](CO)[C@@H](O[C@@H]3O[C@H](C(=O)N[C@@H](CO)C([O-])=O)[C@@H](O[C@H]4O[C@H](CO[C@H]5O[C@H](CO)[C@@H](O[C@@H]6OC(C(=O)N[C@@H](CO)C([O-])=O)=C[C@H](O)[C@H]6O)[C@H](O)[C@H]5NC(C)=O)[C@@H](O)[C@H](O)[C@H]4NC(C)=O)[C@H](O)[C@H]3O)[C@H](O)[C@H]2NC(C)=O)[C@@H](O)[C@@H]1O:[Zn+2]:[Zn+2]:[Zn+2]:[Ca+2]:CC(=O)N[C@H]1[C@@H](O)O[C@H](CO[C@H]2O[C@H](CO)[C@@H](O[C@@H]3O[C@H](C(=O)N[C@@H](CO)C([O-])=O)[C@@H](O[C@H]4O[C@H](CO[C@H]5O[C@H](CO)[C@@H](O[C@@H]6OC(C(=O)N[C@@H](
CO)C([O-])=O)=C[C@H](O)[C@H]6O)[C@H](O)[C@H]5NC(C)=O)[C@@H](O)[C@H](O)[C@H]4NC(C)=O)[C@H](O)[C@H]3O)[C@H](O)[C@H]2NC(C)=O)[C@@H](O)[C@@H]1O,T1181,"[1, 2, 3, 4, 5, 6, 7, 8, 9]","['OAA', 'OAA', 'OAA', 'OAA', 'ZN', 'ZN', 'ZN', 'CA', 'OAA']",P GQGQWIAARDLSITWVDNPQYWTWKTVDPNIEVAELRRVAWLDIYGKIETKNLIRKTSYAVYLVFKLTDNPRELERATASLRFVNEVAEGAGIEGTTVFISKKKKLPGELGRFPHLRSDGWLEIKLGEFFNNLGEDGEVEMRLMEINDKTWKSGIIVKGFDIRPN:GQGQWIAARDLSITWVDNPQYWTWKTVDPNIEVAELRRVAWLDIYGKIETKNLIRKTSYAVYLVFKLTDNPRELERATASLRFVNEVAEGAGIEGTTVFISKKKKLPGELGRFPHLRSDGWLEIKLGEFFNNLGEDGEVEMRLMEINDKTWKSGIIVKGFDIRPN,CC(=O)N[C@H]1[C@H](O[C@@H]2[C@@H](CO)O[C@@H](O[C@@H]3[C@@H](CO)O[C@@H](O)[C@H](NC(C)=O)[C@H]3O)[C@H](NC(C)=O)[C@H]2O)O[C@H](CO)[C@@H](O)[C@@H]1O:CC(=O)N[C@H]1[C@H](O[C@@H]2[C@@H](CO)O[C@@H](O[C@@H]3[C@@H](CO)O[C@@H](O)[C@H](NC(C)=O)[C@H]3O)[C@H](NC(C)=O)[C@H]2O)O[C@H](CO)[C@@H](O)[C@@H]1O,T1187,"[1, 2]","['NAG', 'NAG']",P KLRNVMYYGDWSIWGGQGNFYPKDIPADKLTHLNFAFMDFNSSGELIYCDKDAAIGHPLGNLGVTYGDVNGGILNAFQVLKSENPNLKIGVSLGGWSKSGDFSTIAATPSIRAKFVENVMKFIKYTNMDFVDIDWEYPGDYREPDKTDNINDEGTPNASAGDKENYILLLQDLKEALNKQGKELGKVYELSVALPAGVSKIEKGIDVDKLFNIVDFANIMTYDMAGAWSTTSGHQTALYTNPNAPEEYKGLSVDESVKYYISQGAEREKIVVGAAYYTRGWEQVSDKGTDPNNPGLFGEAAVVNKDADLSPTPGALNEAPMKNGEGGRAGGVWGYNALDKLKSKYTGLKEYWDDSAKAPYLYNSETGAFFTYDNIRSIQEKAKYVKENNLGGIIGWMASQDATTNSTKRDELTTATKESLFGKEDLPKYEIKYTENDITCTVTPVKQSWGSGGVLKMSITNNEKLDESGEVLSTVETSAKTVKNMKVYIKTDGIAITGSQYPAGPVTKEGDYYVIDFGKISDGKLMKAGITFTFDLNLDKAIEDTNNIISIEVSQRMYQTSPEFNRQTIWENT,Cn1cnc2c1c(=O)n(CCCn1c(=O)c3c(ncn3C)n(C)c1=O)c(=O)n2C:Cn1cnc2c1c(=O)n(CCCn1c(=O)c3c(ncn3C)n(C)c1=O)c(=O)n2C:[Cd+2]:[Cd+2]:[Co+2],T1188,"[1, 2, 3, 4, 5]","['DW0', 'DW0', 'CD', 'CD', 'CO']",P diff --git a/data/test_cases/dockgen/ensemble_inputs.csv b/data/test_cases/dockgen/ensemble_inputs.csv index 84def4ab..f2dfc4f2 100644 --- a/data/test_cases/dockgen/ensemble_inputs.csv +++ b/data/test_cases/dockgen/ensemble_inputs.csv @@ -1,190 +1,190 @@ name,protein_input,ligand_smiles 
-3gvl_1_SLB_2,data/dockgen_set/dockgen_holo_aligned_esmfold_structures/3gvl_1_SLB_2_holo_aligned_esmfold_protein.pdb,CC(O)N[C@H]1[C@H]([C@H](O)[C@H](O)CO)O[C@](O)(C(O)O)C[C@@H]1O -3inr_1_GDU_0,data/dockgen_set/dockgen_holo_aligned_esmfold_structures/3inr_1_GDU_0_holo_aligned_esmfold_protein.pdb,OC[C@H]1O[C@H](O[PH](O)(O)O[PH](O)(O)OC[C@H]2O[C@@H](N3CCC(O)NC3O)[C@H](O)[C@@H]2O)[C@H](O)[C@@H](O)[C@H]1O -3jqm_1_GTP_5,data/dockgen_set/dockgen_holo_aligned_esmfold_structures/3jqm_1_GTP_5_holo_aligned_esmfold_protein.pdb,NC1NC(O)C2NCN([C@@H]3O[C@H](CO[PH](O)(O)O[PH](O)(O)O[PH](O)(O)O)[C@@H](O)[C@H]3O)C2N1 -3ju4_1_SLB_2,data/dockgen_set/dockgen_holo_aligned_esmfold_structures/3ju4_1_SLB_2_holo_aligned_esmfold_protein.pdb,CC(O)N[C@H]1[C@H]([C@H](O)[C@H](O)CO)O[C@](O)(C(O)O)C[C@@H]1O -4cnl_1_CHT_1,data/dockgen_set/dockgen_holo_aligned_esmfold_structures/4cnl_1_CHT_1_holo_aligned_esmfold_protein.pdb,C[N+](C)(C)CCO -1hg0_1_SIN_1,data/dockgen_set/dockgen_holo_aligned_esmfold_structures/1hg0_1_SIN_1_holo_aligned_esmfold_protein.pdb,OC(O)CCC(O)O -1i8t_1_FAD_1,data/dockgen_set/dockgen_holo_aligned_esmfold_structures/1i8t_1_FAD_1_holo_aligned_esmfold_protein.pdb,CC1CC2NC3C(O)NC(O)NC3N(C[C@H](O)[C@H](O)[C@H](O)CO[PH](O)(O)O[PH](O)(O)OC[C@H]3O[C@@H](N4CNC5C(N)NCNC54)[C@H](O)[C@@H]3O)C2CC1C -1o28_1_UFP_2,data/dockgen_set/dockgen_holo_aligned_esmfold_structures/1o28_1_UFP_2_holo_aligned_esmfold_protein.pdb,OC1NC(O)N([C@H]2C[C@H](O)[C@@H](CO[PH](O)(O)O)O2)CC1F -1o72_2_PC_0,data/dockgen_set/dockgen_holo_aligned_esmfold_structures/1o72_2_PC_0_holo_aligned_esmfold_protein.pdb,C[N+](C)(C)CCO[PH](O)(O)O -1pj2_1_FUM_0,data/dockgen_set/dockgen_holo_aligned_esmfold_structures/1pj2_1_FUM_0_holo_aligned_esmfold_protein.pdb,OC(O)CCC(O)O -1pj4_1_FUM_1,data/dockgen_set/dockgen_holo_aligned_esmfold_structures/1pj4_1_FUM_1_holo_aligned_esmfold_protein.pdb,OC(O)CCC(O)O 
-1qaw_1_TRP_7,data/dockgen_set/dockgen_holo_aligned_esmfold_structures/1qaw_1_TRP_7_holo_aligned_esmfold_protein.pdb,N[C@@H](CC1CNC2CCCCC12)C(O)O -1rqp_1_SAM_0,data/dockgen_set/dockgen_holo_aligned_esmfold_structures/1rqp_1_SAM_0_holo_aligned_esmfold_protein.pdb,C[S@@H](CC[C@H](N)C(O)O)C[C@H]1O[C@@H](N2CNC3C(N)NCNC32)[C@H](O)[C@@H]1O -1sbz_1_FMN_3,data/dockgen_set/dockgen_holo_aligned_esmfold_structures/1sbz_1_FMN_3_holo_aligned_esmfold_protein.pdb,C[C@@H]1CC2NC3C(O)NC(O)NC3N(C[C@H](O)[C@H](O)[C@H](O)CO[PH](O)(O)O)C2C[C@@H]1C -1sij_1_PCD_0,data/dockgen_set/dockgen_holo_aligned_esmfold_structures/1sij_1_PCD_0_holo_aligned_esmfold_protein.pdb,NC1CCN([C@@H]2O[C@H](CO[PH](O)(O)O[PH](O)(O)OC[C@H]3O[C@H]4NC5NC(N)NC(O)C5N[C@H]4[C@@H]4S[Mo](O)(O)SC43)[C@@H](O)[C@H]2O)C(O)N1 -1tke_1_SER_0,data/dockgen_set/dockgen_holo_aligned_esmfold_structures/1tke_1_SER_0_holo_aligned_esmfold_protein.pdb,N[C@@H](CO)C(O)O -1tkg_1_SSA_0,data/dockgen_set/dockgen_holo_aligned_esmfold_structures/1tkg_1_SSA_0_holo_aligned_esmfold_protein.pdb,NC1NCNC2C1NCN2[C@@H]1O[C@H](COS(O)(O)NC(O)[C@@H](N)CO)[C@@H](O)[C@H]1O -1u8u_1_OCA_0,data/dockgen_set/dockgen_holo_aligned_esmfold_structures/1u8u_1_OCA_0_holo_aligned_esmfold_protein.pdb,CCCCCCCC(O)O -1uf5_1_CDT_0,data/dockgen_set/dockgen_holo_aligned_esmfold_structures/1uf5_1_CDT_0_holo_aligned_esmfold_protein.pdb,CSCC[C@@H](NC(N)O)C(O)O -1uf7_1_CDV_0,data/dockgen_set/dockgen_holo_aligned_esmfold_structures/1uf7_1_CDV_0_holo_aligned_esmfold_protein.pdb,CC(C)[C@@H](NC(N)O)C(O)O -1uf8_1_ING_2,data/dockgen_set/dockgen_holo_aligned_esmfold_structures/1uf8_1_ING_2_holo_aligned_esmfold_protein.pdb,NC(O)N[C@H](CC1CCCCC1)C(O)O -1v2g_1_OCA_0,data/dockgen_set/dockgen_holo_aligned_esmfold_structures/1v2g_1_OCA_0_holo_aligned_esmfold_protein.pdb,CCCCCCCC(O)O -1v97_1_MTE_1,data/dockgen_set/dockgen_holo_aligned_esmfold_structures/1v97_1_MTE_1_holo_aligned_esmfold_protein.pdb,NC1NC(O)[C@@H]2N[C@H]3C(S)C(S)[C@@H](CO[PH](O)(O)O)O[C@H]3NC2N1 
-1za2_1_CTP_4,data/dockgen_set/dockgen_holo_aligned_esmfold_structures/1za2_1_CTP_4_holo_aligned_esmfold_protein.pdb,NC1CCN([C@@H]2O[C@H](CO[PH](O)(O)O[PH](O)(O)O[PH](O)(O)O)[C@@H](O)[C@H]2O)C(O)N1 -2cdc_1_XYS_5,data/dockgen_set/dockgen_holo_aligned_esmfold_structures/2cdc_1_XYS_5_holo_aligned_esmfold_protein.pdb,O[C@@H]1[C@@H](O)[C@@H](O)OC[C@H]1O -2ext_1_TRP_0,data/dockgen_set/dockgen_holo_aligned_esmfold_structures/2ext_1_TRP_0_holo_aligned_esmfold_protein.pdb,N[C@@H](CC1CNC2CCCCC12)C(O)O -2g7c_1_NAG-GAL-GLA_1,data/dockgen_set/dockgen_holo_aligned_esmfold_structures/2g7c_1_NAG-GAL-GLA_1_holo_aligned_esmfold_protein.pdb,CC(O)N[C@@H]1[C@@H](O)[C@H](O[C@@H]2O[C@H](CO)[C@H](O)[C@H](O[C@H]3O[C@H](CO)[C@H](O)[C@H](O)[C@H]3O)[C@H]2O)[C@@H](CO)O[C@H]1O -2gag_1_FOA_0,data/dockgen_set/dockgen_holo_aligned_esmfold_structures/2gag_1_FOA_0_holo_aligned_esmfold_protein.pdb,OC(O)C1CCCO1 -2gag_1_NAD_0,data/dockgen_set/dockgen_holo_aligned_esmfold_structures/2gag_1_NAD_0_holo_aligned_esmfold_protein.pdb,NC1NCNC2C1NCN2[C@@H]1O[C@H](CO[PH](O)(O)O[PH](O)(O)OC[C@H]2O[C@@H](N3CCCC([C@H](N)O)C3)[C@H](O)[C@@H]2O)[C@@H](O)[C@H]1O -2gah_1_NAD_0,data/dockgen_set/dockgen_holo_aligned_esmfold_structures/2gah_1_NAD_0_holo_aligned_esmfold_protein.pdb,NC(O)C1CCCN([C@@H]2O[C@H](CO[PH](O)(O)O[PH](O)(O)OC[C@H]3O[C@@H](N4CNC5C(N)NCNC54)[C@H](O)[C@@H]3O)[C@@H](O)[C@H]2O)C1 -2gf3_2_FOA_1,data/dockgen_set/dockgen_holo_aligned_esmfold_structures/2gf3_2_FOA_1_holo_aligned_esmfold_protein.pdb,OC(O)C1CCCO1 -2him_1_ASN_3,data/dockgen_set/dockgen_holo_aligned_esmfold_structures/2him_1_ASN_3_holo_aligned_esmfold_protein.pdb,NC(O)C[C@H](N)C(O)O -2hk9_1_SKM_0,data/dockgen_set/dockgen_holo_aligned_esmfold_structures/2hk9_1_SKM_0_holo_aligned_esmfold_protein.pdb,OC(O)[C@H]1C[C@@H](O)[C@@H](O)[C@H](O)C1 -2hs3_1_FGR_0,data/dockgen_set/dockgen_holo_aligned_esmfold_structures/2hs3_1_FGR_0_holo_aligned_esmfold_protein.pdb,OCNCC(O)N[C@@H]1O[C@H](CO[PH](O)(O)O)[C@@H](O)[C@H]1O 
-2o5m_1_MNR_0,data/dockgen_set/dockgen_holo_aligned_esmfold_structures/2o5m_1_MNR_0_holo_aligned_esmfold_protein.pdb,CCC1C(C)C2C[C@@H]3[C@H](C)[C@H](CCC(O)O)C4CC5[C@@H](CCC(O)O)C(C)C6CC7[C@@H](CC)[C@H](C)C8CC1N2[Mn@SP2](N65)(N87)N43 -2q37_1_3AL_0,data/dockgen_set/dockgen_holo_aligned_esmfold_structures/2q37_1_3AL_0_holo_aligned_esmfold_protein.pdb,NC(O)N[C@H]1NC(O)NC1O -2q6k_1_ADN_1,data/dockgen_set/dockgen_holo_aligned_esmfold_structures/2q6k_1_ADN_1_holo_aligned_esmfold_protein.pdb,NC1NCNC2C1NCN2[C@@H]1O[C@H](CO)[C@@H](O)[C@H]1O -2r4e_1_13P_0,data/dockgen_set/dockgen_holo_aligned_esmfold_structures/2r4e_1_13P_0_holo_aligned_esmfold_protein.pdb,OC[C@@H](O)CO[PH](O)(O)O -2v5e_1_GU4-YYJ_0,data/dockgen_set/dockgen_holo_aligned_esmfold_structures/2v5e_1_GU4-YYJ_0_holo_aligned_esmfold_protein.pdb,OS(O)(O)OC[C@H]1O[C@@](COS(O)(O)O)(O[C@H]2O[C@H](COS(O)(O)O)[C@@H](OS(O)(O)O)[C@H](OS(O)(O)O)[C@H]2OS(O)(O)O)[C@@H](OS(O)(O)O)[C@@H]1OS(O)(O)O -2v7t_1_SAH_4,data/dockgen_set/dockgen_holo_aligned_esmfold_structures/2v7t_1_SAH_4_holo_aligned_esmfold_protein.pdb,NC1NCNC2C1NCN2[C@@H]1O[C@H](CSCC[C@H](N)C(O)O)[C@@H](O)[C@H]1O -2v7u_1_SAM_2,data/dockgen_set/dockgen_holo_aligned_esmfold_structures/2v7u_1_SAM_2_holo_aligned_esmfold_protein.pdb,C[S@@H](CC[C@H](N)C(O)O)C[C@H]1O[C@@H](N2CNC3C(N)NCNC32)[C@H](O)[C@@H]1O -2v7v_1_5FD_0,data/dockgen_set/dockgen_holo_aligned_esmfold_structures/2v7v_1_5FD_0_holo_aligned_esmfold_protein.pdb,NC1NCNC2C1NCN2[C@@H]1O[C@H](CF)[C@@H](O)[C@H]1O -2v7w_1_5FD_0,data/dockgen_set/dockgen_holo_aligned_esmfold_structures/2v7w_1_5FD_0_holo_aligned_esmfold_protein.pdb,NC1NCNC2C1NCN2[C@@H]1O[C@H](CF)[C@@H](O)[C@H]1O -2vdf_1_OCT_0,data/dockgen_set/dockgen_holo_aligned_esmfold_structures/2vdf_1_OCT_0_holo_aligned_esmfold_protein.pdb,CCCCCCCC -2vfu_1_MTL_0,data/dockgen_set/dockgen_holo_aligned_esmfold_structures/2vfu_1_MTL_0_holo_aligned_esmfold_protein.pdb,OC[C@@H](O)[C@@H](O)[C@H](O)[C@H](O)CO 
-2wab_1_BGC-BGC-BGC-BGC-BGC_0,data/dockgen_set/dockgen_holo_aligned_esmfold_structures/2wab_1_BGC-BGC-BGC-BGC-BGC_0_holo_aligned_esmfold_protein.pdb,OC[C@H]1O[C@@H](O[C@H]2[C@H](O)[C@@H](O)[C@H](O[C@H]3[C@H](O)[C@@H](O)[C@H](O[C@H]4[C@H](O)[C@@H](O)[C@H](O[C@H]5[C@H](O)[C@@H](O)[C@H](O)O[C@@H]5CO)O[C@@H]4CO)O[C@@H]3CO)O[C@@H]2CO)[C@H](O)[C@@H](O)[C@@H]1O -2wao_1_BGC-BGC-BGC-BGC-BGC_0,data/dockgen_set/dockgen_holo_aligned_esmfold_structures/2wao_1_BGC-BGC-BGC-BGC-BGC_0_holo_aligned_esmfold_protein.pdb,OC[C@H]1O[C@@H](O[C@H]2[C@H](O)[C@@H](O)[C@H](O[C@H]3[C@H](O)[C@@H](O)[C@H](O[C@H]4[C@H](O)[C@@H](O)[C@H](O[C@H]5[C@H](O)[C@@H](O)[C@H](O)O[C@@H]5CO)O[C@@H]4CO)O[C@@H]3CO)O[C@@H]2CO)[C@H](O)[C@@H](O)[C@@H]1O -2wr8_1_SAH_1,data/dockgen_set/dockgen_holo_aligned_esmfold_structures/2wr8_1_SAH_1_holo_aligned_esmfold_protein.pdb,NC1NCNC2C1NCN2[C@@H]1O[C@H](CSCC[C@H](N)C(O)O)[C@@H](O)[C@H]1O -2wwc_1_CHT_2,data/dockgen_set/dockgen_holo_aligned_esmfold_structures/2wwc_1_CHT_2_holo_aligned_esmfold_protein.pdb,C[N+](C)(C)CCO -2x34_2_UQ8_0,data/dockgen_set/dockgen_holo_aligned_esmfold_structures/2x34_2_UQ8_0_holo_aligned_esmfold_protein.pdb,COC1C(O)C(C)C(CCC(C)CCCC(C)CCCC(C)CCC[C@H](C)CCCC(C)CCC[C@H](C)CCCC(C)CCCC(C)C)C(O)C1OC -2xrh_1_NIO_1,data/dockgen_set/dockgen_holo_aligned_esmfold_structures/2xrh_1_NIO_1_holo_aligned_esmfold_protein.pdb,OC(O)[C@H]1CCCNC1 -2xta_1_ACO_0,data/dockgen_set/dockgen_holo_aligned_esmfold_structures/2xta_1_ACO_0_holo_aligned_esmfold_protein.pdb,CCO[PH](O)(O)O[PH](O)(O)OC[C@H]1O[C@@H](N2CNC3C(N)NCNC32)[C@H](O)[C@@H]1O[PH](O)(O)O -2zcz_2_TRP_3,data/dockgen_set/dockgen_holo_aligned_esmfold_structures/2zcz_2_TRP_3_holo_aligned_esmfold_protein.pdb,N[C@@H](CC1CNC2CCCCC12)C(O)O -2zd0_1_TRP_9,data/dockgen_set/dockgen_holo_aligned_esmfold_structures/2zd0_1_TRP_9_holo_aligned_esmfold_protein.pdb,N[C@@H](CC1CNC2CCCCC12)C(O)O 
-2ze9_1_PD7_0,data/dockgen_set/dockgen_holo_aligned_esmfold_structures/2ze9_1_PD7_0_holo_aligned_esmfold_protein.pdb,CCCCCCC(O)OC[C@H](CO[PH](O)(O)O)OC(O)CCCCCC -3ad7_1_NAD_0,data/dockgen_set/dockgen_holo_aligned_esmfold_structures/3ad7_1_NAD_0_holo_aligned_esmfold_protein.pdb,N[C@H](O)C1CCCN([C@@H]2O[C@H](CO[PH](O)(O)O[PH](O)(O)OC[C@H]3O[C@@H](N4CNC5[C@H](N)NCN[C@H]54)[C@H](O)[C@@H]3O)[C@@H](O)[C@H]2O)C1 -3ad9_1_NAD_0,data/dockgen_set/dockgen_holo_aligned_esmfold_structures/3ad9_1_NAD_0_holo_aligned_esmfold_protein.pdb,NC1NCNC2[C@@H]1NCN2[C@@H]1O[C@H](CO[PH](O)(O)O[PH](O)(O)OC[C@H]2O[C@@H](N3CCCC([C@H](N)O)C3)[C@H](O)[C@@H]2O)[C@@H](O)[C@H]1O -3ada_1_NAD_0,data/dockgen_set/dockgen_holo_aligned_esmfold_structures/3ada_1_NAD_0_holo_aligned_esmfold_protein.pdb,N[C@H](O)C1CCCN([C@@H]2O[C@H](CO[PH](O)(O)O[PH](O)(O)OC[C@H]3O[C@@H](N4CN[C@@H]5[C@H](N)NCN[C@H]54)[C@H](O)[C@@H]3O)[C@@H](O)[C@H]2O)C1 -3eca_1_ASP_3,data/dockgen_set/dockgen_holo_aligned_esmfold_structures/3eca_1_ASP_3_holo_aligned_esmfold_protein.pdb,N[C@@H](CC(O)O)C(O)O -3gf4_1_FAD_1,data/dockgen_set/dockgen_holo_aligned_esmfold_structures/3gf4_1_FAD_1_holo_aligned_esmfold_protein.pdb,CC1CC2NC3C(O)NC(O)NC3N(C[C@H](O)[C@H](O)[C@H](O)CO[PH](O)(O)O[PH](O)(O)OC[C@H]3O[C@@H](N4CNC5C4NCN[C@H]5N)[C@H](O)[C@@H]3O)C2CC1C -3gf4_1_U5P_0,data/dockgen_set/dockgen_holo_aligned_esmfold_structures/3gf4_1_U5P_0_holo_aligned_esmfold_protein.pdb,OC1CCN([C@@H]2O[C@H](CO[PH](O)(O)O)[C@@H](O)[C@H]2O)C(O)N1 -3he3_5_UDP_0,data/dockgen_set/dockgen_holo_aligned_esmfold_structures/3he3_5_UDP_0_holo_aligned_esmfold_protein.pdb,OC1CCN([C@@H]2O[C@H](CO[PH](O)(O)O[PH](O)(O)O)[C@@H](O)[C@H]2O)C(O)N1 -3it6_1_ORN_1,data/dockgen_set/dockgen_holo_aligned_esmfold_structures/3it6_1_ORN_1_holo_aligned_esmfold_protein.pdb,NCCC[C@H](N)C(O)O 
-3k8l_1_GLC-GLC-GLC-GLC-GLC_0,data/dockgen_set/dockgen_holo_aligned_esmfold_structures/3k8l_1_GLC-GLC-GLC-GLC-GLC_0_holo_aligned_esmfold_protein.pdb,OC[C@H]1O[C@H](O[C@H]2[C@H](O)[C@@H](O)[C@@H](O[C@H]3[C@H](O)[C@@H](O)[C@@H](O[C@H]4[C@H](O)[C@@H](O)[C@@H](O[C@H]5[C@H](O)[C@@H](O)[C@@H](O)O[C@@H]5CO)O[C@@H]4CO)O[C@@H]3CO)O[C@@H]2CO)[C@H](O)[C@@H](O)[C@@H]1O -3k8m_1_GLC-GLC-AC1_0,data/dockgen_set/dockgen_holo_aligned_esmfold_structures/3k8m_1_GLC-GLC-AC1_0_holo_aligned_esmfold_protein.pdb,C[C@H]1O[C@H](O[C@H]2[C@H](O)[C@@H](O)[C@@H](O[C@H]3[C@H](O)[C@@H](O)[C@@H](O)O[C@@H]3CO)O[C@@H]2CO)[C@H](O)[C@@H](O)[C@@H]1N[C@H]1CC(CO)[C@@H](O)[C@H](O)[C@H]1O -3nvv_1_MTE_1,data/dockgen_set/dockgen_holo_aligned_esmfold_structures/3nvv_1_MTE_1_holo_aligned_esmfold_protein.pdb,NC1NC(O)C2N[C@@H]3[C@H](NC2N1)O[C@H](CO[PH](O)(O)O)C(S)[C@@H]3S -3o01_2_DXC_0,data/dockgen_set/dockgen_holo_aligned_esmfold_structures/3o01_2_DXC_0_holo_aligned_esmfold_protein.pdb,C[C@H](CCC(O)O)[C@H]1CC[C@H]2[C@@H]3CC[C@@H]4C[C@H](O)CC[C@]4(C)[C@H]3C[C@H](O)[C@]12C -3o02_2_JN3_0,data/dockgen_set/dockgen_holo_aligned_esmfold_structures/3o02_2_JN3_0_holo_aligned_esmfold_protein.pdb,C[C@H](CCC(O)O)[C@H]1CC[C@H]2[C@@H]3[C@H](O)C[C@@H]4C[C@H](O)CC[C@]4(C)[C@H]3CC[C@]12C -3o7j_1_2AL_0,data/dockgen_set/dockgen_holo_aligned_esmfold_structures/3o7j_1_2AL_0_holo_aligned_esmfold_protein.pdb,NC(O)NC1NC(O)NC1O -3q14_1_PCR_3,data/dockgen_set/dockgen_holo_aligned_esmfold_structures/3q14_1_PCR_3_holo_aligned_esmfold_protein.pdb,CC1CCC(O)CC1 -3qrc_2_GU4-YYJ_0,data/dockgen_set/dockgen_holo_aligned_esmfold_structures/3qrc_2_GU4-YYJ_0_holo_aligned_esmfold_protein.pdb,OS(O)(O)OC[C@H]1O[C@@](COS(O)(O)O)(O[C@H]2O[C@H](COS(O)(O)O)[C@@H](OS(O)(O)O)[C@H](OS(O)(O)O)[C@H]2OS(O)(O)O)[C@@H](OS(O)(O)O)[C@@H]1OS(O)(O)O 
-3s5x_1_BMA-MAN-MAN-MAN-MAN_0,data/dockgen_set/dockgen_holo_aligned_esmfold_structures/3s5x_1_BMA-MAN-MAN-MAN-MAN_0_holo_aligned_esmfold_protein.pdb,OC[C@H]1O[C@H](O[C@@H]2[C@H](O)[C@@H](OC[C@H]3O[C@@H](O)[C@@H](O)[C@@H](O[C@H]4O[C@H](CO)[C@@H](O)[C@H](O)[C@@H]4O)[C@@H]3O)O[C@H](CO)[C@H]2O)[C@@H](O)[C@@H](O)[C@@H]1O -3s6a_1_ANP_0,data/dockgen_set/dockgen_holo_aligned_esmfold_structures/3s6a_1_ANP_0_holo_aligned_esmfold_protein.pdb,NC1NCNC2C1NCN2[C@@H]1O[C@H](CO[PH](O)(O)O[PH](N)(O)O)[C@@H](O)[C@H]1O -3se5_1_ANP_2,data/dockgen_set/dockgen_holo_aligned_esmfold_structures/3se5_1_ANP_2_holo_aligned_esmfold_protein.pdb,NC1NCNC2C1NCN2[C@@H]1O[C@H](CO[PH](O)(O)O[PH](O)(O)N[PH](O)(O)O)[C@@H](O)[C@H]1O -3sr6_1_MTE_1,data/dockgen_set/dockgen_holo_aligned_esmfold_structures/3sr6_1_MTE_1_holo_aligned_esmfold_protein.pdb,NC1NC(O)[C@@H]2N[C@H]3C(S)C(S)[C@@H](CO[PH](O)(O)O)O[C@H]3NC2N1 -3ub7_1_ACM_1,data/dockgen_set/dockgen_holo_aligned_esmfold_structures/3ub7_1_ACM_1_holo_aligned_esmfold_protein.pdb,CC(N)O -3ub9_1_NHY_1,data/dockgen_set/dockgen_holo_aligned_esmfold_structures/3ub9_1_NHY_1_holo_aligned_esmfold_protein.pdb,N[C@@H](O)NO -3uni_1_SAL_0,data/dockgen_set/dockgen_holo_aligned_esmfold_structures/3uni_1_SAL_0_holo_aligned_esmfold_protein.pdb,OC(O)C1CCCCC1O -3wrb_1_GDE_0,data/dockgen_set/dockgen_holo_aligned_esmfold_structures/3wrb_1_GDE_0_holo_aligned_esmfold_protein.pdb,OC(O)C1CC(O)C(O)C(O)C1 -3wvc_1_FEG_0,data/dockgen_set/dockgen_holo_aligned_esmfold_structures/3wvc_1_FEG_0_holo_aligned_esmfold_protein.pdb,CC1C(CC(O)O)NC(O)[C@@H](C)[C@@H]1O[PH](O)(O)OC[C@H]1O[C@@H](N2CNC3C(O)NC(N)NC32)[C@H](O)[C@@H]1O -3zec_1_ANP_0,data/dockgen_set/dockgen_holo_aligned_esmfold_structures/3zec_1_ANP_0_holo_aligned_esmfold_protein.pdb,NC1NCNC2C1NCN2[C@@H]1O[C@H](CO[PH](O)(O)O[PH](O)(O)N[PH](O)(O)O)[C@@H](O)[C@H]1O -3zjx_1_BOG_0,data/dockgen_set/dockgen_holo_aligned_esmfold_structures/3zjx_1_BOG_0_holo_aligned_esmfold_protein.pdb,CCCCCCCCO[C@@H]1O[C@H](CO)[C@@H](O)[C@H](O)[C@H]1O 
-3zqu_1_FNR_5,data/dockgen_set/dockgen_holo_aligned_esmfold_structures/3zqu_1_FNR_5_holo_aligned_esmfold_protein.pdb,C[C@H]1CC2NC3[C@H](O)N[C@H](O)N[C@@H]3N(C[C@H](O)[C@H](O)[C@H](O)CO[PH](O)(O)O)[C@@H]2C[C@@H]1C -3zzs_1_TRP_3,data/dockgen_set/dockgen_holo_aligned_esmfold_structures/3zzs_1_TRP_3_holo_aligned_esmfold_protein.pdb,N[C@@H](CC1CNC2CCCCC12)C(O)O -4b4v_1_L34_0,data/dockgen_set/dockgen_holo_aligned_esmfold_structures/4b4v_1_L34_0_holo_aligned_esmfold_protein.pdb,NC1NC(O)C2C(NC[C@@H]3CN(C4CCC([C@H](O)N[C@@H](CCC(O)O)C(O)O)CC4)C(O)N23)N1 -4bc9_1_CNV-FAD_0,data/dockgen_set/dockgen_holo_aligned_esmfold_structures/4bc9_1_CNV-FAD_0_holo_aligned_esmfold_protein.pdb,CC1CC2[C@H](CC1C)N(C(C)CN)C1C(O)NC(O)NC1N2C[C@H](O)[C@H](O)[C@H](O)CO[PH](O)(O)O[PH](O)(O)OC[C@H]1O[C@@H](N2CNC3C(N)NCN[C@H]32)[C@H](O)[C@@H]1O -4cdn_2_FAD_0,data/dockgen_set/dockgen_holo_aligned_esmfold_structures/4cdn_2_FAD_0_holo_aligned_esmfold_protein.pdb,CC1CC2NC3C(O)NC(O)NC3N(C[C@H](O)[C@H](O)[C@H](O)CO[PH](O)(O)O[PH](O)(O)OC[C@H]3O[C@@H](N4CNC5C(N)NCNC54)[C@H](O)[C@@H]3O)C2C[C@H]1C -4fyv_1_DCP_2,data/dockgen_set/dockgen_holo_aligned_esmfold_structures/4fyv_1_DCP_2_holo_aligned_esmfold_protein.pdb,NC1CCN([C@H]2C[C@H](O)[C@@H](CO[PH](O)(O)O[PH](O)(O)O[PH](O)(O)O)O2)C(O)N1 -4fyw_1_CTP_4,data/dockgen_set/dockgen_holo_aligned_esmfold_structures/4fyw_1_CTP_4_holo_aligned_esmfold_protein.pdb,NC1CCN([C@@H]2O[C@H](CO[PH](O)(O)O[PH](O)(O)O[PH](O)(O)O)[C@@H](O)[C@H]2O)C(O)N1 -4gk9_1_BMA-MAN-MAN-MAN-MAN_3,data/dockgen_set/dockgen_holo_aligned_esmfold_structures/4gk9_1_BMA-MAN-MAN-MAN-MAN_3_holo_aligned_esmfold_protein.pdb,OC[C@H]1O[C@H](OC[C@H]2O[C@H](OC[C@H]3O[C@@H](O)[C@@H](O)[C@@H](O[C@H]4O[C@H](CO)[C@@H](O)[C@H](O)[C@@H]4O)[C@@H]3O)[C@@H](O)[C@@H](O[C@H]3O[C@H](CO)[C@@H](O)[C@H](O)[C@@H]3O)[C@@H]2O)[C@@H](O)[C@@H](O)[C@@H]1O -4h2f_1_ADN_0,data/dockgen_set/dockgen_holo_aligned_esmfold_structures/4h2f_1_ADN_0_holo_aligned_esmfold_protein.pdb,NC1NCNC2C1NCN2[C@@H]1O[C@H](CO)[C@@H](O)[C@H]1O 
-4idk_1_1FE_0,data/dockgen_set/dockgen_holo_aligned_esmfold_structures/4idk_1_1FE_0_holo_aligned_esmfold_protein.pdb,NCC(O)NC1CCC2NC(O)NC2C1 -4kgx_1_CTP_5,data/dockgen_set/dockgen_holo_aligned_esmfold_structures/4kgx_1_CTP_5_holo_aligned_esmfold_protein.pdb,NC1CCN([C@@H]2O[C@H](CO[PH](O)(O)O[PH](O)(O)O[PH](O)(O)O)[C@@H](O)[C@H]2O)C(O)N1 -4mig_1_G3F_2,data/dockgen_set/dockgen_holo_aligned_esmfold_structures/4mig_1_G3F_2_holo_aligned_esmfold_protein.pdb,OC[C@H]1O[C@@H](O)[C@H](O)[C@@H](F)[C@@H]1O -4mo2_2_FDA_0,data/dockgen_set/dockgen_holo_aligned_esmfold_structures/4mo2_2_FDA_0_holo_aligned_esmfold_protein.pdb,CC1CC2[C@@H](CC1C)N[C@H]1C(O)NC(O)N[C@H]1N2C[C@H](O)[C@H](O)[C@H](O)CO[PH](O)(O)O[PH](O)(O)OC[C@H]1O[C@@H](N2CNC3C(N)NCNC32)[C@H](O)[C@@H]1O -4mos_1_GAF_1,data/dockgen_set/dockgen_holo_aligned_esmfold_structures/4mos_1_GAF_1_holo_aligned_esmfold_protein.pdb,OC[C@H]1O[C@H](O)[C@H](F)[C@@H](O)[C@H]1O -4n4l_1_HG1_0,data/dockgen_set/dockgen_holo_aligned_esmfold_structures/4n4l_1_HG1_0_holo_aligned_esmfold_protein.pdb,NC(O)CCCC1CCCCC1 -4o0d_1_GLY_3,data/dockgen_set/dockgen_holo_aligned_esmfold_structures/4o0d_1_GLY_3_holo_aligned_esmfold_protein.pdb,NCC(O)O -4o0f_1_GLY_3,data/dockgen_set/dockgen_holo_aligned_esmfold_structures/4o0f_1_GLY_3_holo_aligned_esmfold_protein.pdb,NCC(O)O -4o95_1_245_0,data/dockgen_set/dockgen_holo_aligned_esmfold_structures/4o95_1_245_0_holo_aligned_esmfold_protein.pdb,OC(NC1CCCCC1)N[C@H]1CCNC(Cl)C1 -4oal_2_245_0,data/dockgen_set/dockgen_holo_aligned_esmfold_structures/4oal_2_245_0_holo_aligned_esmfold_protein.pdb,OC(NC1CCCCC1)N[C@H]1CCNC(Cl)C1 -4osx_1_GLY_2,data/dockgen_set/dockgen_holo_aligned_esmfold_structures/4osx_1_GLY_2_holo_aligned_esmfold_protein.pdb,NCC(O)O -4osy_1_GLY_3,data/dockgen_set/dockgen_holo_aligned_esmfold_structures/4osy_1_GLY_3_holo_aligned_esmfold_protein.pdb,NCC(O)O 
-4pfx_1_UDP_0,data/dockgen_set/dockgen_holo_aligned_esmfold_structures/4pfx_1_UDP_0_holo_aligned_esmfold_protein.pdb,OC1CCN([C@@H]2O[C@H](CO[PH](O)(O)O[PH](O)(O)O)[C@@H](O)[C@H]2O)C(O)N1 -4phr_1_UDP_0,data/dockgen_set/dockgen_holo_aligned_esmfold_structures/4phr_1_UDP_0_holo_aligned_esmfold_protein.pdb,OC1CCN([C@@H]2O[C@H](CO[PH](O)(O)O[PH](O)(O)O)[C@@H](O)[C@H]2O)C(O)N1 -4phs_1_UDP_0,data/dockgen_set/dockgen_holo_aligned_esmfold_structures/4phs_1_UDP_0_holo_aligned_esmfold_protein.pdb,OC1CCN([C@@H]2O[C@H](CO[PH](O)([OH2+])O[PH](O)(O)[OH2+])[C@@H](O)[C@H]2O)C(O)N1 -4pya_1_2X3_0,data/dockgen_set/dockgen_holo_aligned_esmfold_structures/4pya_1_2X3_0_holo_aligned_esmfold_protein.pdb,NC1NC(O)C2N[C@H]3N([C@@H]2N1)[C@@H]1O[C@H](CO[PH](O)(O)O[PH](O)(O)O[PH](O)(O)O)[C@@]3(O)[C@H]1O -4qa8_1_PJZ_0,data/dockgen_set/dockgen_holo_aligned_esmfold_structures/4qa8_1_PJZ_0_holo_aligned_esmfold_protein.pdb,CCCCCCCCCCCCCC[C@@H](O)OC[C@@H](C)O[C@@H](O)CCCCCCCCCCC -4qo5_1_NAG_2,data/dockgen_set/dockgen_holo_aligned_esmfold_structures/4qo5_1_NAG_2_holo_aligned_esmfold_protein.pdb,CC(O)N[C@H]1CO[C@H](CO)[C@@H](O)[C@@H]1O -4rhe_1_FMN_6,data/dockgen_set/dockgen_holo_aligned_esmfold_structures/4rhe_1_FMN_6_holo_aligned_esmfold_protein.pdb,CC1C[C@@H]2[C@@H](C[C@@H]1C)NC1[C@H](O)NC(O)N[C@@H]1N2C[C@H](O)[C@H](O)[C@H](O)CO[PH]([O-])(O)O -4rpj_1_UDP_0,data/dockgen_set/dockgen_holo_aligned_esmfold_structures/4rpj_1_UDP_0_holo_aligned_esmfold_protein.pdb,OC1CCN([C@@H]2O[C@H](CO[PH](O)(O)O[PH](O)(O)O)[C@@H](O)[C@H]2O)C(O)N1 -4rpm_1_HXC_0,data/dockgen_set/dockgen_holo_aligned_esmfold_structures/4rpm_1_HXC_0_holo_aligned_esmfold_protein.pdb,CCCCCC(O)SC -4tvd_1_BGC_4,data/dockgen_set/dockgen_holo_aligned_esmfold_structures/4tvd_1_BGC_4_holo_aligned_esmfold_protein.pdb,OC[C@H]1O[C@@H](O)[C@H](O)[C@@H](O)[C@@H]1O -4tvd_1_GLC_0,data/dockgen_set/dockgen_holo_aligned_esmfold_structures/4tvd_1_GLC_0_holo_aligned_esmfold_protein.pdb,OC[C@H]1O[C@H](O)[C@H](O)[C@@H](O)[C@@H]1O 
-4u63_1_FAD_0,data/dockgen_set/dockgen_holo_aligned_esmfold_structures/4u63_1_FAD_0_holo_aligned_esmfold_protein.pdb,CC1CC2NC3C(O)NC(O)NC3N(C[C@H](O)[C@H](O)[C@H](O)CO[PH](O)(O)O[PH](O)(O)OC[C@H]3O[C@@H](N4CNC5C(N)NCNC54)[C@H](O)[C@@H]3O)C2CC1C -4uoc_1_NCN_1,data/dockgen_set/dockgen_holo_aligned_esmfold_structures/4uoc_1_NCN_1_holo_aligned_esmfold_protein.pdb,OC(O)C1CCCN([C@@H]2O[C@H](CO[PH](O)(O)O)[C@@H](O)[C@H]2O)C1 -4uuw_1_AMP_0,data/dockgen_set/dockgen_holo_aligned_esmfold_structures/4uuw_1_AMP_0_holo_aligned_esmfold_protein.pdb,NC1NCNC2C1NCN2[C@@H]1O[C@H](CO[PH](O)(O)O)[C@@H](O)[C@H]1O -4xdr_1_ADN_0,data/dockgen_set/dockgen_holo_aligned_esmfold_structures/4xdr_1_ADN_0_holo_aligned_esmfold_protein.pdb,NC1NCNC2C1NCN2[C@@H]1O[C@H](CO)[C@@H](O)[C@H]1O -4xfm_1_THE_0,data/dockgen_set/dockgen_holo_aligned_esmfold_structures/4xfm_1_THE_0_holo_aligned_esmfold_protein.pdb,OC[C@@H](O)[C@H](O)C(O)O -4ydx_1_TCE_0,data/dockgen_set/dockgen_holo_aligned_esmfold_structures/4ydx_1_TCE_0_holo_aligned_esmfold_protein.pdb,OC(O)CCP(CCC(O)O)CCC(O)O -4zav_1_4LS_8,data/dockgen_set/dockgen_holo_aligned_esmfold_structures/4zav_1_4LS_8_holo_aligned_esmfold_protein.pdb,CC(C)CCN1C2CC(C)[C@@H](C)CC2N(C[C@H](O)[C@H](O)[C@H](O)CO[PH](O)(O)O)[C@H]2NC(O)N[C@@H](O)C21 -4zaw_1_4LU_1,data/dockgen_set/dockgen_holo_aligned_esmfold_structures/4zaw_1_4LU_1_holo_aligned_esmfold_protein.pdb,C[C@H]1[C@@H](C)C[C@@H]2C3[C@@H]1C(C)(C)CCN3[C@H]1C(O)NC(O)NC1N2C[C@H](O)[C@H](O)[C@H](O)CO[PH](O)(O)O -4zay_1_4LS_6,data/dockgen_set/dockgen_holo_aligned_esmfold_structures/4zay_1_4LS_6_holo_aligned_esmfold_protein.pdb,CC(C)CCN1[C@H]2[C@H](O)N[C@H](O)N[C@@H]2N(C[C@H](O)[C@H](O)[C@H](O)CO[PH](O)(O)O)[C@@H]2CC(C)C(C)C[C@H]21 -4zaz_1_4LS_6,data/dockgen_set/dockgen_holo_aligned_esmfold_structures/4zaz_1_4LS_6_holo_aligned_esmfold_protein.pdb,CC(C)CCN1C2CC(C)[C@@H](C)CC2N(C[C@H](O)[C@H](O)[C@H](O)CO[PH](O)(O)O)C2N[C@@H](O)N[C@@H](O)C21 
-4zqx_1_ATP_2,data/dockgen_set/dockgen_holo_aligned_esmfold_structures/4zqx_1_ATP_2_holo_aligned_esmfold_protein.pdb,NC1NCNC2C1NCN2[C@@H]1O[C@H](CO[PH](O)(O)O[PH](O)(O)O[PH](O)(O)O)[C@@H](O)[C@H]1O -5a98_1_ATP_0,data/dockgen_set/dockgen_holo_aligned_esmfold_structures/5a98_1_ATP_0_holo_aligned_esmfold_protein.pdb,NC1NCNC2C1NCN2[C@@H]1O[C@H](CO[PH](O)(O)O[PH](O)(O)O[PH](O)(O)O)[C@@H](O)[C@H]1O -5ae3_2_AWB_1,data/dockgen_set/dockgen_holo_aligned_esmfold_structures/5ae3_2_AWB_1_holo_aligned_esmfold_protein.pdb,CCCCCC[C@H]1C(O)O[C@H](C)[C@H](N[C@H](O)[C@@H]2CCC[C@H](NCO)[C@H]2O)C(O)O[C@@H](C)[C@@H]1OC(O)CC(C)C -5b5s_1_BOG_0,data/dockgen_set/dockgen_holo_aligned_esmfold_structures/5b5s_1_BOG_0_holo_aligned_esmfold_protein.pdb,CCCCCCCCO[C@@H]1O[C@H](CO)[C@@H](O)[C@H](O)[C@H]1O -5d9g_1_GLU-ASN-LEU-TYR-PHE-GLN_0,data/dockgen_set/dockgen_holo_aligned_esmfold_structures/5d9g_1_GLU-ASN-LEU-TYR-PHE-GLN_0_holo_aligned_esmfold_protein.pdb,CC(C)C[C@H](N)C(=O)N[C@@H](Cc1ccc(O)cc1)C(=O)N[C@@H](Cc1ccccc1)C(=O)N[C@@H](CCC(N)=O)C(=O)O -5dnc_1_ASN_2,data/dockgen_set/dockgen_holo_aligned_esmfold_structures/5dnc_1_ASN_2_holo_aligned_esmfold_protein.pdb,NC(O)C[C@H](N)C(O)O -5eno_1_5QG_0,data/dockgen_set/dockgen_holo_aligned_esmfold_structures/5eno_1_5QG_0_holo_aligned_esmfold_protein.pdb,CC1(C)CC2C(CO1)C(N1CCOCC1)NC(SCCC1CCCCC1)[C@@H]2CN -5enp_1_5QF_0,data/dockgen_set/dockgen_holo_aligned_esmfold_structures/5enp_1_5QF_0_holo_aligned_esmfold_protein.pdb,COCCN1CCN([C@@H]2NC(SCC[C@H]3CCC(OC)[C@@H](OC)C3)[C@H](CN)[C@H]3CC(C)(C)OCC23)CC1 -5enq_1_5QE_0,data/dockgen_set/dockgen_holo_aligned_esmfold_structures/5enq_1_5QE_0_holo_aligned_esmfold_protein.pdb,CC(O)NC1CCC(CCSC2NC(N3C[C@@H](C)O[C@@H](C)C3)[C@@H]3COC(C)(C)CC3C2CN)CC1 -5enr_1_MBX_0,data/dockgen_set/dockgen_holo_aligned_esmfold_structures/5enr_1_MBX_0_holo_aligned_esmfold_protein.pdb,CCC(O)NC1CCC(CCS[C@@H]2NC(N3C[C@H](C)O[C@@H](C)C3)C3COC(C)(C)CC3C2CN)CC1 
-5ent_1_MIY_0,data/dockgen_set/dockgen_holo_aligned_esmfold_structures/5ent_1_MIY_0_holo_aligned_esmfold_protein.pdb,CN(C)C1CCC(O)C2C1C[C@H]1C[C@H]3[C@H](N(C)C)C(O)C(C(N)O)[C@@H](O)[C@@]3(O)C(O)[C@@H]1C2O -5ers_1_AMP_1,data/dockgen_set/dockgen_holo_aligned_esmfold_structures/5ers_1_AMP_1_holo_aligned_esmfold_protein.pdb,NC1NCNC2C1NCN2[C@@H]1O[C@H](CO[PH](O)(O)O)[C@@H](O)[C@H]1O -5f2t_1_PLM_0,data/dockgen_set/dockgen_holo_aligned_esmfold_structures/5f2t_1_PLM_0_holo_aligned_esmfold_protein.pdb,CCCCCCCCCCCCCCCC(O)O -5f52_1_ASP_2,data/dockgen_set/dockgen_holo_aligned_esmfold_structures/5f52_1_ASP_2_holo_aligned_esmfold_protein.pdb,N[C@@H](CC(O)O)C(O)O -5fiu_1_Y3J_3,data/dockgen_set/dockgen_holo_aligned_esmfold_structures/5fiu_1_Y3J_3_holo_aligned_esmfold_protein.pdb,NC1NCNC2C1NCN2[C@@H]1O[C@H](C(F)F)[C@@H](O)[C@@H]1O -5fxd_1_H7Y_1,data/dockgen_set/dockgen_holo_aligned_esmfold_structures/5fxd_1_H7Y_1_holo_aligned_esmfold_protein.pdb,CCC[C@H]1CCC(O)C(OC)C1 -5fxe_1_CIY_1,data/dockgen_set/dockgen_holo_aligned_esmfold_structures/5fxe_1_CIY_1_holo_aligned_esmfold_protein.pdb,CO[C@H]1CC(CCCO)CCC1O -5fxf_1_BEZ_0,data/dockgen_set/dockgen_holo_aligned_esmfold_structures/5fxf_1_BEZ_0_holo_aligned_esmfold_protein.pdb,OC(O)C1CCCCC1 -5gqi_1_ATP_7,data/dockgen_set/dockgen_holo_aligned_esmfold_structures/5gqi_1_ATP_7_holo_aligned_esmfold_protein.pdb,N[C@@H]1NCN[C@H]2C1NCN2[C@@H]1O[C@H](CO[PH](O)(O)O[PH](O)(O)O[PH](O)(O)O)[C@@H](O)[C@H]1O -5gql_1_ATP_4,data/dockgen_set/dockgen_holo_aligned_esmfold_structures/5gql_1_ATP_4_holo_aligned_esmfold_protein.pdb,NC1NCNC2C1NCN2[C@@H]1O[C@H](CO[PH](O)(O)O[PH](O)(O)O[PH](O)(O)O)[C@@H](O)[C@H]1O -5hhz_1_ZME_0,data/dockgen_set/dockgen_holo_aligned_esmfold_structures/5hhz_1_ZME_0_holo_aligned_esmfold_protein.pdb,CC1CCN(C2NCNC3NCNC32)C1 -5hmr_1_FDZ_0,data/dockgen_set/dockgen_holo_aligned_esmfold_structures/5hmr_1_FDZ_0_holo_aligned_esmfold_protein.pdb,OC(NC1CNNS1)N[C@H]1CCC[C@H](OC(F)(F)F)C1 
-5hqx_1_EDZ_0,data/dockgen_set/dockgen_holo_aligned_esmfold_structures/5hqx_1_EDZ_0_holo_aligned_esmfold_protein.pdb,OCC[C@H]1CCCC[C@@H]1NC(O)N[C@H]1CNNS1 -5hw0_1_GLU_2,data/dockgen_set/dockgen_holo_aligned_esmfold_structures/5hw0_1_GLU_2_holo_aligned_esmfold_protein.pdb,N[C@@H](CCC(O)O)C(O)O -5ida_1_BMA_0,data/dockgen_set/dockgen_holo_aligned_esmfold_structures/5ida_1_BMA_0_holo_aligned_esmfold_protein.pdb,OC[C@H]1O[C@@H](O)[C@@H](O)[C@@H](O)[C@@H]1O -5k3o_2_ASP_0,data/dockgen_set/dockgen_holo_aligned_esmfold_structures/5k3o_2_ASP_0_holo_aligned_esmfold_protein.pdb,N[C@@H](CC(O)O)C(O)O -5k45_2_GLU_1,data/dockgen_set/dockgen_holo_aligned_esmfold_structures/5k45_2_GLU_1_holo_aligned_esmfold_protein.pdb,N[C@@H](CCC(O)O)C(O)O -5k4h_2_GLU_3,data/dockgen_set/dockgen_holo_aligned_esmfold_structures/5k4h_2_GLU_3_holo_aligned_esmfold_protein.pdb,N[C@@H](CCC(O)O)C(O)O -5k62_1_ASN-VAL_0,data/dockgen_set/dockgen_holo_aligned_esmfold_structures/5k62_1_ASN-VAL_0_holo_aligned_esmfold_protein.pdb,CC(C)[C@@H](CO)NC(O)[C@@H](N)CC(N)O -5k63_1_ASN-GLY_0,data/dockgen_set/dockgen_holo_aligned_esmfold_structures/5k63_1_ASN-GLY_0_holo_aligned_esmfold_protein.pdb,NC(O)C[C@H](N)C(O)NCCO -5k66_1_ASN-GLU_0,data/dockgen_set/dockgen_holo_aligned_esmfold_structures/5k66_1_ASN-GLU_0_holo_aligned_esmfold_protein.pdb,NC(O)C[C@H](N)C(O)N[C@H](CO)CCC(O)O -5mh1_1_BMA_0,data/dockgen_set/dockgen_holo_aligned_esmfold_structures/5mh1_1_BMA_0_holo_aligned_esmfold_protein.pdb,OC[C@H]1O[C@@H](O)[C@@H](O)[C@@H](O)[C@@H]1O -5u82_2_ZN0_0,data/dockgen_set/dockgen_holo_aligned_esmfold_structures/5u82_2_ZN0_0_holo_aligned_esmfold_protein.pdb,CC[SnH](CC)CC -6a71_1_9UX_0,data/dockgen_set/dockgen_holo_aligned_esmfold_structures/6a71_1_9UX_0_holo_aligned_esmfold_protein.pdb,O[Mo@]12S[Mo@@]1(O)S2 -6a72_1_9UX_0,data/dockgen_set/dockgen_holo_aligned_esmfold_structures/6a72_1_9UX_0_holo_aligned_esmfold_protein.pdb,O[Mo@]12S[Mo@@]1(O)S2 
-6b1b_1_TMO_15,data/dockgen_set/dockgen_holo_aligned_esmfold_structures/6b1b_1_TMO_15_holo_aligned_esmfold_protein.pdb,C[N+](C)(C)O -6ea9_1_9BG_0,data/dockgen_set/dockgen_holo_aligned_esmfold_structures/6ea9_1_9BG_0_holo_aligned_esmfold_protein.pdb,NC1NC(O)C2NCN([C@@H]3O[C@H](CO)[C@@H](O)[C@H]3O[PH]([O-])(O)OC[C@H]3O[C@@H](N4CNC5C(N)NCNC54)[C@H](O)[C@@H]3O[PH]([O-])([O-])O)C2N1 -6ep5_1_ADP_1,data/dockgen_set/dockgen_holo_aligned_esmfold_structures/6ep5_1_ADP_1_holo_aligned_esmfold_protein.pdb,NC1NCNC2C1NCN2[C@@H]1O[C@H](CO[PH](O)(O)O[PH](O)(O)O)[C@@H](O)[C@H]1O -6etf_1_AMP_0,data/dockgen_set/dockgen_holo_aligned_esmfold_structures/6etf_1_AMP_0_holo_aligned_esmfold_protein.pdb,NC1NCNC2[C@H]1NCN2[C@@H]1O[C@H](CO[PH](O)(O)O)[C@@H](O)[C@H]1O -6fgc_1_ADP_1,data/dockgen_set/dockgen_holo_aligned_esmfold_structures/6fgc_1_ADP_1_holo_aligned_esmfold_protein.pdb,NC1NCNC2C1NCN2[C@@H]1O[C@H](CO[PH](O)(O)O[PH](O)(O)O)[C@@H](O)[C@H]1O -6fgc_1_D95_1,data/dockgen_set/dockgen_holo_aligned_esmfold_structures/6fgc_1_D95_1_holo_aligned_esmfold_protein.pdb,C[C@H]1[C@H](OC(O)CCC(O)O)O[C@@H]2O[C@@]3(C)CC[C@H]4[C@H](C)CC[C@@H]1[C@@]24OO3 -6gbf_1_AMP_0,data/dockgen_set/dockgen_holo_aligned_esmfold_structures/6gbf_1_AMP_0_holo_aligned_esmfold_protein.pdb,NC1NCNC2C1NCN2[C@@H]1O[C@H](CO[PH](O)(O)O)[C@@H](O)[C@H]1O -6jls_1_FMN_6,data/dockgen_set/dockgen_holo_aligned_esmfold_structures/6jls_1_FMN_6_holo_aligned_esmfold_protein.pdb,CC1CC2NC3C(O)NC(O)NC3N(C[C@H](O)[C@H](O)[C@H](O)CO[PH](O)(O)O)C2CC1C -6n19_2_K8V_0,data/dockgen_set/dockgen_holo_aligned_esmfold_structures/6n19_2_K8V_0_holo_aligned_esmfold_protein.pdb,CCC(O)NC1CCC(C([O-])O)C(C([O-])O)C1 -6nco_1_KQP_0,data/dockgen_set/dockgen_holo_aligned_esmfold_structures/6nco_1_KQP_0_holo_aligned_esmfold_protein.pdb,CC(C)(O)C1CCC([C@H]2CC(Cl)C[C@@H](C34(C(N)N)CC3C4)C2)CC1 -6npp_1_KWG_0,data/dockgen_set/dockgen_holo_aligned_esmfold_structures/6npp_1_KWG_0_holo_aligned_esmfold_protein.pdb,[O-]C(O)C1CCCC(CCC2CCCCC2)C1N1CCCC1 
-6o6y_1_ACK_0,data/dockgen_set/dockgen_holo_aligned_esmfold_structures/6o6y_1_ACK_0_holo_aligned_esmfold_protein.pdb,NC1NCNC2C1NCN2[C@@H]1O[C@H](CO)[C@H]2O[PH]([O-])(O)O[C@H]21 -6o70_1_ACK_1,data/dockgen_set/dockgen_holo_aligned_esmfold_structures/6o70_1_ACK_1_holo_aligned_esmfold_protein.pdb,NC1NCNC2C1NCN2[C@@H]1O[C@H](CO)[C@H]2O[PH]([O-])(O)O[C@H]21 -6pa2_1_ASP_2,data/dockgen_set/dockgen_holo_aligned_esmfold_structures/6pa2_1_ASP_2_holo_aligned_esmfold_protein.pdb,N[C@@H](CC(O)O)C(O)O -6pa6_2_ASN_0,data/dockgen_set/dockgen_holo_aligned_esmfold_structures/6pa6_2_ASN_0_holo_aligned_esmfold_protein.pdb,NC(O)C[C@H](N)C(O)O -6paa_1_ASP_2,data/dockgen_set/dockgen_holo_aligned_esmfold_structures/6paa_1_ASP_2_holo_aligned_esmfold_protein.pdb,N[C@@H](CC(O)O)C(O)O -6qkr_1_FAD_0,data/dockgen_set/dockgen_holo_aligned_esmfold_structures/6qkr_1_FAD_0_holo_aligned_esmfold_protein.pdb,CC1CC2NC3C(NC(O)N[C@H]3O)N(C[C@H](O)[C@H](O)[C@H](O)CO[PH](O)(O)O[PH](O)(O)OC[C@H]3O[C@@H](N4CNC5C4NCN[C@H]5N)[C@H](O)[C@@H]3O)C2CC1C -6rms_1_AMP_1,data/dockgen_set/dockgen_holo_aligned_esmfold_structures/6rms_1_AMP_1_holo_aligned_esmfold_protein.pdb,NC1NCNC2C1NCN2[C@@H]1O[C@H](CO[PH](O)(O)O)[C@@H](O)[C@H]1O -6ryz_1_SAM_2,data/dockgen_set/dockgen_holo_aligned_esmfold_structures/6ryz_1_SAM_2_holo_aligned_esmfold_protein.pdb,C[S@@H](CC[C@H](N)C(O)O)C[C@H]1O[C@@H](N2CNC3C2NCN[C@@H]3N)[C@H](O)[C@@H]1O -6rz2_1_5CD_2,data/dockgen_set/dockgen_holo_aligned_esmfold_structures/6rz2_1_5CD_2_holo_aligned_esmfold_protein.pdb,NC1NCNC2[C@H]1NCN2[C@@H]1O[C@H](CCl)[C@@H](O)[C@H]1O -6tvg_1_AP2_1,data/dockgen_set/dockgen_holo_aligned_esmfold_structures/6tvg_1_AP2_1_holo_aligned_esmfold_protein.pdb,NC1NCNC2C1NCN2[C@@H]1O[C@H](CO[PH](O)(O)C[PH](O)(O)O)[C@@H](O)[C@H]1O -6uqy_2_AT3_0,data/dockgen_set/dockgen_holo_aligned_esmfold_structures/6uqy_2_AT3_0_holo_aligned_esmfold_protein.pdb,C[C@@H](O)SCC[N+](C)(C)C 
-6ur1_2_AT3_0,data/dockgen_set/dockgen_holo_aligned_esmfold_structures/6ur1_2_AT3_0_holo_aligned_esmfold_protein.pdb,C[C@@H](O)SCC[N+](C)(C)C -6v2a_1_ASN_3,data/dockgen_set/dockgen_holo_aligned_esmfold_structures/6v2a_1_ASN_3_holo_aligned_esmfold_protein.pdb,NC(O)C[C@H](N)C(O)O -6wyz_1_DGL_1,data/dockgen_set/dockgen_holo_aligned_esmfold_structures/6wyz_1_DGL_1_holo_aligned_esmfold_protein.pdb,N[C@H](CCC(O)O)C(O)O -6xb3_3_9BG_1,data/dockgen_set/dockgen_holo_aligned_esmfold_structures/6xb3_3_9BG_1_holo_aligned_esmfold_protein.pdb,NC1NC(O)C2NCN([C@@H]3O[C@H](CO)[C@@H](O)[C@H]3O[PH]([O-])(O)OC[C@H]3O[C@@H](N4CNC5C(N)NCNC54)[C@H](O)[C@@H]3O[PH]([O-])(O)O)C2N1 -6xug_1_O1Q_0,data/dockgen_set/dockgen_holo_aligned_esmfold_structures/6xug_1_O1Q_0_holo_aligned_esmfold_protein.pdb,CC1CCC[C@H](N2NCC[C@H]2C2CC(Cl)C3NNN(C4CC[C@H]5CNNC5C4)C3C2)C1 -6yao_1_OJ2_0,data/dockgen_set/dockgen_holo_aligned_esmfold_structures/6yao_1_OJ2_0_holo_aligned_esmfold_protein.pdb,OCC[C@H]1CCCC[C@@H]1NC(O)N[C@H]1CCC[C@@H](OC(F)(F)F)C1 -6yap_1_OHZ_0,data/dockgen_set/dockgen_holo_aligned_esmfold_structures/6yap_1_OHZ_0_holo_aligned_esmfold_protein.pdb,OCC[C@H]1CCCC[C@@H]1NC(O)N[C@H]1CC(Cl)C[C@H](OC(F)(F)F)C1 -6yaq_1_OHZ_0,data/dockgen_set/dockgen_holo_aligned_esmfold_structures/6yaq_1_OHZ_0_holo_aligned_esmfold_protein.pdb,OCCC1CCCC[C@H]1N[C@H](O)N[C@H]1C[C@@H](Cl)C[C@@H](OC(F)(F)F)C1 +3gvl_1_SLB_2,data/dockgen_set/dockgen_holo_aligned_predicted_structures/3gvl_1_SLB_2_holo_aligned_predicted_protein.pdb,CC(O)N[C@H]1[C@H]([C@H](O)[C@H](O)CO)O[C@](O)(C(O)O)C[C@@H]1O +3inr_1_GDU_0,data/dockgen_set/dockgen_holo_aligned_predicted_structures/3inr_1_GDU_0_holo_aligned_predicted_protein.pdb,OC[C@H]1O[C@H](O[PH](O)(O)O[PH](O)(O)OC[C@H]2O[C@@H](N3CCC(O)NC3O)[C@H](O)[C@@H]2O)[C@H](O)[C@@H](O)[C@H]1O +3jqm_1_GTP_5,data/dockgen_set/dockgen_holo_aligned_predicted_structures/3jqm_1_GTP_5_holo_aligned_predicted_protein.pdb,NC1NC(O)C2NCN([C@@H]3O[C@H](CO[PH](O)(O)O[PH](O)(O)O[PH](O)(O)O)[C@@H](O)[C@H]3O)C2N1 
+3ju4_1_SLB_2,data/dockgen_set/dockgen_holo_aligned_predicted_structures/3ju4_1_SLB_2_holo_aligned_predicted_protein.pdb,CC(O)N[C@H]1[C@H]([C@H](O)[C@H](O)CO)O[C@](O)(C(O)O)C[C@@H]1O +4cnl_1_CHT_1,data/dockgen_set/dockgen_holo_aligned_predicted_structures/4cnl_1_CHT_1_holo_aligned_predicted_protein.pdb,C[N+](C)(C)CCO +1hg0_1_SIN_1,data/dockgen_set/dockgen_holo_aligned_predicted_structures/1hg0_1_SIN_1_holo_aligned_predicted_protein.pdb,OC(O)CCC(O)O +1i8t_1_FAD_1,data/dockgen_set/dockgen_holo_aligned_predicted_structures/1i8t_1_FAD_1_holo_aligned_predicted_protein.pdb,CC1CC2NC3C(O)NC(O)NC3N(C[C@H](O)[C@H](O)[C@H](O)CO[PH](O)(O)O[PH](O)(O)OC[C@H]3O[C@@H](N4CNC5C(N)NCNC54)[C@H](O)[C@@H]3O)C2CC1C +1o28_1_UFP_2,data/dockgen_set/dockgen_holo_aligned_predicted_structures/1o28_1_UFP_2_holo_aligned_predicted_protein.pdb,OC1NC(O)N([C@H]2C[C@H](O)[C@@H](CO[PH](O)(O)O)O2)CC1F +1o72_2_PC_0,data/dockgen_set/dockgen_holo_aligned_predicted_structures/1o72_2_PC_0_holo_aligned_predicted_protein.pdb,C[N+](C)(C)CCO[PH](O)(O)O +1pj2_1_FUM_0,data/dockgen_set/dockgen_holo_aligned_predicted_structures/1pj2_1_FUM_0_holo_aligned_predicted_protein.pdb,OC(O)CCC(O)O +1pj4_1_FUM_1,data/dockgen_set/dockgen_holo_aligned_predicted_structures/1pj4_1_FUM_1_holo_aligned_predicted_protein.pdb,OC(O)CCC(O)O +1qaw_1_TRP_7,data/dockgen_set/dockgen_holo_aligned_predicted_structures/1qaw_1_TRP_7_holo_aligned_predicted_protein.pdb,N[C@@H](CC1CNC2CCCCC12)C(O)O +1rqp_1_SAM_0,data/dockgen_set/dockgen_holo_aligned_predicted_structures/1rqp_1_SAM_0_holo_aligned_predicted_protein.pdb,C[S@@H](CC[C@H](N)C(O)O)C[C@H]1O[C@@H](N2CNC3C(N)NCNC32)[C@H](O)[C@@H]1O +1sbz_1_FMN_3,data/dockgen_set/dockgen_holo_aligned_predicted_structures/1sbz_1_FMN_3_holo_aligned_predicted_protein.pdb,C[C@@H]1CC2NC3C(O)NC(O)NC3N(C[C@H](O)[C@H](O)[C@H](O)CO[PH](O)(O)O)C2C[C@@H]1C 
+1sij_1_PCD_0,data/dockgen_set/dockgen_holo_aligned_predicted_structures/1sij_1_PCD_0_holo_aligned_predicted_protein.pdb,NC1CCN([C@@H]2O[C@H](CO[PH](O)(O)O[PH](O)(O)OC[C@H]3O[C@H]4NC5NC(N)NC(O)C5N[C@H]4[C@@H]4S[Mo](O)(O)SC43)[C@@H](O)[C@H]2O)C(O)N1 +1tke_1_SER_0,data/dockgen_set/dockgen_holo_aligned_predicted_structures/1tke_1_SER_0_holo_aligned_predicted_protein.pdb,N[C@@H](CO)C(O)O +1tkg_1_SSA_0,data/dockgen_set/dockgen_holo_aligned_predicted_structures/1tkg_1_SSA_0_holo_aligned_predicted_protein.pdb,NC1NCNC2C1NCN2[C@@H]1O[C@H](COS(O)(O)NC(O)[C@@H](N)CO)[C@@H](O)[C@H]1O +1u8u_1_OCA_0,data/dockgen_set/dockgen_holo_aligned_predicted_structures/1u8u_1_OCA_0_holo_aligned_predicted_protein.pdb,CCCCCCCC(O)O +1uf5_1_CDT_0,data/dockgen_set/dockgen_holo_aligned_predicted_structures/1uf5_1_CDT_0_holo_aligned_predicted_protein.pdb,CSCC[C@@H](NC(N)O)C(O)O +1uf7_1_CDV_0,data/dockgen_set/dockgen_holo_aligned_predicted_structures/1uf7_1_CDV_0_holo_aligned_predicted_protein.pdb,CC(C)[C@@H](NC(N)O)C(O)O +1uf8_1_ING_2,data/dockgen_set/dockgen_holo_aligned_predicted_structures/1uf8_1_ING_2_holo_aligned_predicted_protein.pdb,NC(O)N[C@H](CC1CCCCC1)C(O)O +1v2g_1_OCA_0,data/dockgen_set/dockgen_holo_aligned_predicted_structures/1v2g_1_OCA_0_holo_aligned_predicted_protein.pdb,CCCCCCCC(O)O +1v97_1_MTE_1,data/dockgen_set/dockgen_holo_aligned_predicted_structures/1v97_1_MTE_1_holo_aligned_predicted_protein.pdb,NC1NC(O)[C@@H]2N[C@H]3C(S)C(S)[C@@H](CO[PH](O)(O)O)O[C@H]3NC2N1 +1za2_1_CTP_4,data/dockgen_set/dockgen_holo_aligned_predicted_structures/1za2_1_CTP_4_holo_aligned_predicted_protein.pdb,NC1CCN([C@@H]2O[C@H](CO[PH](O)(O)O[PH](O)(O)O[PH](O)(O)O)[C@@H](O)[C@H]2O)C(O)N1 +2cdc_1_XYS_5,data/dockgen_set/dockgen_holo_aligned_predicted_structures/2cdc_1_XYS_5_holo_aligned_predicted_protein.pdb,O[C@@H]1[C@@H](O)[C@@H](O)OC[C@H]1O +2ext_1_TRP_0,data/dockgen_set/dockgen_holo_aligned_predicted_structures/2ext_1_TRP_0_holo_aligned_predicted_protein.pdb,N[C@@H](CC1CNC2CCCCC12)C(O)O 
+2g7c_1_NAG-GAL-GLA_1,data/dockgen_set/dockgen_holo_aligned_predicted_structures/2g7c_1_NAG-GAL-GLA_1_holo_aligned_predicted_protein.pdb,CC(O)N[C@@H]1[C@@H](O)[C@H](O[C@@H]2O[C@H](CO)[C@H](O)[C@H](O[C@H]3O[C@H](CO)[C@H](O)[C@H](O)[C@H]3O)[C@H]2O)[C@@H](CO)O[C@H]1O +2gag_1_FOA_0,data/dockgen_set/dockgen_holo_aligned_predicted_structures/2gag_1_FOA_0_holo_aligned_predicted_protein.pdb,OC(O)C1CCCO1 +2gag_1_NAD_0,data/dockgen_set/dockgen_holo_aligned_predicted_structures/2gag_1_NAD_0_holo_aligned_predicted_protein.pdb,NC1NCNC2C1NCN2[C@@H]1O[C@H](CO[PH](O)(O)O[PH](O)(O)OC[C@H]2O[C@@H](N3CCCC([C@H](N)O)C3)[C@H](O)[C@@H]2O)[C@@H](O)[C@H]1O +2gah_1_NAD_0,data/dockgen_set/dockgen_holo_aligned_predicted_structures/2gah_1_NAD_0_holo_aligned_predicted_protein.pdb,NC(O)C1CCCN([C@@H]2O[C@H](CO[PH](O)(O)O[PH](O)(O)OC[C@H]3O[C@@H](N4CNC5C(N)NCNC54)[C@H](O)[C@@H]3O)[C@@H](O)[C@H]2O)C1 +2gf3_2_FOA_1,data/dockgen_set/dockgen_holo_aligned_predicted_structures/2gf3_2_FOA_1_holo_aligned_predicted_protein.pdb,OC(O)C1CCCO1 +2him_1_ASN_3,data/dockgen_set/dockgen_holo_aligned_predicted_structures/2him_1_ASN_3_holo_aligned_predicted_protein.pdb,NC(O)C[C@H](N)C(O)O +2hk9_1_SKM_0,data/dockgen_set/dockgen_holo_aligned_predicted_structures/2hk9_1_SKM_0_holo_aligned_predicted_protein.pdb,OC(O)[C@H]1C[C@@H](O)[C@@H](O)[C@H](O)C1 +2hs3_1_FGR_0,data/dockgen_set/dockgen_holo_aligned_predicted_structures/2hs3_1_FGR_0_holo_aligned_predicted_protein.pdb,OCNCC(O)N[C@@H]1O[C@H](CO[PH](O)(O)O)[C@@H](O)[C@H]1O +2o5m_1_MNR_0,data/dockgen_set/dockgen_holo_aligned_predicted_structures/2o5m_1_MNR_0_holo_aligned_predicted_protein.pdb,CCC1C(C)C2C[C@@H]3[C@H](C)[C@H](CCC(O)O)C4CC5[C@@H](CCC(O)O)C(C)C6CC7[C@@H](CC)[C@H](C)C8CC1N2[Mn@SP2](N65)(N87)N43 +2q37_1_3AL_0,data/dockgen_set/dockgen_holo_aligned_predicted_structures/2q37_1_3AL_0_holo_aligned_predicted_protein.pdb,NC(O)N[C@H]1NC(O)NC1O 
+2q6k_1_ADN_1,data/dockgen_set/dockgen_holo_aligned_predicted_structures/2q6k_1_ADN_1_holo_aligned_predicted_protein.pdb,NC1NCNC2C1NCN2[C@@H]1O[C@H](CO)[C@@H](O)[C@H]1O +2r4e_1_13P_0,data/dockgen_set/dockgen_holo_aligned_predicted_structures/2r4e_1_13P_0_holo_aligned_predicted_protein.pdb,OC[C@@H](O)CO[PH](O)(O)O +2v5e_1_GU4-YYJ_0,data/dockgen_set/dockgen_holo_aligned_predicted_structures/2v5e_1_GU4-YYJ_0_holo_aligned_predicted_protein.pdb,OS(O)(O)OC[C@H]1O[C@@](COS(O)(O)O)(O[C@H]2O[C@H](COS(O)(O)O)[C@@H](OS(O)(O)O)[C@H](OS(O)(O)O)[C@H]2OS(O)(O)O)[C@@H](OS(O)(O)O)[C@@H]1OS(O)(O)O +2v7t_1_SAH_4,data/dockgen_set/dockgen_holo_aligned_predicted_structures/2v7t_1_SAH_4_holo_aligned_predicted_protein.pdb,NC1NCNC2C1NCN2[C@@H]1O[C@H](CSCC[C@H](N)C(O)O)[C@@H](O)[C@H]1O +2v7u_1_SAM_2,data/dockgen_set/dockgen_holo_aligned_predicted_structures/2v7u_1_SAM_2_holo_aligned_predicted_protein.pdb,C[S@@H](CC[C@H](N)C(O)O)C[C@H]1O[C@@H](N2CNC3C(N)NCNC32)[C@H](O)[C@@H]1O +2v7v_1_5FD_0,data/dockgen_set/dockgen_holo_aligned_predicted_structures/2v7v_1_5FD_0_holo_aligned_predicted_protein.pdb,NC1NCNC2C1NCN2[C@@H]1O[C@H](CF)[C@@H](O)[C@H]1O +2v7w_1_5FD_0,data/dockgen_set/dockgen_holo_aligned_predicted_structures/2v7w_1_5FD_0_holo_aligned_predicted_protein.pdb,NC1NCNC2C1NCN2[C@@H]1O[C@H](CF)[C@@H](O)[C@H]1O +2vdf_1_OCT_0,data/dockgen_set/dockgen_holo_aligned_predicted_structures/2vdf_1_OCT_0_holo_aligned_predicted_protein.pdb,CCCCCCCC +2vfu_1_MTL_0,data/dockgen_set/dockgen_holo_aligned_predicted_structures/2vfu_1_MTL_0_holo_aligned_predicted_protein.pdb,OC[C@@H](O)[C@@H](O)[C@H](O)[C@H](O)CO +2wab_1_BGC-BGC-BGC-BGC-BGC_0,data/dockgen_set/dockgen_holo_aligned_predicted_structures/2wab_1_BGC-BGC-BGC-BGC-BGC_0_holo_aligned_predicted_protein.pdb,OC[C@H]1O[C@@H](O[C@H]2[C@H](O)[C@@H](O)[C@H](O[C@H]3[C@H](O)[C@@H](O)[C@H](O[C@H]4[C@H](O)[C@@H](O)[C@H](O[C@H]5[C@H](O)[C@@H](O)[C@H](O)O[C@@H]5CO)O[C@@H]4CO)O[C@@H]3CO)O[C@@H]2CO)[C@H](O)[C@@H](O)[C@@H]1O 
+2wao_1_BGC-BGC-BGC-BGC-BGC_0,data/dockgen_set/dockgen_holo_aligned_predicted_structures/2wao_1_BGC-BGC-BGC-BGC-BGC_0_holo_aligned_predicted_protein.pdb,OC[C@H]1O[C@@H](O[C@H]2[C@H](O)[C@@H](O)[C@H](O[C@H]3[C@H](O)[C@@H](O)[C@H](O[C@H]4[C@H](O)[C@@H](O)[C@H](O[C@H]5[C@H](O)[C@@H](O)[C@H](O)O[C@@H]5CO)O[C@@H]4CO)O[C@@H]3CO)O[C@@H]2CO)[C@H](O)[C@@H](O)[C@@H]1O +2wr8_1_SAH_1,data/dockgen_set/dockgen_holo_aligned_predicted_structures/2wr8_1_SAH_1_holo_aligned_predicted_protein.pdb,NC1NCNC2C1NCN2[C@@H]1O[C@H](CSCC[C@H](N)C(O)O)[C@@H](O)[C@H]1O +2wwc_1_CHT_2,data/dockgen_set/dockgen_holo_aligned_predicted_structures/2wwc_1_CHT_2_holo_aligned_predicted_protein.pdb,C[N+](C)(C)CCO +2x34_2_UQ8_0,data/dockgen_set/dockgen_holo_aligned_predicted_structures/2x34_2_UQ8_0_holo_aligned_predicted_protein.pdb,COC1C(O)C(C)C(CCC(C)CCCC(C)CCCC(C)CCC[C@H](C)CCCC(C)CCC[C@H](C)CCCC(C)CCCC(C)C)C(O)C1OC +2xrh_1_NIO_1,data/dockgen_set/dockgen_holo_aligned_predicted_structures/2xrh_1_NIO_1_holo_aligned_predicted_protein.pdb,OC(O)[C@H]1CCCNC1 +2xta_1_ACO_0,data/dockgen_set/dockgen_holo_aligned_predicted_structures/2xta_1_ACO_0_holo_aligned_predicted_protein.pdb,CCO[PH](O)(O)O[PH](O)(O)OC[C@H]1O[C@@H](N2CNC3C(N)NCNC32)[C@H](O)[C@@H]1O[PH](O)(O)O +2zcz_2_TRP_3,data/dockgen_set/dockgen_holo_aligned_predicted_structures/2zcz_2_TRP_3_holo_aligned_predicted_protein.pdb,N[C@@H](CC1CNC2CCCCC12)C(O)O +2zd0_1_TRP_9,data/dockgen_set/dockgen_holo_aligned_predicted_structures/2zd0_1_TRP_9_holo_aligned_predicted_protein.pdb,N[C@@H](CC1CNC2CCCCC12)C(O)O +2ze9_1_PD7_0,data/dockgen_set/dockgen_holo_aligned_predicted_structures/2ze9_1_PD7_0_holo_aligned_predicted_protein.pdb,CCCCCCC(O)OC[C@H](CO[PH](O)(O)O)OC(O)CCCCCC +3ad7_1_NAD_0,data/dockgen_set/dockgen_holo_aligned_predicted_structures/3ad7_1_NAD_0_holo_aligned_predicted_protein.pdb,N[C@H](O)C1CCCN([C@@H]2O[C@H](CO[PH](O)(O)O[PH](O)(O)OC[C@H]3O[C@@H](N4CNC5[C@H](N)NCN[C@H]54)[C@H](O)[C@@H]3O)[C@@H](O)[C@H]2O)C1 
+3ad9_1_NAD_0,data/dockgen_set/dockgen_holo_aligned_predicted_structures/3ad9_1_NAD_0_holo_aligned_predicted_protein.pdb,NC1NCNC2[C@@H]1NCN2[C@@H]1O[C@H](CO[PH](O)(O)O[PH](O)(O)OC[C@H]2O[C@@H](N3CCCC([C@H](N)O)C3)[C@H](O)[C@@H]2O)[C@@H](O)[C@H]1O +3ada_1_NAD_0,data/dockgen_set/dockgen_holo_aligned_predicted_structures/3ada_1_NAD_0_holo_aligned_predicted_protein.pdb,N[C@H](O)C1CCCN([C@@H]2O[C@H](CO[PH](O)(O)O[PH](O)(O)OC[C@H]3O[C@@H](N4CN[C@@H]5[C@H](N)NCN[C@H]54)[C@H](O)[C@@H]3O)[C@@H](O)[C@H]2O)C1 +3eca_1_ASP_3,data/dockgen_set/dockgen_holo_aligned_predicted_structures/3eca_1_ASP_3_holo_aligned_predicted_protein.pdb,N[C@@H](CC(O)O)C(O)O +3gf4_1_FAD_1,data/dockgen_set/dockgen_holo_aligned_predicted_structures/3gf4_1_FAD_1_holo_aligned_predicted_protein.pdb,CC1CC2NC3C(O)NC(O)NC3N(C[C@H](O)[C@H](O)[C@H](O)CO[PH](O)(O)O[PH](O)(O)OC[C@H]3O[C@@H](N4CNC5C4NCN[C@H]5N)[C@H](O)[C@@H]3O)C2CC1C +3gf4_1_U5P_0,data/dockgen_set/dockgen_holo_aligned_predicted_structures/3gf4_1_U5P_0_holo_aligned_predicted_protein.pdb,OC1CCN([C@@H]2O[C@H](CO[PH](O)(O)O)[C@@H](O)[C@H]2O)C(O)N1 +3he3_5_UDP_0,data/dockgen_set/dockgen_holo_aligned_predicted_structures/3he3_5_UDP_0_holo_aligned_predicted_protein.pdb,OC1CCN([C@@H]2O[C@H](CO[PH](O)(O)O[PH](O)(O)O)[C@@H](O)[C@H]2O)C(O)N1 +3it6_1_ORN_1,data/dockgen_set/dockgen_holo_aligned_predicted_structures/3it6_1_ORN_1_holo_aligned_predicted_protein.pdb,NCCC[C@H](N)C(O)O +3k8l_1_GLC-GLC-GLC-GLC-GLC_0,data/dockgen_set/dockgen_holo_aligned_predicted_structures/3k8l_1_GLC-GLC-GLC-GLC-GLC_0_holo_aligned_predicted_protein.pdb,OC[C@H]1O[C@H](O[C@H]2[C@H](O)[C@@H](O)[C@@H](O[C@H]3[C@H](O)[C@@H](O)[C@@H](O[C@H]4[C@H](O)[C@@H](O)[C@@H](O[C@H]5[C@H](O)[C@@H](O)[C@@H](O)O[C@@H]5CO)O[C@@H]4CO)O[C@@H]3CO)O[C@@H]2CO)[C@H](O)[C@@H](O)[C@@H]1O 
+3k8m_1_GLC-GLC-AC1_0,data/dockgen_set/dockgen_holo_aligned_predicted_structures/3k8m_1_GLC-GLC-AC1_0_holo_aligned_predicted_protein.pdb,C[C@H]1O[C@H](O[C@H]2[C@H](O)[C@@H](O)[C@@H](O[C@H]3[C@H](O)[C@@H](O)[C@@H](O)O[C@@H]3CO)O[C@@H]2CO)[C@H](O)[C@@H](O)[C@@H]1N[C@H]1CC(CO)[C@@H](O)[C@H](O)[C@H]1O +3nvv_1_MTE_1,data/dockgen_set/dockgen_holo_aligned_predicted_structures/3nvv_1_MTE_1_holo_aligned_predicted_protein.pdb,NC1NC(O)C2N[C@@H]3[C@H](NC2N1)O[C@H](CO[PH](O)(O)O)C(S)[C@@H]3S +3o01_2_DXC_0,data/dockgen_set/dockgen_holo_aligned_predicted_structures/3o01_2_DXC_0_holo_aligned_predicted_protein.pdb,C[C@H](CCC(O)O)[C@H]1CC[C@H]2[C@@H]3CC[C@@H]4C[C@H](O)CC[C@]4(C)[C@H]3C[C@H](O)[C@]12C +3o02_2_JN3_0,data/dockgen_set/dockgen_holo_aligned_predicted_structures/3o02_2_JN3_0_holo_aligned_predicted_protein.pdb,C[C@H](CCC(O)O)[C@H]1CC[C@H]2[C@@H]3[C@H](O)C[C@@H]4C[C@H](O)CC[C@]4(C)[C@H]3CC[C@]12C +3o7j_1_2AL_0,data/dockgen_set/dockgen_holo_aligned_predicted_structures/3o7j_1_2AL_0_holo_aligned_predicted_protein.pdb,NC(O)NC1NC(O)NC1O +3q14_1_PCR_3,data/dockgen_set/dockgen_holo_aligned_predicted_structures/3q14_1_PCR_3_holo_aligned_predicted_protein.pdb,CC1CCC(O)CC1 +3qrc_2_GU4-YYJ_0,data/dockgen_set/dockgen_holo_aligned_predicted_structures/3qrc_2_GU4-YYJ_0_holo_aligned_predicted_protein.pdb,OS(O)(O)OC[C@H]1O[C@@](COS(O)(O)O)(O[C@H]2O[C@H](COS(O)(O)O)[C@@H](OS(O)(O)O)[C@H](OS(O)(O)O)[C@H]2OS(O)(O)O)[C@@H](OS(O)(O)O)[C@@H]1OS(O)(O)O +3s5x_1_BMA-MAN-MAN-MAN-MAN_0,data/dockgen_set/dockgen_holo_aligned_predicted_structures/3s5x_1_BMA-MAN-MAN-MAN-MAN_0_holo_aligned_predicted_protein.pdb,OC[C@H]1O[C@H](O[C@@H]2[C@H](O)[C@@H](OC[C@H]3O[C@@H](O)[C@@H](O)[C@@H](O[C@H]4O[C@H](CO)[C@@H](O)[C@H](O)[C@@H]4O)[C@@H]3O)O[C@H](CO)[C@H]2O)[C@@H](O)[C@@H](O)[C@@H]1O +3s6a_1_ANP_0,data/dockgen_set/dockgen_holo_aligned_predicted_structures/3s6a_1_ANP_0_holo_aligned_predicted_protein.pdb,NC1NCNC2C1NCN2[C@@H]1O[C@H](CO[PH](O)(O)O[PH](N)(O)O)[C@@H](O)[C@H]1O 
+3se5_1_ANP_2,data/dockgen_set/dockgen_holo_aligned_predicted_structures/3se5_1_ANP_2_holo_aligned_predicted_protein.pdb,NC1NCNC2C1NCN2[C@@H]1O[C@H](CO[PH](O)(O)O[PH](O)(O)N[PH](O)(O)O)[C@@H](O)[C@H]1O +3sr6_1_MTE_1,data/dockgen_set/dockgen_holo_aligned_predicted_structures/3sr6_1_MTE_1_holo_aligned_predicted_protein.pdb,NC1NC(O)[C@@H]2N[C@H]3C(S)C(S)[C@@H](CO[PH](O)(O)O)O[C@H]3NC2N1 +3ub7_1_ACM_1,data/dockgen_set/dockgen_holo_aligned_predicted_structures/3ub7_1_ACM_1_holo_aligned_predicted_protein.pdb,CC(N)O +3ub9_1_NHY_1,data/dockgen_set/dockgen_holo_aligned_predicted_structures/3ub9_1_NHY_1_holo_aligned_predicted_protein.pdb,N[C@@H](O)NO +3uni_1_SAL_0,data/dockgen_set/dockgen_holo_aligned_predicted_structures/3uni_1_SAL_0_holo_aligned_predicted_protein.pdb,OC(O)C1CCCCC1O +3wrb_1_GDE_0,data/dockgen_set/dockgen_holo_aligned_predicted_structures/3wrb_1_GDE_0_holo_aligned_predicted_protein.pdb,OC(O)C1CC(O)C(O)C(O)C1 +3wvc_1_FEG_0,data/dockgen_set/dockgen_holo_aligned_predicted_structures/3wvc_1_FEG_0_holo_aligned_predicted_protein.pdb,CC1C(CC(O)O)NC(O)[C@@H](C)[C@@H]1O[PH](O)(O)OC[C@H]1O[C@@H](N2CNC3C(O)NC(N)NC32)[C@H](O)[C@@H]1O +3zec_1_ANP_0,data/dockgen_set/dockgen_holo_aligned_predicted_structures/3zec_1_ANP_0_holo_aligned_predicted_protein.pdb,NC1NCNC2C1NCN2[C@@H]1O[C@H](CO[PH](O)(O)O[PH](O)(O)N[PH](O)(O)O)[C@@H](O)[C@H]1O +3zjx_1_BOG_0,data/dockgen_set/dockgen_holo_aligned_predicted_structures/3zjx_1_BOG_0_holo_aligned_predicted_protein.pdb,CCCCCCCCO[C@@H]1O[C@H](CO)[C@@H](O)[C@H](O)[C@H]1O +3zqu_1_FNR_5,data/dockgen_set/dockgen_holo_aligned_predicted_structures/3zqu_1_FNR_5_holo_aligned_predicted_protein.pdb,C[C@H]1CC2NC3[C@H](O)N[C@H](O)N[C@@H]3N(C[C@H](O)[C@H](O)[C@H](O)CO[PH](O)(O)O)[C@@H]2C[C@@H]1C +3zzs_1_TRP_3,data/dockgen_set/dockgen_holo_aligned_predicted_structures/3zzs_1_TRP_3_holo_aligned_predicted_protein.pdb,N[C@@H](CC1CNC2CCCCC12)C(O)O 
+4b4v_1_L34_0,data/dockgen_set/dockgen_holo_aligned_predicted_structures/4b4v_1_L34_0_holo_aligned_predicted_protein.pdb,NC1NC(O)C2C(NC[C@@H]3CN(C4CCC([C@H](O)N[C@@H](CCC(O)O)C(O)O)CC4)C(O)N23)N1 +4bc9_1_CNV-FAD_0,data/dockgen_set/dockgen_holo_aligned_predicted_structures/4bc9_1_CNV-FAD_0_holo_aligned_predicted_protein.pdb,CC1CC2[C@H](CC1C)N(C(C)CN)C1C(O)NC(O)NC1N2C[C@H](O)[C@H](O)[C@H](O)CO[PH](O)(O)O[PH](O)(O)OC[C@H]1O[C@@H](N2CNC3C(N)NCN[C@H]32)[C@H](O)[C@@H]1O +4cdn_2_FAD_0,data/dockgen_set/dockgen_holo_aligned_predicted_structures/4cdn_2_FAD_0_holo_aligned_predicted_protein.pdb,CC1CC2NC3C(O)NC(O)NC3N(C[C@H](O)[C@H](O)[C@H](O)CO[PH](O)(O)O[PH](O)(O)OC[C@H]3O[C@@H](N4CNC5C(N)NCNC54)[C@H](O)[C@@H]3O)C2C[C@H]1C +4fyv_1_DCP_2,data/dockgen_set/dockgen_holo_aligned_predicted_structures/4fyv_1_DCP_2_holo_aligned_predicted_protein.pdb,NC1CCN([C@H]2C[C@H](O)[C@@H](CO[PH](O)(O)O[PH](O)(O)O[PH](O)(O)O)O2)C(O)N1 +4fyw_1_CTP_4,data/dockgen_set/dockgen_holo_aligned_predicted_structures/4fyw_1_CTP_4_holo_aligned_predicted_protein.pdb,NC1CCN([C@@H]2O[C@H](CO[PH](O)(O)O[PH](O)(O)O[PH](O)(O)O)[C@@H](O)[C@H]2O)C(O)N1 +4gk9_1_BMA-MAN-MAN-MAN-MAN_3,data/dockgen_set/dockgen_holo_aligned_predicted_structures/4gk9_1_BMA-MAN-MAN-MAN-MAN_3_holo_aligned_predicted_protein.pdb,OC[C@H]1O[C@H](OC[C@H]2O[C@H](OC[C@H]3O[C@@H](O)[C@@H](O)[C@@H](O[C@H]4O[C@H](CO)[C@@H](O)[C@H](O)[C@@H]4O)[C@@H]3O)[C@@H](O)[C@@H](O[C@H]3O[C@H](CO)[C@@H](O)[C@H](O)[C@@H]3O)[C@@H]2O)[C@@H](O)[C@@H](O)[C@@H]1O +4h2f_1_ADN_0,data/dockgen_set/dockgen_holo_aligned_predicted_structures/4h2f_1_ADN_0_holo_aligned_predicted_protein.pdb,NC1NCNC2C1NCN2[C@@H]1O[C@H](CO)[C@@H](O)[C@H]1O +4idk_1_1FE_0,data/dockgen_set/dockgen_holo_aligned_predicted_structures/4idk_1_1FE_0_holo_aligned_predicted_protein.pdb,NCC(O)NC1CCC2NC(O)NC2C1 +4kgx_1_CTP_5,data/dockgen_set/dockgen_holo_aligned_predicted_structures/4kgx_1_CTP_5_holo_aligned_predicted_protein.pdb,NC1CCN([C@@H]2O[C@H](CO[PH](O)(O)O[PH](O)(O)O[PH](O)(O)O)[C@@H](O)[C@H]2O)C(O)N1 
+4mig_1_G3F_2,data/dockgen_set/dockgen_holo_aligned_predicted_structures/4mig_1_G3F_2_holo_aligned_predicted_protein.pdb,OC[C@H]1O[C@@H](O)[C@H](O)[C@@H](F)[C@@H]1O +4mo2_2_FDA_0,data/dockgen_set/dockgen_holo_aligned_predicted_structures/4mo2_2_FDA_0_holo_aligned_predicted_protein.pdb,CC1CC2[C@@H](CC1C)N[C@H]1C(O)NC(O)N[C@H]1N2C[C@H](O)[C@H](O)[C@H](O)CO[PH](O)(O)O[PH](O)(O)OC[C@H]1O[C@@H](N2CNC3C(N)NCNC32)[C@H](O)[C@@H]1O +4mos_1_GAF_1,data/dockgen_set/dockgen_holo_aligned_predicted_structures/4mos_1_GAF_1_holo_aligned_predicted_protein.pdb,OC[C@H]1O[C@H](O)[C@H](F)[C@@H](O)[C@H]1O +4n4l_1_HG1_0,data/dockgen_set/dockgen_holo_aligned_predicted_structures/4n4l_1_HG1_0_holo_aligned_predicted_protein.pdb,NC(O)CCCC1CCCCC1 +4o0d_1_GLY_3,data/dockgen_set/dockgen_holo_aligned_predicted_structures/4o0d_1_GLY_3_holo_aligned_predicted_protein.pdb,NCC(O)O +4o0f_1_GLY_3,data/dockgen_set/dockgen_holo_aligned_predicted_structures/4o0f_1_GLY_3_holo_aligned_predicted_protein.pdb,NCC(O)O +4o95_1_245_0,data/dockgen_set/dockgen_holo_aligned_predicted_structures/4o95_1_245_0_holo_aligned_predicted_protein.pdb,OC(NC1CCCCC1)N[C@H]1CCNC(Cl)C1 +4oal_2_245_0,data/dockgen_set/dockgen_holo_aligned_predicted_structures/4oal_2_245_0_holo_aligned_predicted_protein.pdb,OC(NC1CCCCC1)N[C@H]1CCNC(Cl)C1 +4osx_1_GLY_2,data/dockgen_set/dockgen_holo_aligned_predicted_structures/4osx_1_GLY_2_holo_aligned_predicted_protein.pdb,NCC(O)O +4osy_1_GLY_3,data/dockgen_set/dockgen_holo_aligned_predicted_structures/4osy_1_GLY_3_holo_aligned_predicted_protein.pdb,NCC(O)O +4pfx_1_UDP_0,data/dockgen_set/dockgen_holo_aligned_predicted_structures/4pfx_1_UDP_0_holo_aligned_predicted_protein.pdb,OC1CCN([C@@H]2O[C@H](CO[PH](O)(O)O[PH](O)(O)O)[C@@H](O)[C@H]2O)C(O)N1 +4phr_1_UDP_0,data/dockgen_set/dockgen_holo_aligned_predicted_structures/4phr_1_UDP_0_holo_aligned_predicted_protein.pdb,OC1CCN([C@@H]2O[C@H](CO[PH](O)(O)O[PH](O)(O)O)[C@@H](O)[C@H]2O)C(O)N1 
+4phs_1_UDP_0,data/dockgen_set/dockgen_holo_aligned_predicted_structures/4phs_1_UDP_0_holo_aligned_predicted_protein.pdb,OC1CCN([C@@H]2O[C@H](CO[PH](O)([OH2+])O[PH](O)(O)[OH2+])[C@@H](O)[C@H]2O)C(O)N1 +4pya_1_2X3_0,data/dockgen_set/dockgen_holo_aligned_predicted_structures/4pya_1_2X3_0_holo_aligned_predicted_protein.pdb,NC1NC(O)C2N[C@H]3N([C@@H]2N1)[C@@H]1O[C@H](CO[PH](O)(O)O[PH](O)(O)O[PH](O)(O)O)[C@@]3(O)[C@H]1O +4qa8_1_PJZ_0,data/dockgen_set/dockgen_holo_aligned_predicted_structures/4qa8_1_PJZ_0_holo_aligned_predicted_protein.pdb,CCCCCCCCCCCCCC[C@@H](O)OC[C@@H](C)O[C@@H](O)CCCCCCCCCCC +4qo5_1_NAG_2,data/dockgen_set/dockgen_holo_aligned_predicted_structures/4qo5_1_NAG_2_holo_aligned_predicted_protein.pdb,CC(O)N[C@H]1CO[C@H](CO)[C@@H](O)[C@@H]1O +4rhe_1_FMN_6,data/dockgen_set/dockgen_holo_aligned_predicted_structures/4rhe_1_FMN_6_holo_aligned_predicted_protein.pdb,CC1C[C@@H]2[C@@H](C[C@@H]1C)NC1[C@H](O)NC(O)N[C@@H]1N2C[C@H](O)[C@H](O)[C@H](O)CO[PH]([O-])(O)O +4rpj_1_UDP_0,data/dockgen_set/dockgen_holo_aligned_predicted_structures/4rpj_1_UDP_0_holo_aligned_predicted_protein.pdb,OC1CCN([C@@H]2O[C@H](CO[PH](O)(O)O[PH](O)(O)O)[C@@H](O)[C@H]2O)C(O)N1 +4rpm_1_HXC_0,data/dockgen_set/dockgen_holo_aligned_predicted_structures/4rpm_1_HXC_0_holo_aligned_predicted_protein.pdb,CCCCCC(O)SC +4tvd_1_BGC_4,data/dockgen_set/dockgen_holo_aligned_predicted_structures/4tvd_1_BGC_4_holo_aligned_predicted_protein.pdb,OC[C@H]1O[C@@H](O)[C@H](O)[C@@H](O)[C@@H]1O +4tvd_1_GLC_0,data/dockgen_set/dockgen_holo_aligned_predicted_structures/4tvd_1_GLC_0_holo_aligned_predicted_protein.pdb,OC[C@H]1O[C@H](O)[C@H](O)[C@@H](O)[C@@H]1O +4u63_1_FAD_0,data/dockgen_set/dockgen_holo_aligned_predicted_structures/4u63_1_FAD_0_holo_aligned_predicted_protein.pdb,CC1CC2NC3C(O)NC(O)NC3N(C[C@H](O)[C@H](O)[C@H](O)CO[PH](O)(O)O[PH](O)(O)OC[C@H]3O[C@@H](N4CNC5C(N)NCNC54)[C@H](O)[C@@H]3O)C2CC1C 
+4uoc_1_NCN_1,data/dockgen_set/dockgen_holo_aligned_predicted_structures/4uoc_1_NCN_1_holo_aligned_predicted_protein.pdb,OC(O)C1CCCN([C@@H]2O[C@H](CO[PH](O)(O)O)[C@@H](O)[C@H]2O)C1 +4uuw_1_AMP_0,data/dockgen_set/dockgen_holo_aligned_predicted_structures/4uuw_1_AMP_0_holo_aligned_predicted_protein.pdb,NC1NCNC2C1NCN2[C@@H]1O[C@H](CO[PH](O)(O)O)[C@@H](O)[C@H]1O +4xdr_1_ADN_0,data/dockgen_set/dockgen_holo_aligned_predicted_structures/4xdr_1_ADN_0_holo_aligned_predicted_protein.pdb,NC1NCNC2C1NCN2[C@@H]1O[C@H](CO)[C@@H](O)[C@H]1O +4xfm_1_THE_0,data/dockgen_set/dockgen_holo_aligned_predicted_structures/4xfm_1_THE_0_holo_aligned_predicted_protein.pdb,OC[C@@H](O)[C@H](O)C(O)O +4ydx_1_TCE_0,data/dockgen_set/dockgen_holo_aligned_predicted_structures/4ydx_1_TCE_0_holo_aligned_predicted_protein.pdb,OC(O)CCP(CCC(O)O)CCC(O)O +4zav_1_4LS_8,data/dockgen_set/dockgen_holo_aligned_predicted_structures/4zav_1_4LS_8_holo_aligned_predicted_protein.pdb,CC(C)CCN1C2CC(C)[C@@H](C)CC2N(C[C@H](O)[C@H](O)[C@H](O)CO[PH](O)(O)O)[C@H]2NC(O)N[C@@H](O)C21 +4zaw_1_4LU_1,data/dockgen_set/dockgen_holo_aligned_predicted_structures/4zaw_1_4LU_1_holo_aligned_predicted_protein.pdb,C[C@H]1[C@@H](C)C[C@@H]2C3[C@@H]1C(C)(C)CCN3[C@H]1C(O)NC(O)NC1N2C[C@H](O)[C@H](O)[C@H](O)CO[PH](O)(O)O +4zay_1_4LS_6,data/dockgen_set/dockgen_holo_aligned_predicted_structures/4zay_1_4LS_6_holo_aligned_predicted_protein.pdb,CC(C)CCN1[C@H]2[C@H](O)N[C@H](O)N[C@@H]2N(C[C@H](O)[C@H](O)[C@H](O)CO[PH](O)(O)O)[C@@H]2CC(C)C(C)C[C@H]21 +4zaz_1_4LS_6,data/dockgen_set/dockgen_holo_aligned_predicted_structures/4zaz_1_4LS_6_holo_aligned_predicted_protein.pdb,CC(C)CCN1C2CC(C)[C@@H](C)CC2N(C[C@H](O)[C@H](O)[C@H](O)CO[PH](O)(O)O)C2N[C@@H](O)N[C@@H](O)C21 +4zqx_1_ATP_2,data/dockgen_set/dockgen_holo_aligned_predicted_structures/4zqx_1_ATP_2_holo_aligned_predicted_protein.pdb,NC1NCNC2C1NCN2[C@@H]1O[C@H](CO[PH](O)(O)O[PH](O)(O)O[PH](O)(O)O)[C@@H](O)[C@H]1O 
+5a98_1_ATP_0,data/dockgen_set/dockgen_holo_aligned_predicted_structures/5a98_1_ATP_0_holo_aligned_predicted_protein.pdb,NC1NCNC2C1NCN2[C@@H]1O[C@H](CO[PH](O)(O)O[PH](O)(O)O[PH](O)(O)O)[C@@H](O)[C@H]1O +5ae3_2_AWB_1,data/dockgen_set/dockgen_holo_aligned_predicted_structures/5ae3_2_AWB_1_holo_aligned_predicted_protein.pdb,CCCCCC[C@H]1C(O)O[C@H](C)[C@H](N[C@H](O)[C@@H]2CCC[C@H](NCO)[C@H]2O)C(O)O[C@@H](C)[C@@H]1OC(O)CC(C)C +5b5s_1_BOG_0,data/dockgen_set/dockgen_holo_aligned_predicted_structures/5b5s_1_BOG_0_holo_aligned_predicted_protein.pdb,CCCCCCCCO[C@@H]1O[C@H](CO)[C@@H](O)[C@H](O)[C@H]1O +5d9g_1_GLU-ASN-LEU-TYR-PHE-GLN_0,data/dockgen_set/dockgen_holo_aligned_predicted_structures/5d9g_1_GLU-ASN-LEU-TYR-PHE-GLN_0_holo_aligned_predicted_protein.pdb,CC(C)C[C@H](N)C(=O)N[C@@H](Cc1ccc(O)cc1)C(=O)N[C@@H](Cc1ccccc1)C(=O)N[C@@H](CCC(N)=O)C(=O)O +5dnc_1_ASN_2,data/dockgen_set/dockgen_holo_aligned_predicted_structures/5dnc_1_ASN_2_holo_aligned_predicted_protein.pdb,NC(O)C[C@H](N)C(O)O +5eno_1_5QG_0,data/dockgen_set/dockgen_holo_aligned_predicted_structures/5eno_1_5QG_0_holo_aligned_predicted_protein.pdb,CC1(C)CC2C(CO1)C(N1CCOCC1)NC(SCCC1CCCCC1)[C@@H]2CN +5enp_1_5QF_0,data/dockgen_set/dockgen_holo_aligned_predicted_structures/5enp_1_5QF_0_holo_aligned_predicted_protein.pdb,COCCN1CCN([C@@H]2NC(SCC[C@H]3CCC(OC)[C@@H](OC)C3)[C@H](CN)[C@H]3CC(C)(C)OCC23)CC1 +5enq_1_5QE_0,data/dockgen_set/dockgen_holo_aligned_predicted_structures/5enq_1_5QE_0_holo_aligned_predicted_protein.pdb,CC(O)NC1CCC(CCSC2NC(N3C[C@@H](C)O[C@@H](C)C3)[C@@H]3COC(C)(C)CC3C2CN)CC1 +5enr_1_MBX_0,data/dockgen_set/dockgen_holo_aligned_predicted_structures/5enr_1_MBX_0_holo_aligned_predicted_protein.pdb,CCC(O)NC1CCC(CCS[C@@H]2NC(N3C[C@H](C)O[C@@H](C)C3)C3COC(C)(C)CC3C2CN)CC1 +5ent_1_MIY_0,data/dockgen_set/dockgen_holo_aligned_predicted_structures/5ent_1_MIY_0_holo_aligned_predicted_protein.pdb,CN(C)C1CCC(O)C2C1C[C@H]1C[C@H]3[C@H](N(C)C)C(O)C(C(N)O)[C@@H](O)[C@@]3(O)C(O)[C@@H]1C2O 
+5ers_1_AMP_1,data/dockgen_set/dockgen_holo_aligned_predicted_structures/5ers_1_AMP_1_holo_aligned_predicted_protein.pdb,NC1NCNC2C1NCN2[C@@H]1O[C@H](CO[PH](O)(O)O)[C@@H](O)[C@H]1O +5f2t_1_PLM_0,data/dockgen_set/dockgen_holo_aligned_predicted_structures/5f2t_1_PLM_0_holo_aligned_predicted_protein.pdb,CCCCCCCCCCCCCCCC(O)O +5f52_1_ASP_2,data/dockgen_set/dockgen_holo_aligned_predicted_structures/5f52_1_ASP_2_holo_aligned_predicted_protein.pdb,N[C@@H](CC(O)O)C(O)O +5fiu_1_Y3J_3,data/dockgen_set/dockgen_holo_aligned_predicted_structures/5fiu_1_Y3J_3_holo_aligned_predicted_protein.pdb,NC1NCNC2C1NCN2[C@@H]1O[C@H](C(F)F)[C@@H](O)[C@@H]1O +5fxd_1_H7Y_1,data/dockgen_set/dockgen_holo_aligned_predicted_structures/5fxd_1_H7Y_1_holo_aligned_predicted_protein.pdb,CCC[C@H]1CCC(O)C(OC)C1 +5fxe_1_CIY_1,data/dockgen_set/dockgen_holo_aligned_predicted_structures/5fxe_1_CIY_1_holo_aligned_predicted_protein.pdb,CO[C@H]1CC(CCCO)CCC1O +5fxf_1_BEZ_0,data/dockgen_set/dockgen_holo_aligned_predicted_structures/5fxf_1_BEZ_0_holo_aligned_predicted_protein.pdb,OC(O)C1CCCCC1 +5gqi_1_ATP_7,data/dockgen_set/dockgen_holo_aligned_predicted_structures/5gqi_1_ATP_7_holo_aligned_predicted_protein.pdb,N[C@@H]1NCN[C@H]2C1NCN2[C@@H]1O[C@H](CO[PH](O)(O)O[PH](O)(O)O[PH](O)(O)O)[C@@H](O)[C@H]1O +5gql_1_ATP_4,data/dockgen_set/dockgen_holo_aligned_predicted_structures/5gql_1_ATP_4_holo_aligned_predicted_protein.pdb,NC1NCNC2C1NCN2[C@@H]1O[C@H](CO[PH](O)(O)O[PH](O)(O)O[PH](O)(O)O)[C@@H](O)[C@H]1O +5hhz_1_ZME_0,data/dockgen_set/dockgen_holo_aligned_predicted_structures/5hhz_1_ZME_0_holo_aligned_predicted_protein.pdb,CC1CCN(C2NCNC3NCNC32)C1 +5hmr_1_FDZ_0,data/dockgen_set/dockgen_holo_aligned_predicted_structures/5hmr_1_FDZ_0_holo_aligned_predicted_protein.pdb,OC(NC1CNNS1)N[C@H]1CCC[C@H](OC(F)(F)F)C1 +5hqx_1_EDZ_0,data/dockgen_set/dockgen_holo_aligned_predicted_structures/5hqx_1_EDZ_0_holo_aligned_predicted_protein.pdb,OCC[C@H]1CCCC[C@@H]1NC(O)N[C@H]1CNNS1 
+5hw0_1_GLU_2,data/dockgen_set/dockgen_holo_aligned_predicted_structures/5hw0_1_GLU_2_holo_aligned_predicted_protein.pdb,N[C@@H](CCC(O)O)C(O)O +5ida_1_BMA_0,data/dockgen_set/dockgen_holo_aligned_predicted_structures/5ida_1_BMA_0_holo_aligned_predicted_protein.pdb,OC[C@H]1O[C@@H](O)[C@@H](O)[C@@H](O)[C@@H]1O +5k3o_2_ASP_0,data/dockgen_set/dockgen_holo_aligned_predicted_structures/5k3o_2_ASP_0_holo_aligned_predicted_protein.pdb,N[C@@H](CC(O)O)C(O)O +5k45_2_GLU_1,data/dockgen_set/dockgen_holo_aligned_predicted_structures/5k45_2_GLU_1_holo_aligned_predicted_protein.pdb,N[C@@H](CCC(O)O)C(O)O +5k4h_2_GLU_3,data/dockgen_set/dockgen_holo_aligned_predicted_structures/5k4h_2_GLU_3_holo_aligned_predicted_protein.pdb,N[C@@H](CCC(O)O)C(O)O +5k62_1_ASN-VAL_0,data/dockgen_set/dockgen_holo_aligned_predicted_structures/5k62_1_ASN-VAL_0_holo_aligned_predicted_protein.pdb,CC(C)[C@@H](CO)NC(O)[C@@H](N)CC(N)O +5k63_1_ASN-GLY_0,data/dockgen_set/dockgen_holo_aligned_predicted_structures/5k63_1_ASN-GLY_0_holo_aligned_predicted_protein.pdb,NC(O)C[C@H](N)C(O)NCCO +5k66_1_ASN-GLU_0,data/dockgen_set/dockgen_holo_aligned_predicted_structures/5k66_1_ASN-GLU_0_holo_aligned_predicted_protein.pdb,NC(O)C[C@H](N)C(O)N[C@H](CO)CCC(O)O +5mh1_1_BMA_0,data/dockgen_set/dockgen_holo_aligned_predicted_structures/5mh1_1_BMA_0_holo_aligned_predicted_protein.pdb,OC[C@H]1O[C@@H](O)[C@@H](O)[C@@H](O)[C@@H]1O +5u82_2_ZN0_0,data/dockgen_set/dockgen_holo_aligned_predicted_structures/5u82_2_ZN0_0_holo_aligned_predicted_protein.pdb,CC[SnH](CC)CC +6a71_1_9UX_0,data/dockgen_set/dockgen_holo_aligned_predicted_structures/6a71_1_9UX_0_holo_aligned_predicted_protein.pdb,O[Mo@]12S[Mo@@]1(O)S2 +6a72_1_9UX_0,data/dockgen_set/dockgen_holo_aligned_predicted_structures/6a72_1_9UX_0_holo_aligned_predicted_protein.pdb,O[Mo@]12S[Mo@@]1(O)S2 +6b1b_1_TMO_15,data/dockgen_set/dockgen_holo_aligned_predicted_structures/6b1b_1_TMO_15_holo_aligned_predicted_protein.pdb,C[N+](C)(C)O 
+6ea9_1_9BG_0,data/dockgen_set/dockgen_holo_aligned_predicted_structures/6ea9_1_9BG_0_holo_aligned_predicted_protein.pdb,NC1NC(O)C2NCN([C@@H]3O[C@H](CO)[C@@H](O)[C@H]3O[PH]([O-])(O)OC[C@H]3O[C@@H](N4CNC5C(N)NCNC54)[C@H](O)[C@@H]3O[PH]([O-])([O-])O)C2N1 +6ep5_1_ADP_1,data/dockgen_set/dockgen_holo_aligned_predicted_structures/6ep5_1_ADP_1_holo_aligned_predicted_protein.pdb,NC1NCNC2C1NCN2[C@@H]1O[C@H](CO[PH](O)(O)O[PH](O)(O)O)[C@@H](O)[C@H]1O +6etf_1_AMP_0,data/dockgen_set/dockgen_holo_aligned_predicted_structures/6etf_1_AMP_0_holo_aligned_predicted_protein.pdb,NC1NCNC2[C@H]1NCN2[C@@H]1O[C@H](CO[PH](O)(O)O)[C@@H](O)[C@H]1O +6fgc_1_ADP_1,data/dockgen_set/dockgen_holo_aligned_predicted_structures/6fgc_1_ADP_1_holo_aligned_predicted_protein.pdb,NC1NCNC2C1NCN2[C@@H]1O[C@H](CO[PH](O)(O)O[PH](O)(O)O)[C@@H](O)[C@H]1O +6fgc_1_D95_1,data/dockgen_set/dockgen_holo_aligned_predicted_structures/6fgc_1_D95_1_holo_aligned_predicted_protein.pdb,C[C@H]1[C@H](OC(O)CCC(O)O)O[C@@H]2O[C@@]3(C)CC[C@H]4[C@H](C)CC[C@@H]1[C@@]24OO3 +6gbf_1_AMP_0,data/dockgen_set/dockgen_holo_aligned_predicted_structures/6gbf_1_AMP_0_holo_aligned_predicted_protein.pdb,NC1NCNC2C1NCN2[C@@H]1O[C@H](CO[PH](O)(O)O)[C@@H](O)[C@H]1O +6jls_1_FMN_6,data/dockgen_set/dockgen_holo_aligned_predicted_structures/6jls_1_FMN_6_holo_aligned_predicted_protein.pdb,CC1CC2NC3C(O)NC(O)NC3N(C[C@H](O)[C@H](O)[C@H](O)CO[PH](O)(O)O)C2CC1C +6n19_2_K8V_0,data/dockgen_set/dockgen_holo_aligned_predicted_structures/6n19_2_K8V_0_holo_aligned_predicted_protein.pdb,CCC(O)NC1CCC(C([O-])O)C(C([O-])O)C1 +6nco_1_KQP_0,data/dockgen_set/dockgen_holo_aligned_predicted_structures/6nco_1_KQP_0_holo_aligned_predicted_protein.pdb,CC(C)(O)C1CCC([C@H]2CC(Cl)C[C@@H](C34(C(N)N)CC3C4)C2)CC1 +6npp_1_KWG_0,data/dockgen_set/dockgen_holo_aligned_predicted_structures/6npp_1_KWG_0_holo_aligned_predicted_protein.pdb,[O-]C(O)C1CCCC(CCC2CCCCC2)C1N1CCCC1 
+6o6y_1_ACK_0,data/dockgen_set/dockgen_holo_aligned_predicted_structures/6o6y_1_ACK_0_holo_aligned_predicted_protein.pdb,NC1NCNC2C1NCN2[C@@H]1O[C@H](CO)[C@H]2O[PH]([O-])(O)O[C@H]21 +6o70_1_ACK_1,data/dockgen_set/dockgen_holo_aligned_predicted_structures/6o70_1_ACK_1_holo_aligned_predicted_protein.pdb,NC1NCNC2C1NCN2[C@@H]1O[C@H](CO)[C@H]2O[PH]([O-])(O)O[C@H]21 +6pa2_1_ASP_2,data/dockgen_set/dockgen_holo_aligned_predicted_structures/6pa2_1_ASP_2_holo_aligned_predicted_protein.pdb,N[C@@H](CC(O)O)C(O)O +6pa6_2_ASN_0,data/dockgen_set/dockgen_holo_aligned_predicted_structures/6pa6_2_ASN_0_holo_aligned_predicted_protein.pdb,NC(O)C[C@H](N)C(O)O +6paa_1_ASP_2,data/dockgen_set/dockgen_holo_aligned_predicted_structures/6paa_1_ASP_2_holo_aligned_predicted_protein.pdb,N[C@@H](CC(O)O)C(O)O +6qkr_1_FAD_0,data/dockgen_set/dockgen_holo_aligned_predicted_structures/6qkr_1_FAD_0_holo_aligned_predicted_protein.pdb,CC1CC2NC3C(NC(O)N[C@H]3O)N(C[C@H](O)[C@H](O)[C@H](O)CO[PH](O)(O)O[PH](O)(O)OC[C@H]3O[C@@H](N4CNC5C4NCN[C@H]5N)[C@H](O)[C@@H]3O)C2CC1C +6rms_1_AMP_1,data/dockgen_set/dockgen_holo_aligned_predicted_structures/6rms_1_AMP_1_holo_aligned_predicted_protein.pdb,NC1NCNC2C1NCN2[C@@H]1O[C@H](CO[PH](O)(O)O)[C@@H](O)[C@H]1O +6ryz_1_SAM_2,data/dockgen_set/dockgen_holo_aligned_predicted_structures/6ryz_1_SAM_2_holo_aligned_predicted_protein.pdb,C[S@@H](CC[C@H](N)C(O)O)C[C@H]1O[C@@H](N2CNC3C2NCN[C@@H]3N)[C@H](O)[C@@H]1O +6rz2_1_5CD_2,data/dockgen_set/dockgen_holo_aligned_predicted_structures/6rz2_1_5CD_2_holo_aligned_predicted_protein.pdb,NC1NCNC2[C@H]1NCN2[C@@H]1O[C@H](CCl)[C@@H](O)[C@H]1O +6tvg_1_AP2_1,data/dockgen_set/dockgen_holo_aligned_predicted_structures/6tvg_1_AP2_1_holo_aligned_predicted_protein.pdb,NC1NCNC2C1NCN2[C@@H]1O[C@H](CO[PH](O)(O)C[PH](O)(O)O)[C@@H](O)[C@H]1O +6uqy_2_AT3_0,data/dockgen_set/dockgen_holo_aligned_predicted_structures/6uqy_2_AT3_0_holo_aligned_predicted_protein.pdb,C[C@@H](O)SCC[N+](C)(C)C 
+6ur1_2_AT3_0,data/dockgen_set/dockgen_holo_aligned_predicted_structures/6ur1_2_AT3_0_holo_aligned_predicted_protein.pdb,C[C@@H](O)SCC[N+](C)(C)C +6v2a_1_ASN_3,data/dockgen_set/dockgen_holo_aligned_predicted_structures/6v2a_1_ASN_3_holo_aligned_predicted_protein.pdb,NC(O)C[C@H](N)C(O)O +6wyz_1_DGL_1,data/dockgen_set/dockgen_holo_aligned_predicted_structures/6wyz_1_DGL_1_holo_aligned_predicted_protein.pdb,N[C@H](CCC(O)O)C(O)O +6xb3_3_9BG_1,data/dockgen_set/dockgen_holo_aligned_predicted_structures/6xb3_3_9BG_1_holo_aligned_predicted_protein.pdb,NC1NC(O)C2NCN([C@@H]3O[C@H](CO)[C@@H](O)[C@H]3O[PH]([O-])(O)OC[C@H]3O[C@@H](N4CNC5C(N)NCNC54)[C@H](O)[C@@H]3O[PH]([O-])(O)O)C2N1 +6xug_1_O1Q_0,data/dockgen_set/dockgen_holo_aligned_predicted_structures/6xug_1_O1Q_0_holo_aligned_predicted_protein.pdb,CC1CCC[C@H](N2NCC[C@H]2C2CC(Cl)C3NNN(C4CC[C@H]5CNNC5C4)C3C2)C1 +6yao_1_OJ2_0,data/dockgen_set/dockgen_holo_aligned_predicted_structures/6yao_1_OJ2_0_holo_aligned_predicted_protein.pdb,OCC[C@H]1CCCC[C@@H]1NC(O)N[C@H]1CCC[C@@H](OC(F)(F)F)C1 +6yap_1_OHZ_0,data/dockgen_set/dockgen_holo_aligned_predicted_structures/6yap_1_OHZ_0_holo_aligned_predicted_protein.pdb,OCC[C@H]1CCCC[C@@H]1NC(O)N[C@H]1CC(Cl)C[C@H](OC(F)(F)F)C1 +6yaq_1_OHZ_0,data/dockgen_set/dockgen_holo_aligned_predicted_structures/6yaq_1_OHZ_0_holo_aligned_predicted_protein.pdb,OCCC1CCCC[C@H]1N[C@H](O)N[C@H]1C[C@@H](Cl)C[C@@H](OC(F)(F)F)C1 diff --git a/data/test_cases/dockgen/ensemble_pocket_only_inputs.csv b/data/test_cases/dockgen/ensemble_pocket_only_inputs.csv new file mode 100644 index 00000000..bb1b3251 --- /dev/null +++ b/data/test_cases/dockgen/ensemble_pocket_only_inputs.csv @@ -0,0 +1,190 @@ +name,protein_input,ligand_smiles +3gvl_1_SLB_2,data/dockgen_set/dockgen_holo_aligned_predicted_structures_bs_cropped/3gvl_1_SLB_2_holo_aligned_predicted_protein.pdb,CC(O)N[C@H]1[C@H]([C@H](O)[C@H](O)CO)O[C@](O)(C(O)O)C[C@@H]1O 
+3inr_1_GDU_0,data/dockgen_set/dockgen_holo_aligned_predicted_structures_bs_cropped/3inr_1_GDU_0_holo_aligned_predicted_protein.pdb,OC[C@H]1O[C@H](O[PH](O)(O)O[PH](O)(O)OC[C@H]2O[C@@H](N3CCC(O)NC3O)[C@H](O)[C@@H]2O)[C@H](O)[C@@H](O)[C@H]1O +3jqm_1_GTP_5,data/dockgen_set/dockgen_holo_aligned_predicted_structures_bs_cropped/3jqm_1_GTP_5_holo_aligned_predicted_protein.pdb,NC1NC(O)C2NCN([C@@H]3O[C@H](CO[PH](O)(O)O[PH](O)(O)O[PH](O)(O)O)[C@@H](O)[C@H]3O)C2N1 +3ju4_1_SLB_2,data/dockgen_set/dockgen_holo_aligned_predicted_structures_bs_cropped/3ju4_1_SLB_2_holo_aligned_predicted_protein.pdb,CC(O)N[C@H]1[C@H]([C@H](O)[C@H](O)CO)O[C@](O)(C(O)O)C[C@@H]1O +4cnl_1_CHT_1,data/dockgen_set/dockgen_holo_aligned_predicted_structures_bs_cropped/4cnl_1_CHT_1_holo_aligned_predicted_protein.pdb,C[N+](C)(C)CCO +1hg0_1_SIN_1,data/dockgen_set/dockgen_holo_aligned_predicted_structures_bs_cropped/1hg0_1_SIN_1_holo_aligned_predicted_protein.pdb,OC(O)CCC(O)O +1i8t_1_FAD_1,data/dockgen_set/dockgen_holo_aligned_predicted_structures_bs_cropped/1i8t_1_FAD_1_holo_aligned_predicted_protein.pdb,CC1CC2NC3C(O)NC(O)NC3N(C[C@H](O)[C@H](O)[C@H](O)CO[PH](O)(O)O[PH](O)(O)OC[C@H]3O[C@@H](N4CNC5C(N)NCNC54)[C@H](O)[C@@H]3O)C2CC1C +1o28_1_UFP_2,data/dockgen_set/dockgen_holo_aligned_predicted_structures_bs_cropped/1o28_1_UFP_2_holo_aligned_predicted_protein.pdb,OC1NC(O)N([C@H]2C[C@H](O)[C@@H](CO[PH](O)(O)O)O2)CC1F +1o72_2_PC_0,data/dockgen_set/dockgen_holo_aligned_predicted_structures_bs_cropped/1o72_2_PC_0_holo_aligned_predicted_protein.pdb,C[N+](C)(C)CCO[PH](O)(O)O +1pj2_1_FUM_0,data/dockgen_set/dockgen_holo_aligned_predicted_structures_bs_cropped/1pj2_1_FUM_0_holo_aligned_predicted_protein.pdb,OC(O)CCC(O)O +1pj4_1_FUM_1,data/dockgen_set/dockgen_holo_aligned_predicted_structures_bs_cropped/1pj4_1_FUM_1_holo_aligned_predicted_protein.pdb,OC(O)CCC(O)O +1qaw_1_TRP_7,data/dockgen_set/dockgen_holo_aligned_predicted_structures_bs_cropped/1qaw_1_TRP_7_holo_aligned_predicted_protein.pdb,N[C@@H](CC1CNC2CCCCC12)C(O)O 
+1rqp_1_SAM_0,data/dockgen_set/dockgen_holo_aligned_predicted_structures_bs_cropped/1rqp_1_SAM_0_holo_aligned_predicted_protein.pdb,C[S@@H](CC[C@H](N)C(O)O)C[C@H]1O[C@@H](N2CNC3C(N)NCNC32)[C@H](O)[C@@H]1O +1sbz_1_FMN_3,data/dockgen_set/dockgen_holo_aligned_predicted_structures_bs_cropped/1sbz_1_FMN_3_holo_aligned_predicted_protein.pdb,C[C@@H]1CC2NC3C(O)NC(O)NC3N(C[C@H](O)[C@H](O)[C@H](O)CO[PH](O)(O)O)C2C[C@@H]1C +1sij_1_PCD_0,data/dockgen_set/dockgen_holo_aligned_predicted_structures_bs_cropped/1sij_1_PCD_0_holo_aligned_predicted_protein.pdb,NC1CCN([C@@H]2O[C@H](CO[PH](O)(O)O[PH](O)(O)OC[C@H]3O[C@H]4NC5NC(N)NC(O)C5N[C@H]4[C@@H]4S[Mo](O)(O)SC43)[C@@H](O)[C@H]2O)C(O)N1 +1tke_1_SER_0,data/dockgen_set/dockgen_holo_aligned_predicted_structures_bs_cropped/1tke_1_SER_0_holo_aligned_predicted_protein.pdb,N[C@@H](CO)C(O)O +1tkg_1_SSA_0,data/dockgen_set/dockgen_holo_aligned_predicted_structures_bs_cropped/1tkg_1_SSA_0_holo_aligned_predicted_protein.pdb,NC1NCNC2C1NCN2[C@@H]1O[C@H](COS(O)(O)NC(O)[C@@H](N)CO)[C@@H](O)[C@H]1O +1u8u_1_OCA_0,data/dockgen_set/dockgen_holo_aligned_predicted_structures_bs_cropped/1u8u_1_OCA_0_holo_aligned_predicted_protein.pdb,CCCCCCCC(O)O +1uf5_1_CDT_0,data/dockgen_set/dockgen_holo_aligned_predicted_structures_bs_cropped/1uf5_1_CDT_0_holo_aligned_predicted_protein.pdb,CSCC[C@@H](NC(N)O)C(O)O +1uf7_1_CDV_0,data/dockgen_set/dockgen_holo_aligned_predicted_structures_bs_cropped/1uf7_1_CDV_0_holo_aligned_predicted_protein.pdb,CC(C)[C@@H](NC(N)O)C(O)O +1uf8_1_ING_2,data/dockgen_set/dockgen_holo_aligned_predicted_structures_bs_cropped/1uf8_1_ING_2_holo_aligned_predicted_protein.pdb,NC(O)N[C@H](CC1CCCCC1)C(O)O +1v2g_1_OCA_0,data/dockgen_set/dockgen_holo_aligned_predicted_structures_bs_cropped/1v2g_1_OCA_0_holo_aligned_predicted_protein.pdb,CCCCCCCC(O)O +1v97_1_MTE_1,data/dockgen_set/dockgen_holo_aligned_predicted_structures_bs_cropped/1v97_1_MTE_1_holo_aligned_predicted_protein.pdb,NC1NC(O)[C@@H]2N[C@H]3C(S)C(S)[C@@H](CO[PH](O)(O)O)O[C@H]3NC2N1 
+1za2_1_CTP_4,data/dockgen_set/dockgen_holo_aligned_predicted_structures_bs_cropped/1za2_1_CTP_4_holo_aligned_predicted_protein.pdb,NC1CCN([C@@H]2O[C@H](CO[PH](O)(O)O[PH](O)(O)O[PH](O)(O)O)[C@@H](O)[C@H]2O)C(O)N1 +2cdc_1_XYS_5,data/dockgen_set/dockgen_holo_aligned_predicted_structures_bs_cropped/2cdc_1_XYS_5_holo_aligned_predicted_protein.pdb,O[C@@H]1[C@@H](O)[C@@H](O)OC[C@H]1O +2ext_1_TRP_0,data/dockgen_set/dockgen_holo_aligned_predicted_structures_bs_cropped/2ext_1_TRP_0_holo_aligned_predicted_protein.pdb,N[C@@H](CC1CNC2CCCCC12)C(O)O +2g7c_1_NAG-GAL-GLA_1,data/dockgen_set/dockgen_holo_aligned_predicted_structures_bs_cropped/2g7c_1_NAG-GAL-GLA_1_holo_aligned_predicted_protein.pdb,CC(O)N[C@@H]1[C@@H](O)[C@H](O[C@@H]2O[C@H](CO)[C@H](O)[C@H](O[C@H]3O[C@H](CO)[C@H](O)[C@H](O)[C@H]3O)[C@H]2O)[C@@H](CO)O[C@H]1O +2gag_1_FOA_0,data/dockgen_set/dockgen_holo_aligned_predicted_structures_bs_cropped/2gag_1_FOA_0_holo_aligned_predicted_protein.pdb,OC(O)C1CCCO1 +2gag_1_NAD_0,data/dockgen_set/dockgen_holo_aligned_predicted_structures_bs_cropped/2gag_1_NAD_0_holo_aligned_predicted_protein.pdb,NC1NCNC2C1NCN2[C@@H]1O[C@H](CO[PH](O)(O)O[PH](O)(O)OC[C@H]2O[C@@H](N3CCCC([C@H](N)O)C3)[C@H](O)[C@@H]2O)[C@@H](O)[C@H]1O +2gah_1_NAD_0,data/dockgen_set/dockgen_holo_aligned_predicted_structures_bs_cropped/2gah_1_NAD_0_holo_aligned_predicted_protein.pdb,NC(O)C1CCCN([C@@H]2O[C@H](CO[PH](O)(O)O[PH](O)(O)OC[C@H]3O[C@@H](N4CNC5C(N)NCNC54)[C@H](O)[C@@H]3O)[C@@H](O)[C@H]2O)C1 +2gf3_2_FOA_1,data/dockgen_set/dockgen_holo_aligned_predicted_structures_bs_cropped/2gf3_2_FOA_1_holo_aligned_predicted_protein.pdb,OC(O)C1CCCO1 +2him_1_ASN_3,data/dockgen_set/dockgen_holo_aligned_predicted_structures_bs_cropped/2him_1_ASN_3_holo_aligned_predicted_protein.pdb,NC(O)C[C@H](N)C(O)O +2hk9_1_SKM_0,data/dockgen_set/dockgen_holo_aligned_predicted_structures_bs_cropped/2hk9_1_SKM_0_holo_aligned_predicted_protein.pdb,OC(O)[C@H]1C[C@@H](O)[C@@H](O)[C@H](O)C1 
+2hs3_1_FGR_0,data/dockgen_set/dockgen_holo_aligned_predicted_structures_bs_cropped/2hs3_1_FGR_0_holo_aligned_predicted_protein.pdb,OCNCC(O)N[C@@H]1O[C@H](CO[PH](O)(O)O)[C@@H](O)[C@H]1O +2o5m_1_MNR_0,data/dockgen_set/dockgen_holo_aligned_predicted_structures_bs_cropped/2o5m_1_MNR_0_holo_aligned_predicted_protein.pdb,CCC1C(C)C2C[C@@H]3[C@H](C)[C@H](CCC(O)O)C4CC5[C@@H](CCC(O)O)C(C)C6CC7[C@@H](CC)[C@H](C)C8CC1N2[Mn@SP2](N65)(N87)N43 +2q37_1_3AL_0,data/dockgen_set/dockgen_holo_aligned_predicted_structures_bs_cropped/2q37_1_3AL_0_holo_aligned_predicted_protein.pdb,NC(O)N[C@H]1NC(O)NC1O +2q6k_1_ADN_1,data/dockgen_set/dockgen_holo_aligned_predicted_structures_bs_cropped/2q6k_1_ADN_1_holo_aligned_predicted_protein.pdb,NC1NCNC2C1NCN2[C@@H]1O[C@H](CO)[C@@H](O)[C@H]1O +2r4e_1_13P_0,data/dockgen_set/dockgen_holo_aligned_predicted_structures_bs_cropped/2r4e_1_13P_0_holo_aligned_predicted_protein.pdb,OC[C@@H](O)CO[PH](O)(O)O +2v5e_1_GU4-YYJ_0,data/dockgen_set/dockgen_holo_aligned_predicted_structures_bs_cropped/2v5e_1_GU4-YYJ_0_holo_aligned_predicted_protein.pdb,OS(O)(O)OC[C@H]1O[C@@](COS(O)(O)O)(O[C@H]2O[C@H](COS(O)(O)O)[C@@H](OS(O)(O)O)[C@H](OS(O)(O)O)[C@H]2OS(O)(O)O)[C@@H](OS(O)(O)O)[C@@H]1OS(O)(O)O +2v7t_1_SAH_4,data/dockgen_set/dockgen_holo_aligned_predicted_structures_bs_cropped/2v7t_1_SAH_4_holo_aligned_predicted_protein.pdb,NC1NCNC2C1NCN2[C@@H]1O[C@H](CSCC[C@H](N)C(O)O)[C@@H](O)[C@H]1O +2v7u_1_SAM_2,data/dockgen_set/dockgen_holo_aligned_predicted_structures_bs_cropped/2v7u_1_SAM_2_holo_aligned_predicted_protein.pdb,C[S@@H](CC[C@H](N)C(O)O)C[C@H]1O[C@@H](N2CNC3C(N)NCNC32)[C@H](O)[C@@H]1O +2v7v_1_5FD_0,data/dockgen_set/dockgen_holo_aligned_predicted_structures_bs_cropped/2v7v_1_5FD_0_holo_aligned_predicted_protein.pdb,NC1NCNC2C1NCN2[C@@H]1O[C@H](CF)[C@@H](O)[C@H]1O +2v7w_1_5FD_0,data/dockgen_set/dockgen_holo_aligned_predicted_structures_bs_cropped/2v7w_1_5FD_0_holo_aligned_predicted_protein.pdb,NC1NCNC2C1NCN2[C@@H]1O[C@H](CF)[C@@H](O)[C@H]1O 
+2vdf_1_OCT_0,data/dockgen_set/dockgen_holo_aligned_predicted_structures_bs_cropped/2vdf_1_OCT_0_holo_aligned_predicted_protein.pdb,CCCCCCCC +2vfu_1_MTL_0,data/dockgen_set/dockgen_holo_aligned_predicted_structures_bs_cropped/2vfu_1_MTL_0_holo_aligned_predicted_protein.pdb,OC[C@@H](O)[C@@H](O)[C@H](O)[C@H](O)CO +2wab_1_BGC-BGC-BGC-BGC-BGC_0,data/dockgen_set/dockgen_holo_aligned_predicted_structures_bs_cropped/2wab_1_BGC-BGC-BGC-BGC-BGC_0_holo_aligned_predicted_protein.pdb,OC[C@H]1O[C@@H](O[C@H]2[C@H](O)[C@@H](O)[C@H](O[C@H]3[C@H](O)[C@@H](O)[C@H](O[C@H]4[C@H](O)[C@@H](O)[C@H](O[C@H]5[C@H](O)[C@@H](O)[C@H](O)O[C@@H]5CO)O[C@@H]4CO)O[C@@H]3CO)O[C@@H]2CO)[C@H](O)[C@@H](O)[C@@H]1O +2wao_1_BGC-BGC-BGC-BGC-BGC_0,data/dockgen_set/dockgen_holo_aligned_predicted_structures_bs_cropped/2wao_1_BGC-BGC-BGC-BGC-BGC_0_holo_aligned_predicted_protein.pdb,OC[C@H]1O[C@@H](O[C@H]2[C@H](O)[C@@H](O)[C@H](O[C@H]3[C@H](O)[C@@H](O)[C@H](O[C@H]4[C@H](O)[C@@H](O)[C@H](O[C@H]5[C@H](O)[C@@H](O)[C@H](O)O[C@@H]5CO)O[C@@H]4CO)O[C@@H]3CO)O[C@@H]2CO)[C@H](O)[C@@H](O)[C@@H]1O +2wr8_1_SAH_1,data/dockgen_set/dockgen_holo_aligned_predicted_structures_bs_cropped/2wr8_1_SAH_1_holo_aligned_predicted_protein.pdb,NC1NCNC2C1NCN2[C@@H]1O[C@H](CSCC[C@H](N)C(O)O)[C@@H](O)[C@H]1O +2wwc_1_CHT_2,data/dockgen_set/dockgen_holo_aligned_predicted_structures_bs_cropped/2wwc_1_CHT_2_holo_aligned_predicted_protein.pdb,C[N+](C)(C)CCO +2x34_2_UQ8_0,data/dockgen_set/dockgen_holo_aligned_predicted_structures_bs_cropped/2x34_2_UQ8_0_holo_aligned_predicted_protein.pdb,COC1C(O)C(C)C(CCC(C)CCCC(C)CCCC(C)CCC[C@H](C)CCCC(C)CCC[C@H](C)CCCC(C)CCCC(C)C)C(O)C1OC +2xrh_1_NIO_1,data/dockgen_set/dockgen_holo_aligned_predicted_structures_bs_cropped/2xrh_1_NIO_1_holo_aligned_predicted_protein.pdb,OC(O)[C@H]1CCCNC1 +2xta_1_ACO_0,data/dockgen_set/dockgen_holo_aligned_predicted_structures_bs_cropped/2xta_1_ACO_0_holo_aligned_predicted_protein.pdb,CCO[PH](O)(O)O[PH](O)(O)OC[C@H]1O[C@@H](N2CNC3C(N)NCNC32)[C@H](O)[C@@H]1O[PH](O)(O)O 
+2zcz_2_TRP_3,data/dockgen_set/dockgen_holo_aligned_predicted_structures_bs_cropped/2zcz_2_TRP_3_holo_aligned_predicted_protein.pdb,N[C@@H](CC1CNC2CCCCC12)C(O)O +2zd0_1_TRP_9,data/dockgen_set/dockgen_holo_aligned_predicted_structures_bs_cropped/2zd0_1_TRP_9_holo_aligned_predicted_protein.pdb,N[C@@H](CC1CNC2CCCCC12)C(O)O +2ze9_1_PD7_0,data/dockgen_set/dockgen_holo_aligned_predicted_structures_bs_cropped/2ze9_1_PD7_0_holo_aligned_predicted_protein.pdb,CCCCCCC(O)OC[C@H](CO[PH](O)(O)O)OC(O)CCCCCC +3ad7_1_NAD_0,data/dockgen_set/dockgen_holo_aligned_predicted_structures_bs_cropped/3ad7_1_NAD_0_holo_aligned_predicted_protein.pdb,N[C@H](O)C1CCCN([C@@H]2O[C@H](CO[PH](O)(O)O[PH](O)(O)OC[C@H]3O[C@@H](N4CNC5[C@H](N)NCN[C@H]54)[C@H](O)[C@@H]3O)[C@@H](O)[C@H]2O)C1 +3ad9_1_NAD_0,data/dockgen_set/dockgen_holo_aligned_predicted_structures_bs_cropped/3ad9_1_NAD_0_holo_aligned_predicted_protein.pdb,NC1NCNC2[C@@H]1NCN2[C@@H]1O[C@H](CO[PH](O)(O)O[PH](O)(O)OC[C@H]2O[C@@H](N3CCCC([C@H](N)O)C3)[C@H](O)[C@@H]2O)[C@@H](O)[C@H]1O +3ada_1_NAD_0,data/dockgen_set/dockgen_holo_aligned_predicted_structures_bs_cropped/3ada_1_NAD_0_holo_aligned_predicted_protein.pdb,N[C@H](O)C1CCCN([C@@H]2O[C@H](CO[PH](O)(O)O[PH](O)(O)OC[C@H]3O[C@@H](N4CN[C@@H]5[C@H](N)NCN[C@H]54)[C@H](O)[C@@H]3O)[C@@H](O)[C@H]2O)C1 +3eca_1_ASP_3,data/dockgen_set/dockgen_holo_aligned_predicted_structures_bs_cropped/3eca_1_ASP_3_holo_aligned_predicted_protein.pdb,N[C@@H](CC(O)O)C(O)O +3gf4_1_FAD_1,data/dockgen_set/dockgen_holo_aligned_predicted_structures_bs_cropped/3gf4_1_FAD_1_holo_aligned_predicted_protein.pdb,CC1CC2NC3C(O)NC(O)NC3N(C[C@H](O)[C@H](O)[C@H](O)CO[PH](O)(O)O[PH](O)(O)OC[C@H]3O[C@@H](N4CNC5C4NCN[C@H]5N)[C@H](O)[C@@H]3O)C2CC1C +3gf4_1_U5P_0,data/dockgen_set/dockgen_holo_aligned_predicted_structures_bs_cropped/3gf4_1_U5P_0_holo_aligned_predicted_protein.pdb,OC1CCN([C@@H]2O[C@H](CO[PH](O)(O)O)[C@@H](O)[C@H]2O)C(O)N1 
+3he3_5_UDP_0,data/dockgen_set/dockgen_holo_aligned_predicted_structures_bs_cropped/3he3_5_UDP_0_holo_aligned_predicted_protein.pdb,OC1CCN([C@@H]2O[C@H](CO[PH](O)(O)O[PH](O)(O)O)[C@@H](O)[C@H]2O)C(O)N1 +3it6_1_ORN_1,data/dockgen_set/dockgen_holo_aligned_predicted_structures_bs_cropped/3it6_1_ORN_1_holo_aligned_predicted_protein.pdb,NCCC[C@H](N)C(O)O +3k8l_1_GLC-GLC-GLC-GLC-GLC_0,data/dockgen_set/dockgen_holo_aligned_predicted_structures_bs_cropped/3k8l_1_GLC-GLC-GLC-GLC-GLC_0_holo_aligned_predicted_protein.pdb,OC[C@H]1O[C@H](O[C@H]2[C@H](O)[C@@H](O)[C@@H](O[C@H]3[C@H](O)[C@@H](O)[C@@H](O[C@H]4[C@H](O)[C@@H](O)[C@@H](O[C@H]5[C@H](O)[C@@H](O)[C@@H](O)O[C@@H]5CO)O[C@@H]4CO)O[C@@H]3CO)O[C@@H]2CO)[C@H](O)[C@@H](O)[C@@H]1O +3k8m_1_GLC-GLC-AC1_0,data/dockgen_set/dockgen_holo_aligned_predicted_structures_bs_cropped/3k8m_1_GLC-GLC-AC1_0_holo_aligned_predicted_protein.pdb,C[C@H]1O[C@H](O[C@H]2[C@H](O)[C@@H](O)[C@@H](O[C@H]3[C@H](O)[C@@H](O)[C@@H](O)O[C@@H]3CO)O[C@@H]2CO)[C@H](O)[C@@H](O)[C@@H]1N[C@H]1CC(CO)[C@@H](O)[C@H](O)[C@H]1O +3nvv_1_MTE_1,data/dockgen_set/dockgen_holo_aligned_predicted_structures_bs_cropped/3nvv_1_MTE_1_holo_aligned_predicted_protein.pdb,NC1NC(O)C2N[C@@H]3[C@H](NC2N1)O[C@H](CO[PH](O)(O)O)C(S)[C@@H]3S +3o01_2_DXC_0,data/dockgen_set/dockgen_holo_aligned_predicted_structures_bs_cropped/3o01_2_DXC_0_holo_aligned_predicted_protein.pdb,C[C@H](CCC(O)O)[C@H]1CC[C@H]2[C@@H]3CC[C@@H]4C[C@H](O)CC[C@]4(C)[C@H]3C[C@H](O)[C@]12C +3o02_2_JN3_0,data/dockgen_set/dockgen_holo_aligned_predicted_structures_bs_cropped/3o02_2_JN3_0_holo_aligned_predicted_protein.pdb,C[C@H](CCC(O)O)[C@H]1CC[C@H]2[C@@H]3[C@H](O)C[C@@H]4C[C@H](O)CC[C@]4(C)[C@H]3CC[C@]12C +3o7j_1_2AL_0,data/dockgen_set/dockgen_holo_aligned_predicted_structures_bs_cropped/3o7j_1_2AL_0_holo_aligned_predicted_protein.pdb,NC(O)NC1NC(O)NC1O +3q14_1_PCR_3,data/dockgen_set/dockgen_holo_aligned_predicted_structures_bs_cropped/3q14_1_PCR_3_holo_aligned_predicted_protein.pdb,CC1CCC(O)CC1 
+3qrc_2_GU4-YYJ_0,data/dockgen_set/dockgen_holo_aligned_predicted_structures_bs_cropped/3qrc_2_GU4-YYJ_0_holo_aligned_predicted_protein.pdb,OS(O)(O)OC[C@H]1O[C@@](COS(O)(O)O)(O[C@H]2O[C@H](COS(O)(O)O)[C@@H](OS(O)(O)O)[C@H](OS(O)(O)O)[C@H]2OS(O)(O)O)[C@@H](OS(O)(O)O)[C@@H]1OS(O)(O)O +3s5x_1_BMA-MAN-MAN-MAN-MAN_0,data/dockgen_set/dockgen_holo_aligned_predicted_structures_bs_cropped/3s5x_1_BMA-MAN-MAN-MAN-MAN_0_holo_aligned_predicted_protein.pdb,OC[C@H]1O[C@H](O[C@@H]2[C@H](O)[C@@H](OC[C@H]3O[C@@H](O)[C@@H](O)[C@@H](O[C@H]4O[C@H](CO)[C@@H](O)[C@H](O)[C@@H]4O)[C@@H]3O)O[C@H](CO)[C@H]2O)[C@@H](O)[C@@H](O)[C@@H]1O +3s6a_1_ANP_0,data/dockgen_set/dockgen_holo_aligned_predicted_structures_bs_cropped/3s6a_1_ANP_0_holo_aligned_predicted_protein.pdb,NC1NCNC2C1NCN2[C@@H]1O[C@H](CO[PH](O)(O)O[PH](N)(O)O)[C@@H](O)[C@H]1O +3se5_1_ANP_2,data/dockgen_set/dockgen_holo_aligned_predicted_structures_bs_cropped/3se5_1_ANP_2_holo_aligned_predicted_protein.pdb,NC1NCNC2C1NCN2[C@@H]1O[C@H](CO[PH](O)(O)O[PH](O)(O)N[PH](O)(O)O)[C@@H](O)[C@H]1O +3sr6_1_MTE_1,data/dockgen_set/dockgen_holo_aligned_predicted_structures_bs_cropped/3sr6_1_MTE_1_holo_aligned_predicted_protein.pdb,NC1NC(O)[C@@H]2N[C@H]3C(S)C(S)[C@@H](CO[PH](O)(O)O)O[C@H]3NC2N1 +3ub7_1_ACM_1,data/dockgen_set/dockgen_holo_aligned_predicted_structures_bs_cropped/3ub7_1_ACM_1_holo_aligned_predicted_protein.pdb,CC(N)O +3ub9_1_NHY_1,data/dockgen_set/dockgen_holo_aligned_predicted_structures_bs_cropped/3ub9_1_NHY_1_holo_aligned_predicted_protein.pdb,N[C@@H](O)NO +3uni_1_SAL_0,data/dockgen_set/dockgen_holo_aligned_predicted_structures_bs_cropped/3uni_1_SAL_0_holo_aligned_predicted_protein.pdb,OC(O)C1CCCCC1O +3wrb_1_GDE_0,data/dockgen_set/dockgen_holo_aligned_predicted_structures_bs_cropped/3wrb_1_GDE_0_holo_aligned_predicted_protein.pdb,OC(O)C1CC(O)C(O)C(O)C1 
+3wvc_1_FEG_0,data/dockgen_set/dockgen_holo_aligned_predicted_structures_bs_cropped/3wvc_1_FEG_0_holo_aligned_predicted_protein.pdb,CC1C(CC(O)O)NC(O)[C@@H](C)[C@@H]1O[PH](O)(O)OC[C@H]1O[C@@H](N2CNC3C(O)NC(N)NC32)[C@H](O)[C@@H]1O +3zec_1_ANP_0,data/dockgen_set/dockgen_holo_aligned_predicted_structures_bs_cropped/3zec_1_ANP_0_holo_aligned_predicted_protein.pdb,NC1NCNC2C1NCN2[C@@H]1O[C@H](CO[PH](O)(O)O[PH](O)(O)N[PH](O)(O)O)[C@@H](O)[C@H]1O +3zjx_1_BOG_0,data/dockgen_set/dockgen_holo_aligned_predicted_structures_bs_cropped/3zjx_1_BOG_0_holo_aligned_predicted_protein.pdb,CCCCCCCCO[C@@H]1O[C@H](CO)[C@@H](O)[C@H](O)[C@H]1O +3zqu_1_FNR_5,data/dockgen_set/dockgen_holo_aligned_predicted_structures_bs_cropped/3zqu_1_FNR_5_holo_aligned_predicted_protein.pdb,C[C@H]1CC2NC3[C@H](O)N[C@H](O)N[C@@H]3N(C[C@H](O)[C@H](O)[C@H](O)CO[PH](O)(O)O)[C@@H]2C[C@@H]1C +3zzs_1_TRP_3,data/dockgen_set/dockgen_holo_aligned_predicted_structures_bs_cropped/3zzs_1_TRP_3_holo_aligned_predicted_protein.pdb,N[C@@H](CC1CNC2CCCCC12)C(O)O +4b4v_1_L34_0,data/dockgen_set/dockgen_holo_aligned_predicted_structures_bs_cropped/4b4v_1_L34_0_holo_aligned_predicted_protein.pdb,NC1NC(O)C2C(NC[C@@H]3CN(C4CCC([C@H](O)N[C@@H](CCC(O)O)C(O)O)CC4)C(O)N23)N1 +4bc9_1_CNV-FAD_0,data/dockgen_set/dockgen_holo_aligned_predicted_structures_bs_cropped/4bc9_1_CNV-FAD_0_holo_aligned_predicted_protein.pdb,CC1CC2[C@H](CC1C)N(C(C)CN)C1C(O)NC(O)NC1N2C[C@H](O)[C@H](O)[C@H](O)CO[PH](O)(O)O[PH](O)(O)OC[C@H]1O[C@@H](N2CNC3C(N)NCN[C@H]32)[C@H](O)[C@@H]1O +4cdn_2_FAD_0,data/dockgen_set/dockgen_holo_aligned_predicted_structures_bs_cropped/4cdn_2_FAD_0_holo_aligned_predicted_protein.pdb,CC1CC2NC3C(O)NC(O)NC3N(C[C@H](O)[C@H](O)[C@H](O)CO[PH](O)(O)O[PH](O)(O)OC[C@H]3O[C@@H](N4CNC5C(N)NCNC54)[C@H](O)[C@@H]3O)C2C[C@H]1C +4fyv_1_DCP_2,data/dockgen_set/dockgen_holo_aligned_predicted_structures_bs_cropped/4fyv_1_DCP_2_holo_aligned_predicted_protein.pdb,NC1CCN([C@H]2C[C@H](O)[C@@H](CO[PH](O)(O)O[PH](O)(O)O[PH](O)(O)O)O2)C(O)N1 
+4fyw_1_CTP_4,data/dockgen_set/dockgen_holo_aligned_predicted_structures_bs_cropped/4fyw_1_CTP_4_holo_aligned_predicted_protein.pdb,NC1CCN([C@@H]2O[C@H](CO[PH](O)(O)O[PH](O)(O)O[PH](O)(O)O)[C@@H](O)[C@H]2O)C(O)N1 +4gk9_1_BMA-MAN-MAN-MAN-MAN_3,data/dockgen_set/dockgen_holo_aligned_predicted_structures_bs_cropped/4gk9_1_BMA-MAN-MAN-MAN-MAN_3_holo_aligned_predicted_protein.pdb,OC[C@H]1O[C@H](OC[C@H]2O[C@H](OC[C@H]3O[C@@H](O)[C@@H](O)[C@@H](O[C@H]4O[C@H](CO)[C@@H](O)[C@H](O)[C@@H]4O)[C@@H]3O)[C@@H](O)[C@@H](O[C@H]3O[C@H](CO)[C@@H](O)[C@H](O)[C@@H]3O)[C@@H]2O)[C@@H](O)[C@@H](O)[C@@H]1O +4h2f_1_ADN_0,data/dockgen_set/dockgen_holo_aligned_predicted_structures_bs_cropped/4h2f_1_ADN_0_holo_aligned_predicted_protein.pdb,NC1NCNC2C1NCN2[C@@H]1O[C@H](CO)[C@@H](O)[C@H]1O +4idk_1_1FE_0,data/dockgen_set/dockgen_holo_aligned_predicted_structures_bs_cropped/4idk_1_1FE_0_holo_aligned_predicted_protein.pdb,NCC(O)NC1CCC2NC(O)NC2C1 +4kgx_1_CTP_5,data/dockgen_set/dockgen_holo_aligned_predicted_structures_bs_cropped/4kgx_1_CTP_5_holo_aligned_predicted_protein.pdb,NC1CCN([C@@H]2O[C@H](CO[PH](O)(O)O[PH](O)(O)O[PH](O)(O)O)[C@@H](O)[C@H]2O)C(O)N1 +4mig_1_G3F_2,data/dockgen_set/dockgen_holo_aligned_predicted_structures_bs_cropped/4mig_1_G3F_2_holo_aligned_predicted_protein.pdb,OC[C@H]1O[C@@H](O)[C@H](O)[C@@H](F)[C@@H]1O +4mo2_2_FDA_0,data/dockgen_set/dockgen_holo_aligned_predicted_structures_bs_cropped/4mo2_2_FDA_0_holo_aligned_predicted_protein.pdb,CC1CC2[C@@H](CC1C)N[C@H]1C(O)NC(O)N[C@H]1N2C[C@H](O)[C@H](O)[C@H](O)CO[PH](O)(O)O[PH](O)(O)OC[C@H]1O[C@@H](N2CNC3C(N)NCNC32)[C@H](O)[C@@H]1O +4mos_1_GAF_1,data/dockgen_set/dockgen_holo_aligned_predicted_structures_bs_cropped/4mos_1_GAF_1_holo_aligned_predicted_protein.pdb,OC[C@H]1O[C@H](O)[C@H](F)[C@@H](O)[C@H]1O +4n4l_1_HG1_0,data/dockgen_set/dockgen_holo_aligned_predicted_structures_bs_cropped/4n4l_1_HG1_0_holo_aligned_predicted_protein.pdb,NC(O)CCCC1CCCCC1 
+4o0d_1_GLY_3,data/dockgen_set/dockgen_holo_aligned_predicted_structures_bs_cropped/4o0d_1_GLY_3_holo_aligned_predicted_protein.pdb,NCC(O)O +4o0f_1_GLY_3,data/dockgen_set/dockgen_holo_aligned_predicted_structures_bs_cropped/4o0f_1_GLY_3_holo_aligned_predicted_protein.pdb,NCC(O)O +4o95_1_245_0,data/dockgen_set/dockgen_holo_aligned_predicted_structures_bs_cropped/4o95_1_245_0_holo_aligned_predicted_protein.pdb,OC(NC1CCCCC1)N[C@H]1CCNC(Cl)C1 +4oal_2_245_0,data/dockgen_set/dockgen_holo_aligned_predicted_structures_bs_cropped/4oal_2_245_0_holo_aligned_predicted_protein.pdb,OC(NC1CCCCC1)N[C@H]1CCNC(Cl)C1 +4osx_1_GLY_2,data/dockgen_set/dockgen_holo_aligned_predicted_structures_bs_cropped/4osx_1_GLY_2_holo_aligned_predicted_protein.pdb,NCC(O)O +4osy_1_GLY_3,data/dockgen_set/dockgen_holo_aligned_predicted_structures_bs_cropped/4osy_1_GLY_3_holo_aligned_predicted_protein.pdb,NCC(O)O +4pfx_1_UDP_0,data/dockgen_set/dockgen_holo_aligned_predicted_structures_bs_cropped/4pfx_1_UDP_0_holo_aligned_predicted_protein.pdb,OC1CCN([C@@H]2O[C@H](CO[PH](O)(O)O[PH](O)(O)O)[C@@H](O)[C@H]2O)C(O)N1 +4phr_1_UDP_0,data/dockgen_set/dockgen_holo_aligned_predicted_structures_bs_cropped/4phr_1_UDP_0_holo_aligned_predicted_protein.pdb,OC1CCN([C@@H]2O[C@H](CO[PH](O)(O)O[PH](O)(O)O)[C@@H](O)[C@H]2O)C(O)N1 +4phs_1_UDP_0,data/dockgen_set/dockgen_holo_aligned_predicted_structures_bs_cropped/4phs_1_UDP_0_holo_aligned_predicted_protein.pdb,OC1CCN([C@@H]2O[C@H](CO[PH](O)([OH2+])O[PH](O)(O)[OH2+])[C@@H](O)[C@H]2O)C(O)N1 +4pya_1_2X3_0,data/dockgen_set/dockgen_holo_aligned_predicted_structures_bs_cropped/4pya_1_2X3_0_holo_aligned_predicted_protein.pdb,NC1NC(O)C2N[C@H]3N([C@@H]2N1)[C@@H]1O[C@H](CO[PH](O)(O)O[PH](O)(O)O[PH](O)(O)O)[C@@]3(O)[C@H]1O +4qa8_1_PJZ_0,data/dockgen_set/dockgen_holo_aligned_predicted_structures_bs_cropped/4qa8_1_PJZ_0_holo_aligned_predicted_protein.pdb,CCCCCCCCCCCCCC[C@@H](O)OC[C@@H](C)O[C@@H](O)CCCCCCCCCCC 
+4qo5_1_NAG_2,data/dockgen_set/dockgen_holo_aligned_predicted_structures_bs_cropped/4qo5_1_NAG_2_holo_aligned_predicted_protein.pdb,CC(O)N[C@H]1CO[C@H](CO)[C@@H](O)[C@@H]1O +4rhe_1_FMN_6,data/dockgen_set/dockgen_holo_aligned_predicted_structures_bs_cropped/4rhe_1_FMN_6_holo_aligned_predicted_protein.pdb,CC1C[C@@H]2[C@@H](C[C@@H]1C)NC1[C@H](O)NC(O)N[C@@H]1N2C[C@H](O)[C@H](O)[C@H](O)CO[PH]([O-])(O)O +4rpj_1_UDP_0,data/dockgen_set/dockgen_holo_aligned_predicted_structures_bs_cropped/4rpj_1_UDP_0_holo_aligned_predicted_protein.pdb,OC1CCN([C@@H]2O[C@H](CO[PH](O)(O)O[PH](O)(O)O)[C@@H](O)[C@H]2O)C(O)N1 +4rpm_1_HXC_0,data/dockgen_set/dockgen_holo_aligned_predicted_structures_bs_cropped/4rpm_1_HXC_0_holo_aligned_predicted_protein.pdb,CCCCCC(O)SC +4tvd_1_BGC_4,data/dockgen_set/dockgen_holo_aligned_predicted_structures_bs_cropped/4tvd_1_BGC_4_holo_aligned_predicted_protein.pdb,OC[C@H]1O[C@@H](O)[C@H](O)[C@@H](O)[C@@H]1O +4tvd_1_GLC_0,data/dockgen_set/dockgen_holo_aligned_predicted_structures_bs_cropped/4tvd_1_GLC_0_holo_aligned_predicted_protein.pdb,OC[C@H]1O[C@H](O)[C@H](O)[C@@H](O)[C@@H]1O +4u63_1_FAD_0,data/dockgen_set/dockgen_holo_aligned_predicted_structures_bs_cropped/4u63_1_FAD_0_holo_aligned_predicted_protein.pdb,CC1CC2NC3C(O)NC(O)NC3N(C[C@H](O)[C@H](O)[C@H](O)CO[PH](O)(O)O[PH](O)(O)OC[C@H]3O[C@@H](N4CNC5C(N)NCNC54)[C@H](O)[C@@H]3O)C2CC1C +4uoc_1_NCN_1,data/dockgen_set/dockgen_holo_aligned_predicted_structures_bs_cropped/4uoc_1_NCN_1_holo_aligned_predicted_protein.pdb,OC(O)C1CCCN([C@@H]2O[C@H](CO[PH](O)(O)O)[C@@H](O)[C@H]2O)C1 +4uuw_1_AMP_0,data/dockgen_set/dockgen_holo_aligned_predicted_structures_bs_cropped/4uuw_1_AMP_0_holo_aligned_predicted_protein.pdb,NC1NCNC2C1NCN2[C@@H]1O[C@H](CO[PH](O)(O)O)[C@@H](O)[C@H]1O +4xdr_1_ADN_0,data/dockgen_set/dockgen_holo_aligned_predicted_structures_bs_cropped/4xdr_1_ADN_0_holo_aligned_predicted_protein.pdb,NC1NCNC2C1NCN2[C@@H]1O[C@H](CO)[C@@H](O)[C@H]1O 
+4xfm_1_THE_0,data/dockgen_set/dockgen_holo_aligned_predicted_structures_bs_cropped/4xfm_1_THE_0_holo_aligned_predicted_protein.pdb,OC[C@@H](O)[C@H](O)C(O)O +4ydx_1_TCE_0,data/dockgen_set/dockgen_holo_aligned_predicted_structures_bs_cropped/4ydx_1_TCE_0_holo_aligned_predicted_protein.pdb,OC(O)CCP(CCC(O)O)CCC(O)O +4zav_1_4LS_8,data/dockgen_set/dockgen_holo_aligned_predicted_structures_bs_cropped/4zav_1_4LS_8_holo_aligned_predicted_protein.pdb,CC(C)CCN1C2CC(C)[C@@H](C)CC2N(C[C@H](O)[C@H](O)[C@H](O)CO[PH](O)(O)O)[C@H]2NC(O)N[C@@H](O)C21 +4zaw_1_4LU_1,data/dockgen_set/dockgen_holo_aligned_predicted_structures_bs_cropped/4zaw_1_4LU_1_holo_aligned_predicted_protein.pdb,C[C@H]1[C@@H](C)C[C@@H]2C3[C@@H]1C(C)(C)CCN3[C@H]1C(O)NC(O)NC1N2C[C@H](O)[C@H](O)[C@H](O)CO[PH](O)(O)O +4zay_1_4LS_6,data/dockgen_set/dockgen_holo_aligned_predicted_structures_bs_cropped/4zay_1_4LS_6_holo_aligned_predicted_protein.pdb,CC(C)CCN1[C@H]2[C@H](O)N[C@H](O)N[C@@H]2N(C[C@H](O)[C@H](O)[C@H](O)CO[PH](O)(O)O)[C@@H]2CC(C)C(C)C[C@H]21 +4zaz_1_4LS_6,data/dockgen_set/dockgen_holo_aligned_predicted_structures_bs_cropped/4zaz_1_4LS_6_holo_aligned_predicted_protein.pdb,CC(C)CCN1C2CC(C)[C@@H](C)CC2N(C[C@H](O)[C@H](O)[C@H](O)CO[PH](O)(O)O)C2N[C@@H](O)N[C@@H](O)C21 +4zqx_1_ATP_2,data/dockgen_set/dockgen_holo_aligned_predicted_structures_bs_cropped/4zqx_1_ATP_2_holo_aligned_predicted_protein.pdb,NC1NCNC2C1NCN2[C@@H]1O[C@H](CO[PH](O)(O)O[PH](O)(O)O[PH](O)(O)O)[C@@H](O)[C@H]1O +5a98_1_ATP_0,data/dockgen_set/dockgen_holo_aligned_predicted_structures_bs_cropped/5a98_1_ATP_0_holo_aligned_predicted_protein.pdb,NC1NCNC2C1NCN2[C@@H]1O[C@H](CO[PH](O)(O)O[PH](O)(O)O[PH](O)(O)O)[C@@H](O)[C@H]1O +5ae3_2_AWB_1,data/dockgen_set/dockgen_holo_aligned_predicted_structures_bs_cropped/5ae3_2_AWB_1_holo_aligned_predicted_protein.pdb,CCCCCC[C@H]1C(O)O[C@H](C)[C@H](N[C@H](O)[C@@H]2CCC[C@H](NCO)[C@H]2O)C(O)O[C@@H](C)[C@@H]1OC(O)CC(C)C 
+5b5s_1_BOG_0,data/dockgen_set/dockgen_holo_aligned_predicted_structures_bs_cropped/5b5s_1_BOG_0_holo_aligned_predicted_protein.pdb,CCCCCCCCO[C@@H]1O[C@H](CO)[C@@H](O)[C@H](O)[C@H]1O +5d9g_1_GLU-ASN-LEU-TYR-PHE-GLN_0,data/dockgen_set/dockgen_holo_aligned_predicted_structures_bs_cropped/5d9g_1_GLU-ASN-LEU-TYR-PHE-GLN_0_holo_aligned_predicted_protein.pdb,CC(C)C[C@H](N)C(=O)N[C@@H](Cc1ccc(O)cc1)C(=O)N[C@@H](Cc1ccccc1)C(=O)N[C@@H](CCC(N)=O)C(=O)O +5dnc_1_ASN_2,data/dockgen_set/dockgen_holo_aligned_predicted_structures_bs_cropped/5dnc_1_ASN_2_holo_aligned_predicted_protein.pdb,NC(O)C[C@H](N)C(O)O +5eno_1_5QG_0,data/dockgen_set/dockgen_holo_aligned_predicted_structures_bs_cropped/5eno_1_5QG_0_holo_aligned_predicted_protein.pdb,CC1(C)CC2C(CO1)C(N1CCOCC1)NC(SCCC1CCCCC1)[C@@H]2CN +5enp_1_5QF_0,data/dockgen_set/dockgen_holo_aligned_predicted_structures_bs_cropped/5enp_1_5QF_0_holo_aligned_predicted_protein.pdb,COCCN1CCN([C@@H]2NC(SCC[C@H]3CCC(OC)[C@@H](OC)C3)[C@H](CN)[C@H]3CC(C)(C)OCC23)CC1 +5enq_1_5QE_0,data/dockgen_set/dockgen_holo_aligned_predicted_structures_bs_cropped/5enq_1_5QE_0_holo_aligned_predicted_protein.pdb,CC(O)NC1CCC(CCSC2NC(N3C[C@@H](C)O[C@@H](C)C3)[C@@H]3COC(C)(C)CC3C2CN)CC1 +5enr_1_MBX_0,data/dockgen_set/dockgen_holo_aligned_predicted_structures_bs_cropped/5enr_1_MBX_0_holo_aligned_predicted_protein.pdb,CCC(O)NC1CCC(CCS[C@@H]2NC(N3C[C@H](C)O[C@@H](C)C3)C3COC(C)(C)CC3C2CN)CC1 +5ent_1_MIY_0,data/dockgen_set/dockgen_holo_aligned_predicted_structures_bs_cropped/5ent_1_MIY_0_holo_aligned_predicted_protein.pdb,CN(C)C1CCC(O)C2C1C[C@H]1C[C@H]3[C@H](N(C)C)C(O)C(C(N)O)[C@@H](O)[C@@]3(O)C(O)[C@@H]1C2O +5ers_1_AMP_1,data/dockgen_set/dockgen_holo_aligned_predicted_structures_bs_cropped/5ers_1_AMP_1_holo_aligned_predicted_protein.pdb,NC1NCNC2C1NCN2[C@@H]1O[C@H](CO[PH](O)(O)O)[C@@H](O)[C@H]1O +5f2t_1_PLM_0,data/dockgen_set/dockgen_holo_aligned_predicted_structures_bs_cropped/5f2t_1_PLM_0_holo_aligned_predicted_protein.pdb,CCCCCCCCCCCCCCCC(O)O 
+5f52_1_ASP_2,data/dockgen_set/dockgen_holo_aligned_predicted_structures_bs_cropped/5f52_1_ASP_2_holo_aligned_predicted_protein.pdb,N[C@@H](CC(O)O)C(O)O +5fiu_1_Y3J_3,data/dockgen_set/dockgen_holo_aligned_predicted_structures_bs_cropped/5fiu_1_Y3J_3_holo_aligned_predicted_protein.pdb,NC1NCNC2C1NCN2[C@@H]1O[C@H](C(F)F)[C@@H](O)[C@@H]1O +5fxd_1_H7Y_1,data/dockgen_set/dockgen_holo_aligned_predicted_structures_bs_cropped/5fxd_1_H7Y_1_holo_aligned_predicted_protein.pdb,CCC[C@H]1CCC(O)C(OC)C1 +5fxe_1_CIY_1,data/dockgen_set/dockgen_holo_aligned_predicted_structures_bs_cropped/5fxe_1_CIY_1_holo_aligned_predicted_protein.pdb,CO[C@H]1CC(CCCO)CCC1O +5fxf_1_BEZ_0,data/dockgen_set/dockgen_holo_aligned_predicted_structures_bs_cropped/5fxf_1_BEZ_0_holo_aligned_predicted_protein.pdb,OC(O)C1CCCCC1 +5gqi_1_ATP_7,data/dockgen_set/dockgen_holo_aligned_predicted_structures_bs_cropped/5gqi_1_ATP_7_holo_aligned_predicted_protein.pdb,N[C@@H]1NCN[C@H]2C1NCN2[C@@H]1O[C@H](CO[PH](O)(O)O[PH](O)(O)O[PH](O)(O)O)[C@@H](O)[C@H]1O +5gql_1_ATP_4,data/dockgen_set/dockgen_holo_aligned_predicted_structures_bs_cropped/5gql_1_ATP_4_holo_aligned_predicted_protein.pdb,NC1NCNC2C1NCN2[C@@H]1O[C@H](CO[PH](O)(O)O[PH](O)(O)O[PH](O)(O)O)[C@@H](O)[C@H]1O +5hhz_1_ZME_0,data/dockgen_set/dockgen_holo_aligned_predicted_structures_bs_cropped/5hhz_1_ZME_0_holo_aligned_predicted_protein.pdb,CC1CCN(C2NCNC3NCNC32)C1 +5hmr_1_FDZ_0,data/dockgen_set/dockgen_holo_aligned_predicted_structures_bs_cropped/5hmr_1_FDZ_0_holo_aligned_predicted_protein.pdb,OC(NC1CNNS1)N[C@H]1CCC[C@H](OC(F)(F)F)C1 +5hqx_1_EDZ_0,data/dockgen_set/dockgen_holo_aligned_predicted_structures_bs_cropped/5hqx_1_EDZ_0_holo_aligned_predicted_protein.pdb,OCC[C@H]1CCCC[C@@H]1NC(O)N[C@H]1CNNS1 +5hw0_1_GLU_2,data/dockgen_set/dockgen_holo_aligned_predicted_structures_bs_cropped/5hw0_1_GLU_2_holo_aligned_predicted_protein.pdb,N[C@@H](CCC(O)O)C(O)O 
+5ida_1_BMA_0,data/dockgen_set/dockgen_holo_aligned_predicted_structures_bs_cropped/5ida_1_BMA_0_holo_aligned_predicted_protein.pdb,OC[C@H]1O[C@@H](O)[C@@H](O)[C@@H](O)[C@@H]1O +5k3o_2_ASP_0,data/dockgen_set/dockgen_holo_aligned_predicted_structures_bs_cropped/5k3o_2_ASP_0_holo_aligned_predicted_protein.pdb,N[C@@H](CC(O)O)C(O)O +5k45_2_GLU_1,data/dockgen_set/dockgen_holo_aligned_predicted_structures_bs_cropped/5k45_2_GLU_1_holo_aligned_predicted_protein.pdb,N[C@@H](CCC(O)O)C(O)O +5k4h_2_GLU_3,data/dockgen_set/dockgen_holo_aligned_predicted_structures_bs_cropped/5k4h_2_GLU_3_holo_aligned_predicted_protein.pdb,N[C@@H](CCC(O)O)C(O)O +5k62_1_ASN-VAL_0,data/dockgen_set/dockgen_holo_aligned_predicted_structures_bs_cropped/5k62_1_ASN-VAL_0_holo_aligned_predicted_protein.pdb,CC(C)[C@@H](CO)NC(O)[C@@H](N)CC(N)O +5k63_1_ASN-GLY_0,data/dockgen_set/dockgen_holo_aligned_predicted_structures_bs_cropped/5k63_1_ASN-GLY_0_holo_aligned_predicted_protein.pdb,NC(O)C[C@H](N)C(O)NCCO +5k66_1_ASN-GLU_0,data/dockgen_set/dockgen_holo_aligned_predicted_structures_bs_cropped/5k66_1_ASN-GLU_0_holo_aligned_predicted_protein.pdb,NC(O)C[C@H](N)C(O)N[C@H](CO)CCC(O)O +5mh1_1_BMA_0,data/dockgen_set/dockgen_holo_aligned_predicted_structures_bs_cropped/5mh1_1_BMA_0_holo_aligned_predicted_protein.pdb,OC[C@H]1O[C@@H](O)[C@@H](O)[C@@H](O)[C@@H]1O +5u82_2_ZN0_0,data/dockgen_set/dockgen_holo_aligned_predicted_structures_bs_cropped/5u82_2_ZN0_0_holo_aligned_predicted_protein.pdb,CC[SnH](CC)CC +6a71_1_9UX_0,data/dockgen_set/dockgen_holo_aligned_predicted_structures_bs_cropped/6a71_1_9UX_0_holo_aligned_predicted_protein.pdb,O[Mo@]12S[Mo@@]1(O)S2 +6a72_1_9UX_0,data/dockgen_set/dockgen_holo_aligned_predicted_structures_bs_cropped/6a72_1_9UX_0_holo_aligned_predicted_protein.pdb,O[Mo@]12S[Mo@@]1(O)S2 +6b1b_1_TMO_15,data/dockgen_set/dockgen_holo_aligned_predicted_structures_bs_cropped/6b1b_1_TMO_15_holo_aligned_predicted_protein.pdb,C[N+](C)(C)O 
+6ea9_1_9BG_0,data/dockgen_set/dockgen_holo_aligned_predicted_structures_bs_cropped/6ea9_1_9BG_0_holo_aligned_predicted_protein.pdb,NC1NC(O)C2NCN([C@@H]3O[C@H](CO)[C@@H](O)[C@H]3O[PH]([O-])(O)OC[C@H]3O[C@@H](N4CNC5C(N)NCNC54)[C@H](O)[C@@H]3O[PH]([O-])([O-])O)C2N1 +6ep5_1_ADP_1,data/dockgen_set/dockgen_holo_aligned_predicted_structures_bs_cropped/6ep5_1_ADP_1_holo_aligned_predicted_protein.pdb,NC1NCNC2C1NCN2[C@@H]1O[C@H](CO[PH](O)(O)O[PH](O)(O)O)[C@@H](O)[C@H]1O +6etf_1_AMP_0,data/dockgen_set/dockgen_holo_aligned_predicted_structures_bs_cropped/6etf_1_AMP_0_holo_aligned_predicted_protein.pdb,NC1NCNC2[C@H]1NCN2[C@@H]1O[C@H](CO[PH](O)(O)O)[C@@H](O)[C@H]1O +6fgc_1_ADP_1,data/dockgen_set/dockgen_holo_aligned_predicted_structures_bs_cropped/6fgc_1_ADP_1_holo_aligned_predicted_protein.pdb,NC1NCNC2C1NCN2[C@@H]1O[C@H](CO[PH](O)(O)O[PH](O)(O)O)[C@@H](O)[C@H]1O +6fgc_1_D95_1,data/dockgen_set/dockgen_holo_aligned_predicted_structures_bs_cropped/6fgc_1_D95_1_holo_aligned_predicted_protein.pdb,C[C@H]1[C@H](OC(O)CCC(O)O)O[C@@H]2O[C@@]3(C)CC[C@H]4[C@H](C)CC[C@@H]1[C@@]24OO3 +6gbf_1_AMP_0,data/dockgen_set/dockgen_holo_aligned_predicted_structures_bs_cropped/6gbf_1_AMP_0_holo_aligned_predicted_protein.pdb,NC1NCNC2C1NCN2[C@@H]1O[C@H](CO[PH](O)(O)O)[C@@H](O)[C@H]1O +6jls_1_FMN_6,data/dockgen_set/dockgen_holo_aligned_predicted_structures_bs_cropped/6jls_1_FMN_6_holo_aligned_predicted_protein.pdb,CC1CC2NC3C(O)NC(O)NC3N(C[C@H](O)[C@H](O)[C@H](O)CO[PH](O)(O)O)C2CC1C +6n19_2_K8V_0,data/dockgen_set/dockgen_holo_aligned_predicted_structures_bs_cropped/6n19_2_K8V_0_holo_aligned_predicted_protein.pdb,CCC(O)NC1CCC(C([O-])O)C(C([O-])O)C1 +6nco_1_KQP_0,data/dockgen_set/dockgen_holo_aligned_predicted_structures_bs_cropped/6nco_1_KQP_0_holo_aligned_predicted_protein.pdb,CC(C)(O)C1CCC([C@H]2CC(Cl)C[C@@H](C34(C(N)N)CC3C4)C2)CC1 +6npp_1_KWG_0,data/dockgen_set/dockgen_holo_aligned_predicted_structures_bs_cropped/6npp_1_KWG_0_holo_aligned_predicted_protein.pdb,[O-]C(O)C1CCCC(CCC2CCCCC2)C1N1CCCC1 
+6o6y_1_ACK_0,data/dockgen_set/dockgen_holo_aligned_predicted_structures_bs_cropped/6o6y_1_ACK_0_holo_aligned_predicted_protein.pdb,NC1NCNC2C1NCN2[C@@H]1O[C@H](CO)[C@H]2O[PH]([O-])(O)O[C@H]21 +6o70_1_ACK_1,data/dockgen_set/dockgen_holo_aligned_predicted_structures_bs_cropped/6o70_1_ACK_1_holo_aligned_predicted_protein.pdb,NC1NCNC2C1NCN2[C@@H]1O[C@H](CO)[C@H]2O[PH]([O-])(O)O[C@H]21 +6pa2_1_ASP_2,data/dockgen_set/dockgen_holo_aligned_predicted_structures_bs_cropped/6pa2_1_ASP_2_holo_aligned_predicted_protein.pdb,N[C@@H](CC(O)O)C(O)O +6pa6_2_ASN_0,data/dockgen_set/dockgen_holo_aligned_predicted_structures_bs_cropped/6pa6_2_ASN_0_holo_aligned_predicted_protein.pdb,NC(O)C[C@H](N)C(O)O +6paa_1_ASP_2,data/dockgen_set/dockgen_holo_aligned_predicted_structures_bs_cropped/6paa_1_ASP_2_holo_aligned_predicted_protein.pdb,N[C@@H](CC(O)O)C(O)O +6qkr_1_FAD_0,data/dockgen_set/dockgen_holo_aligned_predicted_structures_bs_cropped/6qkr_1_FAD_0_holo_aligned_predicted_protein.pdb,CC1CC2NC3C(NC(O)N[C@H]3O)N(C[C@H](O)[C@H](O)[C@H](O)CO[PH](O)(O)O[PH](O)(O)OC[C@H]3O[C@@H](N4CNC5C4NCN[C@H]5N)[C@H](O)[C@@H]3O)C2CC1C +6rms_1_AMP_1,data/dockgen_set/dockgen_holo_aligned_predicted_structures_bs_cropped/6rms_1_AMP_1_holo_aligned_predicted_protein.pdb,NC1NCNC2C1NCN2[C@@H]1O[C@H](CO[PH](O)(O)O)[C@@H](O)[C@H]1O +6ryz_1_SAM_2,data/dockgen_set/dockgen_holo_aligned_predicted_structures_bs_cropped/6ryz_1_SAM_2_holo_aligned_predicted_protein.pdb,C[S@@H](CC[C@H](N)C(O)O)C[C@H]1O[C@@H](N2CNC3C2NCN[C@@H]3N)[C@H](O)[C@@H]1O +6rz2_1_5CD_2,data/dockgen_set/dockgen_holo_aligned_predicted_structures_bs_cropped/6rz2_1_5CD_2_holo_aligned_predicted_protein.pdb,NC1NCNC2[C@H]1NCN2[C@@H]1O[C@H](CCl)[C@@H](O)[C@H]1O +6tvg_1_AP2_1,data/dockgen_set/dockgen_holo_aligned_predicted_structures_bs_cropped/6tvg_1_AP2_1_holo_aligned_predicted_protein.pdb,NC1NCNC2C1NCN2[C@@H]1O[C@H](CO[PH](O)(O)C[PH](O)(O)O)[C@@H](O)[C@H]1O 
+6uqy_2_AT3_0,data/dockgen_set/dockgen_holo_aligned_predicted_structures_bs_cropped/6uqy_2_AT3_0_holo_aligned_predicted_protein.pdb,C[C@@H](O)SCC[N+](C)(C)C +6ur1_2_AT3_0,data/dockgen_set/dockgen_holo_aligned_predicted_structures_bs_cropped/6ur1_2_AT3_0_holo_aligned_predicted_protein.pdb,C[C@@H](O)SCC[N+](C)(C)C +6v2a_1_ASN_3,data/dockgen_set/dockgen_holo_aligned_predicted_structures_bs_cropped/6v2a_1_ASN_3_holo_aligned_predicted_protein.pdb,NC(O)C[C@H](N)C(O)O +6wyz_1_DGL_1,data/dockgen_set/dockgen_holo_aligned_predicted_structures_bs_cropped/6wyz_1_DGL_1_holo_aligned_predicted_protein.pdb,N[C@H](CCC(O)O)C(O)O +6xb3_3_9BG_1,data/dockgen_set/dockgen_holo_aligned_predicted_structures_bs_cropped/6xb3_3_9BG_1_holo_aligned_predicted_protein.pdb,NC1NC(O)C2NCN([C@@H]3O[C@H](CO)[C@@H](O)[C@H]3O[PH]([O-])(O)OC[C@H]3O[C@@H](N4CNC5C(N)NCNC54)[C@H](O)[C@@H]3O[PH]([O-])(O)O)C2N1 +6xug_1_O1Q_0,data/dockgen_set/dockgen_holo_aligned_predicted_structures_bs_cropped/6xug_1_O1Q_0_holo_aligned_predicted_protein.pdb,CC1CCC[C@H](N2NCC[C@H]2C2CC(Cl)C3NNN(C4CC[C@H]5CNNC5C4)C3C2)C1 +6yao_1_OJ2_0,data/dockgen_set/dockgen_holo_aligned_predicted_structures_bs_cropped/6yao_1_OJ2_0_holo_aligned_predicted_protein.pdb,OCC[C@H]1CCCC[C@@H]1NC(O)N[C@H]1CCC[C@@H](OC(F)(F)F)C1 +6yap_1_OHZ_0,data/dockgen_set/dockgen_holo_aligned_predicted_structures_bs_cropped/6yap_1_OHZ_0_holo_aligned_predicted_protein.pdb,OCC[C@H]1CCCC[C@@H]1NC(O)N[C@H]1CC(Cl)C[C@H](OC(F)(F)F)C1 +6yaq_1_OHZ_0,data/dockgen_set/dockgen_holo_aligned_predicted_structures_bs_cropped/6yaq_1_OHZ_0_holo_aligned_predicted_protein.pdb,OCCC1CCCC[C@H]1N[C@H](O)N[C@H]1C[C@@H](Cl)C[C@@H](OC(F)(F)F)C1 diff --git a/data/test_cases/posebusters_benchmark/ensemble_inputs.csv b/data/test_cases/posebusters_benchmark/ensemble_inputs.csv index 328b05c1..6bc47cc3 100644 --- a/data/test_cases/posebusters_benchmark/ensemble_inputs.csv +++ b/data/test_cases/posebusters_benchmark/ensemble_inputs.csv @@ -1,429 +1,429 @@ protein_input,ligand_smiles,name 
-data/posebusters_benchmark_set/5S8I_2LY/5S8I_2LY_protein.pdb,,5S8I_2LY -data/posebusters_benchmark_set/5SAK_ZRY/5SAK_ZRY_protein.pdb,,5SAK_ZRY -data/posebusters_benchmark_set/5SB2_1K2/5SB2_1K2_protein.pdb,,5SB2_1K2 -data/posebusters_benchmark_set/5SD5_HWI/5SD5_HWI_protein.pdb,,5SD5_HWI -data/posebusters_benchmark_set/5SIS_JSM/5SIS_JSM_protein.pdb,,5SIS_JSM -data/posebusters_benchmark_set/6M2B_EZO/6M2B_EZO_protein.pdb,,6M2B_EZO -data/posebusters_benchmark_set/6M73_FNR/6M73_FNR_protein.pdb,,6M73_FNR -data/posebusters_benchmark_set/6T88_MWQ/6T88_MWQ_protein.pdb,,6T88_MWQ -data/posebusters_benchmark_set/6TW5_9M2/6TW5_9M2_protein.pdb,,6TW5_9M2 -data/posebusters_benchmark_set/6TW7_NZB/6TW7_NZB_protein.pdb,,6TW7_NZB -data/posebusters_benchmark_set/6VS3_R6V/6VS3_R6V_protein.pdb,,6VS3_R6V -data/posebusters_benchmark_set/6VTA_AKN/6VTA_AKN_protein.pdb,,6VTA_AKN -data/posebusters_benchmark_set/6W59_SZD/6W59_SZD_protein.pdb,,6W59_SZD -data/posebusters_benchmark_set/6WTN_RXT/6WTN_RXT_protein.pdb,,6WTN_RXT -data/posebusters_benchmark_set/6X8D_ARA/6X8D_ARA_protein.pdb,,6X8D_ARA -data/posebusters_benchmark_set/6XAF_GDP/6XAF_GDP_protein.pdb,,6XAF_GDP -data/posebusters_benchmark_set/6XBO_5MC/6XBO_5MC_protein.pdb,,6XBO_5MC -data/posebusters_benchmark_set/6XCT_478/6XCT_478_protein.pdb,,6XCT_478 -data/posebusters_benchmark_set/6XG5_TOP/6XG5_TOP_protein.pdb,,6XG5_TOP -data/posebusters_benchmark_set/6XHT_V2V/6XHT_V2V_protein.pdb,,6XHT_V2V -data/posebusters_benchmark_set/6XM9_V55/6XM9_V55_protein.pdb,,6XM9_V55 -data/posebusters_benchmark_set/6XUM_30L/6XUM_30L_protein.pdb,,6XUM_30L -data/posebusters_benchmark_set/6Y7L_QMG/6Y7L_QMG_protein.pdb,,6Y7L_QMG -data/posebusters_benchmark_set/6YDY_K73/6YDY_K73_protein.pdb,,6YDY_K73 -data/posebusters_benchmark_set/6YJA_2BA/6YJA_2BA_protein.pdb,,6YJA_2BA -data/posebusters_benchmark_set/6YMS_OZH/6YMS_OZH_protein.pdb,,6YMS_OZH -data/posebusters_benchmark_set/6YQV_8K2/6YQV_8K2_protein.pdb,,6YQV_8K2 
-data/posebusters_benchmark_set/6YQW_82I/6YQW_82I_protein.pdb,,6YQW_82I -data/posebusters_benchmark_set/6YR2_T1C/6YR2_T1C_protein.pdb,,6YR2_T1C -data/posebusters_benchmark_set/6YRV_PJ8/6YRV_PJ8_protein.pdb,,6YRV_PJ8 -data/posebusters_benchmark_set/6YSP_PAL/6YSP_PAL_protein.pdb,,6YSP_PAL -data/posebusters_benchmark_set/6YT6_PKE/6YT6_PKE_protein.pdb,,6YT6_PKE -data/posebusters_benchmark_set/6YYO_Q1K/6YYO_Q1K_protein.pdb,,6YYO_Q1K -data/posebusters_benchmark_set/6Z0R_Q4H/6Z0R_Q4H_protein.pdb,,6Z0R_Q4H -data/posebusters_benchmark_set/6Z14_Q4Z/6Z14_Q4Z_protein.pdb,,6Z14_Q4Z -data/posebusters_benchmark_set/6Z1C_7EY/6Z1C_7EY_protein.pdb,,6Z1C_7EY -data/posebusters_benchmark_set/6Z2C_Q5E/6Z2C_Q5E_protein.pdb,,6Z2C_Q5E -data/posebusters_benchmark_set/6Z4N_Q7B/6Z4N_Q7B_protein.pdb,,6Z4N_Q7B -data/posebusters_benchmark_set/6Z5Z_BDF/6Z5Z_BDF_protein.pdb,,6Z5Z_BDF -data/posebusters_benchmark_set/6ZAE_ACV/6ZAE_ACV_protein.pdb,,6ZAE_ACV -data/posebusters_benchmark_set/6ZC3_JOR/6ZC3_JOR_protein.pdb,,6ZC3_JOR -data/posebusters_benchmark_set/6ZCY_QF8/6ZCY_QF8_protein.pdb,,6ZCY_QF8 -data/posebusters_benchmark_set/6ZK5_IMH/6ZK5_IMH_protein.pdb,,6ZK5_IMH -data/posebusters_benchmark_set/6ZPB_3D1/6ZPB_3D1_protein.pdb,,6ZPB_3D1 -data/posebusters_benchmark_set/6ZR8_QOZ/6ZR8_QOZ_protein.pdb,,6ZR8_QOZ -data/posebusters_benchmark_set/6ZT2_QPK/6ZT2_QPK_protein.pdb,,6ZT2_QPK -data/posebusters_benchmark_set/6ZX3_QRZ/6ZX3_QRZ_protein.pdb,,6ZX3_QRZ -data/posebusters_benchmark_set/6ZXQ_IMO/6ZXQ_IMO_protein.pdb,,6ZXQ_IMO -data/posebusters_benchmark_set/7A1P_QW2/7A1P_QW2_protein.pdb,,7A1P_QW2 -data/posebusters_benchmark_set/7A9E_R4W/7A9E_R4W_protein.pdb,,7A9E_R4W -data/posebusters_benchmark_set/7A9H_TPP/7A9H_TPP_protein.pdb,,7A9H_TPP -data/posebusters_benchmark_set/7AA0_R6B/7AA0_R6B_protein.pdb,,7AA0_R6B -data/posebusters_benchmark_set/7AFX_R9K/7AFX_R9K_protein.pdb,,7AFX_R9K -data/posebusters_benchmark_set/7AKL_RK5/7AKL_RK5_protein.pdb,,7AKL_RK5 
-data/posebusters_benchmark_set/7AMC_73B/7AMC_73B_protein.pdb,,7AMC_73B -data/posebusters_benchmark_set/7AN5_RDH/7AN5_RDH_protein.pdb,,7AN5_RDH -data/posebusters_benchmark_set/7AS1_21G/7AS1_21G_protein.pdb,,7AS1_21G -data/posebusters_benchmark_set/7AVI_S2Q/7AVI_S2Q_protein.pdb,,7AVI_S2Q -data/posebusters_benchmark_set/7B0E_C2E/7B0E_C2E_protein.pdb,,7B0E_C2E -data/posebusters_benchmark_set/7B2C_TP7/7B2C_TP7_protein.pdb,,7B2C_TP7 -data/posebusters_benchmark_set/7B94_ANP/7B94_ANP_protein.pdb,,7B94_ANP -data/posebusters_benchmark_set/7BA0_T5H/7BA0_T5H_protein.pdb,,7BA0_T5H -data/posebusters_benchmark_set/7BCP_GCO/7BCP_GCO_protein.pdb,,7BCP_GCO -data/posebusters_benchmark_set/7BHX_TO5/7BHX_TO5_protein.pdb,,7BHX_TO5 -data/posebusters_benchmark_set/7BJ6_TVK/7BJ6_TVK_protein.pdb,,7BJ6_TVK -data/posebusters_benchmark_set/7BJJ_TVW/7BJJ_TVW_protein.pdb,,7BJJ_TVW -data/posebusters_benchmark_set/7BKA_4JC/7BKA_4JC_protein.pdb,,7BKA_4JC -data/posebusters_benchmark_set/7BLA_WCS/7BLA_WCS_protein.pdb,,7BLA_WCS -data/posebusters_benchmark_set/7BLG_GAL/7BLG_GAL_protein.pdb,,7BLG_GAL -data/posebusters_benchmark_set/7BMI_U4B/7BMI_U4B_protein.pdb,,7BMI_U4B -data/posebusters_benchmark_set/7BNH_BEZ/7BNH_BEZ_protein.pdb,,7BNH_BEZ -data/posebusters_benchmark_set/7BTT_F8R/7BTT_F8R_protein.pdb,,7BTT_F8R -data/posebusters_benchmark_set/7C0U_FGO/7C0U_FGO_protein.pdb,,7C0U_FGO -data/posebusters_benchmark_set/7C3U_AZG/7C3U_AZG_protein.pdb,,7C3U_AZG -data/posebusters_benchmark_set/7C6P_SQH/7C6P_SQH_protein.pdb,,7C6P_SQH -data/posebusters_benchmark_set/7C8Q_DSG/7C8Q_DSG_protein.pdb,,7C8Q_DSG -data/posebusters_benchmark_set/7CD9_FVR/7CD9_FVR_protein.pdb,,7CD9_FVR -data/posebusters_benchmark_set/7CIJ_G0C/7CIJ_G0C_protein.pdb,,7CIJ_G0C -data/posebusters_benchmark_set/7CL8_TES/7CL8_TES_protein.pdb,,7CL8_TES -data/posebusters_benchmark_set/7CNQ_G8X/7CNQ_G8X_protein.pdb,,7CNQ_G8X -data/posebusters_benchmark_set/7CNS_PMV/7CNS_PMV_protein.pdb,,7CNS_PMV 
-data/posebusters_benchmark_set/7CTM_BDP/7CTM_BDP_protein.pdb,,7CTM_BDP -data/posebusters_benchmark_set/7CUO_PHB/7CUO_PHB_protein.pdb,,7CUO_PHB -data/posebusters_benchmark_set/7D0P_1VU/7D0P_1VU_protein.pdb,,7D0P_1VU -data/posebusters_benchmark_set/7D5C_GV6/7D5C_GV6_protein.pdb,,7D5C_GV6 -data/posebusters_benchmark_set/7D6O_MTE/7D6O_MTE_protein.pdb,,7D6O_MTE -data/posebusters_benchmark_set/7D8Q_GZF/7D8Q_GZF_protein.pdb,,7D8Q_GZF -data/posebusters_benchmark_set/7D9L_GSF/7D9L_GSF_protein.pdb,,7D9L_GSF -data/posebusters_benchmark_set/7DIN_MPO/7DIN_MPO_protein.pdb,,7DIN_MPO -data/posebusters_benchmark_set/7DKT_GLF/7DKT_GLF_protein.pdb,,7DKT_GLF -data/posebusters_benchmark_set/7DQL_4CL/7DQL_4CL_protein.pdb,,7DQL_4CL -data/posebusters_benchmark_set/7DUA_HJ0/7DUA_HJ0_protein.pdb,,7DUA_HJ0 -data/posebusters_benchmark_set/7E2S_BLA/7E2S_BLA_protein.pdb,,7E2S_BLA -data/posebusters_benchmark_set/7E4L_MDN/7E4L_MDN_protein.pdb,,7E4L_MDN -data/posebusters_benchmark_set/7EBG_J0L/7EBG_J0L_protein.pdb,,7EBG_J0L -data/posebusters_benchmark_set/7ECR_SIN/7ECR_SIN_protein.pdb,,7ECR_SIN -data/posebusters_benchmark_set/7ED2_A3P/7ED2_A3P_protein.pdb,,7ED2_A3P -data/posebusters_benchmark_set/7ELT_TYM/7ELT_TYM_protein.pdb,,7ELT_TYM -data/posebusters_benchmark_set/7EN7_J79/7EN7_J79_protein.pdb,,7EN7_J79 -data/posebusters_benchmark_set/7EPV_FDA/7EPV_FDA_protein.pdb,,7EPV_FDA -data/posebusters_benchmark_set/7ES1_UDP/7ES1_UDP_protein.pdb,,7ES1_UDP -data/posebusters_benchmark_set/7F51_BA7/7F51_BA7_protein.pdb,,7F51_BA7 -data/posebusters_benchmark_set/7F5D_EUO/7F5D_EUO_protein.pdb,,7F5D_EUO -data/posebusters_benchmark_set/7F8T_FAD/7F8T_FAD_protein.pdb,,7F8T_FAD -data/posebusters_benchmark_set/7FB7_8NF/7FB7_8NF_protein.pdb,,7FB7_8NF -data/posebusters_benchmark_set/7FHA_ADX/7FHA_ADX_protein.pdb,,7FHA_ADX -data/posebusters_benchmark_set/7FRX_O88/7FRX_O88_protein.pdb,,7FRX_O88 -data/posebusters_benchmark_set/7FT9_4MB/7FT9_4MB_protein.pdb,,7FT9_4MB 
-data/posebusters_benchmark_set/7JG0_GAR/7JG0_GAR_protein.pdb,,7JG0_GAR -data/posebusters_benchmark_set/7JGW_V9S/7JGW_V9S_protein.pdb,,7JGW_V9S -data/posebusters_benchmark_set/7JHQ_VAJ/7JHQ_VAJ_protein.pdb,,7JHQ_VAJ -data/posebusters_benchmark_set/7JMV_4NC/7JMV_4NC_protein.pdb,,7JMV_4NC -data/posebusters_benchmark_set/7JNB_A2G/7JNB_A2G_protein.pdb,,7JNB_A2G -data/posebusters_benchmark_set/7JR8_VH7/7JR8_VH7_protein.pdb,,7JR8_VH7 -data/posebusters_benchmark_set/7JUD_MMA/7JUD_MMA_protein.pdb,,7JUD_MMA -data/posebusters_benchmark_set/7JXX_VP7/7JXX_VP7_protein.pdb,,7JXX_VP7 -data/posebusters_benchmark_set/7JY3_VUD/7JY3_VUD_protein.pdb,,7JY3_VUD -data/posebusters_benchmark_set/7K0V_VQP/7K0V_VQP_protein.pdb,,7K0V_VQP -data/posebusters_benchmark_set/7K41_VUA/7K41_VUA_protein.pdb,,7K41_VUA -data/posebusters_benchmark_set/7KB1_WBJ/7KB1_WBJ_protein.pdb,,7KB1_WBJ -data/posebusters_benchmark_set/7KC5_BJZ/7KC5_BJZ_protein.pdb,,7KC5_BJZ -data/posebusters_benchmark_set/7KFO_IAC/7KFO_IAC_protein.pdb,,7KFO_IAC -data/posebusters_benchmark_set/7KLX_WOV/7KLX_WOV_protein.pdb,,7KLX_WOV -data/posebusters_benchmark_set/7KM8_WPD/7KM8_WPD_protein.pdb,,7KM8_WPD -data/posebusters_benchmark_set/7KP6_WTP/7KP6_WTP_protein.pdb,,7KP6_WTP -data/posebusters_benchmark_set/7KQU_YOF/7KQU_YOF_protein.pdb,,7KQU_YOF -data/posebusters_benchmark_set/7KRU_ATP/7KRU_ATP_protein.pdb,,7KRU_ATP -data/posebusters_benchmark_set/7KZ9_XN7/7KZ9_XN7_protein.pdb,,7KZ9_XN7 -data/posebusters_benchmark_set/7L00_XCJ/7L00_XCJ_protein.pdb,,7L00_XCJ -data/posebusters_benchmark_set/7L03_F9F/7L03_F9F_protein.pdb,,7L03_F9F -data/posebusters_benchmark_set/7L5F_XNG/7L5F_XNG_protein.pdb,,7L5F_XNG -data/posebusters_benchmark_set/7L6D_BMF/7L6D_BMF_protein.pdb,,7L6D_BMF -data/posebusters_benchmark_set/7L7C_XQ1/7L7C_XQ1_protein.pdb,,7L7C_XQ1 -data/posebusters_benchmark_set/7L81_UD4/7L81_UD4_protein.pdb,,7L81_UD4 -data/posebusters_benchmark_set/7LB3_XXS/7LB3_XXS_protein.pdb,,7LB3_XXS 
-data/posebusters_benchmark_set/7LCU_XTA/7LCU_XTA_protein.pdb,,7LCU_XTA -data/posebusters_benchmark_set/7LEV_0JO/7LEV_0JO_protein.pdb,,7LEV_0JO -data/posebusters_benchmark_set/7LJN_GTP/7LJN_GTP_protein.pdb,,7LJN_GTP -data/posebusters_benchmark_set/7LMO_NYO/7LMO_NYO_protein.pdb,,7LMO_NYO -data/posebusters_benchmark_set/7LOE_Y84/7LOE_Y84_protein.pdb,,7LOE_Y84 -data/posebusters_benchmark_set/7LOU_IFM/7LOU_IFM_protein.pdb,,7LOU_IFM -data/posebusters_benchmark_set/7LT0_ONJ/7LT0_ONJ_protein.pdb,,7LT0_ONJ -data/posebusters_benchmark_set/7LZD_YHY/7LZD_YHY_protein.pdb,,7LZD_YHY -data/posebusters_benchmark_set/7LZQ_YJV/7LZQ_YJV_protein.pdb,,7LZQ_YJV -data/posebusters_benchmark_set/7M31_TDR/7M31_TDR_protein.pdb,,7M31_TDR -data/posebusters_benchmark_set/7M3H_YPV/7M3H_YPV_protein.pdb,,7M3H_YPV -data/posebusters_benchmark_set/7M41_YQG/7M41_YQG_protein.pdb,,7M41_YQG -data/posebusters_benchmark_set/7M6K_YRJ/7M6K_YRJ_protein.pdb,,7M6K_YRJ -data/posebusters_benchmark_set/7MAE_XUS/7MAE_XUS_protein.pdb,,7MAE_XUS -data/posebusters_benchmark_set/7MEU_MGP/7MEU_MGP_protein.pdb,,7MEU_MGP -data/posebusters_benchmark_set/7MFP_Z7P/7MFP_Z7P_protein.pdb,,7MFP_Z7P -data/posebusters_benchmark_set/7MGT_ZD4/7MGT_ZD4_protein.pdb,,7MGT_ZD4 -data/posebusters_benchmark_set/7MGY_ZD1/7MGY_ZD1_protein.pdb,,7MGY_ZD1 -data/posebusters_benchmark_set/7MMH_ZJY/7MMH_ZJY_protein.pdb,,7MMH_ZJY -data/posebusters_benchmark_set/7MOI_HPS/7MOI_HPS_protein.pdb,,7MOI_HPS -data/posebusters_benchmark_set/7MRH_ZMJ/7MRH_ZMJ_protein.pdb,,7MRH_ZMJ -data/posebusters_benchmark_set/7MS7_ZQ1/7MS7_ZQ1_protein.pdb,,7MS7_ZQ1 -data/posebusters_benchmark_set/7MSR_DCA/7MSR_DCA_protein.pdb,,7MSR_DCA -data/posebusters_benchmark_set/7MWN_WI5/7MWN_WI5_protein.pdb,,7MWN_WI5 -data/posebusters_benchmark_set/7MWU_ZPM/7MWU_ZPM_protein.pdb,,7MWU_ZPM -data/posebusters_benchmark_set/7MY1_IPE/7MY1_IPE_protein.pdb,,7MY1_IPE -data/posebusters_benchmark_set/7MYU_ZR7/7MYU_ZR7_protein.pdb,,7MYU_ZR7 
-data/posebusters_benchmark_set/7MZS_GLA/7MZS_GLA_protein.pdb,,7MZS_GLA -data/posebusters_benchmark_set/7N03_ZRP/7N03_ZRP_protein.pdb,,7N03_ZRP -data/posebusters_benchmark_set/7N4N_0BK/7N4N_0BK_protein.pdb,,7N4N_0BK -data/posebusters_benchmark_set/7N4W_P4V/7N4W_P4V_protein.pdb,,7N4W_P4V -data/posebusters_benchmark_set/7N6F_0I1/7N6F_0I1_protein.pdb,,7N6F_0I1 -data/posebusters_benchmark_set/7N7B_T3F/7N7B_T3F_protein.pdb,,7N7B_T3F -data/posebusters_benchmark_set/7N7H_CTP/7N7H_CTP_protein.pdb,,7N7H_CTP -data/posebusters_benchmark_set/7NA4_1I9/7NA4_1I9_protein.pdb,,7NA4_1I9 -data/posebusters_benchmark_set/7NB4_U6Q/7NB4_U6Q_protein.pdb,,7NB4_U6Q -data/posebusters_benchmark_set/7NF0_BYN/7NF0_BYN_protein.pdb,,7NF0_BYN -data/posebusters_benchmark_set/7NF3_4LU/7NF3_4LU_protein.pdb,,7NF3_4LU -data/posebusters_benchmark_set/7NFB_GEN/7NFB_GEN_protein.pdb,,7NFB_GEN -data/posebusters_benchmark_set/7NGW_UAW/7NGW_UAW_protein.pdb,,7NGW_UAW -data/posebusters_benchmark_set/7NLK_UHK/7NLK_UHK_protein.pdb,,7NLK_UHK -data/posebusters_benchmark_set/7NLV_UJE/7NLV_UJE_protein.pdb,,7NLV_UJE -data/posebusters_benchmark_set/7NML_I7B/7NML_I7B_protein.pdb,,7NML_I7B -data/posebusters_benchmark_set/7NP6_UK8/7NP6_UK8_protein.pdb,,7NP6_UK8 -data/posebusters_benchmark_set/7NPL_UKZ/7NPL_UKZ_protein.pdb,,7NPL_UKZ -data/posebusters_benchmark_set/7NR6_UO8/7NR6_UO8_protein.pdb,,7NR6_UO8 -data/posebusters_benchmark_set/7NR8_UOE/7NR8_UOE_protein.pdb,,7NR8_UOE -data/posebusters_benchmark_set/7NSW_HC4/7NSW_HC4_protein.pdb,,7NSW_HC4 -data/posebusters_benchmark_set/7NTG_F6R/7NTG_F6R_protein.pdb,,7NTG_F6R -data/posebusters_benchmark_set/7NU0_DCL/7NU0_DCL_protein.pdb,,7NU0_DCL -data/posebusters_benchmark_set/7NUT_GLP/7NUT_GLP_protein.pdb,,7NUT_GLP -data/posebusters_benchmark_set/7NXO_UU8/7NXO_UU8_protein.pdb,,7NXO_UU8 -data/posebusters_benchmark_set/7O0N_CDP/7O0N_CDP_protein.pdb,,7O0N_CDP -data/posebusters_benchmark_set/7O1T_5X8/7O1T_5X8_protein.pdb,,7O1T_5X8 
-data/posebusters_benchmark_set/7OCB_V88/7OCB_V88_protein.pdb,,7OCB_V88 -data/posebusters_benchmark_set/7ODX_DGP/7ODX_DGP_protein.pdb,,7ODX_DGP -data/posebusters_benchmark_set/7ODY_DGI/7ODY_DGI_protein.pdb,,7ODY_DGI -data/posebusters_benchmark_set/7OEO_V9Z/7OEO_V9Z_protein.pdb,,7OEO_V9Z -data/posebusters_benchmark_set/7OFF_VCB/7OFF_VCB_protein.pdb,,7OFF_VCB -data/posebusters_benchmark_set/7OFK_VCH/7OFK_VCH_protein.pdb,,7OFK_VCH -data/posebusters_benchmark_set/7OKC_VFE/7OKC_VFE_protein.pdb,,7OKC_VFE -data/posebusters_benchmark_set/7OKF_VH5/7OKF_VH5_protein.pdb,,7OKF_VH5 -data/posebusters_benchmark_set/7OLI_8HG/7OLI_8HG_protein.pdb,,7OLI_8HG -data/posebusters_benchmark_set/7OLT_58J/7OLT_58J_protein.pdb,,7OLT_58J -data/posebusters_benchmark_set/7OMJ_GCP/7OMJ_GCP_protein.pdb,,7OMJ_GCP -data/posebusters_benchmark_set/7OMX_CNA/7OMX_CNA_protein.pdb,,7OMX_CNA -data/posebusters_benchmark_set/7OP9_06K/7OP9_06K_protein.pdb,,7OP9_06K -data/posebusters_benchmark_set/7OPG_06N/7OPG_06N_protein.pdb,,7OPG_06N -data/posebusters_benchmark_set/7ORW_7WA/7ORW_7WA_protein.pdb,,7ORW_7WA -data/posebusters_benchmark_set/7OSO_0V1/7OSO_0V1_protein.pdb,,7OSO_0V1 -data/posebusters_benchmark_set/7OU8_1XI/7OU8_1XI_protein.pdb,,7OU8_1XI -data/posebusters_benchmark_set/7OZ9_NGK/7OZ9_NGK_protein.pdb,,7OZ9_NGK -data/posebusters_benchmark_set/7OZC_G6S/7OZC_G6S_protein.pdb,,7OZC_G6S -data/posebusters_benchmark_set/7P1F_KFN/7P1F_KFN_protein.pdb,,7P1F_KFN -data/posebusters_benchmark_set/7P1M_4IU/7P1M_4IU_protein.pdb,,7P1M_4IU -data/posebusters_benchmark_set/7P2I_MFU/7P2I_MFU_protein.pdb,,7P2I_MFU -data/posebusters_benchmark_set/7P2W_4QR/7P2W_4QR_protein.pdb,,7P2W_4QR -data/posebusters_benchmark_set/7P4C_5OV/7P4C_5OV_protein.pdb,,7P4C_5OV -data/posebusters_benchmark_set/7P4J_5JK/7P4J_5JK_protein.pdb,,7P4J_5JK -data/posebusters_benchmark_set/7P4V_DAT/7P4V_DAT_protein.pdb,,7P4V_DAT -data/posebusters_benchmark_set/7P5T_5YG/7P5T_5YG_protein.pdb,,7P5T_5YG 
-data/posebusters_benchmark_set/7P85_5ZG/7P85_5ZG_protein.pdb,,7P85_5ZG -data/posebusters_benchmark_set/7PA4_C/7PA4_C_protein.pdb,,7PA4_C -data/posebusters_benchmark_set/7PGX_FMN/7PGX_FMN_protein.pdb,,7PGX_FMN -data/posebusters_benchmark_set/7PIH_7QW/7PIH_7QW_protein.pdb,,7PIH_7QW -data/posebusters_benchmark_set/7PJQ_OWH/7PJQ_OWH_protein.pdb,,7PJQ_OWH -data/posebusters_benchmark_set/7PK0_BYC/7PK0_BYC_protein.pdb,,7PK0_BYC -data/posebusters_benchmark_set/7PL1_SFG/7PL1_SFG_protein.pdb,,7PL1_SFG -data/posebusters_benchmark_set/7POM_7VZ/7POM_7VZ_protein.pdb,,7POM_7VZ -data/posebusters_benchmark_set/7PRI_7TI/7PRI_7TI_protein.pdb,,7PRI_7TI -data/posebusters_benchmark_set/7PRM_81I/7PRM_81I_protein.pdb,,7PRM_81I -data/posebusters_benchmark_set/7PT3_3KK/7PT3_3KK_protein.pdb,,7PT3_3KK -data/posebusters_benchmark_set/7PUV_84Z/7PUV_84Z_protein.pdb,,7PUV_84Z -data/posebusters_benchmark_set/7Q19_DSM/7Q19_DSM_protein.pdb,,7Q19_DSM -data/posebusters_benchmark_set/7Q25_8J9/7Q25_8J9_protein.pdb,,7Q25_8J9 -data/posebusters_benchmark_set/7Q27_8KC/7Q27_8KC_protein.pdb,,7Q27_8KC -data/posebusters_benchmark_set/7Q2B_M6H/7Q2B_M6H_protein.pdb,,7Q2B_M6H -data/posebusters_benchmark_set/7Q5I_I0F/7Q5I_I0F_protein.pdb,,7Q5I_I0F -data/posebusters_benchmark_set/7QE4_NGA/7QE4_NGA_protein.pdb,,7QE4_NGA -data/posebusters_benchmark_set/7QF4_RBF/7QF4_RBF_protein.pdb,,7QF4_RBF -data/posebusters_benchmark_set/7QFM_AY3/7QFM_AY3_protein.pdb,,7QFM_AY3 -data/posebusters_benchmark_set/7QGP_DJ8/7QGP_DJ8_protein.pdb,,7QGP_DJ8 -data/posebusters_benchmark_set/7QHG_T3B/7QHG_T3B_protein.pdb,,7QHG_T3B -data/posebusters_benchmark_set/7QHL_D5P/7QHL_D5P_protein.pdb,,7QHL_D5P -data/posebusters_benchmark_set/7QK0_EBL/7QK0_EBL_protein.pdb,,7QK0_EBL -data/posebusters_benchmark_set/7QPP_VDX/7QPP_VDX_protein.pdb,,7QPP_VDX -data/posebusters_benchmark_set/7QSW_CAP/7QSW_CAP_protein.pdb,,7QSW_CAP -data/posebusters_benchmark_set/7QTA_URI/7QTA_URI_protein.pdb,,7QTA_URI 
-data/posebusters_benchmark_set/7R3D_APR/7R3D_APR_protein.pdb,,7R3D_APR -data/posebusters_benchmark_set/7R59_I5F/7R59_I5F_protein.pdb,,7R59_I5F -data/posebusters_benchmark_set/7R6J_2I7/7R6J_2I7_protein.pdb,,7R6J_2I7 -data/posebusters_benchmark_set/7R7R_AWJ/7R7R_AWJ_protein.pdb,,7R7R_AWJ -data/posebusters_benchmark_set/7R9N_F97/7R9N_F97_protein.pdb,,7R9N_F97 -data/posebusters_benchmark_set/7RC3_SAH/7RC3_SAH_protein.pdb,,7RC3_SAH -data/posebusters_benchmark_set/7REE_4LY/7REE_4LY_protein.pdb,,7REE_4LY -data/posebusters_benchmark_set/7RH3_59O/7RH3_59O_protein.pdb,,7RH3_59O -data/posebusters_benchmark_set/7RH8_UTP/7RH8_UTP_protein.pdb,,7RH8_UTP -data/posebusters_benchmark_set/7RKW_5TV/7RKW_5TV_protein.pdb,,7RKW_5TV -data/posebusters_benchmark_set/7RNI_60I/7RNI_60I_protein.pdb,,7RNI_60I -data/posebusters_benchmark_set/7ROR_69X/7ROR_69X_protein.pdb,,7ROR_69X -data/posebusters_benchmark_set/7ROU_66I/7ROU_66I_protein.pdb,,7ROU_66I -data/posebusters_benchmark_set/7RPZ_6IC/7RPZ_6IC_protein.pdb,,7RPZ_6IC -data/posebusters_benchmark_set/7RSV_7IQ/7RSV_7IQ_protein.pdb,,7RSV_7IQ -data/posebusters_benchmark_set/7RUI_7QZ/7RUI_7QZ_protein.pdb,,7RUI_7QZ -data/posebusters_benchmark_set/7RWO_7WN/7RWO_7WN_protein.pdb,,7RWO_7WN -data/posebusters_benchmark_set/7RWS_4UR/7RWS_4UR_protein.pdb,,7RWS_4UR -data/posebusters_benchmark_set/7RZL_NPO/7RZL_NPO_protein.pdb,,7RZL_NPO -data/posebusters_benchmark_set/7S45_ACO/7S45_ACO_protein.pdb,,7S45_ACO -data/posebusters_benchmark_set/7S9H_7PP/7S9H_7PP_protein.pdb,,7S9H_7PP -data/posebusters_benchmark_set/7SCW_GSP/7SCW_GSP_protein.pdb,,7SCW_GSP -data/posebusters_benchmark_set/7SDD_4IP/7SDD_4IP_protein.pdb,,7SDD_4IP -data/posebusters_benchmark_set/7SED_8VD/7SED_8VD_protein.pdb,,7SED_8VD -data/posebusters_benchmark_set/7SFO_98L/7SFO_98L_protein.pdb,,7SFO_98L -data/posebusters_benchmark_set/7SGV_L30/7SGV_L30_protein.pdb,,7SGV_L30 -data/posebusters_benchmark_set/7SIU_9ID/7SIU_9ID_protein.pdb,,7SIU_9ID 
-data/posebusters_benchmark_set/7SNE_9XR/7SNE_9XR_protein.pdb,,7SNE_9XR -data/posebusters_benchmark_set/7SSM_B7L/7SSM_B7L_protein.pdb,,7SSM_B7L -data/posebusters_benchmark_set/7SUC_COM/7SUC_COM_protein.pdb,,7SUC_COM -data/posebusters_benchmark_set/7SZA_DUI/7SZA_DUI_protein.pdb,,7SZA_DUI -data/posebusters_benchmark_set/7T0D_FPP/7T0D_FPP_protein.pdb,,7T0D_FPP -data/posebusters_benchmark_set/7T0U_E3I/7T0U_E3I_protein.pdb,,7T0U_E3I -data/posebusters_benchmark_set/7T1D_E7K/7T1D_E7K_protein.pdb,,7T1D_E7K -data/posebusters_benchmark_set/7T2I_E9F/7T2I_E9F_protein.pdb,,7T2I_E9F -data/posebusters_benchmark_set/7T3E_SLB/7T3E_SLB_protein.pdb,,7T3E_SLB -data/posebusters_benchmark_set/7T3F_EM0/7T3F_EM0_protein.pdb,,7T3F_EM0 -data/posebusters_benchmark_set/7T9O_GEI/7T9O_GEI_protein.pdb,,7T9O_GEI -data/posebusters_benchmark_set/7TB0_UD1/7TB0_UD1_protein.pdb,,7TB0_UD1 -data/posebusters_benchmark_set/7TBU_S3P/7TBU_S3P_protein.pdb,,7TBU_S3P -data/posebusters_benchmark_set/7TE8_P0T/7TE8_P0T_protein.pdb,,7TE8_P0T -data/posebusters_benchmark_set/7TH4_FFO/7TH4_FFO_protein.pdb,,7TH4_FFO -data/posebusters_benchmark_set/7THI_PGA/7THI_PGA_protein.pdb,,7THI_PGA -data/posebusters_benchmark_set/7TM6_GPJ/7TM6_GPJ_protein.pdb,,7TM6_GPJ -data/posebusters_benchmark_set/7TOM_5AD/7TOM_5AD_protein.pdb,,7TOM_5AD -data/posebusters_benchmark_set/7TS6_KMI/7TS6_KMI_protein.pdb,,7TS6_KMI -data/posebusters_benchmark_set/7TSF_H4B/7TSF_H4B_protein.pdb,,7TSF_H4B -data/posebusters_benchmark_set/7TUO_KL9/7TUO_KL9_protein.pdb,,7TUO_KL9 -data/posebusters_benchmark_set/7TWC_CXS/7TWC_CXS_protein.pdb,,7TWC_CXS -data/posebusters_benchmark_set/7TXK_LW8/7TXK_LW8_protein.pdb,,7TXK_LW8 -data/posebusters_benchmark_set/7TXP_0FX/7TXP_0FX_protein.pdb,,7TXP_0FX -data/posebusters_benchmark_set/7TYP_KUR/7TYP_KUR_protein.pdb,,7TYP_KUR -data/posebusters_benchmark_set/7U0U_FK5/7U0U_FK5_protein.pdb,,7U0U_FK5 -data/posebusters_benchmark_set/7U3J_L6U/7U3J_L6U_protein.pdb,,7U3J_L6U 
-data/posebusters_benchmark_set/7UAS_MBU/7UAS_MBU_protein.pdb,,7UAS_MBU -data/posebusters_benchmark_set/7UAW_MF6/7UAW_MF6_protein.pdb,,7UAW_MF6 -data/posebusters_benchmark_set/7UEY_N0R/7UEY_N0R_protein.pdb,,7UEY_N0R -data/posebusters_benchmark_set/7UF2_5SP/7UF2_5SP_protein.pdb,,7UF2_5SP -data/posebusters_benchmark_set/7UJ4_OQ4/7UJ4_OQ4_protein.pdb,,7UJ4_OQ4 -data/posebusters_benchmark_set/7UJ5_DGL/7UJ5_DGL_protein.pdb,,7UJ5_DGL -data/posebusters_benchmark_set/7UJF_R3V/7UJF_R3V_protein.pdb,,7UJF_R3V -data/posebusters_benchmark_set/7ULC_56B/7ULC_56B_protein.pdb,,7ULC_56B -data/posebusters_benchmark_set/7UMV_NUU/7UMV_NUU_protein.pdb,,7UMV_NUU -data/posebusters_benchmark_set/7UMW_NAD/7UMW_NAD_protein.pdb,,7UMW_NAD -data/posebusters_benchmark_set/7UP3_NZ0/7UP3_NZ0_protein.pdb,,7UP3_NZ0 -data/posebusters_benchmark_set/7UQ3_O2U/7UQ3_O2U_protein.pdb,,7UQ3_O2U -data/posebusters_benchmark_set/7USH_82V/7USH_82V_protein.pdb,,7USH_82V -data/posebusters_benchmark_set/7UTW_NAI/7UTW_NAI_protein.pdb,,7UTW_NAI -data/posebusters_benchmark_set/7UXS_OJC/7UXS_OJC_protein.pdb,,7UXS_OJC -data/posebusters_benchmark_set/7UY4_SMI/7UY4_SMI_protein.pdb,,7UY4_SMI -data/posebusters_benchmark_set/7UYB_OK0/7UYB_OK0_protein.pdb,,7UYB_OK0 -data/posebusters_benchmark_set/7V14_ORU/7V14_ORU_protein.pdb,,7V14_ORU -data/posebusters_benchmark_set/7V3N_AKG/7V3N_AKG_protein.pdb,,7V3N_AKG -data/posebusters_benchmark_set/7V3S_5I9/7V3S_5I9_protein.pdb,,7V3S_5I9 -data/posebusters_benchmark_set/7V43_C4O/7V43_C4O_protein.pdb,,7V43_C4O -data/posebusters_benchmark_set/7V8Z_5YH/7V8Z_5YH_protein.pdb,,7V8Z_5YH -data/posebusters_benchmark_set/7VB8_STL/7VB8_STL_protein.pdb,,7VB8_STL -data/posebusters_benchmark_set/7VBU_6I4/7VBU_6I4_protein.pdb,,7VBU_6I4 -data/posebusters_benchmark_set/7VC5_9SF/7VC5_9SF_protein.pdb,,7VC5_9SF -data/posebusters_benchmark_set/7VJT_7IJ/7VJT_7IJ_protein.pdb,,7VJT_7IJ -data/posebusters_benchmark_set/7VKZ_NOJ/7VKZ_NOJ_protein.pdb,,7VKZ_NOJ 
-data/posebusters_benchmark_set/7VQ9_ISY/7VQ9_ISY_protein.pdb,,7VQ9_ISY -data/posebusters_benchmark_set/7VWF_K55/7VWF_K55_protein.pdb,,7VWF_K55 -data/posebusters_benchmark_set/7VYJ_CA0/7VYJ_CA0_protein.pdb,,7VYJ_CA0 -data/posebusters_benchmark_set/7W05_GMP/7W05_GMP_protein.pdb,,7W05_GMP -data/posebusters_benchmark_set/7W06_ITN/7W06_ITN_protein.pdb,,7W06_ITN -data/posebusters_benchmark_set/7W6F_8I6/7W6F_8I6_protein.pdb,,7W6F_8I6 -data/posebusters_benchmark_set/7WCF_ACP/7WCF_ACP_protein.pdb,,7WCF_ACP -data/posebusters_benchmark_set/7WDT_NGS/7WDT_NGS_protein.pdb,,7WDT_NGS -data/posebusters_benchmark_set/7WJB_BGC/7WJB_BGC_protein.pdb,,7WJB_BGC -data/posebusters_benchmark_set/7WKL_CAQ/7WKL_CAQ_protein.pdb,,7WKL_CAQ -data/posebusters_benchmark_set/7WL4_JFU/7WL4_JFU_protein.pdb,,7WL4_JFU -data/posebusters_benchmark_set/7WN5_JGL/7WN5_JGL_protein.pdb,,7WN5_JGL -data/posebusters_benchmark_set/7WPW_F15/7WPW_F15_protein.pdb,,7WPW_F15 -data/posebusters_benchmark_set/7WQQ_5Z6/7WQQ_5Z6_protein.pdb,,7WQQ_5Z6 -data/posebusters_benchmark_set/7WUX_6OI/7WUX_6OI_protein.pdb,,7WUX_6OI -data/posebusters_benchmark_set/7WUY_76N/7WUY_76N_protein.pdb,,7WUY_76N -data/posebusters_benchmark_set/7WY1_D0L/7WY1_D0L_protein.pdb,,7WY1_D0L -data/posebusters_benchmark_set/7X5N_5M5/7X5N_5M5_protein.pdb,,7X5N_5M5 -data/posebusters_benchmark_set/7X9K_8OG/7X9K_8OG_protein.pdb,,7X9K_8OG -data/posebusters_benchmark_set/7XBV_APC/7XBV_APC_protein.pdb,,7XBV_APC -data/posebusters_benchmark_set/7XEK_9YX/7XEK_9YX_protein.pdb,,7XEK_9YX -data/posebusters_benchmark_set/7XFA_D9J/7XFA_D9J_protein.pdb,,7XFA_D9J -data/posebusters_benchmark_set/7XG5_PLP/7XG5_PLP_protein.pdb,,7XG5_PLP -data/posebusters_benchmark_set/7XI7_4RI/7XI7_4RI_protein.pdb,,7XI7_4RI -data/posebusters_benchmark_set/7XIJ_EJ3/7XIJ_EJ3_protein.pdb,,7XIJ_EJ3 -data/posebusters_benchmark_set/7XJN_NSD/7XJN_NSD_protein.pdb,,7XJN_NSD -data/posebusters_benchmark_set/7XPO_UPG/7XPO_UPG_protein.pdb,,7XPO_UPG 
-data/posebusters_benchmark_set/7XQZ_FPF/7XQZ_FPF_protein.pdb,,7XQZ_FPF -data/posebusters_benchmark_set/7XRL_FWK/7XRL_FWK_protein.pdb,,7XRL_FWK -data/posebusters_benchmark_set/7YZU_DO7/7YZU_DO7_protein.pdb,,7YZU_DO7 -data/posebusters_benchmark_set/7Z1Q_NIO/7Z1Q_NIO_protein.pdb,,7Z1Q_NIO -data/posebusters_benchmark_set/7Z2O_IAJ/7Z2O_IAJ_protein.pdb,,7Z2O_IAJ -data/posebusters_benchmark_set/7Z7F_IF3/7Z7F_IF3_protein.pdb,,7Z7F_IF3 -data/posebusters_benchmark_set/7ZCC_OGA/7ZCC_OGA_protein.pdb,,7ZCC_OGA -data/posebusters_benchmark_set/7ZDY_6MJ/7ZDY_6MJ_protein.pdb,,7ZDY_6MJ -data/posebusters_benchmark_set/7ZF0_DHR/7ZF0_DHR_protein.pdb,,7ZF0_DHR -data/posebusters_benchmark_set/7ZHP_IQY/7ZHP_IQY_protein.pdb,,7ZHP_IQY -data/posebusters_benchmark_set/7ZL5_IWE/7ZL5_IWE_protein.pdb,,7ZL5_IWE -data/posebusters_benchmark_set/7ZOC_T8E/7ZOC_T8E_protein.pdb,,7ZOC_T8E -data/posebusters_benchmark_set/7ZTL_BCN/7ZTL_BCN_protein.pdb,,7ZTL_BCN -data/posebusters_benchmark_set/7ZU2_DHT/7ZU2_DHT_protein.pdb,,7ZU2_DHT -data/posebusters_benchmark_set/7ZXV_45D/7ZXV_45D_protein.pdb,,7ZXV_45D -data/posebusters_benchmark_set/7ZXZ_K9R/7ZXZ_K9R_protein.pdb,,7ZXZ_K9R -data/posebusters_benchmark_set/7ZYS_KNR/7ZYS_KNR_protein.pdb,,7ZYS_KNR -data/posebusters_benchmark_set/7ZZB_KGX/7ZZB_KGX_protein.pdb,,7ZZB_KGX -data/posebusters_benchmark_set/7ZZW_KKW/7ZZW_KKW_protein.pdb,,7ZZW_KKW -data/posebusters_benchmark_set/8A1H_DLZ/8A1H_DLZ_protein.pdb,,8A1H_DLZ -data/posebusters_benchmark_set/8A2D_KXY/8A2D_KXY_protein.pdb,,8A2D_KXY -data/posebusters_benchmark_set/8AAU_LH0/8AAU_LH0_protein.pdb,,8AAU_LH0 -data/posebusters_benchmark_set/8ACL_LQL/8ACL_LQL_protein.pdb,,8ACL_LQL -data/posebusters_benchmark_set/8AEM_LVF/8AEM_LVF_protein.pdb,,8AEM_LVF -data/posebusters_benchmark_set/8AEU_M0L/8AEU_M0L_protein.pdb,,8AEU_M0L -data/posebusters_benchmark_set/8AIE_M7L/8AIE_M7L_protein.pdb,,8AIE_M7L -data/posebusters_benchmark_set/8AIJ_M9I/8AIJ_M9I_protein.pdb,,8AIJ_M9I 
-data/posebusters_benchmark_set/8AJX_FUM/8AJX_FUM_protein.pdb,,8AJX_FUM -data/posebusters_benchmark_set/8AP0_PRP/8AP0_PRP_protein.pdb,,8AP0_PRP -data/posebusters_benchmark_set/8AQL_PLG/8AQL_PLG_protein.pdb,,8AQL_PLG -data/posebusters_benchmark_set/8AUH_L9I/8AUH_L9I_protein.pdb,,8AUH_L9I -data/posebusters_benchmark_set/8AY3_OE3/8AY3_OE3_protein.pdb,,8AY3_OE3 -data/posebusters_benchmark_set/8B8H_OJQ/8B8H_OJQ_protein.pdb,,8B8H_OJQ -data/posebusters_benchmark_set/8BN6_R53/8BN6_R53_protein.pdb,,8BN6_R53 -data/posebusters_benchmark_set/8BOM_QU6/8BOM_QU6_protein.pdb,,8BOM_QU6 -data/posebusters_benchmark_set/8BPL_CP/8BPL_CP_protein.pdb,,8BPL_CP -data/posebusters_benchmark_set/8BRO_R7E/8BRO_R7E_protein.pdb,,8BRO_R7E -data/posebusters_benchmark_set/8BTI_RFO/8BTI_RFO_protein.pdb,,8BTI_RFO -data/posebusters_benchmark_set/8C3N_ADP/8C3N_ADP_protein.pdb,,8C3N_ADP -data/posebusters_benchmark_set/8C5D_GTB/8C5D_GTB_protein.pdb,,8C5D_GTB -data/posebusters_benchmark_set/8C5M_MTA/8C5M_MTA_protein.pdb,,8C5M_MTA -data/posebusters_benchmark_set/8C7Y_TXV/8C7Y_TXV_protein.pdb,,8C7Y_TXV -data/posebusters_benchmark_set/8CGC_LMR/8CGC_LMR_protein.pdb,,8CGC_LMR -data/posebusters_benchmark_set/8CI0_8EL/8CI0_8EL_protein.pdb,,8CI0_8EL -data/posebusters_benchmark_set/8CNH_V6U/8CNH_V6U_protein.pdb,,8CNH_V6U -data/posebusters_benchmark_set/8CSD_C5P/8CSD_C5P_protein.pdb,,8CSD_C5P -data/posebusters_benchmark_set/8D19_GSH/8D19_GSH_protein.pdb,,8D19_GSH -data/posebusters_benchmark_set/8D39_QDB/8D39_QDB_protein.pdb,,8D39_QDB -data/posebusters_benchmark_set/8D5D_5DK/8D5D_5DK_protein.pdb,,8D5D_5DK -data/posebusters_benchmark_set/8DHG_T78/8DHG_T78_protein.pdb,,8DHG_T78 -data/posebusters_benchmark_set/8DKO_TFB/8DKO_TFB_protein.pdb,,8DKO_TFB -data/posebusters_benchmark_set/8DP2_UMA/8DP2_UMA_protein.pdb,,8DP2_UMA -data/posebusters_benchmark_set/8DSC_NCA/8DSC_NCA_protein.pdb,,8DSC_NCA -data/posebusters_benchmark_set/8DW5_FQ7/8DW5_FQ7_protein.pdb,,8DW5_FQ7 
-data/posebusters_benchmark_set/8DZT_G4P/8DZT_G4P_protein.pdb,,8DZT_G4P -data/posebusters_benchmark_set/8E77_ULP/8E77_ULP_protein.pdb,,8E77_ULP -data/posebusters_benchmark_set/8EAB_VN2/8EAB_VN2_protein.pdb,,8EAB_VN2 -data/posebusters_benchmark_set/8EAD_UY0/8EAD_UY0_protein.pdb,,8EAD_UY0 -data/posebusters_benchmark_set/8ERS_WQO/8ERS_WQO_protein.pdb,,8ERS_WQO -data/posebusters_benchmark_set/8EX2_Q2Q/8EX2_Q2Q_protein.pdb,,8EX2_Q2Q -data/posebusters_benchmark_set/8EXL_799/8EXL_799_protein.pdb,,8EXL_799 -data/posebusters_benchmark_set/8EYE_X4I/8EYE_X4I_protein.pdb,,8EYE_X4I -data/posebusters_benchmark_set/8F4J_PHO/8F4J_PHO_protein.pdb,,8F4J_PHO -data/posebusters_benchmark_set/8F8E_XJI/8F8E_XJI_protein.pdb,,8F8E_XJI -data/posebusters_benchmark_set/8FAV_4Y5/8FAV_4Y5_protein.pdb,,8FAV_4Y5 -data/posebusters_benchmark_set/8FLN_Y7W/8FLN_Y7W_protein.pdb,,8FLN_Y7W -data/posebusters_benchmark_set/8FLV_ZB9/8FLV_ZB9_protein.pdb,,8FLV_ZB9 -data/posebusters_benchmark_set/8FO5_Y4U/8FO5_Y4U_protein.pdb,,8FO5_Y4U -data/posebusters_benchmark_set/8FV9_80J/8FV9_80J_protein.pdb,,8FV9_80J -data/posebusters_benchmark_set/8G0V_YHT/8G0V_YHT_protein.pdb,,8G0V_YHT -data/posebusters_benchmark_set/8G43_ZU6/8G43_ZU6_protein.pdb,,8G43_ZU6 -data/posebusters_benchmark_set/8G6P_API/8G6P_API_protein.pdb,,8G6P_API -data/posebusters_benchmark_set/8GFD_ZHR/8GFD_ZHR_protein.pdb,,8GFD_ZHR -data/posebusters_benchmark_set/8H0M_2EH/8H0M_2EH_protein.pdb,,8H0M_2EH -data/posebusters_benchmark_set/8HFN_XGC/8HFN_XGC_protein.pdb,,8HFN_XGC -data/posebusters_benchmark_set/8HO0_3ZI/8HO0_3ZI_protein.pdb,,8HO0_3ZI -data/posebusters_benchmark_set/8SLG_G5A/8SLG_G5A_protein.pdb,,8SLG_G5A +data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures/5S8I_2LY_holo_aligned_predicted_protein.pdb,,5S8I_2LY +data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures/5SAK_ZRY_holo_aligned_predicted_protein.pdb,,5SAK_ZRY 
+data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures/5SB2_1K2_holo_aligned_predicted_protein.pdb,,5SB2_1K2 +data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures/5SD5_HWI_holo_aligned_predicted_protein.pdb,,5SD5_HWI +data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures/5SIS_JSM_holo_aligned_predicted_protein.pdb,,5SIS_JSM +data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures/6M2B_EZO_holo_aligned_predicted_protein.pdb,,6M2B_EZO +data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures/6M73_FNR_holo_aligned_predicted_protein.pdb,,6M73_FNR +data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures/6T88_MWQ_holo_aligned_predicted_protein.pdb,,6T88_MWQ +data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures/6TW5_9M2_holo_aligned_predicted_protein.pdb,,6TW5_9M2 +data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures/6TW7_NZB_holo_aligned_predicted_protein.pdb,,6TW7_NZB +data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures/6VS3_R6V_holo_aligned_predicted_protein.pdb,,6VS3_R6V +data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures/6VTA_AKN_holo_aligned_predicted_protein.pdb,,6VTA_AKN +data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures/6W59_SZD_holo_aligned_predicted_protein.pdb,,6W59_SZD +data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures/6WTN_RXT_holo_aligned_predicted_protein.pdb,,6WTN_RXT +data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures/6X8D_ARA_holo_aligned_predicted_protein.pdb,,6X8D_ARA +data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures/6XAF_GDP_holo_aligned_predicted_protein.pdb,,6XAF_GDP 
+data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures/6XBO_5MC_holo_aligned_predicted_protein.pdb,,6XBO_5MC +data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures/6XCT_478_holo_aligned_predicted_protein.pdb,,6XCT_478 +data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures/6XG5_TOP_holo_aligned_predicted_protein.pdb,,6XG5_TOP +data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures/6XHT_V2V_holo_aligned_predicted_protein.pdb,,6XHT_V2V +data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures/6XM9_V55_holo_aligned_predicted_protein.pdb,,6XM9_V55 +data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures/6XUM_30L_holo_aligned_predicted_protein.pdb,,6XUM_30L +data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures/6Y7L_QMG_holo_aligned_predicted_protein.pdb,,6Y7L_QMG +data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures/6YDY_K73_holo_aligned_predicted_protein.pdb,,6YDY_K73 +data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures/6YJA_2BA_holo_aligned_predicted_protein.pdb,,6YJA_2BA +data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures/6YMS_OZH_holo_aligned_predicted_protein.pdb,,6YMS_OZH +data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures/6YQV_8K2_holo_aligned_predicted_protein.pdb,,6YQV_8K2 +data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures/6YQW_82I_holo_aligned_predicted_protein.pdb,,6YQW_82I +data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures/6YR2_T1C_holo_aligned_predicted_protein.pdb,,6YR2_T1C +data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures/6YRV_PJ8_holo_aligned_predicted_protein.pdb,,6YRV_PJ8 
+data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures/6YSP_PAL_holo_aligned_predicted_protein.pdb,,6YSP_PAL +data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures/6YT6_PKE_holo_aligned_predicted_protein.pdb,,6YT6_PKE +data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures/6YYO_Q1K_holo_aligned_predicted_protein.pdb,,6YYO_Q1K +data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures/6Z0R_Q4H_holo_aligned_predicted_protein.pdb,,6Z0R_Q4H +data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures/6Z14_Q4Z_holo_aligned_predicted_protein.pdb,,6Z14_Q4Z +data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures/6Z1C_7EY_holo_aligned_predicted_protein.pdb,,6Z1C_7EY +data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures/6Z2C_Q5E_holo_aligned_predicted_protein.pdb,,6Z2C_Q5E +data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures/6Z4N_Q7B_holo_aligned_predicted_protein.pdb,,6Z4N_Q7B +data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures/6Z5Z_BDF_holo_aligned_predicted_protein.pdb,,6Z5Z_BDF +data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures/6ZAE_ACV_holo_aligned_predicted_protein.pdb,,6ZAE_ACV +data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures/6ZC3_JOR_holo_aligned_predicted_protein.pdb,,6ZC3_JOR +data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures/6ZCY_QF8_holo_aligned_predicted_protein.pdb,,6ZCY_QF8 +data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures/6ZK5_IMH_holo_aligned_predicted_protein.pdb,,6ZK5_IMH +data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures/6ZPB_3D1_holo_aligned_predicted_protein.pdb,,6ZPB_3D1 
+data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures/6ZR8_QOZ_holo_aligned_predicted_protein.pdb,,6ZR8_QOZ +data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures/6ZT2_QPK_holo_aligned_predicted_protein.pdb,,6ZT2_QPK +data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures/6ZX3_QRZ_holo_aligned_predicted_protein.pdb,,6ZX3_QRZ +data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures/6ZXQ_IMO_holo_aligned_predicted_protein.pdb,,6ZXQ_IMO +data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures/7A1P_QW2_holo_aligned_predicted_protein.pdb,,7A1P_QW2 +data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures/7A9E_R4W_holo_aligned_predicted_protein.pdb,,7A9E_R4W +data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures/7A9H_TPP_holo_aligned_predicted_protein.pdb,,7A9H_TPP +data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures/7AA0_R6B_holo_aligned_predicted_protein.pdb,,7AA0_R6B +data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures/7AFX_R9K_holo_aligned_predicted_protein.pdb,,7AFX_R9K +data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures/7AKL_RK5_holo_aligned_predicted_protein.pdb,,7AKL_RK5 +data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures/7AMC_73B_holo_aligned_predicted_protein.pdb,,7AMC_73B +data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures/7AN5_RDH_holo_aligned_predicted_protein.pdb,,7AN5_RDH +data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures/7AS1_21G_holo_aligned_predicted_protein.pdb,,7AS1_21G +data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures/7AVI_S2Q_holo_aligned_predicted_protein.pdb,,7AVI_S2Q 
+data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures/7B0E_C2E_holo_aligned_predicted_protein.pdb,,7B0E_C2E +data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures/7B2C_TP7_holo_aligned_predicted_protein.pdb,,7B2C_TP7 +data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures/7B94_ANP_holo_aligned_predicted_protein.pdb,,7B94_ANP +data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures/7BA0_T5H_holo_aligned_predicted_protein.pdb,,7BA0_T5H +data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures/7BCP_GCO_holo_aligned_predicted_protein.pdb,,7BCP_GCO +data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures/7BHX_TO5_holo_aligned_predicted_protein.pdb,,7BHX_TO5 +data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures/7BJ6_TVK_holo_aligned_predicted_protein.pdb,,7BJ6_TVK +data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures/7BJJ_TVW_holo_aligned_predicted_protein.pdb,,7BJJ_TVW +data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures/7BKA_4JC_holo_aligned_predicted_protein.pdb,,7BKA_4JC +data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures/7BLA_WCS_holo_aligned_predicted_protein.pdb,,7BLA_WCS +data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures/7BLG_GAL_holo_aligned_predicted_protein.pdb,,7BLG_GAL +data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures/7BMI_U4B_holo_aligned_predicted_protein.pdb,,7BMI_U4B +data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures/7BNH_BEZ_holo_aligned_predicted_protein.pdb,,7BNH_BEZ +data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures/7BTT_F8R_holo_aligned_predicted_protein.pdb,,7BTT_F8R 
+data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures/7C0U_FGO_holo_aligned_predicted_protein.pdb,,7C0U_FGO +data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures/7C3U_AZG_holo_aligned_predicted_protein.pdb,,7C3U_AZG +data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures/7C6P_SQH_holo_aligned_predicted_protein.pdb,,7C6P_SQH +data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures/7C8Q_DSG_holo_aligned_predicted_protein.pdb,,7C8Q_DSG +data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures/7CD9_FVR_holo_aligned_predicted_protein.pdb,,7CD9_FVR +data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures/7CIJ_G0C_holo_aligned_predicted_protein.pdb,,7CIJ_G0C +data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures/7CL8_TES_holo_aligned_predicted_protein.pdb,,7CL8_TES +data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures/7CNQ_G8X_holo_aligned_predicted_protein.pdb,,7CNQ_G8X +data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures/7CNS_PMV_holo_aligned_predicted_protein.pdb,,7CNS_PMV +data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures/7CTM_BDP_holo_aligned_predicted_protein.pdb,,7CTM_BDP +data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures/7CUO_PHB_holo_aligned_predicted_protein.pdb,,7CUO_PHB +data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures/7D0P_1VU_holo_aligned_predicted_protein.pdb,,7D0P_1VU +data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures/7D5C_GV6_holo_aligned_predicted_protein.pdb,,7D5C_GV6 +data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures/7D6O_MTE_holo_aligned_predicted_protein.pdb,,7D6O_MTE 
+data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures/7D8Q_GZF_holo_aligned_predicted_protein.pdb,,7D8Q_GZF +data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures/7D9L_GSF_holo_aligned_predicted_protein.pdb,,7D9L_GSF +data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures/7DIN_MPO_holo_aligned_predicted_protein.pdb,,7DIN_MPO +data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures/7DKT_GLF_holo_aligned_predicted_protein.pdb,,7DKT_GLF +data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures/7DQL_4CL_holo_aligned_predicted_protein.pdb,,7DQL_4CL +data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures/7DUA_HJ0_holo_aligned_predicted_protein.pdb,,7DUA_HJ0 +data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures/7E2S_BLA_holo_aligned_predicted_protein.pdb,,7E2S_BLA +data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures/7E4L_MDN_holo_aligned_predicted_protein.pdb,,7E4L_MDN +data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures/7EBG_J0L_holo_aligned_predicted_protein.pdb,,7EBG_J0L +data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures/7ECR_SIN_holo_aligned_predicted_protein.pdb,,7ECR_SIN +data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures/7ED2_A3P_holo_aligned_predicted_protein.pdb,,7ED2_A3P +data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures/7ELT_TYM_holo_aligned_predicted_protein.pdb,,7ELT_TYM +data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures/7EN7_J79_holo_aligned_predicted_protein.pdb,,7EN7_J79 +data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures/7EPV_FDA_holo_aligned_predicted_protein.pdb,,7EPV_FDA 
+data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures/7ES1_UDP_holo_aligned_predicted_protein.pdb,,7ES1_UDP +data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures/7F51_BA7_holo_aligned_predicted_protein.pdb,,7F51_BA7 +data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures/7F5D_EUO_holo_aligned_predicted_protein.pdb,,7F5D_EUO +data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures/7F8T_FAD_holo_aligned_predicted_protein.pdb,,7F8T_FAD +data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures/7FB7_8NF_holo_aligned_predicted_protein.pdb,,7FB7_8NF +data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures/7FHA_ADX_holo_aligned_predicted_protein.pdb,,7FHA_ADX +data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures/7FRX_O88_holo_aligned_predicted_protein.pdb,,7FRX_O88 +data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures/7FT9_4MB_holo_aligned_predicted_protein.pdb,,7FT9_4MB +data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures/7JG0_GAR_holo_aligned_predicted_protein.pdb,,7JG0_GAR +data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures/7JGW_V9S_holo_aligned_predicted_protein.pdb,,7JGW_V9S +data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures/7JHQ_VAJ_holo_aligned_predicted_protein.pdb,,7JHQ_VAJ +data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures/7JMV_4NC_holo_aligned_predicted_protein.pdb,,7JMV_4NC +data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures/7JNB_A2G_holo_aligned_predicted_protein.pdb,,7JNB_A2G +data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures/7JR8_VH7_holo_aligned_predicted_protein.pdb,,7JR8_VH7 
+data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures/7JUD_MMA_holo_aligned_predicted_protein.pdb,,7JUD_MMA +data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures/7JXX_VP7_holo_aligned_predicted_protein.pdb,,7JXX_VP7 +data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures/7JY3_VUD_holo_aligned_predicted_protein.pdb,,7JY3_VUD +data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures/7K0V_VQP_holo_aligned_predicted_protein.pdb,,7K0V_VQP +data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures/7K41_VUA_holo_aligned_predicted_protein.pdb,,7K41_VUA +data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures/7KB1_WBJ_holo_aligned_predicted_protein.pdb,,7KB1_WBJ +data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures/7KC5_BJZ_holo_aligned_predicted_protein.pdb,,7KC5_BJZ +data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures/7KFO_IAC_holo_aligned_predicted_protein.pdb,,7KFO_IAC +data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures/7KLX_WOV_holo_aligned_predicted_protein.pdb,,7KLX_WOV +data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures/7KM8_WPD_holo_aligned_predicted_protein.pdb,,7KM8_WPD +data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures/7KP6_WTP_holo_aligned_predicted_protein.pdb,,7KP6_WTP +data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures/7KQU_YOF_holo_aligned_predicted_protein.pdb,,7KQU_YOF +data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures/7KRU_ATP_holo_aligned_predicted_protein.pdb,,7KRU_ATP +data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures/7KZ9_XN7_holo_aligned_predicted_protein.pdb,,7KZ9_XN7 
+data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures/7L00_XCJ_holo_aligned_predicted_protein.pdb,,7L00_XCJ +data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures/7L03_F9F_holo_aligned_predicted_protein.pdb,,7L03_F9F +data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures/7L5F_XNG_holo_aligned_predicted_protein.pdb,,7L5F_XNG +data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures/7L6D_BMF_holo_aligned_predicted_protein.pdb,,7L6D_BMF +data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures/7L7C_XQ1_holo_aligned_predicted_protein.pdb,,7L7C_XQ1 +data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures/7L81_UD4_holo_aligned_predicted_protein.pdb,,7L81_UD4 +data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures/7LB3_XXS_holo_aligned_predicted_protein.pdb,,7LB3_XXS +data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures/7LCU_XTA_holo_aligned_predicted_protein.pdb,,7LCU_XTA +data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures/7LEV_0JO_holo_aligned_predicted_protein.pdb,,7LEV_0JO +data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures/7LJN_GTP_holo_aligned_predicted_protein.pdb,,7LJN_GTP +data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures/7LMO_NYO_holo_aligned_predicted_protein.pdb,,7LMO_NYO +data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures/7LOE_Y84_holo_aligned_predicted_protein.pdb,,7LOE_Y84 +data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures/7LOU_IFM_holo_aligned_predicted_protein.pdb,,7LOU_IFM +data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures/7LT0_ONJ_holo_aligned_predicted_protein.pdb,,7LT0_ONJ 
+data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures/7LZD_YHY_holo_aligned_predicted_protein.pdb,,7LZD_YHY +data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures/7LZQ_YJV_holo_aligned_predicted_protein.pdb,,7LZQ_YJV +data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures/7M31_TDR_holo_aligned_predicted_protein.pdb,,7M31_TDR +data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures/7M3H_YPV_holo_aligned_predicted_protein.pdb,,7M3H_YPV +data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures/7M41_YQG_holo_aligned_predicted_protein.pdb,,7M41_YQG +data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures/7M6K_YRJ_holo_aligned_predicted_protein.pdb,,7M6K_YRJ +data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures/7MAE_XUS_holo_aligned_predicted_protein.pdb,,7MAE_XUS +data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures/7MEU_MGP_holo_aligned_predicted_protein.pdb,,7MEU_MGP +data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures/7MFP_Z7P_holo_aligned_predicted_protein.pdb,,7MFP_Z7P +data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures/7MGT_ZD4_holo_aligned_predicted_protein.pdb,,7MGT_ZD4 +data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures/7MGY_ZD1_holo_aligned_predicted_protein.pdb,,7MGY_ZD1 +data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures/7MMH_ZJY_holo_aligned_predicted_protein.pdb,,7MMH_ZJY +data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures/7MOI_HPS_holo_aligned_predicted_protein.pdb,,7MOI_HPS +data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures/7MRH_ZMJ_holo_aligned_predicted_protein.pdb,,7MRH_ZMJ 
+data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures/7MS7_ZQ1_holo_aligned_predicted_protein.pdb,,7MS7_ZQ1 +data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures/7MSR_DCA_holo_aligned_predicted_protein.pdb,,7MSR_DCA +data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures/7MWN_WI5_holo_aligned_predicted_protein.pdb,,7MWN_WI5 +data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures/7MWU_ZPM_holo_aligned_predicted_protein.pdb,,7MWU_ZPM +data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures/7MY1_IPE_holo_aligned_predicted_protein.pdb,,7MY1_IPE +data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures/7MYU_ZR7_holo_aligned_predicted_protein.pdb,,7MYU_ZR7 +data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures/7MZS_GLA_holo_aligned_predicted_protein.pdb,,7MZS_GLA +data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures/7N03_ZRP_holo_aligned_predicted_protein.pdb,,7N03_ZRP +data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures/7N4N_0BK_holo_aligned_predicted_protein.pdb,,7N4N_0BK +data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures/7N4W_P4V_holo_aligned_predicted_protein.pdb,,7N4W_P4V +data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures/7N6F_0I1_holo_aligned_predicted_protein.pdb,,7N6F_0I1 +data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures/7N7B_T3F_holo_aligned_predicted_protein.pdb,,7N7B_T3F +data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures/7N7H_CTP_holo_aligned_predicted_protein.pdb,,7N7H_CTP +data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures/7NA4_1I9_holo_aligned_predicted_protein.pdb,,7NA4_1I9 
+data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures/7NB4_U6Q_holo_aligned_predicted_protein.pdb,,7NB4_U6Q +data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures/7NF0_BYN_holo_aligned_predicted_protein.pdb,,7NF0_BYN +data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures/7NF3_4LU_holo_aligned_predicted_protein.pdb,,7NF3_4LU +data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures/7NFB_GEN_holo_aligned_predicted_protein.pdb,,7NFB_GEN +data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures/7NGW_UAW_holo_aligned_predicted_protein.pdb,,7NGW_UAW +data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures/7NLK_UHK_holo_aligned_predicted_protein.pdb,,7NLK_UHK +data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures/7NLV_UJE_holo_aligned_predicted_protein.pdb,,7NLV_UJE +data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures/7NML_I7B_holo_aligned_predicted_protein.pdb,,7NML_I7B +data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures/7NP6_UK8_holo_aligned_predicted_protein.pdb,,7NP6_UK8 +data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures/7NPL_UKZ_holo_aligned_predicted_protein.pdb,,7NPL_UKZ +data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures/7NR6_UO8_holo_aligned_predicted_protein.pdb,,7NR6_UO8 +data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures/7NR8_UOE_holo_aligned_predicted_protein.pdb,,7NR8_UOE +data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures/7NSW_HC4_holo_aligned_predicted_protein.pdb,,7NSW_HC4 +data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures/7NTG_F6R_holo_aligned_predicted_protein.pdb,,7NTG_F6R 
+data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures/7NU0_DCL_holo_aligned_predicted_protein.pdb,,7NU0_DCL +data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures/7NUT_GLP_holo_aligned_predicted_protein.pdb,,7NUT_GLP +data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures/7NXO_UU8_holo_aligned_predicted_protein.pdb,,7NXO_UU8 +data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures/7O0N_CDP_holo_aligned_predicted_protein.pdb,,7O0N_CDP +data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures/7O1T_5X8_holo_aligned_predicted_protein.pdb,,7O1T_5X8 +data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures/7OCB_V88_holo_aligned_predicted_protein.pdb,,7OCB_V88 +data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures/7ODX_DGP_holo_aligned_predicted_protein.pdb,,7ODX_DGP +data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures/7ODY_DGI_holo_aligned_predicted_protein.pdb,,7ODY_DGI +data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures/7OEO_V9Z_holo_aligned_predicted_protein.pdb,,7OEO_V9Z +data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures/7OFF_VCB_holo_aligned_predicted_protein.pdb,,7OFF_VCB +data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures/7OFK_VCH_holo_aligned_predicted_protein.pdb,,7OFK_VCH +data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures/7OKC_VFE_holo_aligned_predicted_protein.pdb,,7OKC_VFE +data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures/7OKF_VH5_holo_aligned_predicted_protein.pdb,,7OKF_VH5 +data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures/7OLI_8HG_holo_aligned_predicted_protein.pdb,,7OLI_8HG 
+data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures/7OLT_58J_holo_aligned_predicted_protein.pdb,,7OLT_58J +data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures/7OMJ_GCP_holo_aligned_predicted_protein.pdb,,7OMJ_GCP +data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures/7OMX_CNA_holo_aligned_predicted_protein.pdb,,7OMX_CNA +data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures/7OP9_06K_holo_aligned_predicted_protein.pdb,,7OP9_06K +data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures/7OPG_06N_holo_aligned_predicted_protein.pdb,,7OPG_06N +data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures/7ORW_7WA_holo_aligned_predicted_protein.pdb,,7ORW_7WA +data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures/7OSO_0V1_holo_aligned_predicted_protein.pdb,,7OSO_0V1 +data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures/7OU8_1XI_holo_aligned_predicted_protein.pdb,,7OU8_1XI +data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures/7OZ9_NGK_holo_aligned_predicted_protein.pdb,,7OZ9_NGK +data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures/7OZC_G6S_holo_aligned_predicted_protein.pdb,,7OZC_G6S +data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures/7P1F_KFN_holo_aligned_predicted_protein.pdb,,7P1F_KFN +data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures/7P1M_4IU_holo_aligned_predicted_protein.pdb,,7P1M_4IU +data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures/7P2I_MFU_holo_aligned_predicted_protein.pdb,,7P2I_MFU +data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures/7P2W_4QR_holo_aligned_predicted_protein.pdb,,7P2W_4QR 
+data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures/7P4C_5OV_holo_aligned_predicted_protein.pdb,,7P4C_5OV +data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures/7P4J_5JK_holo_aligned_predicted_protein.pdb,,7P4J_5JK +data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures/7P4V_DAT_holo_aligned_predicted_protein.pdb,,7P4V_DAT +data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures/7P5T_5YG_holo_aligned_predicted_protein.pdb,,7P5T_5YG +data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures/7P85_5ZG_holo_aligned_predicted_protein.pdb,,7P85_5ZG +data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures/7PA4_C_holo_aligned_predicted_protein.pdb,,7PA4_C +data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures/7PGX_FMN_holo_aligned_predicted_protein.pdb,,7PGX_FMN +data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures/7PIH_7QW_holo_aligned_predicted_protein.pdb,,7PIH_7QW +data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures/7PJQ_OWH_holo_aligned_predicted_protein.pdb,,7PJQ_OWH +data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures/7PK0_BYC_holo_aligned_predicted_protein.pdb,,7PK0_BYC +data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures/7PL1_SFG_holo_aligned_predicted_protein.pdb,,7PL1_SFG +data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures/7POM_7VZ_holo_aligned_predicted_protein.pdb,,7POM_7VZ +data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures/7PRI_7TI_holo_aligned_predicted_protein.pdb,,7PRI_7TI +data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures/7PRM_81I_holo_aligned_predicted_protein.pdb,,7PRM_81I 
+data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures/7PT3_3KK_holo_aligned_predicted_protein.pdb,,7PT3_3KK +data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures/7PUV_84Z_holo_aligned_predicted_protein.pdb,,7PUV_84Z +data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures/7Q19_DSM_holo_aligned_predicted_protein.pdb,,7Q19_DSM +data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures/7Q25_8J9_holo_aligned_predicted_protein.pdb,,7Q25_8J9 +data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures/7Q27_8KC_holo_aligned_predicted_protein.pdb,,7Q27_8KC +data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures/7Q2B_M6H_holo_aligned_predicted_protein.pdb,,7Q2B_M6H +data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures/7Q5I_I0F_holo_aligned_predicted_protein.pdb,,7Q5I_I0F +data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures/7QE4_NGA_holo_aligned_predicted_protein.pdb,,7QE4_NGA +data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures/7QF4_RBF_holo_aligned_predicted_protein.pdb,,7QF4_RBF +data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures/7QFM_AY3_holo_aligned_predicted_protein.pdb,,7QFM_AY3 +data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures/7QGP_DJ8_holo_aligned_predicted_protein.pdb,,7QGP_DJ8 +data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures/7QHG_T3B_holo_aligned_predicted_protein.pdb,,7QHG_T3B +data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures/7QHL_D5P_holo_aligned_predicted_protein.pdb,,7QHL_D5P +data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures/7QK0_EBL_holo_aligned_predicted_protein.pdb,,7QK0_EBL 
+data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures/7QPP_VDX_holo_aligned_predicted_protein.pdb,,7QPP_VDX +data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures/7QSW_CAP_holo_aligned_predicted_protein.pdb,,7QSW_CAP +data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures/7QTA_URI_holo_aligned_predicted_protein.pdb,,7QTA_URI +data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures/7R3D_APR_holo_aligned_predicted_protein.pdb,,7R3D_APR +data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures/7R59_I5F_holo_aligned_predicted_protein.pdb,,7R59_I5F +data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures/7R6J_2I7_holo_aligned_predicted_protein.pdb,,7R6J_2I7 +data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures/7R7R_AWJ_holo_aligned_predicted_protein.pdb,,7R7R_AWJ +data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures/7R9N_F97_holo_aligned_predicted_protein.pdb,,7R9N_F97 +data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures/7RC3_SAH_holo_aligned_predicted_protein.pdb,,7RC3_SAH +data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures/7REE_4LY_holo_aligned_predicted_protein.pdb,,7REE_4LY +data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures/7RH3_59O_holo_aligned_predicted_protein.pdb,,7RH3_59O +data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures/7RH8_UTP_holo_aligned_predicted_protein.pdb,,7RH8_UTP +data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures/7RKW_5TV_holo_aligned_predicted_protein.pdb,,7RKW_5TV +data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures/7RNI_60I_holo_aligned_predicted_protein.pdb,,7RNI_60I 
+data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures/7ROR_69X_holo_aligned_predicted_protein.pdb,,7ROR_69X +data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures/7ROU_66I_holo_aligned_predicted_protein.pdb,,7ROU_66I +data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures/7RPZ_6IC_holo_aligned_predicted_protein.pdb,,7RPZ_6IC +data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures/7RSV_7IQ_holo_aligned_predicted_protein.pdb,,7RSV_7IQ +data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures/7RUI_7QZ_holo_aligned_predicted_protein.pdb,,7RUI_7QZ +data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures/7RWO_7WN_holo_aligned_predicted_protein.pdb,,7RWO_7WN +data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures/7RWS_4UR_holo_aligned_predicted_protein.pdb,,7RWS_4UR +data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures/7RZL_NPO_holo_aligned_predicted_protein.pdb,,7RZL_NPO +data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures/7S45_ACO_holo_aligned_predicted_protein.pdb,,7S45_ACO +data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures/7S9H_7PP_holo_aligned_predicted_protein.pdb,,7S9H_7PP +data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures/7SCW_GSP_holo_aligned_predicted_protein.pdb,,7SCW_GSP +data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures/7SDD_4IP_holo_aligned_predicted_protein.pdb,,7SDD_4IP +data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures/7SED_8VD_holo_aligned_predicted_protein.pdb,,7SED_8VD +data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures/7SFO_98L_holo_aligned_predicted_protein.pdb,,7SFO_98L 
+data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures/7SGV_L30_holo_aligned_predicted_protein.pdb,,7SGV_L30 +data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures/7SIU_9ID_holo_aligned_predicted_protein.pdb,,7SIU_9ID +data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures/7SNE_9XR_holo_aligned_predicted_protein.pdb,,7SNE_9XR +data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures/7SSM_B7L_holo_aligned_predicted_protein.pdb,,7SSM_B7L +data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures/7SUC_COM_holo_aligned_predicted_protein.pdb,,7SUC_COM +data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures/7SZA_DUI_holo_aligned_predicted_protein.pdb,,7SZA_DUI +data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures/7T0D_FPP_holo_aligned_predicted_protein.pdb,,7T0D_FPP +data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures/7T0U_E3I_holo_aligned_predicted_protein.pdb,,7T0U_E3I +data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures/7T1D_E7K_holo_aligned_predicted_protein.pdb,,7T1D_E7K +data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures/7T2I_E9F_holo_aligned_predicted_protein.pdb,,7T2I_E9F +data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures/7T3E_SLB_holo_aligned_predicted_protein.pdb,,7T3E_SLB +data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures/7T3F_EM0_holo_aligned_predicted_protein.pdb,,7T3F_EM0 +data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures/7T9O_GEI_holo_aligned_predicted_protein.pdb,,7T9O_GEI +data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures/7TB0_UD1_holo_aligned_predicted_protein.pdb,,7TB0_UD1 
+data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures/7TBU_S3P_holo_aligned_predicted_protein.pdb,,7TBU_S3P +data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures/7TE8_P0T_holo_aligned_predicted_protein.pdb,,7TE8_P0T +data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures/7TH4_FFO_holo_aligned_predicted_protein.pdb,,7TH4_FFO +data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures/7THI_PGA_holo_aligned_predicted_protein.pdb,,7THI_PGA +data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures/7TM6_GPJ_holo_aligned_predicted_protein.pdb,,7TM6_GPJ +data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures/7TOM_5AD_holo_aligned_predicted_protein.pdb,,7TOM_5AD +data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures/7TS6_KMI_holo_aligned_predicted_protein.pdb,,7TS6_KMI +data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures/7TSF_H4B_holo_aligned_predicted_protein.pdb,,7TSF_H4B +data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures/7TUO_KL9_holo_aligned_predicted_protein.pdb,,7TUO_KL9 +data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures/7TWC_CXS_holo_aligned_predicted_protein.pdb,,7TWC_CXS +data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures/7TXK_LW8_holo_aligned_predicted_protein.pdb,,7TXK_LW8 +data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures/7TXP_0FX_holo_aligned_predicted_protein.pdb,,7TXP_0FX +data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures/7TYP_KUR_holo_aligned_predicted_protein.pdb,,7TYP_KUR +data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures/7U0U_FK5_holo_aligned_predicted_protein.pdb,,7U0U_FK5 
+data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures/7U3J_L6U_holo_aligned_predicted_protein.pdb,,7U3J_L6U +data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures/7UAS_MBU_holo_aligned_predicted_protein.pdb,,7UAS_MBU +data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures/7UAW_MF6_holo_aligned_predicted_protein.pdb,,7UAW_MF6 +data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures/7UEY_N0R_holo_aligned_predicted_protein.pdb,,7UEY_N0R +data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures/7UF2_5SP_holo_aligned_predicted_protein.pdb,,7UF2_5SP +data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures/7UJ4_OQ4_holo_aligned_predicted_protein.pdb,,7UJ4_OQ4 +data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures/7UJ5_DGL_holo_aligned_predicted_protein.pdb,,7UJ5_DGL +data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures/7UJF_R3V_holo_aligned_predicted_protein.pdb,,7UJF_R3V +data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures/7ULC_56B_holo_aligned_predicted_protein.pdb,,7ULC_56B +data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures/7UMV_NUU_holo_aligned_predicted_protein.pdb,,7UMV_NUU +data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures/7UMW_NAD_holo_aligned_predicted_protein.pdb,,7UMW_NAD +data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures/7UP3_NZ0_holo_aligned_predicted_protein.pdb,,7UP3_NZ0 +data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures/7UQ3_O2U_holo_aligned_predicted_protein.pdb,,7UQ3_O2U +data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures/7USH_82V_holo_aligned_predicted_protein.pdb,,7USH_82V 
+data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures/7UTW_NAI_holo_aligned_predicted_protein.pdb,,7UTW_NAI +data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures/7UXS_OJC_holo_aligned_predicted_protein.pdb,,7UXS_OJC +data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures/7UY4_SMI_holo_aligned_predicted_protein.pdb,,7UY4_SMI +data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures/7UYB_OK0_holo_aligned_predicted_protein.pdb,,7UYB_OK0 +data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures/7V14_ORU_holo_aligned_predicted_protein.pdb,,7V14_ORU +data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures/7V3N_AKG_holo_aligned_predicted_protein.pdb,,7V3N_AKG +data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures/7V3S_5I9_holo_aligned_predicted_protein.pdb,,7V3S_5I9 +data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures/7V43_C4O_holo_aligned_predicted_protein.pdb,,7V43_C4O +data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures/7V8Z_5YH_holo_aligned_predicted_protein.pdb,,7V8Z_5YH +data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures/7VB8_STL_holo_aligned_predicted_protein.pdb,,7VB8_STL +data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures/7VBU_6I4_holo_aligned_predicted_protein.pdb,,7VBU_6I4 +data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures/7VC5_9SF_holo_aligned_predicted_protein.pdb,,7VC5_9SF +data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures/7VJT_7IJ_holo_aligned_predicted_protein.pdb,,7VJT_7IJ +data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures/7VKZ_NOJ_holo_aligned_predicted_protein.pdb,,7VKZ_NOJ 
+data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures/7VQ9_ISY_holo_aligned_predicted_protein.pdb,,7VQ9_ISY +data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures/7VWF_K55_holo_aligned_predicted_protein.pdb,,7VWF_K55 +data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures/7VYJ_CA0_holo_aligned_predicted_protein.pdb,,7VYJ_CA0 +data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures/7W05_GMP_holo_aligned_predicted_protein.pdb,,7W05_GMP +data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures/7W06_ITN_holo_aligned_predicted_protein.pdb,,7W06_ITN +data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures/7W6F_8I6_holo_aligned_predicted_protein.pdb,,7W6F_8I6 +data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures/7WCF_ACP_holo_aligned_predicted_protein.pdb,,7WCF_ACP +data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures/7WDT_NGS_holo_aligned_predicted_protein.pdb,,7WDT_NGS +data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures/7WJB_BGC_holo_aligned_predicted_protein.pdb,,7WJB_BGC +data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures/7WKL_CAQ_holo_aligned_predicted_protein.pdb,,7WKL_CAQ +data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures/7WL4_JFU_holo_aligned_predicted_protein.pdb,,7WL4_JFU +data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures/7WN5_JGL_holo_aligned_predicted_protein.pdb,,7WN5_JGL +data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures/7WPW_F15_holo_aligned_predicted_protein.pdb,,7WPW_F15 +data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures/7WQQ_5Z6_holo_aligned_predicted_protein.pdb,,7WQQ_5Z6 
+data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures/7WUX_6OI_holo_aligned_predicted_protein.pdb,,7WUX_6OI +data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures/7WUY_76N_holo_aligned_predicted_protein.pdb,,7WUY_76N +data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures/7WY1_D0L_holo_aligned_predicted_protein.pdb,,7WY1_D0L +data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures/7X5N_5M5_holo_aligned_predicted_protein.pdb,,7X5N_5M5 +data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures/7X9K_8OG_holo_aligned_predicted_protein.pdb,,7X9K_8OG +data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures/7XBV_APC_holo_aligned_predicted_protein.pdb,,7XBV_APC +data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures/7XEK_9YX_holo_aligned_predicted_protein.pdb,,7XEK_9YX +data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures/7XFA_D9J_holo_aligned_predicted_protein.pdb,,7XFA_D9J +data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures/7XG5_PLP_holo_aligned_predicted_protein.pdb,,7XG5_PLP +data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures/7XI7_4RI_holo_aligned_predicted_protein.pdb,,7XI7_4RI +data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures/7XIJ_EJ3_holo_aligned_predicted_protein.pdb,,7XIJ_EJ3 +data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures/7XJN_NSD_holo_aligned_predicted_protein.pdb,,7XJN_NSD +data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures/7XPO_UPG_holo_aligned_predicted_protein.pdb,,7XPO_UPG +data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures/7XQZ_FPF_holo_aligned_predicted_protein.pdb,,7XQZ_FPF 
+data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures/7XRL_FWK_holo_aligned_predicted_protein.pdb,,7XRL_FWK +data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures/7YZU_DO7_holo_aligned_predicted_protein.pdb,,7YZU_DO7 +data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures/7Z1Q_NIO_holo_aligned_predicted_protein.pdb,,7Z1Q_NIO +data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures/7Z2O_IAJ_holo_aligned_predicted_protein.pdb,,7Z2O_IAJ +data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures/7Z7F_IF3_holo_aligned_predicted_protein.pdb,,7Z7F_IF3 +data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures/7ZCC_OGA_holo_aligned_predicted_protein.pdb,,7ZCC_OGA +data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures/7ZDY_6MJ_holo_aligned_predicted_protein.pdb,,7ZDY_6MJ +data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures/7ZF0_DHR_holo_aligned_predicted_protein.pdb,,7ZF0_DHR +data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures/7ZHP_IQY_holo_aligned_predicted_protein.pdb,,7ZHP_IQY +data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures/7ZL5_IWE_holo_aligned_predicted_protein.pdb,,7ZL5_IWE +data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures/7ZOC_T8E_holo_aligned_predicted_protein.pdb,,7ZOC_T8E +data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures/7ZTL_BCN_holo_aligned_predicted_protein.pdb,,7ZTL_BCN +data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures/7ZU2_DHT_holo_aligned_predicted_protein.pdb,,7ZU2_DHT +data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures/7ZXV_45D_holo_aligned_predicted_protein.pdb,,7ZXV_45D 
+data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures/7ZXZ_K9R_holo_aligned_predicted_protein.pdb,,7ZXZ_K9R +data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures/7ZYS_KNR_holo_aligned_predicted_protein.pdb,,7ZYS_KNR +data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures/7ZZB_KGX_holo_aligned_predicted_protein.pdb,,7ZZB_KGX +data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures/7ZZW_KKW_holo_aligned_predicted_protein.pdb,,7ZZW_KKW +data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures/8A1H_DLZ_holo_aligned_predicted_protein.pdb,,8A1H_DLZ +data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures/8A2D_KXY_holo_aligned_predicted_protein.pdb,,8A2D_KXY +data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures/8AAU_LH0_holo_aligned_predicted_protein.pdb,,8AAU_LH0 +data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures/8ACL_LQL_holo_aligned_predicted_protein.pdb,,8ACL_LQL +data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures/8AEM_LVF_holo_aligned_predicted_protein.pdb,,8AEM_LVF +data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures/8AEU_M0L_holo_aligned_predicted_protein.pdb,,8AEU_M0L +data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures/8AIE_M7L_holo_aligned_predicted_protein.pdb,,8AIE_M7L +data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures/8AIJ_M9I_holo_aligned_predicted_protein.pdb,,8AIJ_M9I +data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures/8AJX_FUM_holo_aligned_predicted_protein.pdb,,8AJX_FUM +data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures/8AP0_PRP_holo_aligned_predicted_protein.pdb,,8AP0_PRP 
+data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures/8AQL_PLG_holo_aligned_predicted_protein.pdb,,8AQL_PLG +data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures/8AUH_L9I_holo_aligned_predicted_protein.pdb,,8AUH_L9I +data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures/8AY3_OE3_holo_aligned_predicted_protein.pdb,,8AY3_OE3 +data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures/8B8H_OJQ_holo_aligned_predicted_protein.pdb,,8B8H_OJQ +data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures/8BN6_R53_holo_aligned_predicted_protein.pdb,,8BN6_R53 +data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures/8BOM_QU6_holo_aligned_predicted_protein.pdb,,8BOM_QU6 +data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures/8BPL_CP_holo_aligned_predicted_protein.pdb,,8BPL_CP +data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures/8BRO_R7E_holo_aligned_predicted_protein.pdb,,8BRO_R7E +data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures/8BTI_RFO_holo_aligned_predicted_protein.pdb,,8BTI_RFO +data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures/8C3N_ADP_holo_aligned_predicted_protein.pdb,,8C3N_ADP +data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures/8C5D_GTB_holo_aligned_predicted_protein.pdb,,8C5D_GTB +data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures/8C5M_MTA_holo_aligned_predicted_protein.pdb,,8C5M_MTA +data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures/8C7Y_TXV_holo_aligned_predicted_protein.pdb,,8C7Y_TXV +data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures/8CGC_LMR_holo_aligned_predicted_protein.pdb,,8CGC_LMR 
+data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures/8CI0_8EL_holo_aligned_predicted_protein.pdb,,8CI0_8EL +data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures/8CNH_V6U_holo_aligned_predicted_protein.pdb,,8CNH_V6U +data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures/8CSD_C5P_holo_aligned_predicted_protein.pdb,,8CSD_C5P +data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures/8D19_GSH_holo_aligned_predicted_protein.pdb,,8D19_GSH +data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures/8D39_QDB_holo_aligned_predicted_protein.pdb,,8D39_QDB +data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures/8D5D_5DK_holo_aligned_predicted_protein.pdb,,8D5D_5DK +data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures/8DHG_T78_holo_aligned_predicted_protein.pdb,,8DHG_T78 +data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures/8DKO_TFB_holo_aligned_predicted_protein.pdb,,8DKO_TFB +data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures/8DP2_UMA_holo_aligned_predicted_protein.pdb,,8DP2_UMA +data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures/8DSC_NCA_holo_aligned_predicted_protein.pdb,,8DSC_NCA +data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures/8DW5_FQ7_holo_aligned_predicted_protein.pdb,,8DW5_FQ7 +data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures/8DZT_G4P_holo_aligned_predicted_protein.pdb,,8DZT_G4P +data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures/8E77_ULP_holo_aligned_predicted_protein.pdb,,8E77_ULP +data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures/8EAB_VN2_holo_aligned_predicted_protein.pdb,,8EAB_VN2 
+data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures/8EAD_UY0_holo_aligned_predicted_protein.pdb,,8EAD_UY0 +data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures/8ERS_WQO_holo_aligned_predicted_protein.pdb,,8ERS_WQO +data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures/8EX2_Q2Q_holo_aligned_predicted_protein.pdb,,8EX2_Q2Q +data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures/8EXL_799_holo_aligned_predicted_protein.pdb,,8EXL_799 +data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures/8EYE_X4I_holo_aligned_predicted_protein.pdb,,8EYE_X4I +data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures/8F4J_PHO_holo_aligned_predicted_protein.pdb,,8F4J_PHO +data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures/8F8E_XJI_holo_aligned_predicted_protein.pdb,,8F8E_XJI +data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures/8FAV_4Y5_holo_aligned_predicted_protein.pdb,,8FAV_4Y5 +data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures/8FLN_Y7W_holo_aligned_predicted_protein.pdb,,8FLN_Y7W +data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures/8FLV_ZB9_holo_aligned_predicted_protein.pdb,,8FLV_ZB9 +data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures/8FO5_Y4U_holo_aligned_predicted_protein.pdb,,8FO5_Y4U +data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures/8FV9_80J_holo_aligned_predicted_protein.pdb,,8FV9_80J +data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures/8G0V_YHT_holo_aligned_predicted_protein.pdb,,8G0V_YHT +data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures/8G43_ZU6_holo_aligned_predicted_protein.pdb,,8G43_ZU6 
+data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures/8G6P_API_holo_aligned_predicted_protein.pdb,,8G6P_API +data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures/8GFD_ZHR_holo_aligned_predicted_protein.pdb,,8GFD_ZHR +data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures/8H0M_2EH_holo_aligned_predicted_protein.pdb,,8H0M_2EH +data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures/8HFN_XGC_holo_aligned_predicted_protein.pdb,,8HFN_XGC +data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures/8HO0_3ZI_holo_aligned_predicted_protein.pdb,,8HO0_3ZI +data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures/8SLG_G5A_holo_aligned_predicted_protein.pdb,,8SLG_G5A diff --git a/data/test_cases/posebusters_benchmark/ensemble_pocket_only_inputs.csv b/data/test_cases/posebusters_benchmark/ensemble_pocket_only_inputs.csv new file mode 100644 index 00000000..05422214 --- /dev/null +++ b/data/test_cases/posebusters_benchmark/ensemble_pocket_only_inputs.csv @@ -0,0 +1,429 @@ +protein_input,ligand_smiles,name +data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures_bs_cropped/5S8I_2LY_holo_aligned_predicted_protein.pdb,,5S8I_2LY +data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures_bs_cropped/5SAK_ZRY_holo_aligned_predicted_protein.pdb,,5SAK_ZRY +data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures_bs_cropped/5SB2_1K2_holo_aligned_predicted_protein.pdb,,5SB2_1K2 +data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures_bs_cropped/5SD5_HWI_holo_aligned_predicted_protein.pdb,,5SD5_HWI +data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures_bs_cropped/5SIS_JSM_holo_aligned_predicted_protein.pdb,,5SIS_JSM 
+data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures_bs_cropped/6M2B_EZO_holo_aligned_predicted_protein.pdb,,6M2B_EZO +data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures_bs_cropped/6M73_FNR_holo_aligned_predicted_protein.pdb,,6M73_FNR +data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures_bs_cropped/6T88_MWQ_holo_aligned_predicted_protein.pdb,,6T88_MWQ +data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures_bs_cropped/6TW5_9M2_holo_aligned_predicted_protein.pdb,,6TW5_9M2 +data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures_bs_cropped/6TW7_NZB_holo_aligned_predicted_protein.pdb,,6TW7_NZB +data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures_bs_cropped/6VS3_R6V_holo_aligned_predicted_protein.pdb,,6VS3_R6V +data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures_bs_cropped/6VTA_AKN_holo_aligned_predicted_protein.pdb,,6VTA_AKN +data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures_bs_cropped/6W59_SZD_holo_aligned_predicted_protein.pdb,,6W59_SZD +data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures_bs_cropped/6WTN_RXT_holo_aligned_predicted_protein.pdb,,6WTN_RXT +data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures_bs_cropped/6X8D_ARA_holo_aligned_predicted_protein.pdb,,6X8D_ARA +data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures_bs_cropped/6XAF_GDP_holo_aligned_predicted_protein.pdb,,6XAF_GDP +data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures_bs_cropped/6XBO_5MC_holo_aligned_predicted_protein.pdb,,6XBO_5MC +data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures_bs_cropped/6XCT_478_holo_aligned_predicted_protein.pdb,,6XCT_478 
+data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures_bs_cropped/6XG5_TOP_holo_aligned_predicted_protein.pdb,,6XG5_TOP +data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures_bs_cropped/6XHT_V2V_holo_aligned_predicted_protein.pdb,,6XHT_V2V +data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures_bs_cropped/6XM9_V55_holo_aligned_predicted_protein.pdb,,6XM9_V55 +data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures_bs_cropped/6XUM_30L_holo_aligned_predicted_protein.pdb,,6XUM_30L +data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures_bs_cropped/6Y7L_QMG_holo_aligned_predicted_protein.pdb,,6Y7L_QMG +data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures_bs_cropped/6YDY_K73_holo_aligned_predicted_protein.pdb,,6YDY_K73 +data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures_bs_cropped/6YJA_2BA_holo_aligned_predicted_protein.pdb,,6YJA_2BA +data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures_bs_cropped/6YMS_OZH_holo_aligned_predicted_protein.pdb,,6YMS_OZH +data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures_bs_cropped/6YQV_8K2_holo_aligned_predicted_protein.pdb,,6YQV_8K2 +data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures_bs_cropped/6YQW_82I_holo_aligned_predicted_protein.pdb,,6YQW_82I +data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures_bs_cropped/6YR2_T1C_holo_aligned_predicted_protein.pdb,,6YR2_T1C +data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures_bs_cropped/6YRV_PJ8_holo_aligned_predicted_protein.pdb,,6YRV_PJ8 +data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures_bs_cropped/6YSP_PAL_holo_aligned_predicted_protein.pdb,,6YSP_PAL 
+data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures_bs_cropped/6YT6_PKE_holo_aligned_predicted_protein.pdb,,6YT6_PKE +data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures_bs_cropped/6YYO_Q1K_holo_aligned_predicted_protein.pdb,,6YYO_Q1K +data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures_bs_cropped/6Z0R_Q4H_holo_aligned_predicted_protein.pdb,,6Z0R_Q4H +data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures_bs_cropped/6Z14_Q4Z_holo_aligned_predicted_protein.pdb,,6Z14_Q4Z +data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures_bs_cropped/6Z1C_7EY_holo_aligned_predicted_protein.pdb,,6Z1C_7EY +data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures_bs_cropped/6Z2C_Q5E_holo_aligned_predicted_protein.pdb,,6Z2C_Q5E +data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures_bs_cropped/6Z4N_Q7B_holo_aligned_predicted_protein.pdb,,6Z4N_Q7B +data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures_bs_cropped/6Z5Z_BDF_holo_aligned_predicted_protein.pdb,,6Z5Z_BDF +data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures_bs_cropped/6ZAE_ACV_holo_aligned_predicted_protein.pdb,,6ZAE_ACV +data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures_bs_cropped/6ZC3_JOR_holo_aligned_predicted_protein.pdb,,6ZC3_JOR +data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures_bs_cropped/6ZCY_QF8_holo_aligned_predicted_protein.pdb,,6ZCY_QF8 +data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures_bs_cropped/6ZK5_IMH_holo_aligned_predicted_protein.pdb,,6ZK5_IMH +data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures_bs_cropped/6ZPB_3D1_holo_aligned_predicted_protein.pdb,,6ZPB_3D1 
+data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures_bs_cropped/6ZR8_QOZ_holo_aligned_predicted_protein.pdb,,6ZR8_QOZ +data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures_bs_cropped/6ZT2_QPK_holo_aligned_predicted_protein.pdb,,6ZT2_QPK +data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures_bs_cropped/6ZX3_QRZ_holo_aligned_predicted_protein.pdb,,6ZX3_QRZ +data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures_bs_cropped/6ZXQ_IMO_holo_aligned_predicted_protein.pdb,,6ZXQ_IMO +data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures_bs_cropped/7A1P_QW2_holo_aligned_predicted_protein.pdb,,7A1P_QW2 +data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures_bs_cropped/7A9E_R4W_holo_aligned_predicted_protein.pdb,,7A9E_R4W +data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures_bs_cropped/7A9H_TPP_holo_aligned_predicted_protein.pdb,,7A9H_TPP +data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures_bs_cropped/7AA0_R6B_holo_aligned_predicted_protein.pdb,,7AA0_R6B +data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures_bs_cropped/7AFX_R9K_holo_aligned_predicted_protein.pdb,,7AFX_R9K +data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures_bs_cropped/7AKL_RK5_holo_aligned_predicted_protein.pdb,,7AKL_RK5 +data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures_bs_cropped/7AMC_73B_holo_aligned_predicted_protein.pdb,,7AMC_73B +data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures_bs_cropped/7AN5_RDH_holo_aligned_predicted_protein.pdb,,7AN5_RDH +data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures_bs_cropped/7AS1_21G_holo_aligned_predicted_protein.pdb,,7AS1_21G 
+data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures_bs_cropped/7AVI_S2Q_holo_aligned_predicted_protein.pdb,,7AVI_S2Q +data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures_bs_cropped/7B0E_C2E_holo_aligned_predicted_protein.pdb,,7B0E_C2E +data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures_bs_cropped/7B2C_TP7_holo_aligned_predicted_protein.pdb,,7B2C_TP7 +data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures_bs_cropped/7B94_ANP_holo_aligned_predicted_protein.pdb,,7B94_ANP +data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures_bs_cropped/7BA0_T5H_holo_aligned_predicted_protein.pdb,,7BA0_T5H +data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures_bs_cropped/7BCP_GCO_holo_aligned_predicted_protein.pdb,,7BCP_GCO +data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures_bs_cropped/7BHX_TO5_holo_aligned_predicted_protein.pdb,,7BHX_TO5 +data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures_bs_cropped/7BJ6_TVK_holo_aligned_predicted_protein.pdb,,7BJ6_TVK +data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures_bs_cropped/7BJJ_TVW_holo_aligned_predicted_protein.pdb,,7BJJ_TVW +data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures_bs_cropped/7BKA_4JC_holo_aligned_predicted_protein.pdb,,7BKA_4JC +data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures_bs_cropped/7BLA_WCS_holo_aligned_predicted_protein.pdb,,7BLA_WCS +data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures_bs_cropped/7BLG_GAL_holo_aligned_predicted_protein.pdb,,7BLG_GAL +data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures_bs_cropped/7BMI_U4B_holo_aligned_predicted_protein.pdb,,7BMI_U4B 
+data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures_bs_cropped/7BNH_BEZ_holo_aligned_predicted_protein.pdb,,7BNH_BEZ +data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures_bs_cropped/7BTT_F8R_holo_aligned_predicted_protein.pdb,,7BTT_F8R +data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures_bs_cropped/7C0U_FGO_holo_aligned_predicted_protein.pdb,,7C0U_FGO +data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures_bs_cropped/7C3U_AZG_holo_aligned_predicted_protein.pdb,,7C3U_AZG +data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures_bs_cropped/7C6P_SQH_holo_aligned_predicted_protein.pdb,,7C6P_SQH +data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures_bs_cropped/7C8Q_DSG_holo_aligned_predicted_protein.pdb,,7C8Q_DSG +data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures_bs_cropped/7CD9_FVR_holo_aligned_predicted_protein.pdb,,7CD9_FVR +data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures_bs_cropped/7CIJ_G0C_holo_aligned_predicted_protein.pdb,,7CIJ_G0C +data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures_bs_cropped/7CL8_TES_holo_aligned_predicted_protein.pdb,,7CL8_TES +data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures_bs_cropped/7CNQ_G8X_holo_aligned_predicted_protein.pdb,,7CNQ_G8X +data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures_bs_cropped/7CNS_PMV_holo_aligned_predicted_protein.pdb,,7CNS_PMV +data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures_bs_cropped/7CTM_BDP_holo_aligned_predicted_protein.pdb,,7CTM_BDP +data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures_bs_cropped/7CUO_PHB_holo_aligned_predicted_protein.pdb,,7CUO_PHB 
+data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures_bs_cropped/7D0P_1VU_holo_aligned_predicted_protein.pdb,,7D0P_1VU +data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures_bs_cropped/7D5C_GV6_holo_aligned_predicted_protein.pdb,,7D5C_GV6 +data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures_bs_cropped/7D6O_MTE_holo_aligned_predicted_protein.pdb,,7D6O_MTE +data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures_bs_cropped/7D8Q_GZF_holo_aligned_predicted_protein.pdb,,7D8Q_GZF +data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures_bs_cropped/7D9L_GSF_holo_aligned_predicted_protein.pdb,,7D9L_GSF +data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures_bs_cropped/7DIN_MPO_holo_aligned_predicted_protein.pdb,,7DIN_MPO +data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures_bs_cropped/7DKT_GLF_holo_aligned_predicted_protein.pdb,,7DKT_GLF +data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures_bs_cropped/7DQL_4CL_holo_aligned_predicted_protein.pdb,,7DQL_4CL +data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures_bs_cropped/7DUA_HJ0_holo_aligned_predicted_protein.pdb,,7DUA_HJ0 +data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures_bs_cropped/7E2S_BLA_holo_aligned_predicted_protein.pdb,,7E2S_BLA +data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures_bs_cropped/7E4L_MDN_holo_aligned_predicted_protein.pdb,,7E4L_MDN +data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures_bs_cropped/7EBG_J0L_holo_aligned_predicted_protein.pdb,,7EBG_J0L +data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures_bs_cropped/7ECR_SIN_holo_aligned_predicted_protein.pdb,,7ECR_SIN 
+data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures_bs_cropped/7ED2_A3P_holo_aligned_predicted_protein.pdb,,7ED2_A3P +data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures_bs_cropped/7ELT_TYM_holo_aligned_predicted_protein.pdb,,7ELT_TYM +data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures_bs_cropped/7EN7_J79_holo_aligned_predicted_protein.pdb,,7EN7_J79 +data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures_bs_cropped/7EPV_FDA_holo_aligned_predicted_protein.pdb,,7EPV_FDA +data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures_bs_cropped/7ES1_UDP_holo_aligned_predicted_protein.pdb,,7ES1_UDP +data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures_bs_cropped/7F51_BA7_holo_aligned_predicted_protein.pdb,,7F51_BA7 +data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures_bs_cropped/7F5D_EUO_holo_aligned_predicted_protein.pdb,,7F5D_EUO +data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures_bs_cropped/7F8T_FAD_holo_aligned_predicted_protein.pdb,,7F8T_FAD +data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures_bs_cropped/7FB7_8NF_holo_aligned_predicted_protein.pdb,,7FB7_8NF +data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures_bs_cropped/7FHA_ADX_holo_aligned_predicted_protein.pdb,,7FHA_ADX +data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures_bs_cropped/7FRX_O88_holo_aligned_predicted_protein.pdb,,7FRX_O88 +data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures_bs_cropped/7FT9_4MB_holo_aligned_predicted_protein.pdb,,7FT9_4MB +data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures_bs_cropped/7JG0_GAR_holo_aligned_predicted_protein.pdb,,7JG0_GAR 
+data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures_bs_cropped/7JGW_V9S_holo_aligned_predicted_protein.pdb,,7JGW_V9S +data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures_bs_cropped/7JHQ_VAJ_holo_aligned_predicted_protein.pdb,,7JHQ_VAJ +data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures_bs_cropped/7JMV_4NC_holo_aligned_predicted_protein.pdb,,7JMV_4NC +data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures_bs_cropped/7JNB_A2G_holo_aligned_predicted_protein.pdb,,7JNB_A2G +data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures_bs_cropped/7JR8_VH7_holo_aligned_predicted_protein.pdb,,7JR8_VH7 +data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures_bs_cropped/7JUD_MMA_holo_aligned_predicted_protein.pdb,,7JUD_MMA +data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures_bs_cropped/7JXX_VP7_holo_aligned_predicted_protein.pdb,,7JXX_VP7 +data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures_bs_cropped/7JY3_VUD_holo_aligned_predicted_protein.pdb,,7JY3_VUD +data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures_bs_cropped/7K0V_VQP_holo_aligned_predicted_protein.pdb,,7K0V_VQP +data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures_bs_cropped/7K41_VUA_holo_aligned_predicted_protein.pdb,,7K41_VUA +data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures_bs_cropped/7KB1_WBJ_holo_aligned_predicted_protein.pdb,,7KB1_WBJ +data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures_bs_cropped/7KC5_BJZ_holo_aligned_predicted_protein.pdb,,7KC5_BJZ +data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures_bs_cropped/7KFO_IAC_holo_aligned_predicted_protein.pdb,,7KFO_IAC 
+data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures_bs_cropped/7KLX_WOV_holo_aligned_predicted_protein.pdb,,7KLX_WOV +data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures_bs_cropped/7KM8_WPD_holo_aligned_predicted_protein.pdb,,7KM8_WPD +data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures_bs_cropped/7KP6_WTP_holo_aligned_predicted_protein.pdb,,7KP6_WTP +data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures_bs_cropped/7KQU_YOF_holo_aligned_predicted_protein.pdb,,7KQU_YOF +data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures_bs_cropped/7KRU_ATP_holo_aligned_predicted_protein.pdb,,7KRU_ATP +data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures_bs_cropped/7KZ9_XN7_holo_aligned_predicted_protein.pdb,,7KZ9_XN7 +data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures_bs_cropped/7L00_XCJ_holo_aligned_predicted_protein.pdb,,7L00_XCJ +data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures_bs_cropped/7L03_F9F_holo_aligned_predicted_protein.pdb,,7L03_F9F +data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures_bs_cropped/7L5F_XNG_holo_aligned_predicted_protein.pdb,,7L5F_XNG +data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures_bs_cropped/7L6D_BMF_holo_aligned_predicted_protein.pdb,,7L6D_BMF +data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures_bs_cropped/7L7C_XQ1_holo_aligned_predicted_protein.pdb,,7L7C_XQ1 +data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures_bs_cropped/7L81_UD4_holo_aligned_predicted_protein.pdb,,7L81_UD4 +data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures_bs_cropped/7LB3_XXS_holo_aligned_predicted_protein.pdb,,7LB3_XXS 
+data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures_bs_cropped/7LCU_XTA_holo_aligned_predicted_protein.pdb,,7LCU_XTA +data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures_bs_cropped/7LEV_0JO_holo_aligned_predicted_protein.pdb,,7LEV_0JO +data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures_bs_cropped/7LJN_GTP_holo_aligned_predicted_protein.pdb,,7LJN_GTP +data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures_bs_cropped/7LMO_NYO_holo_aligned_predicted_protein.pdb,,7LMO_NYO +data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures_bs_cropped/7LOE_Y84_holo_aligned_predicted_protein.pdb,,7LOE_Y84 +data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures_bs_cropped/7LOU_IFM_holo_aligned_predicted_protein.pdb,,7LOU_IFM +data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures_bs_cropped/7LT0_ONJ_holo_aligned_predicted_protein.pdb,,7LT0_ONJ +data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures_bs_cropped/7LZD_YHY_holo_aligned_predicted_protein.pdb,,7LZD_YHY +data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures_bs_cropped/7LZQ_YJV_holo_aligned_predicted_protein.pdb,,7LZQ_YJV +data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures_bs_cropped/7M31_TDR_holo_aligned_predicted_protein.pdb,,7M31_TDR +data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures_bs_cropped/7M3H_YPV_holo_aligned_predicted_protein.pdb,,7M3H_YPV +data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures_bs_cropped/7M41_YQG_holo_aligned_predicted_protein.pdb,,7M41_YQG +data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures_bs_cropped/7M6K_YRJ_holo_aligned_predicted_protein.pdb,,7M6K_YRJ 
+data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures_bs_cropped/7MAE_XUS_holo_aligned_predicted_protein.pdb,,7MAE_XUS +data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures_bs_cropped/7MEU_MGP_holo_aligned_predicted_protein.pdb,,7MEU_MGP +data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures_bs_cropped/7MFP_Z7P_holo_aligned_predicted_protein.pdb,,7MFP_Z7P +data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures_bs_cropped/7MGT_ZD4_holo_aligned_predicted_protein.pdb,,7MGT_ZD4 +data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures_bs_cropped/7MGY_ZD1_holo_aligned_predicted_protein.pdb,,7MGY_ZD1 +data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures_bs_cropped/7MMH_ZJY_holo_aligned_predicted_protein.pdb,,7MMH_ZJY +data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures_bs_cropped/7MOI_HPS_holo_aligned_predicted_protein.pdb,,7MOI_HPS +data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures_bs_cropped/7MRH_ZMJ_holo_aligned_predicted_protein.pdb,,7MRH_ZMJ +data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures_bs_cropped/7MS7_ZQ1_holo_aligned_predicted_protein.pdb,,7MS7_ZQ1 +data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures_bs_cropped/7MSR_DCA_holo_aligned_predicted_protein.pdb,,7MSR_DCA +data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures_bs_cropped/7MWN_WI5_holo_aligned_predicted_protein.pdb,,7MWN_WI5 +data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures_bs_cropped/7MWU_ZPM_holo_aligned_predicted_protein.pdb,,7MWU_ZPM +data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures_bs_cropped/7MY1_IPE_holo_aligned_predicted_protein.pdb,,7MY1_IPE 
+data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures_bs_cropped/7MYU_ZR7_holo_aligned_predicted_protein.pdb,,7MYU_ZR7 +data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures_bs_cropped/7MZS_GLA_holo_aligned_predicted_protein.pdb,,7MZS_GLA +data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures_bs_cropped/7N03_ZRP_holo_aligned_predicted_protein.pdb,,7N03_ZRP +data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures_bs_cropped/7N4N_0BK_holo_aligned_predicted_protein.pdb,,7N4N_0BK +data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures_bs_cropped/7N4W_P4V_holo_aligned_predicted_protein.pdb,,7N4W_P4V +data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures_bs_cropped/7N6F_0I1_holo_aligned_predicted_protein.pdb,,7N6F_0I1 +data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures_bs_cropped/7N7B_T3F_holo_aligned_predicted_protein.pdb,,7N7B_T3F +data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures_bs_cropped/7N7H_CTP_holo_aligned_predicted_protein.pdb,,7N7H_CTP +data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures_bs_cropped/7NA4_1I9_holo_aligned_predicted_protein.pdb,,7NA4_1I9 +data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures_bs_cropped/7NB4_U6Q_holo_aligned_predicted_protein.pdb,,7NB4_U6Q +data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures_bs_cropped/7NF0_BYN_holo_aligned_predicted_protein.pdb,,7NF0_BYN +data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures_bs_cropped/7NF3_4LU_holo_aligned_predicted_protein.pdb,,7NF3_4LU +data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures_bs_cropped/7NFB_GEN_holo_aligned_predicted_protein.pdb,,7NFB_GEN 
+data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures_bs_cropped/7NGW_UAW_holo_aligned_predicted_protein.pdb,,7NGW_UAW +data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures_bs_cropped/7NLK_UHK_holo_aligned_predicted_protein.pdb,,7NLK_UHK +data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures_bs_cropped/7NLV_UJE_holo_aligned_predicted_protein.pdb,,7NLV_UJE +data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures_bs_cropped/7NML_I7B_holo_aligned_predicted_protein.pdb,,7NML_I7B +data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures_bs_cropped/7NP6_UK8_holo_aligned_predicted_protein.pdb,,7NP6_UK8 +data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures_bs_cropped/7NPL_UKZ_holo_aligned_predicted_protein.pdb,,7NPL_UKZ +data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures_bs_cropped/7NR6_UO8_holo_aligned_predicted_protein.pdb,,7NR6_UO8 +data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures_bs_cropped/7NR8_UOE_holo_aligned_predicted_protein.pdb,,7NR8_UOE +data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures_bs_cropped/7NSW_HC4_holo_aligned_predicted_protein.pdb,,7NSW_HC4 +data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures_bs_cropped/7NTG_F6R_holo_aligned_predicted_protein.pdb,,7NTG_F6R +data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures_bs_cropped/7NU0_DCL_holo_aligned_predicted_protein.pdb,,7NU0_DCL +data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures_bs_cropped/7NUT_GLP_holo_aligned_predicted_protein.pdb,,7NUT_GLP +data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures_bs_cropped/7NXO_UU8_holo_aligned_predicted_protein.pdb,,7NXO_UU8 
+data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures_bs_cropped/7O0N_CDP_holo_aligned_predicted_protein.pdb,,7O0N_CDP +data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures_bs_cropped/7O1T_5X8_holo_aligned_predicted_protein.pdb,,7O1T_5X8 +data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures_bs_cropped/7OCB_V88_holo_aligned_predicted_protein.pdb,,7OCB_V88 +data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures_bs_cropped/7ODX_DGP_holo_aligned_predicted_protein.pdb,,7ODX_DGP +data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures_bs_cropped/7ODY_DGI_holo_aligned_predicted_protein.pdb,,7ODY_DGI +data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures_bs_cropped/7OEO_V9Z_holo_aligned_predicted_protein.pdb,,7OEO_V9Z +data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures_bs_cropped/7OFF_VCB_holo_aligned_predicted_protein.pdb,,7OFF_VCB +data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures_bs_cropped/7OFK_VCH_holo_aligned_predicted_protein.pdb,,7OFK_VCH +data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures_bs_cropped/7OKC_VFE_holo_aligned_predicted_protein.pdb,,7OKC_VFE +data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures_bs_cropped/7OKF_VH5_holo_aligned_predicted_protein.pdb,,7OKF_VH5 +data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures_bs_cropped/7OLI_8HG_holo_aligned_predicted_protein.pdb,,7OLI_8HG +data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures_bs_cropped/7OLT_58J_holo_aligned_predicted_protein.pdb,,7OLT_58J +data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures_bs_cropped/7OMJ_GCP_holo_aligned_predicted_protein.pdb,,7OMJ_GCP 
+data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures_bs_cropped/7OMX_CNA_holo_aligned_predicted_protein.pdb,,7OMX_CNA +data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures_bs_cropped/7OP9_06K_holo_aligned_predicted_protein.pdb,,7OP9_06K +data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures_bs_cropped/7OPG_06N_holo_aligned_predicted_protein.pdb,,7OPG_06N +data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures_bs_cropped/7ORW_7WA_holo_aligned_predicted_protein.pdb,,7ORW_7WA +data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures_bs_cropped/7OSO_0V1_holo_aligned_predicted_protein.pdb,,7OSO_0V1 +data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures_bs_cropped/7OU8_1XI_holo_aligned_predicted_protein.pdb,,7OU8_1XI +data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures_bs_cropped/7OZ9_NGK_holo_aligned_predicted_protein.pdb,,7OZ9_NGK +data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures_bs_cropped/7OZC_G6S_holo_aligned_predicted_protein.pdb,,7OZC_G6S +data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures_bs_cropped/7P1F_KFN_holo_aligned_predicted_protein.pdb,,7P1F_KFN +data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures_bs_cropped/7P1M_4IU_holo_aligned_predicted_protein.pdb,,7P1M_4IU +data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures_bs_cropped/7P2I_MFU_holo_aligned_predicted_protein.pdb,,7P2I_MFU +data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures_bs_cropped/7P2W_4QR_holo_aligned_predicted_protein.pdb,,7P2W_4QR +data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures_bs_cropped/7P4C_5OV_holo_aligned_predicted_protein.pdb,,7P4C_5OV 
+data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures_bs_cropped/7P4J_5JK_holo_aligned_predicted_protein.pdb,,7P4J_5JK +data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures_bs_cropped/7P4V_DAT_holo_aligned_predicted_protein.pdb,,7P4V_DAT +data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures_bs_cropped/7P5T_5YG_holo_aligned_predicted_protein.pdb,,7P5T_5YG +data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures_bs_cropped/7P85_5ZG_holo_aligned_predicted_protein.pdb,,7P85_5ZG +data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures_bs_cropped/7PA4_C_holo_aligned_predicted_protein.pdb,,7PA4_C +data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures_bs_cropped/7PGX_FMN_holo_aligned_predicted_protein.pdb,,7PGX_FMN +data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures_bs_cropped/7PIH_7QW_holo_aligned_predicted_protein.pdb,,7PIH_7QW +data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures_bs_cropped/7PJQ_OWH_holo_aligned_predicted_protein.pdb,,7PJQ_OWH +data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures_bs_cropped/7PK0_BYC_holo_aligned_predicted_protein.pdb,,7PK0_BYC +data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures_bs_cropped/7PL1_SFG_holo_aligned_predicted_protein.pdb,,7PL1_SFG +data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures_bs_cropped/7POM_7VZ_holo_aligned_predicted_protein.pdb,,7POM_7VZ +data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures_bs_cropped/7PRI_7TI_holo_aligned_predicted_protein.pdb,,7PRI_7TI +data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures_bs_cropped/7PRM_81I_holo_aligned_predicted_protein.pdb,,7PRM_81I 
+data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures_bs_cropped/7PT3_3KK_holo_aligned_predicted_protein.pdb,,7PT3_3KK +data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures_bs_cropped/7PUV_84Z_holo_aligned_predicted_protein.pdb,,7PUV_84Z +data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures_bs_cropped/7Q19_DSM_holo_aligned_predicted_protein.pdb,,7Q19_DSM +data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures_bs_cropped/7Q25_8J9_holo_aligned_predicted_protein.pdb,,7Q25_8J9 +data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures_bs_cropped/7Q27_8KC_holo_aligned_predicted_protein.pdb,,7Q27_8KC +data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures_bs_cropped/7Q2B_M6H_holo_aligned_predicted_protein.pdb,,7Q2B_M6H +data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures_bs_cropped/7Q5I_I0F_holo_aligned_predicted_protein.pdb,,7Q5I_I0F +data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures_bs_cropped/7QE4_NGA_holo_aligned_predicted_protein.pdb,,7QE4_NGA +data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures_bs_cropped/7QF4_RBF_holo_aligned_predicted_protein.pdb,,7QF4_RBF +data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures_bs_cropped/7QFM_AY3_holo_aligned_predicted_protein.pdb,,7QFM_AY3 +data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures_bs_cropped/7QGP_DJ8_holo_aligned_predicted_protein.pdb,,7QGP_DJ8 +data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures_bs_cropped/7QHG_T3B_holo_aligned_predicted_protein.pdb,,7QHG_T3B +data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures_bs_cropped/7QHL_D5P_holo_aligned_predicted_protein.pdb,,7QHL_D5P 
+data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures_bs_cropped/7QK0_EBL_holo_aligned_predicted_protein.pdb,,7QK0_EBL +data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures_bs_cropped/7QPP_VDX_holo_aligned_predicted_protein.pdb,,7QPP_VDX +data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures_bs_cropped/7QSW_CAP_holo_aligned_predicted_protein.pdb,,7QSW_CAP +data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures_bs_cropped/7QTA_URI_holo_aligned_predicted_protein.pdb,,7QTA_URI +data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures_bs_cropped/7R3D_APR_holo_aligned_predicted_protein.pdb,,7R3D_APR +data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures_bs_cropped/7R59_I5F_holo_aligned_predicted_protein.pdb,,7R59_I5F +data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures_bs_cropped/7R6J_2I7_holo_aligned_predicted_protein.pdb,,7R6J_2I7 +data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures_bs_cropped/7R7R_AWJ_holo_aligned_predicted_protein.pdb,,7R7R_AWJ +data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures_bs_cropped/7R9N_F97_holo_aligned_predicted_protein.pdb,,7R9N_F97 +data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures_bs_cropped/7RC3_SAH_holo_aligned_predicted_protein.pdb,,7RC3_SAH +data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures_bs_cropped/7REE_4LY_holo_aligned_predicted_protein.pdb,,7REE_4LY +data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures_bs_cropped/7RH3_59O_holo_aligned_predicted_protein.pdb,,7RH3_59O +data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures_bs_cropped/7RH8_UTP_holo_aligned_predicted_protein.pdb,,7RH8_UTP 
+data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures_bs_cropped/7RKW_5TV_holo_aligned_predicted_protein.pdb,,7RKW_5TV +data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures_bs_cropped/7RNI_60I_holo_aligned_predicted_protein.pdb,,7RNI_60I +data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures_bs_cropped/7ROR_69X_holo_aligned_predicted_protein.pdb,,7ROR_69X +data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures_bs_cropped/7ROU_66I_holo_aligned_predicted_protein.pdb,,7ROU_66I +data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures_bs_cropped/7RPZ_6IC_holo_aligned_predicted_protein.pdb,,7RPZ_6IC +data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures_bs_cropped/7RSV_7IQ_holo_aligned_predicted_protein.pdb,,7RSV_7IQ +data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures_bs_cropped/7RUI_7QZ_holo_aligned_predicted_protein.pdb,,7RUI_7QZ +data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures_bs_cropped/7RWO_7WN_holo_aligned_predicted_protein.pdb,,7RWO_7WN +data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures_bs_cropped/7RWS_4UR_holo_aligned_predicted_protein.pdb,,7RWS_4UR +data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures_bs_cropped/7RZL_NPO_holo_aligned_predicted_protein.pdb,,7RZL_NPO +data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures_bs_cropped/7S45_ACO_holo_aligned_predicted_protein.pdb,,7S45_ACO +data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures_bs_cropped/7S9H_7PP_holo_aligned_predicted_protein.pdb,,7S9H_7PP +data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures_bs_cropped/7SCW_GSP_holo_aligned_predicted_protein.pdb,,7SCW_GSP 
+data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures_bs_cropped/7SDD_4IP_holo_aligned_predicted_protein.pdb,,7SDD_4IP +data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures_bs_cropped/7SED_8VD_holo_aligned_predicted_protein.pdb,,7SED_8VD +data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures_bs_cropped/7SFO_98L_holo_aligned_predicted_protein.pdb,,7SFO_98L +data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures_bs_cropped/7SGV_L30_holo_aligned_predicted_protein.pdb,,7SGV_L30 +data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures_bs_cropped/7SIU_9ID_holo_aligned_predicted_protein.pdb,,7SIU_9ID +data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures_bs_cropped/7SNE_9XR_holo_aligned_predicted_protein.pdb,,7SNE_9XR +data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures_bs_cropped/7SSM_B7L_holo_aligned_predicted_protein.pdb,,7SSM_B7L +data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures_bs_cropped/7SUC_COM_holo_aligned_predicted_protein.pdb,,7SUC_COM +data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures_bs_cropped/7SZA_DUI_holo_aligned_predicted_protein.pdb,,7SZA_DUI +data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures_bs_cropped/7T0D_FPP_holo_aligned_predicted_protein.pdb,,7T0D_FPP +data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures_bs_cropped/7T0U_E3I_holo_aligned_predicted_protein.pdb,,7T0U_E3I +data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures_bs_cropped/7T1D_E7K_holo_aligned_predicted_protein.pdb,,7T1D_E7K +data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures_bs_cropped/7T2I_E9F_holo_aligned_predicted_protein.pdb,,7T2I_E9F 
+data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures_bs_cropped/7T3E_SLB_holo_aligned_predicted_protein.pdb,,7T3E_SLB +data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures_bs_cropped/7T3F_EM0_holo_aligned_predicted_protein.pdb,,7T3F_EM0 +data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures_bs_cropped/7T9O_GEI_holo_aligned_predicted_protein.pdb,,7T9O_GEI +data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures_bs_cropped/7TB0_UD1_holo_aligned_predicted_protein.pdb,,7TB0_UD1 +data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures_bs_cropped/7TBU_S3P_holo_aligned_predicted_protein.pdb,,7TBU_S3P +data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures_bs_cropped/7TE8_P0T_holo_aligned_predicted_protein.pdb,,7TE8_P0T +data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures_bs_cropped/7TH4_FFO_holo_aligned_predicted_protein.pdb,,7TH4_FFO +data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures_bs_cropped/7THI_PGA_holo_aligned_predicted_protein.pdb,,7THI_PGA +data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures_bs_cropped/7TM6_GPJ_holo_aligned_predicted_protein.pdb,,7TM6_GPJ +data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures_bs_cropped/7TOM_5AD_holo_aligned_predicted_protein.pdb,,7TOM_5AD +data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures_bs_cropped/7TS6_KMI_holo_aligned_predicted_protein.pdb,,7TS6_KMI +data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures_bs_cropped/7TSF_H4B_holo_aligned_predicted_protein.pdb,,7TSF_H4B +data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures_bs_cropped/7TUO_KL9_holo_aligned_predicted_protein.pdb,,7TUO_KL9 
+data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures_bs_cropped/7TWC_CXS_holo_aligned_predicted_protein.pdb,,7TWC_CXS +data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures_bs_cropped/7TXK_LW8_holo_aligned_predicted_protein.pdb,,7TXK_LW8 +data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures_bs_cropped/7TXP_0FX_holo_aligned_predicted_protein.pdb,,7TXP_0FX +data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures_bs_cropped/7TYP_KUR_holo_aligned_predicted_protein.pdb,,7TYP_KUR +data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures_bs_cropped/7U0U_FK5_holo_aligned_predicted_protein.pdb,,7U0U_FK5 +data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures_bs_cropped/7U3J_L6U_holo_aligned_predicted_protein.pdb,,7U3J_L6U +data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures_bs_cropped/7UAS_MBU_holo_aligned_predicted_protein.pdb,,7UAS_MBU +data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures_bs_cropped/7UAW_MF6_holo_aligned_predicted_protein.pdb,,7UAW_MF6 +data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures_bs_cropped/7UEY_N0R_holo_aligned_predicted_protein.pdb,,7UEY_N0R +data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures_bs_cropped/7UF2_5SP_holo_aligned_predicted_protein.pdb,,7UF2_5SP +data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures_bs_cropped/7UJ4_OQ4_holo_aligned_predicted_protein.pdb,,7UJ4_OQ4 +data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures_bs_cropped/7UJ5_DGL_holo_aligned_predicted_protein.pdb,,7UJ5_DGL +data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures_bs_cropped/7UJF_R3V_holo_aligned_predicted_protein.pdb,,7UJF_R3V 
+data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures_bs_cropped/7ULC_56B_holo_aligned_predicted_protein.pdb,,7ULC_56B +data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures_bs_cropped/7UMV_NUU_holo_aligned_predicted_protein.pdb,,7UMV_NUU +data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures_bs_cropped/7UMW_NAD_holo_aligned_predicted_protein.pdb,,7UMW_NAD +data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures_bs_cropped/7UP3_NZ0_holo_aligned_predicted_protein.pdb,,7UP3_NZ0 +data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures_bs_cropped/7UQ3_O2U_holo_aligned_predicted_protein.pdb,,7UQ3_O2U +data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures_bs_cropped/7USH_82V_holo_aligned_predicted_protein.pdb,,7USH_82V +data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures_bs_cropped/7UTW_NAI_holo_aligned_predicted_protein.pdb,,7UTW_NAI +data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures_bs_cropped/7UXS_OJC_holo_aligned_predicted_protein.pdb,,7UXS_OJC +data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures_bs_cropped/7UY4_SMI_holo_aligned_predicted_protein.pdb,,7UY4_SMI +data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures_bs_cropped/7UYB_OK0_holo_aligned_predicted_protein.pdb,,7UYB_OK0 +data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures_bs_cropped/7V14_ORU_holo_aligned_predicted_protein.pdb,,7V14_ORU +data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures_bs_cropped/7V3N_AKG_holo_aligned_predicted_protein.pdb,,7V3N_AKG +data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures_bs_cropped/7V3S_5I9_holo_aligned_predicted_protein.pdb,,7V3S_5I9 
+data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures_bs_cropped/7V43_C4O_holo_aligned_predicted_protein.pdb,,7V43_C4O +data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures_bs_cropped/7V8Z_5YH_holo_aligned_predicted_protein.pdb,,7V8Z_5YH +data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures_bs_cropped/7VB8_STL_holo_aligned_predicted_protein.pdb,,7VB8_STL +data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures_bs_cropped/7VBU_6I4_holo_aligned_predicted_protein.pdb,,7VBU_6I4 +data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures_bs_cropped/7VC5_9SF_holo_aligned_predicted_protein.pdb,,7VC5_9SF +data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures_bs_cropped/7VJT_7IJ_holo_aligned_predicted_protein.pdb,,7VJT_7IJ +data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures_bs_cropped/7VKZ_NOJ_holo_aligned_predicted_protein.pdb,,7VKZ_NOJ +data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures_bs_cropped/7VQ9_ISY_holo_aligned_predicted_protein.pdb,,7VQ9_ISY +data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures_bs_cropped/7VWF_K55_holo_aligned_predicted_protein.pdb,,7VWF_K55 +data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures_bs_cropped/7VYJ_CA0_holo_aligned_predicted_protein.pdb,,7VYJ_CA0 +data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures_bs_cropped/7W05_GMP_holo_aligned_predicted_protein.pdb,,7W05_GMP +data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures_bs_cropped/7W06_ITN_holo_aligned_predicted_protein.pdb,,7W06_ITN +data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures_bs_cropped/7W6F_8I6_holo_aligned_predicted_protein.pdb,,7W6F_8I6 
+data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures_bs_cropped/7WCF_ACP_holo_aligned_predicted_protein.pdb,,7WCF_ACP +data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures_bs_cropped/7WDT_NGS_holo_aligned_predicted_protein.pdb,,7WDT_NGS +data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures_bs_cropped/7WJB_BGC_holo_aligned_predicted_protein.pdb,,7WJB_BGC +data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures_bs_cropped/7WKL_CAQ_holo_aligned_predicted_protein.pdb,,7WKL_CAQ +data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures_bs_cropped/7WL4_JFU_holo_aligned_predicted_protein.pdb,,7WL4_JFU +data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures_bs_cropped/7WN5_JGL_holo_aligned_predicted_protein.pdb,,7WN5_JGL +data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures_bs_cropped/7WPW_F15_holo_aligned_predicted_protein.pdb,,7WPW_F15 +data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures_bs_cropped/7WQQ_5Z6_holo_aligned_predicted_protein.pdb,,7WQQ_5Z6 +data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures_bs_cropped/7WUX_6OI_holo_aligned_predicted_protein.pdb,,7WUX_6OI +data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures_bs_cropped/7WUY_76N_holo_aligned_predicted_protein.pdb,,7WUY_76N +data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures_bs_cropped/7WY1_D0L_holo_aligned_predicted_protein.pdb,,7WY1_D0L +data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures_bs_cropped/7X5N_5M5_holo_aligned_predicted_protein.pdb,,7X5N_5M5 +data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures_bs_cropped/7X9K_8OG_holo_aligned_predicted_protein.pdb,,7X9K_8OG 
+data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures_bs_cropped/7XBV_APC_holo_aligned_predicted_protein.pdb,,7XBV_APC +data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures_bs_cropped/7XEK_9YX_holo_aligned_predicted_protein.pdb,,7XEK_9YX +data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures_bs_cropped/7XFA_D9J_holo_aligned_predicted_protein.pdb,,7XFA_D9J +data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures_bs_cropped/7XG5_PLP_holo_aligned_predicted_protein.pdb,,7XG5_PLP +data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures_bs_cropped/7XI7_4RI_holo_aligned_predicted_protein.pdb,,7XI7_4RI +data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures_bs_cropped/7XIJ_EJ3_holo_aligned_predicted_protein.pdb,,7XIJ_EJ3 +data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures_bs_cropped/7XJN_NSD_holo_aligned_predicted_protein.pdb,,7XJN_NSD +data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures_bs_cropped/7XPO_UPG_holo_aligned_predicted_protein.pdb,,7XPO_UPG +data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures_bs_cropped/7XQZ_FPF_holo_aligned_predicted_protein.pdb,,7XQZ_FPF +data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures_bs_cropped/7XRL_FWK_holo_aligned_predicted_protein.pdb,,7XRL_FWK +data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures_bs_cropped/7YZU_DO7_holo_aligned_predicted_protein.pdb,,7YZU_DO7 +data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures_bs_cropped/7Z1Q_NIO_holo_aligned_predicted_protein.pdb,,7Z1Q_NIO +data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures_bs_cropped/7Z2O_IAJ_holo_aligned_predicted_protein.pdb,,7Z2O_IAJ 
+data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures_bs_cropped/7Z7F_IF3_holo_aligned_predicted_protein.pdb,,7Z7F_IF3 +data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures_bs_cropped/7ZCC_OGA_holo_aligned_predicted_protein.pdb,,7ZCC_OGA +data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures_bs_cropped/7ZDY_6MJ_holo_aligned_predicted_protein.pdb,,7ZDY_6MJ +data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures_bs_cropped/7ZF0_DHR_holo_aligned_predicted_protein.pdb,,7ZF0_DHR +data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures_bs_cropped/7ZHP_IQY_holo_aligned_predicted_protein.pdb,,7ZHP_IQY +data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures_bs_cropped/7ZL5_IWE_holo_aligned_predicted_protein.pdb,,7ZL5_IWE +data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures_bs_cropped/7ZOC_T8E_holo_aligned_predicted_protein.pdb,,7ZOC_T8E +data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures_bs_cropped/7ZTL_BCN_holo_aligned_predicted_protein.pdb,,7ZTL_BCN +data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures_bs_cropped/7ZU2_DHT_holo_aligned_predicted_protein.pdb,,7ZU2_DHT +data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures_bs_cropped/7ZXV_45D_holo_aligned_predicted_protein.pdb,,7ZXV_45D +data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures_bs_cropped/7ZXZ_K9R_holo_aligned_predicted_protein.pdb,,7ZXZ_K9R +data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures_bs_cropped/7ZYS_KNR_holo_aligned_predicted_protein.pdb,,7ZYS_KNR +data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures_bs_cropped/7ZZB_KGX_holo_aligned_predicted_protein.pdb,,7ZZB_KGX 
+data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures_bs_cropped/7ZZW_KKW_holo_aligned_predicted_protein.pdb,,7ZZW_KKW +data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures_bs_cropped/8A1H_DLZ_holo_aligned_predicted_protein.pdb,,8A1H_DLZ +data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures_bs_cropped/8A2D_KXY_holo_aligned_predicted_protein.pdb,,8A2D_KXY +data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures_bs_cropped/8AAU_LH0_holo_aligned_predicted_protein.pdb,,8AAU_LH0 +data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures_bs_cropped/8ACL_LQL_holo_aligned_predicted_protein.pdb,,8ACL_LQL +data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures_bs_cropped/8AEM_LVF_holo_aligned_predicted_protein.pdb,,8AEM_LVF +data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures_bs_cropped/8AEU_M0L_holo_aligned_predicted_protein.pdb,,8AEU_M0L +data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures_bs_cropped/8AIE_M7L_holo_aligned_predicted_protein.pdb,,8AIE_M7L +data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures_bs_cropped/8AIJ_M9I_holo_aligned_predicted_protein.pdb,,8AIJ_M9I +data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures_bs_cropped/8AJX_FUM_holo_aligned_predicted_protein.pdb,,8AJX_FUM +data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures_bs_cropped/8AP0_PRP_holo_aligned_predicted_protein.pdb,,8AP0_PRP +data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures_bs_cropped/8AQL_PLG_holo_aligned_predicted_protein.pdb,,8AQL_PLG +data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures_bs_cropped/8AUH_L9I_holo_aligned_predicted_protein.pdb,,8AUH_L9I 
+data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures_bs_cropped/8AY3_OE3_holo_aligned_predicted_protein.pdb,,8AY3_OE3 +data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures_bs_cropped/8B8H_OJQ_holo_aligned_predicted_protein.pdb,,8B8H_OJQ +data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures_bs_cropped/8BN6_R53_holo_aligned_predicted_protein.pdb,,8BN6_R53 +data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures_bs_cropped/8BOM_QU6_holo_aligned_predicted_protein.pdb,,8BOM_QU6 +data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures_bs_cropped/8BPL_CP_holo_aligned_predicted_protein.pdb,,8BPL_CP +data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures_bs_cropped/8BRO_R7E_holo_aligned_predicted_protein.pdb,,8BRO_R7E +data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures_bs_cropped/8BTI_RFO_holo_aligned_predicted_protein.pdb,,8BTI_RFO +data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures_bs_cropped/8C3N_ADP_holo_aligned_predicted_protein.pdb,,8C3N_ADP +data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures_bs_cropped/8C5D_GTB_holo_aligned_predicted_protein.pdb,,8C5D_GTB +data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures_bs_cropped/8C5M_MTA_holo_aligned_predicted_protein.pdb,,8C5M_MTA +data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures_bs_cropped/8C7Y_TXV_holo_aligned_predicted_protein.pdb,,8C7Y_TXV +data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures_bs_cropped/8CGC_LMR_holo_aligned_predicted_protein.pdb,,8CGC_LMR +data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures_bs_cropped/8CI0_8EL_holo_aligned_predicted_protein.pdb,,8CI0_8EL 
+data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures_bs_cropped/8CNH_V6U_holo_aligned_predicted_protein.pdb,,8CNH_V6U +data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures_bs_cropped/8CSD_C5P_holo_aligned_predicted_protein.pdb,,8CSD_C5P +data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures_bs_cropped/8D19_GSH_holo_aligned_predicted_protein.pdb,,8D19_GSH +data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures_bs_cropped/8D39_QDB_holo_aligned_predicted_protein.pdb,,8D39_QDB +data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures_bs_cropped/8D5D_5DK_holo_aligned_predicted_protein.pdb,,8D5D_5DK +data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures_bs_cropped/8DHG_T78_holo_aligned_predicted_protein.pdb,,8DHG_T78 +data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures_bs_cropped/8DKO_TFB_holo_aligned_predicted_protein.pdb,,8DKO_TFB +data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures_bs_cropped/8DP2_UMA_holo_aligned_predicted_protein.pdb,,8DP2_UMA +data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures_bs_cropped/8DSC_NCA_holo_aligned_predicted_protein.pdb,,8DSC_NCA +data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures_bs_cropped/8DW5_FQ7_holo_aligned_predicted_protein.pdb,,8DW5_FQ7 +data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures_bs_cropped/8DZT_G4P_holo_aligned_predicted_protein.pdb,,8DZT_G4P +data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures_bs_cropped/8E77_ULP_holo_aligned_predicted_protein.pdb,,8E77_ULP +data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures_bs_cropped/8EAB_VN2_holo_aligned_predicted_protein.pdb,,8EAB_VN2 
+data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures_bs_cropped/8EAD_UY0_holo_aligned_predicted_protein.pdb,,8EAD_UY0 +data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures_bs_cropped/8ERS_WQO_holo_aligned_predicted_protein.pdb,,8ERS_WQO +data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures_bs_cropped/8EX2_Q2Q_holo_aligned_predicted_protein.pdb,,8EX2_Q2Q +data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures_bs_cropped/8EXL_799_holo_aligned_predicted_protein.pdb,,8EXL_799 +data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures_bs_cropped/8EYE_X4I_holo_aligned_predicted_protein.pdb,,8EYE_X4I +data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures_bs_cropped/8F4J_PHO_holo_aligned_predicted_protein.pdb,,8F4J_PHO +data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures_bs_cropped/8F8E_XJI_holo_aligned_predicted_protein.pdb,,8F8E_XJI +data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures_bs_cropped/8FAV_4Y5_holo_aligned_predicted_protein.pdb,,8FAV_4Y5 +data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures_bs_cropped/8FLN_Y7W_holo_aligned_predicted_protein.pdb,,8FLN_Y7W +data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures_bs_cropped/8FLV_ZB9_holo_aligned_predicted_protein.pdb,,8FLV_ZB9 +data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures_bs_cropped/8FO5_Y4U_holo_aligned_predicted_protein.pdb,,8FO5_Y4U +data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures_bs_cropped/8FV9_80J_holo_aligned_predicted_protein.pdb,,8FV9_80J +data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures_bs_cropped/8G0V_YHT_holo_aligned_predicted_protein.pdb,,8G0V_YHT 
+data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures_bs_cropped/8G43_ZU6_holo_aligned_predicted_protein.pdb,,8G43_ZU6 +data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures_bs_cropped/8G6P_API_holo_aligned_predicted_protein.pdb,,8G6P_API +data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures_bs_cropped/8GFD_ZHR_holo_aligned_predicted_protein.pdb,,8GFD_ZHR +data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures_bs_cropped/8H0M_2EH_holo_aligned_predicted_protein.pdb,,8H0M_2EH +data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures_bs_cropped/8HFN_XGC_holo_aligned_predicted_protein.pdb,,8HFN_XGC +data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures_bs_cropped/8HO0_3ZI_holo_aligned_predicted_protein.pdb,,8HO0_3ZI +data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures_bs_cropped/8SLG_G5A_holo_aligned_predicted_protein.pdb,,8SLG_G5A diff --git a/docs/source/_static/PoseBench.png b/docs/source/_static/PoseBench.png index 8e45d221..c26ea239 100644 Binary files a/docs/source/_static/PoseBench.png and b/docs/source/_static/PoseBench.png differ diff --git a/docs/source/acknowledgements.rst b/docs/source/acknowledgements.rst index af2b6aa8..88460560 100644 --- a/docs/source/acknowledgements.rst +++ b/docs/source/acknowledgements.rst @@ -2,5 +2,5 @@ Acknowledgements ================ .. mdinclude:: ../../README.md - :start-line: 875 - :end-line: 892 + :start-line: 982 + :end-line: 1000 diff --git a/docs/source/available_methods.rst b/docs/source/available_methods.rst index 0ebb58dd..d195d8ed 100644 --- a/docs/source/available_methods.rst +++ b/docs/source/available_methods.rst @@ -2,8 +2,8 @@ Available inference methods ================ .. mdinclude:: ../../README.md - :start-line: 286 - :end-line: 323 + :start-line: 295 + :end-line: 334 .. 
note:: Have a new method to add? Please let us know by creating a pull request. We would be happy to work with you to integrate new methodology into this benchmark! diff --git a/docs/source/bonus.rst b/docs/source/bonus.rst index e0896f8b..b1140e1e 100644 --- a/docs/source/bonus.rst +++ b/docs/source/bonus.rst @@ -2,8 +2,8 @@ Bonus ================ .. mdinclude:: ../../README.md - :start-line: 910 - :end-line: 912 + :start-line: 1018 + :end-line: 1026 .. image:: ./_static/WorkBench.jpeg :alt: My brain after building PoseBench diff --git a/docs/source/citing_this_work.rst b/docs/source/citing_this_work.rst index cca6931b..adbd5f9e 100644 --- a/docs/source/citing_this_work.rst +++ b/docs/source/citing_this_work.rst @@ -2,5 +2,5 @@ Citing this work ================ .. mdinclude:: ../../README.md - :start-line: 894 - :end-line: 906 + :start-line: 1002 + :end-line: 1014 diff --git a/docs/source/comparative_plots.rst b/docs/source/comparative_plots.rst index c4cd431e..cfd55aa7 100644 --- a/docs/source/comparative_plots.rst +++ b/docs/source/comparative_plots.rst @@ -2,5 +2,5 @@ How to create comparative plots of inference results ================ .. 
mdinclude:: ../../README.md - :start-line: 822 - :end-line: 831 + :start-line: 929 + :end-line: 938 diff --git a/docs/source/conf.py b/docs/source/conf.py index 3782a50f..36088238 100644 --- a/docs/source/conf.py +++ b/docs/source/conf.py @@ -9,7 +9,7 @@ # https://www.sphinx-doc.org/en/master/usage/configuration.html#project-information project = "PoseBench" author = "Alex Morehead" -release = "0.4.0" +release = "0.5.0" copyright = f"{datetime.datetime.now().year}, {author}" # -- General configuration --------------------------------------------------- diff --git a/docs/source/configs/data.rst b/docs/source/configs/data.rst index fcd20f22..1cfa6715 100644 --- a/docs/source/configs/data.rst +++ b/docs/source/configs/data.rst @@ -75,6 +75,18 @@ RoseTTAFold-All-Atom output extraction :language: yaml :caption: :file:`data/rfaa_output_extraction.yaml` +Chai-1 input preparation +^^^^^^^^^^^^^^^^^^^^^^^^ +.. literalinclude:: ../../../configs/data/chai_input_preparation.yaml + :language: yaml + :caption: :file:`data/chai_input_preparation.yaml` + +Chai-1 output extraction +^^^^^^^^^^^^^^^^^^^^^^^^ +.. literalinclude:: ../../../configs/data/chai_output_extraction.yaml + :language: yaml + :caption: :file:`data/chai_output_extraction.yaml` + TULIP output extraction ^^^^^^^^^^^^^^^^^^^^^^^^ .. literalinclude:: ../../../configs/data/tulip_output_extraction.yaml diff --git a/docs/source/configs/model.rst b/docs/source/configs/model.rst index 960fb58c..30365357 100644 --- a/docs/source/configs/model.rst +++ b/docs/source/configs/model.rst @@ -39,6 +39,12 @@ RoseTTAFold-All-Atom inference :language: yaml :caption: :file:`model/rfaa_inference.yaml` +Chai-1 inference +^^^^^^^^^^^^^^^^^^^^^^^^ +.. literalinclude:: ../../../configs/model/chai_inference.yaml + :language: yaml + :caption: :file:`model/chai_inference.yaml` + Vina inference ^^^^^^^^^^^^^^^^^^^^^^^^ .. 
literalinclude:: ../../../configs/model/vina_inference.yaml diff --git a/docs/source/data_preparation.rst b/docs/source/data_preparation.rst index 10eecd28..e5fe7998 100644 --- a/docs/source/data_preparation.rst +++ b/docs/source/data_preparation.rst @@ -2,5 +2,5 @@ How to prepare `PoseBench` data ================ .. mdinclude:: ../../README.md - :start-line: 147 - :end-line: 280 + :start-line: 152 + :end-line: 289 diff --git a/docs/source/ensemble_inference.rst b/docs/source/ensemble_inference.rst index 11f20e4c..3902d078 100644 --- a/docs/source/ensemble_inference.rst +++ b/docs/source/ensemble_inference.rst @@ -2,8 +2,8 @@ How to run inference with a method ensemble ================ .. mdinclude:: ../../README.md - :start-line: 763 - :end-line: 814 + :start-line: 870 + :end-line: 921 .. note:: In addition to having `consensus` as an available value for `ensemble_ranking_method`, one can also set `ensemble_ranking_method=ff` to have the method ensemble's top-ranked predictions selected using the criterion of "minimum (molecular dynamics) force field energy" (albeit while incurring a very large runtime complexity). diff --git a/docs/source/for_developers.rst b/docs/source/for_developers.rst index ecc12d3a..fcfb6bde 100644 --- a/docs/source/for_developers.rst +++ b/docs/source/for_developers.rst @@ -2,5 +2,5 @@ For developers ================ .. mdinclude:: ../../README.md - :start-line: 837 - :end-line: 871 + :start-line: 944 + :end-line: 978 diff --git a/docs/source/index.rst b/docs/source/index.rst index 874b0120..ca81fad8 100644 --- a/docs/source/index.rst +++ b/docs/source/index.rst @@ -29,6 +29,7 @@ Welcome to PoseBench's documentation! 
tutorials data_preparation available_methods + sweep_inference method_inference ensemble_inference comparative_plots diff --git a/docs/source/installation.rst b/docs/source/installation.rst index 3c00cf9f..3049842c 100644 --- a/docs/source/installation.rst +++ b/docs/source/installation.rst @@ -2,5 +2,5 @@ Installation ================ .. mdinclude:: ../../README.md - :start-line: 41 - :end-line: 129 + :start-line: 42 + :end-line: 134 diff --git a/docs/source/method_inference.rst b/docs/source/method_inference.rst index 2bfcb18e..0ba94a81 100644 --- a/docs/source/method_inference.rst +++ b/docs/source/method_inference.rst @@ -2,5 +2,5 @@ How to run inference with individual methods ================ .. mdinclude:: ../../README.md - :start-line: 331 - :end-line: 757 + :start-line: 362 + :end-line: 864 diff --git a/docs/source/sweep_inference.rst b/docs/source/sweep_inference.rst new file mode 100644 index 00000000..108b9d2c --- /dev/null +++ b/docs/source/sweep_inference.rst @@ -0,0 +1,6 @@ +How to run a sweep of benchmarking experiments +================ + +.. mdinclude:: ../../README.md + :start-line: 342 + :end-line: 356 diff --git a/docs/source/tutorials.rst b/docs/source/tutorials.rst index cdaab80d..679387fc 100644 --- a/docs/source/tutorials.rst +++ b/docs/source/tutorials.rst @@ -2,5 +2,5 @@ Tutorials ================ .. 
mdinclude:: ../../README.md - :start-line: 135 - :end-line: 141 + :start-line: 140 + :end-line: 146 diff --git a/environments/chai_lab_environment.yaml b/environments/chai_lab_environment.yaml new file mode 100644 index 00000000..64503450 --- /dev/null +++ b/environments/chai_lab_environment.yaml @@ -0,0 +1,179 @@ +name: chai-lab +channels: + - conda-forge +dependencies: + - _libgcc_mutex=0.1=conda_forge + - _openmp_mutex=4.5=2_gnu + - bzip2=1.0.8=h4bc722e_7 + - ca-certificates=2024.8.30=hbcca054_0 + - ld_impl_linux-64=2.43=h712a8e2_0 + - libexpat=2.6.3=h5888daf_0 + - libffi=3.4.2=h7f98852_5 + - libgcc=14.1.0=h77fa898_1 + - libgcc-ng=14.1.0=h69a702a_1 + - libgomp=14.1.0=h77fa898_1 + - libnsl=2.0.1=hd590300_0 + - libsqlite=3.46.1=hadc24fc_0 + - libuuid=2.38.1=h0b41bf4_0 + - libxcrypt=4.4.36=hd590300_1 + - libzlib=1.3.1=h4ab18f5_1 + - ncurses=6.5=he02047a_1 + - openssl=3.3.2=hb9d3cd8_0 + - pip=24.2=pyh8b19718_1 + - python=3.11.10=hc5c86c4_1_cpython + - readline=8.2=h8228510_1 + - setuptools=74.1.2=pyhd8ed1ab_0 + - tk=8.6.13=noxft_h4845f30_101 + - wheel=0.44.0=pyhd8ed1ab_0 + - xz=5.2.6=h166bdaf_0 + - pip: + - aiobotocore==2.15.1 + - aiohappyeyeballs==2.4.0 + - aiohttp==3.10.5 + - aioitertools==0.12.0 + - aiosignal==1.3.1 + - antipickle==0.2.0 + - antlr4-python3-runtime==4.9.3 + - asttokens==2.4.1 + - attrs==24.2.0 + - beartype==0.18.5 + - biopython==1.83 + - botocore==1.35.23 + - cachetools==5.5.0 + - certifi==2024.8.30 + - cfgv==3.4.0 + - charset-normalizer==3.3.2 + - click==8.1.7 + - comm==0.2.2 + - contourpy==1.3.0 + - cycler==0.12.1 + - db-dtypes==1.3.0 + - debugpy==1.8.5 + - decorator==5.1.1 + - distlib==0.3.8 + - einops==0.8.0 + - executing==2.1.0 + - filelock==3.16.1 + - fonttools==4.54.1 + - frozenlist==1.4.1 + - fsspec==2024.9.0 + - gcsfs==2024.9.0.post1 + - gemmi==0.6.7 + - google-api-core==2.20.0 + - google-auth==2.35.0 + - google-auth-oauthlib==1.2.1 + - google-cloud-bigquery==3.25.0 + - google-cloud-core==2.4.1 + - google-cloud-storage==2.18.2 + - 
google-crc32c==1.6.0 + - google-resumable-media==2.7.2 + - googleapis-common-protos==1.65.0 + - grpcio==1.66.1 + - grpcio-status==1.66.1 + - huggingface-hub==0.25.1 + - hydra-core==1.3.2 + - identify==2.6.1 + - idna==3.10 + - iniconfig==2.0.0 + - ipykernel==6.29.5 + - ipython==8.27.0 + - jaxtyping==0.2.34 + - jedi==0.19.1 + - jinja2==3.1.4 + - jmespath==1.0.1 + - jupyter-client==8.6.3 + - jupyter-core==5.7.2 + - kiwisolver==1.4.7 + - markdown-it-py==3.0.0 + - markupsafe==2.1.5 + - matplotlib==3.9.2 + - matplotlib-inline==0.1.7 + - mdurl==0.1.2 + - mpmath==1.3.0 + - msgpack==1.1.0 + - multidict==6.1.0 + - mypy==1.11.2 + - mypy-extensions==1.0.0 + - nest-asyncio==1.6.0 + - networkx==3.3 + - nodeenv==1.9.1 + - numpy==1.26.4 + - nvidia-cublas-cu12==12.1.3.1 + - nvidia-cuda-cupti-cu12==12.1.105 + - nvidia-cuda-nvrtc-cu12==12.1.105 + - nvidia-cuda-runtime-cu12==12.1.105 + - nvidia-cudnn-cu12==8.9.2.26 + - nvidia-cufft-cu12==11.0.2.54 + - nvidia-curand-cu12==10.3.2.106 + - nvidia-cusolver-cu12==11.4.5.107 + - nvidia-cusparse-cu12==12.1.0.106 + - nvidia-nccl-cu12==2.20.5 + - nvidia-nvjitlink-cu12==12.6.68 + - nvidia-nvtx-cu12==12.1.105 + - oauthlib==3.2.2 + - omegaconf==2.3.0 + - packaging==24.1 + - pandas==2.2.3 + - pandas-gbq==0.23.2 + - pandas-stubs==2.2.2.240909 + - parso==0.8.4 + - pexpect==4.9.0 + - pillow==10.4.0 + - platformdirs==4.3.6 + - pluggy==1.5.0 + - pre-commit==3.8.0 + - prompt-toolkit==3.0.47 + - proto-plus==1.24.0 + - protobuf==5.28.2 + - psutil==6.0.0 + - ptyprocess==0.7.0 + - pure-eval==0.2.3 + - pyarrow==17.0.0 + - pyasn1==0.6.1 + - pyasn1-modules==0.4.1 + - pydata-google-auth==1.8.2 + - pygments==2.18.0 + - pyparsing==3.1.4 + - pytest==8.3.3 + - python-dateutil==2.9.0.post0 + - python-dotenv==1.0.1 + - pytz==2024.2 + - pyyaml==6.0.2 + - pyzmq==26.2.0 + - rdkit==2023.9.5 + - regex==2024.9.11 + - requests==2.32.3 + - requests-oauthlib==2.0.0 + - rich==13.8.1 + - rootutils==1.0.7 + - rsa==4.9 + - ruff==0.6.3 + - s3fs==2024.9.0 + - safetensors==0.4.5 + - 
shellingham==1.5.4 + - six==1.16.0 + - stack-data==0.6.3 + - sympy==1.13.3 + - tmtools==0.2.0 + - tokenizers==0.19.1 + - torch==2.3.1 + - tornado==6.4.1 + - tqdm==4.66.5 + - traitlets==5.14.3 + - transformers==4.44.2 + - triton==2.3.1 + - typeguard==2.13.3 + - typer==0.12.5 + - types-pytz==2024.2.0.20240913 + - types-pyyaml==6.0.12.20240917 + - types-requests==2.32.0.20240914 + - types-tqdm==4.66.0.20240417 + - typing-extensions==4.12.2 + - tzdata==2024.2 + - urllib3==2.2.3 + - virtualenv==20.26.5 + - wcwidth==0.2.13 + - wrapt==1.16.0 + - yarl==1.12.1 +prefix: + forks/chai-lab/chai-lab diff --git a/forks/DiffDock/inference/diffdock_astex_diverse_inputs.csv b/forks/DiffDock/inference/diffdock_astex_diverse_inputs.csv new file mode 100644 index 00000000..12b80b72 --- /dev/null +++ b/forks/DiffDock/inference/diffdock_astex_diverse_inputs.csv @@ -0,0 +1,81 @@ +complex_name,protein_path,ligand_description,protein_sequence +1MMV_3AR,data/astex_diverse_set/astex_diverse_holo_aligned_predicted_structures/1MMV_3AR_holo_aligned_predicted_protein.pdb,CCCNC(=[NH2+])NCCC[C@H](N)C(=O)O, +1L2S_STC,data/astex_diverse_set/astex_diverse_holo_aligned_predicted_structures/1L2S_STC_holo_aligned_predicted_protein.pdb,O=C(O)c1sccc1S(=O)(=O)Nc1ccc(Cl)cc1, +1N46_PFA,data/astex_diverse_set/astex_diverse_holo_aligned_predicted_structures/1N46_PFA_holo_aligned_predicted_protein.pdb,Cc1cc(-n2ncc(=O)[nH]c2=O)cc(C)c1Oc1ccc(O)c(C(C)C)c1, +1Y6B_AAX,data/astex_diverse_set/astex_diverse_holo_aligned_predicted_structures/1Y6B_AAX_holo_aligned_predicted_protein.pdb,COc1ccc(S(=O)(=O)NCC2CC2)cc1Nc1ncc(-c2cccc(-c3cccnc3)c2)o1, +1YWR_LI9,data/astex_diverse_set/astex_diverse_holo_aligned_predicted_structures/1YWR_LI9_holo_aligned_predicted_protein.pdb,C[C@H](Nc1nccc(-c2c(-c3ccc(F)cc3)c(=O)n(C3CCNCC3)n2C)n1)c1ccccc1, +1YV3_BIT,data/astex_diverse_set/astex_diverse_holo_aligned_predicted_structures/1YV3_BIT_holo_aligned_predicted_protein.pdb,Cc1ccc2c(c1)C(=O)[C@]1(O)CCN(c3ccccc3)C1=N2, 
+1R9O_FLP,data/astex_diverse_set/astex_diverse_holo_aligned_predicted_structures/1R9O_FLP_holo_aligned_predicted_protein.pdb,C[C@H](C(=O)O)c1ccc(-c2ccccc2)c(F)c1, +1IG3_VIB,data/astex_diverse_set/astex_diverse_holo_aligned_predicted_structures/1IG3_VIB_holo_aligned_predicted_protein.pdb,Cc1ncc(C[n+]2csc(CCO)c2C)c(N)n1, +1R58_AO5,data/astex_diverse_set/astex_diverse_holo_aligned_predicted_structures/1R58_AO5_holo_aligned_predicted_protein.pdb,CC(C)SCC[C@@H](N)[C@H](O)C(=O)NNC(=O)c1cccc(Cl)c1, +1YGC_905,data/astex_diverse_set/astex_diverse_holo_aligned_predicted_structures/1YGC_905_holo_aligned_predicted_protein.pdb,CCOc1cc(OCC)c(F)c([C@@H](Nc2ccc(C(=N)N)c(O)c2)C(=O)NS(=O)(=O)c2cccc(N)c2)c1, +1GPK_HUP,data/astex_diverse_set/astex_diverse_holo_aligned_predicted_structures/1GPK_HUP_holo_aligned_predicted_protein.pdb,C/C=C1\[C@@H]2C=C(C)C[C@@]1(N)c1ccc(=O)[nH]c1C2, +1K3U_IAD,data/astex_diverse_set/astex_diverse_holo_aligned_predicted_structures/1K3U_IAD_holo_aligned_predicted_protein.pdb,O=C(O)C[C@H](NC(=O)Cc1c[nH]c2ccccc12)C(=O)O, +1Q1G_MTI,data/astex_diverse_set/astex_diverse_holo_aligned_predicted_structures/1Q1G_MTI_holo_aligned_predicted_protein.pdb,CSC[C@H]1[NH2+][C@@H](c2c[nH]c3c(=O)[nH]cnc23)[C@H](O)[C@@H]1O, +1GM8_SOX,data/astex_diverse_set/astex_diverse_holo_aligned_predicted_structures/1GM8_SOX_holo_aligned_predicted_protein.pdb,CC1(C)[C@H](C(=O)O)N2C(=O)[C@@H](NC(=O)Cc3ccccc3)[C@H]2[S@H]1O, +1OPK_P16,data/astex_diverse_set/astex_diverse_holo_aligned_predicted_structures/1OPK_P16_holo_aligned_predicted_protein.pdb,Cn1c(=O)c(-c2c(Cl)cccc2Cl)cc2cnc(Nc3cccc(CO)c3)nc21, +1U1C_BAU,data/astex_diverse_set/astex_diverse_holo_aligned_predicted_structures/1U1C_BAU_holo_aligned_predicted_protein.pdb,O=c1[nH]c(=O)n(COCCO)cc1Cc1ccccc1, +1T46_STI,data/astex_diverse_set/astex_diverse_holo_aligned_predicted_structures/1T46_STI_holo_aligned_predicted_protein.pdb,Cc1ccc(NC(=O)c2ccc(CN3CCN(C)CC3)cc2)cc1Nc1nccc(-c2cccnc2)n1, 
+1HQ2_PH2,data/astex_diverse_set/astex_diverse_holo_aligned_predicted_structures/1HQ2_PH2_holo_aligned_predicted_protein.pdb,Nc1nc2c(c(=O)[nH]1)N=C(CO)CN2, +1N2V_BDI,data/astex_diverse_set/astex_diverse_holo_aligned_predicted_structures/1N2V_BDI_holo_aligned_predicted_protein.pdb,CCCCc1nc2c(=O)[nH][nH]c(=O)c2[nH]1, +1S19_MC9,data/astex_diverse_set/astex_diverse_holo_aligned_predicted_structures/1S19_MC9_holo_aligned_predicted_protein.pdb,C=C1/C(=C\C=C2/CCC[C@]3(C)[C@@H]([C@H](C)/C=C/[C@@H](O)C4CC4)CC[C@@H]23)C[C@@H](O)C[C@@H]1O, +1OYT_FSN,data/astex_diverse_set/astex_diverse_holo_aligned_predicted_structures/1OYT_FSN_holo_aligned_predicted_protein.pdb,NC(=[NH2+])c1ccc([C@H]2[C@H]3C(=O)N(Cc4ccc(F)cc4)C(=O)[C@H]3[C@@H]3CCCN32)cc1, +1TZ8_DES,data/astex_diverse_set/astex_diverse_holo_aligned_predicted_structures/1TZ8_DES_holo_aligned_predicted_protein.pdb,CC/C(=C(/CC)c1ccc(O)cc1)c1ccc(O)cc1, +2BSM_BSM,data/astex_diverse_set/astex_diverse_holo_aligned_predicted_structures/2BSM_BSM_holo_aligned_predicted_protein.pdb,CCNC(=O)c1n[nH]c(-c2cc(Cl)c(O)cc2O)c1-c1ccc(OC)cc1, +1P2Y_NCT,data/astex_diverse_set/astex_diverse_holo_aligned_predicted_structures/1P2Y_NCT_holo_aligned_predicted_protein.pdb,CN1CCC[C@H]1c1cccnc1, +1V0P_PVB,data/astex_diverse_set/astex_diverse_holo_aligned_predicted_structures/1V0P_PVB_holo_aligned_predicted_protein.pdb,CC(C)[C@H](CO)Nc1nc(Nc2ccc(C(=O)O)c(Cl)c2)c2ncn(C(C)C)c2n1, +1KZK_JE2,data/astex_diverse_set/astex_diverse_holo_aligned_predicted_structures/1KZK_JE2_holo_aligned_predicted_protein.pdb,Cc1ccccc1CNC(=O)[C@H]1N(C(=O)[C@@H](O)[C@H](Cc2ccccc2)NC(=O)c2cccc(O)c2C)CSC1(C)C, +1R55_097,data/astex_diverse_set/astex_diverse_holo_aligned_predicted_structures/1R55_097_holo_aligned_predicted_protein.pdb,CNC(=O)[C@@H](NC(=O)[C@H](CC(C)C)[C@H](O)C(=O)NO)C(C)(C)C, +1SG0_STL,data/astex_diverse_set/astex_diverse_holo_aligned_predicted_structures/1SG0_STL_holo_aligned_predicted_protein.pdb,Oc1ccc(/C=C/c2cc(O)cc(O)c2)cc1, 
+1L7F_BCZ,data/astex_diverse_set/astex_diverse_holo_aligned_predicted_structures/1L7F_BCZ_holo_aligned_predicted_protein.pdb,CCC(CC)[C@H](NC(C)=O)[C@@H]1[C@H](O)[C@@H](C(=O)O)C[C@H]1NC(=N)N, +1NAV_IH5,data/astex_diverse_set/astex_diverse_holo_aligned_predicted_structures/1NAV_IH5_holo_aligned_predicted_protein.pdb,CC(C)c1cc(Oc2c(Cl)cc(CC(=O)O)cc2Cl)ccc1O, +1W2G_THM,data/astex_diverse_set/astex_diverse_holo_aligned_predicted_structures/1W2G_THM_holo_aligned_predicted_protein.pdb,Cc1cn([C@H]2C[C@H](O)[C@@H](CO)O2)c(=O)[nH]c1=O, +1Z95_198,data/astex_diverse_set/astex_diverse_holo_aligned_predicted_structures/1Z95_198_holo_aligned_predicted_protein.pdb,C[C@](O)(CS(=O)(=O)c1ccc(F)cc1)C(=O)Nc1ccc(C#N)c(C(F)(F)F)c1, +1YVF_PH7,data/astex_diverse_set/astex_diverse_holo_aligned_predicted_structures/1YVF_PH7_holo_aligned_predicted_protein.pdb,O=C(O)/C(=C/c1ccc(Oc2ccccc2Br)cc1)NC(=O)c1ccccc1, +1X8X_TYR,data/astex_diverse_set/astex_diverse_holo_aligned_predicted_structures/1X8X_TYR_holo_aligned_predicted_protein.pdb,N[C@@H](Cc1ccc(O)cc1)C(=O)O, +1HWI_115,data/astex_diverse_set/astex_diverse_holo_aligned_predicted_structures/1HWI_115_holo_aligned_predicted_protein.pdb,CC(C)n1c(/C=C/[C@@H](O)C[C@@H](O)CC(=O)O)c(-c2ccc(F)cc2)c2ccccc21, +1S3V_TQD,data/astex_diverse_set/astex_diverse_holo_aligned_predicted_structures/1S3V_TQD_holo_aligned_predicted_protein.pdb,COc1cc(N(C)C[C@@H]2CCC3=C(C2)C(N)=N[C@@H](N)N3)cc(OC)c1OC, +1W1P_GIO,data/astex_diverse_set/astex_diverse_holo_aligned_predicted_structures/1W1P_GIO_holo_aligned_predicted_protein.pdb,O=C1NCC(=O)N2CCC[C@@H]12, +1U4D_DBQ,data/astex_diverse_set/astex_diverse_holo_aligned_predicted_structures/1U4D_DBQ_holo_aligned_predicted_protein.pdb,NC1=N/C(=C2/CCNC(=O)c3[nH]ccc32)C(=O)N1, +1V4S_MRK,data/astex_diverse_set/astex_diverse_holo_aligned_predicted_structures/1V4S_MRK_holo_aligned_predicted_protein.pdb,Cn1ccnc1Sc1cc(C(=O)Nc2nccs2)c(N)cc1F, 
+1OF1_SCT,data/astex_diverse_set/astex_diverse_holo_aligned_predicted_structures/1OF1_SCT_holo_aligned_predicted_protein.pdb,Cc1cn([C@@]23C[C@H](O)[C@@H](CO)[C@@H]2C3)c(=O)[nH]c1=O, +1V48_HA1,data/astex_diverse_set/astex_diverse_holo_aligned_predicted_structures/1V48_HA1_holo_aligned_predicted_protein.pdb,Nc1nc2c(ncn2CCCCC(F)(F)P(=O)(O)O)c(=O)[nH]1, +1Q4G_BFL,data/astex_diverse_set/astex_diverse_holo_aligned_predicted_structures/1Q4G_BFL_holo_aligned_predicted_protein.pdb,C[C@H](C(=O)O)c1ccc(-c2ccccc2)cc1, +1JJE_BYS,data/astex_diverse_set/astex_diverse_holo_aligned_predicted_structures/1JJE_BYS_holo_aligned_predicted_protein.pdb,O=C(O)[C@@H](Cc1ccccc1)[C@H](Cc1ccc2c(c1)OCO2)C(=O)O, +1XOQ_ROF,data/astex_diverse_set/astex_diverse_holo_aligned_predicted_structures/1XOQ_ROF_holo_aligned_predicted_protein.pdb,O=C(Nc1c(Cl)cncc1Cl)c1ccc(OC(F)F)c(OCC2CC2)c1, +1M2Z_DEX,data/astex_diverse_set/astex_diverse_holo_aligned_predicted_structures/1M2Z_DEX_holo_aligned_predicted_protein.pdb,C[C@@H]1C[C@H]2[C@@H]3CCC4=CC(=O)C=C[C@]4(C)[C@@]3(F)[C@@H](O)C[C@]2(C)[C@@]1(O)C(=O)CO, +1P62_GEO,data/astex_diverse_set/astex_diverse_holo_aligned_predicted_structures/1P62_GEO_holo_aligned_predicted_protein.pdb,Nc1ccn([C@@H]2O[C@H](CO)[C@@H](O)C2(F)F)c(=O)n1, +1LRH_NLA,data/astex_diverse_set/astex_diverse_holo_aligned_predicted_structures/1LRH_NLA_holo_aligned_predicted_protein.pdb,O=C(O)Cc1cccc2ccccc12, +1SJ0_E4D,data/astex_diverse_set/astex_diverse_holo_aligned_predicted_structures/1SJ0_E4D_holo_aligned_predicted_protein.pdb,Oc1ccc([C@H]2Sc3cc(O)ccc3O[C@H]2c2ccc(OCCN3CCCCC3)cc2)cc1, +1PMN_984,data/astex_diverse_set/astex_diverse_holo_aligned_predicted_structures/1PMN_984_holo_aligned_predicted_protein.pdb,CCCn1c(C2CCN(C)CC2)nc(-c2ccc(Cl)c(Cl)c2)c1-c1ccnc(NC2CC2)n1, +1SQN_NDR,data/astex_diverse_set/astex_diverse_holo_aligned_predicted_structures/1SQN_NDR_holo_aligned_predicted_protein.pdb,C#C[C@]1(O)CC[C@H]2[C@@H]3CCC4=CC(=O)CC[C@@H]4[C@H]3CC[C@@]21C, 
+1GKC_NFH,data/astex_diverse_set/astex_diverse_holo_aligned_predicted_structures/1GKC_NFH_holo_aligned_predicted_protein.pdb,CNC(=O)[C@@H](NC(=O)[C@H](CC(C)C)CN(O)C=O)C(C)(C)C, +1T40_ID5,data/astex_diverse_set/astex_diverse_holo_aligned_predicted_structures/1T40_ID5_holo_aligned_predicted_protein.pdb,O=C(O)COc1cc(F)ccc1C(=O)NCc1nc2c(F)c(F)cc(F)c2s1, +1UML_FR4,data/astex_diverse_set/astex_diverse_holo_aligned_predicted_structures/1UML_FR4_holo_aligned_predicted_protein.pdb,NC(=O)c1cn([C@@H](CO)CCn2ccc3ccc(NC(=O)CCc4ccccc4)cc32)cn1, +1UNL_RRC,data/astex_diverse_set/astex_diverse_holo_aligned_predicted_structures/1UNL_RRC_holo_aligned_predicted_protein.pdb,CC[C@H](CO)Nc1nc(NCc2ccccc2)c2ncn(C(C)C)c2n1, +1OF6_DTY,data/astex_diverse_set/astex_diverse_holo_aligned_predicted_structures/1OF6_DTY_holo_aligned_predicted_protein.pdb,N[C@@H](Cc1ccc(O)cc1)C(=O)O, +1JD0_AZM,data/astex_diverse_set/astex_diverse_holo_aligned_predicted_structures/1JD0_AZM_holo_aligned_predicted_protein.pdb,CC(=O)Nc1nnc(S(N)(=O)=O)s1, +1N2J_PAF,data/astex_diverse_set/astex_diverse_holo_aligned_predicted_structures/1N2J_PAF_holo_aligned_predicted_protein.pdb,CC(C)(CO)[C@@H](O)C(=O)[O-], +1J3J_CP6,data/astex_diverse_set/astex_diverse_holo_aligned_predicted_structures/1J3J_CP6_holo_aligned_predicted_protein.pdb,CCc1nc(N)nc(N)c1-c1ccc(Cl)cc1, +1MZC_BNE,data/astex_diverse_set/astex_diverse_holo_aligned_predicted_structures/1MZC_BNE_holo_aligned_predicted_protein.pdb,CC[C@@]1(c2cccc(Oc3cc([C@](C)(N)c4cncn4C)ccc3C#N)c2)CCCCN(C)C1=O, +1OWE_675,data/astex_diverse_set/astex_diverse_holo_aligned_predicted_structures/1OWE_675_holo_aligned_predicted_protein.pdb,N=C(N)c1ccc2cc(C(=O)Nc3ccccc3)ccc2c1, +1T9B_1CS,data/astex_diverse_set/astex_diverse_holo_aligned_predicted_structures/1T9B_1CS_holo_aligned_predicted_protein.pdb,COc1nc(C)nc(NC(=O)NS(=O)(=O)c2ccccc2Cl)n1, 
+2BR1_PFP,data/astex_diverse_set/astex_diverse_holo_aligned_predicted_structures/2BR1_PFP_holo_aligned_predicted_protein.pdb,COc1ccc(-c2oc3ncnc(NCCO)c3c2-c2ccc(OC)cc2)cc1, +1G9V_RQ3,data/astex_diverse_set/astex_diverse_holo_aligned_predicted_structures/1G9V_RQ3_holo_aligned_predicted_protein.pdb,Cc1cc(C)cc(NC(=O)Cc2ccc(OC(C)(C)C(=O)O)cc2)c1, +2BM2_PM2,data/astex_diverse_set/astex_diverse_holo_aligned_predicted_structures/2BM2_PM2_holo_aligned_predicted_protein.pdb,NCc1cccc(C2CCN(C(=O)c3cncc(CCc4ccccc4)c3)CC2)c1, +1XOZ_CIA,data/astex_diverse_set/astex_diverse_holo_aligned_predicted_structures/1XOZ_CIA_holo_aligned_predicted_protein.pdb,CN1CC(=O)N2[C@H](c3ccc4c(c3)OCO4)c3[nH]c4ccccc4c3C[C@@H]2C1=O, +1SQ5_PAU,data/astex_diverse_set/astex_diverse_holo_aligned_predicted_structures/1SQ5_PAU_holo_aligned_predicted_protein.pdb,CC(C)(CO)[C@@H](O)C(=O)NCCC(=O)O, +1YQY_915,data/astex_diverse_set/astex_diverse_holo_aligned_predicted_structures/1YQY_915_holo_aligned_predicted_protein.pdb,Cc1cc(S(=O)(=O)N[C@@H](C(=O)NO)C2CCOCC2)ccc1F, +1IA1_TQ3,data/astex_diverse_set/astex_diverse_holo_aligned_predicted_structures/1IA1_TQ3_holo_aligned_predicted_protein.pdb,Nc1nc(N)c2c(Sc3ccccc3)cccc2n1, +1HWW_SWA,data/astex_diverse_set/astex_diverse_holo_aligned_predicted_structures/1HWW_SWA_holo_aligned_predicted_protein.pdb,O[C@H]1[C@H]2[C@H](O)CCCN2C[C@H]1O, +1TT1_KAI,data/astex_diverse_set/astex_diverse_holo_aligned_predicted_structures/1TT1_KAI_holo_aligned_predicted_protein.pdb,C=C(C)[C@H]1CN[C@H](C(=O)O)[C@H]1CC(=O)O, +1Q41_IXM,data/astex_diverse_set/astex_diverse_holo_aligned_predicted_structures/1Q41_IXM_holo_aligned_predicted_protein.pdb,O=C1Nc2ccccc2/C1=C1/Nc2ccccc2/C1=N\O, +1N1M_A3M,data/astex_diverse_set/astex_diverse_holo_aligned_predicted_structures/1N1M_A3M_holo_aligned_predicted_protein.pdb,CC(C)[C@H](N)C(=O)N1CCCC1, 
+1KE5_LS1,data/astex_diverse_set/astex_diverse_holo_aligned_predicted_structures/1KE5_LS1_holo_aligned_predicted_protein.pdb,CNS(=O)(=O)c1ccc(N/C=C2\C(=O)Nc3ccccc32)cc1, +1HP0_AD3,data/astex_diverse_set/astex_diverse_holo_aligned_predicted_structures/1HP0_AD3_holo_aligned_predicted_protein.pdb,Nc1nccc2c1ncn2[C@@H]1O[C@H](CO)[C@@H](O)[C@H]1O, +1UOU_CMU,data/astex_diverse_set/astex_diverse_holo_aligned_predicted_structures/1UOU_CMU_holo_aligned_predicted_protein.pdb,N=C1CCCN1Cc1[nH]c(=O)[nH]c(=O)c1Cl, +1TOW_CRZ,data/astex_diverse_set/astex_diverse_holo_aligned_predicted_structures/1TOW_CRZ_holo_aligned_predicted_protein.pdb,O=C(O)CCCn1c2ccccc2c2ccccc21, +1LPZ_CMB,data/astex_diverse_set/astex_diverse_holo_aligned_predicted_structures/1LPZ_CMB_holo_aligned_predicted_protein.pdb,Cc1cccc2c1cc(C(=O)NCc1cc(Cl)cc(Cl)c1)n2Cc1cccc(C(=N)N)c1, +1VCJ_IBA,data/astex_diverse_set/astex_diverse_holo_aligned_predicted_structures/1VCJ_IBA_holo_aligned_predicted_protein.pdb,CCC(CC)Nc1cc(C(=O)O)ccc1N1C(=O)CC[C@@]1(CN)CO, +1R1H_BIR,data/astex_diverse_set/astex_diverse_holo_aligned_predicted_structures/1R1H_BIR_holo_aligned_predicted_protein.pdb,C[C@H](NC(=O)[C@H](Cc1ccc(-c2ccccc2)cc1)C[P@](=O)(O)[C@H](C)N)C(=O)O, +1OQ5_CEL,data/astex_diverse_set/astex_diverse_holo_aligned_predicted_structures/1OQ5_CEL_holo_aligned_predicted_protein.pdb,Cc1ccc(-c2cc(C(F)(F)F)nn2-c2ccc(S(N)(=O)=O)cc2)cc1, diff --git a/forks/DiffDock/inference/diffdock_astex_diverse_inputs_first_20.csv b/forks/DiffDock/inference/diffdock_astex_diverse_inputs_first_20.csv index 5c209bda..9dc264f9 100644 --- a/forks/DiffDock/inference/diffdock_astex_diverse_inputs_first_20.csv +++ b/forks/DiffDock/inference/diffdock_astex_diverse_inputs_first_20.csv @@ -1,21 +1,21 @@ complex_name,protein_path,ligand_description,protein_sequence 
-1VCJ_IBA,/bml/acmwhb/Repositories/Lab_Repositories/PoseBench/data/astex_diverse_set/astex_diverse_holo_aligned_esmfold_structures/1VCJ_IBA_holo_aligned_esmfold_protein.pdb,CCC(CC)Nc1cc(C(=O)O)ccc1N1C(=O)CC[C@@]1(CN)CO, -1G9V_RQ3,/bml/acmwhb/Repositories/Lab_Repositories/PoseBench/data/astex_diverse_set/astex_diverse_holo_aligned_esmfold_structures/1G9V_RQ3_holo_aligned_esmfold_protein.pdb,Cc1cc(C)cc(NC(=O)Cc2ccc(OC(C)(C)C(=O)O)cc2)c1, -1UOU_CMU,/bml/acmwhb/Repositories/Lab_Repositories/PoseBench/data/astex_diverse_set/astex_diverse_holo_aligned_esmfold_structures/1UOU_CMU_holo_aligned_esmfold_protein.pdb,N=C1CCCN1Cc1[nH]c(=O)[nH]c(=O)c1Cl, -1TZ8_DES,/bml/acmwhb/Repositories/Lab_Repositories/PoseBench/data/astex_diverse_set/astex_diverse_holo_aligned_esmfold_structures/1TZ8_DES_holo_aligned_esmfold_protein.pdb,CC/C(=C(/CC)c1ccc(O)cc1)c1ccc(O)cc1, -1TOW_CRZ,/bml/acmwhb/Repositories/Lab_Repositories/PoseBench/data/astex_diverse_set/astex_diverse_holo_aligned_esmfold_structures/1TOW_CRZ_holo_aligned_esmfold_protein.pdb,O=C(O)CCCn1c2ccccc2c2ccccc21, -1XOQ_ROF,/bml/acmwhb/Repositories/Lab_Repositories/PoseBench/data/astex_diverse_set/astex_diverse_holo_aligned_esmfold_structures/1XOQ_ROF_holo_aligned_esmfold_protein.pdb,O=C(Nc1c(Cl)cncc1Cl)c1ccc(OC(F)F)c(OCC2CC2)c1, -1MZC_BNE,/bml/acmwhb/Repositories/Lab_Repositories/PoseBench/data/astex_diverse_set/astex_diverse_holo_aligned_esmfold_structures/1MZC_BNE_holo_aligned_esmfold_protein.pdb,CC[C@@]1(c2cccc(Oc3cc([C@](C)(N)c4cncn4C)ccc3C#N)c2)CCCCN(C)C1=O, -1N46_PFA,/bml/acmwhb/Repositories/Lab_Repositories/PoseBench/data/astex_diverse_set/astex_diverse_holo_aligned_esmfold_structures/1N46_PFA_holo_aligned_esmfold_protein.pdb,Cc1cc(-n2ncc(=O)[nH]c2=O)cc(C)c1Oc1ccc(O)c(C(C)C)c1, -1R9O_FLP,/bml/acmwhb/Repositories/Lab_Repositories/PoseBench/data/astex_diverse_set/astex_diverse_holo_aligned_esmfold_structures/1R9O_FLP_holo_aligned_esmfold_protein.pdb,C[C@H](C(=O)O)c1ccc(-c2ccccc2)c(F)c1, 
-1K3U_IAD,/bml/acmwhb/Repositories/Lab_Repositories/PoseBench/data/astex_diverse_set/astex_diverse_holo_aligned_esmfold_structures/1K3U_IAD_holo_aligned_esmfold_protein.pdb,O=C(O)C[C@H](NC(=O)Cc1c[nH]c2ccccc12)C(=O)O, -1X8X_TYR,/bml/acmwhb/Repositories/Lab_Repositories/PoseBench/data/astex_diverse_set/astex_diverse_holo_aligned_esmfold_structures/1X8X_TYR_holo_aligned_esmfold_protein.pdb,N[C@@H](Cc1ccc(O)cc1)C(=O)O, -1S19_MC9,/bml/acmwhb/Repositories/Lab_Repositories/PoseBench/data/astex_diverse_set/astex_diverse_holo_aligned_esmfold_structures/1S19_MC9_holo_aligned_esmfold_protein.pdb,C=C1/C(=C\C=C2/CCC[C@]3(C)[C@@H]([C@H](C)/C=C/[C@@H](O)C4CC4)CC[C@@H]23)C[C@@H](O)C[C@@H]1O, -1OF1_SCT,/bml/acmwhb/Repositories/Lab_Repositories/PoseBench/data/astex_diverse_set/astex_diverse_holo_aligned_esmfold_structures/1OF1_SCT_holo_aligned_esmfold_protein.pdb,Cc1cn([C@@]23C[C@H](O)[C@@H](CO)[C@@H]2C3)c(=O)[nH]c1=O, -1LPZ_CMB,/bml/acmwhb/Repositories/Lab_Repositories/PoseBench/data/astex_diverse_set/astex_diverse_holo_aligned_esmfold_structures/1LPZ_CMB_holo_aligned_esmfold_protein.pdb,Cc1cccc2c1cc(C(=O)NCc1cc(Cl)cc(Cl)c1)n2Cc1cccc(C(=N)N)c1, -1T46_STI,/bml/acmwhb/Repositories/Lab_Repositories/PoseBench/data/astex_diverse_set/astex_diverse_holo_aligned_esmfold_structures/1T46_STI_holo_aligned_esmfold_protein.pdb,Cc1ccc(NC(=O)c2ccc(CN3CCN(C)CC3)cc2)cc1Nc1nccc(-c2cccnc2)n1, -1XOZ_CIA,/bml/acmwhb/Repositories/Lab_Repositories/PoseBench/data/astex_diverse_set/astex_diverse_holo_aligned_esmfold_structures/1XOZ_CIA_holo_aligned_esmfold_protein.pdb,CN1CC(=O)N2[C@H](c3ccc4c(c3)OCO4)c3[nH]c4ccccc4c3C[C@@H]2C1=O, -1UML_FR4,/bml/acmwhb/Repositories/Lab_Repositories/PoseBench/data/astex_diverse_set/astex_diverse_holo_aligned_esmfold_structures/1UML_FR4_holo_aligned_esmfold_protein.pdb,NC(=O)c1cn([C@@H](CO)CCn2ccc3ccc(NC(=O)CCc4ccccc4)cc32)cn1, 
-1SQ5_PAU,/bml/acmwhb/Repositories/Lab_Repositories/PoseBench/data/astex_diverse_set/astex_diverse_holo_aligned_esmfold_structures/1SQ5_PAU_holo_aligned_esmfold_protein.pdb,CC(C)(CO)[C@@H](O)C(=O)NCCC(=O)O, -1MMV_3AR,/bml/acmwhb/Repositories/Lab_Repositories/PoseBench/data/astex_diverse_set/astex_diverse_holo_aligned_esmfold_structures/1MMV_3AR_holo_aligned_esmfold_protein.pdb,CCCNC(=[NH2+])NCCC[C@H](N)C(=O)O, -1J3J_CP6,/bml/acmwhb/Repositories/Lab_Repositories/PoseBench/data/astex_diverse_set/astex_diverse_holo_aligned_esmfold_structures/1J3J_CP6_holo_aligned_esmfold_protein.pdb,CCc1nc(N)nc(N)c1-c1ccc(Cl)cc1, +1VCJ_IBA,data/astex_diverse_set/astex_diverse_holo_aligned_predicted_structures/1VCJ_IBA_holo_aligned_predicted_protein.pdb,CCC(CC)Nc1cc(C(=O)O)ccc1N1C(=O)CC[C@@]1(CN)CO, +1G9V_RQ3,data/astex_diverse_set/astex_diverse_holo_aligned_predicted_structures/1G9V_RQ3_holo_aligned_predicted_protein.pdb,Cc1cc(C)cc(NC(=O)Cc2ccc(OC(C)(C)C(=O)O)cc2)c1, +1UOU_CMU,data/astex_diverse_set/astex_diverse_holo_aligned_predicted_structures/1UOU_CMU_holo_aligned_predicted_protein.pdb,N=C1CCCN1Cc1[nH]c(=O)[nH]c(=O)c1Cl, +1TZ8_DES,data/astex_diverse_set/astex_diverse_holo_aligned_predicted_structures/1TZ8_DES_holo_aligned_predicted_protein.pdb,CC/C(=C(/CC)c1ccc(O)cc1)c1ccc(O)cc1, +1TOW_CRZ,data/astex_diverse_set/astex_diverse_holo_aligned_predicted_structures/1TOW_CRZ_holo_aligned_predicted_protein.pdb,O=C(O)CCCn1c2ccccc2c2ccccc21, +1XOQ_ROF,data/astex_diverse_set/astex_diverse_holo_aligned_predicted_structures/1XOQ_ROF_holo_aligned_predicted_protein.pdb,O=C(Nc1c(Cl)cncc1Cl)c1ccc(OC(F)F)c(OCC2CC2)c1, +1MZC_BNE,data/astex_diverse_set/astex_diverse_holo_aligned_predicted_structures/1MZC_BNE_holo_aligned_predicted_protein.pdb,CC[C@@]1(c2cccc(Oc3cc([C@](C)(N)c4cncn4C)ccc3C#N)c2)CCCCN(C)C1=O, +1N46_PFA,data/astex_diverse_set/astex_diverse_holo_aligned_predicted_structures/1N46_PFA_holo_aligned_predicted_protein.pdb,Cc1cc(-n2ncc(=O)[nH]c2=O)cc(C)c1Oc1ccc(O)c(C(C)C)c1, 
+1R9O_FLP,data/astex_diverse_set/astex_diverse_holo_aligned_predicted_structures/1R9O_FLP_holo_aligned_predicted_protein.pdb,C[C@H](C(=O)O)c1ccc(-c2ccccc2)c(F)c1, +1K3U_IAD,data/astex_diverse_set/astex_diverse_holo_aligned_predicted_structures/1K3U_IAD_holo_aligned_predicted_protein.pdb,O=C(O)C[C@H](NC(=O)Cc1c[nH]c2ccccc12)C(=O)O, +1X8X_TYR,data/astex_diverse_set/astex_diverse_holo_aligned_predicted_structures/1X8X_TYR_holo_aligned_predicted_protein.pdb,N[C@@H](Cc1ccc(O)cc1)C(=O)O, +1S19_MC9,data/astex_diverse_set/astex_diverse_holo_aligned_predicted_structures/1S19_MC9_holo_aligned_predicted_protein.pdb,C=C1/C(=C\C=C2/CCC[C@]3(C)[C@@H]([C@H](C)/C=C/[C@@H](O)C4CC4)CC[C@@H]23)C[C@@H](O)C[C@@H]1O, +1OF1_SCT,data/astex_diverse_set/astex_diverse_holo_aligned_predicted_structures/1OF1_SCT_holo_aligned_predicted_protein.pdb,Cc1cn([C@@]23C[C@H](O)[C@@H](CO)[C@@H]2C3)c(=O)[nH]c1=O, +1LPZ_CMB,data/astex_diverse_set/astex_diverse_holo_aligned_predicted_structures/1LPZ_CMB_holo_aligned_predicted_protein.pdb,Cc1cccc2c1cc(C(=O)NCc1cc(Cl)cc(Cl)c1)n2Cc1cccc(C(=N)N)c1, +1T46_STI,data/astex_diverse_set/astex_diverse_holo_aligned_predicted_structures/1T46_STI_holo_aligned_predicted_protein.pdb,Cc1ccc(NC(=O)c2ccc(CN3CCN(C)CC3)cc2)cc1Nc1nccc(-c2cccnc2)n1, +1XOZ_CIA,data/astex_diverse_set/astex_diverse_holo_aligned_predicted_structures/1XOZ_CIA_holo_aligned_predicted_protein.pdb,CN1CC(=O)N2[C@H](c3ccc4c(c3)OCO4)c3[nH]c4ccccc4c3C[C@@H]2C1=O, +1UML_FR4,data/astex_diverse_set/astex_diverse_holo_aligned_predicted_structures/1UML_FR4_holo_aligned_predicted_protein.pdb,NC(=O)c1cn([C@@H](CO)CCn2ccc3ccc(NC(=O)CCc4ccccc4)cc32)cn1, +1SQ5_PAU,data/astex_diverse_set/astex_diverse_holo_aligned_predicted_structures/1SQ5_PAU_holo_aligned_predicted_protein.pdb,CC(C)(CO)[C@@H](O)C(=O)NCCC(=O)O, +1MMV_3AR,data/astex_diverse_set/astex_diverse_holo_aligned_predicted_structures/1MMV_3AR_holo_aligned_predicted_protein.pdb,CCCNC(=[NH2+])NCCC[C@H](N)C(=O)O, 
+1J3J_CP6,data/astex_diverse_set/astex_diverse_holo_aligned_predicted_structures/1J3J_CP6_holo_aligned_predicted_protein.pdb,CCc1nc(N)nc(N)c1-c1ccc(Cl)cc1, diff --git a/forks/DiffDock/inference/diffdock_casp15_inputs.csv b/forks/DiffDock/inference/diffdock_casp15_inputs.csv new file mode 100644 index 00000000..782ef311 --- /dev/null +++ b/forks/DiffDock/inference/diffdock_casp15_inputs.csv @@ -0,0 +1,20 @@ +complex_name,protein_path,ligand_description,protein_sequence +T1124,data/casp15_set/casp15_holo_aligned_predicted_structures/T1124.pdb,Nc1ncnc2c1ncn2[C@@H]1O[C@H](CSCC[C@H](N)C(=O)O)[C@@H](O)[C@H]1O|Nc1ncnc2c1ncn2[C@@H]1O[C@H](CSCC[C@H](N)C(=O)O)[C@@H](O)[C@H]1O|N[C@@H](Cc1ccc(O)cc1)C(=O)O|N[C@@H](Cc1ccc(O)cc1)C(=O)O, +T1186,data/casp15_set/casp15_holo_aligned_predicted_structures/T1186.pdb,Cc1onc(c1C(=O)N[C@H](C=O)[C@@H]1N[C@@H](C(O)=O)C(C)(C)S1)-c1c(Cl)cccc1Cl, +T1181,data/casp15_set/casp15_holo_aligned_predicted_structures/T1181.pdb,CC(=O)N[C@H]1[C@@H](O)O[C@H](CO[C@H]2O[C@H](CO)[C@@H](O[C@@H]3O[C@H](C(=O)N[C@@H](CO)C([O-])=O)[C@@H](O[C@H]4O[C@H](CO[C@H]5O[C@H](CO)[C@@H](O[C@@H]6OC(C(=O)N[C@@H](CO)C([O-])=O)=C[C@H](O)[C@H]6O)[C@H](O)[C@H]5NC(C)=O)[C@@H](O)[C@H](O)[C@H]4NC(C)=O)[C@H](O)[C@H]3O)[C@H](O)[C@H]2NC(C)=O)[C@@H](O)[C@@H]1O|CC(=O)N[C@H]1[C@@H](O)O[C@H](CO[C@H]2O[C@H](CO)[C@@H](O[C@@H]3O[C@H](C(=O)N[C@@H](CO)C([O-])=O)[C@@H](O[C@H]4O[C@H](CO[C@H]5O[C@H](CO)[C@@H](O[C@@H]6OC(C(=O)N[C@@H](CO)C([O-])=O)=C[C@H](O)[C@H]6O)[C@H](O)[C@H]5NC(C)=O)[C@@H](O)[C@H](O)[C@H]4NC(C)=O)[C@H](O)[C@H]3O)[C@H](O)[C@H]2NC(C)=O)[C@@H](O)[C@@H]1O|CC(=O)N[C@H]1[C@@H](O)O[C@H](CO[C@H]2O[C@H](CO)[C@@H](O[C@@H]3O[C@H](C(=O)N[C@@H](CO)C([O-])=O)[C@@H](O[C@H]4O[C@H](CO[C@H]5O[C@H](CO)[C@@H](O[C@@H]6OC(C(=O)N[C@@H](CO)C([O-])=O)=C[C@H](O)[C@H]6O)[C@H](O)[C@H]5NC(C)=O)[C@@H](O)[C@H](O)[C@H]4NC(C)=O)[C@H](O)[C@H]3O)[C@H](O)[C@H]2NC(C)=O)[C@@H](O)[C@@H]1O|CC(=O)N[C@H]1[C@@H](O)O[C@H](CO[C@H]2O[C@H](CO)[C@@H](O[C@@H]3O[C@H](C(=O)N[C@@H](CO)C([O-])=O)[C@@H](O[C@H]4O[C@H](CO[C@H]5O[C@H](
CO)[C@@H](O[C@@H]6OC(C(=O)N[C@@H](CO)C([O-])=O)=C[C@H](O)[C@H]6O)[C@H](O)[C@H]5NC(C)=O)[C@@H](O)[C@H](O)[C@H]4NC(C)=O)[C@H](O)[C@H]3O)[C@H](O)[C@H]2NC(C)=O)[C@@H](O)[C@@H]1O|[Zn+2]|[Zn+2]|[Zn+2]|[Ca+2]|CC(=O)N[C@H]1[C@@H](O)O[C@H](CO[C@H]2O[C@H](CO)[C@@H](O[C@@H]3O[C@H](C(=O)N[C@@H](CO)C([O-])=O)[C@@H](O[C@H]4O[C@H](CO[C@H]5O[C@H](CO)[C@@H](O[C@@H]6OC(C(=O)N[C@@H](CO)C([O-])=O)=C[C@H](O)[C@H]6O)[C@H](O)[C@H]5NC(C)=O)[C@@H](O)[C@H](O)[C@H]4NC(C)=O)[C@H](O)[C@H]3O)[C@H](O)[C@H]2NC(C)=O)[C@@H](O)[C@@H]1O, +T1187,data/casp15_set/casp15_holo_aligned_predicted_structures/T1187.pdb,CC(=O)N[C@H]1[C@H](O[C@@H]2[C@@H](CO)O[C@@H](O[C@@H]3[C@@H](CO)O[C@@H](O)[C@H](NC(C)=O)[C@H]3O)[C@H](NC(C)=O)[C@H]2O)O[C@H](CO)[C@@H](O)[C@@H]1O|CC(=O)N[C@H]1[C@H](O[C@@H]2[C@@H](CO)O[C@@H](O[C@@H]3[C@@H](CO)O[C@@H](O)[C@H](NC(C)=O)[C@H]3O)[C@H](NC(C)=O)[C@H]2O)O[C@H](CO)[C@@H](O)[C@@H]1O, +T1158v1,data/casp15_set/casp15_holo_aligned_predicted_structures/T1158v1.pdb,CCCCC[C@@H](/C=C/[C@H]1[C@@H](CC(=O)[C@@H]1CCCCCCC(=O)O)O)O, +H1172v2,data/casp15_set/casp15_holo_aligned_predicted_structures/H1172v2.pdb,Nc1ncnc2c1ncn2[C@@H]1O[C@H](CO[P@](=O)([O-])OP(=O)([O-])[O-])[C@@H](O)[C@H]1O|Nc1ncnc2c1ncn2[C@@H]1O[C@H](CO[P@](=O)([O-])OP(=O)([O-])[O-])[C@@H](O)[C@H]1O|Nc1ncnc2c1ncn2[C@@H]1O[C@H](CO[P@](=O)([O-])OP(=O)([O-])[O-])[C@@H](O)[C@H]1O|Nc1ncnc2c1ncn2[C@@H]1O[C@H](CO[P@](=O)([O-])OP(=O)([O-])[O-])[C@@H](O)[C@H]1O|Nc1ncnc2c1ncn2[C@@H]1O[C@H](COP(=O)([O-])OP(=O)([O-])OP([O-])([O-])=S)[C@@H](O)[C@H]1O|Nc1ncnc2c1ncn2[C@@H]1O[C@H](COP(=O)([O-])OP(=O)([O-])OP([O-])([O-])=S)[C@@H](O)[C@H]1O|[Mg+2]|[Mg+2]|[Mg+2], +T1158v2,data/casp15_set/casp15_holo_aligned_predicted_structures/T1158v2.pdb,CCCCC[C@@H](/C=C/[C@H]1[C@@H](CC(=O)[C@@H]1C/C=C\CCCC(=O)O)O)O, 
+H1172v3,data/casp15_set/casp15_holo_aligned_predicted_structures/H1172v3.pdb,Nc1ncnc2c1ncn2[C@@H]1O[C@H](CO[P@](=O)([O-])OP(=O)([O-])[O-])[C@@H](O)[C@H]1O|Nc1ncnc2c1ncn2[C@@H]1O[C@H](CO[P@](=O)([O-])OP(=O)([O-])[O-])[C@@H](O)[C@H]1O|Nc1ncnc2c1ncn2[C@@H]1O[C@H](CO[P@](=O)([O-])OP(=O)([O-])[O-])[C@@H](O)[C@H]1O|Nc1ncnc2c1ncn2[C@@H]1O[C@H](CO[P@](=O)([O-])OP(=O)([O-])[O-])[C@@H](O)[C@H]1O|Nc1ncnc2c1ncn2[C@@H]1O[C@H](COP(=O)([O-])OP(=O)([O-])OP([O-])([O-])=S)[C@@H](O)[C@H]1O|Nc1ncnc2c1ncn2[C@@H]1O[C@H](COP(=O)([O-])OP(=O)([O-])OP([O-])([O-])=S)[C@@H](O)[C@H]1O|[Mg+2]|[Mg+2], +H1172v1,data/casp15_set/casp15_holo_aligned_predicted_structures/H1172v1.pdb,Nc1ncnc2c1ncn2[C@@H]1O[C@H](CO[P@](=O)([O-])OP(=O)([O-])[O-])[C@@H](O)[C@H]1O|Nc1ncnc2c1ncn2[C@@H]1O[C@H](CO[P@](=O)([O-])OP(=O)([O-])[O-])[C@@H](O)[C@H]1O|Nc1ncnc2c1ncn2[C@@H]1O[C@H](CO[P@](=O)([O-])OP(=O)([O-])[O-])[C@@H](O)[C@H]1O|Nc1ncnc2c1ncn2[C@@H]1O[C@H](COP(=O)([O-])OP(=O)([O-])OP([O-])([O-])=S)[C@@H](O)[C@H]1O|Nc1ncnc2c1ncn2[C@@H]1O[C@H](COP(=O)([O-])OP(=O)([O-])OP([O-])([O-])=S)[C@@H](O)[C@H]1O|Nc1ncnc2c1ncn2[C@@H]1O[C@H](COP(=O)([O-])OP(=O)([O-])OP([O-])([O-])=S)[C@@H](O)[C@H]1O|[Mg+2]|[Mg+2]|[Mg+2], +H1172v4,data/casp15_set/casp15_holo_aligned_predicted_structures/H1172v4.pdb,Nc1ncnc2c1ncn2[C@@H]1O[C@H](CO[P@](=O)([O-])OP(=O)([O-])[O-])[C@@H](O)[C@H]1O|Nc1ncnc2c1ncn2[C@@H]1O[C@H](CO[P@](=O)([O-])OP(=O)([O-])[O-])[C@@H](O)[C@H]1O|Nc1ncnc2c1ncn2[C@@H]1O[C@H](CO[P@](=O)([O-])OP(=O)([O-])[O-])[C@@H](O)[C@H]1O|Nc1ncnc2c1ncn2[C@@H]1O[C@H](COP(=O)([O-])OP(=O)([O-])OP([O-])([O-])=S)[C@@H](O)[C@H]1O|Nc1ncnc2c1ncn2[C@@H]1O[C@H](COP(=O)([O-])OP(=O)([O-])OP([O-])([O-])=S)[C@@H](O)[C@H]1O|Nc1ncnc2c1ncn2[C@@H]1O[C@H](COP(=O)([O-])OP(=O)([O-])OP([O-])([O-])=S)[C@@H](O)[C@H]1O|[Mg+2]|[Mg+2]|[Mg+2], 
+T1127v2,data/casp15_set/casp15_holo_aligned_predicted_structures/T1127v2.pdb,CC(C)(CO[P@](=O)(O)O[P@@](=O)(O)OC[C@H]1O[C@@H](n2cnc3c(N)ncnc32)[C@H](O)[C@@H]1OP(=O)(O)O)[C@@H](O)C(=O)NCCC(=O)NCCS|CC(C)(CO[P@](=O)(O)O[P@@](=O)(O)OC[C@H]1O[C@@H](n2cnc3c(N)ncnc32)[C@H](O)[C@@H]1OP(=O)(O)O)[C@@H](O)C(=O)NCCC(=O)NCCS|O=S(=O)(O)CCN1CCN(CCO)CC1|O=S(=O)(O)CCN1CCN(CCO)CC1|C[C@H](O)CC(C)(C)O, +H1171v1,data/casp15_set/casp15_holo_aligned_predicted_structures/H1171v1.pdb,Nc1ncnc2c1ncn2[C@@H]1O[C@H](CO[P@](=O)([O-])OP(=O)([O-])[O-])[C@@H](O)[C@H]1O|Nc1ncnc2c1ncn2[C@@H]1O[C@H](COP(=O)([O-])OP(=O)([O-])OP([O-])([O-])=S)[C@@H](O)[C@H]1O|Nc1ncnc2c1ncn2[C@@H]1O[C@H](COP(=O)([O-])OP(=O)([O-])OP([O-])([O-])=S)[C@@H](O)[C@H]1O|Nc1ncnc2c1ncn2[C@@H]1O[C@H](COP(=O)([O-])OP(=O)([O-])OP([O-])([O-])=S)[C@@H](O)[C@H]1O|Nc1ncnc2c1ncn2[C@@H]1O[C@H](COP(=O)([O-])OP(=O)([O-])OP([O-])([O-])=S)[C@@H](O)[C@H]1O|Nc1ncnc2c1ncn2[C@@H]1O[C@H](COP(=O)([O-])OP(=O)([O-])OP([O-])([O-])=S)[C@@H](O)[C@H]1O|[Mg+2]|[Mg+2]|[Mg+2]|[Mg+2]|[Mg+2], +T1152,data/casp15_set/casp15_holo_aligned_predicted_structures/T1152.pdb,CC(=O)NC1C(O)OC(CO)C(OC2OC(CO)C(OC3OC(CO)C(O)C(O)C3NC(C)=O)C(O)C2NC(C)=O)C1O, +T1188,data/casp15_set/casp15_holo_aligned_predicted_structures/T1188.pdb,Cn1cnc2c1c(=O)n(CCCn1c(=O)c3c(ncn3C)n(C)c1=O)c(=O)n2C|Cn1cnc2c1c(=O)n(CCCn1c(=O)c3c(ncn3C)n(C)c1=O)c(=O)n2C|[Cd+2]|[Cd+2]|[Co+2], +T1146,data/casp15_set/casp15_holo_aligned_predicted_structures/T1146.pdb,CC(=O)N[C@H]1[C@H](O)O[C@H](CO)[C@@H](O)[C@@H]1O, +T1158v3,data/casp15_set/casp15_holo_aligned_predicted_structures/T1158v3.pdb,C[C@]12CC[C@H]3[C@H]([C@@H]1CCC2=O)CC=C4[C@@]3(CC[C@@H](C4)OS(=O)(=O)O)C, 
+H1171v2,data/casp15_set/casp15_holo_aligned_predicted_structures/H1171v2.pdb,Nc1ncnc2c1ncn2[C@@H]1O[C@H](CO[P@](=O)([O-])OP(=O)([O-])[O-])[C@@H](O)[C@H]1O|Nc1ncnc2c1ncn2[C@@H]1O[C@H](CO[P@](=O)([O-])OP(=O)([O-])[O-])[C@@H](O)[C@H]1O|Nc1ncnc2c1ncn2[C@@H]1O[C@H](CO[P@](=O)([O-])OP(=O)([O-])[O-])[C@@H](O)[C@H]1O|Nc1ncnc2c1ncn2[C@@H]1O[C@H](COP(=O)([O-])OP(=O)([O-])OP([O-])([O-])=S)[C@@H](O)[C@H]1O|Nc1ncnc2c1ncn2[C@@H]1O[C@H](COP(=O)([O-])OP(=O)([O-])OP([O-])([O-])=S)[C@@H](O)[C@H]1O|Nc1ncnc2c1ncn2[C@@H]1O[C@H](COP(=O)([O-])OP(=O)([O-])OP([O-])([O-])=S)[C@@H](O)[C@H]1O|[Mg+2]|[Mg+2]|[Mg+2], +H1135,data/casp15_set/casp15_holo_aligned_predicted_structures/H1135.pdb,[Cl-]|[Cl-]|[Cl-]|[K+]|[K+]|[K+]|[K+]|[K+]|[K+]|[K+]|[K+]|[K+], +T1158v4,data/casp15_set/casp15_holo_aligned_predicted_structures/T1158v4.pdb,C1=NC(=C2C(=N1)N(C=N2)[C@H]3[C@@H]([C@@H]([C@H](O3)COP(=O)(O)OP(=O)(O)OP(=O)(O)O)O)O)N|C1=NC(=C2C(=N1)N(C=N2)[C@H]3[C@@H]([C@@H]([C@H](O3)COP(=O)(O)OP(=O)(O)OP(=O)(O)O)O)O)N|[Mg+2]|[Mg+2], diff --git a/forks/DiffDock/inference/diffdock_dockgen_inputs.csv b/forks/DiffDock/inference/diffdock_dockgen_inputs.csv index 1a3cf8bd..a7d61976 100644 --- a/forks/DiffDock/inference/diffdock_dockgen_inputs.csv +++ b/forks/DiffDock/inference/diffdock_dockgen_inputs.csv @@ -1,190 +1,190 @@ complex_name,protein_path,ligand_description,protein_sequence -3gvl_1_SLB_2,/bml/acmwhb/Repositories/Lab_Repositories/PoseBench/data/dockgen_set/dockgen_holo_aligned_esmfold_structures/3gvl_1_SLB_2_holo_aligned_esmfold_protein.pdb,CC(O)N[C@H]1[C@H]([C@H](O)[C@H](O)CO)O[C@](O)(C(O)O)C[C@@H]1O, -3inr_1_GDU_0,/bml/acmwhb/Repositories/Lab_Repositories/PoseBench/data/dockgen_set/dockgen_holo_aligned_esmfold_structures/3inr_1_GDU_0_holo_aligned_esmfold_protein.pdb,OC[C@H]1O[C@H](O[PH](O)(O)O[PH](O)(O)OC[C@H]2O[C@@H](N3CCC(O)NC3O)[C@H](O)[C@@H]2O)[C@H](O)[C@@H](O)[C@H]1O, 
-3jqm_1_GTP_5,/bml/acmwhb/Repositories/Lab_Repositories/PoseBench/data/dockgen_set/dockgen_holo_aligned_esmfold_structures/3jqm_1_GTP_5_holo_aligned_esmfold_protein.pdb,NC1NC(O)C2NCN([C@@H]3O[C@H](CO[PH](O)(O)O[PH](O)(O)O[PH](O)(O)O)[C@@H](O)[C@H]3O)C2N1, -3ju4_1_SLB_2,/bml/acmwhb/Repositories/Lab_Repositories/PoseBench/data/dockgen_set/dockgen_holo_aligned_esmfold_structures/3ju4_1_SLB_2_holo_aligned_esmfold_protein.pdb,CC(O)N[C@H]1[C@H]([C@H](O)[C@H](O)CO)O[C@](O)(C(O)O)C[C@@H]1O, -4cnl_1_CHT_1,/bml/acmwhb/Repositories/Lab_Repositories/PoseBench/data/dockgen_set/dockgen_holo_aligned_esmfold_structures/4cnl_1_CHT_1_holo_aligned_esmfold_protein.pdb,C[N+](C)(C)CCO, -1hg0_1_SIN_1,/bml/acmwhb/Repositories/Lab_Repositories/PoseBench/data/dockgen_set/dockgen_holo_aligned_esmfold_structures/1hg0_1_SIN_1_holo_aligned_esmfold_protein.pdb,OC(O)CCC(O)O, -1i8t_1_FAD_1,/bml/acmwhb/Repositories/Lab_Repositories/PoseBench/data/dockgen_set/dockgen_holo_aligned_esmfold_structures/1i8t_1_FAD_1_holo_aligned_esmfold_protein.pdb,CC1CC2NC3C(O)NC(O)NC3N(C[C@H](O)[C@H](O)[C@H](O)CO[PH](O)(O)O[PH](O)(O)OC[C@H]3O[C@@H](N4CNC5C(N)NCNC54)[C@H](O)[C@@H]3O)C2CC1C, -1o28_1_UFP_2,/bml/acmwhb/Repositories/Lab_Repositories/PoseBench/data/dockgen_set/dockgen_holo_aligned_esmfold_structures/1o28_1_UFP_2_holo_aligned_esmfold_protein.pdb,OC1NC(O)N([C@H]2C[C@H](O)[C@@H](CO[PH](O)(O)O)O2)CC1F, -1o72_2_PC_0,/bml/acmwhb/Repositories/Lab_Repositories/PoseBench/data/dockgen_set/dockgen_holo_aligned_esmfold_structures/1o72_2_PC_0_holo_aligned_esmfold_protein.pdb,C[N+](C)(C)CCO[PH](O)(O)O, -1pj2_1_FUM_0,/bml/acmwhb/Repositories/Lab_Repositories/PoseBench/data/dockgen_set/dockgen_holo_aligned_esmfold_structures/1pj2_1_FUM_0_holo_aligned_esmfold_protein.pdb,OC(O)CCC(O)O, -1pj4_1_FUM_1,/bml/acmwhb/Repositories/Lab_Repositories/PoseBench/data/dockgen_set/dockgen_holo_aligned_esmfold_structures/1pj4_1_FUM_1_holo_aligned_esmfold_protein.pdb,OC(O)CCC(O)O, 
-1qaw_1_TRP_7,/bml/acmwhb/Repositories/Lab_Repositories/PoseBench/data/dockgen_set/dockgen_holo_aligned_esmfold_structures/1qaw_1_TRP_7_holo_aligned_esmfold_protein.pdb,N[C@@H](CC1CNC2CCCCC12)C(O)O, -1rqp_1_SAM_0,/bml/acmwhb/Repositories/Lab_Repositories/PoseBench/data/dockgen_set/dockgen_holo_aligned_esmfold_structures/1rqp_1_SAM_0_holo_aligned_esmfold_protein.pdb,C[S@@H](CC[C@H](N)C(O)O)C[C@H]1O[C@@H](N2CNC3C(N)NCNC32)[C@H](O)[C@@H]1O, -1sbz_1_FMN_3,/bml/acmwhb/Repositories/Lab_Repositories/PoseBench/data/dockgen_set/dockgen_holo_aligned_esmfold_structures/1sbz_1_FMN_3_holo_aligned_esmfold_protein.pdb,C[C@@H]1CC2NC3C(O)NC(O)NC3N(C[C@H](O)[C@H](O)[C@H](O)CO[PH](O)(O)O)C2C[C@@H]1C, -1sij_1_PCD_0,/bml/acmwhb/Repositories/Lab_Repositories/PoseBench/data/dockgen_set/dockgen_holo_aligned_esmfold_structures/1sij_1_PCD_0_holo_aligned_esmfold_protein.pdb,NC1CCN([C@@H]2O[C@H](CO[PH](O)(O)O[PH](O)(O)OC[C@H]3O[C@H]4NC5NC(N)NC(O)C5N[C@H]4[C@@H]4S[Mo](O)(O)SC43)[C@@H](O)[C@H]2O)C(O)N1, -1tke_1_SER_0,/bml/acmwhb/Repositories/Lab_Repositories/PoseBench/data/dockgen_set/dockgen_holo_aligned_esmfold_structures/1tke_1_SER_0_holo_aligned_esmfold_protein.pdb,N[C@@H](CO)C(O)O, -1tkg_1_SSA_0,/bml/acmwhb/Repositories/Lab_Repositories/PoseBench/data/dockgen_set/dockgen_holo_aligned_esmfold_structures/1tkg_1_SSA_0_holo_aligned_esmfold_protein.pdb,NC1NCNC2C1NCN2[C@@H]1O[C@H](COS(O)(O)NC(O)[C@@H](N)CO)[C@@H](O)[C@H]1O, -1u8u_1_OCA_0,/bml/acmwhb/Repositories/Lab_Repositories/PoseBench/data/dockgen_set/dockgen_holo_aligned_esmfold_structures/1u8u_1_OCA_0_holo_aligned_esmfold_protein.pdb,CCCCCCCC(O)O, -1uf5_1_CDT_0,/bml/acmwhb/Repositories/Lab_Repositories/PoseBench/data/dockgen_set/dockgen_holo_aligned_esmfold_structures/1uf5_1_CDT_0_holo_aligned_esmfold_protein.pdb,CSCC[C@@H](NC(N)O)C(O)O, -1uf7_1_CDV_0,/bml/acmwhb/Repositories/Lab_Repositories/PoseBench/data/dockgen_set/dockgen_holo_aligned_esmfold_structures/1uf7_1_CDV_0_holo_aligned_esmfold_protein.pdb,CC(C)[C@@H](NC(N)O)C(O)O, 
-1uf8_1_ING_2,/bml/acmwhb/Repositories/Lab_Repositories/PoseBench/data/dockgen_set/dockgen_holo_aligned_esmfold_structures/1uf8_1_ING_2_holo_aligned_esmfold_protein.pdb,NC(O)N[C@H](CC1CCCCC1)C(O)O, -1v2g_1_OCA_0,/bml/acmwhb/Repositories/Lab_Repositories/PoseBench/data/dockgen_set/dockgen_holo_aligned_esmfold_structures/1v2g_1_OCA_0_holo_aligned_esmfold_protein.pdb,CCCCCCCC(O)O, -1v97_1_MTE_1,/bml/acmwhb/Repositories/Lab_Repositories/PoseBench/data/dockgen_set/dockgen_holo_aligned_esmfold_structures/1v97_1_MTE_1_holo_aligned_esmfold_protein.pdb,NC1NC(O)[C@@H]2N[C@H]3C(S)C(S)[C@@H](CO[PH](O)(O)O)O[C@H]3NC2N1, -1za2_1_CTP_4,/bml/acmwhb/Repositories/Lab_Repositories/PoseBench/data/dockgen_set/dockgen_holo_aligned_esmfold_structures/1za2_1_CTP_4_holo_aligned_esmfold_protein.pdb,NC1CCN([C@@H]2O[C@H](CO[PH](O)(O)O[PH](O)(O)O[PH](O)(O)O)[C@@H](O)[C@H]2O)C(O)N1, -2cdc_1_XYS_5,/bml/acmwhb/Repositories/Lab_Repositories/PoseBench/data/dockgen_set/dockgen_holo_aligned_esmfold_structures/2cdc_1_XYS_5_holo_aligned_esmfold_protein.pdb,O[C@@H]1[C@@H](O)[C@@H](O)OC[C@H]1O, -2ext_1_TRP_0,/bml/acmwhb/Repositories/Lab_Repositories/PoseBench/data/dockgen_set/dockgen_holo_aligned_esmfold_structures/2ext_1_TRP_0_holo_aligned_esmfold_protein.pdb,N[C@@H](CC1CNC2CCCCC12)C(O)O, -2g7c_1_NAG-GAL-GLA_1,/bml/acmwhb/Repositories/Lab_Repositories/PoseBench/data/dockgen_set/dockgen_holo_aligned_esmfold_structures/2g7c_1_NAG-GAL-GLA_1_holo_aligned_esmfold_protein.pdb,CC(O)N[C@@H]1[C@@H](O)[C@H](O[C@@H]2O[C@H](CO)[C@H](O)[C@H](O[C@H]3O[C@H](CO)[C@H](O)[C@H](O)[C@H]3O)[C@H]2O)[C@@H](CO)O[C@H]1O, -2gag_1_FOA_0,/bml/acmwhb/Repositories/Lab_Repositories/PoseBench/data/dockgen_set/dockgen_holo_aligned_esmfold_structures/2gag_1_FOA_0_holo_aligned_esmfold_protein.pdb,OC(O)C1CCCO1, 
-2gag_1_NAD_0,/bml/acmwhb/Repositories/Lab_Repositories/PoseBench/data/dockgen_set/dockgen_holo_aligned_esmfold_structures/2gag_1_NAD_0_holo_aligned_esmfold_protein.pdb,NC1NCNC2C1NCN2[C@@H]1O[C@H](CO[PH](O)(O)O[PH](O)(O)OC[C@H]2O[C@@H](N3CCCC([C@H](N)O)C3)[C@H](O)[C@@H]2O)[C@@H](O)[C@H]1O, -2gah_1_NAD_0,/bml/acmwhb/Repositories/Lab_Repositories/PoseBench/data/dockgen_set/dockgen_holo_aligned_esmfold_structures/2gah_1_NAD_0_holo_aligned_esmfold_protein.pdb,NC(O)C1CCCN([C@@H]2O[C@H](CO[PH](O)(O)O[PH](O)(O)OC[C@H]3O[C@@H](N4CNC5C(N)NCNC54)[C@H](O)[C@@H]3O)[C@@H](O)[C@H]2O)C1, -2gf3_2_FOA_1,/bml/acmwhb/Repositories/Lab_Repositories/PoseBench/data/dockgen_set/dockgen_holo_aligned_esmfold_structures/2gf3_2_FOA_1_holo_aligned_esmfold_protein.pdb,OC(O)C1CCCO1, -2him_1_ASN_3,/bml/acmwhb/Repositories/Lab_Repositories/PoseBench/data/dockgen_set/dockgen_holo_aligned_esmfold_structures/2him_1_ASN_3_holo_aligned_esmfold_protein.pdb,NC(O)C[C@H](N)C(O)O, -2hk9_1_SKM_0,/bml/acmwhb/Repositories/Lab_Repositories/PoseBench/data/dockgen_set/dockgen_holo_aligned_esmfold_structures/2hk9_1_SKM_0_holo_aligned_esmfold_protein.pdb,OC(O)[C@H]1C[C@@H](O)[C@@H](O)[C@H](O)C1, -2hs3_1_FGR_0,/bml/acmwhb/Repositories/Lab_Repositories/PoseBench/data/dockgen_set/dockgen_holo_aligned_esmfold_structures/2hs3_1_FGR_0_holo_aligned_esmfold_protein.pdb,OCNCC(O)N[C@@H]1O[C@H](CO[PH](O)(O)O)[C@@H](O)[C@H]1O, -2o5m_1_MNR_0,/bml/acmwhb/Repositories/Lab_Repositories/PoseBench/data/dockgen_set/dockgen_holo_aligned_esmfold_structures/2o5m_1_MNR_0_holo_aligned_esmfold_protein.pdb,CCC1C(C)C2C[C@@H]3[C@H](C)[C@H](CCC(O)O)C4CC5[C@@H](CCC(O)O)C(C)C6CC7[C@@H](CC)[C@H](C)C8CC1N2[Mn@SP2](N65)(N87)N43, -2q37_1_3AL_0,/bml/acmwhb/Repositories/Lab_Repositories/PoseBench/data/dockgen_set/dockgen_holo_aligned_esmfold_structures/2q37_1_3AL_0_holo_aligned_esmfold_protein.pdb,NC(O)N[C@H]1NC(O)NC1O, 
-2q6k_1_ADN_1,/bml/acmwhb/Repositories/Lab_Repositories/PoseBench/data/dockgen_set/dockgen_holo_aligned_esmfold_structures/2q6k_1_ADN_1_holo_aligned_esmfold_protein.pdb,NC1NCNC2C1NCN2[C@@H]1O[C@H](CO)[C@@H](O)[C@H]1O, -2r4e_1_13P_0,/bml/acmwhb/Repositories/Lab_Repositories/PoseBench/data/dockgen_set/dockgen_holo_aligned_esmfold_structures/2r4e_1_13P_0_holo_aligned_esmfold_protein.pdb,OC[C@@H](O)CO[PH](O)(O)O, -2v5e_1_GU4-YYJ_0,/bml/acmwhb/Repositories/Lab_Repositories/PoseBench/data/dockgen_set/dockgen_holo_aligned_esmfold_structures/2v5e_1_GU4-YYJ_0_holo_aligned_esmfold_protein.pdb,OS(O)(O)OC[C@H]1O[C@@](COS(O)(O)O)(O[C@H]2O[C@H](COS(O)(O)O)[C@@H](OS(O)(O)O)[C@H](OS(O)(O)O)[C@H]2OS(O)(O)O)[C@@H](OS(O)(O)O)[C@@H]1OS(O)(O)O, -2v7t_1_SAH_4,/bml/acmwhb/Repositories/Lab_Repositories/PoseBench/data/dockgen_set/dockgen_holo_aligned_esmfold_structures/2v7t_1_SAH_4_holo_aligned_esmfold_protein.pdb,NC1NCNC2C1NCN2[C@@H]1O[C@H](CSCC[C@H](N)C(O)O)[C@@H](O)[C@H]1O, -2v7u_1_SAM_2,/bml/acmwhb/Repositories/Lab_Repositories/PoseBench/data/dockgen_set/dockgen_holo_aligned_esmfold_structures/2v7u_1_SAM_2_holo_aligned_esmfold_protein.pdb,C[S@@H](CC[C@H](N)C(O)O)C[C@H]1O[C@@H](N2CNC3C(N)NCNC32)[C@H](O)[C@@H]1O, -2v7v_1_5FD_0,/bml/acmwhb/Repositories/Lab_Repositories/PoseBench/data/dockgen_set/dockgen_holo_aligned_esmfold_structures/2v7v_1_5FD_0_holo_aligned_esmfold_protein.pdb,NC1NCNC2C1NCN2[C@@H]1O[C@H](CF)[C@@H](O)[C@H]1O, -2v7w_1_5FD_0,/bml/acmwhb/Repositories/Lab_Repositories/PoseBench/data/dockgen_set/dockgen_holo_aligned_esmfold_structures/2v7w_1_5FD_0_holo_aligned_esmfold_protein.pdb,NC1NCNC2C1NCN2[C@@H]1O[C@H](CF)[C@@H](O)[C@H]1O, -2vdf_1_OCT_0,/bml/acmwhb/Repositories/Lab_Repositories/PoseBench/data/dockgen_set/dockgen_holo_aligned_esmfold_structures/2vdf_1_OCT_0_holo_aligned_esmfold_protein.pdb,CCCCCCCC, 
-2vfu_1_MTL_0,/bml/acmwhb/Repositories/Lab_Repositories/PoseBench/data/dockgen_set/dockgen_holo_aligned_esmfold_structures/2vfu_1_MTL_0_holo_aligned_esmfold_protein.pdb,OC[C@@H](O)[C@@H](O)[C@H](O)[C@H](O)CO, -2wab_1_BGC-BGC-BGC-BGC-BGC_0,/bml/acmwhb/Repositories/Lab_Repositories/PoseBench/data/dockgen_set/dockgen_holo_aligned_esmfold_structures/2wab_1_BGC-BGC-BGC-BGC-BGC_0_holo_aligned_esmfold_protein.pdb,OC[C@H]1O[C@@H](O[C@H]2[C@H](O)[C@@H](O)[C@H](O[C@H]3[C@H](O)[C@@H](O)[C@H](O[C@H]4[C@H](O)[C@@H](O)[C@H](O[C@H]5[C@H](O)[C@@H](O)[C@H](O)O[C@@H]5CO)O[C@@H]4CO)O[C@@H]3CO)O[C@@H]2CO)[C@H](O)[C@@H](O)[C@@H]1O, -2wao_1_BGC-BGC-BGC-BGC-BGC_0,/bml/acmwhb/Repositories/Lab_Repositories/PoseBench/data/dockgen_set/dockgen_holo_aligned_esmfold_structures/2wao_1_BGC-BGC-BGC-BGC-BGC_0_holo_aligned_esmfold_protein.pdb,OC[C@H]1O[C@@H](O[C@H]2[C@H](O)[C@@H](O)[C@H](O[C@H]3[C@H](O)[C@@H](O)[C@H](O[C@H]4[C@H](O)[C@@H](O)[C@H](O[C@H]5[C@H](O)[C@@H](O)[C@H](O)O[C@@H]5CO)O[C@@H]4CO)O[C@@H]3CO)O[C@@H]2CO)[C@H](O)[C@@H](O)[C@@H]1O, -2wr8_1_SAH_1,/bml/acmwhb/Repositories/Lab_Repositories/PoseBench/data/dockgen_set/dockgen_holo_aligned_esmfold_structures/2wr8_1_SAH_1_holo_aligned_esmfold_protein.pdb,NC1NCNC2C1NCN2[C@@H]1O[C@H](CSCC[C@H](N)C(O)O)[C@@H](O)[C@H]1O, -2wwc_1_CHT_2,/bml/acmwhb/Repositories/Lab_Repositories/PoseBench/data/dockgen_set/dockgen_holo_aligned_esmfold_structures/2wwc_1_CHT_2_holo_aligned_esmfold_protein.pdb,C[N+](C)(C)CCO, -2x34_2_UQ8_0,/bml/acmwhb/Repositories/Lab_Repositories/PoseBench/data/dockgen_set/dockgen_holo_aligned_esmfold_structures/2x34_2_UQ8_0_holo_aligned_esmfold_protein.pdb,COC1C(O)C(C)C(CCC(C)CCCC(C)CCCC(C)CCC[C@H](C)CCCC(C)CCC[C@H](C)CCCC(C)CCCC(C)C)C(O)C1OC, -2xrh_1_NIO_1,/bml/acmwhb/Repositories/Lab_Repositories/PoseBench/data/dockgen_set/dockgen_holo_aligned_esmfold_structures/2xrh_1_NIO_1_holo_aligned_esmfold_protein.pdb,OC(O)[C@H]1CCCNC1, 
-2xta_1_ACO_0,/bml/acmwhb/Repositories/Lab_Repositories/PoseBench/data/dockgen_set/dockgen_holo_aligned_esmfold_structures/2xta_1_ACO_0_holo_aligned_esmfold_protein.pdb,CCO[PH](O)(O)O[PH](O)(O)OC[C@H]1O[C@@H](N2CNC3C(N)NCNC32)[C@H](O)[C@@H]1O[PH](O)(O)O, -2zcz_2_TRP_3,/bml/acmwhb/Repositories/Lab_Repositories/PoseBench/data/dockgen_set/dockgen_holo_aligned_esmfold_structures/2zcz_2_TRP_3_holo_aligned_esmfold_protein.pdb,N[C@@H](CC1CNC2CCCCC12)C(O)O, -2zd0_1_TRP_9,/bml/acmwhb/Repositories/Lab_Repositories/PoseBench/data/dockgen_set/dockgen_holo_aligned_esmfold_structures/2zd0_1_TRP_9_holo_aligned_esmfold_protein.pdb,N[C@@H](CC1CNC2CCCCC12)C(O)O, -2ze9_1_PD7_0,/bml/acmwhb/Repositories/Lab_Repositories/PoseBench/data/dockgen_set/dockgen_holo_aligned_esmfold_structures/2ze9_1_PD7_0_holo_aligned_esmfold_protein.pdb,CCCCCCC(O)OC[C@H](CO[PH](O)(O)O)OC(O)CCCCCC, -3ad7_1_NAD_0,/bml/acmwhb/Repositories/Lab_Repositories/PoseBench/data/dockgen_set/dockgen_holo_aligned_esmfold_structures/3ad7_1_NAD_0_holo_aligned_esmfold_protein.pdb,N[C@H](O)C1CCCN([C@@H]2O[C@H](CO[PH](O)(O)O[PH](O)(O)OC[C@H]3O[C@@H](N4CNC5[C@H](N)NCN[C@H]54)[C@H](O)[C@@H]3O)[C@@H](O)[C@H]2O)C1, -3ad9_1_NAD_0,/bml/acmwhb/Repositories/Lab_Repositories/PoseBench/data/dockgen_set/dockgen_holo_aligned_esmfold_structures/3ad9_1_NAD_0_holo_aligned_esmfold_protein.pdb,NC1NCNC2[C@@H]1NCN2[C@@H]1O[C@H](CO[PH](O)(O)O[PH](O)(O)OC[C@H]2O[C@@H](N3CCCC([C@H](N)O)C3)[C@H](O)[C@@H]2O)[C@@H](O)[C@H]1O, -3ada_1_NAD_0,/bml/acmwhb/Repositories/Lab_Repositories/PoseBench/data/dockgen_set/dockgen_holo_aligned_esmfold_structures/3ada_1_NAD_0_holo_aligned_esmfold_protein.pdb,N[C@H](O)C1CCCN([C@@H]2O[C@H](CO[PH](O)(O)O[PH](O)(O)OC[C@H]3O[C@@H](N4CN[C@@H]5[C@H](N)NCN[C@H]54)[C@H](O)[C@@H]3O)[C@@H](O)[C@H]2O)C1, -3eca_1_ASP_3,/bml/acmwhb/Repositories/Lab_Repositories/PoseBench/data/dockgen_set/dockgen_holo_aligned_esmfold_structures/3eca_1_ASP_3_holo_aligned_esmfold_protein.pdb,N[C@@H](CC(O)O)C(O)O, 
-3gf4_1_FAD_1,/bml/acmwhb/Repositories/Lab_Repositories/PoseBench/data/dockgen_set/dockgen_holo_aligned_esmfold_structures/3gf4_1_FAD_1_holo_aligned_esmfold_protein.pdb,CC1CC2NC3C(O)NC(O)NC3N(C[C@H](O)[C@H](O)[C@H](O)CO[PH](O)(O)O[PH](O)(O)OC[C@H]3O[C@@H](N4CNC5C4NCN[C@H]5N)[C@H](O)[C@@H]3O)C2CC1C, -3gf4_1_U5P_0,/bml/acmwhb/Repositories/Lab_Repositories/PoseBench/data/dockgen_set/dockgen_holo_aligned_esmfold_structures/3gf4_1_U5P_0_holo_aligned_esmfold_protein.pdb,OC1CCN([C@@H]2O[C@H](CO[PH](O)(O)O)[C@@H](O)[C@H]2O)C(O)N1, -3he3_5_UDP_0,/bml/acmwhb/Repositories/Lab_Repositories/PoseBench/data/dockgen_set/dockgen_holo_aligned_esmfold_structures/3he3_5_UDP_0_holo_aligned_esmfold_protein.pdb,OC1CCN([C@@H]2O[C@H](CO[PH](O)(O)O[PH](O)(O)O)[C@@H](O)[C@H]2O)C(O)N1, -3it6_1_ORN_1,/bml/acmwhb/Repositories/Lab_Repositories/PoseBench/data/dockgen_set/dockgen_holo_aligned_esmfold_structures/3it6_1_ORN_1_holo_aligned_esmfold_protein.pdb,NCCC[C@H](N)C(O)O, -3k8l_1_GLC-GLC-GLC-GLC-GLC_0,/bml/acmwhb/Repositories/Lab_Repositories/PoseBench/data/dockgen_set/dockgen_holo_aligned_esmfold_structures/3k8l_1_GLC-GLC-GLC-GLC-GLC_0_holo_aligned_esmfold_protein.pdb,OC[C@H]1O[C@H](O[C@H]2[C@H](O)[C@@H](O)[C@@H](O[C@H]3[C@H](O)[C@@H](O)[C@@H](O[C@H]4[C@H](O)[C@@H](O)[C@@H](O[C@H]5[C@H](O)[C@@H](O)[C@@H](O)O[C@@H]5CO)O[C@@H]4CO)O[C@@H]3CO)O[C@@H]2CO)[C@H](O)[C@@H](O)[C@@H]1O, -3k8m_1_GLC-GLC-AC1_0,/bml/acmwhb/Repositories/Lab_Repositories/PoseBench/data/dockgen_set/dockgen_holo_aligned_esmfold_structures/3k8m_1_GLC-GLC-AC1_0_holo_aligned_esmfold_protein.pdb,C[C@H]1O[C@H](O[C@H]2[C@H](O)[C@@H](O)[C@@H](O[C@H]3[C@H](O)[C@@H](O)[C@@H](O)O[C@@H]3CO)O[C@@H]2CO)[C@H](O)[C@@H](O)[C@@H]1N[C@H]1CC(CO)[C@@H](O)[C@H](O)[C@H]1O, -3nvv_1_MTE_1,/bml/acmwhb/Repositories/Lab_Repositories/PoseBench/data/dockgen_set/dockgen_holo_aligned_esmfold_structures/3nvv_1_MTE_1_holo_aligned_esmfold_protein.pdb,NC1NC(O)C2N[C@@H]3[C@H](NC2N1)O[C@H](CO[PH](O)(O)O)C(S)[C@@H]3S, 
-3o01_2_DXC_0,/bml/acmwhb/Repositories/Lab_Repositories/PoseBench/data/dockgen_set/dockgen_holo_aligned_esmfold_structures/3o01_2_DXC_0_holo_aligned_esmfold_protein.pdb,C[C@H](CCC(O)O)[C@H]1CC[C@H]2[C@@H]3CC[C@@H]4C[C@H](O)CC[C@]4(C)[C@H]3C[C@H](O)[C@]12C, -3o02_2_JN3_0,/bml/acmwhb/Repositories/Lab_Repositories/PoseBench/data/dockgen_set/dockgen_holo_aligned_esmfold_structures/3o02_2_JN3_0_holo_aligned_esmfold_protein.pdb,C[C@H](CCC(O)O)[C@H]1CC[C@H]2[C@@H]3[C@H](O)C[C@@H]4C[C@H](O)CC[C@]4(C)[C@H]3CC[C@]12C, -3o7j_1_2AL_0,/bml/acmwhb/Repositories/Lab_Repositories/PoseBench/data/dockgen_set/dockgen_holo_aligned_esmfold_structures/3o7j_1_2AL_0_holo_aligned_esmfold_protein.pdb,NC(O)NC1NC(O)NC1O, -3q14_1_PCR_3,/bml/acmwhb/Repositories/Lab_Repositories/PoseBench/data/dockgen_set/dockgen_holo_aligned_esmfold_structures/3q14_1_PCR_3_holo_aligned_esmfold_protein.pdb,CC1CCC(O)CC1, -3qrc_2_GU4-YYJ_0,/bml/acmwhb/Repositories/Lab_Repositories/PoseBench/data/dockgen_set/dockgen_holo_aligned_esmfold_structures/3qrc_2_GU4-YYJ_0_holo_aligned_esmfold_protein.pdb,OS(O)(O)OC[C@H]1O[C@@](COS(O)(O)O)(O[C@H]2O[C@H](COS(O)(O)O)[C@@H](OS(O)(O)O)[C@H](OS(O)(O)O)[C@H]2OS(O)(O)O)[C@@H](OS(O)(O)O)[C@@H]1OS(O)(O)O, -3s5x_1_BMA-MAN-MAN-MAN-MAN_0,/bml/acmwhb/Repositories/Lab_Repositories/PoseBench/data/dockgen_set/dockgen_holo_aligned_esmfold_structures/3s5x_1_BMA-MAN-MAN-MAN-MAN_0_holo_aligned_esmfold_protein.pdb,OC[C@H]1O[C@H](O[C@@H]2[C@H](O)[C@@H](OC[C@H]3O[C@@H](O)[C@@H](O)[C@@H](O[C@H]4O[C@H](CO)[C@@H](O)[C@H](O)[C@@H]4O)[C@@H]3O)O[C@H](CO)[C@H]2O)[C@@H](O)[C@@H](O)[C@@H]1O, -3s6a_1_ANP_0,/bml/acmwhb/Repositories/Lab_Repositories/PoseBench/data/dockgen_set/dockgen_holo_aligned_esmfold_structures/3s6a_1_ANP_0_holo_aligned_esmfold_protein.pdb,NC1NCNC2C1NCN2[C@@H]1O[C@H](CO[PH](O)(O)O[PH](N)(O)O)[C@@H](O)[C@H]1O, 
-3se5_1_ANP_2,/bml/acmwhb/Repositories/Lab_Repositories/PoseBench/data/dockgen_set/dockgen_holo_aligned_esmfold_structures/3se5_1_ANP_2_holo_aligned_esmfold_protein.pdb,NC1NCNC2C1NCN2[C@@H]1O[C@H](CO[PH](O)(O)O[PH](O)(O)N[PH](O)(O)O)[C@@H](O)[C@H]1O, -3sr6_1_MTE_1,/bml/acmwhb/Repositories/Lab_Repositories/PoseBench/data/dockgen_set/dockgen_holo_aligned_esmfold_structures/3sr6_1_MTE_1_holo_aligned_esmfold_protein.pdb,NC1NC(O)[C@@H]2N[C@H]3C(S)C(S)[C@@H](CO[PH](O)(O)O)O[C@H]3NC2N1, -3ub7_1_ACM_1,/bml/acmwhb/Repositories/Lab_Repositories/PoseBench/data/dockgen_set/dockgen_holo_aligned_esmfold_structures/3ub7_1_ACM_1_holo_aligned_esmfold_protein.pdb,CC(N)O, -3ub9_1_NHY_1,/bml/acmwhb/Repositories/Lab_Repositories/PoseBench/data/dockgen_set/dockgen_holo_aligned_esmfold_structures/3ub9_1_NHY_1_holo_aligned_esmfold_protein.pdb,N[C@@H](O)NO, -3uni_1_SAL_0,/bml/acmwhb/Repositories/Lab_Repositories/PoseBench/data/dockgen_set/dockgen_holo_aligned_esmfold_structures/3uni_1_SAL_0_holo_aligned_esmfold_protein.pdb,OC(O)C1CCCCC1O, -3wrb_1_GDE_0,/bml/acmwhb/Repositories/Lab_Repositories/PoseBench/data/dockgen_set/dockgen_holo_aligned_esmfold_structures/3wrb_1_GDE_0_holo_aligned_esmfold_protein.pdb,OC(O)C1CC(O)C(O)C(O)C1, -3wvc_1_FEG_0,/bml/acmwhb/Repositories/Lab_Repositories/PoseBench/data/dockgen_set/dockgen_holo_aligned_esmfold_structures/3wvc_1_FEG_0_holo_aligned_esmfold_protein.pdb,CC1C(CC(O)O)NC(O)[C@@H](C)[C@@H]1O[PH](O)(O)OC[C@H]1O[C@@H](N2CNC3C(O)NC(N)NC32)[C@H](O)[C@@H]1O, -3zec_1_ANP_0,/bml/acmwhb/Repositories/Lab_Repositories/PoseBench/data/dockgen_set/dockgen_holo_aligned_esmfold_structures/3zec_1_ANP_0_holo_aligned_esmfold_protein.pdb,NC1NCNC2C1NCN2[C@@H]1O[C@H](CO[PH](O)(O)O[PH](O)(O)N[PH](O)(O)O)[C@@H](O)[C@H]1O, -3zjx_1_BOG_0,/bml/acmwhb/Repositories/Lab_Repositories/PoseBench/data/dockgen_set/dockgen_holo_aligned_esmfold_structures/3zjx_1_BOG_0_holo_aligned_esmfold_protein.pdb,CCCCCCCCO[C@@H]1O[C@H](CO)[C@@H](O)[C@H](O)[C@H]1O, 
-3zqu_1_FNR_5,/bml/acmwhb/Repositories/Lab_Repositories/PoseBench/data/dockgen_set/dockgen_holo_aligned_esmfold_structures/3zqu_1_FNR_5_holo_aligned_esmfold_protein.pdb,C[C@H]1CC2NC3[C@H](O)N[C@H](O)N[C@@H]3N(C[C@H](O)[C@H](O)[C@H](O)CO[PH](O)(O)O)[C@@H]2C[C@@H]1C, -3zzs_1_TRP_3,/bml/acmwhb/Repositories/Lab_Repositories/PoseBench/data/dockgen_set/dockgen_holo_aligned_esmfold_structures/3zzs_1_TRP_3_holo_aligned_esmfold_protein.pdb,N[C@@H](CC1CNC2CCCCC12)C(O)O, -4b4v_1_L34_0,/bml/acmwhb/Repositories/Lab_Repositories/PoseBench/data/dockgen_set/dockgen_holo_aligned_esmfold_structures/4b4v_1_L34_0_holo_aligned_esmfold_protein.pdb,NC1NC(O)C2C(NC[C@@H]3CN(C4CCC([C@H](O)N[C@@H](CCC(O)O)C(O)O)CC4)C(O)N23)N1, -4bc9_1_CNV-FAD_0,/bml/acmwhb/Repositories/Lab_Repositories/PoseBench/data/dockgen_set/dockgen_holo_aligned_esmfold_structures/4bc9_1_CNV-FAD_0_holo_aligned_esmfold_protein.pdb,CC1CC2[C@H](CC1C)N(C(C)CN)C1C(O)NC(O)NC1N2C[C@H](O)[C@H](O)[C@H](O)CO[PH](O)(O)O[PH](O)(O)OC[C@H]1O[C@@H](N2CNC3C(N)NCN[C@H]32)[C@H](O)[C@@H]1O, -4cdn_2_FAD_0,/bml/acmwhb/Repositories/Lab_Repositories/PoseBench/data/dockgen_set/dockgen_holo_aligned_esmfold_structures/4cdn_2_FAD_0_holo_aligned_esmfold_protein.pdb,CC1CC2NC3C(O)NC(O)NC3N(C[C@H](O)[C@H](O)[C@H](O)CO[PH](O)(O)O[PH](O)(O)OC[C@H]3O[C@@H](N4CNC5C(N)NCNC54)[C@H](O)[C@@H]3O)C2C[C@H]1C, -4fyv_1_DCP_2,/bml/acmwhb/Repositories/Lab_Repositories/PoseBench/data/dockgen_set/dockgen_holo_aligned_esmfold_structures/4fyv_1_DCP_2_holo_aligned_esmfold_protein.pdb,NC1CCN([C@H]2C[C@H](O)[C@@H](CO[PH](O)(O)O[PH](O)(O)O[PH](O)(O)O)O2)C(O)N1, -4fyw_1_CTP_4,/bml/acmwhb/Repositories/Lab_Repositories/PoseBench/data/dockgen_set/dockgen_holo_aligned_esmfold_structures/4fyw_1_CTP_4_holo_aligned_esmfold_protein.pdb,NC1CCN([C@@H]2O[C@H](CO[PH](O)(O)O[PH](O)(O)O[PH](O)(O)O)[C@@H](O)[C@H]2O)C(O)N1, 
-4gk9_1_BMA-MAN-MAN-MAN-MAN_3,/bml/acmwhb/Repositories/Lab_Repositories/PoseBench/data/dockgen_set/dockgen_holo_aligned_esmfold_structures/4gk9_1_BMA-MAN-MAN-MAN-MAN_3_holo_aligned_esmfold_protein.pdb,OC[C@H]1O[C@H](OC[C@H]2O[C@H](OC[C@H]3O[C@@H](O)[C@@H](O)[C@@H](O[C@H]4O[C@H](CO)[C@@H](O)[C@H](O)[C@@H]4O)[C@@H]3O)[C@@H](O)[C@@H](O[C@H]3O[C@H](CO)[C@@H](O)[C@H](O)[C@@H]3O)[C@@H]2O)[C@@H](O)[C@@H](O)[C@@H]1O, -4h2f_1_ADN_0,/bml/acmwhb/Repositories/Lab_Repositories/PoseBench/data/dockgen_set/dockgen_holo_aligned_esmfold_structures/4h2f_1_ADN_0_holo_aligned_esmfold_protein.pdb,NC1NCNC2C1NCN2[C@@H]1O[C@H](CO)[C@@H](O)[C@H]1O, -4idk_1_1FE_0,/bml/acmwhb/Repositories/Lab_Repositories/PoseBench/data/dockgen_set/dockgen_holo_aligned_esmfold_structures/4idk_1_1FE_0_holo_aligned_esmfold_protein.pdb,NCC(O)NC1CCC2NC(O)NC2C1, -4kgx_1_CTP_5,/bml/acmwhb/Repositories/Lab_Repositories/PoseBench/data/dockgen_set/dockgen_holo_aligned_esmfold_structures/4kgx_1_CTP_5_holo_aligned_esmfold_protein.pdb,NC1CCN([C@@H]2O[C@H](CO[PH](O)(O)O[PH](O)(O)O[PH](O)(O)O)[C@@H](O)[C@H]2O)C(O)N1, -4mig_1_G3F_2,/bml/acmwhb/Repositories/Lab_Repositories/PoseBench/data/dockgen_set/dockgen_holo_aligned_esmfold_structures/4mig_1_G3F_2_holo_aligned_esmfold_protein.pdb,OC[C@H]1O[C@@H](O)[C@H](O)[C@@H](F)[C@@H]1O, -4mo2_2_FDA_0,/bml/acmwhb/Repositories/Lab_Repositories/PoseBench/data/dockgen_set/dockgen_holo_aligned_esmfold_structures/4mo2_2_FDA_0_holo_aligned_esmfold_protein.pdb,CC1CC2[C@@H](CC1C)N[C@H]1C(O)NC(O)N[C@H]1N2C[C@H](O)[C@H](O)[C@H](O)CO[PH](O)(O)O[PH](O)(O)OC[C@H]1O[C@@H](N2CNC3C(N)NCNC32)[C@H](O)[C@@H]1O, -4mos_1_GAF_1,/bml/acmwhb/Repositories/Lab_Repositories/PoseBench/data/dockgen_set/dockgen_holo_aligned_esmfold_structures/4mos_1_GAF_1_holo_aligned_esmfold_protein.pdb,OC[C@H]1O[C@H](O)[C@H](F)[C@@H](O)[C@H]1O, -4n4l_1_HG1_0,/bml/acmwhb/Repositories/Lab_Repositories/PoseBench/data/dockgen_set/dockgen_holo_aligned_esmfold_structures/4n4l_1_HG1_0_holo_aligned_esmfold_protein.pdb,NC(O)CCCC1CCCCC1, 
-4o0d_1_GLY_3,/bml/acmwhb/Repositories/Lab_Repositories/PoseBench/data/dockgen_set/dockgen_holo_aligned_esmfold_structures/4o0d_1_GLY_3_holo_aligned_esmfold_protein.pdb,NCC(O)O, -4o0f_1_GLY_3,/bml/acmwhb/Repositories/Lab_Repositories/PoseBench/data/dockgen_set/dockgen_holo_aligned_esmfold_structures/4o0f_1_GLY_3_holo_aligned_esmfold_protein.pdb,NCC(O)O, -4o95_1_245_0,/bml/acmwhb/Repositories/Lab_Repositories/PoseBench/data/dockgen_set/dockgen_holo_aligned_esmfold_structures/4o95_1_245_0_holo_aligned_esmfold_protein.pdb,OC(NC1CCCCC1)N[C@H]1CCNC(Cl)C1, -4oal_2_245_0,/bml/acmwhb/Repositories/Lab_Repositories/PoseBench/data/dockgen_set/dockgen_holo_aligned_esmfold_structures/4oal_2_245_0_holo_aligned_esmfold_protein.pdb,OC(NC1CCCCC1)N[C@H]1CCNC(Cl)C1, -4osx_1_GLY_2,/bml/acmwhb/Repositories/Lab_Repositories/PoseBench/data/dockgen_set/dockgen_holo_aligned_esmfold_structures/4osx_1_GLY_2_holo_aligned_esmfold_protein.pdb,NCC(O)O, -4osy_1_GLY_3,/bml/acmwhb/Repositories/Lab_Repositories/PoseBench/data/dockgen_set/dockgen_holo_aligned_esmfold_structures/4osy_1_GLY_3_holo_aligned_esmfold_protein.pdb,NCC(O)O, -4pfx_1_UDP_0,/bml/acmwhb/Repositories/Lab_Repositories/PoseBench/data/dockgen_set/dockgen_holo_aligned_esmfold_structures/4pfx_1_UDP_0_holo_aligned_esmfold_protein.pdb,OC1CCN([C@@H]2O[C@H](CO[PH](O)(O)O[PH](O)(O)O)[C@@H](O)[C@H]2O)C(O)N1, -4phr_1_UDP_0,/bml/acmwhb/Repositories/Lab_Repositories/PoseBench/data/dockgen_set/dockgen_holo_aligned_esmfold_structures/4phr_1_UDP_0_holo_aligned_esmfold_protein.pdb,OC1CCN([C@@H]2O[C@H](CO[PH](O)(O)O[PH](O)(O)O)[C@@H](O)[C@H]2O)C(O)N1, -4phs_1_UDP_0,/bml/acmwhb/Repositories/Lab_Repositories/PoseBench/data/dockgen_set/dockgen_holo_aligned_esmfold_structures/4phs_1_UDP_0_holo_aligned_esmfold_protein.pdb,OC1CCN([C@@H]2O[C@H](CO[PH](O)([OH2+])O[PH](O)(O)[OH2+])[C@@H](O)[C@H]2O)C(O)N1, 
-4pya_1_2X3_0,/bml/acmwhb/Repositories/Lab_Repositories/PoseBench/data/dockgen_set/dockgen_holo_aligned_esmfold_structures/4pya_1_2X3_0_holo_aligned_esmfold_protein.pdb,NC1NC(O)C2N[C@H]3N([C@@H]2N1)[C@@H]1O[C@H](CO[PH](O)(O)O[PH](O)(O)O[PH](O)(O)O)[C@@]3(O)[C@H]1O, -4qa8_1_PJZ_0,/bml/acmwhb/Repositories/Lab_Repositories/PoseBench/data/dockgen_set/dockgen_holo_aligned_esmfold_structures/4qa8_1_PJZ_0_holo_aligned_esmfold_protein.pdb,CCCCCCCCCCCCCC[C@@H](O)OC[C@@H](C)O[C@@H](O)CCCCCCCCCCC, -4qo5_1_NAG_2,/bml/acmwhb/Repositories/Lab_Repositories/PoseBench/data/dockgen_set/dockgen_holo_aligned_esmfold_structures/4qo5_1_NAG_2_holo_aligned_esmfold_protein.pdb,CC(O)N[C@H]1CO[C@H](CO)[C@@H](O)[C@@H]1O, -4rhe_1_FMN_6,/bml/acmwhb/Repositories/Lab_Repositories/PoseBench/data/dockgen_set/dockgen_holo_aligned_esmfold_structures/4rhe_1_FMN_6_holo_aligned_esmfold_protein.pdb,CC1C[C@@H]2[C@@H](C[C@@H]1C)NC1[C@H](O)NC(O)N[C@@H]1N2C[C@H](O)[C@H](O)[C@H](O)CO[PH]([O-])(O)O, -4rpj_1_UDP_0,/bml/acmwhb/Repositories/Lab_Repositories/PoseBench/data/dockgen_set/dockgen_holo_aligned_esmfold_structures/4rpj_1_UDP_0_holo_aligned_esmfold_protein.pdb,OC1CCN([C@@H]2O[C@H](CO[PH](O)(O)O[PH](O)(O)O)[C@@H](O)[C@H]2O)C(O)N1, -4rpm_1_HXC_0,/bml/acmwhb/Repositories/Lab_Repositories/PoseBench/data/dockgen_set/dockgen_holo_aligned_esmfold_structures/4rpm_1_HXC_0_holo_aligned_esmfold_protein.pdb,CCCCCC(O)SC, -4tvd_1_BGC_4,/bml/acmwhb/Repositories/Lab_Repositories/PoseBench/data/dockgen_set/dockgen_holo_aligned_esmfold_structures/4tvd_1_BGC_4_holo_aligned_esmfold_protein.pdb,OC[C@H]1O[C@@H](O)[C@H](O)[C@@H](O)[C@@H]1O, -4tvd_1_GLC_0,/bml/acmwhb/Repositories/Lab_Repositories/PoseBench/data/dockgen_set/dockgen_holo_aligned_esmfold_structures/4tvd_1_GLC_0_holo_aligned_esmfold_protein.pdb,OC[C@H]1O[C@H](O)[C@H](O)[C@@H](O)[C@@H]1O, 
-4u63_1_FAD_0,/bml/acmwhb/Repositories/Lab_Repositories/PoseBench/data/dockgen_set/dockgen_holo_aligned_esmfold_structures/4u63_1_FAD_0_holo_aligned_esmfold_protein.pdb,CC1CC2NC3C(O)NC(O)NC3N(C[C@H](O)[C@H](O)[C@H](O)CO[PH](O)(O)O[PH](O)(O)OC[C@H]3O[C@@H](N4CNC5C(N)NCNC54)[C@H](O)[C@@H]3O)C2CC1C, -4uoc_1_NCN_1,/bml/acmwhb/Repositories/Lab_Repositories/PoseBench/data/dockgen_set/dockgen_holo_aligned_esmfold_structures/4uoc_1_NCN_1_holo_aligned_esmfold_protein.pdb,OC(O)C1CCCN([C@@H]2O[C@H](CO[PH](O)(O)O)[C@@H](O)[C@H]2O)C1, -4uuw_1_AMP_0,/bml/acmwhb/Repositories/Lab_Repositories/PoseBench/data/dockgen_set/dockgen_holo_aligned_esmfold_structures/4uuw_1_AMP_0_holo_aligned_esmfold_protein.pdb,NC1NCNC2C1NCN2[C@@H]1O[C@H](CO[PH](O)(O)O)[C@@H](O)[C@H]1O, -4xdr_1_ADN_0,/bml/acmwhb/Repositories/Lab_Repositories/PoseBench/data/dockgen_set/dockgen_holo_aligned_esmfold_structures/4xdr_1_ADN_0_holo_aligned_esmfold_protein.pdb,NC1NCNC2C1NCN2[C@@H]1O[C@H](CO)[C@@H](O)[C@H]1O, -4xfm_1_THE_0,/bml/acmwhb/Repositories/Lab_Repositories/PoseBench/data/dockgen_set/dockgen_holo_aligned_esmfold_structures/4xfm_1_THE_0_holo_aligned_esmfold_protein.pdb,OC[C@@H](O)[C@H](O)C(O)O, -4ydx_1_TCE_0,/bml/acmwhb/Repositories/Lab_Repositories/PoseBench/data/dockgen_set/dockgen_holo_aligned_esmfold_structures/4ydx_1_TCE_0_holo_aligned_esmfold_protein.pdb,OC(O)CCP(CCC(O)O)CCC(O)O, -4zav_1_4LS_8,/bml/acmwhb/Repositories/Lab_Repositories/PoseBench/data/dockgen_set/dockgen_holo_aligned_esmfold_structures/4zav_1_4LS_8_holo_aligned_esmfold_protein.pdb,CC(C)CCN1C2CC(C)[C@@H](C)CC2N(C[C@H](O)[C@H](O)[C@H](O)CO[PH](O)(O)O)[C@H]2NC(O)N[C@@H](O)C21, -4zaw_1_4LU_1,/bml/acmwhb/Repositories/Lab_Repositories/PoseBench/data/dockgen_set/dockgen_holo_aligned_esmfold_structures/4zaw_1_4LU_1_holo_aligned_esmfold_protein.pdb,C[C@H]1[C@@H](C)C[C@@H]2C3[C@@H]1C(C)(C)CCN3[C@H]1C(O)NC(O)NC1N2C[C@H](O)[C@H](O)[C@H](O)CO[PH](O)(O)O, 
-4zay_1_4LS_6,/bml/acmwhb/Repositories/Lab_Repositories/PoseBench/data/dockgen_set/dockgen_holo_aligned_esmfold_structures/4zay_1_4LS_6_holo_aligned_esmfold_protein.pdb,CC(C)CCN1[C@H]2[C@H](O)N[C@H](O)N[C@@H]2N(C[C@H](O)[C@H](O)[C@H](O)CO[PH](O)(O)O)[C@@H]2CC(C)C(C)C[C@H]21, -4zaz_1_4LS_6,/bml/acmwhb/Repositories/Lab_Repositories/PoseBench/data/dockgen_set/dockgen_holo_aligned_esmfold_structures/4zaz_1_4LS_6_holo_aligned_esmfold_protein.pdb,CC(C)CCN1C2CC(C)[C@@H](C)CC2N(C[C@H](O)[C@H](O)[C@H](O)CO[PH](O)(O)O)C2N[C@@H](O)N[C@@H](O)C21, -4zqx_1_ATP_2,/bml/acmwhb/Repositories/Lab_Repositories/PoseBench/data/dockgen_set/dockgen_holo_aligned_esmfold_structures/4zqx_1_ATP_2_holo_aligned_esmfold_protein.pdb,NC1NCNC2C1NCN2[C@@H]1O[C@H](CO[PH](O)(O)O[PH](O)(O)O[PH](O)(O)O)[C@@H](O)[C@H]1O, -5a98_1_ATP_0,/bml/acmwhb/Repositories/Lab_Repositories/PoseBench/data/dockgen_set/dockgen_holo_aligned_esmfold_structures/5a98_1_ATP_0_holo_aligned_esmfold_protein.pdb,NC1NCNC2C1NCN2[C@@H]1O[C@H](CO[PH](O)(O)O[PH](O)(O)O[PH](O)(O)O)[C@@H](O)[C@H]1O, -5ae3_2_AWB_1,/bml/acmwhb/Repositories/Lab_Repositories/PoseBench/data/dockgen_set/dockgen_holo_aligned_esmfold_structures/5ae3_2_AWB_1_holo_aligned_esmfold_protein.pdb,CCCCCC[C@H]1C(O)O[C@H](C)[C@H](N[C@H](O)[C@@H]2CCC[C@H](NCO)[C@H]2O)C(O)O[C@@H](C)[C@@H]1OC(O)CC(C)C, -5b5s_1_BOG_0,/bml/acmwhb/Repositories/Lab_Repositories/PoseBench/data/dockgen_set/dockgen_holo_aligned_esmfold_structures/5b5s_1_BOG_0_holo_aligned_esmfold_protein.pdb,CCCCCCCCO[C@@H]1O[C@H](CO)[C@@H](O)[C@H](O)[C@H]1O, -5d9g_1_GLU-ASN-LEU-TYR-PHE-GLN_0,/bml/acmwhb/Repositories/Lab_Repositories/PoseBench/data/dockgen_set/dockgen_holo_aligned_esmfold_structures/5d9g_1_GLU-ASN-LEU-TYR-PHE-GLN_0_holo_aligned_esmfold_protein.pdb,CC(C)C[C@H](N)C(=O)N[C@@H](Cc1ccc(O)cc1)C(=O)N[C@@H](Cc1ccccc1)C(=O)N[C@@H](CCC(N)=O)C(=O)O, 
-5dnc_1_ASN_2,/bml/acmwhb/Repositories/Lab_Repositories/PoseBench/data/dockgen_set/dockgen_holo_aligned_esmfold_structures/5dnc_1_ASN_2_holo_aligned_esmfold_protein.pdb,NC(O)C[C@H](N)C(O)O, -5eno_1_5QG_0,/bml/acmwhb/Repositories/Lab_Repositories/PoseBench/data/dockgen_set/dockgen_holo_aligned_esmfold_structures/5eno_1_5QG_0_holo_aligned_esmfold_protein.pdb,CC1(C)CC2C(CO1)C(N1CCOCC1)NC(SCCC1CCCCC1)[C@@H]2CN, -5enp_1_5QF_0,/bml/acmwhb/Repositories/Lab_Repositories/PoseBench/data/dockgen_set/dockgen_holo_aligned_esmfold_structures/5enp_1_5QF_0_holo_aligned_esmfold_protein.pdb,COCCN1CCN([C@@H]2NC(SCC[C@H]3CCC(OC)[C@@H](OC)C3)[C@H](CN)[C@H]3CC(C)(C)OCC23)CC1, -5enq_1_5QE_0,/bml/acmwhb/Repositories/Lab_Repositories/PoseBench/data/dockgen_set/dockgen_holo_aligned_esmfold_structures/5enq_1_5QE_0_holo_aligned_esmfold_protein.pdb,CC(O)NC1CCC(CCSC2NC(N3C[C@@H](C)O[C@@H](C)C3)[C@@H]3COC(C)(C)CC3C2CN)CC1, -5enr_1_MBX_0,/bml/acmwhb/Repositories/Lab_Repositories/PoseBench/data/dockgen_set/dockgen_holo_aligned_esmfold_structures/5enr_1_MBX_0_holo_aligned_esmfold_protein.pdb,CCC(O)NC1CCC(CCS[C@@H]2NC(N3C[C@H](C)O[C@@H](C)C3)C3COC(C)(C)CC3C2CN)CC1, -5ent_1_MIY_0,/bml/acmwhb/Repositories/Lab_Repositories/PoseBench/data/dockgen_set/dockgen_holo_aligned_esmfold_structures/5ent_1_MIY_0_holo_aligned_esmfold_protein.pdb,CN(C)C1CCC(O)C2C1C[C@H]1C[C@H]3[C@H](N(C)C)C(O)C(C(N)O)[C@@H](O)[C@@]3(O)C(O)[C@@H]1C2O, -5ers_1_AMP_1,/bml/acmwhb/Repositories/Lab_Repositories/PoseBench/data/dockgen_set/dockgen_holo_aligned_esmfold_structures/5ers_1_AMP_1_holo_aligned_esmfold_protein.pdb,NC1NCNC2C1NCN2[C@@H]1O[C@H](CO[PH](O)(O)O)[C@@H](O)[C@H]1O, -5f2t_1_PLM_0,/bml/acmwhb/Repositories/Lab_Repositories/PoseBench/data/dockgen_set/dockgen_holo_aligned_esmfold_structures/5f2t_1_PLM_0_holo_aligned_esmfold_protein.pdb,CCCCCCCCCCCCCCCC(O)O, 
-5f52_1_ASP_2,/bml/acmwhb/Repositories/Lab_Repositories/PoseBench/data/dockgen_set/dockgen_holo_aligned_esmfold_structures/5f52_1_ASP_2_holo_aligned_esmfold_protein.pdb,N[C@@H](CC(O)O)C(O)O, -5fiu_1_Y3J_3,/bml/acmwhb/Repositories/Lab_Repositories/PoseBench/data/dockgen_set/dockgen_holo_aligned_esmfold_structures/5fiu_1_Y3J_3_holo_aligned_esmfold_protein.pdb,NC1NCNC2C1NCN2[C@@H]1O[C@H](C(F)F)[C@@H](O)[C@@H]1O, -5fxd_1_H7Y_1,/bml/acmwhb/Repositories/Lab_Repositories/PoseBench/data/dockgen_set/dockgen_holo_aligned_esmfold_structures/5fxd_1_H7Y_1_holo_aligned_esmfold_protein.pdb,CCC[C@H]1CCC(O)C(OC)C1, -5fxe_1_CIY_1,/bml/acmwhb/Repositories/Lab_Repositories/PoseBench/data/dockgen_set/dockgen_holo_aligned_esmfold_structures/5fxe_1_CIY_1_holo_aligned_esmfold_protein.pdb,CO[C@H]1CC(CCCO)CCC1O, -5fxf_1_BEZ_0,/bml/acmwhb/Repositories/Lab_Repositories/PoseBench/data/dockgen_set/dockgen_holo_aligned_esmfold_structures/5fxf_1_BEZ_0_holo_aligned_esmfold_protein.pdb,OC(O)C1CCCCC1, -5gqi_1_ATP_7,/bml/acmwhb/Repositories/Lab_Repositories/PoseBench/data/dockgen_set/dockgen_holo_aligned_esmfold_structures/5gqi_1_ATP_7_holo_aligned_esmfold_protein.pdb,N[C@@H]1NCN[C@H]2C1NCN2[C@@H]1O[C@H](CO[PH](O)(O)O[PH](O)(O)O[PH](O)(O)O)[C@@H](O)[C@H]1O, -5gql_1_ATP_4,/bml/acmwhb/Repositories/Lab_Repositories/PoseBench/data/dockgen_set/dockgen_holo_aligned_esmfold_structures/5gql_1_ATP_4_holo_aligned_esmfold_protein.pdb,NC1NCNC2C1NCN2[C@@H]1O[C@H](CO[PH](O)(O)O[PH](O)(O)O[PH](O)(O)O)[C@@H](O)[C@H]1O, -5hhz_1_ZME_0,/bml/acmwhb/Repositories/Lab_Repositories/PoseBench/data/dockgen_set/dockgen_holo_aligned_esmfold_structures/5hhz_1_ZME_0_holo_aligned_esmfold_protein.pdb,CC1CCN(C2NCNC3NCNC32)C1, -5hmr_1_FDZ_0,/bml/acmwhb/Repositories/Lab_Repositories/PoseBench/data/dockgen_set/dockgen_holo_aligned_esmfold_structures/5hmr_1_FDZ_0_holo_aligned_esmfold_protein.pdb,OC(NC1CNNS1)N[C@H]1CCC[C@H](OC(F)(F)F)C1, 
-5hqx_1_EDZ_0,/bml/acmwhb/Repositories/Lab_Repositories/PoseBench/data/dockgen_set/dockgen_holo_aligned_esmfold_structures/5hqx_1_EDZ_0_holo_aligned_esmfold_protein.pdb,OCC[C@H]1CCCC[C@@H]1NC(O)N[C@H]1CNNS1, -5hw0_1_GLU_2,/bml/acmwhb/Repositories/Lab_Repositories/PoseBench/data/dockgen_set/dockgen_holo_aligned_esmfold_structures/5hw0_1_GLU_2_holo_aligned_esmfold_protein.pdb,N[C@@H](CCC(O)O)C(O)O, -5ida_1_BMA_0,/bml/acmwhb/Repositories/Lab_Repositories/PoseBench/data/dockgen_set/dockgen_holo_aligned_esmfold_structures/5ida_1_BMA_0_holo_aligned_esmfold_protein.pdb,OC[C@H]1O[C@@H](O)[C@@H](O)[C@@H](O)[C@@H]1O, -5k3o_2_ASP_0,/bml/acmwhb/Repositories/Lab_Repositories/PoseBench/data/dockgen_set/dockgen_holo_aligned_esmfold_structures/5k3o_2_ASP_0_holo_aligned_esmfold_protein.pdb,N[C@@H](CC(O)O)C(O)O, -5k45_2_GLU_1,/bml/acmwhb/Repositories/Lab_Repositories/PoseBench/data/dockgen_set/dockgen_holo_aligned_esmfold_structures/5k45_2_GLU_1_holo_aligned_esmfold_protein.pdb,N[C@@H](CCC(O)O)C(O)O, -5k4h_2_GLU_3,/bml/acmwhb/Repositories/Lab_Repositories/PoseBench/data/dockgen_set/dockgen_holo_aligned_esmfold_structures/5k4h_2_GLU_3_holo_aligned_esmfold_protein.pdb,N[C@@H](CCC(O)O)C(O)O, -5k62_1_ASN-VAL_0,/bml/acmwhb/Repositories/Lab_Repositories/PoseBench/data/dockgen_set/dockgen_holo_aligned_esmfold_structures/5k62_1_ASN-VAL_0_holo_aligned_esmfold_protein.pdb,CC(C)[C@@H](CO)NC(O)[C@@H](N)CC(N)O, -5k63_1_ASN-GLY_0,/bml/acmwhb/Repositories/Lab_Repositories/PoseBench/data/dockgen_set/dockgen_holo_aligned_esmfold_structures/5k63_1_ASN-GLY_0_holo_aligned_esmfold_protein.pdb,NC(O)C[C@H](N)C(O)NCCO, -5k66_1_ASN-GLU_0,/bml/acmwhb/Repositories/Lab_Repositories/PoseBench/data/dockgen_set/dockgen_holo_aligned_esmfold_structures/5k66_1_ASN-GLU_0_holo_aligned_esmfold_protein.pdb,NC(O)C[C@H](N)C(O)N[C@H](CO)CCC(O)O, 
-5mh1_1_BMA_0,/bml/acmwhb/Repositories/Lab_Repositories/PoseBench/data/dockgen_set/dockgen_holo_aligned_esmfold_structures/5mh1_1_BMA_0_holo_aligned_esmfold_protein.pdb,OC[C@H]1O[C@@H](O)[C@@H](O)[C@@H](O)[C@@H]1O, -5u82_2_ZN0_0,/bml/acmwhb/Repositories/Lab_Repositories/PoseBench/data/dockgen_set/dockgen_holo_aligned_esmfold_structures/5u82_2_ZN0_0_holo_aligned_esmfold_protein.pdb,CC[SnH](CC)CC, -6a71_1_9UX_0,/bml/acmwhb/Repositories/Lab_Repositories/PoseBench/data/dockgen_set/dockgen_holo_aligned_esmfold_structures/6a71_1_9UX_0_holo_aligned_esmfold_protein.pdb,O[Mo@]12S[Mo@@]1(O)S2, -6a72_1_9UX_0,/bml/acmwhb/Repositories/Lab_Repositories/PoseBench/data/dockgen_set/dockgen_holo_aligned_esmfold_structures/6a72_1_9UX_0_holo_aligned_esmfold_protein.pdb,O[Mo@]12S[Mo@@]1(O)S2, -6b1b_1_TMO_15,/bml/acmwhb/Repositories/Lab_Repositories/PoseBench/data/dockgen_set/dockgen_holo_aligned_esmfold_structures/6b1b_1_TMO_15_holo_aligned_esmfold_protein.pdb,C[N+](C)(C)O, -6ea9_1_9BG_0,/bml/acmwhb/Repositories/Lab_Repositories/PoseBench/data/dockgen_set/dockgen_holo_aligned_esmfold_structures/6ea9_1_9BG_0_holo_aligned_esmfold_protein.pdb,NC1NC(O)C2NCN([C@@H]3O[C@H](CO)[C@@H](O)[C@H]3O[PH]([O-])(O)OC[C@H]3O[C@@H](N4CNC5C(N)NCNC54)[C@H](O)[C@@H]3O[PH]([O-])([O-])O)C2N1, -6ep5_1_ADP_1,/bml/acmwhb/Repositories/Lab_Repositories/PoseBench/data/dockgen_set/dockgen_holo_aligned_esmfold_structures/6ep5_1_ADP_1_holo_aligned_esmfold_protein.pdb,NC1NCNC2C1NCN2[C@@H]1O[C@H](CO[PH](O)(O)O[PH](O)(O)O)[C@@H](O)[C@H]1O, -6etf_1_AMP_0,/bml/acmwhb/Repositories/Lab_Repositories/PoseBench/data/dockgen_set/dockgen_holo_aligned_esmfold_structures/6etf_1_AMP_0_holo_aligned_esmfold_protein.pdb,NC1NCNC2[C@H]1NCN2[C@@H]1O[C@H](CO[PH](O)(O)O)[C@@H](O)[C@H]1O, -6fgc_1_ADP_1,/bml/acmwhb/Repositories/Lab_Repositories/PoseBench/data/dockgen_set/dockgen_holo_aligned_esmfold_structures/6fgc_1_ADP_1_holo_aligned_esmfold_protein.pdb,NC1NCNC2C1NCN2[C@@H]1O[C@H](CO[PH](O)(O)O[PH](O)(O)O)[C@@H](O)[C@H]1O, 
-6fgc_1_D95_1,/bml/acmwhb/Repositories/Lab_Repositories/PoseBench/data/dockgen_set/dockgen_holo_aligned_esmfold_structures/6fgc_1_D95_1_holo_aligned_esmfold_protein.pdb,C[C@H]1[C@H](OC(O)CCC(O)O)O[C@@H]2O[C@@]3(C)CC[C@H]4[C@H](C)CC[C@@H]1[C@@]24OO3, -6gbf_1_AMP_0,/bml/acmwhb/Repositories/Lab_Repositories/PoseBench/data/dockgen_set/dockgen_holo_aligned_esmfold_structures/6gbf_1_AMP_0_holo_aligned_esmfold_protein.pdb,NC1NCNC2C1NCN2[C@@H]1O[C@H](CO[PH](O)(O)O)[C@@H](O)[C@H]1O, -6jls_1_FMN_6,/bml/acmwhb/Repositories/Lab_Repositories/PoseBench/data/dockgen_set/dockgen_holo_aligned_esmfold_structures/6jls_1_FMN_6_holo_aligned_esmfold_protein.pdb,CC1CC2NC3C(O)NC(O)NC3N(C[C@H](O)[C@H](O)[C@H](O)CO[PH](O)(O)O)C2CC1C, -6n19_2_K8V_0,/bml/acmwhb/Repositories/Lab_Repositories/PoseBench/data/dockgen_set/dockgen_holo_aligned_esmfold_structures/6n19_2_K8V_0_holo_aligned_esmfold_protein.pdb,CCC(O)NC1CCC(C([O-])O)C(C([O-])O)C1, -6nco_1_KQP_0,/bml/acmwhb/Repositories/Lab_Repositories/PoseBench/data/dockgen_set/dockgen_holo_aligned_esmfold_structures/6nco_1_KQP_0_holo_aligned_esmfold_protein.pdb,CC(C)(O)C1CCC([C@H]2CC(Cl)C[C@@H](C34(C(N)N)CC3C4)C2)CC1, -6npp_1_KWG_0,/bml/acmwhb/Repositories/Lab_Repositories/PoseBench/data/dockgen_set/dockgen_holo_aligned_esmfold_structures/6npp_1_KWG_0_holo_aligned_esmfold_protein.pdb,[O-]C(O)C1CCCC(CCC2CCCCC2)C1N1CCCC1, -6o6y_1_ACK_0,/bml/acmwhb/Repositories/Lab_Repositories/PoseBench/data/dockgen_set/dockgen_holo_aligned_esmfold_structures/6o6y_1_ACK_0_holo_aligned_esmfold_protein.pdb,NC1NCNC2C1NCN2[C@@H]1O[C@H](CO)[C@H]2O[PH]([O-])(O)O[C@H]21, -6o70_1_ACK_1,/bml/acmwhb/Repositories/Lab_Repositories/PoseBench/data/dockgen_set/dockgen_holo_aligned_esmfold_structures/6o70_1_ACK_1_holo_aligned_esmfold_protein.pdb,NC1NCNC2C1NCN2[C@@H]1O[C@H](CO)[C@H]2O[PH]([O-])(O)O[C@H]21, 
-6pa2_1_ASP_2,/bml/acmwhb/Repositories/Lab_Repositories/PoseBench/data/dockgen_set/dockgen_holo_aligned_esmfold_structures/6pa2_1_ASP_2_holo_aligned_esmfold_protein.pdb,N[C@@H](CC(O)O)C(O)O, -6pa6_2_ASN_0,/bml/acmwhb/Repositories/Lab_Repositories/PoseBench/data/dockgen_set/dockgen_holo_aligned_esmfold_structures/6pa6_2_ASN_0_holo_aligned_esmfold_protein.pdb,NC(O)C[C@H](N)C(O)O, -6paa_1_ASP_2,/bml/acmwhb/Repositories/Lab_Repositories/PoseBench/data/dockgen_set/dockgen_holo_aligned_esmfold_structures/6paa_1_ASP_2_holo_aligned_esmfold_protein.pdb,N[C@@H](CC(O)O)C(O)O, -6qkr_1_FAD_0,/bml/acmwhb/Repositories/Lab_Repositories/PoseBench/data/dockgen_set/dockgen_holo_aligned_esmfold_structures/6qkr_1_FAD_0_holo_aligned_esmfold_protein.pdb,CC1CC2NC3C(NC(O)N[C@H]3O)N(C[C@H](O)[C@H](O)[C@H](O)CO[PH](O)(O)O[PH](O)(O)OC[C@H]3O[C@@H](N4CNC5C4NCN[C@H]5N)[C@H](O)[C@@H]3O)C2CC1C, -6rms_1_AMP_1,/bml/acmwhb/Repositories/Lab_Repositories/PoseBench/data/dockgen_set/dockgen_holo_aligned_esmfold_structures/6rms_1_AMP_1_holo_aligned_esmfold_protein.pdb,NC1NCNC2C1NCN2[C@@H]1O[C@H](CO[PH](O)(O)O)[C@@H](O)[C@H]1O, -6ryz_1_SAM_2,/bml/acmwhb/Repositories/Lab_Repositories/PoseBench/data/dockgen_set/dockgen_holo_aligned_esmfold_structures/6ryz_1_SAM_2_holo_aligned_esmfold_protein.pdb,C[S@@H](CC[C@H](N)C(O)O)C[C@H]1O[C@@H](N2CNC3C2NCN[C@@H]3N)[C@H](O)[C@@H]1O, -6rz2_1_5CD_2,/bml/acmwhb/Repositories/Lab_Repositories/PoseBench/data/dockgen_set/dockgen_holo_aligned_esmfold_structures/6rz2_1_5CD_2_holo_aligned_esmfold_protein.pdb,NC1NCNC2[C@H]1NCN2[C@@H]1O[C@H](CCl)[C@@H](O)[C@H]1O, -6tvg_1_AP2_1,/bml/acmwhb/Repositories/Lab_Repositories/PoseBench/data/dockgen_set/dockgen_holo_aligned_esmfold_structures/6tvg_1_AP2_1_holo_aligned_esmfold_protein.pdb,NC1NCNC2C1NCN2[C@@H]1O[C@H](CO[PH](O)(O)C[PH](O)(O)O)[C@@H](O)[C@H]1O, 
-6uqy_2_AT3_0,/bml/acmwhb/Repositories/Lab_Repositories/PoseBench/data/dockgen_set/dockgen_holo_aligned_esmfold_structures/6uqy_2_AT3_0_holo_aligned_esmfold_protein.pdb,C[C@@H](O)SCC[N+](C)(C)C, -6ur1_2_AT3_0,/bml/acmwhb/Repositories/Lab_Repositories/PoseBench/data/dockgen_set/dockgen_holo_aligned_esmfold_structures/6ur1_2_AT3_0_holo_aligned_esmfold_protein.pdb,C[C@@H](O)SCC[N+](C)(C)C, -6v2a_1_ASN_3,/bml/acmwhb/Repositories/Lab_Repositories/PoseBench/data/dockgen_set/dockgen_holo_aligned_esmfold_structures/6v2a_1_ASN_3_holo_aligned_esmfold_protein.pdb,NC(O)C[C@H](N)C(O)O, -6wyz_1_DGL_1,/bml/acmwhb/Repositories/Lab_Repositories/PoseBench/data/dockgen_set/dockgen_holo_aligned_esmfold_structures/6wyz_1_DGL_1_holo_aligned_esmfold_protein.pdb,N[C@H](CCC(O)O)C(O)O, -6xb3_3_9BG_1,/bml/acmwhb/Repositories/Lab_Repositories/PoseBench/data/dockgen_set/dockgen_holo_aligned_esmfold_structures/6xb3_3_9BG_1_holo_aligned_esmfold_protein.pdb,NC1NC(O)C2NCN([C@@H]3O[C@H](CO)[C@@H](O)[C@H]3O[PH]([O-])(O)OC[C@H]3O[C@@H](N4CNC5C(N)NCNC54)[C@H](O)[C@@H]3O[PH]([O-])(O)O)C2N1, -6xug_1_O1Q_0,/bml/acmwhb/Repositories/Lab_Repositories/PoseBench/data/dockgen_set/dockgen_holo_aligned_esmfold_structures/6xug_1_O1Q_0_holo_aligned_esmfold_protein.pdb,CC1CCC[C@H](N2NCC[C@H]2C2CC(Cl)C3NNN(C4CC[C@H]5CNNC5C4)C3C2)C1, -6yao_1_OJ2_0,/bml/acmwhb/Repositories/Lab_Repositories/PoseBench/data/dockgen_set/dockgen_holo_aligned_esmfold_structures/6yao_1_OJ2_0_holo_aligned_esmfold_protein.pdb,OCC[C@H]1CCCC[C@@H]1NC(O)N[C@H]1CCC[C@@H](OC(F)(F)F)C1, -6yap_1_OHZ_0,/bml/acmwhb/Repositories/Lab_Repositories/PoseBench/data/dockgen_set/dockgen_holo_aligned_esmfold_structures/6yap_1_OHZ_0_holo_aligned_esmfold_protein.pdb,OCC[C@H]1CCCC[C@@H]1NC(O)N[C@H]1CC(Cl)C[C@H](OC(F)(F)F)C1, -6yaq_1_OHZ_0,/bml/acmwhb/Repositories/Lab_Repositories/PoseBench/data/dockgen_set/dockgen_holo_aligned_esmfold_structures/6yaq_1_OHZ_0_holo_aligned_esmfold_protein.pdb,OCCC1CCCC[C@H]1N[C@H](O)N[C@H]1C[C@@H](Cl)C[C@@H](OC(F)(F)F)C1, 
+3gvl_1_SLB_2,data/dockgen_set/dockgen_holo_aligned_predicted_structures/3gvl_1_SLB_2_holo_aligned_predicted_protein.pdb,CC(O)N[C@H]1[C@H]([C@H](O)[C@H](O)CO)O[C@](O)(C(O)O)C[C@@H]1O, +3inr_1_GDU_0,data/dockgen_set/dockgen_holo_aligned_predicted_structures/3inr_1_GDU_0_holo_aligned_predicted_protein.pdb,OC[C@H]1O[C@H](O[PH](O)(O)O[PH](O)(O)OC[C@H]2O[C@@H](N3CCC(O)NC3O)[C@H](O)[C@@H]2O)[C@H](O)[C@@H](O)[C@H]1O, +3jqm_1_GTP_5,data/dockgen_set/dockgen_holo_aligned_predicted_structures/3jqm_1_GTP_5_holo_aligned_predicted_protein.pdb,NC1NC(O)C2NCN([C@@H]3O[C@H](CO[PH](O)(O)O[PH](O)(O)O[PH](O)(O)O)[C@@H](O)[C@H]3O)C2N1, +3ju4_1_SLB_2,data/dockgen_set/dockgen_holo_aligned_predicted_structures/3ju4_1_SLB_2_holo_aligned_predicted_protein.pdb,CC(O)N[C@H]1[C@H]([C@H](O)[C@H](O)CO)O[C@](O)(C(O)O)C[C@@H]1O, +4cnl_1_CHT_1,data/dockgen_set/dockgen_holo_aligned_predicted_structures/4cnl_1_CHT_1_holo_aligned_predicted_protein.pdb,C[N+](C)(C)CCO, +1hg0_1_SIN_1,data/dockgen_set/dockgen_holo_aligned_predicted_structures/1hg0_1_SIN_1_holo_aligned_predicted_protein.pdb,OC(O)CCC(O)O, +1i8t_1_FAD_1,data/dockgen_set/dockgen_holo_aligned_predicted_structures/1i8t_1_FAD_1_holo_aligned_predicted_protein.pdb,CC1CC2NC3C(O)NC(O)NC3N(C[C@H](O)[C@H](O)[C@H](O)CO[PH](O)(O)O[PH](O)(O)OC[C@H]3O[C@@H](N4CNC5C(N)NCNC54)[C@H](O)[C@@H]3O)C2CC1C, +1o28_1_UFP_2,data/dockgen_set/dockgen_holo_aligned_predicted_structures/1o28_1_UFP_2_holo_aligned_predicted_protein.pdb,OC1NC(O)N([C@H]2C[C@H](O)[C@@H](CO[PH](O)(O)O)O2)CC1F, +1o72_2_PC_0,data/dockgen_set/dockgen_holo_aligned_predicted_structures/1o72_2_PC_0_holo_aligned_predicted_protein.pdb,C[N+](C)(C)CCO[PH](O)(O)O, +1pj2_1_FUM_0,data/dockgen_set/dockgen_holo_aligned_predicted_structures/1pj2_1_FUM_0_holo_aligned_predicted_protein.pdb,OC(O)CCC(O)O, +1pj4_1_FUM_1,data/dockgen_set/dockgen_holo_aligned_predicted_structures/1pj4_1_FUM_1_holo_aligned_predicted_protein.pdb,OC(O)CCC(O)O, 
+1qaw_1_TRP_7,data/dockgen_set/dockgen_holo_aligned_predicted_structures/1qaw_1_TRP_7_holo_aligned_predicted_protein.pdb,N[C@@H](CC1CNC2CCCCC12)C(O)O, +1rqp_1_SAM_0,data/dockgen_set/dockgen_holo_aligned_predicted_structures/1rqp_1_SAM_0_holo_aligned_predicted_protein.pdb,C[S@@H](CC[C@H](N)C(O)O)C[C@H]1O[C@@H](N2CNC3C(N)NCNC32)[C@H](O)[C@@H]1O, +1sbz_1_FMN_3,data/dockgen_set/dockgen_holo_aligned_predicted_structures/1sbz_1_FMN_3_holo_aligned_predicted_protein.pdb,C[C@@H]1CC2NC3C(O)NC(O)NC3N(C[C@H](O)[C@H](O)[C@H](O)CO[PH](O)(O)O)C2C[C@@H]1C, +1sij_1_PCD_0,data/dockgen_set/dockgen_holo_aligned_predicted_structures/1sij_1_PCD_0_holo_aligned_predicted_protein.pdb,NC1CCN([C@@H]2O[C@H](CO[PH](O)(O)O[PH](O)(O)OC[C@H]3O[C@H]4NC5NC(N)NC(O)C5N[C@H]4[C@@H]4S[Mo](O)(O)SC43)[C@@H](O)[C@H]2O)C(O)N1, +1tke_1_SER_0,data/dockgen_set/dockgen_holo_aligned_predicted_structures/1tke_1_SER_0_holo_aligned_predicted_protein.pdb,N[C@@H](CO)C(O)O, +1tkg_1_SSA_0,data/dockgen_set/dockgen_holo_aligned_predicted_structures/1tkg_1_SSA_0_holo_aligned_predicted_protein.pdb,NC1NCNC2C1NCN2[C@@H]1O[C@H](COS(O)(O)NC(O)[C@@H](N)CO)[C@@H](O)[C@H]1O, +1u8u_1_OCA_0,data/dockgen_set/dockgen_holo_aligned_predicted_structures/1u8u_1_OCA_0_holo_aligned_predicted_protein.pdb,CCCCCCCC(O)O, +1uf5_1_CDT_0,data/dockgen_set/dockgen_holo_aligned_predicted_structures/1uf5_1_CDT_0_holo_aligned_predicted_protein.pdb,CSCC[C@@H](NC(N)O)C(O)O, +1uf7_1_CDV_0,data/dockgen_set/dockgen_holo_aligned_predicted_structures/1uf7_1_CDV_0_holo_aligned_predicted_protein.pdb,CC(C)[C@@H](NC(N)O)C(O)O, +1uf8_1_ING_2,data/dockgen_set/dockgen_holo_aligned_predicted_structures/1uf8_1_ING_2_holo_aligned_predicted_protein.pdb,NC(O)N[C@H](CC1CCCCC1)C(O)O, +1v2g_1_OCA_0,data/dockgen_set/dockgen_holo_aligned_predicted_structures/1v2g_1_OCA_0_holo_aligned_predicted_protein.pdb,CCCCCCCC(O)O, 
+1v97_1_MTE_1,data/dockgen_set/dockgen_holo_aligned_predicted_structures/1v97_1_MTE_1_holo_aligned_predicted_protein.pdb,NC1NC(O)[C@@H]2N[C@H]3C(S)C(S)[C@@H](CO[PH](O)(O)O)O[C@H]3NC2N1, +1za2_1_CTP_4,data/dockgen_set/dockgen_holo_aligned_predicted_structures/1za2_1_CTP_4_holo_aligned_predicted_protein.pdb,NC1CCN([C@@H]2O[C@H](CO[PH](O)(O)O[PH](O)(O)O[PH](O)(O)O)[C@@H](O)[C@H]2O)C(O)N1, +2cdc_1_XYS_5,data/dockgen_set/dockgen_holo_aligned_predicted_structures/2cdc_1_XYS_5_holo_aligned_predicted_protein.pdb,O[C@@H]1[C@@H](O)[C@@H](O)OC[C@H]1O, +2ext_1_TRP_0,data/dockgen_set/dockgen_holo_aligned_predicted_structures/2ext_1_TRP_0_holo_aligned_predicted_protein.pdb,N[C@@H](CC1CNC2CCCCC12)C(O)O, +2g7c_1_NAG-GAL-GLA_1,data/dockgen_set/dockgen_holo_aligned_predicted_structures/2g7c_1_NAG-GAL-GLA_1_holo_aligned_predicted_protein.pdb,CC(O)N[C@@H]1[C@@H](O)[C@H](O[C@@H]2O[C@H](CO)[C@H](O)[C@H](O[C@H]3O[C@H](CO)[C@H](O)[C@H](O)[C@H]3O)[C@H]2O)[C@@H](CO)O[C@H]1O, +2gag_1_FOA_0,data/dockgen_set/dockgen_holo_aligned_predicted_structures/2gag_1_FOA_0_holo_aligned_predicted_protein.pdb,OC(O)C1CCCO1, +2gag_1_NAD_0,data/dockgen_set/dockgen_holo_aligned_predicted_structures/2gag_1_NAD_0_holo_aligned_predicted_protein.pdb,NC1NCNC2C1NCN2[C@@H]1O[C@H](CO[PH](O)(O)O[PH](O)(O)OC[C@H]2O[C@@H](N3CCCC([C@H](N)O)C3)[C@H](O)[C@@H]2O)[C@@H](O)[C@H]1O, +2gah_1_NAD_0,data/dockgen_set/dockgen_holo_aligned_predicted_structures/2gah_1_NAD_0_holo_aligned_predicted_protein.pdb,NC(O)C1CCCN([C@@H]2O[C@H](CO[PH](O)(O)O[PH](O)(O)OC[C@H]3O[C@@H](N4CNC5C(N)NCNC54)[C@H](O)[C@@H]3O)[C@@H](O)[C@H]2O)C1, +2gf3_2_FOA_1,data/dockgen_set/dockgen_holo_aligned_predicted_structures/2gf3_2_FOA_1_holo_aligned_predicted_protein.pdb,OC(O)C1CCCO1, +2him_1_ASN_3,data/dockgen_set/dockgen_holo_aligned_predicted_structures/2him_1_ASN_3_holo_aligned_predicted_protein.pdb,NC(O)C[C@H](N)C(O)O, 
+2hk9_1_SKM_0,data/dockgen_set/dockgen_holo_aligned_predicted_structures/2hk9_1_SKM_0_holo_aligned_predicted_protein.pdb,OC(O)[C@H]1C[C@@H](O)[C@@H](O)[C@H](O)C1, +2hs3_1_FGR_0,data/dockgen_set/dockgen_holo_aligned_predicted_structures/2hs3_1_FGR_0_holo_aligned_predicted_protein.pdb,OCNCC(O)N[C@@H]1O[C@H](CO[PH](O)(O)O)[C@@H](O)[C@H]1O, +2o5m_1_MNR_0,data/dockgen_set/dockgen_holo_aligned_predicted_structures/2o5m_1_MNR_0_holo_aligned_predicted_protein.pdb,CCC1C(C)C2C[C@@H]3[C@H](C)[C@H](CCC(O)O)C4CC5[C@@H](CCC(O)O)C(C)C6CC7[C@@H](CC)[C@H](C)C8CC1N2[Mn@SP2](N65)(N87)N43, +2q37_1_3AL_0,data/dockgen_set/dockgen_holo_aligned_predicted_structures/2q37_1_3AL_0_holo_aligned_predicted_protein.pdb,NC(O)N[C@H]1NC(O)NC1O, +2q6k_1_ADN_1,data/dockgen_set/dockgen_holo_aligned_predicted_structures/2q6k_1_ADN_1_holo_aligned_predicted_protein.pdb,NC1NCNC2C1NCN2[C@@H]1O[C@H](CO)[C@@H](O)[C@H]1O, +2r4e_1_13P_0,data/dockgen_set/dockgen_holo_aligned_predicted_structures/2r4e_1_13P_0_holo_aligned_predicted_protein.pdb,OC[C@@H](O)CO[PH](O)(O)O, +2v5e_1_GU4-YYJ_0,data/dockgen_set/dockgen_holo_aligned_predicted_structures/2v5e_1_GU4-YYJ_0_holo_aligned_predicted_protein.pdb,OS(O)(O)OC[C@H]1O[C@@](COS(O)(O)O)(O[C@H]2O[C@H](COS(O)(O)O)[C@@H](OS(O)(O)O)[C@H](OS(O)(O)O)[C@H]2OS(O)(O)O)[C@@H](OS(O)(O)O)[C@@H]1OS(O)(O)O, +2v7t_1_SAH_4,data/dockgen_set/dockgen_holo_aligned_predicted_structures/2v7t_1_SAH_4_holo_aligned_predicted_protein.pdb,NC1NCNC2C1NCN2[C@@H]1O[C@H](CSCC[C@H](N)C(O)O)[C@@H](O)[C@H]1O, +2v7u_1_SAM_2,data/dockgen_set/dockgen_holo_aligned_predicted_structures/2v7u_1_SAM_2_holo_aligned_predicted_protein.pdb,C[S@@H](CC[C@H](N)C(O)O)C[C@H]1O[C@@H](N2CNC3C(N)NCNC32)[C@H](O)[C@@H]1O, +2v7v_1_5FD_0,data/dockgen_set/dockgen_holo_aligned_predicted_structures/2v7v_1_5FD_0_holo_aligned_predicted_protein.pdb,NC1NCNC2C1NCN2[C@@H]1O[C@H](CF)[C@@H](O)[C@H]1O, 
+2v7w_1_5FD_0,data/dockgen_set/dockgen_holo_aligned_predicted_structures/2v7w_1_5FD_0_holo_aligned_predicted_protein.pdb,NC1NCNC2C1NCN2[C@@H]1O[C@H](CF)[C@@H](O)[C@H]1O, +2vdf_1_OCT_0,data/dockgen_set/dockgen_holo_aligned_predicted_structures/2vdf_1_OCT_0_holo_aligned_predicted_protein.pdb,CCCCCCCC, +2vfu_1_MTL_0,data/dockgen_set/dockgen_holo_aligned_predicted_structures/2vfu_1_MTL_0_holo_aligned_predicted_protein.pdb,OC[C@@H](O)[C@@H](O)[C@H](O)[C@H](O)CO, +2wab_1_BGC-BGC-BGC-BGC-BGC_0,data/dockgen_set/dockgen_holo_aligned_predicted_structures/2wab_1_BGC-BGC-BGC-BGC-BGC_0_holo_aligned_predicted_protein.pdb,OC[C@H]1O[C@@H](O[C@H]2[C@H](O)[C@@H](O)[C@H](O[C@H]3[C@H](O)[C@@H](O)[C@H](O[C@H]4[C@H](O)[C@@H](O)[C@H](O[C@H]5[C@H](O)[C@@H](O)[C@H](O)O[C@@H]5CO)O[C@@H]4CO)O[C@@H]3CO)O[C@@H]2CO)[C@H](O)[C@@H](O)[C@@H]1O, +2wao_1_BGC-BGC-BGC-BGC-BGC_0,data/dockgen_set/dockgen_holo_aligned_predicted_structures/2wao_1_BGC-BGC-BGC-BGC-BGC_0_holo_aligned_predicted_protein.pdb,OC[C@H]1O[C@@H](O[C@H]2[C@H](O)[C@@H](O)[C@H](O[C@H]3[C@H](O)[C@@H](O)[C@H](O[C@H]4[C@H](O)[C@@H](O)[C@H](O[C@H]5[C@H](O)[C@@H](O)[C@H](O)O[C@@H]5CO)O[C@@H]4CO)O[C@@H]3CO)O[C@@H]2CO)[C@H](O)[C@@H](O)[C@@H]1O, +2wr8_1_SAH_1,data/dockgen_set/dockgen_holo_aligned_predicted_structures/2wr8_1_SAH_1_holo_aligned_predicted_protein.pdb,NC1NCNC2C1NCN2[C@@H]1O[C@H](CSCC[C@H](N)C(O)O)[C@@H](O)[C@H]1O, +2wwc_1_CHT_2,data/dockgen_set/dockgen_holo_aligned_predicted_structures/2wwc_1_CHT_2_holo_aligned_predicted_protein.pdb,C[N+](C)(C)CCO, +2x34_2_UQ8_0,data/dockgen_set/dockgen_holo_aligned_predicted_structures/2x34_2_UQ8_0_holo_aligned_predicted_protein.pdb,COC1C(O)C(C)C(CCC(C)CCCC(C)CCCC(C)CCC[C@H](C)CCCC(C)CCC[C@H](C)CCCC(C)CCCC(C)C)C(O)C1OC, +2xrh_1_NIO_1,data/dockgen_set/dockgen_holo_aligned_predicted_structures/2xrh_1_NIO_1_holo_aligned_predicted_protein.pdb,OC(O)[C@H]1CCCNC1, 
+2xta_1_ACO_0,data/dockgen_set/dockgen_holo_aligned_predicted_structures/2xta_1_ACO_0_holo_aligned_predicted_protein.pdb,CCO[PH](O)(O)O[PH](O)(O)OC[C@H]1O[C@@H](N2CNC3C(N)NCNC32)[C@H](O)[C@@H]1O[PH](O)(O)O, +2zcz_2_TRP_3,data/dockgen_set/dockgen_holo_aligned_predicted_structures/2zcz_2_TRP_3_holo_aligned_predicted_protein.pdb,N[C@@H](CC1CNC2CCCCC12)C(O)O, +2zd0_1_TRP_9,data/dockgen_set/dockgen_holo_aligned_predicted_structures/2zd0_1_TRP_9_holo_aligned_predicted_protein.pdb,N[C@@H](CC1CNC2CCCCC12)C(O)O, +2ze9_1_PD7_0,data/dockgen_set/dockgen_holo_aligned_predicted_structures/2ze9_1_PD7_0_holo_aligned_predicted_protein.pdb,CCCCCCC(O)OC[C@H](CO[PH](O)(O)O)OC(O)CCCCCC, +3ad7_1_NAD_0,data/dockgen_set/dockgen_holo_aligned_predicted_structures/3ad7_1_NAD_0_holo_aligned_predicted_protein.pdb,N[C@H](O)C1CCCN([C@@H]2O[C@H](CO[PH](O)(O)O[PH](O)(O)OC[C@H]3O[C@@H](N4CNC5[C@H](N)NCN[C@H]54)[C@H](O)[C@@H]3O)[C@@H](O)[C@H]2O)C1, +3ad9_1_NAD_0,data/dockgen_set/dockgen_holo_aligned_predicted_structures/3ad9_1_NAD_0_holo_aligned_predicted_protein.pdb,NC1NCNC2[C@@H]1NCN2[C@@H]1O[C@H](CO[PH](O)(O)O[PH](O)(O)OC[C@H]2O[C@@H](N3CCCC([C@H](N)O)C3)[C@H](O)[C@@H]2O)[C@@H](O)[C@H]1O, +3ada_1_NAD_0,data/dockgen_set/dockgen_holo_aligned_predicted_structures/3ada_1_NAD_0_holo_aligned_predicted_protein.pdb,N[C@H](O)C1CCCN([C@@H]2O[C@H](CO[PH](O)(O)O[PH](O)(O)OC[C@H]3O[C@@H](N4CN[C@@H]5[C@H](N)NCN[C@H]54)[C@H](O)[C@@H]3O)[C@@H](O)[C@H]2O)C1, +3eca_1_ASP_3,data/dockgen_set/dockgen_holo_aligned_predicted_structures/3eca_1_ASP_3_holo_aligned_predicted_protein.pdb,N[C@@H](CC(O)O)C(O)O, +3gf4_1_FAD_1,data/dockgen_set/dockgen_holo_aligned_predicted_structures/3gf4_1_FAD_1_holo_aligned_predicted_protein.pdb,CC1CC2NC3C(O)NC(O)NC3N(C[C@H](O)[C@H](O)[C@H](O)CO[PH](O)(O)O[PH](O)(O)OC[C@H]3O[C@@H](N4CNC5C4NCN[C@H]5N)[C@H](O)[C@@H]3O)C2CC1C, 
+3gf4_1_U5P_0,data/dockgen_set/dockgen_holo_aligned_predicted_structures/3gf4_1_U5P_0_holo_aligned_predicted_protein.pdb,OC1CCN([C@@H]2O[C@H](CO[PH](O)(O)O)[C@@H](O)[C@H]2O)C(O)N1, +3he3_5_UDP_0,data/dockgen_set/dockgen_holo_aligned_predicted_structures/3he3_5_UDP_0_holo_aligned_predicted_protein.pdb,OC1CCN([C@@H]2O[C@H](CO[PH](O)(O)O[PH](O)(O)O)[C@@H](O)[C@H]2O)C(O)N1, +3it6_1_ORN_1,data/dockgen_set/dockgen_holo_aligned_predicted_structures/3it6_1_ORN_1_holo_aligned_predicted_protein.pdb,NCCC[C@H](N)C(O)O, +3k8l_1_GLC-GLC-GLC-GLC-GLC_0,data/dockgen_set/dockgen_holo_aligned_predicted_structures/3k8l_1_GLC-GLC-GLC-GLC-GLC_0_holo_aligned_predicted_protein.pdb,OC[C@H]1O[C@H](O[C@H]2[C@H](O)[C@@H](O)[C@@H](O[C@H]3[C@H](O)[C@@H](O)[C@@H](O[C@H]4[C@H](O)[C@@H](O)[C@@H](O[C@H]5[C@H](O)[C@@H](O)[C@@H](O)O[C@@H]5CO)O[C@@H]4CO)O[C@@H]3CO)O[C@@H]2CO)[C@H](O)[C@@H](O)[C@@H]1O, +3k8m_1_GLC-GLC-AC1_0,data/dockgen_set/dockgen_holo_aligned_predicted_structures/3k8m_1_GLC-GLC-AC1_0_holo_aligned_predicted_protein.pdb,C[C@H]1O[C@H](O[C@H]2[C@H](O)[C@@H](O)[C@@H](O[C@H]3[C@H](O)[C@@H](O)[C@@H](O)O[C@@H]3CO)O[C@@H]2CO)[C@H](O)[C@@H](O)[C@@H]1N[C@H]1CC(CO)[C@@H](O)[C@H](O)[C@H]1O, +3nvv_1_MTE_1,data/dockgen_set/dockgen_holo_aligned_predicted_structures/3nvv_1_MTE_1_holo_aligned_predicted_protein.pdb,NC1NC(O)C2N[C@@H]3[C@H](NC2N1)O[C@H](CO[PH](O)(O)O)C(S)[C@@H]3S, +3o01_2_DXC_0,data/dockgen_set/dockgen_holo_aligned_predicted_structures/3o01_2_DXC_0_holo_aligned_predicted_protein.pdb,C[C@H](CCC(O)O)[C@H]1CC[C@H]2[C@@H]3CC[C@@H]4C[C@H](O)CC[C@]4(C)[C@H]3C[C@H](O)[C@]12C, +3o02_2_JN3_0,data/dockgen_set/dockgen_holo_aligned_predicted_structures/3o02_2_JN3_0_holo_aligned_predicted_protein.pdb,C[C@H](CCC(O)O)[C@H]1CC[C@H]2[C@@H]3[C@H](O)C[C@@H]4C[C@H](O)CC[C@]4(C)[C@H]3CC[C@]12C, +3o7j_1_2AL_0,data/dockgen_set/dockgen_holo_aligned_predicted_structures/3o7j_1_2AL_0_holo_aligned_predicted_protein.pdb,NC(O)NC1NC(O)NC1O, 
+3q14_1_PCR_3,data/dockgen_set/dockgen_holo_aligned_predicted_structures/3q14_1_PCR_3_holo_aligned_predicted_protein.pdb,CC1CCC(O)CC1, +3qrc_2_GU4-YYJ_0,data/dockgen_set/dockgen_holo_aligned_predicted_structures/3qrc_2_GU4-YYJ_0_holo_aligned_predicted_protein.pdb,OS(O)(O)OC[C@H]1O[C@@](COS(O)(O)O)(O[C@H]2O[C@H](COS(O)(O)O)[C@@H](OS(O)(O)O)[C@H](OS(O)(O)O)[C@H]2OS(O)(O)O)[C@@H](OS(O)(O)O)[C@@H]1OS(O)(O)O, +3s5x_1_BMA-MAN-MAN-MAN-MAN_0,data/dockgen_set/dockgen_holo_aligned_predicted_structures/3s5x_1_BMA-MAN-MAN-MAN-MAN_0_holo_aligned_predicted_protein.pdb,OC[C@H]1O[C@H](O[C@@H]2[C@H](O)[C@@H](OC[C@H]3O[C@@H](O)[C@@H](O)[C@@H](O[C@H]4O[C@H](CO)[C@@H](O)[C@H](O)[C@@H]4O)[C@@H]3O)O[C@H](CO)[C@H]2O)[C@@H](O)[C@@H](O)[C@@H]1O, +3s6a_1_ANP_0,data/dockgen_set/dockgen_holo_aligned_predicted_structures/3s6a_1_ANP_0_holo_aligned_predicted_protein.pdb,NC1NCNC2C1NCN2[C@@H]1O[C@H](CO[PH](O)(O)O[PH](N)(O)O)[C@@H](O)[C@H]1O, +3se5_1_ANP_2,data/dockgen_set/dockgen_holo_aligned_predicted_structures/3se5_1_ANP_2_holo_aligned_predicted_protein.pdb,NC1NCNC2C1NCN2[C@@H]1O[C@H](CO[PH](O)(O)O[PH](O)(O)N[PH](O)(O)O)[C@@H](O)[C@H]1O, +3sr6_1_MTE_1,data/dockgen_set/dockgen_holo_aligned_predicted_structures/3sr6_1_MTE_1_holo_aligned_predicted_protein.pdb,NC1NC(O)[C@@H]2N[C@H]3C(S)C(S)[C@@H](CO[PH](O)(O)O)O[C@H]3NC2N1, +3ub7_1_ACM_1,data/dockgen_set/dockgen_holo_aligned_predicted_structures/3ub7_1_ACM_1_holo_aligned_predicted_protein.pdb,CC(N)O, +3ub9_1_NHY_1,data/dockgen_set/dockgen_holo_aligned_predicted_structures/3ub9_1_NHY_1_holo_aligned_predicted_protein.pdb,N[C@@H](O)NO, +3uni_1_SAL_0,data/dockgen_set/dockgen_holo_aligned_predicted_structures/3uni_1_SAL_0_holo_aligned_predicted_protein.pdb,OC(O)C1CCCCC1O, +3wrb_1_GDE_0,data/dockgen_set/dockgen_holo_aligned_predicted_structures/3wrb_1_GDE_0_holo_aligned_predicted_protein.pdb,OC(O)C1CC(O)C(O)C(O)C1, 
+3wvc_1_FEG_0,data/dockgen_set/dockgen_holo_aligned_predicted_structures/3wvc_1_FEG_0_holo_aligned_predicted_protein.pdb,CC1C(CC(O)O)NC(O)[C@@H](C)[C@@H]1O[PH](O)(O)OC[C@H]1O[C@@H](N2CNC3C(O)NC(N)NC32)[C@H](O)[C@@H]1O, +3zec_1_ANP_0,data/dockgen_set/dockgen_holo_aligned_predicted_structures/3zec_1_ANP_0_holo_aligned_predicted_protein.pdb,NC1NCNC2C1NCN2[C@@H]1O[C@H](CO[PH](O)(O)O[PH](O)(O)N[PH](O)(O)O)[C@@H](O)[C@H]1O, +3zjx_1_BOG_0,data/dockgen_set/dockgen_holo_aligned_predicted_structures/3zjx_1_BOG_0_holo_aligned_predicted_protein.pdb,CCCCCCCCO[C@@H]1O[C@H](CO)[C@@H](O)[C@H](O)[C@H]1O, +3zqu_1_FNR_5,data/dockgen_set/dockgen_holo_aligned_predicted_structures/3zqu_1_FNR_5_holo_aligned_predicted_protein.pdb,C[C@H]1CC2NC3[C@H](O)N[C@H](O)N[C@@H]3N(C[C@H](O)[C@H](O)[C@H](O)CO[PH](O)(O)O)[C@@H]2C[C@@H]1C, +3zzs_1_TRP_3,data/dockgen_set/dockgen_holo_aligned_predicted_structures/3zzs_1_TRP_3_holo_aligned_predicted_protein.pdb,N[C@@H](CC1CNC2CCCCC12)C(O)O, +4b4v_1_L34_0,data/dockgen_set/dockgen_holo_aligned_predicted_structures/4b4v_1_L34_0_holo_aligned_predicted_protein.pdb,NC1NC(O)C2C(NC[C@@H]3CN(C4CCC([C@H](O)N[C@@H](CCC(O)O)C(O)O)CC4)C(O)N23)N1, +4bc9_1_CNV-FAD_0,data/dockgen_set/dockgen_holo_aligned_predicted_structures/4bc9_1_CNV-FAD_0_holo_aligned_predicted_protein.pdb,CC1CC2[C@H](CC1C)N(C(C)CN)C1C(O)NC(O)NC1N2C[C@H](O)[C@H](O)[C@H](O)CO[PH](O)(O)O[PH](O)(O)OC[C@H]1O[C@@H](N2CNC3C(N)NCN[C@H]32)[C@H](O)[C@@H]1O, +4cdn_2_FAD_0,data/dockgen_set/dockgen_holo_aligned_predicted_structures/4cdn_2_FAD_0_holo_aligned_predicted_protein.pdb,CC1CC2NC3C(O)NC(O)NC3N(C[C@H](O)[C@H](O)[C@H](O)CO[PH](O)(O)O[PH](O)(O)OC[C@H]3O[C@@H](N4CNC5C(N)NCNC54)[C@H](O)[C@@H]3O)C2C[C@H]1C, +4fyv_1_DCP_2,data/dockgen_set/dockgen_holo_aligned_predicted_structures/4fyv_1_DCP_2_holo_aligned_predicted_protein.pdb,NC1CCN([C@H]2C[C@H](O)[C@@H](CO[PH](O)(O)O[PH](O)(O)O[PH](O)(O)O)O2)C(O)N1, 
+4fyw_1_CTP_4,data/dockgen_set/dockgen_holo_aligned_predicted_structures/4fyw_1_CTP_4_holo_aligned_predicted_protein.pdb,NC1CCN([C@@H]2O[C@H](CO[PH](O)(O)O[PH](O)(O)O[PH](O)(O)O)[C@@H](O)[C@H]2O)C(O)N1, +4gk9_1_BMA-MAN-MAN-MAN-MAN_3,data/dockgen_set/dockgen_holo_aligned_predicted_structures/4gk9_1_BMA-MAN-MAN-MAN-MAN_3_holo_aligned_predicted_protein.pdb,OC[C@H]1O[C@H](OC[C@H]2O[C@H](OC[C@H]3O[C@@H](O)[C@@H](O)[C@@H](O[C@H]4O[C@H](CO)[C@@H](O)[C@H](O)[C@@H]4O)[C@@H]3O)[C@@H](O)[C@@H](O[C@H]3O[C@H](CO)[C@@H](O)[C@H](O)[C@@H]3O)[C@@H]2O)[C@@H](O)[C@@H](O)[C@@H]1O, +4h2f_1_ADN_0,data/dockgen_set/dockgen_holo_aligned_predicted_structures/4h2f_1_ADN_0_holo_aligned_predicted_protein.pdb,NC1NCNC2C1NCN2[C@@H]1O[C@H](CO)[C@@H](O)[C@H]1O, +4idk_1_1FE_0,data/dockgen_set/dockgen_holo_aligned_predicted_structures/4idk_1_1FE_0_holo_aligned_predicted_protein.pdb,NCC(O)NC1CCC2NC(O)NC2C1, +4kgx_1_CTP_5,data/dockgen_set/dockgen_holo_aligned_predicted_structures/4kgx_1_CTP_5_holo_aligned_predicted_protein.pdb,NC1CCN([C@@H]2O[C@H](CO[PH](O)(O)O[PH](O)(O)O[PH](O)(O)O)[C@@H](O)[C@H]2O)C(O)N1, +4mig_1_G3F_2,data/dockgen_set/dockgen_holo_aligned_predicted_structures/4mig_1_G3F_2_holo_aligned_predicted_protein.pdb,OC[C@H]1O[C@@H](O)[C@H](O)[C@@H](F)[C@@H]1O, +4mo2_2_FDA_0,data/dockgen_set/dockgen_holo_aligned_predicted_structures/4mo2_2_FDA_0_holo_aligned_predicted_protein.pdb,CC1CC2[C@@H](CC1C)N[C@H]1C(O)NC(O)N[C@H]1N2C[C@H](O)[C@H](O)[C@H](O)CO[PH](O)(O)O[PH](O)(O)OC[C@H]1O[C@@H](N2CNC3C(N)NCNC32)[C@H](O)[C@@H]1O, +4mos_1_GAF_1,data/dockgen_set/dockgen_holo_aligned_predicted_structures/4mos_1_GAF_1_holo_aligned_predicted_protein.pdb,OC[C@H]1O[C@H](O)[C@H](F)[C@@H](O)[C@H]1O, +4n4l_1_HG1_0,data/dockgen_set/dockgen_holo_aligned_predicted_structures/4n4l_1_HG1_0_holo_aligned_predicted_protein.pdb,NC(O)CCCC1CCCCC1, +4o0d_1_GLY_3,data/dockgen_set/dockgen_holo_aligned_predicted_structures/4o0d_1_GLY_3_holo_aligned_predicted_protein.pdb,NCC(O)O, 
+4o0f_1_GLY_3,data/dockgen_set/dockgen_holo_aligned_predicted_structures/4o0f_1_GLY_3_holo_aligned_predicted_protein.pdb,NCC(O)O, +4o95_1_245_0,data/dockgen_set/dockgen_holo_aligned_predicted_structures/4o95_1_245_0_holo_aligned_predicted_protein.pdb,OC(NC1CCCCC1)N[C@H]1CCNC(Cl)C1, +4oal_2_245_0,data/dockgen_set/dockgen_holo_aligned_predicted_structures/4oal_2_245_0_holo_aligned_predicted_protein.pdb,OC(NC1CCCCC1)N[C@H]1CCNC(Cl)C1, +4osx_1_GLY_2,data/dockgen_set/dockgen_holo_aligned_predicted_structures/4osx_1_GLY_2_holo_aligned_predicted_protein.pdb,NCC(O)O, +4osy_1_GLY_3,data/dockgen_set/dockgen_holo_aligned_predicted_structures/4osy_1_GLY_3_holo_aligned_predicted_protein.pdb,NCC(O)O, +4pfx_1_UDP_0,data/dockgen_set/dockgen_holo_aligned_predicted_structures/4pfx_1_UDP_0_holo_aligned_predicted_protein.pdb,OC1CCN([C@@H]2O[C@H](CO[PH](O)(O)O[PH](O)(O)O)[C@@H](O)[C@H]2O)C(O)N1, +4phr_1_UDP_0,data/dockgen_set/dockgen_holo_aligned_predicted_structures/4phr_1_UDP_0_holo_aligned_predicted_protein.pdb,OC1CCN([C@@H]2O[C@H](CO[PH](O)(O)O[PH](O)(O)O)[C@@H](O)[C@H]2O)C(O)N1, +4phs_1_UDP_0,data/dockgen_set/dockgen_holo_aligned_predicted_structures/4phs_1_UDP_0_holo_aligned_predicted_protein.pdb,OC1CCN([C@@H]2O[C@H](CO[PH](O)([OH2+])O[PH](O)(O)[OH2+])[C@@H](O)[C@H]2O)C(O)N1, +4pya_1_2X3_0,data/dockgen_set/dockgen_holo_aligned_predicted_structures/4pya_1_2X3_0_holo_aligned_predicted_protein.pdb,NC1NC(O)C2N[C@H]3N([C@@H]2N1)[C@@H]1O[C@H](CO[PH](O)(O)O[PH](O)(O)O[PH](O)(O)O)[C@@]3(O)[C@H]1O, +4qa8_1_PJZ_0,data/dockgen_set/dockgen_holo_aligned_predicted_structures/4qa8_1_PJZ_0_holo_aligned_predicted_protein.pdb,CCCCCCCCCCCCCC[C@@H](O)OC[C@@H](C)O[C@@H](O)CCCCCCCCCCC, +4qo5_1_NAG_2,data/dockgen_set/dockgen_holo_aligned_predicted_structures/4qo5_1_NAG_2_holo_aligned_predicted_protein.pdb,CC(O)N[C@H]1CO[C@H](CO)[C@@H](O)[C@@H]1O, 
+4rhe_1_FMN_6,data/dockgen_set/dockgen_holo_aligned_predicted_structures/4rhe_1_FMN_6_holo_aligned_predicted_protein.pdb,CC1C[C@@H]2[C@@H](C[C@@H]1C)NC1[C@H](O)NC(O)N[C@@H]1N2C[C@H](O)[C@H](O)[C@H](O)CO[PH]([O-])(O)O, +4rpj_1_UDP_0,data/dockgen_set/dockgen_holo_aligned_predicted_structures/4rpj_1_UDP_0_holo_aligned_predicted_protein.pdb,OC1CCN([C@@H]2O[C@H](CO[PH](O)(O)O[PH](O)(O)O)[C@@H](O)[C@H]2O)C(O)N1, +4rpm_1_HXC_0,data/dockgen_set/dockgen_holo_aligned_predicted_structures/4rpm_1_HXC_0_holo_aligned_predicted_protein.pdb,CCCCCC(O)SC, +4tvd_1_BGC_4,data/dockgen_set/dockgen_holo_aligned_predicted_structures/4tvd_1_BGC_4_holo_aligned_predicted_protein.pdb,OC[C@H]1O[C@@H](O)[C@H](O)[C@@H](O)[C@@H]1O, +4tvd_1_GLC_0,data/dockgen_set/dockgen_holo_aligned_predicted_structures/4tvd_1_GLC_0_holo_aligned_predicted_protein.pdb,OC[C@H]1O[C@H](O)[C@H](O)[C@@H](O)[C@@H]1O, +4u63_1_FAD_0,data/dockgen_set/dockgen_holo_aligned_predicted_structures/4u63_1_FAD_0_holo_aligned_predicted_protein.pdb,CC1CC2NC3C(O)NC(O)NC3N(C[C@H](O)[C@H](O)[C@H](O)CO[PH](O)(O)O[PH](O)(O)OC[C@H]3O[C@@H](N4CNC5C(N)NCNC54)[C@H](O)[C@@H]3O)C2CC1C, +4uoc_1_NCN_1,data/dockgen_set/dockgen_holo_aligned_predicted_structures/4uoc_1_NCN_1_holo_aligned_predicted_protein.pdb,OC(O)C1CCCN([C@@H]2O[C@H](CO[PH](O)(O)O)[C@@H](O)[C@H]2O)C1, +4uuw_1_AMP_0,data/dockgen_set/dockgen_holo_aligned_predicted_structures/4uuw_1_AMP_0_holo_aligned_predicted_protein.pdb,NC1NCNC2C1NCN2[C@@H]1O[C@H](CO[PH](O)(O)O)[C@@H](O)[C@H]1O, +4xdr_1_ADN_0,data/dockgen_set/dockgen_holo_aligned_predicted_structures/4xdr_1_ADN_0_holo_aligned_predicted_protein.pdb,NC1NCNC2C1NCN2[C@@H]1O[C@H](CO)[C@@H](O)[C@H]1O, +4xfm_1_THE_0,data/dockgen_set/dockgen_holo_aligned_predicted_structures/4xfm_1_THE_0_holo_aligned_predicted_protein.pdb,OC[C@@H](O)[C@H](O)C(O)O, +4ydx_1_TCE_0,data/dockgen_set/dockgen_holo_aligned_predicted_structures/4ydx_1_TCE_0_holo_aligned_predicted_protein.pdb,OC(O)CCP(CCC(O)O)CCC(O)O, 
+4zav_1_4LS_8,data/dockgen_set/dockgen_holo_aligned_predicted_structures/4zav_1_4LS_8_holo_aligned_predicted_protein.pdb,CC(C)CCN1C2CC(C)[C@@H](C)CC2N(C[C@H](O)[C@H](O)[C@H](O)CO[PH](O)(O)O)[C@H]2NC(O)N[C@@H](O)C21, +4zaw_1_4LU_1,data/dockgen_set/dockgen_holo_aligned_predicted_structures/4zaw_1_4LU_1_holo_aligned_predicted_protein.pdb,C[C@H]1[C@@H](C)C[C@@H]2C3[C@@H]1C(C)(C)CCN3[C@H]1C(O)NC(O)NC1N2C[C@H](O)[C@H](O)[C@H](O)CO[PH](O)(O)O, +4zay_1_4LS_6,data/dockgen_set/dockgen_holo_aligned_predicted_structures/4zay_1_4LS_6_holo_aligned_predicted_protein.pdb,CC(C)CCN1[C@H]2[C@H](O)N[C@H](O)N[C@@H]2N(C[C@H](O)[C@H](O)[C@H](O)CO[PH](O)(O)O)[C@@H]2CC(C)C(C)C[C@H]21, +4zaz_1_4LS_6,data/dockgen_set/dockgen_holo_aligned_predicted_structures/4zaz_1_4LS_6_holo_aligned_predicted_protein.pdb,CC(C)CCN1C2CC(C)[C@@H](C)CC2N(C[C@H](O)[C@H](O)[C@H](O)CO[PH](O)(O)O)C2N[C@@H](O)N[C@@H](O)C21, +4zqx_1_ATP_2,data/dockgen_set/dockgen_holo_aligned_predicted_structures/4zqx_1_ATP_2_holo_aligned_predicted_protein.pdb,NC1NCNC2C1NCN2[C@@H]1O[C@H](CO[PH](O)(O)O[PH](O)(O)O[PH](O)(O)O)[C@@H](O)[C@H]1O, +5a98_1_ATP_0,data/dockgen_set/dockgen_holo_aligned_predicted_structures/5a98_1_ATP_0_holo_aligned_predicted_protein.pdb,NC1NCNC2C1NCN2[C@@H]1O[C@H](CO[PH](O)(O)O[PH](O)(O)O[PH](O)(O)O)[C@@H](O)[C@H]1O, +5ae3_2_AWB_1,data/dockgen_set/dockgen_holo_aligned_predicted_structures/5ae3_2_AWB_1_holo_aligned_predicted_protein.pdb,CCCCCC[C@H]1C(O)O[C@H](C)[C@H](N[C@H](O)[C@@H]2CCC[C@H](NCO)[C@H]2O)C(O)O[C@@H](C)[C@@H]1OC(O)CC(C)C, +5b5s_1_BOG_0,data/dockgen_set/dockgen_holo_aligned_predicted_structures/5b5s_1_BOG_0_holo_aligned_predicted_protein.pdb,CCCCCCCCO[C@@H]1O[C@H](CO)[C@@H](O)[C@H](O)[C@H]1O, +5d9g_1_GLU-ASN-LEU-TYR-PHE-GLN_0,data/dockgen_set/dockgen_holo_aligned_predicted_structures/5d9g_1_GLU-ASN-LEU-TYR-PHE-GLN_0_holo_aligned_predicted_protein.pdb,CC(C)C[C@H](N)C(=O)N[C@@H](Cc1ccc(O)cc1)C(=O)N[C@@H](Cc1ccccc1)C(=O)N[C@@H](CCC(N)=O)C(=O)O, 
+5dnc_1_ASN_2,data/dockgen_set/dockgen_holo_aligned_predicted_structures/5dnc_1_ASN_2_holo_aligned_predicted_protein.pdb,NC(O)C[C@H](N)C(O)O, +5eno_1_5QG_0,data/dockgen_set/dockgen_holo_aligned_predicted_structures/5eno_1_5QG_0_holo_aligned_predicted_protein.pdb,CC1(C)CC2C(CO1)C(N1CCOCC1)NC(SCCC1CCCCC1)[C@@H]2CN, +5enp_1_5QF_0,data/dockgen_set/dockgen_holo_aligned_predicted_structures/5enp_1_5QF_0_holo_aligned_predicted_protein.pdb,COCCN1CCN([C@@H]2NC(SCC[C@H]3CCC(OC)[C@@H](OC)C3)[C@H](CN)[C@H]3CC(C)(C)OCC23)CC1, +5enq_1_5QE_0,data/dockgen_set/dockgen_holo_aligned_predicted_structures/5enq_1_5QE_0_holo_aligned_predicted_protein.pdb,CC(O)NC1CCC(CCSC2NC(N3C[C@@H](C)O[C@@H](C)C3)[C@@H]3COC(C)(C)CC3C2CN)CC1, +5enr_1_MBX_0,data/dockgen_set/dockgen_holo_aligned_predicted_structures/5enr_1_MBX_0_holo_aligned_predicted_protein.pdb,CCC(O)NC1CCC(CCS[C@@H]2NC(N3C[C@H](C)O[C@@H](C)C3)C3COC(C)(C)CC3C2CN)CC1, +5ent_1_MIY_0,data/dockgen_set/dockgen_holo_aligned_predicted_structures/5ent_1_MIY_0_holo_aligned_predicted_protein.pdb,CN(C)C1CCC(O)C2C1C[C@H]1C[C@H]3[C@H](N(C)C)C(O)C(C(N)O)[C@@H](O)[C@@]3(O)C(O)[C@@H]1C2O, +5ers_1_AMP_1,data/dockgen_set/dockgen_holo_aligned_predicted_structures/5ers_1_AMP_1_holo_aligned_predicted_protein.pdb,NC1NCNC2C1NCN2[C@@H]1O[C@H](CO[PH](O)(O)O)[C@@H](O)[C@H]1O, +5f2t_1_PLM_0,data/dockgen_set/dockgen_holo_aligned_predicted_structures/5f2t_1_PLM_0_holo_aligned_predicted_protein.pdb,CCCCCCCCCCCCCCCC(O)O, +5f52_1_ASP_2,data/dockgen_set/dockgen_holo_aligned_predicted_structures/5f52_1_ASP_2_holo_aligned_predicted_protein.pdb,N[C@@H](CC(O)O)C(O)O, +5fiu_1_Y3J_3,data/dockgen_set/dockgen_holo_aligned_predicted_structures/5fiu_1_Y3J_3_holo_aligned_predicted_protein.pdb,NC1NCNC2C1NCN2[C@@H]1O[C@H](C(F)F)[C@@H](O)[C@@H]1O, +5fxd_1_H7Y_1,data/dockgen_set/dockgen_holo_aligned_predicted_structures/5fxd_1_H7Y_1_holo_aligned_predicted_protein.pdb,CCC[C@H]1CCC(O)C(OC)C1, 
+5fxe_1_CIY_1,data/dockgen_set/dockgen_holo_aligned_predicted_structures/5fxe_1_CIY_1_holo_aligned_predicted_protein.pdb,CO[C@H]1CC(CCCO)CCC1O, +5fxf_1_BEZ_0,data/dockgen_set/dockgen_holo_aligned_predicted_structures/5fxf_1_BEZ_0_holo_aligned_predicted_protein.pdb,OC(O)C1CCCCC1, +5gqi_1_ATP_7,data/dockgen_set/dockgen_holo_aligned_predicted_structures/5gqi_1_ATP_7_holo_aligned_predicted_protein.pdb,N[C@@H]1NCN[C@H]2C1NCN2[C@@H]1O[C@H](CO[PH](O)(O)O[PH](O)(O)O[PH](O)(O)O)[C@@H](O)[C@H]1O, +5gql_1_ATP_4,data/dockgen_set/dockgen_holo_aligned_predicted_structures/5gql_1_ATP_4_holo_aligned_predicted_protein.pdb,NC1NCNC2C1NCN2[C@@H]1O[C@H](CO[PH](O)(O)O[PH](O)(O)O[PH](O)(O)O)[C@@H](O)[C@H]1O, +5hhz_1_ZME_0,data/dockgen_set/dockgen_holo_aligned_predicted_structures/5hhz_1_ZME_0_holo_aligned_predicted_protein.pdb,CC1CCN(C2NCNC3NCNC32)C1, +5hmr_1_FDZ_0,data/dockgen_set/dockgen_holo_aligned_predicted_structures/5hmr_1_FDZ_0_holo_aligned_predicted_protein.pdb,OC(NC1CNNS1)N[C@H]1CCC[C@H](OC(F)(F)F)C1, +5hqx_1_EDZ_0,data/dockgen_set/dockgen_holo_aligned_predicted_structures/5hqx_1_EDZ_0_holo_aligned_predicted_protein.pdb,OCC[C@H]1CCCC[C@@H]1NC(O)N[C@H]1CNNS1, +5hw0_1_GLU_2,data/dockgen_set/dockgen_holo_aligned_predicted_structures/5hw0_1_GLU_2_holo_aligned_predicted_protein.pdb,N[C@@H](CCC(O)O)C(O)O, +5ida_1_BMA_0,data/dockgen_set/dockgen_holo_aligned_predicted_structures/5ida_1_BMA_0_holo_aligned_predicted_protein.pdb,OC[C@H]1O[C@@H](O)[C@@H](O)[C@@H](O)[C@@H]1O, +5k3o_2_ASP_0,data/dockgen_set/dockgen_holo_aligned_predicted_structures/5k3o_2_ASP_0_holo_aligned_predicted_protein.pdb,N[C@@H](CC(O)O)C(O)O, +5k45_2_GLU_1,data/dockgen_set/dockgen_holo_aligned_predicted_structures/5k45_2_GLU_1_holo_aligned_predicted_protein.pdb,N[C@@H](CCC(O)O)C(O)O, +5k4h_2_GLU_3,data/dockgen_set/dockgen_holo_aligned_predicted_structures/5k4h_2_GLU_3_holo_aligned_predicted_protein.pdb,N[C@@H](CCC(O)O)C(O)O, 
+5k62_1_ASN-VAL_0,data/dockgen_set/dockgen_holo_aligned_predicted_structures/5k62_1_ASN-VAL_0_holo_aligned_predicted_protein.pdb,CC(C)[C@@H](CO)NC(O)[C@@H](N)CC(N)O, +5k63_1_ASN-GLY_0,data/dockgen_set/dockgen_holo_aligned_predicted_structures/5k63_1_ASN-GLY_0_holo_aligned_predicted_protein.pdb,NC(O)C[C@H](N)C(O)NCCO, +5k66_1_ASN-GLU_0,data/dockgen_set/dockgen_holo_aligned_predicted_structures/5k66_1_ASN-GLU_0_holo_aligned_predicted_protein.pdb,NC(O)C[C@H](N)C(O)N[C@H](CO)CCC(O)O, +5mh1_1_BMA_0,data/dockgen_set/dockgen_holo_aligned_predicted_structures/5mh1_1_BMA_0_holo_aligned_predicted_protein.pdb,OC[C@H]1O[C@@H](O)[C@@H](O)[C@@H](O)[C@@H]1O, +5u82_2_ZN0_0,data/dockgen_set/dockgen_holo_aligned_predicted_structures/5u82_2_ZN0_0_holo_aligned_predicted_protein.pdb,CC[SnH](CC)CC, +6a71_1_9UX_0,data/dockgen_set/dockgen_holo_aligned_predicted_structures/6a71_1_9UX_0_holo_aligned_predicted_protein.pdb,O[Mo@]12S[Mo@@]1(O)S2, +6a72_1_9UX_0,data/dockgen_set/dockgen_holo_aligned_predicted_structures/6a72_1_9UX_0_holo_aligned_predicted_protein.pdb,O[Mo@]12S[Mo@@]1(O)S2, +6b1b_1_TMO_15,data/dockgen_set/dockgen_holo_aligned_predicted_structures/6b1b_1_TMO_15_holo_aligned_predicted_protein.pdb,C[N+](C)(C)O, +6ea9_1_9BG_0,data/dockgen_set/dockgen_holo_aligned_predicted_structures/6ea9_1_9BG_0_holo_aligned_predicted_protein.pdb,NC1NC(O)C2NCN([C@@H]3O[C@H](CO)[C@@H](O)[C@H]3O[PH]([O-])(O)OC[C@H]3O[C@@H](N4CNC5C(N)NCNC54)[C@H](O)[C@@H]3O[PH]([O-])([O-])O)C2N1, +6ep5_1_ADP_1,data/dockgen_set/dockgen_holo_aligned_predicted_structures/6ep5_1_ADP_1_holo_aligned_predicted_protein.pdb,NC1NCNC2C1NCN2[C@@H]1O[C@H](CO[PH](O)(O)O[PH](O)(O)O)[C@@H](O)[C@H]1O, +6etf_1_AMP_0,data/dockgen_set/dockgen_holo_aligned_predicted_structures/6etf_1_AMP_0_holo_aligned_predicted_protein.pdb,NC1NCNC2[C@H]1NCN2[C@@H]1O[C@H](CO[PH](O)(O)O)[C@@H](O)[C@H]1O, 
+6fgc_1_ADP_1,data/dockgen_set/dockgen_holo_aligned_predicted_structures/6fgc_1_ADP_1_holo_aligned_predicted_protein.pdb,NC1NCNC2C1NCN2[C@@H]1O[C@H](CO[PH](O)(O)O[PH](O)(O)O)[C@@H](O)[C@H]1O, +6fgc_1_D95_1,data/dockgen_set/dockgen_holo_aligned_predicted_structures/6fgc_1_D95_1_holo_aligned_predicted_protein.pdb,C[C@H]1[C@H](OC(O)CCC(O)O)O[C@@H]2O[C@@]3(C)CC[C@H]4[C@H](C)CC[C@@H]1[C@@]24OO3, +6gbf_1_AMP_0,data/dockgen_set/dockgen_holo_aligned_predicted_structures/6gbf_1_AMP_0_holo_aligned_predicted_protein.pdb,NC1NCNC2C1NCN2[C@@H]1O[C@H](CO[PH](O)(O)O)[C@@H](O)[C@H]1O, +6jls_1_FMN_6,data/dockgen_set/dockgen_holo_aligned_predicted_structures/6jls_1_FMN_6_holo_aligned_predicted_protein.pdb,CC1CC2NC3C(O)NC(O)NC3N(C[C@H](O)[C@H](O)[C@H](O)CO[PH](O)(O)O)C2CC1C, +6n19_2_K8V_0,data/dockgen_set/dockgen_holo_aligned_predicted_structures/6n19_2_K8V_0_holo_aligned_predicted_protein.pdb,CCC(O)NC1CCC(C([O-])O)C(C([O-])O)C1, +6nco_1_KQP_0,data/dockgen_set/dockgen_holo_aligned_predicted_structures/6nco_1_KQP_0_holo_aligned_predicted_protein.pdb,CC(C)(O)C1CCC([C@H]2CC(Cl)C[C@@H](C34(C(N)N)CC3C4)C2)CC1, +6npp_1_KWG_0,data/dockgen_set/dockgen_holo_aligned_predicted_structures/6npp_1_KWG_0_holo_aligned_predicted_protein.pdb,[O-]C(O)C1CCCC(CCC2CCCCC2)C1N1CCCC1, +6o6y_1_ACK_0,data/dockgen_set/dockgen_holo_aligned_predicted_structures/6o6y_1_ACK_0_holo_aligned_predicted_protein.pdb,NC1NCNC2C1NCN2[C@@H]1O[C@H](CO)[C@H]2O[PH]([O-])(O)O[C@H]21, +6o70_1_ACK_1,data/dockgen_set/dockgen_holo_aligned_predicted_structures/6o70_1_ACK_1_holo_aligned_predicted_protein.pdb,NC1NCNC2C1NCN2[C@@H]1O[C@H](CO)[C@H]2O[PH]([O-])(O)O[C@H]21, +6pa2_1_ASP_2,data/dockgen_set/dockgen_holo_aligned_predicted_structures/6pa2_1_ASP_2_holo_aligned_predicted_protein.pdb,N[C@@H](CC(O)O)C(O)O, +6pa6_2_ASN_0,data/dockgen_set/dockgen_holo_aligned_predicted_structures/6pa6_2_ASN_0_holo_aligned_predicted_protein.pdb,NC(O)C[C@H](N)C(O)O, 
+6paa_1_ASP_2,data/dockgen_set/dockgen_holo_aligned_predicted_structures/6paa_1_ASP_2_holo_aligned_predicted_protein.pdb,N[C@@H](CC(O)O)C(O)O, +6qkr_1_FAD_0,data/dockgen_set/dockgen_holo_aligned_predicted_structures/6qkr_1_FAD_0_holo_aligned_predicted_protein.pdb,CC1CC2NC3C(NC(O)N[C@H]3O)N(C[C@H](O)[C@H](O)[C@H](O)CO[PH](O)(O)O[PH](O)(O)OC[C@H]3O[C@@H](N4CNC5C4NCN[C@H]5N)[C@H](O)[C@@H]3O)C2CC1C, +6rms_1_AMP_1,data/dockgen_set/dockgen_holo_aligned_predicted_structures/6rms_1_AMP_1_holo_aligned_predicted_protein.pdb,NC1NCNC2C1NCN2[C@@H]1O[C@H](CO[PH](O)(O)O)[C@@H](O)[C@H]1O, +6ryz_1_SAM_2,data/dockgen_set/dockgen_holo_aligned_predicted_structures/6ryz_1_SAM_2_holo_aligned_predicted_protein.pdb,C[S@@H](CC[C@H](N)C(O)O)C[C@H]1O[C@@H](N2CNC3C2NCN[C@@H]3N)[C@H](O)[C@@H]1O, +6rz2_1_5CD_2,data/dockgen_set/dockgen_holo_aligned_predicted_structures/6rz2_1_5CD_2_holo_aligned_predicted_protein.pdb,NC1NCNC2[C@H]1NCN2[C@@H]1O[C@H](CCl)[C@@H](O)[C@H]1O, +6tvg_1_AP2_1,data/dockgen_set/dockgen_holo_aligned_predicted_structures/6tvg_1_AP2_1_holo_aligned_predicted_protein.pdb,NC1NCNC2C1NCN2[C@@H]1O[C@H](CO[PH](O)(O)C[PH](O)(O)O)[C@@H](O)[C@H]1O, +6uqy_2_AT3_0,data/dockgen_set/dockgen_holo_aligned_predicted_structures/6uqy_2_AT3_0_holo_aligned_predicted_protein.pdb,C[C@@H](O)SCC[N+](C)(C)C, +6ur1_2_AT3_0,data/dockgen_set/dockgen_holo_aligned_predicted_structures/6ur1_2_AT3_0_holo_aligned_predicted_protein.pdb,C[C@@H](O)SCC[N+](C)(C)C, +6v2a_1_ASN_3,data/dockgen_set/dockgen_holo_aligned_predicted_structures/6v2a_1_ASN_3_holo_aligned_predicted_protein.pdb,NC(O)C[C@H](N)C(O)O, +6wyz_1_DGL_1,data/dockgen_set/dockgen_holo_aligned_predicted_structures/6wyz_1_DGL_1_holo_aligned_predicted_protein.pdb,N[C@H](CCC(O)O)C(O)O, +6xb3_3_9BG_1,data/dockgen_set/dockgen_holo_aligned_predicted_structures/6xb3_3_9BG_1_holo_aligned_predicted_protein.pdb,NC1NC(O)C2NCN([C@@H]3O[C@H](CO)[C@@H](O)[C@H]3O[PH]([O-])(O)OC[C@H]3O[C@@H](N4CNC5C(N)NCNC54)[C@H](O)[C@@H]3O[PH]([O-])(O)O)C2N1, 
+6xug_1_O1Q_0,data/dockgen_set/dockgen_holo_aligned_predicted_structures/6xug_1_O1Q_0_holo_aligned_predicted_protein.pdb,CC1CCC[C@H](N2NCC[C@H]2C2CC(Cl)C3NNN(C4CC[C@H]5CNNC5C4)C3C2)C1, +6yao_1_OJ2_0,data/dockgen_set/dockgen_holo_aligned_predicted_structures/6yao_1_OJ2_0_holo_aligned_predicted_protein.pdb,OCC[C@H]1CCCC[C@@H]1NC(O)N[C@H]1CCC[C@@H](OC(F)(F)F)C1, +6yap_1_OHZ_0,data/dockgen_set/dockgen_holo_aligned_predicted_structures/6yap_1_OHZ_0_holo_aligned_predicted_protein.pdb,OCC[C@H]1CCCC[C@@H]1NC(O)N[C@H]1CC(Cl)C[C@H](OC(F)(F)F)C1, +6yaq_1_OHZ_0,data/dockgen_set/dockgen_holo_aligned_predicted_structures/6yaq_1_OHZ_0_holo_aligned_predicted_protein.pdb,OCCC1CCCC[C@H]1N[C@H](O)N[C@H]1C[C@@H](Cl)C[C@@H](OC(F)(F)F)C1, diff --git a/forks/DiffDock/inference/diffdock_pocket_only_posebusters_benchmark_inputs.csv b/forks/DiffDock/inference/diffdock_pocket_only_posebusters_benchmark_inputs.csv new file mode 100644 index 00000000..95c00d9d --- /dev/null +++ b/forks/DiffDock/inference/diffdock_pocket_only_posebusters_benchmark_inputs.csv @@ -0,0 +1,281 @@ +complex_name,protein_path,ligand_description,protein_sequence +7CNQ_G8X,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures_bs_cropped/7CNQ_G8X_holo_aligned_predicted_protein.pdb,O=C(O)[C@H]1NCC[C@H]1O, +7C0U_FGO,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures_bs_cropped/7C0U_FGO_holo_aligned_predicted_protein.pdb,Nc1nc2c(ncn2[C@@H]2O[C@H](CO[P@](=O)(O)O[C@H]3[C@@H](O)[C@H](n4ccc(=O)[nH]c4=O)O[C@@H]3CO)[C@H]3CC(O)(O)C[C@H]32)c(=O)[nH]1, +7EBG_J0L,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures_bs_cropped/7EBG_J0L_holo_aligned_predicted_protein.pdb,CNc1cccc2c1NC(=O)C2(C)C, 
+7WUY_76N,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures_bs_cropped/7WUY_76N_holo_aligned_predicted_protein.pdb,C[C@@H]1C=C[C@@H]2CCCC[C@@H]2[C@H]1C(=O)c1c(O)c([C@]2(O)CC[C@H](O)[C@@H]3O[C@@H]32)c[nH]c1=O, +7PT3_3KK,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures_bs_cropped/7PT3_3KK_holo_aligned_predicted_protein.pdb,CC(C)(O)C(=O)SCCNC(=O)CCNC(=O)[C@H](O)C(C)(C)CO[P@@](=O)(O)O[P@](=O)(O)OC[C@H]1O[C@@H](n2cnc3c(N)ncnc32)[C@H](O)[C@@H]1OP(=O)(O)O, +7C3U_AZG,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures_bs_cropped/7C3U_AZG_holo_aligned_predicted_protein.pdb,Nc1nc(O)c2[nH]nnc2n1, +7PGX_FMN,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures_bs_cropped/7PGX_FMN_holo_aligned_predicted_protein.pdb,Cc1cc2nc3c(=O)[nH]c(=O)nc-3n(C[C@H](O)[C@H](O)[C@H](O)COP(=O)(O)O)c2cc1C, +7FB7_8NF,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures_bs_cropped/7FB7_8NF_holo_aligned_predicted_protein.pdb,Cc1cc(C)c(N)cn1, +8AIE_M7L,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures_bs_cropped/8AIE_M7L_holo_aligned_predicted_protein.pdb,Cc1ncc(COP(=O)(O)O)c(C/N=C(\CON)C(=O)O)c1O, +7LCU_XTA,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures_bs_cropped/7LCU_XTA_holo_aligned_predicted_protein.pdb,CN1CCN(c2ncc(Oc3cc(CN4CCC(CC(=O)O)CC4)cc(-c4cc(Cl)cc(Cl)c4)n3)cn2)CC1, +6YMS_OZH,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures_bs_cropped/6YMS_OZH_holo_aligned_predicted_protein.pdb,CC(C)C[C@H](NC(=O)CN[P@@](=O)(O)[C@@H](Cc1ccccc1)NC(=O)OCc1ccccc1)C(=O)O, 
+7PK0_BYC,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures_bs_cropped/7PK0_BYC_holo_aligned_predicted_protein.pdb,CC(C)(CO[P@@](=O)(O)O[P@](=O)(O)OC[C@H]1O[C@@H](n2cnc3c(N)ncnc32)[C@H](O)[C@@H]1OP(=O)(O)O)[C@@H](O)C(=O)NCCC(=O)NCCSC(=O)c1ccccc1, +7M6K_YRJ,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures_bs_cropped/7M6K_YRJ_holo_aligned_predicted_protein.pdb,O=C(NCc1ccc[n+]([C@@H]2O[C@H](COP(=O)(O)O)[C@@H](O)[C@H]2O)c1)Nc1ccc([N+](=O)[O-])cc1, +7SFO_98L,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures_bs_cropped/7SFO_98L_holo_aligned_predicted_protein.pdb,Oc1cccc(CNc2nc(Cl)nc3scc(-c4ccccc4)c23)c1, +8EAB_VN2,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures_bs_cropped/8EAB_VN2_holo_aligned_predicted_protein.pdb,O=C(N[C@H](C(=O)Nc1cnccc1-c1ccc(C(F)(F)F)cc1)c1cccc(C(F)(F)F)c1)c1cnccc1-c1ccc(C(F)(F)F)cc1, +7MWU_ZPM,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures_bs_cropped/7MWU_ZPM_holo_aligned_predicted_protein.pdb,O=C(O)C1CCC1, +7TYP_KUR,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures_bs_cropped/7TYP_KUR_holo_aligned_predicted_protein.pdb,Cc1nccnc1-c1nn2c(=O)cc(-c3ccc(C4CCCCC4)cc3)[nH]c2c1C(=O)N1CC(CF)C1, +6XM9_V55,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures_bs_cropped/6XM9_V55_holo_aligned_predicted_protein.pdb,COc1cc(C=O)ccc1O, +7T0D_FPP,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures_bs_cropped/7T0D_FPP_holo_aligned_predicted_protein.pdb,CC(C)=CCC/C(C)=C/CC/C(C)=C/CO[P@](=O)(O)OP(=O)(O)O, +7XI7_4RI,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures_bs_cropped/7XI7_4RI_holo_aligned_predicted_protein.pdb,CCCCCCc1nc(N)nc(N)c1-c1ccccc1, 
+7PRI_7TI,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures_bs_cropped/7PRI_7TI_holo_aligned_predicted_protein.pdb,Nc1cc(C(Cl)=C(Cl)Cl)c(S(N)(=O)=O)cc1S(N)(=O)=O, +8FLV_ZB9,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures_bs_cropped/8FLV_ZB9_holo_aligned_predicted_protein.pdb,CN(c1ncnc2[nH]ccc12)[C@@H]1CCCN(C(=O)CNc2cc(Cl)cc(Cl)c2)C1, +7N4W_P4V,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures_bs_cropped/7N4W_P4V_holo_aligned_predicted_protein.pdb,COc1cc2c(cc1OC)[C@H]1Cc3ccc(OC)c(OC)c3CN1CC2, +7OEO_V9Z,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures_bs_cropped/7OEO_V9Z_holo_aligned_predicted_protein.pdb,CNC(=O)CN(CC(c1ccccc1)c1ccccc1)C(=O)c1cc(C)c(OC)c(C)c1, +5SB2_1K2,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures_bs_cropped/5SB2_1K2_holo_aligned_predicted_protein.pdb,O=C(N[C@@H]1C[C@H]1c1ccccc1)c1cc(Cl)cc(COc2cnc3[nH]ccc3c2)c1, +7MGY_ZD1,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures_bs_cropped/7MGY_ZD1_holo_aligned_predicted_protein.pdb,N[C@H]1C=C(CO)[C@@H](O[C@H]2O[C@H](CO)[C@@H](O)[C@H](O)[C@H]2O)[C@H](O)[C@H]1O, +7OPG_06N,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures_bs_cropped/7OPG_06N_holo_aligned_predicted_protein.pdb,CCCNc1nn2c(-c3ccc(O)cc3)cnc2s1, +7OLI_8HG,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures_bs_cropped/7OLI_8HG_holo_aligned_predicted_protein.pdb,Nc1nc2c([nH]c(=O)n2[C@H]2C[C@H](O)[C@@H](CO)O2)c(=O)[nH]1, +8FAV_4Y5,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures_bs_cropped/8FAV_4Y5_holo_aligned_predicted_protein.pdb,O=C(O)c1ccc(-c2nn(C(=O)c3c(Cl)cccc3C(F)(F)F)c3cccc(F)c23)c(F)c1, 
+7R59_I5F,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures_bs_cropped/7R59_I5F_holo_aligned_predicted_protein.pdb,Oc1ccc2c(c1)sc1nncn12, +6XHT_V2V,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures_bs_cropped/6XHT_V2V_holo_aligned_predicted_protein.pdb,Nc1ccn([C@@H]2O[C@H](CO[P@](=O)(O)O[P@@](=O)(O)OC[C@@H](O)[C@@H](O)[C@@H](O)CO)[C@@H](O)[C@H]2O)c(=O)n1, +8EX2_Q2Q,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures_bs_cropped/8EX2_Q2Q_holo_aligned_predicted_protein.pdb,O=C1CC(c2ccccc2)=Nc2c(-c3ccccc3)c(C(F)(F)F)nn21, +7LOU_IFM,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures_bs_cropped/7LOU_IFM_holo_aligned_predicted_protein.pdb,OC[C@H]1CNC[C@@H](O)[C@@H]1O, +6XBO_5MC,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures_bs_cropped/6XBO_5MC_holo_aligned_predicted_protein.pdb,Cc1cn([C@@H]2O[C@H](COP(=O)(O)O)[C@@H](O)[C@H]2O)c(=O)nc1N, +6TW7_NZB,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures_bs_cropped/6TW7_NZB_holo_aligned_predicted_protein.pdb,CN(C)Cc1[nH]nc2ccc(-c3ccc(F)cc3OCCc3cccnc3)cc12, +7U3J_L6U,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures_bs_cropped/7U3J_L6U_holo_aligned_predicted_protein.pdb,CNC(=O)[C@H](CCc1ccccc1)NC(=O)[C@H](NC(=O)CNCc1ccc(OC)cc1OC)c1cccs1, +7UYB_OK0,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures_bs_cropped/7UYB_OK0_holo_aligned_predicted_protein.pdb,NS(=O)(=O)c1c(C(F)(F)F)ccc(-c2ccc(C3CCNCC3)cc2)c1-c1nnn[nH]1, +7WJB_BGC,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures_bs_cropped/7WJB_BGC_holo_aligned_predicted_protein.pdb,OC[C@H]1O[C@@H](O)[C@H](O)[C@@H](O)[C@@H]1O, 
+7RC3_SAH,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures_bs_cropped/7RC3_SAH_holo_aligned_predicted_protein.pdb,Nc1ncnc2c1ncn2[C@@H]1O[C@H](CSCC[C@H](N)C(=O)O)[C@@H](O)[C@H]1O, +7MY1_IPE,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures_bs_cropped/7MY1_IPE_holo_aligned_predicted_protein.pdb,C=C(C)CCO[P@@](=O)(O)OP(=O)(O)O, +7AN5_RDH,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures_bs_cropped/7AN5_RDH_holo_aligned_predicted_protein.pdb,C=C(Oc1cccc(C(=O)O)c1)C(=O)O, +7JY3_VUD,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures_bs_cropped/7JY3_VUD_holo_aligned_predicted_protein.pdb,C[C@H](Oc1cc2cc(F)ccc2nc1N)c1[nH]c(=O)ccc1-n1cccn1, +7D5C_GV6,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures_bs_cropped/7D5C_GV6_holo_aligned_predicted_protein.pdb,CC[C@H](C)[C@H](N)C(=O)O[P@](=O)(O)OC[C@H]1O[C@@H](n2cnc3c(N)ncnc32)[C@H](O)[C@@H]1O, +7WUX_6OI,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures_bs_cropped/7WUX_6OI_holo_aligned_predicted_protein.pdb,N[C@@H](CC[C@H](O)[C@@H](N)COS(=O)(=O)O)C(=O)O, +8F8E_XJI,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures_bs_cropped/8F8E_XJI_holo_aligned_predicted_protein.pdb,NC(=O)C[C@H](NC(=O)c1c[nH]nc1-c1ccc(Cl)cc1F)c1ccc(F)c(Cl)c1, +7PL1_SFG,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures_bs_cropped/7PL1_SFG_holo_aligned_predicted_protein.pdb,Nc1ncnc2c1ncn2[C@@H]1O[C@H](C[C@@H](N)CC[C@H](N)C(=O)O)[C@@H](O)[C@H]1O, +6Z14_Q4Z,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures_bs_cropped/6Z14_Q4Z_holo_aligned_predicted_protein.pdb,CC1=[NH+][C@H]2[C@@H](O1)O[C@H](COS(=O)(=O)[O-])[C@@H](O)[C@@H]2O, 
+7ODY_DGI,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures_bs_cropped/7ODY_DGI_holo_aligned_predicted_protein.pdb,Nc1nc2c(ncn2[C@H]2C[C@H](O)[C@@H](CO[P@@](=O)(O)OP(=O)(O)O)O2)c(=O)[nH]1, +7RKW_5TV,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures_bs_cropped/7RKW_5TV_holo_aligned_predicted_protein.pdb,O=C(O[C@H](Cn1ccnc1)c1ccc(F)cc1)c1cc(Cl)cc(Cl)c1, +5SAK_ZRY,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures_bs_cropped/5SAK_ZRY_holo_aligned_predicted_protein.pdb,N=C1N/C(=N\Nc2ccccc2)c2ccccc21, +8AUH_L9I,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures_bs_cropped/8AUH_L9I_holo_aligned_predicted_protein.pdb,CCOC(=O)/C(=N\O)C(C)=O, +8C5M_MTA,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures_bs_cropped/8C5M_MTA_holo_aligned_predicted_protein.pdb,CSC[C@H]1O[C@@H](n2cnc3c(N)ncnc32)[C@H](O)[C@@H]1O, +7N03_ZRP,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures_bs_cropped/7N03_ZRP_holo_aligned_predicted_protein.pdb,CCCCCCNc1ccc2ncc(C(=O)NC)c(Nc3ccccc3)c2c1, +7U0U_FK5,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures_bs_cropped/7U0U_FK5_holo_aligned_predicted_protein.pdb,C=CC[C@@H]1/C=C(\C)C[C@H](C)C[C@H](OC)[C@H]2O[C@@](O)(C(=O)C(=O)N3CCCC[C@H]3C(=O)O[C@H](/C(C)=C/[C@@H]3CC[C@@H](O)[C@H](OC)C3)[C@H](C)[C@@H](O)CC1=O)[C@H](C)C[C@@H]2OC, +7OFK_VCH,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures_bs_cropped/7OFK_VCH_holo_aligned_predicted_protein.pdb,CC(=O)N1Cc2cc(S(C)(=O)=O)ccc2[C@@H]1C(=O)Nc1ccc(C(O)(C(F)(F)F)C(F)(F)F)cc1, +7ULC_56B,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures_bs_cropped/7ULC_56B_holo_aligned_predicted_protein.pdb,Nc1nc2c(c(CN[C@H]3C=C[C@H](O)[C@@H]3O)cn2[C@@H]2O[C@H](COP(=O)(O)O)[C@@H](O)[C@H]2O)c(=O)[nH]1, 
+7TS6_KMI,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures_bs_cropped/7TS6_KMI_holo_aligned_predicted_protein.pdb,CNCc1cccc(-c2cc(C)cc(N)n2)c1, +7NF3_4LU,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures_bs_cropped/7NF3_4LU_holo_aligned_predicted_protein.pdb,Cc1cc2c3c(c1C)C(C)(C)CC=[N+]3c1c([nH]c(=O)[nH]c1=O)N2C[C@H](O)[C@H](O)[C@H](O)COP(=O)(O)O, +7Z1Q_NIO,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures_bs_cropped/7Z1Q_NIO_holo_aligned_predicted_protein.pdb,O=C(O)c1cccnc1, +7QPP_VDX,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures_bs_cropped/7QPP_VDX_holo_aligned_predicted_protein.pdb,C=C1/C(=C\C=C2/CCC[C@]3(C)[C@@H]([C@H](C)CCCC(C)(C)O)CC[C@@H]23)C[C@@H](O)C[C@@H]1O, +7P4C_5OV,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures_bs_cropped/7P4C_5OV_holo_aligned_predicted_protein.pdb,O=S1(=O)N[C@@H]2[C@H](O)[C@@H](O)[C@H](O)[C@@H](CO)[C@@H]2O1, +7VQ9_ISY,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures_bs_cropped/7VQ9_ISY_holo_aligned_predicted_protein.pdb,C=C(C)CCS[P@@](=O)(O)OP(=O)(O)O, +6VTA_AKN,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures_bs_cropped/6VTA_AKN_holo_aligned_predicted_protein.pdb,NCC[C@H](O)C(=O)N[C@@H]1C[C@H](N)[C@@H](O[C@H]2O[C@H](CN)[C@@H](O)[C@H](O)[C@H]2O)[C@H](O)[C@H]1O[C@H]1O[C@H](CO)[C@@H](O)[C@H](N)[C@H]1O, +7V43_C4O,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures_bs_cropped/7V43_C4O_holo_aligned_predicted_protein.pdb,Cc1ccc(Cl)cc1, +6M2B_EZO,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures_bs_cropped/6M2B_EZO_holo_aligned_predicted_protein.pdb,CN(/N=C/c1ccccc1C(=O)O)c1nc(-c2ccccc2Cl)cs1, 
+7UJ5_DGL,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures_bs_cropped/7UJ5_DGL_holo_aligned_predicted_protein.pdb,N[C@H](CCC(=O)O)C(=O)O, +7OZC_G6S,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures_bs_cropped/7OZC_G6S_holo_aligned_predicted_protein.pdb,O=S(=O)(O)OC[C@H]1O[C@@H](O)[C@H](O)[C@@H](O)[C@H]1O, +7TB0_UD1,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures_bs_cropped/7TB0_UD1_holo_aligned_predicted_protein.pdb,CC(=O)N[C@H]1[C@@H](O[P@@](=O)(O)O[P@@](=O)(O)OC[C@H]2O[C@@H](n3ccc(=O)[nH]c3=O)[C@H](O)[C@@H]2O)O[C@H](CO)[C@@H](O)[C@@H]1O, +8C3N_ADP,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures_bs_cropped/8C3N_ADP_holo_aligned_predicted_protein.pdb,Nc1ncnc2c1ncn2[C@@H]1O[C@H](CO[P@](=O)(O)OP(=O)(O)O)[C@@H](O)[C@H]1O, +7NUT_GLP,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures_bs_cropped/7NUT_GLP_holo_aligned_predicted_protein.pdb,N[C@@H]1[C@@H](O)[C@H](O)[C@@H](COP(=O)(O)O)O[C@@H]1O, +7P1M_4IU,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures_bs_cropped/7P1M_4IU_holo_aligned_predicted_protein.pdb,Cn1c(CO[C@@H]2C=CO[C@H](CO)[C@@H]2O)nc2ccc(C(=O)O)cc21, +7EPV_FDA,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures_bs_cropped/7EPV_FDA_holo_aligned_predicted_protein.pdb,Cc1cc2c(cc1C)N(C[C@H](O)[C@H](O)[C@H](O)CO[P@@](=O)(O)O[P@@](=O)(O)OC[C@H]1O[C@@H](n3cnc4c(N)ncnc43)[C@H](O)[C@@H]1O)c1[nH]c(=O)[nH]c(=O)c1N2, +6Z0R_Q4H,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures_bs_cropped/6Z0R_Q4H_holo_aligned_predicted_protein.pdb,N#Cc1cncnc1N, +7P1F_KFN,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures_bs_cropped/7P1F_KFN_holo_aligned_predicted_protein.pdb,O=C(O)C1=C[C@H](O)[C@@H](O)[C@H]([C@H](O)[C@H](O)CO)O1, 
+7VB8_STL,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures_bs_cropped/7VB8_STL_holo_aligned_predicted_protein.pdb,Oc1ccc(/C=C/c2cc(O)cc(O)c2)cc1, +6YR2_T1C,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures_bs_cropped/6YR2_T1C_holo_aligned_predicted_protein.pdb,C[NH+](C)c1cc(NC(=O)CNC(C)(C)C)c(O)c2c1C[C@H]1C[C@H]3[C@H]([NH+](C)C)C(O)=C(C(N)=O)C(=O)[C@@]3(O)C(O)=C1C2=O, +7CIJ_G0C,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures_bs_cropped/7CIJ_G0C_holo_aligned_predicted_protein.pdb,CSCCC/N=C/c1c(COP(=O)(O)O)cnc(C)c1O, +7UMW_NAD,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures_bs_cropped/7UMW_NAD_holo_aligned_predicted_protein.pdb,NC(=O)c1ccc[n+]([C@@H]2O[C@H](CO[P@@](=O)([O-])O[P@](=O)(O)OC[C@H]3O[C@@H](n4cnc5c(N)ncnc54)[C@H](O)[C@@H]3O)[C@@H](O)[C@H]2O)c1, +7TOM_5AD,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures_bs_cropped/7TOM_5AD_holo_aligned_predicted_protein.pdb,C[C@H]1O[C@@H](n2cnc3c(N)ncnc32)[C@H](O)[C@@H]1O, +7DQL_4CL,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures_bs_cropped/7DQL_4CL_holo_aligned_predicted_protein.pdb,Oc1ccc(Cl)cc1O, +7CUO_PHB,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures_bs_cropped/7CUO_PHB_holo_aligned_predicted_protein.pdb,O=C(O)c1ccc(O)cc1, +7Q2B_M6H,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures_bs_cropped/7Q2B_M6H_holo_aligned_predicted_protein.pdb,c1ccc(CC2NCCN2)cc1, +7MWN_WI5,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures_bs_cropped/7MWN_WI5_holo_aligned_predicted_protein.pdb,Cc1c(C(=O)c2cccc3ccccc23)c2cccc3c2n1[C@H](CN1CCOCC1)CO3, 
+7L5F_XNG,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures_bs_cropped/7L5F_XNG_holo_aligned_predicted_protein.pdb,CCCCCCCCCC(=O)NCCCC(=O)O, +8DSC_NCA,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures_bs_cropped/8DSC_NCA_holo_aligned_predicted_protein.pdb,NC(=O)c1cccnc1, +7ZDY_6MJ,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures_bs_cropped/7ZDY_6MJ_holo_aligned_predicted_protein.pdb,CO[C@@H]1OC[C@@H](O)[C@H](O)[C@H]1O, +7NP6_UK8,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures_bs_cropped/7NP6_UK8_holo_aligned_predicted_protein.pdb,O=C(O)c1ccc(OCc2c(-c3c(Cl)cccc3C(F)(F)F)noc2-c2cn[nH]c2)cc1, +7JXX_VP7,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures_bs_cropped/7JXX_VP7_holo_aligned_predicted_protein.pdb,CC(C)(O)C#Cc1ccc2[nH]c3c(c2c1)-c1nc(N)ncc1CCC3, +7LOE_Y84,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures_bs_cropped/7LOE_Y84_holo_aligned_predicted_protein.pdb,Fc1cccc2ccccc12, +7KM8_WPD,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures_bs_cropped/7KM8_WPD_holo_aligned_predicted_protein.pdb,CCc1nc(N)nc(N)c1OCCCOc1cccc(C[C@@H](C(=O)O)C(F)F)c1, +7UY4_SMI,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures_bs_cropped/7UY4_SMI_holo_aligned_predicted_protein.pdb,CN[C@@H]1[C@H](O)[C@H](NC)[C@H]2O[C@]3(O)[C@H](O[C@@H]2[C@H]1O)O[C@H](C)CC3(O)O, +7CL8_TES,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures_bs_cropped/7CL8_TES_holo_aligned_predicted_protein.pdb,C[C@]12CC[C@H]3[C@@H](CCC4=CC(=O)CC[C@@]43C)[C@@H]1CC[C@@H]2O, +6Z1C_7EY,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures_bs_cropped/6Z1C_7EY_holo_aligned_predicted_protein.pdb,Cc1ccc(-c2csc3ncnc(SCCC(=O)O)c23)cc1, 
+8HFN_XGC,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures_bs_cropped/8HFN_XGC_holo_aligned_predicted_protein.pdb,COc1ccc(-c2cccc(S(=O)(=O)NC(=O)[C@@H](N)CS)c2)cn1, +8EXL_799,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures_bs_cropped/8EXL_799_holo_aligned_predicted_protein.pdb,Cc1nc(-c2cn3c(n2)-c2ccc(-c4cnn(C(C)(C)C(N)=O)c4)cc2OCC3)n(C(C)C)n1, +7Z7F_IF3,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures_bs_cropped/7Z7F_IF3_holo_aligned_predicted_protein.pdb,CNc1nc(NC)c2ncn(C)c2n1, +7XG5_PLP,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures_bs_cropped/7XG5_PLP_holo_aligned_predicted_protein.pdb,Cc1ncc(COP(=O)(O)O)c(C=O)c1O, +7MOI_HPS,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures_bs_cropped/7MOI_HPS_holo_aligned_predicted_protein.pdb,O=P(O)(O)Oc1ccccc1, +7LMO_NYO,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures_bs_cropped/7LMO_NYO_holo_aligned_predicted_protein.pdb,CCN(CC)c1ccc2c(C)c(CCN3C(=O)N[C@@]4(CCN(C(=O)c5c[nH]cn5)C4)C3=O)c(=O)oc2c1, +7MSR_DCA,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures_bs_cropped/7MSR_DCA_holo_aligned_predicted_protein.pdb,CCNC(=O)CCNC(=O)[C@H](O)C(C)(C)CO[P@@](=O)(O)O[P@@](=O)(O)OC[C@H]1O[C@@H](n2cnc3c(N)ncnc32)[C@H](O)[C@@H]1OP(=O)(O)O, +6ZCY_QF8,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures_bs_cropped/6ZCY_QF8_holo_aligned_predicted_protein.pdb,CNC(=O)c1nn(C)c2ccc(Nc3nccc(-n4cc(N[C@@H]5CCNC5)c(C)n4)n3)cc12, +6ZC3_JOR,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures_bs_cropped/6ZC3_JOR_holo_aligned_predicted_protein.pdb,CS(=O)(=O)Nc1ccc(F)cc1C(=O)O, 
+7LZD_YHY,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures_bs_cropped/7LZD_YHY_holo_aligned_predicted_protein.pdb,Cc1ccc(F)c2cc(C(=O)Nc3cccc(N4CCC(N(C)C)CC4)c3)[nH]c12, +7N4N_0BK,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures_bs_cropped/7N4N_0BK_holo_aligned_predicted_protein.pdb,COc1cc(C(=O)Nc2ccc(F)c([C@]3(CF)CC[C@@](C)(S(C)(=O)=O)C(N)=N3)c2)ncn1, +7KRU_ATP,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures_bs_cropped/7KRU_ATP_holo_aligned_predicted_protein.pdb,Nc1ncnc2c1ncn2[C@@H]1O[C@H](CO[P@@](=O)(O)O[P@](=O)(O)OP(=O)(O)O)[C@@H](O)[C@H]1O, +7UTW_NAI,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures_bs_cropped/7UTW_NAI_holo_aligned_predicted_protein.pdb,NC(=O)C1=CN([C@@H]2O[C@H](CO[P@](=O)(O)O[P@@](=O)(O)OC[C@H]3O[C@@H](n4cnc5c(N)ncnc54)[C@H](O)[C@@H]3O)[C@@H](O)[C@H]2O)C=CC1, +7BKA_4JC,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures_bs_cropped/7BKA_4JC_holo_aligned_predicted_protein.pdb,CCc1ccc(S(N)(=O)=O)cc1, +5SD5_HWI,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures_bs_cropped/5SD5_HWI_holo_aligned_predicted_protein.pdb,CCc1nc(N)nc(N)c1OCCCOc1cc(C)ccc1N1CC(C(=O)O)C1, +7SCW_GSP,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures_bs_cropped/7SCW_GSP_holo_aligned_predicted_protein.pdb,Nc1nc2c(ncn2[C@@H]2O[C@H](CO[P@](=O)(O)O[P@](=O)(O)OP(O)(O)=S)[C@@H](O)[C@H]2O)c(=O)[nH]1, +7SDD_4IP,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures_bs_cropped/7SDD_4IP_holo_aligned_predicted_protein.pdb,O=P(O)(O)O[C@H]1[C@H](O)[C@@H](OP(=O)(O)O)[C@H](OP(=O)(O)O)[C@@H](OP(=O)(O)O)[C@H]1O, 
+7TH4_FFO,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures_bs_cropped/7TH4_FFO_holo_aligned_predicted_protein.pdb,Nc1nc2c(c(=O)[nH]1)N(C=O)[C@@H](CNc1ccc(C(=O)N[C@@H](CCC(=O)O)C(=O)O)cc1)CN2, +8D19_GSH,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures_bs_cropped/8D19_GSH_holo_aligned_predicted_protein.pdb,N[C@@H](CCC(=O)N[C@@H](CS)C(=O)NCC(=O)O)C(=O)O, +7A1P_QW2,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures_bs_cropped/7A1P_QW2_holo_aligned_predicted_protein.pdb,CCC[C@@H](CC(=O)C(=O)O)C(=O)O, +8G0V_YHT,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures_bs_cropped/8G0V_YHT_holo_aligned_predicted_protein.pdb,C#CCO[P@](=O)(O)OC[C@H]1O[C@@H](n2cnc3c(N)ncnc32)[C@H](O)[C@@H]1O, +7MFP_Z7P,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures_bs_cropped/7MFP_Z7P_holo_aligned_predicted_protein.pdb,Cc1ncc(COP(=O)(O)O)c(/C=N/[C@H]2[C@H](O)[C@@H](O)[C@@H](O[P@](=O)(O)O[P@](=O)(O)OC[C@H]3O[C@@H](n4ccc(=O)[nH]c4=O)[C@H](O)[C@@H]3O)O[C@@H]2C)c1O, +7N7H_CTP,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures_bs_cropped/7N7H_CTP_holo_aligned_predicted_protein.pdb,Nc1ccn([C@@H]2O[C@H](CO[P@](=O)(O)O[P@@](=O)(O)OP(=O)(O)O)[C@@H](O)[C@H]2O)c(=O)n1, +7X9K_8OG,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures_bs_cropped/7X9K_8OG_holo_aligned_predicted_protein.pdb,Nc1nc2c([nH]c(=O)n2[C@H]2C[C@H](O)[C@@H](COP(=O)(O)O)O2)c(=O)[nH]1, +7NFB_GEN,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures_bs_cropped/7NFB_GEN_holo_aligned_predicted_protein.pdb,O=c1c(-c2ccc(O)cc2)coc2cc(O)cc(O)c12, +7DKT_GLF,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures_bs_cropped/7DKT_GLF_holo_aligned_predicted_protein.pdb,OC[C@H]1O[C@H](F)[C@H](O)[C@@H](O)[C@@H]1O, 
+7R3D_APR,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures_bs_cropped/7R3D_APR_holo_aligned_predicted_protein.pdb,Nc1ncnc2c1ncn2[C@@H]1O[C@H](CO[P@](=O)(O)O[P@@](=O)(O)OC[C@H]2O[C@@H](O)[C@H](O)[C@@H]2O)[C@@H](O)[C@H]1O, +6YYO_Q1K,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures_bs_cropped/6YYO_Q1K_holo_aligned_predicted_protein.pdb,CS(=O)(=O)N1CCN(c2ccc3nncn3n2)CC1, +7V14_ORU,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures_bs_cropped/7V14_ORU_holo_aligned_predicted_protein.pdb,[O-][n+]1cc(-c2c(-n3cnnn3)ccc(Cl)c2F)ccc1[C@@H](CC1CC1)n1cc(-c2cncs2)cn1, +7SIU_9ID,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures_bs_cropped/7SIU_9ID_holo_aligned_predicted_protein.pdb,Cn1cc(Nc2ncc(C3=CCC[C@@H](NC(=O)c4ccccc4)C3)nc2C(N)=O)cn1, +7TE8_P0T,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures_bs_cropped/7TE8_P0T_holo_aligned_predicted_protein.pdb,C=C(C)[C@@H]1CCC(C)=C[C@H]1c1c(O)cc(CCCCC)cc1O, +7F8T_FAD,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures_bs_cropped/7F8T_FAD_holo_aligned_predicted_protein.pdb,Cc1cc2nc3c(=O)[nH]c(=O)nc-3n(C[C@H](O)[C@H](O)[C@H](O)CO[P@@](=O)(O)O[P@](=O)(O)OC[C@H]3O[C@@H](n4cnc5c(N)ncnc54)[C@H](O)[C@@H]3O)c2cc1C, +7TUO_KL9,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures_bs_cropped/7TUO_KL9_holo_aligned_predicted_protein.pdb,Cc1cnc2c(N)c(C(=O)NCCc3ccc([C@@H]4C[C@H]5CC[C@@H](C4)N5)cc3)sc2n1, +7B94_ANP,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures_bs_cropped/7B94_ANP_holo_aligned_predicted_protein.pdb,Nc1ncnc2c1ncn2[C@@H]1O[C@H](CO[P@@](=O)(O)O[P@](=O)(O)NP(=O)(O)O)[C@@H](O)[C@H]1O, 
+7LEV_0JO,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures_bs_cropped/7LEV_0JO_holo_aligned_predicted_protein.pdb,C=C(/N=C/c1c(COP(=O)(O)O)cnc(C)c1O)C(=O)O, +8A1H_DLZ,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures_bs_cropped/8A1H_DLZ_holo_aligned_predicted_protein.pdb,Cc1nc2c(=O)[nH]c(=O)nc-2n(C[C@H](O)[C@H](O)[C@H](O)CO)c1C, +7NU0_DCL,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures_bs_cropped/7NU0_DCL_holo_aligned_predicted_protein.pdb,CC(C)C[C@H](N)CO, +7QF4_RBF,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures_bs_cropped/7QF4_RBF_holo_aligned_predicted_protein.pdb,Cc1cc2nc3c(=O)[nH]c(=O)nc-3n(C[C@H](O)[C@H](O)[C@H](O)CO)c2cc1C, +7Z2O_IAJ,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures_bs_cropped/7Z2O_IAJ_holo_aligned_predicted_protein.pdb,COc1cccc2sc3nncn3c12, +7O0N_CDP,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures_bs_cropped/7O0N_CDP_holo_aligned_predicted_protein.pdb,Nc1ccn([C@@H]2O[C@H](CO[P@@](=O)(O)OP(=O)(O)O)[C@@H](O)[C@H]2O)c(=O)n1, +7X5N_5M5,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures_bs_cropped/7X5N_5M5_holo_aligned_predicted_protein.pdb,Cc1n[nH]c2c1[C@](c1cc(CO)cc(-c3ccccc3)c1)(C(C)C)C(C#N)=C(N)O2, +8BOM_QU6,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures_bs_cropped/8BOM_QU6_holo_aligned_predicted_protein.pdb,COc1cccc(NC(=O)c2ccc(C)c(Nc3nc(-c4cccnc4)nc4c3cnn4C)c2)c1, +7K0V_VQP,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures_bs_cropped/7K0V_VQP_holo_aligned_predicted_protein.pdb,Cc1cc(F)c(NC(=O)NCCC(C)(C)C)cc1Nc1ccc2ncn(C)c(=O)c2c1F, 
+7MMH_ZJY,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures_bs_cropped/7MMH_ZJY_holo_aligned_predicted_protein.pdb,COc1ccc2nc(C)c(O[C@@H]3C[C@H]4C(=O)N[C@]5(C(=O)NS(=O)(=O)C6(C)CC6)C[C@H]5/C=C\CCCCC[C@H](NC(=O)OCC5(C(F)(F)F)CC5)C(=O)N4C3)nc2c1, +7PJQ_OWH,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures_bs_cropped/7PJQ_OWH_holo_aligned_predicted_protein.pdb,CNC(=S)c1cccnc1, +7FHA_ADX,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures_bs_cropped/7FHA_ADX_holo_aligned_predicted_protein.pdb,Nc1ncnc2c1ncn2[C@@H]1O[C@H](CO[P@@](=O)(O)OS(=O)(=O)O)[C@@H](O)[C@H]1O, +7BTT_F8R,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures_bs_cropped/7BTT_F8R_holo_aligned_predicted_protein.pdb,COc1cc2c(cc1Nc1nc(Nc3ccccc3S(=O)(=O)C(C)C)c3[nH]ccc3n1)N(C(=O)CN(C)C)CC2, +7QHG_T3B,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures_bs_cropped/7QHG_T3B_holo_aligned_predicted_protein.pdb,CC(C)C(=O)Nc1ncc(C(=O)NCCN(Cc2ccccc2)C(=O)c2ccc(S(=O)(=O)Nc3ccccc3)cc2)s1, +7N6F_0I1,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures_bs_cropped/7N6F_0I1_holo_aligned_predicted_protein.pdb,O=C(C1CCN(c2cncnc2-c2ccc(F)cc2)CC1)N1CC(F)C1, +8AAU_LH0,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures_bs_cropped/8AAU_LH0_holo_aligned_predicted_protein.pdb,CC(C)C(=O)Nc1ncc(-c2cc(C(F)F)nn2-c2c(Cl)cccc2Cl)s1, +8GFD_ZHR,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures_bs_cropped/8GFD_ZHR_holo_aligned_predicted_protein.pdb,CC(=O)N[C@H]1[C@H](OCCc2ccc3occc3c2)O[C@H](CO)[C@@H](O)[C@@H]1O, +7TBU_S3P,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures_bs_cropped/7TBU_S3P_holo_aligned_predicted_protein.pdb,O=C(O)C1=C[C@@H](OP(=O)(O)O)[C@@H](O)[C@H](O)C1, 
+6YQW_82I,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures_bs_cropped/6YQW_82I_holo_aligned_predicted_protein.pdb,CNc1cnn(C)c(=O)c1Cl, +7W06_ITN,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures_bs_cropped/7W06_ITN_holo_aligned_predicted_protein.pdb,C=C(CC(=O)O)C(=O)O, +7ZL5_IWE,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures_bs_cropped/7ZL5_IWE_holo_aligned_predicted_protein.pdb,NS(=O)(=O)c1cc(-c2nnn[nH]2)c(NCc2cccs2)cc1Cl, +7L03_F9F,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures_bs_cropped/7L03_F9F_holo_aligned_predicted_protein.pdb,O=P(O)(O)OCCNS(=O)(=O)c1ccc(OC(F)(F)F)cc1, +7XJN_NSD,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures_bs_cropped/7XJN_NSD_holo_aligned_predicted_protein.pdb,NCCCNCCCN, +7ZCC_OGA,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures_bs_cropped/7ZCC_OGA_holo_aligned_predicted_protein.pdb,O=C(O)CNC(=O)C(=O)O, +6TW5_9M2,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures_bs_cropped/6TW5_9M2_holo_aligned_predicted_protein.pdb,Cc1nn(C)c(C)c1CCOc1cc(F)ccc1-c1ccc2n[nH]c(CN(C)C)c2c1, +7VWF_K55,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures_bs_cropped/7VWF_K55_holo_aligned_predicted_protein.pdb,CCCCOc1ccc(C[C@H](CC)C(=O)O)cc1CNC(=O)c1ccc(C(F)(F)F)cc1F, +7SZA_DUI,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures_bs_cropped/7SZA_DUI_holo_aligned_predicted_protein.pdb,Nc1cccc2c1C(=O)N([C@@H]1CCC(=O)NC1=O)C2=O, +7POM_7VZ,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures_bs_cropped/7POM_7VZ_holo_aligned_predicted_protein.pdb,COC(=O)c1cc(S(N)(=O)=O)c(SC2CCCCC2)cc1Cl, 
+7KQU_YOF,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures_bs_cropped/7KQU_YOF_holo_aligned_predicted_protein.pdb,N[C@@H](Cc1ccc(O)c(F)c1)C(=O)O, +7ZOC_T8E,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures_bs_cropped/7ZOC_T8E_holo_aligned_predicted_protein.pdb,CC(=O)c1ccc(NC(=O)[C@H](C)S)cc1, +7ELT_TYM,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures_bs_cropped/7ELT_TYM_holo_aligned_predicted_protein.pdb,Nc1ncnc2c1ncn2[C@@H]1O[C@H](CO[P@@](=O)(O)OC(=O)[C@@H](N)Cc2c[nH]c3ccccc23)[C@@H](O)[C@H]1O, +6YRV_PJ8,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures_bs_cropped/6YRV_PJ8_holo_aligned_predicted_protein.pdb,CCCCCCCCCCCCCCCCC, +7CNS_PMV,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures_bs_cropped/7CNS_PMV_holo_aligned_predicted_protein.pdb,C[C@@](O)(CCOP(=O)(O)O)CC(=O)O, +6XG5_TOP,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures_bs_cropped/6XG5_TOP_holo_aligned_predicted_protein.pdb,COc1cc(Cc2cnc(N)nc2N)cc(OC)c1OC, +7USH_82V,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures_bs_cropped/7USH_82V_holo_aligned_predicted_protein.pdb,O=c1cc(N2CCOCC2)oc2c(-c3ccc4c(c3)OCCO4)csc12, +7OZ9_NGK,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures_bs_cropped/7OZ9_NGK_holo_aligned_predicted_protein.pdb,CC(=O)N[C@@H]1[C@@H](O)[C@@H](OS(=O)(=O)O)[C@@H](CO)O[C@@H]1O, +7TXK_LW8,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures_bs_cropped/7TXK_LW8_holo_aligned_predicted_protein.pdb,C[N+](C)(C)[C@@H](Cc1c[nH]c(=S)[nH]1)C(=O)O, +7ZHP_IQY,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures_bs_cropped/7ZHP_IQY_holo_aligned_predicted_protein.pdb,CCC(O)(C#Cc1ccc2[nH]c3c(c2c1)-c1nc(N)ncc1CCC3)CC, 
+8AQL_PLG,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures_bs_cropped/8AQL_PLG_holo_aligned_predicted_protein.pdb,Cc1ncc(COP(=O)(O)O)c(CNCC(=O)O)c1O, +8BTI_RFO,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures_bs_cropped/8BTI_RFO_holo_aligned_predicted_protein.pdb,COCC(=O)n1ccc2c(Cl)cccc21, +7ROU_66I,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures_bs_cropped/7ROU_66I_holo_aligned_predicted_protein.pdb,Nc1ncnc2c1c(OC(F)F)nn2[C@@H]1O[C@H](COS(=O)(=O)NC(=O)[C@@H](N)Cc2ccc(O)cc2)[C@@H](O)[C@H]1O, +6ZAE_ACV,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures_bs_cropped/6ZAE_ACV_holo_aligned_predicted_protein.pdb,CC(C)[C@@H](NC(=O)[C@H](CS)NC(=O)CCC[C@H](N)C(=O)O)C(=O)O, +6YSP_PAL,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures_bs_cropped/6YSP_PAL_holo_aligned_predicted_protein.pdb,O=C(O)C[C@H](NC(=O)CP(=O)(O)O)C(=O)O, +8B8H_OJQ,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures_bs_cropped/8B8H_OJQ_holo_aligned_predicted_protein.pdb,Cc1ncc(COP(=O)(O)O)c(C[NH2+]c2conc2O)c1O, +7THI_PGA,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures_bs_cropped/7THI_PGA_holo_aligned_predicted_protein.pdb,O=C(O)COP(=O)(O)O, +7OFF_VCB,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures_bs_cropped/7OFF_VCB_holo_aligned_predicted_protein.pdb,O=C(N[C@@H](C(=O)O)c1ccccc1)c1cccc2c1-c1ccccc1C2=O, +7WY1_D0L,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures_bs_cropped/7WY1_D0L_holo_aligned_predicted_protein.pdb,CCCCCCCN1CCC[C@H]1C(=O)N[C@@H](Cc1ccccc1)C(=O)O, 
+7ES1_UDP,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures_bs_cropped/7ES1_UDP_holo_aligned_predicted_protein.pdb,O=c1ccn([C@@H]2O[C@H](CO[P@](=O)(O)OP(=O)(O)O)[C@@H](O)[C@H]2O)c(=O)[nH]1, +7V3N_AKG,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures_bs_cropped/7V3N_AKG_holo_aligned_predicted_protein.pdb,O=C(O)CCC(=O)C(=O)O, +7TM6_GPJ,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures_bs_cropped/7TM6_GPJ_holo_aligned_predicted_protein.pdb,O=C(O)C[NH2+]CP(=O)(O)O, +7ECR_SIN,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures_bs_cropped/7ECR_SIN_holo_aligned_predicted_protein.pdb,O=C(O)CCC(=O)O, +7A9E_R4W,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures_bs_cropped/7A9E_R4W_holo_aligned_predicted_protein.pdb,CCS(C)(=O)=O, +7ZF0_DHR,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures_bs_cropped/7ZF0_DHR_holo_aligned_predicted_protein.pdb,N#C[C@@H](O)c1ccc(O)cc1, +7F51_BA7,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures_bs_cropped/7F51_BA7_holo_aligned_predicted_protein.pdb,Nc1ncnc2c1ncn2[C@@H]1O[C@H](CO[P@](=O)(O)O[P@](=O)(O)OC[C@H]2O[C@@H](O)[C@H](OC(=O)c3ccccc3)[C@@H]2O)[C@@H](O)[C@H]1O, +7XFA_D9J,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures_bs_cropped/7XFA_D9J_holo_aligned_predicted_protein.pdb,Cc1nc([C@@H]2O[C@H](CO)[C@H](O)[C@H](n3cc(-c4cc(F)c(Cl)c(F)c4)nn3)[C@H]2O)n(-c2cc(Cl)ccc2C(F)(F)F)n1, +8DKO_TFB,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures_bs_cropped/8DKO_TFB_holo_aligned_predicted_protein.pdb,O=C(O)[C@@H]1CCCO1, +6T88_MWQ,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures_bs_cropped/6T88_MWQ_holo_aligned_predicted_protein.pdb,O=C(O)CCc1cnc[nH]1, 
+7BCP_GCO,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures_bs_cropped/7BCP_GCO_holo_aligned_predicted_protein.pdb,O=C(O)[C@H](O)[C@@H](O)[C@H](O)[C@H](O)CO, +7NF0_BYN,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures_bs_cropped/7NF0_BYN_holo_aligned_predicted_protein.pdb,Cc1cc2c3c(c1C)C(C)(C)C[C@@H](O)N3c1c(nc(O)[nH]c1=O)N2C[C@H](O)[C@H](O)[C@H](O)COP(=O)(O)O, +7QE4_NGA,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures_bs_cropped/7QE4_NGA_holo_aligned_predicted_protein.pdb,CC(=O)N[C@@H]1[C@@H](O)[C@@H](O)[C@@H](CO)O[C@H]1O, +7M3H_YPV,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures_bs_cropped/7M3H_YPV_holo_aligned_predicted_protein.pdb,CCCCc1ccc(NS(=O)(=O)c2ccc(O)c(C(=O)O)c2)cc1, +6Z2C_Q5E,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures_bs_cropped/6Z2C_Q5E_holo_aligned_predicted_protein.pdb,O=C(O)CCCCCN1C(=O)[C@@H]2[C@H](C1=O)[C@]1(Cl)C(Cl)=C(Cl)[C@@]2(Cl)C1(Cl)Cl, +8A2D_KXY,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures_bs_cropped/8A2D_KXY_holo_aligned_predicted_protein.pdb,Cc1c(C#Cc2ccc(CN3CCC(CO)CC3)cc2)cc(C(F)F)c2cn([C@@H](C(=O)Nc3nccs3)c3ncn4c3CCC43CC3)nc12, +7NGW_UAW,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures_bs_cropped/7NGW_UAW_holo_aligned_predicted_protein.pdb,CC1CCN(C(=O)Nc2ccc(O)cc2)CC1, +7KZ9_XN7,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures_bs_cropped/7KZ9_XN7_holo_aligned_predicted_protein.pdb,O=C(CNCCO)NCCO, +7UAS_MBU,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures_bs_cropped/7UAS_MBU_holo_aligned_predicted_protein.pdb,CCn1cc(-c2cc(Cn3ccnc3C)cc3c2CCN([C@H](c2cc(C)ccn2)C2CC2)C3=O)c(C(F)(F)F)n1, 
+7YZU_DO7,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures_bs_cropped/7YZU_DO7_holo_aligned_predicted_protein.pdb,CO[C@H]1O[C@H](CS(=O)(=O)O)[C@@H](O)[C@H](O)[C@H]1O, +7VKZ_NOJ,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures_bs_cropped/7VKZ_NOJ_holo_aligned_predicted_protein.pdb,OC[C@H]1NC[C@H](O)[C@@H](O)[C@@H]1O, +7ROR_69X,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures_bs_cropped/7ROR_69X_holo_aligned_predicted_protein.pdb,Nc1ncnc2c1ncn2[C@@H]1O[C@H](CO[P@](=O)(O)OC(=O)[C@@H](N)Cc2ccc(O)cc2)[C@@H](O)[C@H]1O, +8AY3_OE3,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures_bs_cropped/8AY3_OE3_holo_aligned_predicted_protein.pdb,CCn1c(=O)cc(C)c2cc(CNS(=O)(=O)c3ccccc3)ccc21, +7C8Q_DSG,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures_bs_cropped/7C8Q_DSG_holo_aligned_predicted_protein.pdb,NC(=O)C[C@@H](N)C(=O)O, +7XRL_FWK,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures_bs_cropped/7XRL_FWK_holo_aligned_predicted_protein.pdb,CC[C@H]1O[C@@H](n2cnc3c(N)ncnc32)[C@H](O)[C@@H]1O, +7CD9_FVR,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures_bs_cropped/7CD9_FVR_holo_aligned_predicted_protein.pdb,CN1C[C@H](Nc2nc3cc[nH]c3c(=O)n2C)C[C@H](c2ccc(OCc3ccccc3)cc2)C1, +7T3E_SLB,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures_bs_cropped/7T3E_SLB_holo_aligned_predicted_protein.pdb,CC(=O)N[C@H]1[C@H]([C@H](O)[C@H](O)CO)O[C@](O)(C(=O)O)C[C@@H]1O, +6YJA_2BA,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures_bs_cropped/6YJA_2BA_holo_aligned_predicted_protein.pdb,Nc1ncnc2c1ncn2[C@@H]1O[C@@H]2CO[P@@](=O)(O)O[C@H]3[C@@H](O)[C@H](n4cnc5c(N)ncnc54)O[C@@H]3CO[P@](=O)(O)O[C@H]2[C@H]1O, 
+7LT0_ONJ,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures_bs_cropped/7LT0_ONJ_holo_aligned_predicted_protein.pdb,Cc1ccc(Sc2cc(C(=O)N3Cc4ccccc4C3)ccc2O)cc1C, +7D6O_MTE,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures_bs_cropped/7D6O_MTE_holo_aligned_predicted_protein.pdb,Nc1nc2c(c(=O)[nH]1)N[C@H]1C(S)=C(S)[C@@H](COP(=O)(O)O)O[C@H]1N2, +7UJ4_OQ4,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures_bs_cropped/7UJ4_OQ4_holo_aligned_predicted_protein.pdb,CCN(C(=O)c1cc(F)ccc1Oc1cncnc1N1CC2(CCN(C[C@H]3CC[C@H](NS(=O)(=O)CC)CC3)CC2)C1)C(C)C, +7OSO_0V1,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures_bs_cropped/7OSO_0V1_holo_aligned_predicted_protein.pdb,OCC[C@H](O)CO, +7AFX_R9K,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures_bs_cropped/7AFX_R9K_holo_aligned_predicted_protein.pdb,Cc1cccc2c(-c3ccccc3Cl)c(C(=O)O)[nH]c12, +7T1D_E7K,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures_bs_cropped/7T1D_E7K_holo_aligned_predicted_protein.pdb,Cc1cn(-c2ccc3c(c2)CN(c2ncc(Cc4ccc(-n5cccn5)cc4)s2)CC3)c(C)n1, +7R9N_F97,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures_bs_cropped/7R9N_F97_holo_aligned_predicted_protein.pdb,CC(C)n1ncc2cnc(Nc3cc([C@@H]4CCNC4)nc(N4CCC(F)(F)C4)n3)cc21, +7MGT_ZD4,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures_bs_cropped/7MGT_ZD4_holo_aligned_predicted_protein.pdb,Nc1nc(Cl)nc2c1ncn2[C@@H]1O[C@H](CO[P@](=O)(O)OP(=O)(O)O)[C@@H](O)[C@H]1O, +7MYU_ZR7,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures_bs_cropped/7MYU_ZR7_holo_aligned_predicted_protein.pdb,COc1cnc(C(=O)Nc2ccc(F)c([C@]34CN(c5ncc(F)cn5)C[C@H]3CSC(N)=N4)c2)cn1, 
+7RH3_59O,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures_bs_cropped/7RH3_59O_holo_aligned_predicted_protein.pdb,CO[C@@H]1O[C@H](CO)[C@H](O)[C@H](OC(=O)c2ccc(C)cc2)[C@@H]1OC(=O)c1ccc(Cl)cc1[N+](=O)[O-], +7OMX_CNA,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures_bs_cropped/7OMX_CNA_holo_aligned_predicted_protein.pdb,NC(=O)c1ccc[n+]([C@@H]2C[C@H](CO[P@](=O)(O)O[P@](=O)(O)OC[C@H]3O[C@@H](n4cnc5c(N)ncnc54)[C@H](O)[C@@H]3O)[C@@H](O)[C@H]2O)c1, +7NXO_UU8,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures_bs_cropped/7NXO_UU8_holo_aligned_predicted_protein.pdb,Cc1ccccc1Oc1cc(-n2c(=O)cc(S(=O)(=O)c3ccccc3)[nH]c2=O)c(F)cc1C#N, +8DHG_T78,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures_bs_cropped/8DHG_T78_holo_aligned_predicted_protein.pdb,CCn1c(CO)nn(-c2nc(O[C@@H](C)C(F)(F)F)c(C(=O)Nc3c(C)ccnc3Cl)cc2F)c1=O, +7NPL_UKZ,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures_bs_cropped/7NPL_UKZ_holo_aligned_predicted_protein.pdb,CCC[C@@H](NC(=O)c1cccc2c1CC(=O)N2)[C@@H](O)c1cccc(Cl)c1C, +7PRM_81I,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures_bs_cropped/7PRM_81I_holo_aligned_predicted_protein.pdb,O=C(c1ccco1)N1CCN([C@@H]2CC(=O)N(c3ccc(-c4ccc(F)cc4)cc3)C2)CC1, +7WDT_NGS,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures_bs_cropped/7WDT_NGS_holo_aligned_predicted_protein.pdb,CC(=O)N[C@@H]1[C@@H](O)[C@H](O)[C@@H](COS(=O)(=O)O)O[C@H]1O, +7UAW_MF6,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures_bs_cropped/7UAW_MF6_holo_aligned_predicted_protein.pdb,Nc1ncnc2c1ncn2[C@@H]1O[C@@H]2CO[P@](=O)(O)O[P@](=O)(O)OC[C@H]3O[C@@H](O[C@@H]1[C@@H]2O)[C@H](O)[C@@H]3O, 
+7W05_GMP,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures_bs_cropped/7W05_GMP_holo_aligned_predicted_protein.pdb,Nc1nc2c(ncn2[C@@H]2O[C@H](CO)[C@@H](O)[C@H]2O)c(=O)[nH]1, +7UJF_R3V,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures_bs_cropped/7UJF_R3V_holo_aligned_predicted_protein.pdb,C[C@H]1CCN(CCOc2ccc([C@@H]3c4ccc(O)cc4CC[C@@H]3c3ccccc3)cc2)C1, +8D39_QDB,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures_bs_cropped/8D39_QDB_holo_aligned_predicted_protein.pdb,O=C(O)c1ccc(C(=O)c2ccccc2)cc1, +7F5D_EUO,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures_bs_cropped/7F5D_EUO_holo_aligned_predicted_protein.pdb,CNc1cc(-c2ccc3[nH]ccc3c2)nc(S(C)(=O)=O)n1, +7BMI_U4B,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures_bs_cropped/7BMI_U4B_holo_aligned_predicted_protein.pdb,O=C(O)c1ccnc(C(=O)O)c1F, +7KB1_WBJ,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures_bs_cropped/7KB1_WBJ_holo_aligned_predicted_protein.pdb,C=C/C(=N\Cc1c(COP(=O)(O)O)cnc(C)c1O)C(=O)O, +7R7R_AWJ,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures_bs_cropped/7R7R_AWJ_holo_aligned_predicted_protein.pdb,Cc1nc([C@](C)(O)CO)sc1-c1cnc(N)c(O[C@H](C)c2cc(F)ccc2N2NC=CN2)c1, +7L00_XCJ,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures_bs_cropped/7L00_XCJ_holo_aligned_predicted_protein.pdb,CS(=O)(=O)c1ccc2nc(NC(=O)Cc3csc(-n4cccc4)n3)sc2c1, +7BJJ_TVW,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures_bs_cropped/7BJJ_TVW_holo_aligned_predicted_protein.pdb,Nc1ncnc2n[nH]cc12, +7UQ3_O2U,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures_bs_cropped/7UQ3_O2U_holo_aligned_predicted_protein.pdb,O=C(O)C[C@@H]1CC(=O)N(O)C1=O, 
+7XQZ_FPF,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures_bs_cropped/7XQZ_FPF_holo_aligned_predicted_protein.pdb,CC(C)=CCC/C(C)=C/CC/C(C)=C(\F)CO[P@@](=O)(O)OP(=O)(O)O, +7JMV_4NC,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures_bs_cropped/7JMV_4NC_holo_aligned_predicted_protein.pdb,O=[N+]([O-])c1ccc(O)c(O)c1, +7BNH_BEZ,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures_bs_cropped/7BNH_BEZ_holo_aligned_predicted_protein.pdb,O=C(O)c1ccccc1, +8FO5_Y4U,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures_bs_cropped/8FO5_Y4U_holo_aligned_predicted_protein.pdb,CC(=O)c1ccn(S(=O)(=O)c2ccccc2)c1, +7ZU2_DHT,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures_bs_cropped/7ZU2_DHT_holo_aligned_predicted_protein.pdb,C[C@]12CCC(=O)C[C@@H]1CC[C@@H]1[C@@H]2CC[C@]2(C)[C@@H](O)CC[C@@H]12, +7A9H_TPP,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures_bs_cropped/7A9H_TPP_holo_aligned_predicted_protein.pdb,Cc1ncc(C[n+]2csc(CCO[P@](=O)(O)OP(=O)(O)O)c2C)c(N)n1, +7DUA_HJ0,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures_bs_cropped/7DUA_HJ0_holo_aligned_predicted_protein.pdb,Cc1cc(NC(=O)c2cn(C3(C)CC3)c3ncnc(N)c23)n[nH]1, +7P5T_5YG,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures_bs_cropped/7P5T_5YG_holo_aligned_predicted_protein.pdb,COc1ccc(CNc2ccc(Cc3ccncc3)cc2)cc1, +7RNI_60I,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures_bs_cropped/7RNI_60I_holo_aligned_predicted_protein.pdb,Cc1nn(C)c(C)c1C(=O)N1CCN(Cc2nc3ccccc3n2CC(C)(C)C)CC1, +6M73_FNR,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures_bs_cropped/6M73_FNR_holo_aligned_predicted_protein.pdb,Cc1cc2c(cc1C)N(C[C@H](O)[C@H](O)[C@H](O)COP(=O)(O)O)c1[nH]c(=O)[nH]c(=O)c1N2, 
+6ZK5_IMH,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures_bs_cropped/6ZK5_IMH_holo_aligned_predicted_protein.pdb,OC[C@H]1N[C@@H](c2c[nH]c3c(O)ncnc23)[C@H](O)[C@@H]1O, +7VC5_9SF,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures_bs_cropped/7VC5_9SF_holo_aligned_predicted_protein.pdb,O=C(C[C@H]1NCCC[C@@H]1O)Cn1cnc2ccccc2c1=O, +7ZZW_KKW,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures_bs_cropped/7ZZW_KKW_holo_aligned_predicted_protein.pdb,O=C([C@H]1C[C@@H](c2cccc(Cl)c2)CN1)N1CCN(c2nccc3ccsc23)CC1, +7R6J_2I7,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures_bs_cropped/7R6J_2I7_holo_aligned_predicted_protein.pdb,Cc1cc(NCc2cccc(CN3C[C@H](O)[C@@H](O)[C@H](O)[C@H]3CO)c2)cc(-c2ncccn2)c1, +8HO0_3ZI,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures_bs_cropped/8HO0_3ZI_holo_aligned_predicted_protein.pdb,O=C1N[C@@H](Cc2c[nH]c3c(F)cccc23)C(=O)N2CCC[C@@H]12, +7XBV_APC,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures_bs_cropped/7XBV_APC_holo_aligned_predicted_protein.pdb,Nc1ncnc2c1ncn2[C@@H]1O[C@H](CO[P@](=O)(O)C[P@](=O)(O)OP(=O)(O)O)[C@@H](O)[C@H]1O, +7UXS_OJC,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures_bs_cropped/7UXS_OJC_holo_aligned_predicted_protein.pdb,Nc1ncnc2c1ncn2[C@@H]1O[C@@H]2CO[P@](=O)(O)O[P@](=O)(O)OC[C@H]3O[C@@H](O[C@H]2[C@H]1O)[C@H](O)[C@@H]3O, +7WPW_F15,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures_bs_cropped/7WPW_F15_holo_aligned_predicted_protein.pdb,CCCCCCCCCCCCCCC(=O)O, +8AEM_LVF,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures_bs_cropped/8AEM_LVF_holo_aligned_predicted_protein.pdb,N#CCc1c[nH]c2ccc(Cl)cc12, 
+7Q25_8J9,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures_bs_cropped/7Q25_8J9_holo_aligned_predicted_protein.pdb,CCCC[C@H](N[C@@H](CCc1ccccc1)C(=O)O)C(=O)N[C@@H](Cc1ccc(O)cc1)C(=O)O, +6ZPB_3D1,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures_bs_cropped/6ZPB_3D1_holo_aligned_predicted_protein.pdb,Nc1ncnc2c1ncn2[C@H]1C[C@H](O)[C@@H](CO)O1, +7TSF_H4B,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures_bs_cropped/7TSF_H4B_holo_aligned_predicted_protein.pdb,C[C@H](O)[C@H](O)[C@H]1CNc2nc(N)[nH]c(=O)c2N1, +7LJN_GTP,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures_bs_cropped/7LJN_GTP_holo_aligned_predicted_protein.pdb,Nc1nc2c(ncn2[C@@H]2O[C@H](CO[P@](=O)(O)O[P@](=O)(O)OP(=O)(O)O)[C@@H](O)[C@H]2O)c(=O)[nH]1, +7E4L_MDN,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures_bs_cropped/7E4L_MDN_holo_aligned_predicted_protein.pdb,O=P(O)(O)CP(=O)(O)O, +7N7B_T3F,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures_bs_cropped/7N7B_T3F_holo_aligned_predicted_protein.pdb,Cc1cn([C@H]2C[C@H](O)[C@@H](CO[P@@](=O)(O)O[P@](=O)(O)O[C@H]3O[C@H](C)[C@H](O)[C@H](N)[C@H]3O)O2)c(=O)[nH]c1=O, +7WKL_CAQ,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures_bs_cropped/7WKL_CAQ_holo_aligned_predicted_protein.pdb,Oc1ccccc1O, +8AP0_PRP,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures_bs_cropped/8AP0_PRP_holo_aligned_predicted_protein.pdb,O=P(O)(O)OC[C@H]1O[C@H](O[P@@](=O)(O)OP(=O)(O)O)[C@H](O)[C@@H]1O, +7V3S_5I9,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures_bs_cropped/7V3S_5I9_holo_aligned_predicted_protein.pdb,O=C(Nc1ccc(F)cc1)C1(C(=O)Nc2ccc(Oc3ccnc4c3Oc3ccccc3N4)c(F)c2)CC1, 
+7XPO_UPG,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures_bs_cropped/7XPO_UPG_holo_aligned_predicted_protein.pdb,O=c1ccn([C@@H]2O[C@H](CO[P@](=O)(O)O[P@](=O)(O)O[C@H]3O[C@H](CO)[C@@H](O)[C@H](O)[C@H]3O)[C@@H](O)[C@H]2O)c(=O)[nH]1, +7KC5_BJZ,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures_bs_cropped/7KC5_BJZ_holo_aligned_predicted_protein.pdb,CN1C(=O)N(c2cc(Cl)cc(Cl)c2)C(=O)[C@]12CN(c1ccc(C(=O)O)cn1)C[C@H]2c1ccc(C#N)cc1, +7NSW_HC4,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures_bs_cropped/7NSW_HC4_holo_aligned_predicted_protein.pdb,O=C(O)/C=C/c1ccc(O)cc1, +7RWS_4UR,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures_bs_cropped/7RWS_4UR_holo_aligned_predicted_protein.pdb,Nc1nc2c(ncn2[C@@H]2O[C@@H]3CO[P@@](=O)(O)O[C@@H]4[C@H](O)[C@@H](CO[P@](=O)(O)O[C@H]3[C@H]2O)O[C@H]4n2cnc3c(N)ncnc32)c(=O)[nH]1, +7VBU_6I4,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures_bs_cropped/7VBU_6I4_holo_aligned_predicted_protein.pdb,Cc1ccc2c(n1)[nH]c1c(C3CC3)cccc12, +7QTA_URI,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures_bs_cropped/7QTA_URI_holo_aligned_predicted_protein.pdb,O=c1ccn([C@@H]2O[C@H](CO)[C@@H](O)[C@H]2O)c(=O)[nH]1, +7WQQ_5Z6,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures_bs_cropped/7WQQ_5Z6_holo_aligned_predicted_protein.pdb,CC(C)(C)c1cc(C(=O)/C=C/c2ccc(C(=O)O)cc2)cc(C(C)(C)C)c1, +8D5D_5DK,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures_bs_cropped/8D5D_5DK_holo_aligned_predicted_protein.pdb,Cc1ncc(COP(=O)(O)O)c(/C=N/[C@H](CCCNC(=N)N)C(=O)O)c1O, +7Q27_8KC,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures_bs_cropped/7Q27_8KC_holo_aligned_predicted_protein.pdb,CCCC[C@H](N[C@@H](CCc1ccccc1)C(=O)O)C(=O)N[C@@H](Cc1c[nH]c2ccccc12)C(=O)O, 
+7ED2_A3P,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures_bs_cropped/7ED2_A3P_holo_aligned_predicted_protein.pdb,Nc1ncnc2c1ncn2[C@@H]1O[C@H](COP(=O)(O)O)[C@@H](OP(=O)(O)O)[C@H]1O, +6YT6_PKE,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures_bs_cropped/6YT6_PKE_holo_aligned_predicted_protein.pdb,CN(c1ncccc1CNc1ccnc(Nc2ccc3c(c2)CC(=O)N3)n1)S(C)(=O)=O, +7JG0_GAR,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures_bs_cropped/7JG0_GAR_holo_aligned_predicted_protein.pdb,NCC(=O)N[C@@H]1O[C@H](COP(=O)([O-])[O-])[C@@H](O)[C@H]1O, +8EYE_X4I,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures_bs_cropped/8EYE_X4I_holo_aligned_predicted_protein.pdb,O=C(CNc1cc(F)cc(F)c1)N[C@@H](C(=O)NO)c1ccc(-c2cc(F)c(F)c(F)c2)cc1, +7O1T_5X8,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures_bs_cropped/7O1T_5X8_holo_aligned_predicted_protein.pdb,Nc1ncnc2c1ncn2[C@@H]1O[C@H](CSC[C@H](N)C(=O)O)[C@@H](O)[C@H]1O, +6Z4N_Q7B,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures_bs_cropped/6Z4N_Q7B_holo_aligned_predicted_protein.pdb,Cc1ccc(C[C@@]2(C(=O)O)C[C@H]2c2ccccc2)cc1, +7WL4_JFU,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures_bs_cropped/7WL4_JFU_holo_aligned_predicted_protein.pdb,CCN1C(=O)c2cc(N3CCN(C)CC3)nc3c(NS(=O)(=O)c4ccc(F)cc4F)ccc1c23, +8SLG_G5A,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures_bs_cropped/8SLG_G5A_holo_aligned_predicted_protein.pdb,NCC(=O)NS(=O)(=O)OC[C@H]1O[C@@H](n2cnc3c(N)ncnc32)[C@H](O)[C@@H]1O, +7L7C_XQ1,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures_bs_cropped/7L7C_XQ1_holo_aligned_predicted_protein.pdb,COc1cccc(-c2ccc3c(CC(=O)O)coc3c2)c1, 
+7NLV_UJE,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures_bs_cropped/7NLV_UJE_holo_aligned_predicted_protein.pdb,O=C(CCCC[C@@H]1SC[C@@H]2NC(=O)N[C@@H]21)N[C@H]1CCNC1, +7VYJ_CA0,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures_bs_cropped/7VYJ_CA0_holo_aligned_predicted_protein.pdb,NC(=O)O[P@](=O)(O)OC[C@H]1O[C@@H](n2cnc3c(N)ncnc32)[C@H](O)[C@@H]1O, +7PUV_84Z,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures_bs_cropped/7PUV_84Z_holo_aligned_predicted_protein.pdb,COC(=O)c1cc(S(N)(=O)=O)c(Cl)cc1S(=O)(=O)c1ccccc1, +7RSV_7IQ,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures_bs_cropped/7RSV_7IQ_holo_aligned_predicted_protein.pdb,C[C@@H]1COCCN1c1cc2n(n1)[C@@H]1CCC[C@@H]1NC2=O, +7QGP_DJ8,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures_bs_cropped/7QGP_DJ8_holo_aligned_predicted_protein.pdb,Cc1cc(Cl)ccc1CNC(=O)Nc1ccc2cc1OCCOCCNc1ccn3ncc-2c3n1, diff --git a/forks/DiffDock/inference/diffdock_posebusters_benchmark_inputs.csv b/forks/DiffDock/inference/diffdock_posebusters_benchmark_inputs.csv new file mode 100644 index 00000000..539c7d58 --- /dev/null +++ b/forks/DiffDock/inference/diffdock_posebusters_benchmark_inputs.csv @@ -0,0 +1,281 @@ +complex_name,protein_path,ligand_description,protein_sequence +7CNQ_G8X,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures/7CNQ_G8X_holo_aligned_predicted_protein.pdb,O=C(O)[C@H]1NCC[C@H]1O, +7C0U_FGO,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures/7C0U_FGO_holo_aligned_predicted_protein.pdb,Nc1nc2c(ncn2[C@@H]2O[C@H](CO[P@](=O)(O)O[C@H]3[C@@H](O)[C@H](n4ccc(=O)[nH]c4=O)O[C@@H]3CO)[C@H]3CC(O)(O)C[C@H]32)c(=O)[nH]1, +7EBG_J0L,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures/7EBG_J0L_holo_aligned_predicted_protein.pdb,CNc1cccc2c1NC(=O)C2(C)C, 
+7WUY_76N,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures/7WUY_76N_holo_aligned_predicted_protein.pdb,C[C@@H]1C=C[C@@H]2CCCC[C@@H]2[C@H]1C(=O)c1c(O)c([C@]2(O)CC[C@H](O)[C@@H]3O[C@@H]32)c[nH]c1=O, +7PT3_3KK,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures/7PT3_3KK_holo_aligned_predicted_protein.pdb,CC(C)(O)C(=O)SCCNC(=O)CCNC(=O)[C@H](O)C(C)(C)CO[P@@](=O)(O)O[P@](=O)(O)OC[C@H]1O[C@@H](n2cnc3c(N)ncnc32)[C@H](O)[C@@H]1OP(=O)(O)O, +7C3U_AZG,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures/7C3U_AZG_holo_aligned_predicted_protein.pdb,Nc1nc(O)c2[nH]nnc2n1, +7PGX_FMN,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures/7PGX_FMN_holo_aligned_predicted_protein.pdb,Cc1cc2nc3c(=O)[nH]c(=O)nc-3n(C[C@H](O)[C@H](O)[C@H](O)COP(=O)(O)O)c2cc1C, +7FB7_8NF,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures/7FB7_8NF_holo_aligned_predicted_protein.pdb,Cc1cc(C)c(N)cn1, +8AIE_M7L,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures/8AIE_M7L_holo_aligned_predicted_protein.pdb,Cc1ncc(COP(=O)(O)O)c(C/N=C(\CON)C(=O)O)c1O, +7LCU_XTA,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures/7LCU_XTA_holo_aligned_predicted_protein.pdb,CN1CCN(c2ncc(Oc3cc(CN4CCC(CC(=O)O)CC4)cc(-c4cc(Cl)cc(Cl)c4)n3)cn2)CC1, +6YMS_OZH,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures/6YMS_OZH_holo_aligned_predicted_protein.pdb,CC(C)C[C@H](NC(=O)CN[P@@](=O)(O)[C@@H](Cc1ccccc1)NC(=O)OCc1ccccc1)C(=O)O, +7PK0_BYC,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures/7PK0_BYC_holo_aligned_predicted_protein.pdb,CC(C)(CO[P@@](=O)(O)O[P@](=O)(O)OC[C@H]1O[C@@H](n2cnc3c(N)ncnc32)[C@H](O)[C@@H]1OP(=O)(O)O)[C@@H](O)C(=O)NCCC(=O)NCCSC(=O)c1ccccc1, 
+7M6K_YRJ,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures/7M6K_YRJ_holo_aligned_predicted_protein.pdb,O=C(NCc1ccc[n+]([C@@H]2O[C@H](COP(=O)(O)O)[C@@H](O)[C@H]2O)c1)Nc1ccc([N+](=O)[O-])cc1, +7SFO_98L,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures/7SFO_98L_holo_aligned_predicted_protein.pdb,Oc1cccc(CNc2nc(Cl)nc3scc(-c4ccccc4)c23)c1, +8EAB_VN2,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures/8EAB_VN2_holo_aligned_predicted_protein.pdb,O=C(N[C@H](C(=O)Nc1cnccc1-c1ccc(C(F)(F)F)cc1)c1cccc(C(F)(F)F)c1)c1cnccc1-c1ccc(C(F)(F)F)cc1, +7MWU_ZPM,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures/7MWU_ZPM_holo_aligned_predicted_protein.pdb,O=C(O)C1CCC1, +7TYP_KUR,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures/7TYP_KUR_holo_aligned_predicted_protein.pdb,Cc1nccnc1-c1nn2c(=O)cc(-c3ccc(C4CCCCC4)cc3)[nH]c2c1C(=O)N1CC(CF)C1, +6XM9_V55,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures/6XM9_V55_holo_aligned_predicted_protein.pdb,COc1cc(C=O)ccc1O, +7T0D_FPP,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures/7T0D_FPP_holo_aligned_predicted_protein.pdb,CC(C)=CCC/C(C)=C/CC/C(C)=C/CO[P@](=O)(O)OP(=O)(O)O, +7XI7_4RI,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures/7XI7_4RI_holo_aligned_predicted_protein.pdb,CCCCCCc1nc(N)nc(N)c1-c1ccccc1, +7PRI_7TI,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures/7PRI_7TI_holo_aligned_predicted_protein.pdb,Nc1cc(C(Cl)=C(Cl)Cl)c(S(N)(=O)=O)cc1S(N)(=O)=O, +8FLV_ZB9,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures/8FLV_ZB9_holo_aligned_predicted_protein.pdb,CN(c1ncnc2[nH]ccc12)[C@@H]1CCCN(C(=O)CNc2cc(Cl)cc(Cl)c2)C1, 
+7N4W_P4V,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures/7N4W_P4V_holo_aligned_predicted_protein.pdb,COc1cc2c(cc1OC)[C@H]1Cc3ccc(OC)c(OC)c3CN1CC2, +7OEO_V9Z,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures/7OEO_V9Z_holo_aligned_predicted_protein.pdb,CNC(=O)CN(CC(c1ccccc1)c1ccccc1)C(=O)c1cc(C)c(OC)c(C)c1, +5SB2_1K2,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures/5SB2_1K2_holo_aligned_predicted_protein.pdb,O=C(N[C@@H]1C[C@H]1c1ccccc1)c1cc(Cl)cc(COc2cnc3[nH]ccc3c2)c1, +7MGY_ZD1,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures/7MGY_ZD1_holo_aligned_predicted_protein.pdb,N[C@H]1C=C(CO)[C@@H](O[C@H]2O[C@H](CO)[C@@H](O)[C@H](O)[C@H]2O)[C@H](O)[C@H]1O, +7OPG_06N,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures/7OPG_06N_holo_aligned_predicted_protein.pdb,CCCNc1nn2c(-c3ccc(O)cc3)cnc2s1, +7OLI_8HG,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures/7OLI_8HG_holo_aligned_predicted_protein.pdb,Nc1nc2c([nH]c(=O)n2[C@H]2C[C@H](O)[C@@H](CO)O2)c(=O)[nH]1, +8FAV_4Y5,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures/8FAV_4Y5_holo_aligned_predicted_protein.pdb,O=C(O)c1ccc(-c2nn(C(=O)c3c(Cl)cccc3C(F)(F)F)c3cccc(F)c23)c(F)c1, +7R59_I5F,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures/7R59_I5F_holo_aligned_predicted_protein.pdb,Oc1ccc2c(c1)sc1nncn12, +6XHT_V2V,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures/6XHT_V2V_holo_aligned_predicted_protein.pdb,Nc1ccn([C@@H]2O[C@H](CO[P@](=O)(O)O[P@@](=O)(O)OC[C@@H](O)[C@@H](O)[C@@H](O)CO)[C@@H](O)[C@H]2O)c(=O)n1, +8EX2_Q2Q,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures/8EX2_Q2Q_holo_aligned_predicted_protein.pdb,O=C1CC(c2ccccc2)=Nc2c(-c3ccccc3)c(C(F)(F)F)nn21, 
+7LOU_IFM,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures/7LOU_IFM_holo_aligned_predicted_protein.pdb,OC[C@H]1CNC[C@@H](O)[C@@H]1O, +6XBO_5MC,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures/6XBO_5MC_holo_aligned_predicted_protein.pdb,Cc1cn([C@@H]2O[C@H](COP(=O)(O)O)[C@@H](O)[C@H]2O)c(=O)nc1N, +6TW7_NZB,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures/6TW7_NZB_holo_aligned_predicted_protein.pdb,CN(C)Cc1[nH]nc2ccc(-c3ccc(F)cc3OCCc3cccnc3)cc12, +7U3J_L6U,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures/7U3J_L6U_holo_aligned_predicted_protein.pdb,CNC(=O)[C@H](CCc1ccccc1)NC(=O)[C@H](NC(=O)CNCc1ccc(OC)cc1OC)c1cccs1, +7UYB_OK0,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures/7UYB_OK0_holo_aligned_predicted_protein.pdb,NS(=O)(=O)c1c(C(F)(F)F)ccc(-c2ccc(C3CCNCC3)cc2)c1-c1nnn[nH]1, +7WJB_BGC,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures/7WJB_BGC_holo_aligned_predicted_protein.pdb,OC[C@H]1O[C@@H](O)[C@H](O)[C@@H](O)[C@@H]1O, +7RC3_SAH,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures/7RC3_SAH_holo_aligned_predicted_protein.pdb,Nc1ncnc2c1ncn2[C@@H]1O[C@H](CSCC[C@H](N)C(=O)O)[C@@H](O)[C@H]1O, +7MY1_IPE,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures/7MY1_IPE_holo_aligned_predicted_protein.pdb,C=C(C)CCO[P@@](=O)(O)OP(=O)(O)O, +7AN5_RDH,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures/7AN5_RDH_holo_aligned_predicted_protein.pdb,C=C(Oc1cccc(C(=O)O)c1)C(=O)O, +7JY3_VUD,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures/7JY3_VUD_holo_aligned_predicted_protein.pdb,C[C@H](Oc1cc2cc(F)ccc2nc1N)c1[nH]c(=O)ccc1-n1cccn1, 
+7D5C_GV6,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures/7D5C_GV6_holo_aligned_predicted_protein.pdb,CC[C@H](C)[C@H](N)C(=O)O[P@](=O)(O)OC[C@H]1O[C@@H](n2cnc3c(N)ncnc32)[C@H](O)[C@@H]1O, +7WUX_6OI,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures/7WUX_6OI_holo_aligned_predicted_protein.pdb,N[C@@H](CC[C@H](O)[C@@H](N)COS(=O)(=O)O)C(=O)O, +8F8E_XJI,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures/8F8E_XJI_holo_aligned_predicted_protein.pdb,NC(=O)C[C@H](NC(=O)c1c[nH]nc1-c1ccc(Cl)cc1F)c1ccc(F)c(Cl)c1, +7PL1_SFG,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures/7PL1_SFG_holo_aligned_predicted_protein.pdb,Nc1ncnc2c1ncn2[C@@H]1O[C@H](C[C@@H](N)CC[C@H](N)C(=O)O)[C@@H](O)[C@H]1O, +6Z14_Q4Z,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures/6Z14_Q4Z_holo_aligned_predicted_protein.pdb,CC1=[NH+][C@H]2[C@@H](O1)O[C@H](COS(=O)(=O)[O-])[C@@H](O)[C@@H]2O, +7ODY_DGI,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures/7ODY_DGI_holo_aligned_predicted_protein.pdb,Nc1nc2c(ncn2[C@H]2C[C@H](O)[C@@H](CO[P@@](=O)(O)OP(=O)(O)O)O2)c(=O)[nH]1, +7RKW_5TV,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures/7RKW_5TV_holo_aligned_predicted_protein.pdb,O=C(O[C@H](Cn1ccnc1)c1ccc(F)cc1)c1cc(Cl)cc(Cl)c1, +5SAK_ZRY,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures/5SAK_ZRY_holo_aligned_predicted_protein.pdb,N=C1N/C(=N\Nc2ccccc2)c2ccccc21, +8AUH_L9I,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures/8AUH_L9I_holo_aligned_predicted_protein.pdb,CCOC(=O)/C(=N\O)C(C)=O, +8C5M_MTA,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures/8C5M_MTA_holo_aligned_predicted_protein.pdb,CSC[C@H]1O[C@@H](n2cnc3c(N)ncnc32)[C@H](O)[C@@H]1O, 
+7N03_ZRP,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures/7N03_ZRP_holo_aligned_predicted_protein.pdb,CCCCCCNc1ccc2ncc(C(=O)NC)c(Nc3ccccc3)c2c1, +7U0U_FK5,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures/7U0U_FK5_holo_aligned_predicted_protein.pdb,C=CC[C@@H]1/C=C(\C)C[C@H](C)C[C@H](OC)[C@H]2O[C@@](O)(C(=O)C(=O)N3CCCC[C@H]3C(=O)O[C@H](/C(C)=C/[C@@H]3CC[C@@H](O)[C@H](OC)C3)[C@H](C)[C@@H](O)CC1=O)[C@H](C)C[C@@H]2OC, +7OFK_VCH,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures/7OFK_VCH_holo_aligned_predicted_protein.pdb,CC(=O)N1Cc2cc(S(C)(=O)=O)ccc2[C@@H]1C(=O)Nc1ccc(C(O)(C(F)(F)F)C(F)(F)F)cc1, +7ULC_56B,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures/7ULC_56B_holo_aligned_predicted_protein.pdb,Nc1nc2c(c(CN[C@H]3C=C[C@H](O)[C@@H]3O)cn2[C@@H]2O[C@H](COP(=O)(O)O)[C@@H](O)[C@H]2O)c(=O)[nH]1, +7TS6_KMI,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures/7TS6_KMI_holo_aligned_predicted_protein.pdb,CNCc1cccc(-c2cc(C)cc(N)n2)c1, +7NF3_4LU,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures/7NF3_4LU_holo_aligned_predicted_protein.pdb,Cc1cc2c3c(c1C)C(C)(C)CC=[N+]3c1c([nH]c(=O)[nH]c1=O)N2C[C@H](O)[C@H](O)[C@H](O)COP(=O)(O)O, +7Z1Q_NIO,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures/7Z1Q_NIO_holo_aligned_predicted_protein.pdb,O=C(O)c1cccnc1, +7QPP_VDX,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures/7QPP_VDX_holo_aligned_predicted_protein.pdb,C=C1/C(=C\C=C2/CCC[C@]3(C)[C@@H]([C@H](C)CCCC(C)(C)O)CC[C@@H]23)C[C@@H](O)C[C@@H]1O, +7P4C_5OV,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures/7P4C_5OV_holo_aligned_predicted_protein.pdb,O=S1(=O)N[C@@H]2[C@H](O)[C@@H](O)[C@H](O)[C@@H](CO)[C@@H]2O1, 
+7VQ9_ISY,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures/7VQ9_ISY_holo_aligned_predicted_protein.pdb,C=C(C)CCS[P@@](=O)(O)OP(=O)(O)O, +6VTA_AKN,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures/6VTA_AKN_holo_aligned_predicted_protein.pdb,NCC[C@H](O)C(=O)N[C@@H]1C[C@H](N)[C@@H](O[C@H]2O[C@H](CN)[C@@H](O)[C@H](O)[C@H]2O)[C@H](O)[C@H]1O[C@H]1O[C@H](CO)[C@@H](O)[C@H](N)[C@H]1O, +7V43_C4O,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures/7V43_C4O_holo_aligned_predicted_protein.pdb,Cc1ccc(Cl)cc1, +6M2B_EZO,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures/6M2B_EZO_holo_aligned_predicted_protein.pdb,CN(/N=C/c1ccccc1C(=O)O)c1nc(-c2ccccc2Cl)cs1, +7UJ5_DGL,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures/7UJ5_DGL_holo_aligned_predicted_protein.pdb,N[C@H](CCC(=O)O)C(=O)O, +7OZC_G6S,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures/7OZC_G6S_holo_aligned_predicted_protein.pdb,O=S(=O)(O)OC[C@H]1O[C@@H](O)[C@H](O)[C@@H](O)[C@H]1O, +7TB0_UD1,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures/7TB0_UD1_holo_aligned_predicted_protein.pdb,CC(=O)N[C@H]1[C@@H](O[P@@](=O)(O)O[P@@](=O)(O)OC[C@H]2O[C@@H](n3ccc(=O)[nH]c3=O)[C@H](O)[C@@H]2O)O[C@H](CO)[C@@H](O)[C@@H]1O, +8C3N_ADP,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures/8C3N_ADP_holo_aligned_predicted_protein.pdb,Nc1ncnc2c1ncn2[C@@H]1O[C@H](CO[P@](=O)(O)OP(=O)(O)O)[C@@H](O)[C@H]1O, +7NUT_GLP,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures/7NUT_GLP_holo_aligned_predicted_protein.pdb,N[C@@H]1[C@@H](O)[C@H](O)[C@@H](COP(=O)(O)O)O[C@@H]1O, 
+7P1M_4IU,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures/7P1M_4IU_holo_aligned_predicted_protein.pdb,Cn1c(CO[C@@H]2C=CO[C@H](CO)[C@@H]2O)nc2ccc(C(=O)O)cc21, +7EPV_FDA,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures/7EPV_FDA_holo_aligned_predicted_protein.pdb,Cc1cc2c(cc1C)N(C[C@H](O)[C@H](O)[C@H](O)CO[P@@](=O)(O)O[P@@](=O)(O)OC[C@H]1O[C@@H](n3cnc4c(N)ncnc43)[C@H](O)[C@@H]1O)c1[nH]c(=O)[nH]c(=O)c1N2, +6Z0R_Q4H,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures/6Z0R_Q4H_holo_aligned_predicted_protein.pdb,N#Cc1cncnc1N, +7P1F_KFN,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures/7P1F_KFN_holo_aligned_predicted_protein.pdb,O=C(O)C1=C[C@H](O)[C@@H](O)[C@H]([C@H](O)[C@H](O)CO)O1, +7VB8_STL,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures/7VB8_STL_holo_aligned_predicted_protein.pdb,Oc1ccc(/C=C/c2cc(O)cc(O)c2)cc1, +6YR2_T1C,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures/6YR2_T1C_holo_aligned_predicted_protein.pdb,C[NH+](C)c1cc(NC(=O)CNC(C)(C)C)c(O)c2c1C[C@H]1C[C@H]3[C@H]([NH+](C)C)C(O)=C(C(N)=O)C(=O)[C@@]3(O)C(O)=C1C2=O, +7CIJ_G0C,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures/7CIJ_G0C_holo_aligned_predicted_protein.pdb,CSCCC/N=C/c1c(COP(=O)(O)O)cnc(C)c1O, +7UMW_NAD,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures/7UMW_NAD_holo_aligned_predicted_protein.pdb,NC(=O)c1ccc[n+]([C@@H]2O[C@H](CO[P@@](=O)([O-])O[P@](=O)(O)OC[C@H]3O[C@@H](n4cnc5c(N)ncnc54)[C@H](O)[C@@H]3O)[C@@H](O)[C@H]2O)c1, +7TOM_5AD,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures/7TOM_5AD_holo_aligned_predicted_protein.pdb,C[C@H]1O[C@@H](n2cnc3c(N)ncnc32)[C@H](O)[C@@H]1O, 
+7DQL_4CL,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures/7DQL_4CL_holo_aligned_predicted_protein.pdb,Oc1ccc(Cl)cc1O, +7CUO_PHB,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures/7CUO_PHB_holo_aligned_predicted_protein.pdb,O=C(O)c1ccc(O)cc1, +7Q2B_M6H,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures/7Q2B_M6H_holo_aligned_predicted_protein.pdb,c1ccc(CC2NCCN2)cc1, +7MWN_WI5,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures/7MWN_WI5_holo_aligned_predicted_protein.pdb,Cc1c(C(=O)c2cccc3ccccc23)c2cccc3c2n1[C@H](CN1CCOCC1)CO3, +7L5F_XNG,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures/7L5F_XNG_holo_aligned_predicted_protein.pdb,CCCCCCCCCC(=O)NCCCC(=O)O, +8DSC_NCA,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures/8DSC_NCA_holo_aligned_predicted_protein.pdb,NC(=O)c1cccnc1, +7ZDY_6MJ,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures/7ZDY_6MJ_holo_aligned_predicted_protein.pdb,CO[C@@H]1OC[C@@H](O)[C@H](O)[C@H]1O, +7NP6_UK8,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures/7NP6_UK8_holo_aligned_predicted_protein.pdb,O=C(O)c1ccc(OCc2c(-c3c(Cl)cccc3C(F)(F)F)noc2-c2cn[nH]c2)cc1, +7JXX_VP7,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures/7JXX_VP7_holo_aligned_predicted_protein.pdb,CC(C)(O)C#Cc1ccc2[nH]c3c(c2c1)-c1nc(N)ncc1CCC3, +7LOE_Y84,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures/7LOE_Y84_holo_aligned_predicted_protein.pdb,Fc1cccc2ccccc12, +7KM8_WPD,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures/7KM8_WPD_holo_aligned_predicted_protein.pdb,CCc1nc(N)nc(N)c1OCCCOc1cccc(C[C@@H](C(=O)O)C(F)F)c1, 
+7UY4_SMI,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures/7UY4_SMI_holo_aligned_predicted_protein.pdb,CN[C@@H]1[C@H](O)[C@H](NC)[C@H]2O[C@]3(O)[C@H](O[C@@H]2[C@H]1O)O[C@H](C)CC3(O)O, +7CL8_TES,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures/7CL8_TES_holo_aligned_predicted_protein.pdb,C[C@]12CC[C@H]3[C@@H](CCC4=CC(=O)CC[C@@]43C)[C@@H]1CC[C@@H]2O, +6Z1C_7EY,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures/6Z1C_7EY_holo_aligned_predicted_protein.pdb,Cc1ccc(-c2csc3ncnc(SCCC(=O)O)c23)cc1, +8HFN_XGC,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures/8HFN_XGC_holo_aligned_predicted_protein.pdb,COc1ccc(-c2cccc(S(=O)(=O)NC(=O)[C@@H](N)CS)c2)cn1, +8EXL_799,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures/8EXL_799_holo_aligned_predicted_protein.pdb,Cc1nc(-c2cn3c(n2)-c2ccc(-c4cnn(C(C)(C)C(N)=O)c4)cc2OCC3)n(C(C)C)n1, +7Z7F_IF3,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures/7Z7F_IF3_holo_aligned_predicted_protein.pdb,CNc1nc(NC)c2ncn(C)c2n1, +7XG5_PLP,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures/7XG5_PLP_holo_aligned_predicted_protein.pdb,Cc1ncc(COP(=O)(O)O)c(C=O)c1O, +7MOI_HPS,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures/7MOI_HPS_holo_aligned_predicted_protein.pdb,O=P(O)(O)Oc1ccccc1, +7LMO_NYO,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures/7LMO_NYO_holo_aligned_predicted_protein.pdb,CCN(CC)c1ccc2c(C)c(CCN3C(=O)N[C@@]4(CCN(C(=O)c5c[nH]cn5)C4)C3=O)c(=O)oc2c1, +7MSR_DCA,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures/7MSR_DCA_holo_aligned_predicted_protein.pdb,CCNC(=O)CCNC(=O)[C@H](O)C(C)(C)CO[P@@](=O)(O)O[P@@](=O)(O)OC[C@H]1O[C@@H](n2cnc3c(N)ncnc32)[C@H](O)[C@@H]1OP(=O)(O)O, 
+6ZCY_QF8,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures/6ZCY_QF8_holo_aligned_predicted_protein.pdb,CNC(=O)c1nn(C)c2ccc(Nc3nccc(-n4cc(N[C@@H]5CCNC5)c(C)n4)n3)cc12, +6ZC3_JOR,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures/6ZC3_JOR_holo_aligned_predicted_protein.pdb,CS(=O)(=O)Nc1ccc(F)cc1C(=O)O, +7LZD_YHY,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures/7LZD_YHY_holo_aligned_predicted_protein.pdb,Cc1ccc(F)c2cc(C(=O)Nc3cccc(N4CCC(N(C)C)CC4)c3)[nH]c12, +7N4N_0BK,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures/7N4N_0BK_holo_aligned_predicted_protein.pdb,COc1cc(C(=O)Nc2ccc(F)c([C@]3(CF)CC[C@@](C)(S(C)(=O)=O)C(N)=N3)c2)ncn1, +7KRU_ATP,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures/7KRU_ATP_holo_aligned_predicted_protein.pdb,Nc1ncnc2c1ncn2[C@@H]1O[C@H](CO[P@@](=O)(O)O[P@](=O)(O)OP(=O)(O)O)[C@@H](O)[C@H]1O, +7UTW_NAI,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures/7UTW_NAI_holo_aligned_predicted_protein.pdb,NC(=O)C1=CN([C@@H]2O[C@H](CO[P@](=O)(O)O[P@@](=O)(O)OC[C@H]3O[C@@H](n4cnc5c(N)ncnc54)[C@H](O)[C@@H]3O)[C@@H](O)[C@H]2O)C=CC1, +7BKA_4JC,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures/7BKA_4JC_holo_aligned_predicted_protein.pdb,CCc1ccc(S(N)(=O)=O)cc1, +5SD5_HWI,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures/5SD5_HWI_holo_aligned_predicted_protein.pdb,CCc1nc(N)nc(N)c1OCCCOc1cc(C)ccc1N1CC(C(=O)O)C1, +7SCW_GSP,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures/7SCW_GSP_holo_aligned_predicted_protein.pdb,Nc1nc2c(ncn2[C@@H]2O[C@H](CO[P@](=O)(O)O[P@](=O)(O)OP(O)(O)=S)[C@@H](O)[C@H]2O)c(=O)[nH]1, 
+7SDD_4IP,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures/7SDD_4IP_holo_aligned_predicted_protein.pdb,O=P(O)(O)O[C@H]1[C@H](O)[C@@H](OP(=O)(O)O)[C@H](OP(=O)(O)O)[C@@H](OP(=O)(O)O)[C@H]1O, +7TH4_FFO,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures/7TH4_FFO_holo_aligned_predicted_protein.pdb,Nc1nc2c(c(=O)[nH]1)N(C=O)[C@@H](CNc1ccc(C(=O)N[C@@H](CCC(=O)O)C(=O)O)cc1)CN2, +8D19_GSH,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures/8D19_GSH_holo_aligned_predicted_protein.pdb,N[C@@H](CCC(=O)N[C@@H](CS)C(=O)NCC(=O)O)C(=O)O, +7A1P_QW2,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures/7A1P_QW2_holo_aligned_predicted_protein.pdb,CCC[C@@H](CC(=O)C(=O)O)C(=O)O, +8G0V_YHT,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures/8G0V_YHT_holo_aligned_predicted_protein.pdb,C#CCO[P@](=O)(O)OC[C@H]1O[C@@H](n2cnc3c(N)ncnc32)[C@H](O)[C@@H]1O, +7MFP_Z7P,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures/7MFP_Z7P_holo_aligned_predicted_protein.pdb,Cc1ncc(COP(=O)(O)O)c(/C=N/[C@H]2[C@H](O)[C@@H](O)[C@@H](O[P@](=O)(O)O[P@](=O)(O)OC[C@H]3O[C@@H](n4ccc(=O)[nH]c4=O)[C@H](O)[C@@H]3O)O[C@@H]2C)c1O, +7N7H_CTP,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures/7N7H_CTP_holo_aligned_predicted_protein.pdb,Nc1ccn([C@@H]2O[C@H](CO[P@](=O)(O)O[P@@](=O)(O)OP(=O)(O)O)[C@@H](O)[C@H]2O)c(=O)n1, +7X9K_8OG,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures/7X9K_8OG_holo_aligned_predicted_protein.pdb,Nc1nc2c([nH]c(=O)n2[C@H]2C[C@H](O)[C@@H](COP(=O)(O)O)O2)c(=O)[nH]1, +7NFB_GEN,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures/7NFB_GEN_holo_aligned_predicted_protein.pdb,O=c1c(-c2ccc(O)cc2)coc2cc(O)cc(O)c12, 
+7DKT_GLF,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures/7DKT_GLF_holo_aligned_predicted_protein.pdb,OC[C@H]1O[C@H](F)[C@H](O)[C@@H](O)[C@@H]1O, +7R3D_APR,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures/7R3D_APR_holo_aligned_predicted_protein.pdb,Nc1ncnc2c1ncn2[C@@H]1O[C@H](CO[P@](=O)(O)O[P@@](=O)(O)OC[C@H]2O[C@@H](O)[C@H](O)[C@@H]2O)[C@@H](O)[C@H]1O, +6YYO_Q1K,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures/6YYO_Q1K_holo_aligned_predicted_protein.pdb,CS(=O)(=O)N1CCN(c2ccc3nncn3n2)CC1, +7V14_ORU,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures/7V14_ORU_holo_aligned_predicted_protein.pdb,[O-][n+]1cc(-c2c(-n3cnnn3)ccc(Cl)c2F)ccc1[C@@H](CC1CC1)n1cc(-c2cncs2)cn1, +7SIU_9ID,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures/7SIU_9ID_holo_aligned_predicted_protein.pdb,Cn1cc(Nc2ncc(C3=CCC[C@@H](NC(=O)c4ccccc4)C3)nc2C(N)=O)cn1, +7TE8_P0T,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures/7TE8_P0T_holo_aligned_predicted_protein.pdb,C=C(C)[C@@H]1CCC(C)=C[C@H]1c1c(O)cc(CCCCC)cc1O, +7F8T_FAD,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures/7F8T_FAD_holo_aligned_predicted_protein.pdb,Cc1cc2nc3c(=O)[nH]c(=O)nc-3n(C[C@H](O)[C@H](O)[C@H](O)CO[P@@](=O)(O)O[P@](=O)(O)OC[C@H]3O[C@@H](n4cnc5c(N)ncnc54)[C@H](O)[C@@H]3O)c2cc1C, +7TUO_KL9,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures/7TUO_KL9_holo_aligned_predicted_protein.pdb,Cc1cnc2c(N)c(C(=O)NCCc3ccc([C@@H]4C[C@H]5CC[C@@H](C4)N5)cc3)sc2n1, +7B94_ANP,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures/7B94_ANP_holo_aligned_predicted_protein.pdb,Nc1ncnc2c1ncn2[C@@H]1O[C@H](CO[P@@](=O)(O)O[P@](=O)(O)NP(=O)(O)O)[C@@H](O)[C@H]1O, 
+7LEV_0JO,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures/7LEV_0JO_holo_aligned_predicted_protein.pdb,C=C(/N=C/c1c(COP(=O)(O)O)cnc(C)c1O)C(=O)O, +8A1H_DLZ,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures/8A1H_DLZ_holo_aligned_predicted_protein.pdb,Cc1nc2c(=O)[nH]c(=O)nc-2n(C[C@H](O)[C@H](O)[C@H](O)CO)c1C, +7NU0_DCL,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures/7NU0_DCL_holo_aligned_predicted_protein.pdb,CC(C)C[C@H](N)CO, +7QF4_RBF,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures/7QF4_RBF_holo_aligned_predicted_protein.pdb,Cc1cc2nc3c(=O)[nH]c(=O)nc-3n(C[C@H](O)[C@H](O)[C@H](O)CO)c2cc1C, +7Z2O_IAJ,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures/7Z2O_IAJ_holo_aligned_predicted_protein.pdb,COc1cccc2sc3nncn3c12, +7O0N_CDP,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures/7O0N_CDP_holo_aligned_predicted_protein.pdb,Nc1ccn([C@@H]2O[C@H](CO[P@@](=O)(O)OP(=O)(O)O)[C@@H](O)[C@H]2O)c(=O)n1, +7X5N_5M5,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures/7X5N_5M5_holo_aligned_predicted_protein.pdb,Cc1n[nH]c2c1[C@](c1cc(CO)cc(-c3ccccc3)c1)(C(C)C)C(C#N)=C(N)O2, +8BOM_QU6,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures/8BOM_QU6_holo_aligned_predicted_protein.pdb,COc1cccc(NC(=O)c2ccc(C)c(Nc3nc(-c4cccnc4)nc4c3cnn4C)c2)c1, +7K0V_VQP,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures/7K0V_VQP_holo_aligned_predicted_protein.pdb,Cc1cc(F)c(NC(=O)NCCC(C)(C)C)cc1Nc1ccc2ncn(C)c(=O)c2c1F, +7MMH_ZJY,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures/7MMH_ZJY_holo_aligned_predicted_protein.pdb,COc1ccc2nc(C)c(O[C@@H]3C[C@H]4C(=O)N[C@]5(C(=O)NS(=O)(=O)C6(C)CC6)C[C@H]5/C=C\CCCCC[C@H](NC(=O)OCC5(C(F)(F)F)CC5)C(=O)N4C3)nc2c1, 
+7PJQ_OWH,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures/7PJQ_OWH_holo_aligned_predicted_protein.pdb,CNC(=S)c1cccnc1, +7FHA_ADX,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures/7FHA_ADX_holo_aligned_predicted_protein.pdb,Nc1ncnc2c1ncn2[C@@H]1O[C@H](CO[P@@](=O)(O)OS(=O)(=O)O)[C@@H](O)[C@H]1O, +7BTT_F8R,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures/7BTT_F8R_holo_aligned_predicted_protein.pdb,COc1cc2c(cc1Nc1nc(Nc3ccccc3S(=O)(=O)C(C)C)c3[nH]ccc3n1)N(C(=O)CN(C)C)CC2, +7QHG_T3B,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures/7QHG_T3B_holo_aligned_predicted_protein.pdb,CC(C)C(=O)Nc1ncc(C(=O)NCCN(Cc2ccccc2)C(=O)c2ccc(S(=O)(=O)Nc3ccccc3)cc2)s1, +7N6F_0I1,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures/7N6F_0I1_holo_aligned_predicted_protein.pdb,O=C(C1CCN(c2cncnc2-c2ccc(F)cc2)CC1)N1CC(F)C1, +8AAU_LH0,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures/8AAU_LH0_holo_aligned_predicted_protein.pdb,CC(C)C(=O)Nc1ncc(-c2cc(C(F)F)nn2-c2c(Cl)cccc2Cl)s1, +8GFD_ZHR,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures/8GFD_ZHR_holo_aligned_predicted_protein.pdb,CC(=O)N[C@H]1[C@H](OCCc2ccc3occc3c2)O[C@H](CO)[C@@H](O)[C@@H]1O, +7TBU_S3P,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures/7TBU_S3P_holo_aligned_predicted_protein.pdb,O=C(O)C1=C[C@@H](OP(=O)(O)O)[C@@H](O)[C@H](O)C1, +6YQW_82I,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures/6YQW_82I_holo_aligned_predicted_protein.pdb,CNc1cnn(C)c(=O)c1Cl, +7W06_ITN,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures/7W06_ITN_holo_aligned_predicted_protein.pdb,C=C(CC(=O)O)C(=O)O, 
+7ZL5_IWE,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures/7ZL5_IWE_holo_aligned_predicted_protein.pdb,NS(=O)(=O)c1cc(-c2nnn[nH]2)c(NCc2cccs2)cc1Cl, +7L03_F9F,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures/7L03_F9F_holo_aligned_predicted_protein.pdb,O=P(O)(O)OCCNS(=O)(=O)c1ccc(OC(F)(F)F)cc1, +7XJN_NSD,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures/7XJN_NSD_holo_aligned_predicted_protein.pdb,NCCCNCCCN, +7ZCC_OGA,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures/7ZCC_OGA_holo_aligned_predicted_protein.pdb,O=C(O)CNC(=O)C(=O)O, +6TW5_9M2,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures/6TW5_9M2_holo_aligned_predicted_protein.pdb,Cc1nn(C)c(C)c1CCOc1cc(F)ccc1-c1ccc2n[nH]c(CN(C)C)c2c1, +7VWF_K55,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures/7VWF_K55_holo_aligned_predicted_protein.pdb,CCCCOc1ccc(C[C@H](CC)C(=O)O)cc1CNC(=O)c1ccc(C(F)(F)F)cc1F, +7SZA_DUI,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures/7SZA_DUI_holo_aligned_predicted_protein.pdb,Nc1cccc2c1C(=O)N([C@@H]1CCC(=O)NC1=O)C2=O, +7POM_7VZ,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures/7POM_7VZ_holo_aligned_predicted_protein.pdb,COC(=O)c1cc(S(N)(=O)=O)c(SC2CCCCC2)cc1Cl, +7KQU_YOF,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures/7KQU_YOF_holo_aligned_predicted_protein.pdb,N[C@@H](Cc1ccc(O)c(F)c1)C(=O)O, +7ZOC_T8E,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures/7ZOC_T8E_holo_aligned_predicted_protein.pdb,CC(=O)c1ccc(NC(=O)[C@H](C)S)cc1, 
+7ELT_TYM,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures/7ELT_TYM_holo_aligned_predicted_protein.pdb,Nc1ncnc2c1ncn2[C@@H]1O[C@H](CO[P@@](=O)(O)OC(=O)[C@@H](N)Cc2c[nH]c3ccccc23)[C@@H](O)[C@H]1O, +6YRV_PJ8,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures/6YRV_PJ8_holo_aligned_predicted_protein.pdb,CCCCCCCCCCCCCCCCC, +7CNS_PMV,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures/7CNS_PMV_holo_aligned_predicted_protein.pdb,C[C@@](O)(CCOP(=O)(O)O)CC(=O)O, +6XG5_TOP,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures/6XG5_TOP_holo_aligned_predicted_protein.pdb,COc1cc(Cc2cnc(N)nc2N)cc(OC)c1OC, +7USH_82V,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures/7USH_82V_holo_aligned_predicted_protein.pdb,O=c1cc(N2CCOCC2)oc2c(-c3ccc4c(c3)OCCO4)csc12, +7OZ9_NGK,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures/7OZ9_NGK_holo_aligned_predicted_protein.pdb,CC(=O)N[C@@H]1[C@@H](O)[C@@H](OS(=O)(=O)O)[C@@H](CO)O[C@@H]1O, +7TXK_LW8,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures/7TXK_LW8_holo_aligned_predicted_protein.pdb,C[N+](C)(C)[C@@H](Cc1c[nH]c(=S)[nH]1)C(=O)O, +7ZHP_IQY,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures/7ZHP_IQY_holo_aligned_predicted_protein.pdb,CCC(O)(C#Cc1ccc2[nH]c3c(c2c1)-c1nc(N)ncc1CCC3)CC, +8AQL_PLG,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures/8AQL_PLG_holo_aligned_predicted_protein.pdb,Cc1ncc(COP(=O)(O)O)c(CNCC(=O)O)c1O, +8BTI_RFO,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures/8BTI_RFO_holo_aligned_predicted_protein.pdb,COCC(=O)n1ccc2c(Cl)cccc21, 
+7ROU_66I,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures/7ROU_66I_holo_aligned_predicted_protein.pdb,Nc1ncnc2c1c(OC(F)F)nn2[C@@H]1O[C@H](COS(=O)(=O)NC(=O)[C@@H](N)Cc2ccc(O)cc2)[C@@H](O)[C@H]1O, +6ZAE_ACV,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures/6ZAE_ACV_holo_aligned_predicted_protein.pdb,CC(C)[C@@H](NC(=O)[C@H](CS)NC(=O)CCC[C@H](N)C(=O)O)C(=O)O, +6YSP_PAL,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures/6YSP_PAL_holo_aligned_predicted_protein.pdb,O=C(O)C[C@H](NC(=O)CP(=O)(O)O)C(=O)O, +8B8H_OJQ,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures/8B8H_OJQ_holo_aligned_predicted_protein.pdb,Cc1ncc(COP(=O)(O)O)c(C[NH2+]c2conc2O)c1O, +7THI_PGA,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures/7THI_PGA_holo_aligned_predicted_protein.pdb,O=C(O)COP(=O)(O)O, +7OFF_VCB,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures/7OFF_VCB_holo_aligned_predicted_protein.pdb,O=C(N[C@@H](C(=O)O)c1ccccc1)c1cccc2c1-c1ccccc1C2=O, +7WY1_D0L,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures/7WY1_D0L_holo_aligned_predicted_protein.pdb,CCCCCCCN1CCC[C@H]1C(=O)N[C@@H](Cc1ccccc1)C(=O)O, +7ES1_UDP,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures/7ES1_UDP_holo_aligned_predicted_protein.pdb,O=c1ccn([C@@H]2O[C@H](CO[P@](=O)(O)OP(=O)(O)O)[C@@H](O)[C@H]2O)c(=O)[nH]1, +7V3N_AKG,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures/7V3N_AKG_holo_aligned_predicted_protein.pdb,O=C(O)CCC(=O)C(=O)O, +7TM6_GPJ,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures/7TM6_GPJ_holo_aligned_predicted_protein.pdb,O=C(O)C[NH2+]CP(=O)(O)O, 
+7ECR_SIN,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures/7ECR_SIN_holo_aligned_predicted_protein.pdb,O=C(O)CCC(=O)O, +7A9E_R4W,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures/7A9E_R4W_holo_aligned_predicted_protein.pdb,CCS(C)(=O)=O, +7ZF0_DHR,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures/7ZF0_DHR_holo_aligned_predicted_protein.pdb,N#C[C@@H](O)c1ccc(O)cc1, +7F51_BA7,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures/7F51_BA7_holo_aligned_predicted_protein.pdb,Nc1ncnc2c1ncn2[C@@H]1O[C@H](CO[P@](=O)(O)O[P@](=O)(O)OC[C@H]2O[C@@H](O)[C@H](OC(=O)c3ccccc3)[C@@H]2O)[C@@H](O)[C@H]1O, +7XFA_D9J,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures/7XFA_D9J_holo_aligned_predicted_protein.pdb,Cc1nc([C@@H]2O[C@H](CO)[C@H](O)[C@H](n3cc(-c4cc(F)c(Cl)c(F)c4)nn3)[C@H]2O)n(-c2cc(Cl)ccc2C(F)(F)F)n1, +8DKO_TFB,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures/8DKO_TFB_holo_aligned_predicted_protein.pdb,O=C(O)[C@@H]1CCCO1, +6T88_MWQ,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures/6T88_MWQ_holo_aligned_predicted_protein.pdb,O=C(O)CCc1cnc[nH]1, +7BCP_GCO,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures/7BCP_GCO_holo_aligned_predicted_protein.pdb,O=C(O)[C@H](O)[C@@H](O)[C@H](O)[C@H](O)CO, +7NF0_BYN,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures/7NF0_BYN_holo_aligned_predicted_protein.pdb,Cc1cc2c3c(c1C)C(C)(C)C[C@@H](O)N3c1c(nc(O)[nH]c1=O)N2C[C@H](O)[C@H](O)[C@H](O)COP(=O)(O)O, +7QE4_NGA,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures/7QE4_NGA_holo_aligned_predicted_protein.pdb,CC(=O)N[C@@H]1[C@@H](O)[C@@H](O)[C@@H](CO)O[C@H]1O, 
+7M3H_YPV,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures/7M3H_YPV_holo_aligned_predicted_protein.pdb,CCCCc1ccc(NS(=O)(=O)c2ccc(O)c(C(=O)O)c2)cc1, +6Z2C_Q5E,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures/6Z2C_Q5E_holo_aligned_predicted_protein.pdb,O=C(O)CCCCCN1C(=O)[C@@H]2[C@H](C1=O)[C@]1(Cl)C(Cl)=C(Cl)[C@@]2(Cl)C1(Cl)Cl, +8A2D_KXY,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures/8A2D_KXY_holo_aligned_predicted_protein.pdb,Cc1c(C#Cc2ccc(CN3CCC(CO)CC3)cc2)cc(C(F)F)c2cn([C@@H](C(=O)Nc3nccs3)c3ncn4c3CCC43CC3)nc12, +7NGW_UAW,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures/7NGW_UAW_holo_aligned_predicted_protein.pdb,CC1CCN(C(=O)Nc2ccc(O)cc2)CC1, +7KZ9_XN7,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures/7KZ9_XN7_holo_aligned_predicted_protein.pdb,O=C(CNCCO)NCCO, +7UAS_MBU,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures/7UAS_MBU_holo_aligned_predicted_protein.pdb,CCn1cc(-c2cc(Cn3ccnc3C)cc3c2CCN([C@H](c2cc(C)ccn2)C2CC2)C3=O)c(C(F)(F)F)n1, +7YZU_DO7,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures/7YZU_DO7_holo_aligned_predicted_protein.pdb,CO[C@H]1O[C@H](CS(=O)(=O)O)[C@@H](O)[C@H](O)[C@H]1O, +7VKZ_NOJ,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures/7VKZ_NOJ_holo_aligned_predicted_protein.pdb,OC[C@H]1NC[C@H](O)[C@@H](O)[C@@H]1O, +7ROR_69X,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures/7ROR_69X_holo_aligned_predicted_protein.pdb,Nc1ncnc2c1ncn2[C@@H]1O[C@H](CO[P@](=O)(O)OC(=O)[C@@H](N)Cc2ccc(O)cc2)[C@@H](O)[C@H]1O, +8AY3_OE3,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures/8AY3_OE3_holo_aligned_predicted_protein.pdb,CCn1c(=O)cc(C)c2cc(CNS(=O)(=O)c3ccccc3)ccc21, 
+7C8Q_DSG,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures/7C8Q_DSG_holo_aligned_predicted_protein.pdb,NC(=O)C[C@@H](N)C(=O)O, +7XRL_FWK,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures/7XRL_FWK_holo_aligned_predicted_protein.pdb,CC[C@H]1O[C@@H](n2cnc3c(N)ncnc32)[C@H](O)[C@@H]1O, +7CD9_FVR,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures/7CD9_FVR_holo_aligned_predicted_protein.pdb,CN1C[C@H](Nc2nc3cc[nH]c3c(=O)n2C)C[C@H](c2ccc(OCc3ccccc3)cc2)C1, +7T3E_SLB,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures/7T3E_SLB_holo_aligned_predicted_protein.pdb,CC(=O)N[C@H]1[C@H]([C@H](O)[C@H](O)CO)O[C@](O)(C(=O)O)C[C@@H]1O, +6YJA_2BA,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures/6YJA_2BA_holo_aligned_predicted_protein.pdb,Nc1ncnc2c1ncn2[C@@H]1O[C@@H]2CO[P@@](=O)(O)O[C@H]3[C@@H](O)[C@H](n4cnc5c(N)ncnc54)O[C@@H]3CO[P@](=O)(O)O[C@H]2[C@H]1O, +7LT0_ONJ,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures/7LT0_ONJ_holo_aligned_predicted_protein.pdb,Cc1ccc(Sc2cc(C(=O)N3Cc4ccccc4C3)ccc2O)cc1C, +7D6O_MTE,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures/7D6O_MTE_holo_aligned_predicted_protein.pdb,Nc1nc2c(c(=O)[nH]1)N[C@H]1C(S)=C(S)[C@@H](COP(=O)(O)O)O[C@H]1N2, +7UJ4_OQ4,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures/7UJ4_OQ4_holo_aligned_predicted_protein.pdb,CCN(C(=O)c1cc(F)ccc1Oc1cncnc1N1CC2(CCN(C[C@H]3CC[C@H](NS(=O)(=O)CC)CC3)CC2)C1)C(C)C, +7OSO_0V1,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures/7OSO_0V1_holo_aligned_predicted_protein.pdb,OCC[C@H](O)CO, +7AFX_R9K,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures/7AFX_R9K_holo_aligned_predicted_protein.pdb,Cc1cccc2c(-c3ccccc3Cl)c(C(=O)O)[nH]c12, 
+7T1D_E7K,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures/7T1D_E7K_holo_aligned_predicted_protein.pdb,Cc1cn(-c2ccc3c(c2)CN(c2ncc(Cc4ccc(-n5cccn5)cc4)s2)CC3)c(C)n1, +7R9N_F97,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures/7R9N_F97_holo_aligned_predicted_protein.pdb,CC(C)n1ncc2cnc(Nc3cc([C@@H]4CCNC4)nc(N4CCC(F)(F)C4)n3)cc21, +7MGT_ZD4,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures/7MGT_ZD4_holo_aligned_predicted_protein.pdb,Nc1nc(Cl)nc2c1ncn2[C@@H]1O[C@H](CO[P@](=O)(O)OP(=O)(O)O)[C@@H](O)[C@H]1O, +7MYU_ZR7,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures/7MYU_ZR7_holo_aligned_predicted_protein.pdb,COc1cnc(C(=O)Nc2ccc(F)c([C@]34CN(c5ncc(F)cn5)C[C@H]3CSC(N)=N4)c2)cn1, +7RH3_59O,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures/7RH3_59O_holo_aligned_predicted_protein.pdb,CO[C@@H]1O[C@H](CO)[C@H](O)[C@H](OC(=O)c2ccc(C)cc2)[C@@H]1OC(=O)c1ccc(Cl)cc1[N+](=O)[O-], +7OMX_CNA,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures/7OMX_CNA_holo_aligned_predicted_protein.pdb,NC(=O)c1ccc[n+]([C@@H]2C[C@H](CO[P@](=O)(O)O[P@](=O)(O)OC[C@H]3O[C@@H](n4cnc5c(N)ncnc54)[C@H](O)[C@@H]3O)[C@@H](O)[C@H]2O)c1, +7NXO_UU8,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures/7NXO_UU8_holo_aligned_predicted_protein.pdb,Cc1ccccc1Oc1cc(-n2c(=O)cc(S(=O)(=O)c3ccccc3)[nH]c2=O)c(F)cc1C#N, +8DHG_T78,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures/8DHG_T78_holo_aligned_predicted_protein.pdb,CCn1c(CO)nn(-c2nc(O[C@@H](C)C(F)(F)F)c(C(=O)Nc3c(C)ccnc3Cl)cc2F)c1=O, +7NPL_UKZ,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures/7NPL_UKZ_holo_aligned_predicted_protein.pdb,CCC[C@@H](NC(=O)c1cccc2c1CC(=O)N2)[C@@H](O)c1cccc(Cl)c1C, 
+7PRM_81I,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures/7PRM_81I_holo_aligned_predicted_protein.pdb,O=C(c1ccco1)N1CCN([C@@H]2CC(=O)N(c3ccc(-c4ccc(F)cc4)cc3)C2)CC1, +7WDT_NGS,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures/7WDT_NGS_holo_aligned_predicted_protein.pdb,CC(=O)N[C@@H]1[C@@H](O)[C@H](O)[C@@H](COS(=O)(=O)O)O[C@H]1O, +7UAW_MF6,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures/7UAW_MF6_holo_aligned_predicted_protein.pdb,Nc1ncnc2c1ncn2[C@@H]1O[C@@H]2CO[P@](=O)(O)O[P@](=O)(O)OC[C@H]3O[C@@H](O[C@@H]1[C@@H]2O)[C@H](O)[C@@H]3O, +7W05_GMP,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures/7W05_GMP_holo_aligned_predicted_protein.pdb,Nc1nc2c(ncn2[C@@H]2O[C@H](CO)[C@@H](O)[C@H]2O)c(=O)[nH]1, +7UJF_R3V,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures/7UJF_R3V_holo_aligned_predicted_protein.pdb,C[C@H]1CCN(CCOc2ccc([C@@H]3c4ccc(O)cc4CC[C@@H]3c3ccccc3)cc2)C1, +8D39_QDB,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures/8D39_QDB_holo_aligned_predicted_protein.pdb,O=C(O)c1ccc(C(=O)c2ccccc2)cc1, +7F5D_EUO,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures/7F5D_EUO_holo_aligned_predicted_protein.pdb,CNc1cc(-c2ccc3[nH]ccc3c2)nc(S(C)(=O)=O)n1, +7BMI_U4B,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures/7BMI_U4B_holo_aligned_predicted_protein.pdb,O=C(O)c1ccnc(C(=O)O)c1F, +7KB1_WBJ,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures/7KB1_WBJ_holo_aligned_predicted_protein.pdb,C=C/C(=N\Cc1c(COP(=O)(O)O)cnc(C)c1O)C(=O)O, +7R7R_AWJ,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures/7R7R_AWJ_holo_aligned_predicted_protein.pdb,Cc1nc([C@](C)(O)CO)sc1-c1cnc(N)c(O[C@H](C)c2cc(F)ccc2N2NC=CN2)c1, 
+7L00_XCJ,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures/7L00_XCJ_holo_aligned_predicted_protein.pdb,CS(=O)(=O)c1ccc2nc(NC(=O)Cc3csc(-n4cccc4)n3)sc2c1, +7BJJ_TVW,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures/7BJJ_TVW_holo_aligned_predicted_protein.pdb,Nc1ncnc2n[nH]cc12, +7UQ3_O2U,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures/7UQ3_O2U_holo_aligned_predicted_protein.pdb,O=C(O)C[C@@H]1CC(=O)N(O)C1=O, +7XQZ_FPF,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures/7XQZ_FPF_holo_aligned_predicted_protein.pdb,CC(C)=CCC/C(C)=C/CC/C(C)=C(\F)CO[P@@](=O)(O)OP(=O)(O)O, +7JMV_4NC,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures/7JMV_4NC_holo_aligned_predicted_protein.pdb,O=[N+]([O-])c1ccc(O)c(O)c1, +7BNH_BEZ,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures/7BNH_BEZ_holo_aligned_predicted_protein.pdb,O=C(O)c1ccccc1, +8FO5_Y4U,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures/8FO5_Y4U_holo_aligned_predicted_protein.pdb,CC(=O)c1ccn(S(=O)(=O)c2ccccc2)c1, +7ZU2_DHT,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures/7ZU2_DHT_holo_aligned_predicted_protein.pdb,C[C@]12CCC(=O)C[C@@H]1CC[C@@H]1[C@@H]2CC[C@]2(C)[C@@H](O)CC[C@@H]12, +7A9H_TPP,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures/7A9H_TPP_holo_aligned_predicted_protein.pdb,Cc1ncc(C[n+]2csc(CCO[P@](=O)(O)OP(=O)(O)O)c2C)c(N)n1, +7DUA_HJ0,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures/7DUA_HJ0_holo_aligned_predicted_protein.pdb,Cc1cc(NC(=O)c2cn(C3(C)CC3)c3ncnc(N)c23)n[nH]1, +7P5T_5YG,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures/7P5T_5YG_holo_aligned_predicted_protein.pdb,COc1ccc(CNc2ccc(Cc3ccncc3)cc2)cc1, 
+7RNI_60I,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures/7RNI_60I_holo_aligned_predicted_protein.pdb,Cc1nn(C)c(C)c1C(=O)N1CCN(Cc2nc3ccccc3n2CC(C)(C)C)CC1, +6M73_FNR,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures/6M73_FNR_holo_aligned_predicted_protein.pdb,Cc1cc2c(cc1C)N(C[C@H](O)[C@H](O)[C@H](O)COP(=O)(O)O)c1[nH]c(=O)[nH]c(=O)c1N2, +6ZK5_IMH,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures/6ZK5_IMH_holo_aligned_predicted_protein.pdb,OC[C@H]1N[C@@H](c2c[nH]c3c(O)ncnc23)[C@H](O)[C@@H]1O, +7VC5_9SF,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures/7VC5_9SF_holo_aligned_predicted_protein.pdb,O=C(C[C@H]1NCCC[C@@H]1O)Cn1cnc2ccccc2c1=O, +7ZZW_KKW,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures/7ZZW_KKW_holo_aligned_predicted_protein.pdb,O=C([C@H]1C[C@@H](c2cccc(Cl)c2)CN1)N1CCN(c2nccc3ccsc23)CC1, +7R6J_2I7,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures/7R6J_2I7_holo_aligned_predicted_protein.pdb,Cc1cc(NCc2cccc(CN3C[C@H](O)[C@@H](O)[C@H](O)[C@H]3CO)c2)cc(-c2ncccn2)c1, +8HO0_3ZI,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures/8HO0_3ZI_holo_aligned_predicted_protein.pdb,O=C1N[C@@H](Cc2c[nH]c3c(F)cccc23)C(=O)N2CCC[C@@H]12, +7XBV_APC,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures/7XBV_APC_holo_aligned_predicted_protein.pdb,Nc1ncnc2c1ncn2[C@@H]1O[C@H](CO[P@](=O)(O)C[P@](=O)(O)OP(=O)(O)O)[C@@H](O)[C@H]1O, +7UXS_OJC,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures/7UXS_OJC_holo_aligned_predicted_protein.pdb,Nc1ncnc2c1ncn2[C@@H]1O[C@@H]2CO[P@](=O)(O)O[P@](=O)(O)OC[C@H]3O[C@@H](O[C@H]2[C@H]1O)[C@H](O)[C@@H]3O, 
+7WPW_F15,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures/7WPW_F15_holo_aligned_predicted_protein.pdb,CCCCCCCCCCCCCCC(=O)O, +8AEM_LVF,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures/8AEM_LVF_holo_aligned_predicted_protein.pdb,N#CCc1c[nH]c2ccc(Cl)cc12, +7Q25_8J9,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures/7Q25_8J9_holo_aligned_predicted_protein.pdb,CCCC[C@H](N[C@@H](CCc1ccccc1)C(=O)O)C(=O)N[C@@H](Cc1ccc(O)cc1)C(=O)O, +6ZPB_3D1,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures/6ZPB_3D1_holo_aligned_predicted_protein.pdb,Nc1ncnc2c1ncn2[C@H]1C[C@H](O)[C@@H](CO)O1, +7TSF_H4B,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures/7TSF_H4B_holo_aligned_predicted_protein.pdb,C[C@H](O)[C@H](O)[C@H]1CNc2nc(N)[nH]c(=O)c2N1, +7LJN_GTP,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures/7LJN_GTP_holo_aligned_predicted_protein.pdb,Nc1nc2c(ncn2[C@@H]2O[C@H](CO[P@](=O)(O)O[P@](=O)(O)OP(=O)(O)O)[C@@H](O)[C@H]2O)c(=O)[nH]1, +7E4L_MDN,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures/7E4L_MDN_holo_aligned_predicted_protein.pdb,O=P(O)(O)CP(=O)(O)O, +7N7B_T3F,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures/7N7B_T3F_holo_aligned_predicted_protein.pdb,Cc1cn([C@H]2C[C@H](O)[C@@H](CO[P@@](=O)(O)O[P@](=O)(O)O[C@H]3O[C@H](C)[C@H](O)[C@H](N)[C@H]3O)O2)c(=O)[nH]c1=O, +7WKL_CAQ,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures/7WKL_CAQ_holo_aligned_predicted_protein.pdb,Oc1ccccc1O, +8AP0_PRP,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures/8AP0_PRP_holo_aligned_predicted_protein.pdb,O=P(O)(O)OC[C@H]1O[C@H](O[P@@](=O)(O)OP(=O)(O)O)[C@H](O)[C@@H]1O, 
+7V3S_5I9,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures/7V3S_5I9_holo_aligned_predicted_protein.pdb,O=C(Nc1ccc(F)cc1)C1(C(=O)Nc2ccc(Oc3ccnc4c3Oc3ccccc3N4)c(F)c2)CC1, +7XPO_UPG,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures/7XPO_UPG_holo_aligned_predicted_protein.pdb,O=c1ccn([C@@H]2O[C@H](CO[P@](=O)(O)O[P@](=O)(O)O[C@H]3O[C@H](CO)[C@@H](O)[C@H](O)[C@H]3O)[C@@H](O)[C@H]2O)c(=O)[nH]1, +7KC5_BJZ,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures/7KC5_BJZ_holo_aligned_predicted_protein.pdb,CN1C(=O)N(c2cc(Cl)cc(Cl)c2)C(=O)[C@]12CN(c1ccc(C(=O)O)cn1)C[C@H]2c1ccc(C#N)cc1, +7NSW_HC4,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures/7NSW_HC4_holo_aligned_predicted_protein.pdb,O=C(O)/C=C/c1ccc(O)cc1, +7RWS_4UR,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures/7RWS_4UR_holo_aligned_predicted_protein.pdb,Nc1nc2c(ncn2[C@@H]2O[C@@H]3CO[P@@](=O)(O)O[C@@H]4[C@H](O)[C@@H](CO[P@](=O)(O)O[C@H]3[C@H]2O)O[C@H]4n2cnc3c(N)ncnc32)c(=O)[nH]1, +7VBU_6I4,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures/7VBU_6I4_holo_aligned_predicted_protein.pdb,Cc1ccc2c(n1)[nH]c1c(C3CC3)cccc12, +7QTA_URI,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures/7QTA_URI_holo_aligned_predicted_protein.pdb,O=c1ccn([C@@H]2O[C@H](CO)[C@@H](O)[C@H]2O)c(=O)[nH]1, +7WQQ_5Z6,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures/7WQQ_5Z6_holo_aligned_predicted_protein.pdb,CC(C)(C)c1cc(C(=O)/C=C/c2ccc(C(=O)O)cc2)cc(C(C)(C)C)c1, +8D5D_5DK,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures/8D5D_5DK_holo_aligned_predicted_protein.pdb,Cc1ncc(COP(=O)(O)O)c(/C=N/[C@H](CCCNC(=N)N)C(=O)O)c1O, 
+7Q27_8KC,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures/7Q27_8KC_holo_aligned_predicted_protein.pdb,CCCC[C@H](N[C@@H](CCc1ccccc1)C(=O)O)C(=O)N[C@@H](Cc1c[nH]c2ccccc12)C(=O)O, +7ED2_A3P,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures/7ED2_A3P_holo_aligned_predicted_protein.pdb,Nc1ncnc2c1ncn2[C@@H]1O[C@H](COP(=O)(O)O)[C@@H](OP(=O)(O)O)[C@H]1O, +6YT6_PKE,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures/6YT6_PKE_holo_aligned_predicted_protein.pdb,CN(c1ncccc1CNc1ccnc(Nc2ccc3c(c2)CC(=O)N3)n1)S(C)(=O)=O, +7JG0_GAR,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures/7JG0_GAR_holo_aligned_predicted_protein.pdb,NCC(=O)N[C@@H]1O[C@H](COP(=O)([O-])[O-])[C@@H](O)[C@H]1O, +8EYE_X4I,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures/8EYE_X4I_holo_aligned_predicted_protein.pdb,O=C(CNc1cc(F)cc(F)c1)N[C@@H](C(=O)NO)c1ccc(-c2cc(F)c(F)c(F)c2)cc1, +7O1T_5X8,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures/7O1T_5X8_holo_aligned_predicted_protein.pdb,Nc1ncnc2c1ncn2[C@@H]1O[C@H](CSC[C@H](N)C(=O)O)[C@@H](O)[C@H]1O, +6Z4N_Q7B,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures/6Z4N_Q7B_holo_aligned_predicted_protein.pdb,Cc1ccc(C[C@@]2(C(=O)O)C[C@H]2c2ccccc2)cc1, +7WL4_JFU,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures/7WL4_JFU_holo_aligned_predicted_protein.pdb,CCN1C(=O)c2cc(N3CCN(C)CC3)nc3c(NS(=O)(=O)c4ccc(F)cc4F)ccc1c23, +8SLG_G5A,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures/8SLG_G5A_holo_aligned_predicted_protein.pdb,NCC(=O)NS(=O)(=O)OC[C@H]1O[C@@H](n2cnc3c(N)ncnc32)[C@H](O)[C@@H]1O, 
+7L7C_XQ1,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures/7L7C_XQ1_holo_aligned_predicted_protein.pdb,COc1cccc(-c2ccc3c(CC(=O)O)coc3c2)c1, +7NLV_UJE,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures/7NLV_UJE_holo_aligned_predicted_protein.pdb,O=C(CCCC[C@@H]1SC[C@@H]2NC(=O)N[C@@H]21)N[C@H]1CCNC1, +7VYJ_CA0,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures/7VYJ_CA0_holo_aligned_predicted_protein.pdb,NC(=O)O[P@](=O)(O)OC[C@H]1O[C@@H](n2cnc3c(N)ncnc32)[C@H](O)[C@@H]1O, +7PUV_84Z,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures/7PUV_84Z_holo_aligned_predicted_protein.pdb,COC(=O)c1cc(S(N)(=O)=O)c(Cl)cc1S(=O)(=O)c1ccccc1, +7RSV_7IQ,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures/7RSV_7IQ_holo_aligned_predicted_protein.pdb,C[C@@H]1COCCN1c1cc2n(n1)[C@@H]1CCC[C@@H]1NC2=O, +7QGP_DJ8,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures/7QGP_DJ8_holo_aligned_predicted_protein.pdb,Cc1cc(Cl)ccc1CNC(=O)Nc1ccc2cc1OCCOCCNc1ccn3ncc-2c3n1, diff --git a/forks/DiffDockv1/.gitattributes b/forks/DiffDockv1/.gitattributes new file mode 100644 index 00000000..fd2cc86c --- /dev/null +++ b/forks/DiffDockv1/.gitattributes @@ -0,0 +1,12 @@ +*.ipynb linguist-vendored=false +*.ipynb linguist-detectable=false + +/jupyter_notebooks linguist-vendored=false + +jupyter_notebooks/** linguist-vendored + +jupyter_notebooks/** linguist-vendored=false + + +jupyter_notebooks/* linguist-vendored +jupyter_notebooks/* linguist-vendored=false \ No newline at end of file diff --git a/forks/DiffDockv1/.gitignore b/forks/DiffDockv1/.gitignore new file mode 100644 index 00000000..63114464 --- /dev/null +++ b/forks/DiffDockv1/.gitignore @@ -0,0 +1,166 @@ +homework +inference_out_dir_not_specified +.plotly_cache +.DS_store +renew.sh +tmux_renew.sh +images +# Byte-compiled / optimized / DLL files 
+__pycache__/ +*.py[cod] +*$py.class + +# C extensions +*.so +.so3_* + +# Distribution / packaging +.Python +build/ +develop-eggs/ +dist/ +downloads/ +eggs/ +.eggs/ +lib/ +lib64/ +parts/ +sdist/ +var/ +wheels/ +*.egg-info/ +.installed.cfg +*.egg +MANIFEST + +# PyInstaller +# Usually these files are written by a python script from a template +# before PyInstaller builds the exe, so as to inject date/other infos into it. +*.manifest +*.spec + +# Installer logs +pip-log.txt +pip-delete-this-directory.txt + +# Unit test / coverage reports +htmlcov/ +.tox/ +.coverage +.coverage.* +.cache +nosetests.xml +coverage.xml +*.cover +.hypothesis/ +.pytest_cache/ + +# Translations +*.mo +*.pot + +# Django stuff: +*.log +local_settings.py +db.sqlite3 + +# Flask stuff: +instance/ +.webassets-cache + +# Scrapy stuff: +.scrapy + +# Sphinx documentation +docs/_build/ + +# PyBuilder +target/ + +# Jupyter Notebook +.ipynb_checkpoints + +# pyenv +.python-version + +# celery beat schedule file +celerybeat-schedule + +# SageMath parsed files +*.sage.py + +# Environments +.env +.venv +env/ +venv/ +ENV/ +env.bak/ +venv.bak/ + +# Spyder project settings +.spyderproject +.spyproject + +# Rope project settings +.ropeproject + +# mkdocs documentation +/site + +# mypy +.mypy_cache/ +local_config_inference2.yml +.vscode/ + + +*.zip + +.idea/ + + +#################### Project specific +.p.npy +.score.npy +# this ignores everything in data except for the file +!/data +/data/* +!/data/splits +!/data/protein_ligand_example_csv.csv +!/data/testset_csv.csv +!/data/INDEX_general_PL_data.2020 +test_run + +cache +wandb +logs + +# temporary files +.openbabel_cache +temp/ +bsub* +stderr* +stdout* +!/workdir +/workdir/* +!/workdir/paper_confidence_model +!/workdir/paper_score_model +runs2 +results +# this excludes everything in the runs directory except for that specific run +!/runs +/runs/* +!/runs/rigid_redocking +!/runs/flexible_self_docking +local_config.yml +local_config_inference.yml 
+local_config_confidence.yml +temp1.py +temp5.py +temp3.py +temp4.py +temp5.py +temp6.py +temp7.py +esm + diff --git a/forks/DiffDockv1/LICENSE b/forks/DiffDockv1/LICENSE new file mode 100644 index 00000000..6e5e321d --- /dev/null +++ b/forks/DiffDockv1/LICENSE @@ -0,0 +1,21 @@ +MIT License + +Copyright (c) 2022 Gabriele Corso, Hannes Stärk, Bowen Jing + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all +copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +SOFTWARE. 
diff --git a/forks/DiffDockv1/README.md b/forks/DiffDockv1/README.md new file mode 100644 index 00000000..e2891305 --- /dev/null +++ b/forks/DiffDockv1/README.md @@ -0,0 +1,143 @@ +# DiffDock: Diffusion Steps, Twists, and Turns for Molecular Docking +[![PWC](https://img.shields.io/endpoint.svg?url=https://paperswithcode.com/badge/diffdock-diffusion-steps-twists-and-turns-for/blind-docking-on-pdbbind)](https://paperswithcode.com/sota/blind-docking-on-pdbbind?p=diffdock-diffusion-steps-twists-and-turns-for) + +### [Paper on arXiv](https://arxiv.org/abs/2210.01776) + +Implementation of DiffDock, a state-of-the-art method for molecular docking, by Gabriele Corso*, Hannes Stark*, Bowen Jing*, Regina Barzilay and Tommi Jaakkola. +This repository contains all code, instructions and model weights necessary to run the method or to retrain a model. +If you have any questions, feel free to open an issue or reach out to us: [gcorso@mit.edu](mailto:gcorso@mit.edu), [hstark@mit.edu](mailto:hstark@mit.edu), [bjing@mit.edu](mailto:bjing@mit.edu). + +![Alt Text](visualizations/overview.png) + +The repository also contains all the scripts to run the baselines and generate the figures. +Additionally, there are visualization videos in `visualizations`. + +You might also be interested in this [Google Colab notebook](https://colab.research.google.com/drive/1CTtUGg05-2MtlWmfJhqzLTtkDDaxCDOQ#scrollTo=zlPOKLIBsiPU) to run DiffDock by Brian Naughton. + +# Dataset + +The files in `data` contain the names for the time-based data split. + +If you want to train one of our models with the data then: +1. download it from [zenodo](https://zenodo.org/record/6408497) +2. unzip the directory and place it into `data` such that you have the path `data/PDBBind_processed` + + + +## Setup Environment + +We will set up the environment using [Anaconda](https://docs.anaconda.com/anaconda/install/index.html). 
Clone the +current repo + + git clone https://github.com/gcorso/DiffDock.git + +This is an example of how to set up a working conda environment to run the code (but make sure to use the correct pytorch, pytorch-geometric, cuda versions or cpu only versions): + + conda create --name diffdock python=3.9 + conda activate diffdock + conda install pytorch==1.11.0 pytorch-cuda=11.7 -c pytorch -c nvidia + pip install torch-scatter torch-sparse torch-cluster torch-spline-conv torch-geometric==2.0.4 -f https://data.pyg.org/whl/torch-1.11.0+cu117.html + python -m pip install PyYAML scipy "networkx[default]" biopython rdkit-pypi e3nn spyrmsd pandas biopandas + +Then you need to install ESM, which we use both for protein sequence embeddings and for the protein structure prediction in case you only have the sequence of your target. Note that OpenFold (and so ESMFold) requires a GPU. If you don't have a GPU, you can still use DiffDock with existing protein structures. + + pip install "fair-esm[esmfold]" + pip install 'dllogger @ git+https://github.com/NVIDIA/dllogger.git' + pip install 'openfold @ git+https://github.com/aqlaboratory/openfold.git@4b41059694619831a7db195b7e0988fc4ff3a307' + + +# Running DiffDock on your own complexes +We support multiple input formats depending on whether you only want to make predictions for a single complex or for many at once.\ +The protein inputs need to be `.pdb` files or sequences that will be folded with ESMFold. The ligand input can either be a SMILES string or a filetype that RDKit can read like `.sdf` or `.mol2`. + +For a single complex: specify the protein with `--protein_path protein.pdb` or `--protein_sequence GIQSYCTPPYSVLQDPPQPVV` and the ligand with `--ligand ligand.sdf` or `--ligand "COc(cc1)ccc1C#N"` + +For many complexes: create a csv file with paths to proteins and ligand files or SMILES. 
It contains the columns `complex_name` (name used to save predictions, can be left empty), `protein_path` (path to `.pdb` file, if empty uses sequence), `ligand_description` (SMILES string or file path) and `protein_sequence` (to fold with ESMFold in case the protein_path is empty). +An example .csv is at `data/protein_ligand_example_csv.csv` and you would use it with `--protein_ligand_csv data/protein_ligand_example_csv.csv`. + +And you are ready to run inference: + + python -m inference --protein_ligand_csv data/protein_ligand_example_csv.csv --out_dir results/user_predictions_small --inference_steps 20 --samples_per_complex 40 --batch_size 10 --actual_steps 18 --no_final_step_noise + +When providing the `.pdb` files you can also run DiffDock on CPU; however, if possible, we recommend using a GPU as the model runs significantly faster. Note that the first time you run DiffDock on a device the program will precompute and store in cache look-up tables for SO(2) and SO(3) distributions (typically takes a couple of minutes); this won't be repeated in subsequent runs. + + +# Retraining DiffDock +Download the data and place it as described in the "Dataset" section above. + +### Generate the ESM2 embeddings for the proteins +First run: + + python datasets/pdbbind_lm_embedding_preparation.py + +Use the generated file `data/pdbbind_sequences.fasta` to generate the ESM2 language model embeddings using the library https://github.com/facebookresearch/esm by installing their repository and executing the following in their repository: + + python scripts/extract.py esm2_t33_650M_UR50D pdbbind_sequences.fasta embeddings_output --repr_layers 33 --include per_tok --truncation_seq_length 4096 + +This generates the `embeddings_output` directory which you have to copy into the `data` folder of our repository to have `data/embeddings_output`. 
+Then run the command: + + python datasets/esm_embeddings_to_pt.py + +### Using the provided model weights for evaluation +We first generate the language model embeddings for the testset, then run inference with DiffDock, and then evaluate the files that DiffDock produced: + + python datasets/esm_embedding_preparation.py --protein_ligand_csv data/testset_csv.csv --out_file data/prepared_for_esm_testset.fasta + git clone https://github.com/facebookresearch/esm + cd esm + pip install -e . + cd .. + HOME=esm/model_weights python esm/scripts/extract.py esm2_t33_650M_UR50D data/prepared_for_esm_testset.fasta data/esm2_output --repr_layers 33 --include per_tok + python -m inference --protein_ligand_csv data/testset_csv.csv --out_dir results/user_predictions_testset --inference_steps 20 --samples_per_complex 40 --batch_size 10 --actual_steps 18 --no_final_step_noise + python evaluate_files.py --results_path results/user_predictions_testset --file_to_exclude rank1.sdf --num_predictions 40 + + +### Training a model yourself and using those weights +Train the large score model: + + python -m train --run_name big_score_model --test_sigma_intervals --esm_embeddings_path data/esm2_3billion_embeddings.pt --log_dir workdir --lr 1e-3 --tr_sigma_min 0.1 --tr_sigma_max 19 --rot_sigma_min 0.03 --rot_sigma_max 1.55 --batch_size 16 --ns 48 --nv 10 --num_conv_layers 6 --dynamic_max_cross --scheduler plateau --scale_by_sigma --dropout 0.1 --remove_hs --c_alpha_max_neighbors 24 --receptor_radius 15 --num_dataloader_workers 1 --cudnn_benchmark --val_inference_freq 5 --num_inference_complexes 500 --use_ema --distance_embed_dim 64 --cross_distance_embed_dim 64 --sigma_embed_dim 64 --scheduler_patience 30 --n_epochs 850 + +The model weights are saved in the `workdir` directory. 
+ +Train a small score model with higher maximum translation sigma that will be used to generate the samples for training the confidence model: + + python -m train --run_name small_score_model --test_sigma_intervals --esm_embeddings_path data/esm2_3billion_embeddings.pt --log_dir workdir --lr 1e-3 --tr_sigma_min 0.1 --tr_sigma_max 34 --rot_sigma_min 0.03 --rot_sigma_max 1.55 --batch_size 16 --ns 24 --nv 6 --num_conv_layers 5 --dynamic_max_cross --scheduler plateau --scale_by_sigma --dropout 0.1 --remove_hs --c_alpha_max_neighbors 24 --receptor_radius 15 --num_dataloader_workers 1 --cudnn_benchmark --val_inference_freq 5 --num_inference_complexes 500 --use_ema --scheduler_patience 30 --n_epochs 300 + +In practice, you could also likely achieve the same or better results by using the first score model for creating the samples to train the confidence model, but this is what we did in the paper. +The score model used to generate the samples to train the confidence model does not have to be the same as the score model that is used with that confidence model during inference. + +Train the confidence model by running the following: + + python -m confidence.confidence_train --original_model_dir workdir/small_score_model --run_name confidence_model --inference_steps 20 --samples_per_complex 7 --batch_size 16 --n_epochs 100 --lr 3e-4 --scheduler_patience 50 --ns 24 --nv 6 --num_conv_layers 5 --dynamic_max_cross --scale_by_sigma --dropout 0.1 --all_atoms --remove_hs --c_alpha_max_neighbors 24 --receptor_radius 15 --esm_embeddings_path data/esm2_3billion_embeddings.pt --main_metric loss --main_metric_goal min --best_model_save_frequency 5 --rmsd_classification_cutoff 2 --cache_creation_id 1 --cache_ids_to_combine 1 2 3 4 + +first with `--cache_creation_id 1` then `--cache_creation_id 2` etc. 
up to 4 + +Now everything is trained and you can run inference with: + + python -m evaluate --model_dir workdir/big_score_model --ckpt best_ema_inference_epoch_model.pt --confidence_ckpt best_model_epoch75.pt --confidence_model_dir workdir/confidence_model --run_name DiffDockInference --inference_steps 20 --split_path data/splits/timesplit_test --samples_per_complex 40 --batch_size 10 --actual_steps 18 --no_final_step_noise + +Note: the notebook `data/apo_alignment.ipynb` contains the code used to align the ESMFold-generated apo-structures to the holo-structures. + +## Citation + @article{corso2023diffdock, + title={DiffDock: Diffusion Steps, Twists, and Turns for Molecular Docking}, + author = {Corso, Gabriele and Stärk, Hannes and Jing, Bowen and Barzilay, Regina and Jaakkola, Tommi}, + journal={International Conference on Learning Representations (ICLR)}, + year={2023} + } + +## License +MIT + +## Acknowledgements + +We thank Wei Lu and Rachel Wu for pointing out some issues with the code. 
+ + +![Alt Text](visualizations/example_6agt_symmetric.gif) diff --git a/forks/DiffDockv1/baselines/baseline_evaluation.py b/forks/DiffDockv1/baselines/baseline_evaluation.py new file mode 100644 index 00000000..6ce83de2 --- /dev/null +++ b/forks/DiffDockv1/baselines/baseline_evaluation.py @@ -0,0 +1,219 @@ +# small script to extract the ligand and save it in a separate file because GNINA will use the ligand position as initial pose +import os + +import plotly.express as px +import time +from argparse import FileType, ArgumentParser + +import numpy as np +import pandas as pd +import wandb +from biopandas.pdb import PandasPdb +from rdkit import Chem + +from tqdm import tqdm + +from datasets.pdbbind import read_mol +from datasets.process_mols import read_molecule +from utils.utils import read_strings_from_txt, get_symmetry_rmsd + +parser = ArgumentParser() +parser.add_argument('--config', type=FileType(mode='r'), default=None) +parser.add_argument('--run_name', type=str, default='gnina_results', help='') +parser.add_argument('--data_dir', type=str, default='data/PDBBind_processed', help='') +parser.add_argument('--results_path', type=str, default='results/user_inference', help='Path to folder with trained model and hyperparameters') +parser.add_argument('--file_suffix', type=str, default='_baseline_ligand.pdb', help='Path to folder with trained model and hyperparameters') +parser.add_argument('--project', type=str, default='ligbind_inf', help='') +parser.add_argument('--wandb', action='store_true', default=False, help='') +parser.add_argument('--file_to_exclude', type=str, default=None, help='') +parser.add_argument('--all_dirs_in_results', action='store_true', default=True, help='Evaluate all directories in the results path instead of using directly looking for the names') +parser.add_argument('--num_predictions', type=int, default=10, help='') +parser.add_argument('--no_id_in_filename', action='store_true', default=False, help='') +args = parser.parse_args() + 
+print('Reading paths and names.') +names = read_strings_from_txt(f'data/splits/timesplit_test') +names_no_rec_overlap = read_strings_from_txt(f'data/splits/timesplit_test_no_rec_overlap') +results_path_containments = os.listdir(args.results_path) + +if args.wandb: + wandb.init( + entity='coarse-graining-mit', + settings=wandb.Settings(start_method="fork"), + project=args.project, + name=args.run_name, + config=args + ) + +all_times = [] +successful_names_list = [] +rmsds_list = [] +centroid_distances_list = [] +min_cross_distances_list = [] +min_self_distances_list = [] +without_rec_overlap_list = [] +start_time = time.time() +for i, name in enumerate(tqdm(names)): + mol = read_mol(args.data_dir, name, remove_hs=True) + mol = Chem.RemoveAllHs(mol) + orig_ligand_pos = np.array(mol.GetConformer().GetPositions()) + + if args.all_dirs_in_results: + directory_with_name = [directory for directory in results_path_containments if name in directory][0] + ligand_pos = [] + for i in range(args.num_predictions): + file_paths = os.listdir(os.path.join(args.results_path, directory_with_name)) + file_path = [path for path in file_paths if f'rank{i+1}' in path][0] + if args.file_to_exclude is not None and args.file_to_exclude in file_path: continue + mol_pred = read_molecule(os.path.join(args.results_path, directory_with_name, file_path),remove_hs=True, sanitize=True) + mol_pred = Chem.RemoveAllHs(mol_pred) + ligand_pos.append(mol_pred.GetConformer().GetPositions()) + ligand_pos = np.asarray(ligand_pos) + else: + if not os.path.exists(os.path.join(args.results_path, name, f'{"" if args.no_id_in_filename else name}{args.file_suffix}')): raise Exception('path did not exists:', os.path.join(args.results_path, name, f'{"" if args.no_id_in_filename else name}{args.file_suffix}')) + mol_pred = read_molecule(os.path.join(args.results_path, name, f'{"" if args.no_id_in_filename else name}{args.file_suffix}'), remove_hs=True, sanitize=True) + if mol_pred == None: + print("Skipping ", 
name, ' because RDKIT could not read it.') + continue + mol_pred = Chem.RemoveAllHs(mol_pred) + ligand_pos = np.asarray([np.array(mol_pred.GetConformer(i).GetPositions()) for i in range(args.num_predictions)]) + try: + rmsd = get_symmetry_rmsd(mol, orig_ligand_pos, [l for l in ligand_pos], mol_pred) + except Exception as e: + print("Using non corrected RMSD because of the error:", e) + rmsd = np.sqrt(((ligand_pos - orig_ligand_pos) ** 2).sum(axis=2).mean(axis=1)) + + rmsds_list.append(rmsd) + centroid_distances_list.append(np.linalg.norm(ligand_pos.mean(axis=1) - orig_ligand_pos[None,:].mean(axis=1), axis=1)) + + rec_path = os.path.join(args.data_dir, name, f'{name}_protein_processed.pdb') + if not os.path.exists(rec_path): + rec_path = os.path.join(args.data_dir, name,f'{name}_protein_obabel_reduce.pdb') + rec = PandasPdb().read_pdb(rec_path) + rec_df = rec.df['ATOM'] + receptor_pos = rec_df[['x_coord', 'y_coord', 'z_coord']].to_numpy().squeeze().astype(np.float32) + receptor_pos = np.tile(receptor_pos, (args.num_predictions, 1, 1)) + + cross_distances = np.linalg.norm(receptor_pos[:, :, None, :] - ligand_pos[:, None, :, :], axis=-1) + self_distances = np.linalg.norm(ligand_pos[:, :, None, :] - ligand_pos[:, None, :, :], axis=-1) + self_distances = np.where(np.eye(self_distances.shape[2]), np.inf, self_distances) + min_cross_distances_list.append(np.min(cross_distances, axis=(1,2))) + min_self_distances_list.append(np.min(self_distances, axis=(1, 2))) + successful_names_list.append(name) + without_rec_overlap_list.append(1 if name in names_no_rec_overlap else 0) +performance_metrics = {} +for overlap in ['', 'no_overlap_']: + if 'no_overlap_' == overlap: + without_rec_overlap = np.array(without_rec_overlap_list, dtype=bool) + rmsds = np.array(rmsds_list)[without_rec_overlap] + centroid_distances = np.array(centroid_distances_list)[without_rec_overlap] + min_cross_distances = np.array(min_cross_distances_list)[without_rec_overlap] + min_self_distances = 
np.array(min_self_distances_list)[without_rec_overlap] + successful_names = np.array(successful_names_list)[without_rec_overlap] + else: + rmsds = np.array(rmsds_list) + centroid_distances = np.array(centroid_distances_list) + min_cross_distances = np.array(min_cross_distances_list) + min_self_distances = np.array(min_self_distances_list) + successful_names = np.array(successful_names_list) + + np.save(os.path.join(args.results_path, f'{overlap}rmsds.npy'), rmsds) + np.save(os.path.join(args.results_path, f'{overlap}names.npy'), successful_names) + np.save(os.path.join(args.results_path, f'{overlap}min_cross_distances.npy'), np.array(min_cross_distances)) + np.save(os.path.join(args.results_path, f'{overlap}min_self_distances.npy'), np.array(min_self_distances)) + + performance_metrics.update({ + f'{overlap}steric_clash_fraction': (100 * (min_cross_distances < 0.4).sum() / len(min_cross_distances) / args.num_predictions).__round__(2), + f'{overlap}self_intersect_fraction': (100 * (min_self_distances < 0.4).sum() / len(min_self_distances) / args.num_predictions).__round__(2), + f'{overlap}mean_rmsd': rmsds[:,0].mean(), + f'{overlap}rmsds_below_2': (100 * (rmsds[:,0] < 2).sum() / len(rmsds[:,0])), + f'{overlap}rmsds_below_5': (100 * (rmsds[:,0] < 5).sum() / len(rmsds[:,0])), + f'{overlap}rmsds_percentile_25': np.percentile(rmsds[:,0], 25).round(2), + f'{overlap}rmsds_percentile_50': np.percentile(rmsds[:,0], 50).round(2), + f'{overlap}rmsds_percentile_75': np.percentile(rmsds[:,0], 75).round(2), + + f'{overlap}mean_centroid': centroid_distances[:,0].mean().__round__(2), + f'{overlap}centroid_below_2': (100 * (centroid_distances[:,0] < 2).sum() / len(centroid_distances[:,0])).__round__(2), + f'{overlap}centroid_below_5': (100 * (centroid_distances[:,0] < 5).sum() / len(centroid_distances[:,0])).__round__(2), + f'{overlap}centroid_percentile_25': np.percentile(centroid_distances[:,0], 25).round(2), + f'{overlap}centroid_percentile_50': 
np.percentile(centroid_distances[:,0], 50).round(2), + f'{overlap}centroid_percentile_75': np.percentile(centroid_distances[:,0], 75).round(2), + }) + + top5_rmsds = np.min(rmsds[:, :5], axis=1) + top5_centroid_distances = centroid_distances[np.arange(rmsds.shape[0])[:,None],np.argsort(rmsds[:, :5], axis=1)][:,0] + top5_min_cross_distances = min_cross_distances[np.arange(rmsds.shape[0])[:,None],np.argsort(rmsds[:, :5], axis=1)][:,0] + top5_min_self_distances = min_self_distances[np.arange(rmsds.shape[0])[:,None],np.argsort(rmsds[:, :5], axis=1)][:,0] + performance_metrics.update({ + f'{overlap}top5_steric_clash_fraction': (100 * (top5_min_cross_distances < 0.4).sum() / len(top5_min_cross_distances)).__round__(2), + f'{overlap}top5_self_intersect_fraction': (100 * (top5_min_self_distances < 0.4).sum() / len(top5_min_self_distances)).__round__(2), + f'{overlap}top5_rmsds_below_2': (100 * (top5_rmsds < 2).sum() / len(top5_rmsds)).__round__(2), + f'{overlap}top5_rmsds_below_5': (100 * (top5_rmsds < 5).sum() / len(top5_rmsds)).__round__(2), + f'{overlap}top5_rmsds_percentile_25': np.percentile(top5_rmsds, 25).round(2), + f'{overlap}top5_rmsds_percentile_50': np.percentile(top5_rmsds, 50).round(2), + f'{overlap}top5_rmsds_percentile_75': np.percentile(top5_rmsds, 75).round(2), + + f'{overlap}top5_centroid_below_2': (100 * (top5_centroid_distances < 2).sum() / len(top5_centroid_distances)).__round__(2), + f'{overlap}top5_centroid_below_5': (100 * (top5_centroid_distances < 5).sum() / len(top5_centroid_distances)).__round__(2), + f'{overlap}top5_centroid_percentile_25': np.percentile(top5_centroid_distances, 25).round(2), + f'{overlap}top5_centroid_percentile_50': np.percentile(top5_centroid_distances, 50).round(2), + f'{overlap}top5_centroid_percentile_75': np.percentile(top5_centroid_distances, 75).round(2), + }) + + + top10_rmsds = np.min(rmsds[:, :10], axis=1) + top10_centroid_distances = centroid_distances[np.arange(rmsds.shape[0])[:,None],np.argsort(rmsds[:, :10], 
axis=1)][:,0] + top10_min_cross_distances = min_cross_distances[np.arange(rmsds.shape[0])[:,None],np.argsort(rmsds[:, :10], axis=1)][:,0] + top10_min_self_distances = min_self_distances[np.arange(rmsds.shape[0])[:,None],np.argsort(rmsds[:, :10], axis=1)][:,0] + performance_metrics.update({ + f'{overlap}top10_self_intersect_fraction': (100 * (top10_min_self_distances < 0.4).sum() / len(top10_min_self_distances)).__round__(2), + f'{overlap}top10_steric_clash_fraction': ( 100 * (top10_min_cross_distances < 0.4).sum() / len(top10_min_cross_distances)).__round__(2), + f'{overlap}top10_rmsds_below_2': (100 * (top10_rmsds < 2).sum() / len(top10_rmsds)).__round__(2), + f'{overlap}top10_rmsds_below_5': (100 * (top10_rmsds < 5).sum() / len(top10_rmsds)).__round__(2), + f'{overlap}top10_rmsds_percentile_25': np.percentile(top10_rmsds, 25).round(2), + f'{overlap}top10_rmsds_percentile_50': np.percentile(top10_rmsds, 50).round(2), + f'{overlap}top10_rmsds_percentile_75': np.percentile(top10_rmsds, 75).round(2), + + f'{overlap}top10_centroid_below_2': (100 * (top10_centroid_distances < 2).sum() / len(top10_centroid_distances)).__round__(2), + f'{overlap}top10_centroid_below_5': (100 * (top10_centroid_distances < 5).sum() / len(top10_centroid_distances)).__round__(2), + f'{overlap}top10_centroid_percentile_25': np.percentile(top10_centroid_distances, 25).round(2), + f'{overlap}top10_centroid_percentile_50': np.percentile(top10_centroid_distances, 50).round(2), + f'{overlap}top10_centroid_percentile_75': np.percentile(top10_centroid_distances, 75).round(2), + }) +for k in performance_metrics: + print(k, performance_metrics[k]) + +if args.wandb: + wandb.log(performance_metrics) + histogram_metrics_list = [('rmsd', rmsds[:,0]), + ('centroid_distance', centroid_distances[:,0]), + ('mean_rmsd', rmsds[:,0]), + ('mean_centroid_distance', centroid_distances[:,0])] + histogram_metrics_list.append(('top5_rmsds', top5_rmsds)) + histogram_metrics_list.append(('top5_centroid_distances', 
top5_centroid_distances)) + histogram_metrics_list.append(('top10_rmsds', top10_rmsds)) + histogram_metrics_list.append(('top10_centroid_distances', top10_centroid_distances)) + + os.makedirs(f'.plotly_cache/baseline_cache', exist_ok=True) + images = [] + for metric_name, metric in histogram_metrics_list: + d = {args.results_path: metric} + df = pd.DataFrame(data=d) + fig = px.ecdf(df, width=900, height=600, range_x=[0, 40]) + fig.add_vline(x=2, annotation_text='2 A;', annotation_font_size=20, annotation_position="top right", + line_dash='dash', line_color='firebrick', annotation_font_color='firebrick') + fig.add_vline(x=5, annotation_text='5 A;', annotation_font_size=20, annotation_position="top right", + line_dash='dash', line_color='green', annotation_font_color='green') + fig.update_xaxes(title=f'{metric_name} in Angstrom', title_font={"size": 20}, tickfont={"size": 20}) + fig.update_yaxes(title=f'Fraction of predictions with lower error', title_font={"size": 20}, + tickfont={"size": 20}) + fig.update_layout(autosize=False, margin={'l': 0, 'r': 0, 't': 0, 'b': 0}, plot_bgcolor='white', + paper_bgcolor='white', legend_title_text='Method', legend_title_font_size=17, + legend=dict(yanchor="bottom", y=0.1, xanchor="right", x=0.99, font=dict(size=17), ), ) + fig.update_xaxes(showgrid=True, gridcolor='lightgrey') + fig.update_yaxes(showgrid=True, gridcolor='lightgrey') + + fig.write_image(os.path.join(f'.plotly_cache/baseline_cache', f'{metric_name}.png')) + wandb.log({metric_name: wandb.Image(os.path.join(f'.plotly_cache/baseline_cache', f'{metric_name}.png'), caption=f"{metric_name}")}) + images.append(wandb.Image(os.path.join(f'.plotly_cache/baseline_cache', f'{metric_name}.png'), caption=f"{metric_name}")) + wandb.log({'images': images}) \ No newline at end of file diff --git a/forks/DiffDockv1/baselines/baseline_gnina.py b/forks/DiffDockv1/baselines/baseline_gnina.py new file mode 100644 index 00000000..cd150707 --- /dev/null +++ 
b/forks/DiffDockv1/baselines/baseline_gnina.py @@ -0,0 +1,175 @@ +# small script to extract the ligand and save it in a separate file because GNINA will use the ligand position as +# initial pose +import os +import shutil +import subprocess +import sys + +import time +from argparse import ArgumentParser, FileType +from datetime import datetime + +import numpy as np +import pandas as pd +from biopandas.pdb import PandasPdb +from rdkit import Chem +from rdkit.Chem import AllChem, MolToPDBFile +from scipy.spatial.distance import cdist + +from datasets.pdbbind import read_mol +from utils.utils import read_strings_from_txt + +parser = ArgumentParser() +parser.add_argument('--data_dir', type=str, default='data/PDBBind_processed', help='') +parser.add_argument('--file_suffix', type=str, default='_baseline_ligand', help='Path to folder with trained model and hyperparameters') +parser.add_argument('--results_path', type=str, default='results/gnina_predictions', help='') +parser.add_argument('--complex_names_path', type=str, default='data/splits/timesplit_test', help='') +parser.add_argument('--seed_molecules_path', type=str, default=None, help='Use the molecules at seed molecule path as initialization and only search around them') +parser.add_argument('--seed_molecule_filename', type=str, default='equibind_corrected.sdf', help='Use the molecules at seed molecule path as initialization and only search around them') +parser.add_argument('--smina', action='store_true', default=False, help='') +parser.add_argument('--no_gpu', action='store_true', default=False, help='') +parser.add_argument('--exhaustiveness', type=int, default=8, help='') +parser.add_argument('--num_cpu', type=int, default=16, help='') +parser.add_argument('--pocket_mode', action='store_true', default=False, help='') +parser.add_argument('--pocket_cutoff', type=int, default=5, help='') +parser.add_argument('--num_modes', type=int, default=10, help='') +parser.add_argument('--autobox_add', type=int, default=4, 
help='') +parser.add_argument('--use_p2rank_pocket', action='store_true', default=False, help='') +parser.add_argument('--skip_p2rank', action='store_true', default=False, help='') +parser.add_argument('--prank_path', type=str, default='/Users/hstark/projects/p2rank_2.3/prank', help='') +parser.add_argument('--skip_existing', action='store_true', default=False, help='') + + + + + +args = parser.parse_args() + +class Logger(object): + def __init__(self, logpath, syspart=sys.stdout): + self.terminal = syspart + self.log = open(logpath, "a") + + def write(self, message): + self.terminal.write(message) + self.log.write(message) + self.log.flush() + + def flush(self): + # this flush method is needed for python 3 compatibility. + # this handles the flush command by doing nothing. + # you might want to specify some extra behavior here. + pass + + +def log(*args): + print(f'[{datetime.now()}]', *args) + + +# parameters +names = read_strings_from_txt(args.complex_names_path) + +if os.path.exists(args.results_path) and not args.skip_existing: + shutil.rmtree(args.results_path) +os.makedirs(args.results_path, exist_ok=True) +sys.stdout = Logger(logpath=f'{args.results_path}/gnina.log', syspart=sys.stdout) +sys.stderr = Logger(logpath=f'{args.results_path}/error.log', syspart=sys.stderr) + +p2rank_cache_path = "results/.p2rank_cache" +if args.use_p2rank_pocket and not args.skip_p2rank: + os.makedirs(p2rank_cache_path, exist_ok=True) + pdb_files_cache = os.path.join(p2rank_cache_path,'pdb_files') + os.makedirs(pdb_files_cache, exist_ok=True) + with open(f"{p2rank_cache_path}/pdb_list_p2rank.txt", "w") as out: + for name in names: + shutil.copy(os.path.join(args.data_dir, name, f'{name}_protein_processed.pdb'), f'{pdb_files_cache}/{name}_protein_processed.pdb') + out.write(os.path.join('pdb_files', f'{name}_protein_processed.pdb\n')) + cmd = f"bash {args.prank_path} predict {p2rank_cache_path}/pdb_list_p2rank.txt -o {p2rank_cache_path}/p2rank_output -threads 4" + os.system(cmd) 
+ + +all_times = [] +start_time = time.time() +for i, name in enumerate(names): + os.makedirs(os.path.join(args.results_path, name), exist_ok=True) + log('\n') + log(f'complex {i} of {len(names)}') + # call gnina to find binding pose + rec_path = os.path.join(args.data_dir, name, f'{name}_protein_processed.pdb') + prediction_output_name = os.path.join(args.results_path, name, f'{name}{args.file_suffix}.pdb') + log_path = os.path.join(args.results_path, name, f'{name}{args.file_suffix}.log') + if args.seed_molecules_path is not None: seed_mol_path = os.path.join(args.seed_molecules_path, name, f'{args.seed_molecule_filename}') + if args.skip_existing and os.path.exists(prediction_output_name): continue + + if args.pocket_mode: + mol = read_mol(args.data_dir, name, remove_hs=False) + rec = PandasPdb().read_pdb(rec_path) + rec_df = rec.get(s='c-alpha') + rec_pos = rec_df[['x_coord', 'y_coord', 'z_coord']].to_numpy().squeeze().astype(np.float32) + lig_pos = mol.GetConformer().GetPositions() + d = cdist(rec_pos, lig_pos) + label = np.any(d < args.pocket_cutoff, axis=1) + + if np.any(label): + center_pocket = rec_pos[label].mean(axis=0) + else: + print("No pocket residue below minimum distance ", args.pocket_cutoff, "taking closest at", np.min(d)) + center_pocket = rec_pos[np.argmin(np.min(d, axis=1)[0])] + radius_pocket = np.max(np.linalg.norm(lig_pos - center_pocket[None, :], axis=1)) + diameter_pocket = radius_pocket * 2 + center_x = center_pocket[0] + size_x = diameter_pocket + 8 + center_y = center_pocket[1] + size_y = diameter_pocket + 8 + center_z = center_pocket[2] + size_z = diameter_pocket + 8 + + + mol_rdkit = read_mol(args.data_dir, name, remove_hs=False) + single_time = time.time() + + mol_rdkit.RemoveAllConformers() + ps = AllChem.ETKDGv2() + id = AllChem.EmbedMolecule(mol_rdkit, ps) + if id == -1: + print('rdkit pos could not be generated without using random pos. 
using random pos now.') + ps.useRandomCoords = True + AllChem.EmbedMolecule(mol_rdkit, ps) + AllChem.MMFFOptimizeMolecule(mol_rdkit, confId=0) + rdkit_mol_path = os.path.join(args.data_dir, name, f'{name}_rdkit_ligand.pdb') + MolToPDBFile(mol_rdkit, rdkit_mol_path) + + fallback_without_p2rank = False + if args.use_p2rank_pocket: + df = pd.read_csv(f'{p2rank_cache_path}/p2rank_output/{name}_protein_processed.pdb_predictions.csv') + rdkit_lig_pos = mol_rdkit.GetConformer().GetPositions() + diameter_pocket = np.max(cdist(rdkit_lig_pos, rdkit_lig_pos)) + size_x = diameter_pocket + args.autobox_add * 2 + size_y = diameter_pocket + args.autobox_add * 2 + size_z = diameter_pocket + args.autobox_add * 2 + if df.empty: + fallback_without_p2rank = True + else: + center_x = df.iloc[0][' center_x'] + center_y = df.iloc[0][' center_y'] + center_z = df.iloc[0][' center_z'] + + + + log(f'processing {rec_path}') + if not args.pocket_mode and not args.use_p2rank_pocket or fallback_without_p2rank: + return_code = subprocess.run( + f"gnina --receptor {rec_path} --ligand {rdkit_mol_path} --num_modes {args.num_modes} -o {prediction_output_name} {'--no_gpu' if args.no_gpu else ''} --autobox_ligand {rec_path if args.seed_molecules_path is None else seed_mol_path} --autobox_add {args.autobox_add} --log {log_path} --exhaustiveness {args.exhaustiveness} --cpu {args.num_cpu} {'--cnn_scoring none' if args.smina else ''}", + shell=True) + else: + return_code = subprocess.run( + f"gnina --receptor {rec_path} --ligand {rdkit_mol_path} --num_modes {args.num_modes} -o {prediction_output_name} {'--no_gpu' if args.no_gpu else ''} --log {log_path} --exhaustiveness {args.exhaustiveness} --cpu {args.num_cpu} {'--cnn_scoring none' if args.smina else ''} --center_x {center_x} --center_y {center_y} --center_z {center_z} --size_x {size_x} --size_y {size_y} --size_z {size_z}", + shell=True) + log(return_code) + all_times.append(time.time() - single_time) + + log("single time: --- %s seconds ---" % 
(time.time() - single_time)) + log("time so far: --- %s seconds ---" % (time.time() - start_time)) + log('\n') +log(all_times) +log("--- %s seconds ---" % (time.time() - start_time)) diff --git a/forks/DiffDockv1/baselines/baseline_run_tankbind_parallel.sh b/forks/DiffDockv1/baselines/baseline_run_tankbind_parallel.sh new file mode 100644 index 00000000..7ac71588 --- /dev/null +++ b/forks/DiffDockv1/baselines/baseline_run_tankbind_parallel.sh @@ -0,0 +1,5 @@ +for i in $(seq 0 15); do + python baseline_tankbind_runtime.py --parallel_id $i --parallel_tot 16 --prank_path /data/rsg/nlp/hstark/TankBind/packages/p2rank_2.3/prank --data_dir /data/rsg/nlp/hstark/ligbind/data/PDBBind_processed --split_path /data/rsg/nlp/hstark/ligbind/data/splits/timesplit_test --results_path /data/rsg/nlp/hstark/ligbind/results/tankbind_16_worker_runtime --device cpu --skip_p2rank --num_workers 1 --skip_multiple_pocket_outputs & +done +wait + diff --git a/forks/DiffDockv1/baselines/baseline_tankbind_evaluation.py b/forks/DiffDockv1/baselines/baseline_tankbind_evaluation.py new file mode 100644 index 00000000..6cb2666c --- /dev/null +++ b/forks/DiffDockv1/baselines/baseline_tankbind_evaluation.py @@ -0,0 +1,239 @@ + +import copy +import os + +import plotly.express as px +import time +from argparse import FileType, ArgumentParser + +import numpy as np +import pandas as pd +import wandb +from biopandas.pdb import PandasPdb +from rdkit import Chem +from rdkit.Chem import RemoveHs + +from tqdm import tqdm + +from datasets.pdbbind import read_mol +from datasets.process_mols import read_molecule, read_sdf_or_mol2 +from utils.utils import read_strings_from_txt, get_symmetry_rmsd, remove_all_hs + +parser = ArgumentParser() +parser.add_argument('--config', type=FileType(mode='r'), default=None) +parser.add_argument('--run_name', type=str, default='tankbind', help='') +parser.add_argument('--data_dir', type=str, default='data/PDBBind_processed', help='') +parser.add_argument('--renumbered_atoms_dir', 
type=str, default='../TankBind/examples/tankbind_pdb/renumber_atom_index_same_as_smiles', help='') +parser.add_argument('--results_path', type=str, default='results/tankbind_top5', help='Path to folder with trained model and hyperparameters') +parser.add_argument('--project', type=str, default='ligbind_inf', help='') +parser.add_argument('--wandb', action='store_true', default=True, help='') +parser.add_argument('--num_predictions', type=int, default=5, help='') +args = parser.parse_args() + +names = read_strings_from_txt(f'data/splits/timesplit_test') +names_no_rec_overlap = read_strings_from_txt(f'data/splits/timesplit_test_no_rec_overlap') + +if args.wandb: + wandb.init( + entity='coarse-graining-mit', + settings=wandb.Settings(start_method="fork"), + project=args.project, + name=args.run_name, + config=args + ) + +all_times = [] +rmsds_list = [] +unsym_rmsds_list = [] +centroid_distances_list = [] +min_cross_distances_list = [] +min_self_distances_list = [] +made_prediction_list = [] +steric_clash_list = [] +without_rec_overlap_list = [] + +start_time = time.time() +successful_names_list = [] +for i, name in enumerate(tqdm(names)): + mol, _ = read_sdf_or_mol2(f"{args.renumbered_atoms_dir}/{name}.sdf", None) + sm = Chem.MolToSmiles(mol) + m_order = list(mol.GetPropsAsDict(includePrivate=True, includeComputed=True)['_smilesAtomOutputOrder']) + mol = Chem.RenumberAtoms(mol, m_order) + mol = Chem.RemoveHs(mol) + orig_ligand_pos = np.array(mol.GetConformer().GetPositions()) + + assert(os.path.exists(os.path.join(args.results_path, name, f'{name}_tankbind_0.sdf'))) + ligand_pos = [] + for i in range(args.num_predictions): + if not os.path.exists(os.path.join(args.results_path, name, f'{name}_tankbind_{i}.sdf')): break + mol_pred, _ = read_sdf_or_mol2(os.path.join(args.results_path, name, f'{name}_tankbind_{i}.sdf'),None) + sm = Chem.MolToSmiles(mol_pred) + m_order = list(mol_pred.GetPropsAsDict(includePrivate=True, includeComputed=True)['_smilesAtomOutputOrder']) + 
mol_pred = Chem.RenumberAtoms(mol_pred, m_order) + mol_pred = RemoveHs(mol_pred) + ligand_pos.append(np.array(mol_pred.GetConformer().GetPositions())) + ligand_pos = np.asarray(ligand_pos) + + try: + unsym_rmsd = np.sqrt(((ligand_pos - orig_ligand_pos) ** 2).sum(axis=2).mean(axis=1)) + rmsd = np.array(get_symmetry_rmsd(mol, orig_ligand_pos, [l for l in ligand_pos], mol_pred)) + except Exception as e: + print("Using non corrected RMSD because of the error:", e) + rmsd = np.sqrt(((ligand_pos - orig_ligand_pos) ** 2).sum(axis=2).mean(axis=1)) + + num_pockets = len(ligand_pos) + unsym_rmsds_list.append(np.lib.pad(unsym_rmsd, (0,10-len(unsym_rmsd)), 'constant', constant_values=(0)) ) + rmsds_list.append(np.lib.pad(rmsd, (0,10-len(rmsd)), 'constant', constant_values=(0)) ) + centroid_distance = np.linalg.norm(ligand_pos.mean(axis=1) - orig_ligand_pos[None,:].mean(axis=1), axis=1) + centroid_distances_list.append(np.lib.pad(centroid_distance, (0,10-len(rmsd)), 'constant', constant_values=(0)) ) + + rec_path = os.path.join(args.data_dir, name, f'{name}_protein_processed.pdb') + if not os.path.exists(rec_path): + rec_path = os.path.join(args.data_dir, name,f'{name}_protein_obabel_reduce.pdb') + rec = PandasPdb().read_pdb(rec_path) + rec_df = rec.df['ATOM'] + receptor_pos = rec_df[['x_coord', 'y_coord', 'z_coord']].to_numpy().squeeze().astype(np.float32) + receptor_pos = np.tile(receptor_pos, (10, 1, 1)) + + ligand_pos_padded = np.lib.pad(ligand_pos, ((0,10-len(ligand_pos)), (0,0), (0,0)), 'constant', constant_values=(np.inf)) + ligand_pos_padded_zero = np.lib.pad(ligand_pos, ((0, 10 - len(ligand_pos)), (0, 0), (0, 0)), 'constant',constant_values=0) + cross_distances = np.linalg.norm(receptor_pos[:, :, None, :] - ligand_pos_padded[:, None, :, :], axis=-1) + self_distances = np.linalg.norm(ligand_pos_padded_zero[:, :, None, :] - ligand_pos_padded_zero[:, None, :, :], axis=-1) + self_distances = np.where(np.eye(self_distances.shape[2]), np.inf, self_distances) + 
min_self_distances_list.append(np.min(self_distances, axis=(1, 2))) + min_cross_distance = np.min(cross_distances, axis=(1, 2)) + individual_made_prediction = np.lib.pad(np.ones(num_pockets), (0,10-len(rmsd)), 'constant', constant_values=(0)) + made_prediction_list.append(individual_made_prediction) + min_cross_distances_list.append(min_cross_distance) + successful_names_list.append(name) + without_rec_overlap_list.append(1 if name in names_no_rec_overlap else 0) + +performance_metrics = {} +for overlap in ['', 'no_overlap_']: + if 'no_overlap_' == overlap: + without_rec_overlap = np.array(without_rec_overlap_list, dtype=bool) + unsym_rmsds = np.array(unsym_rmsds_list)[without_rec_overlap] + rmsds = np.array(rmsds_list)[without_rec_overlap] + centroid_distances = np.array(centroid_distances_list)[without_rec_overlap] + min_cross_distances = np.array(min_cross_distances_list)[without_rec_overlap] + min_self_distances = np.array(min_self_distances_list)[without_rec_overlap] + made_prediction = np.array(made_prediction_list)[without_rec_overlap] + successful_names = np.array(successful_names_list)[without_rec_overlap] + else: + unsym_rmsds = np.array(unsym_rmsds_list) + rmsds = np.array(rmsds_list) + centroid_distances = np.array(centroid_distances_list) + min_cross_distances = np.array(min_cross_distances_list) + min_self_distances = np.array(min_self_distances_list) + made_prediction = np.array(made_prediction_list) + successful_names = np.array(successful_names_list) + + inf_rmsds = copy.deepcopy(rmsds) + inf_rmsds[~made_prediction.astype(bool)] = np.inf + inf_centroid_distances = copy.deepcopy(centroid_distances) + inf_centroid_distances[~made_prediction.astype(bool)] = np.inf + + np.save(os.path.join(args.results_path, f'{overlap}rmsds.npy'), rmsds) + np.save(os.path.join(args.results_path, f'{overlap}names.npy'), np.array(successful_names)) + np.save(os.path.join(args.results_path, f'{overlap}centroid_distances.npy'), centroid_distances) + 
np.save(os.path.join(args.results_path, f'{overlap}min_cross_distances.npy'), min_cross_distances) + np.save(os.path.join(args.results_path, f'{overlap}min_self_distances.npy'), min_self_distances) + + performance_metrics.update({ + f'{overlap}self_intersect_fraction': (100 * (min_self_distances[:, 0] < 0.4).sum() / len(min_self_distances[:, 0])), + f'{overlap}steric_clash_fraction': (100 * (min_cross_distances[:,0] < 0.4).sum() / len(min_cross_distances[:,0])), + f'{overlap}mean_rmsd': rmsds[:,0].mean(), + f'{overlap}unsym_rmsds_below_2': (100 * (unsym_rmsds[:,0] < 2).sum() / len(unsym_rmsds[:,0])), + f'{overlap}rmsds_below_2': (100 * (rmsds[:,0] < 2).sum() / len(rmsds[:,0])), + f'{overlap}rmsds_below_5': (100 * (rmsds[:,0] < 5).sum() / len(rmsds[:,0])), + f'{overlap}rmsds_percentile_25': np.percentile(rmsds[:,0], 25).round(2), + f'{overlap}rmsds_percentile_50': np.percentile(rmsds[:,0], 50).round(2), + f'{overlap}rmsds_percentile_75': np.percentile(rmsds[:,0], 75).round(2), + + f'{overlap}mean_centroid': centroid_distances[:,0].mean().__round__(2), + f'{overlap}centroid_below_2': (100 * (centroid_distances[:,0] < 2).sum() / len(centroid_distances[:,0])).__round__(2), + f'{overlap}centroid_below_5': (100 * (centroid_distances[:,0] < 5).sum() / len(centroid_distances[:,0])).__round__(2), + f'{overlap}centroid_percentile_25': np.percentile(centroid_distances[:,0], 25).round(2), + f'{overlap}centroid_percentile_50': np.percentile(centroid_distances[:,0], 50).round(2), + f'{overlap}centroid_percentile_75': np.percentile(centroid_distances[:,0], 75).round(2), + }) + + top5_rmsds = np.min(inf_rmsds[:, :5], axis=1) + top5_centroid_distances = centroid_distances[np.arange(rmsds.shape[0])[:,None],np.argsort(inf_rmsds[:, :5], axis=1)][:,0] + top5_min_cross_distances = min_cross_distances[np.arange(rmsds.shape[0])[:,None],np.argsort(inf_rmsds[:, :5], axis=1)][:,0] + top5_min_self_distances = min_self_distances[np.arange(rmsds.shape[0])[:,None],np.argsort(inf_rmsds[:, :5], 
axis=1)][:,0] + performance_metrics.update({ + f'{overlap}top5_steric_clash_fraction': (100 * (top5_min_cross_distances < 0.4).sum() / len(top5_min_cross_distances)).__round__(2), + f'{overlap}top5_self_intersect_fraction': (100 * (top5_min_self_distances < 0.4).sum() / len(top5_min_self_distances)).__round__(2), + f'{overlap}top5_rmsds_below_2': (100 * (top5_rmsds < 2).sum() / len(top5_rmsds)).__round__(2), + f'{overlap}top5_rmsds_below_5': (100 * (top5_rmsds < 5).sum() / len(top5_rmsds)).__round__(2), + f'{overlap}top5_rmsds_percentile_25': np.percentile(top5_rmsds, 25).round(2), + f'{overlap}top5_rmsds_percentile_50': np.percentile(top5_rmsds, 50).round(2), + f'{overlap}top5_rmsds_percentile_75': np.percentile(top5_rmsds, 75).round(2), + + f'{overlap}top5_centroid_below_2': (100 * (top5_centroid_distances < 2).sum() / len(top5_centroid_distances)).__round__(2), + f'{overlap}top5_centroid_below_5': (100 * (top5_centroid_distances < 5).sum() / len(top5_centroid_distances)).__round__(2), + f'{overlap}top5_centroid_percentile_25': np.percentile(top5_centroid_distances, 25).round(2), + f'{overlap}top5_centroid_percentile_50': np.percentile(top5_centroid_distances, 50).round(2), + f'{overlap}top5_centroid_percentile_75': np.percentile(top5_centroid_distances, 75).round(2), + }) + + + + + top10_rmsds = np.min(inf_rmsds[:, :10], axis=1) + top10_centroid_distances = centroid_distances[np.arange(rmsds.shape[0])[:,None],np.argsort(inf_rmsds[:, :10], axis=1)][:,0] + top10_min_cross_distances = min_cross_distances[np.arange(rmsds.shape[0])[:,None],np.argsort(inf_rmsds[:, :10], axis=1)][:,0] + top10_min_self_distances = min_self_distances[np.arange(rmsds.shape[0])[:,None],np.argsort(inf_rmsds[:, :10], axis=1)][:,0] + performance_metrics.update({ + f'{overlap}top10_steric_clash_fraction': (100 * (top10_min_cross_distances < 0.4).sum() / len(top10_min_cross_distances)).__round__(2), + f'{overlap}top10_self_intersect_fraction': (100 * (top10_min_self_distances < 0.4).sum() / 
len(top10_min_self_distances)).__round__(2), + f'{overlap}top10_rmsds_below_2': (100 * (top10_rmsds < 2).sum() / len(top10_rmsds)).__round__(2), + f'{overlap}top10_rmsds_below_5': (100 * (top10_rmsds < 5).sum() / len(top10_rmsds)).__round__(2), + f'{overlap}top10_rmsds_percentile_25': np.percentile(top10_rmsds, 25).round(2), + f'{overlap}top10_rmsds_percentile_50': np.percentile(top10_rmsds, 50).round(2), + f'{overlap}top10_rmsds_percentile_75': np.percentile(top10_rmsds, 75).round(2), + + f'{overlap}top10_centroid_below_2': (100 * (top10_centroid_distances < 2).sum() / len(top10_centroid_distances)).__round__(2), + f'{overlap}top10_centroid_below_5': (100 * (top10_centroid_distances < 5).sum() / len(top10_centroid_distances)).__round__(2), + f'{overlap}top10_centroid_percentile_25': np.percentile(top10_centroid_distances, 25).round(2), + f'{overlap}top10_centroid_percentile_50': np.percentile(top10_centroid_distances, 50).round(2), + f'{overlap}top10_centroid_percentile_75': np.percentile(top10_centroid_distances, 75).round(2), + }) +for k in performance_metrics: + print(k, performance_metrics[k]) + +if args.wandb: + wandb.log(performance_metrics) + histogram_metrics_list = [('rmsd', rmsds[:,0]), + ('centroid_distance', centroid_distances[:,0]), + ('mean_rmsd', rmsds[:,0]), + ('mean_centroid_distance', centroid_distances[:,0])] + histogram_metrics_list.append(('top5_rmsds', top5_rmsds)) + histogram_metrics_list.append(('top5_centroid_distances', top5_centroid_distances)) + histogram_metrics_list.append(('top10_rmsds', top10_rmsds)) + histogram_metrics_list.append(('top10_centroid_distances', top10_centroid_distances)) + + os.makedirs(f'.plotly_cache/baseline_cache', exist_ok=True) + images = [] + for metric_name, metric in histogram_metrics_list: + d = {args.results_path: metric} + df = pd.DataFrame(data=d) + fig = px.ecdf(df, width=900, height=600, range_x=[0, 40]) + fig.add_vline(x=2, annotation_text='2 A;', annotation_font_size=20, annotation_position="top 
right", + line_dash='dash', line_color='firebrick', annotation_font_color='firebrick') + fig.add_vline(x=5, annotation_text='5 A;', annotation_font_size=20, annotation_position="top right", + line_dash='dash', line_color='green', annotation_font_color='green') + fig.update_xaxes(title=f'{metric_name} in Angstrom', title_font={"size": 20}, tickfont={"size": 20}) + fig.update_yaxes(title=f'Fraction of predictions with lower error', title_font={"size": 20}, + tickfont={"size": 20}) + fig.update_layout(autosize=False, margin={'l': 0, 'r': 0, 't': 0, 'b': 0}, plot_bgcolor='white', + paper_bgcolor='white', legend_title_text='Method', legend_title_font_size=17, + legend=dict(yanchor="bottom", y=0.1, xanchor="right", x=0.99, font=dict(size=17), ), ) + fig.update_xaxes(showgrid=True, gridcolor='lightgrey') + fig.update_yaxes(showgrid=True, gridcolor='lightgrey') + + fig.write_image(os.path.join(f'.plotly_cache/baseline_cache', f'{metric_name}.png')) + wandb.log({metric_name: wandb.Image(os.path.join(f'.plotly_cache/baseline_cache', f'{metric_name}.png'), caption=f"{metric_name}")}) + images.append(wandb.Image(os.path.join(f'.plotly_cache/baseline_cache', f'{metric_name}.png'), caption=f"{metric_name}")) + wandb.log({'images': images}) \ No newline at end of file diff --git a/forks/DiffDockv1/baselines/baseline_tankbind_runtime.py b/forks/DiffDockv1/baselines/baseline_tankbind_runtime.py new file mode 100644 index 00000000..4df6eb1d --- /dev/null +++ b/forks/DiffDockv1/baselines/baseline_tankbind_runtime.py @@ -0,0 +1,342 @@ +# This file needs to be ran in the TANKBind repository together with baseline_run_tankbind_parallel.sh + +import sys +import time +from multiprocessing import Pool + + +import copy +import warnings +from argparse import ArgumentParser + +from rdkit.Chem import AllChem, RemoveHs + +from feature_utils import save_cleaned_protein, read_mol +from generation_utils import get_LAS_distance_constraint_mask, get_info_pred_distance, write_with_new_coords +import 
logging +from torch_geometric.loader import DataLoader +from tqdm import tqdm # pip install tqdm if fails. +from model import get_model +# from utils import * +import torch + + +from data import TankBind_prediction + +import os +import numpy as np +import pandas as pd +import rdkit.Chem as Chem +from feature_utils import generate_sdf_from_smiles_using_rdkit +from feature_utils import get_protein_feature +from Bio.PDB import PDBParser +from feature_utils import extract_torchdrug_feature_from_mol + + +def read_strings_from_txt(path): + # every line will be one element of the returned list + with open(path) as file: + lines = file.readlines() + return [line.rstrip() for line in lines] + + +def read_molecule(molecule_file, sanitize=False, calc_charges=False, remove_hs=False): + if molecule_file.endswith('.mol2'): + mol = Chem.MolFromMol2File(molecule_file, sanitize=False, removeHs=False) + elif molecule_file.endswith('.sdf'): + supplier = Chem.SDMolSupplier(molecule_file, sanitize=False, removeHs=False) + mol = supplier[0] + elif molecule_file.endswith('.pdbqt'): + with open(molecule_file) as file: + pdbqt_data = file.readlines() + pdb_block = '' + for line in pdbqt_data: + pdb_block += '{}\n'.format(line[:66]) + mol = Chem.MolFromPDBBlock(pdb_block, sanitize=False, removeHs=False) + elif molecule_file.endswith('.pdb'): + mol = Chem.MolFromPDBFile(molecule_file, sanitize=False, removeHs=False) + else: + return ValueError('Expect the format of the molecule_file to be ' + 'one of .mol2, .sdf, .pdbqt and .pdb, got {}'.format(molecule_file)) + try: + if sanitize or calc_charges: + Chem.SanitizeMol(mol) + + if calc_charges: + # Compute Gasteiger charges on the molecule. 
+ try: + AllChem.ComputeGasteigerCharges(mol) + except: + warnings.warn('Unable to compute charges for the molecule.') + + if remove_hs: + mol = Chem.RemoveHs(mol, sanitize=sanitize) + except: + return None + + return mol + + +def parallel_save_prediction(arguments): + dataset, y_pred_list, chosen,rdkit_mol_path, result_folder, name = arguments + for idx, line in chosen.iterrows(): + pocket_name = line['pocket_name'] + compound_name = line['compound_name'] + ligandName = compound_name.split("_")[1] + dataset_index = line['dataset_index'] + coords = dataset[dataset_index].coords.to('cpu') + protein_nodes_xyz = dataset[dataset_index].node_xyz.to('cpu') + n_compound = coords.shape[0] + n_protein = protein_nodes_xyz.shape[0] + y_pred = y_pred_list[dataset_index].reshape(n_protein, n_compound).to('cpu') + compound_pair_dis_constraint = torch.cdist(coords, coords) + mol = Chem.MolFromMolFile(rdkit_mol_path) + LAS_distance_constraint_mask = get_LAS_distance_constraint_mask(mol).bool() + pred_dist_info = get_info_pred_distance(coords, y_pred, protein_nodes_xyz, compound_pair_dis_constraint, + LAS_distance_constraint_mask=LAS_distance_constraint_mask, + n_repeat=1, show_progress=False) + + toFile = f'{result_folder}/{name}_tankbind_chosen.sdf' + new_coords = pred_dist_info.sort_values("loss")['coords'].iloc[0].astype(np.double) + write_with_new_coords(mol, new_coords, toFile) + +if __name__ == '__main__': + tankbind_src_folder = "../tankbind" + sys.path.insert(0, tankbind_src_folder) + torch.set_num_threads(16) + parser = ArgumentParser() + parser.add_argument('--data_dir', type=str, default='/Users/hstark/projects/ligbind/data/PDBBind_processed', help='') + parser.add_argument('--split_path', type=str, default='/Users/hstark/projects/ligbind/data/splits/timesplit_test', help='') + parser.add_argument('--prank_path', type=str, default='/Users/hstark/projects/p2rank_2.3/prank', help='') + parser.add_argument('--results_path', type=str, default='results/tankbind_results', 
help='') + parser.add_argument('--skip_existing', action='store_true', default=False, help='') + parser.add_argument('--skip_p2rank', action='store_true', default=False, help='') + parser.add_argument('--skip_multiple_pocket_outputs', action='store_true', default=False, help='') + parser.add_argument('--device', type=str, default='cpu', help='') + parser.add_argument('--num_workers', type=int, default=1, help='') + parser.add_argument('--parallel_id', type=int, default=0, help='') + parser.add_argument('--parallel_tot', type=int, default=1, help='') + args = parser.parse_args() + + device = args.device + cache_path = "tankbind_cache" + os.makedirs(cache_path, exist_ok=True) + os.makedirs(args.results_path, exist_ok=True) + + + + logging.basicConfig(level=logging.INFO) + model = get_model(0, logging, device) + # re-dock model + # modelFile = "../saved_models/re_dock.pt" + # self-dock model + modelFile = f"{tankbind_src_folder}/../saved_models/self_dock.pt" + + model.load_state_dict(torch.load(modelFile, map_location=device)) + _ = model.eval() + batch_size = 5 + names = read_strings_from_txt(args.split_path) + if args.parallel_tot > 1: + size = len(names) // args.parallel_tot + 1 + names = names[args.parallel_id*size:(args.parallel_id+1)*size] + rmsds = [] + + forward_pass_time = [] + times_preprocess = [] + times_inference = [] + top_10_generation_time = [] + top_1_generation_time = [] + start_time = time.time() + if not args.skip_p2rank: + for name in names: + if args.skip_existing and os.path.exists(f'{args.results_path}/{name}/{name}_tankbind_1.sdf'): continue + print("Now processing: ", name) + protein_path = f'{args.data_dir}/{name}/{name}_protein_processed.pdb' + cleaned_protein_path = f"{cache_path}/{name}_protein_tankbind_cleaned.pdb" # if you change this you also need to change below + parser = PDBParser(QUIET=True) + s = parser.get_structure(name, protein_path) + c = s[0] + clean_res_list, ligand_list = save_cleaned_protein(c, cleaned_protein_path) + + 
with open(f"{cache_path}/pdb_list_p2rank.txt", "w") as out: + for name in names: + out.write(f"{name}_protein_tankbind_cleaned.pdb\n") + cmd = f"bash {args.prank_path} predict {cache_path}/pdb_list_p2rank.txt -o {cache_path}/p2rank -threads 4" + os.system(cmd) + times_preprocess.append(time.time() - start_time) + p2_rank_time = time.time() - start_time + + + + + list_to_parallelize = [] + for name in tqdm(names): + single_preprocess_time = time.time() + if args.skip_existing and os.path.exists(f'{args.results_path}/{name}/{name}_tankbind_1.sdf'): continue + print("Now processing: ", name) + protein_path = f'{args.data_dir}/{name}/{name}_protein_processed.pdb' + ligand_path = f"{args.data_dir}/{name}/{name}_ligand.sdf" + cleaned_protein_path = f"{cache_path}/{name}_protein_tankbind_cleaned.pdb" # if you change this you also need to change below + rdkit_mol_path = f"{cache_path}/{name}_rdkit_ligand.sdf" + + parser = PDBParser(QUIET=True) + s = parser.get_structure(name, protein_path) + c = s[0] + clean_res_list, ligand_list = save_cleaned_protein(c, cleaned_protein_path) + lig, _ = read_mol(f"{args.data_dir}/{name}/{name}_ligand.sdf", f"{args.data_dir}/{name}/{name}_ligand.mol2") + + lig = RemoveHs(lig) + smiles = Chem.MolToSmiles(lig) + generate_sdf_from_smiles_using_rdkit(smiles, rdkit_mol_path, shift_dis=0) + + parser = PDBParser(QUIET=True) + s = parser.get_structure("x", cleaned_protein_path) + res_list = list(s.get_residues()) + + protein_dict = {} + protein_dict[name] = get_protein_feature(res_list) + compound_dict = {} + + mol = Chem.MolFromMolFile(rdkit_mol_path) + compound_dict[name + f"_{name}" + "_rdkit"] = extract_torchdrug_feature_from_mol(mol, has_LAS_mask=True) + + info = [] + for compound_name in list(compound_dict.keys()): + # use protein center as the block center. 
+ com = ",".join([str(a.round(3)) for a in protein_dict[name][0].mean(axis=0).numpy()]) + info.append([name, compound_name, "protein_center", com]) + + p2rankFile = f"{cache_path}/p2rank/{name}_protein_tankbind_cleaned.pdb_predictions.csv" + pocket = pd.read_csv(p2rankFile) + pocket.columns = pocket.columns.str.strip() + pocket_coms = pocket[['center_x', 'center_y', 'center_z']].values + for ith_pocket, com in enumerate(pocket_coms): + com = ",".join([str(a.round(3)) for a in com]) + info.append([name, compound_name, f"pocket_{ith_pocket + 1}", com]) + info = pd.DataFrame(info, columns=['protein_name', 'compound_name', 'pocket_name', 'pocket_com']) + + dataset_path = f"{cache_path}/{name}_dataset/" + os.system(f"rm -r {dataset_path}") + os.system(f"mkdir -p {dataset_path}") + dataset = TankBind_prediction(dataset_path, data=info, protein_dict=protein_dict, compound_dict=compound_dict) + + # dataset = TankBind_prediction(dataset_path) + times_preprocess.append(time.time() - single_preprocess_time) + single_forward_pass_time = time.time() + data_loader = DataLoader(dataset, batch_size=batch_size, follow_batch=['x', 'y', 'compound_pair'], shuffle=False, + num_workers=0) + affinity_pred_list = [] + y_pred_list = [] + for data in tqdm(data_loader): + data = data.to(device) + y_pred, affinity_pred = model(data) + affinity_pred_list.append(affinity_pred.detach().cpu()) + for i in range(data.y_batch.max() + 1): + y_pred_list.append((y_pred[data['y_batch'] == i]).detach().cpu()) + + affinity_pred_list = torch.cat(affinity_pred_list) + forward_pass_time.append(time.time() - single_forward_pass_time) + output_info = copy.deepcopy(dataset.data) + output_info['affinity'] = affinity_pred_list + output_info['dataset_index'] = range(len(output_info)) + output_info_sorted = output_info.sort_values('affinity', ascending=False) + + + result_folder = f'{args.results_path}/{name}' + os.makedirs(result_folder, exist_ok=True) + 
output_info_sorted.to_csv(f"{result_folder}/output_info_sorted_by_affinity.csv") + + if not args.skip_multiple_pocket_outputs: + for idx, (dataframe_idx, line) in enumerate(copy.deepcopy(output_info_sorted).iterrows()): + single_top10_generation_time = time.time() + pocket_name = line['pocket_name'] + compound_name = line['compound_name'] + ligandName = compound_name.split("_")[1] + coords = dataset[dataframe_idx].coords.to('cpu') + protein_nodes_xyz = dataset[dataframe_idx].node_xyz.to('cpu') + n_compound = coords.shape[0] + n_protein = protein_nodes_xyz.shape[0] + y_pred = y_pred_list[dataframe_idx].reshape(n_protein, n_compound).to('cpu') + y = dataset[dataframe_idx].dis_map.reshape(n_protein, n_compound).to('cpu') + compound_pair_dis_constraint = torch.cdist(coords, coords) + mol = Chem.MolFromMolFile(rdkit_mol_path) + LAS_distance_constraint_mask = get_LAS_distance_constraint_mask(mol).bool() + pred_dist_info = get_info_pred_distance(coords, y_pred, protein_nodes_xyz, compound_pair_dis_constraint, + LAS_distance_constraint_mask=LAS_distance_constraint_mask, + n_repeat=1, show_progress=False) + + toFile = f'{result_folder}/{name}_tankbind_{idx}.sdf' + new_coords = pred_dist_info.sort_values("loss")['coords'].iloc[0].astype(np.double) + write_with_new_coords(mol, new_coords, toFile) + if idx < 10: + top_10_generation_time.append(time.time() - single_top10_generation_time) + if idx == 0: + top_1_generation_time.append(time.time() - single_top10_generation_time) + + output_info_chosen = copy.deepcopy(dataset.data) + output_info_chosen['affinity'] = affinity_pred_list + output_info_chosen['dataset_index'] = range(len(output_info_chosen)) + chosen = output_info_chosen.loc[ + output_info_chosen.groupby(['protein_name', 'compound_name'], sort=False)['affinity'].agg( + 'idxmax')].reset_index() + + list_to_parallelize.append((dataset, y_pred_list, chosen, rdkit_mol_path, result_folder, name)) + + chosen_generation_start_time = time.time() + if args.num_workers > 1: + p 
= Pool(args.num_workers, maxtasksperchild=1) + p.__enter__() + with tqdm(total=len(list_to_parallelize), desc=f'running optimization {i}/{len(list_to_parallelize)}') as pbar: + map_fn = p.imap_unordered if args.num_workers > 1 else map + for t in map_fn(parallel_save_prediction, list_to_parallelize): + pbar.update() + if args.num_workers > 1: p.__exit__(None, None, None) + chosen_generation_time = time.time() - chosen_generation_start_time + """ + lig, _ = read_mol(f"{args.data_dir}/{name}/{name}_ligand.sdf", f"{args.data_dir}/{name}/{name}_ligand.mol2") + sm = Chem.MolToSmiles(lig) + m_order = list(lig.GetPropsAsDict(includePrivate=True, includeComputed=True)['_smilesAtomOutputOrder']) + lig = Chem.RenumberAtoms(lig, m_order) + lig = Chem.RemoveAllHs(lig) + lig = RemoveHs(lig) + true_ligand_pos = np.array(lig.GetConformer().GetPositions()) + + toFile = f'{result_folder}/{name}_tankbind_chosen.sdf' + mol_pred, _ = read_mol(toFile, None) + sm = Chem.MolToSmiles(mol_pred) + m_order = list(mol_pred.GetPropsAsDict(includePrivate=True, includeComputed=True)['_smilesAtomOutputOrder']) + mol_pred = Chem.RenumberAtoms(mol_pred, m_order) + mol_pred = RemoveHs(mol_pred) + mol_pred_pos = np.array(mol_pred.GetConformer().GetPositions()) + rmsds.append(np.sqrt(((true_ligand_pos - mol_pred_pos) ** 2).sum(axis=1).mean(axis=0))) + print(np.sqrt(((true_ligand_pos - mol_pred_pos) ** 2).sum(axis=1).mean(axis=0))) + """ + forward_pass_time = np.array(forward_pass_time).sum() + times_preprocess = np.array(times_preprocess).sum() + times_inference = np.array(times_inference).sum() + top_10_generation_time = np.array(top_10_generation_time).sum() + top_1_generation_time = np.array(top_1_generation_time).sum() + + rmsds = np.array(rmsds) + + print(f'forward_pass_time: {forward_pass_time}') + print(f'times_preprocess: {times_preprocess}') + print(f'times_inference: {times_inference}') + print(f'top_10_generation_time: {top_10_generation_time}') + print(f'top_1_generation_time: 
{top_1_generation_time}') + print(f'chosen_generation_time: {chosen_generation_time}') + print(f'rmsds_below_2: {(100 * (rmsds < 2).sum() / len(rmsds))}') + print(f'p2rank Time: {p2_rank_time}') + print( + f'total_time: ' + f'{forward_pass_time + times_preprocess + times_inference + top_10_generation_time + top_1_generation_time + p2_rank_time}') + + with open(os.path.join(args.results_path, 'tankbind_log.log'), 'w') as file: + file.write(f'forward_pass_time: {forward_pass_time}') + file.write(f'times_preprocess: {times_preprocess}') + file.write(f'times_inference: {times_inference}') + file.write(f'top_10_generation_time: {top_10_generation_time}') + file.write(f'top_1_generation_time: {top_1_generation_time}') + file.write(f'rmsds_below_2: {(100 * (rmsds < 2).sum() / len(rmsds))}') + file.write(f'p2rank Time: {p2_rank_time}') + file.write(f'total_time: {forward_pass_time + times_preprocess + times_inference + top_10_generation_time + top_1_generation_time + p2_rank_time}') diff --git a/forks/DiffDockv1/confidence/confidence_train.py b/forks/DiffDockv1/confidence/confidence_train.py new file mode 100644 index 00000000..6e52280d --- /dev/null +++ b/forks/DiffDockv1/confidence/confidence_train.py @@ -0,0 +1,320 @@ +import gc +import math +import os + +import shutil + +from argparse import Namespace, ArgumentParser, FileType +import torch.nn.functional as F + +import wandb +import torch +from sklearn.metrics import roc_auc_score +from torch_geometric.loader import DataListLoader, DataLoader +from tqdm import tqdm + +from confidence.dataset import ConfidenceDataset +from utils.training import AverageMeter + +torch.multiprocessing.set_sharing_strategy('file_system') + +import yaml +from utils.utils import save_yaml_file, get_optimizer_and_scheduler, get_model + + +parser = ArgumentParser() +parser.add_argument('--config', type=FileType(mode='r'), default=None) +parser.add_argument('--original_model_dir', type=str, default='workdir', help='Path to folder with trained 
model and hyperparameters') +parser.add_argument('--restart_dir', type=str, default=None, help='') +parser.add_argument('--use_original_model_cache', action='store_true', default=False, help='If this is true, the same dataset as in the original model will be used. Otherwise, the dataset parameters are used.') +parser.add_argument('--data_dir', type=str, default='data/PDBBind_processed/', help='Folder containing original structures') +parser.add_argument('--ckpt', type=str, default='best_model.pt', help='Checkpoint to use inside the folder') +parser.add_argument('--model_save_frequency', type=int, default=0, help='Frequency with which to save the last model. If 0, then only the early stopping criterion best model is saved and overwritten.') +parser.add_argument('--best_model_save_frequency', type=int, default=0, help='Frequency with which to save the best model. If 0, then only the early stopping criterion best model is saved and overwritten.') +parser.add_argument('--run_name', type=str, default='test_confidence', help='') +parser.add_argument('--project', type=str, default='diffdock_confidence', help='') +parser.add_argument('--split_train', type=str, default='data/splits/timesplit_no_lig_overlap_train', help='Path of file defining the split') +parser.add_argument('--split_val', type=str, default='data/splits/timesplit_no_lig_overlap_val', help='Path of file defining the split') +parser.add_argument('--split_test', type=str, default='data/splits/timesplit_test', help='Path of file defining the split') + +# Inference parameters for creating the positions and rmsds that the confidence predictor will be trained on. +parser.add_argument('--cache_path', type=str, default='data/cacheNew', help='Folder from where to load/restore cached dataset') +parser.add_argument('--cache_ids_to_combine', nargs='+', type=str, default=None, help='RMSD value below which a prediction is considered a postitive. 
This can also be multiple cutoffs.') +parser.add_argument('--cache_creation_id', type=int, default=None, help='number of times that inference is run on the full dataset before concatenating it and coming up with the full confidence dataset') +parser.add_argument('--wandb', action='store_true', default=False, help='') +parser.add_argument('--inference_steps', type=int, default=2, help='Number of denoising steps') +parser.add_argument('--samples_per_complex', type=int, default=3, help='') +parser.add_argument('--balance', action='store_true', default=False, help='If this is true than we do not force the samples seen during training to be the same amount of negatives as positives') +parser.add_argument('--rmsd_prediction', action='store_true', default=False, help='') +parser.add_argument('--rmsd_classification_cutoff', nargs='+', type=float, default=2, help='RMSD value below which a prediction is considered a postitive. This can also be multiple cutoffs.') + +parser.add_argument('--log_dir', type=str, default='workdir', help='') +parser.add_argument('--main_metric', type=str, default='accuracy', help='Metric to track for early stopping. 
Mostly [loss, accuracy, ROC AUC]') +parser.add_argument('--main_metric_goal', type=str, default='max', help='Can be [min, max]') +parser.add_argument('--transfer_weights', action='store_true', default=False, help='') +parser.add_argument('--batch_size', type=int, default=5, help='') +parser.add_argument('--lr', type=float, default=1e-3, help='') +parser.add_argument('--w_decay', type=float, default=0.0, help='') +parser.add_argument('--scheduler', type=str, default='plateau', help='') +parser.add_argument('--scheduler_patience', type=int, default=20, help='') +parser.add_argument('--n_epochs', type=int, default=5, help='') + +# Dataset +parser.add_argument('--limit_complexes', type=int, default=0, help='') +parser.add_argument('--all_atoms', action='store_true', default=True, help='') +parser.add_argument('--multiplicity', type=int, default=1, help='') +parser.add_argument('--chain_cutoff', type=float, default=10, help='') +parser.add_argument('--receptor_radius', type=float, default=30, help='') +parser.add_argument('--c_alpha_max_neighbors', type=int, default=10, help='') +parser.add_argument('--atom_radius', type=float, default=5, help='') +parser.add_argument('--atom_max_neighbors', type=int, default=8, help='') +parser.add_argument('--matching_popsize', type=int, default=20, help='') +parser.add_argument('--matching_maxiter', type=int, default=20, help='') +parser.add_argument('--max_lig_size', type=int, default=None, help='Maximum number of heavy atoms') +parser.add_argument('--remove_hs', action='store_true', default=False, help='remove Hs') +parser.add_argument('--num_conformers', type=int, default=1, help='') +parser.add_argument('--esm_embeddings_path', type=str, default=None,help='If this is set then the LM embeddings at that path will be used for the receptor features') +parser.add_argument('--no_torsion', action='store_true', default=False, help='') + +# Model +parser.add_argument('--num_conv_layers', type=int, default=2, help='Number of interaction 
layers') +parser.add_argument('--max_radius', type=float, default=5.0, help='Radius cutoff for geometric graph') +parser.add_argument('--scale_by_sigma', action='store_true', default=True, help='Whether to normalise the score') +parser.add_argument('--ns', type=int, default=16, help='Number of hidden features per node of order 0') +parser.add_argument('--nv', type=int, default=4, help='Number of hidden features per node of order >0') +parser.add_argument('--distance_embed_dim', type=int, default=32, help='') +parser.add_argument('--cross_distance_embed_dim', type=int, default=32, help='') +parser.add_argument('--no_batch_norm', action='store_true', default=False, help='If set, it removes the batch norm') +parser.add_argument('--use_second_order_repr', action='store_true', default=False, help='Whether to use only up to first order representations or also second') +parser.add_argument('--cross_max_distance', type=float, default=80, help='') +parser.add_argument('--dynamic_max_cross', action='store_true', default=False, help='') +parser.add_argument('--dropout', type=float, default=0.0, help='MLP dropout') +parser.add_argument('--embedding_type', type=str, default="sinusoidal", help='') +parser.add_argument('--sigma_embed_dim', type=int, default=32, help='') +parser.add_argument('--embedding_scale', type=int, default=10000, help='') +parser.add_argument('--confidence_no_batchnorm', action='store_true', default=False, help='') +parser.add_argument('--confidence_dropout', type=float, default=0.0, help='MLP dropout in confidence readout') + +args = parser.parse_args() +if args.config: + config_dict = yaml.load(args.config, Loader=yaml.FullLoader) + arg_dict = args.__dict__ + for key, value in config_dict.items(): + if isinstance(value, list): + for v in value: + arg_dict[key].append(v) + else: + arg_dict[key] = value + args.config = args.config.name +assert(args.main_metric_goal == 'max' or args.main_metric_goal == 'min') + +def train_epoch(model, loader, optimizer, 
rmsd_prediction): + model.train() + meter = AverageMeter(['confidence_loss']) + + for data in tqdm(loader, total=len(loader)): + if device.type == 'cuda' and len(data) % torch.cuda.device_count() == 1 or device.type == 'cpu' and data.num_graphs == 1: + print("Skipping batch of size 1 since otherwise batchnorm would not work.") + optimizer.zero_grad() + try: + pred = model(data) + if rmsd_prediction: + labels = torch.cat([graph.rmsd for graph in data]).to(device) if isinstance(data, list) else data.rmsd + confidence_loss = F.mse_loss(pred, labels) + else: + if isinstance(args.rmsd_classification_cutoff, list): + labels = torch.cat([graph.y_binned for graph in data]).to(device) if isinstance(data, list) else data.y_binned + confidence_loss = F.cross_entropy(pred, labels) + else: + labels = torch.cat([graph.y for graph in data]).to(device) if isinstance(data, list) else data.y + confidence_loss = F.binary_cross_entropy_with_logits(pred, labels) + confidence_loss.backward() + optimizer.step() + meter.add([confidence_loss.cpu().detach()]) + except RuntimeError as e: + if 'out of memory' in str(e): + print('| WARNING: ran out of memory, skipping batch') + for p in model.parameters(): + if p.grad is not None: + del p.grad # free some memory + torch.cuda.empty_cache() + gc.collect() + continue + else: + raise e + + return meter.summary() + +def test_epoch(model, loader, rmsd_prediction): + model.eval() + meter = AverageMeter(['loss'], unpooled_metrics=True) if rmsd_prediction else AverageMeter(['confidence_loss', 'accuracy', 'ROC AUC'], unpooled_metrics=True) + all_labels = [] + for data in tqdm(loader, total=len(loader)): + try: + with torch.no_grad(): + pred = model(data) + affinity_loss = torch.tensor(0.0, dtype=torch.float, device=pred[0].device) + accuracy = torch.tensor(0.0, dtype=torch.float, device=pred[0].device) + if rmsd_prediction: + labels = torch.cat([graph.rmsd for graph in data]).to(device) if isinstance(data, list) else data.rmsd + confidence_loss = 
F.mse_loss(pred, labels) + meter.add([confidence_loss.cpu().detach()]) + else: + if isinstance(args.rmsd_classification_cutoff, list): + labels = torch.cat([graph.y_binned for graph in data]).to(device) if isinstance(data,list) else data.y_binned + confidence_loss = F.cross_entropy(pred, labels) + else: + labels = torch.cat([graph.y for graph in data]).to(device) if isinstance(data, list) else data.y + confidence_loss = F.binary_cross_entropy_with_logits(pred, labels) + accuracy = torch.mean((labels == (pred > 0).float()).float()) + try: + roc_auc = roc_auc_score(labels.detach().cpu().numpy(), pred.detach().cpu().numpy()) + except ValueError as e: + if 'Only one class present in y_true. ROC AUC score is not defined in that case.' in str(e): + roc_auc = 0 + else: + raise e + meter.add([confidence_loss.cpu().detach(), accuracy.cpu().detach(), torch.tensor(roc_auc)]) + all_labels.append(labels) + + except RuntimeError as e: + if 'out of memory' in str(e): + print('| WARNING: ran out of memory, skipping batch') + for p in model.parameters(): + if p.grad is not None: + del p.grad # free some memory + torch.cuda.empty_cache() + continue + else: + raise e + + all_labels = torch.cat(all_labels) + + if rmsd_prediction: + baseline_metric = ((all_labels - all_labels.mean()).abs()).mean() + else: + baseline_metric = all_labels.sum() / len(all_labels) + results = meter.summary() + results.update({'baseline_metric': baseline_metric}) + return meter.summary(), baseline_metric + + +def train(args, model, optimizer, scheduler, train_loader, val_loader, run_dir): + best_val_metric = math.inf if args.main_metric_goal == 'min' else 0 + best_epoch = 0 + + print("Starting training...") + for epoch in range(args.n_epochs): + logs = {} + train_metrics = train_epoch(model, train_loader, optimizer, args.rmsd_prediction) + print("Epoch {}: Training loss {:.4f}".format(epoch, train_metrics['confidence_loss'])) + + val_metrics, baseline_metric = test_epoch(model, val_loader, 
args.rmsd_prediction) + if args.rmsd_prediction: + print("Epoch {}: Validation loss {:.4f}".format(epoch, val_metrics['confidence_loss'])) + else: + print("Epoch {}: Validation loss {:.4f} accuracy {:.4f}".format(epoch, val_metrics['confidence_loss'], val_metrics['accuracy'])) + + if args.wandb: + logs.update({'valinf_' + k: v for k, v in val_metrics.items()}, step=epoch + 1) + logs.update({'train_' + k: v for k, v in train_metrics.items()}, step=epoch + 1) + logs.update({'mean_rmsd' if args.rmsd_prediction else 'fraction_positives': baseline_metric, + 'current_lr': optimizer.param_groups[0]['lr']}) + wandb.log(logs, step=epoch + 1) + + if scheduler: + scheduler.step(val_metrics[args.main_metric]) + + state_dict = model.module.state_dict() if device.type == 'cuda' else model.state_dict() + + if args.main_metric_goal == 'min' and val_metrics[args.main_metric] < best_val_metric or \ + args.main_metric_goal == 'max' and val_metrics[args.main_metric] > best_val_metric: + best_val_metric = val_metrics[args.main_metric] + best_epoch = epoch + torch.save(state_dict, os.path.join(run_dir, 'best_model.pt')) + if args.model_save_frequency > 0 and (epoch + 1) % args.model_save_frequency == 0: + torch.save(state_dict, os.path.join(run_dir, f'model_epoch{epoch+1}.pt')) + if args.best_model_save_frequency > 0 and (epoch + 1) % args.best_model_save_frequency == 0: + shutil.copyfile(os.path.join(run_dir, 'best_model.pt'), os.path.join(run_dir, f'best_model_epoch{epoch+1}.pt')) + + torch.save({ + 'epoch': epoch, + 'model': state_dict, + 'optimizer': optimizer.state_dict(), + }, os.path.join(run_dir, 'last_model.pt')) + + print("Best Validation accuracy {} on Epoch {}".format(best_val_metric, best_epoch)) + + +def construct_loader_confidence(args, device): + common_args = {'cache_path': args.cache_path, 'original_model_dir': args.original_model_dir, 'device': device, + 'inference_steps': args.inference_steps, 'samples_per_complex': args.samples_per_complex, + 'limit_complexes': 
args.limit_complexes, 'all_atoms': args.all_atoms, 'balance': args.balance, + 'rmsd_classification_cutoff': args.rmsd_classification_cutoff, 'use_original_model_cache': args.use_original_model_cache, + 'cache_creation_id': args.cache_creation_id, "cache_ids_to_combine": args.cache_ids_to_combine, + "model_ckpt": args.ckpt} + loader_class = DataListLoader if torch.cuda.is_available() else DataLoader + + exception_flag = False + try: + train_dataset = ConfidenceDataset(split="train", args=args, **common_args) + train_loader = loader_class(dataset=train_dataset, batch_size=args.batch_size, shuffle=True) + except Exception as e: + if 'The generated ligand positions with cache_id do not exist:' in str(e): + print("HAPPENING | Encountered the following exception when loading the confidence train dataset:") + print(str(e)) + print("HAPPENING | We are still continuing because we want to try to generate the validation dataset if it has not been created yet:") + exception_flag = True + else: raise e + + val_dataset = ConfidenceDataset(split="val", args=args, **common_args) + val_loader = loader_class(dataset=val_dataset, batch_size=args.batch_size, shuffle=True) + + if exception_flag: raise Exception('We encountered the exception during train dataset loading: ', e) + return train_loader, val_loader + + +if __name__ == '__main__': + device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu') + with open(f'{args.original_model_dir}/model_parameters.yml') as f: + score_model_args = Namespace(**yaml.full_load(f)) + + # construct loader + train_loader, val_loader = construct_loader_confidence(args, device) + model = get_model(score_model_args if args.transfer_weights else args, device, t_to_sigma=None, confidence_mode=True) + optimizer, scheduler = get_optimizer_and_scheduler(args, model, scheduler_mode=args.main_metric_goal) + + if args.transfer_weights: + print("HAPPENING | Transferring weights from original_model_dir to the new model after using 
original_model_dir's arguments to construct the new model.") + checkpoint = torch.load(os.path.join(args.original_model_dir,args.ckpt), map_location=device) + model_state_dict = model.state_dict() + transfer_weights_dict = {k: v for k, v in checkpoint.items() if k in list(model_state_dict.keys())} + model_state_dict.update(transfer_weights_dict) # update the layers with the pretrained weights + model.load_state_dict(model_state_dict) + + elif args.restart_dir: + dict = torch.load(f'{args.restart_dir}/last_model.pt', map_location=torch.device('cpu')) + model.module.load_state_dict(dict['model'], strict=True) + optimizer.load_state_dict(dict['optimizer']) + print("Restarting from epoch", dict['epoch']) + + numel = sum([p.numel() for p in model.parameters()]) + print('Model with', numel, 'parameters') + + if args.wandb: + wandb.init( + entity='entity', + settings=wandb.Settings(start_method="fork"), + project=args.project, + name=args.run_name, + config=args + ) + wandb.log({'numel': numel}) + + # record parameters + run_dir = os.path.join(args.log_dir, args.run_name) + yaml_file_name = os.path.join(run_dir, 'model_parameters.yml') + save_yaml_file(yaml_file_name, args.__dict__) + args.device = device + + train(args, model, optimizer, scheduler, train_loader, val_loader, run_dir) diff --git a/forks/DiffDockv1/confidence/dataset.py b/forks/DiffDockv1/confidence/dataset.py new file mode 100644 index 00000000..f9f660d8 --- /dev/null +++ b/forks/DiffDockv1/confidence/dataset.py @@ -0,0 +1,276 @@ +import itertools +import math +import os +import pickle +import random +from argparse import Namespace +from functools import partial +import copy + +import numpy as np +import pandas as pd +import torch +import yaml +from torch_geometric.data import Dataset, Data +from torch_geometric.loader import DataLoader +from tqdm import tqdm + +from datasets.pdbbind import PDBBind +from utils.diffusion_utils import get_t_schedule +from utils.sampling import randomize_position, sampling 
+from utils.utils import get_model +from utils.diffusion_utils import t_to_sigma as t_to_sigma_compl + + +class ListDataset(Dataset): + def __init__(self, list): + super().__init__() + self.data_list = list + + def len(self) -> int: + return len(self.data_list) + + def get(self, idx: int) -> Data: + return self.data_list[idx] + +def get_cache_path(args, split): + cache_path = args.cache_path + if not args.no_torsion: + cache_path += '_torsion' + if args.all_atoms: + cache_path += '_allatoms' + split_path = args.split_train if split == 'train' else args.split_val + cache_path = os.path.join(cache_path, f'limit{args.limit_complexes}_INDEX{os.path.splitext(os.path.basename(split_path))[0]}_maxLigSize{args.max_lig_size}_H{int(not args.remove_hs)}_recRad{args.receptor_radius}_recMax{args.c_alpha_max_neighbors}' + + ('' if not args.all_atoms else f'_atomRad{args.atom_radius}_atomMax{args.atom_max_neighbors}') + + ('' if args.no_torsion or args.num_conformers == 1 else + f'_confs{args.num_conformers}') + + ('' if args.esm_embeddings_path is None else f'_esmEmbeddings')) + return cache_path + +def get_args_and_cache_path(original_model_dir, split): + with open(f'{original_model_dir}/model_parameters.yml') as f: + model_args = Namespace(**yaml.full_load(f)) + return model_args, get_cache_path(model_args,split) + + + +class ConfidenceDataset(Dataset): + def __init__(self, cache_path, original_model_dir, split, device, limit_complexes, + inference_steps, samples_per_complex, all_atoms, + args, model_ckpt, balance=False, use_original_model_cache=True, rmsd_classification_cutoff=2, + cache_ids_to_combine=None, cache_creation_id=None): + + super(ConfidenceDataset, self).__init__() + + self.device = device + self.inference_steps = inference_steps + self.limit_complexes = limit_complexes + self.all_atoms = all_atoms + self.original_model_dir = original_model_dir + self.balance = balance + self.use_original_model_cache = use_original_model_cache + self.rmsd_classification_cutoff = 
rmsd_classification_cutoff + self.cache_ids_to_combine = cache_ids_to_combine + self.cache_creation_id = cache_creation_id + self.samples_per_complex = samples_per_complex + self.model_ckpt = model_ckpt + + self.original_model_args, original_model_cache = get_args_and_cache_path(original_model_dir, split) + self.complex_graphs_cache = original_model_cache if self.use_original_model_cache else get_cache_path(args, split) + + # check if the docked positions have already been computed, if not run the preprocessing (docking every complex) + self.full_cache_path = os.path.join(cache_path, f'model_{os.path.splitext(os.path.basename(original_model_dir))[0]}' + f'_split_{split}_limit_{limit_complexes}') + + if (not os.path.exists(os.path.join(self.full_cache_path, "ligand_positions.pkl")) and self.cache_creation_id is None) or \ + (not os.path.exists(os.path.join(self.full_cache_path, f"ligand_positions_id{self.cache_creation_id}.pkl")) and self.cache_creation_id is not None): + os.makedirs(self.full_cache_path, exist_ok=True) + self.preprocessing(original_model_cache) + + # load the graphs that the confidence model will use + print('Using the cached complex graphs of the original model args' if self.use_original_model_cache else 'Not using the cached complex graphs of the original model args. Instead the complex graphs are used that are at the location given by the dataset parameters given to confidence_train.py') + print(self.complex_graphs_cache) + if not os.path.exists(os.path.join(self.complex_graphs_cache, "heterographs.pkl")): + print(f'HAPPENING | Complex graphs path does not exist yet: {os.path.join(self.complex_graphs_cache, "heterographs.pkl")}. 
For that reason, we are now creating the dataset.') + PDBBind(transform=None, root=args.data_dir, limit_complexes=args.limit_complexes, + receptor_radius=args.receptor_radius, + cache_path=args.cache_path, split_path=args.split_val if split == 'val' else args.split_train, + remove_hs=args.remove_hs, max_lig_size=None, + c_alpha_max_neighbors=args.c_alpha_max_neighbors, + matching=not args.no_torsion, keep_original=True, + popsize=args.matching_popsize, + maxiter=args.matching_maxiter, + all_atoms=args.all_atoms, + atom_radius=args.atom_radius, + atom_max_neighbors=args.atom_max_neighbors, + esm_embeddings_path=args.esm_embeddings_path, + require_ligand=True) + + print(f'HAPPENING | Loading complex graphs from: {os.path.join(self.complex_graphs_cache, "heterographs.pkl")}') + with open(os.path.join(self.complex_graphs_cache, "heterographs.pkl"), 'rb') as f: + complex_graphs = pickle.load(f) + self.complex_graph_dict = {d.name: d for d in complex_graphs} + + if self.cache_ids_to_combine is None: + print(f'HAPPENING | Loading positions and rmsds from: {os.path.join(self.full_cache_path, "ligand_positions.pkl")}') + with open(os.path.join(self.full_cache_path, "ligand_positions.pkl"), 'rb') as f: + self.full_ligand_positions, self.rmsds = pickle.load(f) + if os.path.exists(os.path.join(self.full_cache_path, "complex_names_in_same_order.pkl")): + with open(os.path.join(self.full_cache_path, "complex_names_in_same_order.pkl"), 'rb') as f: + generated_rmsd_complex_names = pickle.load(f) + else: + print('HAPPENING | The path, ', os.path.join(self.full_cache_path, "complex_names_in_same_order.pkl"), + ' does not exist. \n => We assume that means that we are using a ligand_positions.pkl where the ' + 'code was not saving the complex names for them yet. 
We now instead use the complex names of ' + 'the dataset that the original model used to create the ligand positions and RMSDs.') + with open(os.path.join(original_model_cache, "heterographs.pkl"), 'rb') as f: + original_model_complex_graphs = pickle.load(f) + generated_rmsd_complex_names = [d.name for d in original_model_complex_graphs] + assert (len(self.rmsds) == len(generated_rmsd_complex_names)) + else: + all_rmsds_unsorted, all_full_ligand_positions_unsorted, all_names_unsorted = [], [], [] + for idx, cache_id in enumerate(self.cache_ids_to_combine): + print(f'HAPPENING | Loading positions and rmsds from cache_id from the path: {os.path.join(self.full_cache_path, "ligand_positions_"+ str(cache_id)+ ".pkl")}') + if not os.path.exists(os.path.join(self.full_cache_path, f"ligand_positions_id{cache_id}.pkl")): raise Exception(f'The generated ligand positions with cache_id do not exist: {cache_id}') # be careful with changing this error message since it is sometimes cought in a try catch + with open(os.path.join(self.full_cache_path, f"ligand_positions_id{cache_id}.pkl"), 'rb') as f: + full_ligand_positions, rmsds = pickle.load(f) + with open(os.path.join(self.full_cache_path, f"complex_names_in_same_order_id{cache_id}.pkl"), 'rb') as f: + names_unsorted = pickle.load(f) + all_names_unsorted.append(names_unsorted) + all_rmsds_unsorted.append(rmsds) + all_full_ligand_positions_unsorted.append(full_ligand_positions) + names_order = list(set(sum(all_names_unsorted, []))) + all_rmsds, all_full_ligand_positions, all_names = [], [], [] + for idx, (rmsds_unsorted, full_ligand_positions_unsorted, names_unsorted) in enumerate(zip(all_rmsds_unsorted,all_full_ligand_positions_unsorted, all_names_unsorted)): + name_to_pos_dict = {name: (rmsd, pos) for name, rmsd, pos in zip(names_unsorted, full_ligand_positions_unsorted, rmsds_unsorted) } + intermediate_rmsds = [name_to_pos_dict[name][1] for name in names_order] + all_rmsds.append((intermediate_rmsds)) + intermediate_pos = 
[name_to_pos_dict[name][0] for name in names_order] + all_full_ligand_positions.append((intermediate_pos)) + self.full_ligand_positions, self.rmsds = [], [] + for positions_tuple in list(zip(*all_full_ligand_positions)): + self.full_ligand_positions.append(np.concatenate(positions_tuple, axis=0)) + for positions_tuple in list(zip(*all_rmsds)): + self.rmsds.append(np.concatenate(positions_tuple, axis=0)) + generated_rmsd_complex_names = names_order + print('Number of complex graphs: ', len(self.complex_graph_dict)) + print('Number of RMSDs and positions for the complex graphs: ', len(self.full_ligand_positions)) + + self.all_samples_per_complex = samples_per_complex * (1 if self.cache_ids_to_combine is None else len(self.cache_ids_to_combine)) + + self.positions_rmsds_dict = {name: (pos, rmsd) for name, pos, rmsd in zip (generated_rmsd_complex_names, self.full_ligand_positions, self.rmsds)} + self.dataset_names = list(set(self.positions_rmsds_dict.keys()) & set(self.complex_graph_dict.keys())) + if limit_complexes > 0: + self.dataset_names = self.dataset_names[:limit_complexes] + + def len(self): + return len(self.dataset_names) + + def get(self, idx): + complex_graph = copy.deepcopy(self.complex_graph_dict[self.dataset_names[idx]]) + positions, rmsds = self.positions_rmsds_dict[self.dataset_names[idx]] + + if self.balance: + if isinstance(self.rmsd_classification_cutoff, list): raise ValueError("a list for --rmsd_classification_cutoff can only be used without --balance") + label = random.randint(0, 1) + success = rmsds < self.rmsd_classification_cutoff + n_success = np.count_nonzero(success) + if label == 0 and n_success != self.all_samples_per_complex: + # sample negative complex + sample = random.randint(0, self.all_samples_per_complex - n_success - 1) + lig_pos = positions[~success][sample] + complex_graph['ligand'].pos = torch.from_numpy(lig_pos) + else: + # sample positive complex + if n_success > 0: # if no successfull sample returns the matched complex + 
sample = random.randint(0, n_success - 1) + lig_pos = positions[success][sample] + complex_graph['ligand'].pos = torch.from_numpy(lig_pos) + complex_graph.y = torch.tensor(label).float() + else: + sample = random.randint(0, self.all_samples_per_complex - 1) + complex_graph['ligand'].pos = torch.from_numpy(positions[sample]) + complex_graph.y = torch.tensor(rmsds[sample] < self.rmsd_classification_cutoff).float().unsqueeze(0) + if isinstance(self.rmsd_classification_cutoff, list): + complex_graph.y_binned = torch.tensor(np.logical_and(rmsds[sample] < self.rmsd_classification_cutoff + [math.inf],rmsds[sample] >= [0] + self.rmsd_classification_cutoff), dtype=torch.float).unsqueeze(0) + complex_graph.y = torch.tensor(rmsds[sample] < self.rmsd_classification_cutoff[0]).unsqueeze(0).float() + complex_graph.rmsd = torch.tensor(rmsds[sample]).unsqueeze(0).float() + + complex_graph['ligand'].node_t = {'tr': 0 * torch.ones(complex_graph['ligand'].num_nodes), + 'rot': 0 * torch.ones(complex_graph['ligand'].num_nodes), + 'tor': 0 * torch.ones(complex_graph['ligand'].num_nodes)} + complex_graph['receptor'].node_t = {'tr': 0 * torch.ones(complex_graph['receptor'].num_nodes), + 'rot': 0 * torch.ones(complex_graph['receptor'].num_nodes), + 'tor': 0 * torch.ones(complex_graph['receptor'].num_nodes)} + if self.all_atoms: + complex_graph['atom'].node_t = {'tr': 0 * torch.ones(complex_graph['atom'].num_nodes), + 'rot': 0 * torch.ones(complex_graph['atom'].num_nodes), + 'tor': 0 * torch.ones(complex_graph['atom'].num_nodes)} + complex_graph.complex_t = {'tr': 0 * torch.ones(1), 'rot': 0 * torch.ones(1), 'tor': 0 * torch.ones(1)} + return complex_graph + + def preprocessing(self, original_model_cache): + t_to_sigma = partial(t_to_sigma_compl, args=self.original_model_args) + + model = get_model(self.original_model_args, self.device, t_to_sigma=t_to_sigma, no_parallel=True) + state_dict = torch.load(f'{self.original_model_dir}/{self.model_ckpt}', map_location=torch.device('cpu')) + 
model.load_state_dict(state_dict, strict=True) + model = model.to(self.device) + model.eval() + + tr_schedule = get_t_schedule(inference_steps=self.inference_steps) + rot_schedule = tr_schedule + tor_schedule = tr_schedule + print('common t schedule', tr_schedule) + + print('HAPPENING | loading cached complexes of the original model to create the confidence dataset RMSDs and predicted positions. Doing that from: ', os.path.join(self.complex_graphs_cache, "heterographs.pkl")) + with open(os.path.join(original_model_cache, "heterographs.pkl"), 'rb') as f: + complex_graphs = pickle.load(f) + dataset = ListDataset(complex_graphs) + loader = DataLoader(dataset=dataset, batch_size=1, shuffle=False) + + rmsds, full_ligand_positions, names = [], [], [] + for idx, orig_complex_graph in tqdm(enumerate(loader)): + data_list = [copy.deepcopy(orig_complex_graph) for _ in range(self.samples_per_complex)] + randomize_position(data_list, self.original_model_args.no_torsion, False, self.original_model_args.tr_sigma_max) + + predictions_list = None + failed_convergence_counter = 0 + while predictions_list is None: + try: + predictions_list, confidences = sampling(data_list=data_list, model=model, inference_steps=self.inference_steps, + tr_schedule=tr_schedule, rot_schedule=rot_schedule, tor_schedule=tor_schedule, + device=self.device, t_to_sigma=t_to_sigma, model_args=self.original_model_args) + except Exception as e: + if 'failed to converge' in str(e): + failed_convergence_counter += 1 + if failed_convergence_counter > 5: + print('| WARNING: SVD failed to converge 5 times - skipping the complex') + break + print('| WARNING: SVD failed to converge - trying again with a new sample') + else: + raise e + if failed_convergence_counter > 5: predictions_list = data_list + if self.original_model_args.no_torsion: + orig_complex_graph['ligand'].orig_pos = (orig_complex_graph['ligand'].pos.cpu().numpy() + orig_complex_graph.original_center.cpu().numpy()) + + filterHs = 
torch.not_equal(predictions_list[0]['ligand'].x[:, 0], 0).cpu().numpy() + + if isinstance(orig_complex_graph['ligand'].orig_pos, list): + orig_complex_graph['ligand'].orig_pos = orig_complex_graph['ligand'].orig_pos[0] + + ligand_pos = np.asarray([complex_graph['ligand'].pos.cpu().numpy()[filterHs] for complex_graph in predictions_list]) + orig_ligand_pos = np.expand_dims(orig_complex_graph['ligand'].orig_pos[filterHs] - orig_complex_graph.original_center.cpu().numpy(), axis=0) + rmsd = np.sqrt(((ligand_pos - orig_ligand_pos) ** 2).sum(axis=2).mean(axis=1)) + + rmsds.append(rmsd) + full_ligand_positions.append(np.asarray([complex_graph['ligand'].pos.cpu().numpy() for complex_graph in predictions_list])) + names.append(orig_complex_graph.name[0]) + assert(len(orig_complex_graph.name) == 1) # I just put this assert here because of the above line where I assumed that the list is always only lenght 1. Just in case it isn't maybe check what the names in there are. + with open(os.path.join(self.full_cache_path, f"ligand_positions{'' if self.cache_creation_id is None else '_id' + str(self.cache_creation_id)}.pkl"), 'wb') as f: + pickle.dump((full_ligand_positions, rmsds), f) + with open(os.path.join(self.full_cache_path, f"complex_names_in_same_order{'' if self.cache_creation_id is None else '_id' + str(self.cache_creation_id)}.pkl"), 'wb') as f: + pickle.dump((names), f) + + + diff --git a/forks/DiffDockv1/data/protein_ligand_example_csv.csv b/forks/DiffDockv1/data/protein_ligand_example_csv.csv new file mode 100644 index 00000000..ee075243 --- /dev/null +++ b/forks/DiffDockv1/data/protein_ligand_example_csv.csv @@ -0,0 +1,3 @@ +complex_name,protein_path,ligand_description,protein_sequence +,data/1a0q/1a0q_protein_processed.pdb,data/1a0q/1a0q_ligand.sdf, +,data/1a0q/1a0q_protein_processed.pdb,COc(cc1)ccc1C#N, \ No newline at end of file diff --git a/forks/DiffDockv1/data/splits/timesplit_no_lig_overlap_train b/forks/DiffDockv1/data/splits/timesplit_no_lig_overlap_train 
new file mode 100644 index 00000000..3694baa2 --- /dev/null +++ b/forks/DiffDockv1/data/splits/timesplit_no_lig_overlap_train @@ -0,0 +1,16379 @@ +3dpf +2zy1 +6h77 +5c0m +4o2c +3a2c +3b2t +1v2q +4q87 +4j28 +3efw +4mi6 +3k8c +3s54 +2fda +2q55 +5ylv +4ozo +3fzn +3eoc +4tn2 +3hfv +5gg4 +5svi +2r58 +5d1n +4yqv +1hs6 +3zxh +1ft7 +4hww +6b1j +3di6 +5jaz +5ema +6gip +5am5 +4bnt +1fq6 +3pqz +6eil +2q11 +4nw7 +4ahr +1p1o +3le8 +3nzk +1eub +1eoc +3std +3qw8 +4za0 +5l8a +5teg +5u6j +3g2n +2wp1 +1zyr +1xs7 +4m6q +2jq9 +4m5h +4d63 +3shb +3i6m +4hw3 +6eq5 +3hqy +3zos +6h96 +2gv6 +5yjb +4lm4 +4lwv +3g30 +5f32 +4dkr +6b4l +4j9a +5w86 +4mpn +5m51 +1ro6 +5q0m +3bea +1mu6 +2wtd +3sr4 +6dqb +5hh5 +5q0e +2r03 +4xv9 +4y0a +5ota +3u5l +3m3o +4idz +2w47 +2g5u +6cec +5wzs +3iu7 +5mky +3ibc +2qwf +3p76 +4np3 +4kb8 +1uyi +4tkf +2fai +2xvd +5ar8 +2lzg +3e62 +1p6e +5wqa +1p02 +6gjr +1nhu +4yyl +3jzq +4pce +4gvu +4n7g +3s9e +3k3b +1pmn +1ogd +2j4i +1oss +1v0k +6cn6 +3k3e +2btr +1ppk +6egw +4zx1 +4zfi +5yso +2rfn +5cep +2xyu +3eou +4mme +4fnn +4og7 +4gkh +6d1l +6dlj +5i94 +1eb2 +4qzw +3fu0 +4aif +5eec +2qlj +5cfb +5sy3 +1mhw +2flu +3e6y +4wiv +5oot +4bae +6bfd +3po6 +2wo9 +1rp7 +1l2s +5ab1 +3pce +2w6z +1iew +3e85 +3e12 +5arf +4jgv +1ikw +6goo +4zdu +3wcb +2w1h +4x2i +6ggv +3h0b +5nwe +1o46 +5f2u +2feq +2jnw +3gpj +5uex +1ndw +1zhk +4mcd +3r4m +4cps +3gcp +2g9v +5try +4c2v +4og3 +3otq +2yns +4m0e +5tkj +2m3z +3pa5 +3iww +5o0b +3m58 +5lz2 +5u6k +4yw6 +3ewh +3wc7 +1rhk +3gi4 +2wd7 +3jq8 +5yr5 +6fnf +3hx3 +4mz6 +1ol2 +4aa5 +1rry +1ozv +5jf6 +4mnq +5wa5 +3nlb +3f6e +2xu5 +3wk7 +4p6w +2pmc +3byo +5ehe +6chq +6c8x +3pp1 +3uev +3ad7 +5aqv +3qzq +2z97 +5d1r +5ajy +4k67 +2w1e +5j3v +3upk +4gj8 +1td7 +5lz9 +6erw +4rhx +1hxw +6cbf +5qal +1q8w +4mf0 +5t1s +1fzk +4tju +5t2y +4el5 +2v87 +1epo +2r5p +4pis +3ehy +5erg +2r1x +2f8i +2wi7 +5wre +3dek +3hp2 +4bcw +4uau +5ap3 +3udy +2ksa +5ekg +5lh8 +2uw7 +3uvq +2glp +3m3r +6bsx +2ddf +6eqm +3ufl +2mwp +1h4n +5f4r +4ybj +4kz8 +2z60 +4ogi +5ufp +4uiw +3igv +5b56 +1bn3 
+2xg9 +1u59 +4m5j +5oua +5qaa +6h7j +2o7v +5v7t +2r02 +2j94 +5o87 +1o3k +4mr4 +5am7 +6g35 +1ljt +3srv +4bi6 +2yoh +4zed +5tln +3rkb +5jiq +2ll6 +1tkz +2vj6 +3qrk +3c45 +4pks +3nf3 +2oaz +3pwk +3ryy +2y7k +6dm8 +5ism +5i7u +2qtn +5cuu +5n1y +4kot +5wa1 +2wyi +188l +3h2o +1sw1 +1zuc +2nnq +4l8m +5j75 +4afe +5nwi +2n06 +3l4w +3ibl +4l31 +4ruy +4ufm +5sxk +4hnn +5q0x +4zij +3fdn +5fe6 +3nuj +5h8x +5ets +4x5z +1w3k +3hs9 +3ro0 +3zi8 +1nl4 +5cao +4gjd +2p3d +2z8e +3w2p +1f28 +2iw6 +1h9l +4o78 +1soj +3krw +5g4n +3t4v +5drr +5o0a +4c4i +2vwx +6miq +6guk +5c3k +5fhn +3o75 +5kap +4zo5 +2jko +3iss +4eg6 +4fcd +3v7s +4bcf +2qyn +1ikx +6f6i +5os5 +1om2 +6axb +3s3n +1fl6 +4luo +1qiw +5f2w +3udj +5tbj +5k0c +5ndf +1ek2 +4zb8 +3r9o +5qah +3avb +4c52 +4ybi +5mi6 +6fu5 +1yc4 +4ymq +1xn3 +3dpo +4z7q +5ehn +5mri +2vcw +2byp +5du8 +1i7z +1ec3 +5jmx +4yxi +3iw7 +3l4v +3wcf +5igk +1sz0 +4x6i +1wum +1d3d +3rv6 +1x7r +2c57 +4u82 +6cwh +2vrj +2xag +5nw7 +5qau +4ocz +3chg +5oh1 +1hkm +3ayd +3mn8 +5ngf +4bo6 +1b57 +5wzt +4uzd +1cyn +3eys +4umc +6guh +2e99 +2xb8 +5tus +6ap7 +4ajn +4od7 +6g2l +5v40 +5wjj +2bow +3kc0 +3zs1 +4igq +3b24 +5e8r +5a4l +4ci1 +3qd0 +2ohv +3uw5 +3lp4 +3ftw +6c5q +4e0x +4hbw +2yoj +4b2i +3mkn +1i5r +5tig +5bqs +5j2x +1oz0 +5a2s +3fkv +3qnj +5w5o +2cht +5dj5 +5oje +5o9y +4hxs +5t36 +5gmh +4eh9 +3kr2 +3rk5 +2b54 +4ufh +2zxa +4mji +4e70 +4huo +3zrk +3wb5 +2iw8 +4ge4 +6ccs +5nwc +5lmb +4i6h +4dhu +5y6e +5bvn +4lxd +3b3s +5j9f +3jzp +2no3 +1ouk +5epk +1fo2 +1g7g +4dve +6au2 +4wt6 +3w2q +5tdr +4xx9 +1jg0 +2n27 +5lyn +1p1q +1hi3 +2wih +4hym +4ew3 +5ixq +1h60 +3ppk +4x8s +5nzn +3ddf +1yvm +1kv2 +4djq +3sym +4fz3 +1ze8 +5ia1 +2qcm +5m9d +2erz +4g0l +5tqg +2i5j +3ow6 +1met +3r42 +5tdi +3i97 +1alw +1doj +3f5j +2r5q +6eir +2ie4 +5cav +1n4m +5wj6 +2o7n +2r4f +5nkn +2g6p +1bnu +4uc5 +3kze +5dg6 +1nym +6cki +4kmz +6mjf +4ch8 +6hsh +1txr +4zj8 +6ggn +4qzx +4qsu +5ety +1o5a +4otf +1pgp +5wex +4p4b +4h4m +3k54 +4j53 +5dva +1e3v +4wop +1c3i +4jls +5mxv +4qrc +3fvn +5ivn +4xu1 +2o4n +1m5b 
+3t1a +1mqi +5n84 +5ifu +5eie +5lgr +5unf +5w5u +5tq1 +3dkj +5ye8 +1br8 +3tsd +6dvn +1oxg +4kz5 +5v3h +5in9 +4avi +220l +4b7z +2xbv +3twx +5zo8 +2w4i +5opc +4q8y +5acy +5ncz +3in4 +3arp +4rt1 +4mq6 +6ey7 +3uzd +2b9a +5o4z +2ow0 +6f5w +1x7q +5agv +5pzm +6afh +4z1e +5flp +5xsr +1b4z +5f1r +1o2g +1uwh +3bmo +1p19 +2ans +6fbv +4lrr +1gi9 +4xuh +2weo +5xpi +4btm +5h0e +4gye +6bgu +5wlt +5oul +1t49 +5k1v +5lz4 +1fsw +6cvx +6c3e +6cj5 +5fqc +1cze +3uwl +5l3g +5jnc +3ps6 +6csr +6bw4 +3hau +3ud9 +6csp +5uyu +5lqf +4bfz +2xp6 +3sjo +5os3 +4z8d +5un1 +6esy +3kjq +3sl4 +5mgl +5dp5 +3n0h +5fao +4dmy +6dxl +4qw0 +3q92 +3rwf +2xke +4gfo +3t03 +3pd8 +3f18 +4utr +4cu7 +5n1s +3wkb +2zdt +2yit +4cmt +5dpa +4rt0 +2y5h +3cde +5lud +3vva +2wev +5mrm +4ypf +5ea4 +3fr5 +4tkh +5tyi +1db1 +4hxj +3hu1 +5ftg +2eh8 +1jiz +1gww +5nih +1rr6 +4fp1 +4y24 +1fki +3lea +4lch +2xk1 +4qjr +4os1 +4pft +5eb3 +5qad +3cpc +3c94 +5jhb +6b22 +3vfj +6hmx +1kuk +2gh9 +1xh6 +3eyl +5hng +3nes +3i1y +2anl +4xit +3n2e +2n0u +5hdx +4zt6 +4f6v +5n7g +4mc1 +4yc9 +3q6w +2cmb +5g4m +3pww +5d7x +4cpr +3muf +5f95 +5o9o +5e2w +2chx +2pk6 +3bet +6cje +2rd6 +5khk +4eh5 +1i9l +1swg +4q4o +1kf0 +1llb +5aep +2zp0 +3zze +5csp +5eyk +3u7n +4z0e +5fh6 +5kj2 +2o4k +5llg +4b5t +3li2 +3of8 +3mvh +1jut +1auj +5awj +5x73 +1eol +1f90 +4crd +1d2e +2oht +1ua4 +2h9m +5byz +3rse +4mp2 +3hhk +5tur +2xxr +2r3w +4u6e +4qlt +3db8 +2bub +1ths +3s8o +3x1k +3eg6 +1jaq +3anr +3ua8 +2kfg +4m3g +3d3x +5a5v +3nuu +1jh1 +1iy7 +5dlz +4wpf +4pvy +1sve +4qmy +1pyg +5drq +4ivt +2gcd +1azg +3wjw +2qiq +4leq +2qfu +1thz +1y3a +5yjp +5cgv +1oi9 +3cd5 +3w1f +2v12 +5f37 +1e03 +3b67 +4q8x +5uk8 +4v25 +3u8k +5l8o +5hlb +4qfn +4li7 +1dth +4gkm +6f3e +4rn4 +3vtb +5l6i +4lrh +4jvq +5est +3gtc +2jbu +2yfx +4yx9 +5bs0 +5ea3 +1vyf +5a4e +3kku +3cyw +3tvx +5mkj +6b4d +4bny +5j8x +5tza +1uk1 +5yg2 +1d6v +5t4e +2j4g +5tci +1z9y +2ca8 +4hyf +5mg2 +4bcq +1lqf +6b3v +5htc +2qh6 +4pul +1gj7 +4mmf +3sfg +5ap2 +2oi9 +4h81 +3s77 +1awh +3s74 +3oyp +4u5u +6euz +6eq3 +4obq +4u4x 
+2r23 +4tjw +4anq +2a4z +2mwy +4pzh +5m7m +4mzj +1o3l +3d62 +2xow +5hog +3rwq +2kmx +4fpf +5ajx +1f5k +4de7 +5c7c +3q77 +4pnr +5tkb +5hz8 +2vle +5ho7 +5xmx +2xpk +6mdq +3tge +5fls +3oku +5jr2 +3e0q +2l98 +2uzj +5csz +5yr6 +3f7i +1ex8 +6ft7 +2bdj +5l7g +1cnx +1mmp +5j8m +2ay2 +6fnq +5eng +1pg2 +4a6v +3ivv +2zq2 +4wag +3s7a +4is6 +1lhw +5tq8 +3v51 +4w9x +5lto +1o86 +5a69 +4zg7 +4unq +2q88 +5c1m +6bgy +4fil +2qi1 +2x7o +6exj +3nkk +1q65 +2xxw +3l7d +1om9 +4jq7 +1lf2 +4xg4 +3sgx +1sdt +3ow4 +4kp8 +2cem +5nzo +3oaw +2wwj +2v6n +4g11 +1lf3 +3ljz +1amn +6hm4 +3jq9 +1qx1 +2wi5 +4old +1g5s +3tic +3cs8 +4er4 +5kr0 +2pe0 +5abf +3v04 +4e6q +2yc3 +3bys +5jeo +4b00 +4bek +4fci +4qna +5eh5 +4r02 +5ef7 +4hu1 +5v5n +5ehp +5yr4 +3dt1 +4cd0 +6gl8 +3uvl +3v0l +5ne5 +4nvp +4qok +4ipj +4zyi +1o3j +1lrt +3iog +5zwe +4hbn +2gz2 +5tco +3gc5 +4l6t +5mnx +5m23 +2etr +5gwy +3b5j +1oai +2ath +2v11 +5wuk +2ym6 +4mwu +5u69 +4i8w +6g86 +4f7l +2yi5 +4nan +1j37 +2xup +2byi +4f63 +3m3x +2bpm +2wb5 +6fel +3tdc +4z1s +4fgy +3cyz +6bh1 +3m8p +2df6 +3jrs +2wmv +6fau +5oah +3d50 +3aza +1uwt +4nct +1jqe +4nb3 +4y3b +1okw +1db4 +4n98 +3inh +6hzp +1hdt +6aoy +4o3u +5es1 +5i9i +6ccm +5nev +5wp5 +4bo7 +5byy +3su2 +4hlf +4c1g +3sww +2os9 +4men +2wxd +4qq4 +5eei +4hnf +3b26 +3eht +3oqf +3dd0 +3si4 +4v1f +4e6d +2c8w +1jyi +4ob1 +5x4n +5alc +2pbw +5eym +4e28 +2xui +4qtl +5jz9 +3chr +2p2a +1v1j +4o3b +5v83 +5v5e +3byu +7abp +5jga +3sw2 +5jgb +4dtk +1q91 +3cqu +1nje +3vhd +4knm +4yrt +1bgo +5ih5 +6afe +4res +1akt +1kyv +1sqn +4oas +5u5l +1v79 +3hp5 +5ivc +1q84 +3avg +3tws +1rmz +4rse +5zae +5w2q +5glu +5b4w +5f62 +4y6r +4mha +5etk +5nzm +4o2b +4cwq +2pj8 +3lvw +2fw3 +1dwc +3zln +3ejq +2f6y +6bic +4p0x +2cct +3dxh +2xhx +1d3q +3qbh +5tkt +4acd +1yt7 +5aki +3zy2 +4tw7 +3b3w +1nkm +5uoo +2l75 +4l7u +4iut +1pbq +4dow +3hab +1owe +6ft3 +4y2q +2hs1 +6gu6 +4dpi +5aac +4j2c +1s50 +6nao +5fox +3jvs +4ozl +1zub +5wxp +5iuh +5oh7 +2c90 +5owl +3oof +4ona +4ydg +1h08 +1uk0 +2fxs +3f37 +4xtz +4mww +6gg4 +4zz3 +5cf4 +3blr +4twc 
+3zsz +4er2 +4uuh +1i8j +6faw +5opv +3voz +3d1g +5q0n +3kfc +3qqs +1oay +1uvs +5os4 +4abe +1r0x +4jwr +1l6m +3nf6 +3qip +1nlt +3zdg +3o84 +3ehn +3k16 +5szb +2j2u +5aiv +2weg +3dz4 +4jdf +1ttm +3wyy +3ifo +5j41 +3fl5 +1q1m +2y80 +3fr4 +3o56 +1yrs +2gvv +3uxk +2zq0 +5uac +3sfi +2wk6 +184l +6gvz +6dik +3u8w +3rxb +2qpq +3nwe +13gs +4gpl +4rxz +4qbm +5y9l +4nh8 +4rra +5ey8 +4j03 +2ou7 +5tt3 +1uwf +4dgr +5xzr +4mw4 +3n5k +4umq +2l6e +6drg +6ghp +2ksp +1sps +5hna +6gnp +3dxk +5eom +1e34 +2xfi +2itp +2zlf +4iva +2y4m +4o1b +2o9v +4i7f +4ezw +1k4h +2xgm +4nud +3m55 +6ezg +4x34 +1czo +5fqr +5he4 +2h6k +3fmr +4kzb +2g9q +3tpx +4hpy +2bo4 +4nhx +4mvh +2nq7 +3n5u +3rjw +3emg +2xm9 +1kzn +5faq +5f1u +1c6y +4j8g +5lvr +1xuo +2vgo +1grp +4uxb +2rol +5cbm +6hke +5tr6 +1uyc +3d6o +5awu +4wz8 +5egm +1duv +6b4n +1t7j +5aqj +4jh0 +4q7s +5cpr +1bqm +1g9c +4fem +4wnk +4uv8 +5hgq +1ayu +6co4 +6ck3 +3uzj +2qwe +3u2q +1nl9 +6bd1 +5yzd +4r92 +2i0j +4hvh +4ool +3s0n +1ba8 +3b4p +2ea4 +5k76 +2xzq +4yur +1pq6 +2y81 +3f8s +2hwh +1q4w +4h39 +3zmh +1ftl +4ok5 +2qcn +3qtf +1g52 +3vw0 +3i4a +5x79 +2am2 +2ym7 +5odu +3adu +6cea +3t6y +4pgc +1k1j +3d14 +3el4 +4nyf +2y0j +2w1c +5l7k +4n6g +5cp5 +3ekr +2isc +5cbs +5hvu +4n00 +5mpn +5zoo +3v43 +1t69 +1oj5 +5lle +5wef +2ya8 +4ezk +2c6o +5vrl +1b4d +5za8 +1i80 +2ccb +6ce2 +4cfx +3m5e +1k22 +6eea +3s3m +5t4f +2xyn +6br3 +3krl +3upy +2rm0 +5q1f +4app +7gch +5hx8 +4ok6 +4zla +5g3w +1h27 +1fkb +1idg +5c8n +1h46 +3w33 +3rzb +4xg6 +2rib +2uuo +1r1h +5agu +6dcz +1r58 +1nlo +3g70 +2ofu +4k6z +4x13 +5ihc +2yiw +6bgw +6gji +1tve +1gpn +5tq5 +4ujb +4l33 +6cqz +4avh +5uga +5tzy +5op6 +1m7d +4aia +2bxt +4qgh +3uuo +5vkm +4jr3 +1wdy +4o4r +2c3j +2r9m +5ual +3vfq +4c16 +6h7l +3tv6 +2pyn +2bj4 +4qvy +4abk +3m53 +3cwk +3d52 +1a4q +4rvl +1d04 +3thd +1ogz +3luo +2ftd +4ran +3uph +3c8b +2xp3 +2vtl +1fj4 +2zm3 +3rxl +1xmy +3uo5 +1q1y +2ql9 +1bhf +2jg8 +6fuh +2q2n +4aof +3ff3 +1heg +4cae +6fkq +4i9i +3ask +1dbj +4zbf +2io6 +3d1y +5t2l +3kwf +1juj +3i0r +4gui +2hy0 +3i7c +6g9m 
+3nnx +5u48 +2v2v +6f5h +5mwh +5m0d +5fue +3k5k +5edi +4d2p +2cm7 +8cpa +3doy +3k99 +1vyg +6ckw +6bmv +5m4q +5j4n +4tpt +2jbv +3uqr +4di2 +5ue4 +5enj +3csl +3m35 +4a9t +5khx +5f0h +4wq3 +4ktu +2qbr +1sbr +2f6z +6cgt +1t4v +5nra +3fed +5l2o +3mb7 +4bzn +1efi +3nrm +5kjm +2cle +1j07 +3h9k +4d83 +5n4s +5yie +5yz7 +5cr7 +4e35 +6di0 +3omm +4w9n +4z1j +6b98 +4x61 +4x6m +2pu1 +4q1s +3n7a +4qbb +6fng +5ya5 +3qiz +5awt +3sv8 +3doz +6dlx +4rkx +3h91 +4uma +1lvc +1uy9 +5gso +5gsa +5drc +4j5e +2o22 +4loo +5t66 +3rz0 +5unj +4g17 +6bhv +3cii +5qcl +4nw6 +4qqi +3r0i +5uff +4lbp +4bqy +4l51 +5akh +2yjb +3dx0 +3v49 +3g42 +5xpp +2c4g +2bz6 +4fny +3x00 +4nat +6f8g +4ppc +3lpb +3tlh +4bcs +3nok +3lfs +3o9h +2vf6 +3we4 +2yig +2kgi +5llc +2jgs +1fkh +5ogb +5w8i +1a42 +5web +2llq +5j7b +2c6l +4u0e +2g9r +1lt5 +5ai5 +3khj +6hh3 +4uac +5nob +1ynd +2on6 +6hd4 +3msj +5ut4 +2wkz +5jvi +3gww +4uja +1pq3 +4u5v +4z83 +6dh8 +1h36 +3vjk +1unh +3hv7 +2vxj +3l38 +1a08 +5lgo +1fgi +5fqs +5a5p +5oss +4o0z +1gvx +1xur +5vqe +2j7w +2adm +3b1m +5zah +1iwq +4zx8 +3wkd +5mtv +5myd +3t6r +2c6n +2c6e +2gpp +3opm +5n9r +3r0t +6gue +4b70 +3gnv +5nyz +1fh7 +5wae +2bvs +1tow +1toj +5een +5fog +6e9a +5jek +1jet +5qck +6coj +1if8 +5t6g +4b85 +2fm0 +5eje +1ezf +3twd +3mo8 +4rvt +6bbu +4ejl +1waw +4heu +4hgs +6b67 +2cmc +3arv +4r4t +1j80 +5ads +6abp +3el7 +6fex +4m5o +3qgw +5t8o +3pcn +3nw3 +5w84 +3bqn +4pin +2w3o +5yic +4lwh +3vo3 +4unr +4cp5 +4b73 +6c6o +1xz8 +5zo9 +2qn3 +2g1r +4xc2 +1oe8 +2z94 +5aic +4m12 +1sld +2h96 +6cn5 +3bvb +5x27 +3usx +1ddm +3fal +2iit +3fzt +5vgi +5jv1 +1py1 +5ktx +4b35 +1m0q +5kjk +2vey +6dkg +5ei3 +6gbx +3e01 +1p03 +4zuq +4wbo +4rfd +3zlo +5alb +5b5o +3liw +2mwo +4yjl +4ab8 +5nau +4at4 +1ke0 +1zz2 +2ccs +5hz6 +2cll +4m2v +1q3d +3ozt +4ge7 +3pkd +5t70 +3ts4 +5q0y +5q1a +3ogm +5t18 +5c7d +3g2k +1ikv +2j7b +4obp +4k6y +2fb8 +3vbd +5ung +4b32 +2qlq +3jzs +3tiz +1v2k +6ee2 +3zzf +1hpv +5xo2 +5e2l +1jn4 +5aly +2jjr +4agm +3rwp +3r7r +3zbf +4url +2wm0 +6dcy +2brg +5dlx +4g2y +5jzs +5fd2 +5nqr 
+4jpe +3tkm +5ti0 +5lxp +3avi +5zuj +3p3h +3d9v +4gs8 +4cwn +5q1i +6b7a +1gjc +4dbn +5yp5 +5ukj +6arj +4iuo +4ibk +5ali +6fgq +1b5h +2bvx +2y36 +3f81 +4e3f +3t84 +4na7 +2ow7 +4ob0 +1qb1 +2iku +5kh7 +1eve +1k2v +3npc +2w68 +6gqm +1b2h +4dfl +4l19 +3dri +5ab9 +4qzs +3sqq +1d4l +4dn0 +9hvp +2lko +1lev +3aau +4mib +3g2l +1lee +1o5e +4os4 +4b34 +3m56 +4qvw +4i4f +5wf6 +3u7k +4j5d +1r4w +5iaw +3buw +1eou +3vbq +6g5j +5t4u +3fuh +2pj4 +2zn7 +4n4v +2obo +1tjp +6b1w +1ru2 +1fh9 +4wcf +6hh5 +5orz +4xmb +4jfv +6ekq +5q0i +3h1z +4ycv +4ura +4prn +6ev0 +1v3x +2pqz +2xjx +3k5i +5dtq +4pn1 +5bue +3ik1 +2h5d +4jbl +2x97 +2q72 +2qpj +1acj +4k6t +6b1e +3f3e +2yog +4kio +3qem +3ued +5mge +2bfq +3mxs +4k1b +4p6g +3td4 +5mnh +5ien +3lk8 +2zx6 +1rql +3rm4 +5u4c +4kb7 +5ap1 +4rj8 +5tkk +4j04 +6i8b +2nxl +3zm4 +4dy6 +3h0z +4zyf +3e7o +5li3 +4qjw +4aq4 +3kpu +5oq7 +3h2c +2qnq +5jn9 +5lvx +1r10 +2l8r +5ega +2wi3 +3rxk +1upf +6chm +4w9k +1vja +4zw7 +5ov8 +5wag +1x8j +5a6b +6br2 +4owv +5ev8 +3drs +6fdc +3tvl +5mlo +2xaf +3h0j +4yas +1o35 +5dex +2x52 +3lgs +3fkt +3dcr +2ax6 +6cnj +4y5d +4umb +3l4y +6fkp +5aoj +1ony +6b95 +5ueu +5ahj +2xel +2g1y +6iiv +3b1t +5dht +4zup +4rlk +1axs +5fnj +1izi +4gvm +4xmr +5yhg +3g45 +3tao +2q7o +1pme +4rrs +6gl3 +4pct +2tsr +3qtw +4uua +5abe +3ozp +4lc7 +2bu5 +5kya +2q6h +1n95 +3k84 +3bl1 +3ui2 +2cgv +4ez5 +3t3c +3puk +6css +1ghw +3ezr +5il1 +1a2c +5j6l +2gss +1n1m +5f1x +5vt4 +4y62 +1bcd +1pwu +4fht +3w9k +3qu0 +1zxc +2vtj +3dnt +2pu2 +4hej +4x3e +5ive +2wo8 +4ozn +5i2f +6dj7 +4a6w +4q1x +2g63 +1jak +5knr +5y7z +5npb +2izx +3jpx +2x38 +4knr +3rwe +4aze +4nrm +6er3 +6d59 +5a2k +4cr9 +5i29 +4mbi +6ht1 +2d1n +1uj5 +3ljt +1kyn +5osd +5whc +4qws +3uwo +3th9 +2r2l +5n8v +2y54 +4m5n +1yy6 +6fui +3lvp +2gdo +3hyf +2zva +3zmu +2pt9 +5nxg +4zsm +5eif +4qsk +4djr +2ceo +4apo +5gj9 +1q4x +4wz5 +2vx9 +2qzl +4keq +2b17 +4kzu +1bdr +4zjr +3ftz +3cic +2c2l +4yo8 +1vj5 +1w4p +1aaq +2q9n +2yay +4bea +1szd +5acb +5db1 +5ne1 +1vj6 +3n5h +3kpv +2hvc +3arr +3v0p +2fqw +5jf7 +3udq 
+2n14 +5mk3 +2pj6 +4pp5 +4c68 +5q15 +3u8h +4erw +1sb1 +5iz8 +6iiu +3txo +4nah +4ir6 +4ufy +6fer +4zba +1jij +2piy +5td2 +4yzc +5qap +5yhl +1h1p +5ni7 +4pf3 +4a6b +4uy1 +2xwy +4el0 +2w71 +2p8h +4d7b +4yoj +2ms4 +4iwz +5ctb +3t85 +4lpg +5bvw +3uri +3hfb +4mr3 +3lq8 +3mlb +4qwf +4ixv +4njd +5tzo +5qa9 +5ai8 +3o23 +4rn6 +5i22 +5u2j +4o72 +4ej8 +1rv1 +4ixh +1w82 +3arx +3k02 +3uf9 +5wij +4oq5 +1z4o +2qf6 +3lq5 +4yw2 +3u4h +4f20 +4mg5 +5ufi +4n8d +6mu1 +5zag +4az2 +4fbx +4emv +4bqt +3t2q +5ajw +4q81 +2fqx +4kza +5k51 +3u4i +5ho8 +3lkz +4ciz +1yy4 +3bmy +3vf5 +2w6p +6fa2 +4flh +4w4s +3i28 +3oyl +2nn1 +5lrk +4e3i +4xg9 +4d8n +3lik +4aj2 +4jt8 +5j7w +5std +3h2a +2pwd +6bjo +3nik +5lxc +3qce +1vrt +4qxo +2y1w +1wok +5lne +5am6 +5er1 +3ob1 +1w4l +6e2m +5wh6 +3kf4 +4lnp +3el5 +1w84 +4gm8 +3o6t +1q83 +6fut +2ycm +3cqw +5ot3 +5dp8 +1g53 +5va9 +4wx7 +5kww +6bu1 +1q6t +1uto +2bks +4fmq +5opr +3ies +6hti +5u66 +4qz6 +6b1o +4gy5 +5mrb +2xml +3skk +1c2t +4in9 +4mss +4kju +1hti +5l6o +4wkb +4dus +2am1 +3t5i +1dkd +2onc +6hvh +1ypg +6cvf +6bkw +2jt5 +3fdt +4wh7 +5i3m +1azx +3fck +5u6c +4ded +4k0u +5xmu +3wk9 +4kz0 +5a7c +6g9j +3iw6 +4ddk +1icj +6eyt +4z2o +4e3h +4c70 +2y57 +3dzt +5e0g +4nrb +4qr3 +5ltn +3omg +6bod +4jpa +3fzs +6f5l +5edd +2oyl +2bmc +2xqq +3kqy +1oiy +5y13 +4d1s +3ked +5djp +3bxe +4bo0 +3oy3 +4o2a +5em6 +5tfx +4d9p +3tku +4p1r +5nwz +4bs4 +3oys +5lgs +4nyi +3l58 +2vxn +1y98 +3ck7 +1nh0 +4an0 +1mcz +2h9n +3h0s +2ycq +2x4s +4acc +5ows +3rxa +4dhm +5lhi +5l8c +3rxi +3r00 +5oq5 +1jev +4ztn +4yb7 +4a7j +4os7 +3e3c +4acu +5hvt +3lf0 +4jfj +4p44 +5dxh +1d4k +5ewk +1gx8 +1x6u +3g2z +1i7c +5opb +4y85 +6fii +6c7q +4jkt +4lbl +1w76 +5dgz +3run +4qoc +456c +5twz +4a4h +6gl9 +6h7z +4fxz +2hu6 +2ohp +4ayt +4fgx +1x8b +2y7x +4yb6 +5i8c +2aoi +3kgt +3g2h +3gwx +5lyr +3nk8 +1mrx +1pf8 +6dj1 +4jss +3tu9 +5cqt +1yds +3hy9 +5y5u +6dko +3s0j +5hm0 +3s3o +5om7 +2j9n +4bfd +5wzw +1d3v +4pvv +4i0s +4pmm +6ftp +4hct +1c5t +5i3v +5urk +5q0f +5xv7 +4bj9 +1css +4azc +1hqf +3kej +2p8n +5tg1 +3cvk 
+6awp +1e3g +1t4e +2nns +5al5 +3cr5 +6equ +5fl5 +2yex +5lsh +4fgz +3mv0 +3l5d +1q6m +5khh +5c85 +2f3e +1npw +5w1e +4j0a +4mvn +5ioz +4yve +5tbm +2fx7 +4oex +4pjt +1nnu +3c4h +3vp1 +1bky +5ajc +1jcx +8hvp +4wvs +2vcx +5hkh +2llo +2jkm +1q6p +5mf6 +5afj +4l50 +1o3e +6evm +1pyw +3l3a +4mnx +4fll +6hx5 +3ivc +6mx8 +5wf5 +5a1f +4c4g +1w5w +2w26 +4an9 +5n8j +3aqt +4dho +4l7r +4aro +1stp +6g07 +4hze +2cbj +5wbr +5afx +3bh3 +4ca8 +6aam +1r78 +6aol +1gcz +3bjc +1o2x +1b40 +4u45 +5nat +4qiy +3v30 +3kyq +3unn +5elf +1xoe +2oqi +5mpz +4hxz +4rfr +3vp3 +6upj +5xyy +4wci +2fwz +6buu +3ocz +3v3v +2yq6 +4m8e +4r6e +5ihh +5ngs +2v77 +3nzs +5tyk +4l7g +3dne +1m5f +2xk8 +4urx +3ft5 +4br3 +1bgq +3fa3 +2y07 +4lpb +5k0m +4tlr +1ocq +4j5c +4awm +1ado +2n3k +4mf1 +4oho +2xv1 +5hh6 +5kmh +4lkt +3n2v +3o7u +2ieo +1wbo +5i25 +4mhz +5mmp +3qs8 +2ykj +5igm +4zji +3zxz +4b71 +1fpp +2y76 +6eya +5lch +5gp7 +3mmr +4qc1 +3sbi +4bsq +5yhe +5hj9 +4qw1 +1px4 +1q5l +4i9z +1pbk +1o5b +1ryf +3oeu +3nx7 +4izy +5lvf +3hll +4jbp +5icv +2yln +5om3 +1e2l +2oyk +2c0o +5w5v +1b2i +3w8o +4x60 +6h2z +2a3i +5jnl +6m9d +4twp +5gmu +5wg4 +2mkr +5e89 +2j78 +3elc +3eb1 +5wlv +3udk +4p0v +5ahu +5two +2jf4 +1mm6 +3c4c +5a2j +3ti4 +3pd9 +4rx8 +4yhz +2vr0 +6mim +2k00 +3fr2 +5q0o +5dgw +3tiy +4jxs +5xst +5tpx +4j1h +5xva +1o49 +2mlm +4alu +1tv6 +4g2f +2xp4 +1lhg +5aoi +2bz8 +4kbc +1h22 +5tho +1wun +1d8m +3m17 +1slg +3pcc +1oh4 +2vio +3v6r +4n7h +3fv8 +4f08 +2p33 +5a5d +3n1v +4j4o +5o1d +5ewm +3ds4 +4b3b +1sqi +2nmx +1gai +1fax +4deg +3suu +4rcd +6f6s +1juf +4zx3 +1m48 +5ttf +4man +5ljt +1y0l +6hsk +1gyy +3bi6 +3fue +1c87 +4byj +4r18 +6bnh +3qpn +4r4i +4e5h +4ojq +5aqt +3fu3 +3oap +3d1x +4j48 +5uig +5vcz +5tzd +4hva +3mvl +6b1c +4e96 +4d88 +4ufi +4uvu +3met +5aeh +6dh5 +4mrh +5fqv +5t8q +6f7c +5l17 +3zim +3k83 +2p3a +4wsj +4cd1 +2xk7 +5w6u +4zs3 +4u58 +2qn1 +6f5m +2c8y +3vw6 +1o42 +5uor +2ycs +2i4z +4q1d +5m0s +3qup +1tr7 +5xpn +2aa6 +5uv2 +3pgl +4mk7 +5uir +1p57 +4ezx +3shj +5csw +3ibn +4ag8 +6ceh +2p98 +4yht +3k0k +5neb 
+4ysi +1bo5 +1a28 +3wha +4nq6 +1czc +1ckb +5j6d +3hr1 +4acm +6hrq +1py5 +3km4 +4c66 +1sr7 +1rnm +3krx +3d25 +5d6e +5w4v +1xbc +2x8e +4cmu +4f09 +5afv +6bkx +3lqj +2hug +5dnu +2woa +5xvw +3fi3 +4yho +1h1s +1w3j +5y59 +1sdv +5tmn +5myk +5f29 +1efy +4ayy +4ykk +3m6p +1utl +3zls +4ff8 +5f6d +4k8s +2mpa +2gfs +1c86 +5e5g +3zvy +2f3r +4e5j +3l5r +5l01 +2vj8 +4b9w +4i7m +4qwu +3rpr +4i7k +2c1n +4ret +1owk +4mxc +2ohs +1akw +5tyh +5huw +5w14 +5lb7 +1f8e +4ciy +5etm +2pwr +5o1f +4r1e +4ncg +4abf +1my4 +4jib +4nxr +2jbo +1njd +1pdq +1qj7 +5fi6 +2vtp +5dro +1o3b +4jln +2yis +1rt2 +4feq +3gjs +5eef +2zx8 +3ujd +1fkf +5eto +1c5p +5tys +6ayo +4eh4 +3djv +3mhm +2pow +1w0y +4dbm +2ywp +5uhi +1mj7 +5am3 +1pxp +4uff +2bq6 +1n1g +5nxq +966c +3vws +2of2 +2ydf +4nbk +1l0a +4u43 +5trh +1rpa +5etv +1h61 +2vcb +3d1v +1xq0 +1b3h +3svj +5fun +4bw1 +1kti +4jof +4izm +4phu +3jy9 +4urw +5ael +4irx +3avn +5mm9 +5twl +4w9l +2ad5 +4b4g +4a22 +5vfc +3l3q +6fn9 +4m14 +2gl0 +4c35 +3iob +6apr +6g3v +4cik +2xpb +3loo +3drr +3e4a +5thi +5mgm +3mrv +2c4v +4i8n +5obr +2qyl +6ee4 +2wzx +5j8z +2itz +5t2i +4mdn +5nw0 +5l9o +1c5n +3bel +1nt1 +4g69 +5ovv +1i9n +1hlk +5hh4 +2hhn +5v13 +2ww0 +2rcx +5zms +2ncz +1a86 +4uuq +5g2n +1o5g +2gfj +5d3j +3dpe +5ldp +3veu +2vaq +4ui5 +1hp5 +5g1a +3f39 +4owm +1dl7 +3ce3 +2qmg +4poh +5azg +6by8 +5l7f +4awo +6fh7 +4ifi +6b7e +5i3x +5lws +5sve +3kn0 +3q2h +5eoc +6b7h +5ekn +3rqw +4erk +5fso +5e80 +2bpx +1yw8 +3sc1 +5o4t +5c2o +4u0f +2w17 +5nxy +1oy7 +6dvm +3k00 +1py2 +4zx0 +4nkt +3s1h +2ym5 +6cis +3hdm +2yj2 +4ef4 +3kvw +5cu2 +6c1s +4j6i +6bij +5xgh +5u2c +3m3e +1yid +1b6k +4gd6 +5lp1 +4ww6 +1zhy +3eky +2q38 +4jym +2ymt +3gur +3suv +1njs +4na8 +4msa +5nk8 +1o4k +3l6h +1i5h +3t6b +5e1s +6gzd +2eum +1z71 +2yc5 +6hai +2a14 +4aa4 +4ask +4ea1 +3pxz +4n4s +5wkm +4jg0 +3sie +3pkc +1w1g +5y8c +2c6m +5ou3 +6d50 +5v3y +5lzj +5tya +4c9x +3eyu +4xtm +3agl +2brh +4mbf +5d4a +2zz6 +2gbi +1t5f +5jiy +1nw7 +3hl7 +6h7f +1zd4 +3suw +5fdc +3cct +4pm0 +2y1x +5n9n +4gj2 +5d6f +4joj +5klz +4jju 
+2gg7 +5hn9 +4tjy +4r91 +6ffn +5usf +2clk +5n70 +2ohu +3eka +5eq1 +4qlq +6eis +4acf +5t35 +4orx +2hmw +3fz1 +3qx5 +4zhm +3rxo +1atl +5t1k +3np7 +3zt4 +1bdl +4u6y +2jbk +5l13 +5mob +2igy +5jlz +4flj +2zu4 +2w70 +5d6y +4kip +2uw0 +4msg +6f8x +1r2b +4qvm +2ydo +1w4q +5okt +5op4 +3al3 +2rvn +3dp1 +4luz +3o87 +1w2h +1fiv +1uh1 +6c8p +4g8m +4hai +2evc +2q2z +4yyt +4od0 +5k9w +3fyj +5vh0 +5ale +3th8 +4yhp +4pvo +4whs +1c4v +5hed +5qbv +2oei +4y2s +1z6q +4px6 +2knh +5kns +1m4h +2g96 +3cm7 +4gcj +1qft +3ztd +4yzu +1vwl +4kiq +6ax1 +3zyh +2c1q +6mx3 +5o9r +1q8t +4ue1 +2y7z +3mdz +4gtr +6fh6 +4qvq +3tti +5ap6 +4pkt +3tdj +3qnd +4lvt +1xo2 +1pcg +5fiv +1g3e +6gx3 +5dhj +5ixt +1xhm +4po7 +1a5g +1haa +6egs +3iw8 +5qag +5o1b +5brz +3mpt +3ral +1j14 +5wdj +4io3 +2irz +3f35 +5jan +6mrg +2oi3 +1onp +4bjx +2i0v +4hmk +5mat +4h2m +4agl +4ayx +5iwg +2xez +5em7 +5kdf +3vv6 +4y59 +3ti6 +5gn6 +4u7v +3rz7 +5wxo +1o2w +4r0i +1a30 +5nz2 +3rf4 +6ee6 +4oiv +1a9u +3iop +5x4p +3gl6 +1hqg +2oji +4ps3 +2w05 +1tnh +3djq +2c9b +2flb +5itd +5k00 +1t37 +3g0b +6hdo +2rfh +1b5g +4bdi +4ikn +3inf +1rww +4g68 +4djw +5yum +4hxl +3dbd +4hw7 +5mys +3p7i +2j7h +2vvt +3r2y +5ng9 +1y2g +2vwm +3if7 +2hl4 +3ffp +4qwg +3g6z +5dhf +4y79 +3m57 +4oyi +3s0b +1yvx +3v7c +4md6 +4g9c +2w6m +5tt7 +3pkb +3upi +2va6 +2weh +4bao +5gid +2w1g +3mxc +2fw6 +3diw +1ind +3i3d +5k0i +4npv +2wf5 +2q7q +4wyz +5xaf +1gj4 +5hf1 +2aw1 +3dbu +3o9i +4l34 +5j0d +5afl +2w3i +5ew3 +5e0m +5jfu +4fm7 +1o4h +4w9p +6eyb +4onf +4zhl +3mj2 +3k22 +2xxy +5m5q +4aci +5kab +4dpt +4rsk +1rsi +5c42 +3tb6 +6bmi +4myd +2i40 +5nhj +5moc +1tom +3ds6 +1lt6 +3eft +2w8g +4d1y +4yrc +1tu6 +5hmh +5nw2 +1k9r +3mxf +2q1j +1u0h +4ye3 +4k60 +5zop +6htg +2fx9 +4q83 +4y8y +4gr0 +3uqf +3vha +2yaz +1k1m +4i2w +5ovx +3db6 +1v2p +5njx +1wtg +3uvn +3h89 +2q7y +4o2f +3fv1 +4eu3 +3pwm +4tmp +5ulp +6di1 +4iks +4dhs +5hg8 +6b1x +5nw8 +3tjd +5yve +4uwg +1jd5 +5km5 +2w4x +3umx +2ony +1bm2 +4pku +2pj9 +2m41 +4z8a +2yjq +6mwe +1prl +5y1y +1c5y +5ak6 +1ggn +2usn +2pv2 +1w25 +1c4u 
+4ba3 +6c2x +4ijl +3gxy +3f3u +4io2 +4drm +3cjo +4dst +5c87 +5ni8 +2ll7 +2pem +2x7d +3zc6 +1m2q +4gk2 +3lqi +3c1x +2az9 +1fv0 +1gz3 +1gfy +5mte +2nxm +4yml +4yog +2g6q +1t79 +3hdk +3uij +5vl2 +4ufk +4aph +6d1u +4rn2 +1zxv +2p4i +1w80 +3bl7 +2vh0 +3kqa +5vdu +4b2d +5jgi +4u0i +3g9n +5var +4lnf +3e9h +2fzz +3cfv +1ws4 +5h21 +2p7z +4j8s +5fdr +5z7b +6cj1 +5tts +6bmx +1g49 +1mqd +5ost +2qhd +6e4a +3fur +3v1r +4wwn +4hni +6e83 +1gqs +3eko +1dmp +2wnj +4i6q +4d8s +2jb5 +4p58 +4usj +6eq6 +3o0j +5apj +4ty1 +5avf +2xef +1way +1a7c +5f2r +5k1f +2c9d +3a9i +3deh +3q3k +4xtw +6f0y +2r3l +4mra +5qa4 +4zpg +4ijp +3wyx +1s5z +1y2h +3qi1 +1c83 +5dts +5e1d +5j9k +5o4f +2wxi +5f04 +2qmj +3kvx +4l7j +1d5r +2vyt +4cfv +1qcp +4hlm +5d48 +3snl +5uwn +4yll +2brm +4dzw +4q7v +5teh +1c4y +5mro +4ymg +3b1u +2z9g +2rt5 +3gxz +6e0q +6f7b +4qz5 +5dh5 +3v3q +4do5 +3k0h +5dt2 +3l54 +3ebh +2hd6 +5hbe +4wri +2qi7 +1jao +5ajp +6ej3 +3gjq +6f8w +5y20 +1rwx +4ibj +5op2 +3c6u +2w0s +1c7e +5yyb +3ni5 +3t8w +5ntp +4dfu +1o4e +5tqu +2xyd +4pl0 +5tl9 +2vpo +4ov5 +4qps +5wdc +4awp +4a9s +3atk +4nja +4ms0 +3fzr +1i9m +4b5s +2y71 +3f33 +5q0h +5x9p +4p4j +5ajo +3c56 +4pnl +4fmu +3uli +5nlk +4cs9 +6hkz +2vo5 +2fts +3sfc +3pke +4dk5 +1lor +2jdn +3jya +6bqg +3p9t +3qkk +6fnj +6fmc +5c0k +1mmq +4fck +5npe +4j59 +4eol +5m57 +4efk +3bt9 +1oiq +5dwr +3wyk +4qf9 +4c5w +5tuq +3oqk +4xj0 +5mwg +2vwu +6dpy +4ara +3wi6 +5owf +3nw5 +4fhh +3sna +1uye +2vcg +5j7j +4gtt +5jc1 +2ph6 +4zpe +2hz0 +5l4i +1y1m +5vo1 +4y2v +1ecq +6eeo +5jrs +4l23 +6f34 +4r4q +4quo +6dq6 +4e3k +3zov +1cet +5e2k +5jyy +2jld +5jna +4qls +4ksq +4py4 +6biy +2hqu +4yk5 +4i12 +4km0 +5ttu +2za3 +4o5g +2y1g +4cff +4ztr +4jps +4uin +2q93 +2x7t +2q6c +6ayr +1pwy +5lwe +5cxi +4n9b +1fzo +5ukm +1ork +4mk0 +1jbd +1k2i +4ifh +4yh4 +3fee +1flm +5tqe +1jj9 +4li5 +4jc1 +5i8p +1lah +1apw +3ppm +4kn7 +1m5w +3vbx +4ie2 +3sur +1nz7 +4trw +4qmu +4qnu +4os6 +2bge +5xvg +4o6w +1nfu +2zjf +1fpi +5id1 +3ccz +2d3u +5tyj +1fls +1ggd +5ofx +4o12 +4wf4 +3pck +4ayv +5jt9 +5nb7 
+4b84 +5ggk +6h5x +1r5w +4zw6 +5d6j +5ep7 +4duh +4ps0 +4drk +1los +3s6t +3be9 +4ipi +3wf7 +4pow +4y46 +5izj +1d4j +4qg7 +2avm +1g85 +1r1i +3g1v +3l7c +5wyr +4oti +4a4o +1fvv +3uok +5n3v +4tim +3tv8 +4a4g +6c0s +4fak +1w12 +4jn2 +4oyp +4zt7 +5emj +2rc8 +2ypo +3kmc +4hwp +3q2j +4k5l +3hwn +5i83 +1j4r +4fyh +1kfy +5g43 +4bnv +5fpu +1o48 +6ei4 +6ekd +2k3w +3ksq +5wkl +4hzz +2f8g +2uw3 +6mi6 +4u91 +3f7u +3zk6 +5n2f +2qx0 +3mrt +2f0z +3qvv +1l7x +2oc1 +3gbe +5he3 +1pun +5m4i +3pp7 +5o91 +4mjo +4fns +4xnv +3tay +3ejs +4c1w +3njq +6guc +4dv8 +4el9 +1iep +1ta2 +3h0e +1bsk +4pri +3pcb +4jg8 +3lbj +1i8h +4a4x +1lhd +2wa3 +5tcy +3wth +4baq +1ylv +5bve +4g34 +5u28 +1qbv +5z5f +4ot6 +2g97 +5n16 +1m5e +3efk +4h7q +2v5a +5vs6 +2cne +1pxn +1jqy +5j6s +4abv +6d6t +1t4s +3zsx +1p01 +2f4j +2gh7 +3f78 +5i38 +3h0w +5d3x +3zst +2lbm +6fil +2vk2 +6ew7 +2iu0 +3t0l +5ai9 +5wbm +4ark +3hlo +1np0 +2w6c +6dk1 +5xih +5hk2 +1qhc +3v5l +1vr1 +4no8 +5hmy +2jql +1a46 +3kf7 +4lv4 +5hbh +3wf9 +4ieh +2vr3 +5dyw +4xuz +1mmr +5ar0 +4h4b +5knj +5elv +2g79 +4w52 +2rnx +4rgd +2az8 +3c1k +5ick +4lv3 +4mwb +5a83 +1prm +4gg5 +1rlq +1rq2 +5fbn +4e49 +1f47 +4yfi +2v10 +3pa3 +4j1f +4jvi +4gmy +2v3d +4cft +1epp +2hoc +4r5g +4k0y +5zz2 +1d3p +4ng9 +6fav +5hor +3qci +4mbp +4msu +5hg1 +1pyn +1hmt +2lpr +4yha +5hoa +1sln +4h3c +4k64 +2w0d +4lmu +3tf6 +4z2p +2hny +5ctc +6cck +5vd1 +3qcf +5ia0 +3l7g +1eld +1xh9 +1h3c +4urm +4j7i +4ly9 +1nu1 +5sz5 +6cdl +1uvu +2wl4 +4eo4 +5lj2 +2qrl +3kc1 +5nqe +6gon +2xs8 +5gmj +5eyd +6c42 +4wks +3ka2 +4p6e +4nnn +3zvw +4q6r +5v8v +2jdv +4lxm +5qcm +5e8f +5y80 +3ud5 +4n70 +1jwt +4cgi +2uw8 +4ajw +4ivb +4m6u +2xu1 +6bxy +4ud7 +3k5f +2arm +5jin +3fmq +2nt7 +1p4r +3hxf +4zwx +4xx3 +4yee +6aak +2xae +5t31 +3bhb +5llh +5nge +4b9h +5oxl +5ipa +1q3w +6d56 +6df1 +3ole +2v16 +3g2s +4led +2uz6 +4pnu +3nrz +1f7b +1j4k +5ku9 +1a4g +4uia +5kv8 +2rke +4e8z +5ykp +4z1n +5sz1 +4oak +3twu +2r3p +5cf6 +1ym4 +5l4j +4exs +4qpa +2k1q +4eny +4i0r +2xz5 +5fxq +3re4 +6cf5 +2cbr +5ece +5t2t +1x8r +4n3w +1klu 
+5nxx +5jcb +5klt +4usw +5ek9 +2mov +6bid +1mjj +1tlo +2hb9 +2f94 +4luv +5gu4 +3wf8 +5ufo +4n5t +5hcx +5mtw +1o38 +5z1c +4w4z +4v27 +1k21 +3jrx +5l96 +1esz +2m3m +5ivf +1o3p +6std +4auj +2lyb +2w87 +5j3l +2fgu +3snd +2w5g +2wsx +4qfl +2vuk +4a23 +5iu6 +1siv +4ht2 +2ltz +1o39 +4p0w +3p8p +5ypy +3zvt +2c5y +1ayv +3erk +6g4y +4ucd +3e16 +4i7p +5alv +1osg +6gw1 +5wxg +4bvb +1nc1 +5ugg +5t1w +3tvw +5hkb +2r3n +2iog +1h1r +5eyc +1ogx +3er3 +5q0p +3rlq +5itp +4nuc +4w9j +2b8l +5khj +4mz4 +3zlv +3okh +2epn +1pwq +3gsm +3hzv +6dpx +4hvg +1s17 +6arm +5jq7 +2v88 +5lay +5nn4 +4hj2 +3pp0 +1rst +3qcq +3rx8 +5ewz +5mgx +4c1t +3cso +4wq2 +4gxl +5isz +2xj2 +4y5h +3zcw +1l8g +5vex +4tky +4elh +4ejn +4pgh +2wuf +2xbp +6gw4 +3d7g +5ksu +2qrp +4jg6 +4eqf +3hbo +5yjy +2gv7 +4hdf +1yi3 +5edr +5nmf +4w7p +4fz6 +1g4j +4a2a +5kx8 +5a0b +4qta +4mho +4fn5 +1gj8 +3d94 +1kdw +6euw +6gpb +6glb +2xaq +3vh9 +1aku +5j4v +2f1g +2vfk +2r5a +1g1e +3buf +1bzj +4abg +3cho +5t6j +4jda +5ywy +6cdo +6fo9 +1g74 +3oxi +5t8p +3pcu +1fyr +1fm9 +5ddc +5hwu +1xud +5ai1 +4e9u +4uyn +5k05 +6fgl +5iha +4aa2 +1wzy +2w85 +3s2v +4yuy +6dud +1xdg +3bgs +4hiq +3rqf +1jmg +2xeg +4g93 +6c7b +3zso +4xjt +4rsc +5q0u +5iyv +1qxy +5n20 +1you +5c7e +4x9v +5tjx +3rtm +4b05 +3tg5 +1b52 +1w1v +5lg3 +2bmg +5wii +5wew +5iua +2g00 +1nm6 +4wef +1i7m +4hrc +6at0 +1biw +4uhg +5hvp +1ugy +3ery +5ll4 +5v6y +1ru1 +2o4l +2evo +5lvn +2rc9 +2fci +6evq +3rah +3g2v +5ny1 +1p10 +3o96 +5vsd +4cxx +5kcb +4g2l +5vqi +5svl +5qax +5h6v +4aa0 +6g1w +3ddu +1pzi +3sax +4nmr +4c1f +5d26 +1elr +3gc7 +3c84 +2y1d +3vd4 +2uzo +3v8s +5mqv +4zly +4p0a +5dlv +3qqa +4g3g +3h9f +4wr8 +5win +4hzm +3rqe +4j1c +4kii +5mw6 +5mwj +1xap +4zv2 +1ol1 +1a69 +4btt +5t4h +2oi2 +6bbs +6aaj +2y4a +5i80 +1hte +6cfd +1o4l +5kbe +3saz +3kqr +2b07 +3iue +3bxf +1is0 +5a4t +5ktu +4agd +3nf8 +2nd0 +5yyz +2ay4 +6fhq +3f3t +1zhl +3vbv +1g9s +5aut +4u71 +1q66 +5vb9 +3qtr +4nw2 +3qin +5mzg +1d9i +5h08 +3rv7 +3rv8 +6ckx +2itk +4ij1 +1pxo +5nt0 +5x62 +4oc4 +5qao +3le9 +5dtr +4a6s +3bls 
+1em6 +1pus +3asx +4lng +3mvj +4q3q +3s9t +4gfd +2r8q +5wqc +3ri1 +5ih8 +6g6w +3cjf +6duh +3shv +4pda +3cpb +1bp0 +4lmn +1zm6 +2kup +3ij0 +6cz3 +3v3b +3vru +3arg +2o64 +6ayi +4qab +4xu2 +5wzu +4lh2 +1w6y +5eis +1czr +5jzy +4ejf +4ufe +5wqd +5f0f +4aw5 +3iqg +4p90 +2ank +4n1u +6eku +2h23 +1j5i +2y7w +5alj +5jfr +5eak +2gfk +3be2 +5hln +5uc1 +3ax5 +4f3k +5qb0 +4llj +5ldk +4eyr +5v3x +4fys +4crl +2zxd +2etk +3ddp +4q0a +3el0 +2ica +4yv8 +4jin +5enc +1pb9 +5jur +5n1z +5ou2 +4y8d +4gby +5acw +4r76 +5hrv +2jt6 +4hhz +3ocb +4qxj +5er5 +3hu2 +1v2l +3dz5 +4j8b +5vqs +5khd +5fho +2f71 +3kdt +5gmm +3sd5 +5fck +3q8h +4h36 +6bm6 +4qmz +4ks5 +4dew +1nf8 +5ap0 +3s73 +6chp +4r3b +2oag +2w06 +5j74 +2a2g +4ju7 +3i81 +5aol +1af2 +5hda +3nu3 +6das +1swp +5njz +1b74 +1sts +2co0 +5od1 +3pab +5u06 +2gnh +3uig +2uw5 +2vrx +1ele +3hp9 +3fqa +4mh7 +2xkc +2w3l +5w73 +5hip +4xv2 +1fch +2j4k +1o33 +2v57 +1kqb +1bhx +4a4w +5bmm +3dcw +4i9o +4ydn +5xii +5duf +5j1r +4no1 +1gvu +1ycm +2l3r +4wnp +4yzn +4p75 +1b1h +5hwv +1s4d +3rlb +1c5f +4fqo +1iau +5mim +5y93 +5qa6 +3wk5 +4mcb +4qms +6b5m +6fba +2q8z +3r92 +5mo0 +3wvm +4qh8 +6dgz +2w66 +2joa +6gih +5fqt +2viw +4o76 +2j83 +3cm2 +3ion +4y2u +3fhb +2rip +2q94 +6eiz +3ieo +1uyg +1vsn +5fnf +2vgp +2pk5 +2f6t +1zfq +5ypo +2nnv +4jvj +2cej +3hqw +2boj +2h21 +3bzi +5ut6 +2toh +5urm +5ml8 +2qpu +6f5u +3t3y +2zx5 +5alu +2vxa +1z4n +1c8k +4w53 +5cf8 +4hcz +1jik +4b81 +5y86 +5ul6 +5l2n +3dhk +5vio +2piz +3jyj +5ly2 +6do4 +1v2s +4ezo +1iht +3hl8 +4q1c +3sk2 +2xjj +6cdj +4w4w +1bm7 +5lca +3rxf +4q7w +5ceq +5aen +3kjf +2psj +4avt +5nxo +3qn7 +1d6w +5ory +3ckr +6eyz +4lxa +2chz +6eqw +3nzc +5umw +1ilh +1o4j +2pks +6evp +2wks +2yc0 +4xy8 +2jg0 +1u33 +5iez +2zxb +2qnz +5wfz +3ob0 +1g36 +4at3 +3pd3 +5jjr +6eo8 +4omc +1k3t +2vl8 +4y8x +3l4t +3s2a +6f4x +2qrm +4u68 +4o3f +4rpn +2zns +4zvi +5ul1 +3fwv +1exw +2vvv +2aof +5vfd +2qcf +3bl0 +3gk1 +5i2i +5w8v +4ptc +3piy +1nlj +4ddy +2vwo +3kdu +6bt0 +5l99 +3rtn +2yk1 +6f28 +1zh7 +5uiu +4eh7 +5epp +1o6h +1b05 +5t5g +6b1k 
+4k75 +5hva +3ips +5obg +5a3o +2q89 +5owc +5ggo +4xk9 +3cf9 +1czl +2ewy +3a1e +1xlz +5k0b +2nta +4gvc +1axr +5fou +1kkq +4u7o +5i4o +4cwr +5fa7 +1i5d +5hdu +2zg3 +2pmk +3mrx +3s1g +6mvu +1wkm +6dvl +5fe9 +5odx +3sbh +2jiw +2r43 +4ibb +4o05 +2v13 +4io8 +4z2h +2vmc +5ggn +2afx +1g3c +2cn0 +3oki +4xty +1xgi +3mmf +5v1d +2uxz +4wwo +4kpz +4xbb +5up0 +2gzl +4r5x +3iet +1oq5 +6afd +1b55 +6chl +1xp0 +2q5k +5ti7 +5uqx +6bm5 +5w94 +1k1o +4lm3 +1ek1 +4ayr +2xxn +5lsc +1o0m +4ui7 +1ppl +6beh +3ud7 +4zxx +6gzh +1uu9 +5jgq +2q95 +5l2y +3v3l +2gmv +5bvo +3snb +5q13 +5upf +5tq3 +3gw5 +4wwp +5wbo +2j7g +3rmf +1fhd +2w2u +4flk +2r0y +2wbb +3da6 +3l4z +1v2w +2azb +2bz5 +2oxn +3f8w +1w7g +3moh +5m3a +5aqq +3tz0 +3zyu +2pzy +5usy +4mx0 +4cpq +4aq6 +5eld +5gmv +3qbn +6rsa +3u5j +5h1t +1ms0 +1ctr +5lub +4h5d +4n99 +4cc2 +4hzw +5lbq +3ati +3pd2 +1f74 +1qyg +5dhg +4mwx +5ohy +4ob2 +2b2v +3c0z +2gtv +4r75 +5w2s +3wsy +1rs2 +2r6w +4agn +4gk4 +4qq5 +6bfa +1n7t +1ocn +1gnj +4u79 +3hmm +2vl1 +1ec1 +5f4u +2zsc +4u5j +4jfk +5kew +4btw +2cbu +1vkj +4jnc +1nli +6bw3 +3fyk +3cl2 +6f3i +1w3l +3gv9 +4h4e +2ylc +2fgi +2ha5 +5sz9 +5gjg +2q1q +4aza +5yia +2rny +1o30 +3m1k +6dj5 +1pfu +4awf +5oqu +4k42 +2wcg +3oj8 +4ngs +4lq9 +1jzs +6g9h +6g9u +3axm +4qp7 +4mma +1g98 +5wi0 +5iep +2opy +6eq1 +2pri +4y2j +4x1s +1bux +4cjn +2vnn +5d2a +6cms +3g72 +4wz4 +2las +4his +5ohi +1apv +2v86 +5nap +3wdc +3d9m +4kzl +5ixf +1fd7 +5f25 +5js3 +3d7h +4ceb +3dst +4yym +4x7n +4cr5 +1gar +4z2b +3iph +5t37 +3zqe +6m9t +3m54 +3szm +2oxx +4mlx +3uxg +3qri +4l0v +1a07 +5qay +4cwo +3r0y +1wbs +8gpb +6bgz +1ecv +1ha2 +2kfx +2be2 +6cjw +2p94 +5tq7 +5fsc +4btb +5yov +6eox +5am2 +2pvm +3gn7 +2ql7 +5j58 +1msm +2bjm +3rt8 +3ds3 +1m13 +4nxo +2vj9 +3fqe +2m0v +3so9 +4r5b +1v2j +4bks +4tkj +1jld +2ojf +1z5m +3m1s +4lpf +4zls +4af3 +5eue +2jh6 +2kvm +5etl +5hn0 +3f6g +3fdm +2jj3 +2zjw +3sfh +1ong +1noi +3cen +2p4j +4rvr +4az5 +2wqp +6fnt +3ejt +5c2h +3sow +3ml2 +1ld7 +1yz3 +3m2u +2wcx +1ec2 +3iqj +3ccn +3r1v +5foo +3cn0 +2z4w +4ks1 +3rtf 
+4bfr +2pvk +3ppr +3r93 +4x9w +2z7i +4hdb +4bti +1eef +6bln +4wyy +4bw3 +3mks +3iit +2axi +4urz +3q2a +2w12 +3aho +6e06 +5dy7 +1gi1 +3az8 +4ew2 +3tgs +3zc5 +2ggx +4ih3 +5u00 +4qkx +4oef +6gjn +1ewj +2xii +5al2 +1o4p +2xck +3dcs +2g0h +5adr +1kv1 +6h33 +2wl5 +6h3k +1qq9 +5vdr +5x4q +2xk3 +3p4f +5prc +4gxs +1qm5 +4aw8 +2eep +5l15 +3tk2 +5heb +5aia +4nh9 +5nki +3eos +4m8y +5u49 +5yh8 +5ylu +6f8r +3nxq +2x3t +5exl +4x0z +4rj6 +5ly1 +3zo4 +2nw4 +3upx +5btv +4n3r +5ek0 +3otx +4r4c +1y3n +5cal +4dld +5z89 +5sys +1ybo +2cmo +1k3q +2ima +3wv2 +3caj +3vv7 +1xug +4l2k +3aya +3gs6 +5alg +4b4n +3zyr +4hxm +2ov4 +3n9s +4n07 +2jnp +3zxr +3v7x +1aid +2zis +4u8w +6e2o +5yqn +3oc0 +4b1d +3g9e +4qw7 +1c1r +1zsf +2nm1 +3cs7 +2x24 +5d7c +1bji +4yl3 +4k78 +2f9u +4kab +3rl7 +4n8r +5etu +5drs +6es0 +2g8n +5ol3 +2i6a +5gvk +2qrk +3sx4 +4q4r +5nq5 +3ctt +5kxc +1tyr +4j0z +5z4h +2uzn +4oon +2ohr +4re4 +3prs +1f8c +4jvb +3eig +1dbk +4p74 +4zg9 +3wtn +3l7a +4cdr +4fli +2am9 +2jkp +2bts +1u9q +5ea6 +6db3 +4b6o +3nnw +3p5k +2c4f +1rbp +2xuc +4hvd +3ijz +5n4t +4jr5 +6e9w +5f63 +4cjq +1b46 +4j4n +4kcx +2a2x +4h3i +4k4f +6bky +2bak +5csd +4mgc +4mwv +1mzs +4ym2 +4ztm +5epn +5v7w +5hes +3n49 +2zlg +2kbr +5fng +2fgv +1f4e +2gd8 +3g6h +5u0f +4j44 +5v35 +1c9d +6g6y +4azi +5kpk +4ui8 +1no9 +4p3h +3gcs +5ngu +6bl1 +1q6j +2zda +3b27 +1o2j +3u2k +6mil +1n7i +2wec +1h6h +3owl +3g34 +1ec9 +4crf +3hha +3qiy +4dh6 +4pid +2y55 +5hhx +5etf +4xar +2pvu +5ito +1li3 +3dya +1xsc +3ti3 +1tze +6fmf +1di9 +2yke +5f4p +5sz7 +1m2p +1uu8 +4y64 +3rt6 +3vs3 +3nfl +4bdg +5jv0 +3wd1 +4pge +5cxz +5ak4 +4mdt +2j27 +4lte +2ayr +5m6f +5dv4 +2qbu +6ciy +3ekq +3c7q +2osc +4r6w +2ph9 +3qkm +3hhu +4xqb +5jr6 +2uwl +5ezz +1y3y +1i8i +3sxf +2i7c +4m1j +6rnt +3g5y +3hwx +2p83 +1mik +1mrn +1sm3 +3py1 +5fah +4pnn +3tza +3zmp +2wxl +5qaz +4odf +1eix +5b2d +5kdr +3hcm +2x91 +3frg +6gch +4qz2 +1hvh +1g3m +1tnl +3s7f +6cpa +6g0w +6ffh +3kec +1nj5 +6afr +5kup +6ftn +4o97 +2i5f +5nzf +2f89 +5fsn +3nex +6b2c +4zt5 +3gt9 +3sud +5btx +4fab +3fxb 
+2x4z +3l4x +5svx +5u98 +1oim +3lpr +4pqa +4pnk +4ogj +5k0s +4oba +6h7m +3aas +4zae +5u5t +4n6h +4dsy +1ywh +4w9o +2qbp +5d45 +5lhu +2bro +1uz4 +1ugx +6avi +2aox +5nb6 +4he9 +2oz5 +5n24 +1h8l +3pwd +1it6 +4rvk +6b27 +2qu5 +4x7j +5tyo +2ym3 +5mjn +3h26 +5ufs +2wj1 +1rt1 +1kz8 +2zbk +4krs +5j87 +3u3u +3g1d +5bms +1u9w +2z7g +4xv1 +4ngm +4j8t +5d11 +5c11 +3f17 +4alw +3q3b +3rwd +1c70 +3iiw +1hww +5kv9 +4wey +4ok3 +2ctc +4bcb +6e8m +3shy +4mnw +5ut0 +3lcd +5yc3 +2vwz +2xbx +1o0o +2vj1 +6b33 +4hnp +1w70 +4bh3 +4gng +2q8m +4rrr +4rqv +1hih +3u1i +5ovr +4ach +6c0u +4qiz +4k18 +1e72 +4ocq +1b11 +3gds +3mwu +5ivs +3bux +3qc4 +5ovc +1pu7 +1y3w +4v01 +6e7j +1uu3 +5dh3 +3tu1 +4q3s +4r5a +4z16 +3uyr +5wir +4exg +5kby +4iur +4zv1 +5xsu +4gtp +2o8h +3wqw +5th7 +5tv3 +1tfz +2v3e +5ntw +3cyy +1ny2 +4q6f +2pjt +5w7x +5jap +3b68 +5tyl +2a4w +2v00 +4nks +1ets +2fq6 +5hbs +4r59 +2ama +2web +6e4t +4xm6 +3bkl +6fty +5v0n +6ge7 +2boh +1ikt +2r5d +5alh +5szc +1cr6 +4zzy +2p1c +1phw +5w92 +3l9n +3k8q +3eml +3nij +5amg +3bv2 +2w73 +1h35 +3tne +3jyr +5l9g +2xsb +4f70 +2iuz +1ml1 +3ewj +4mk9 +2lo6 +2jiu +4es0 +6dqa +3kab +5mxk +2i0y +4zy2 +6cqt +2i47 +2i0d +2i80 +1nfw +5uzk +6hd6 +1y6a +1vcu +1e2k +4wz6 +1nc6 +4zyv +1wcc +4og4 +3il6 +1kzk +4frk +1nms +5kax +1fwu +5djr +3a1c +1hdq +5ml6 +5l4h +4rj3 +2x4t +5nk9 +2xj1 +3mfw +3gy3 +5nvv +4rh5 +4jxv +4zs2 +5ivv +4axa +5lli +3q5h +4wmu +2z50 +4lkf +1rjk +6g93 +4nni +6exs +1nki +3i7g +6ei5 +6erv +1z3c +3r5n +1lol +3qk5 +1rhm +4jfe +1xm4 +4gny +3ti1 +4hhy +5b5b +4fjz +3w2o +1oxq +3wk8 +6bl2 +3oe8 +3f7z +4j3i +5ogl +4my6 +6fob +4o1l +2uy0 +5dqc +1g46 +5orl +1uwu +185l +4z07 +2w8w +3l3l +3vvy +3jwq +1o2z +5lsy +3iok +3g6m +1sdu +4xtp +4qt2 +5fdd +5ia2 +1ux7 +5jhk +4ca4 +3qfz +1qvt +3t9t +4j0y +4x49 +3lpf +2bpv +3jzr +1fcz +3qps +4p7m +4hvb +3cp9 +5jm4 +2q9y +3eid +4o4y +3cid +3nc4 +6b1f +3rth +3rxe +1sl3 +4ap0 +4mu7 +1pvn +4jfm +1rej +3ens +3e9b +4hby +6guf +3iiy +2z4y +4to8 +5t1i +5upj +3c89 +4dju +3pcg +6gu2 +2euk +3vbw +3px8 +3g4k +6dh0 +1ukh +4rxh 
+2ien +2upj +5m44 +3gzn +3eio +2hk5 +3emh +1o3f +5ipj +6dai +3su5 +3sh1 +4iic +5e0h +4fs4 +4m3q +4wf2 +2osf +2pj3 +2r0z +1jrs +1i9q +2zgx +2jfh +2ow9 +2aa9 +5vt1 +5l0c +4ybk +3zmt +5kr1 +2r7b +2lh8 +2wyn +3ckz +6lpr +1usn +3v2n +5oq8 +6bhd +2pym +4xaq +3puj +4ek9 +1eb1 +5o1a +5k5c +2rnw +5qa8 +2zg1 +4gtm +3uec +4ncm +4alg +5hvs +1lxh +1y6r +5aux +2qwb +2h2d +4qir +1s89 +1qwf +2avv +5knt +4odn +3o1e +3lj7 +3eov +6cpw +1qca +1jii +3lkh +3rul +4g4p +1qk4 +5nkh +1w0z +1jlq +4ngt +4meo +4pmt +2v2h +4lm2 +3bu6 +1bzs +2vvc +1v0n +5a7y +3d67 +4u0c +4uxj +4b78 +4bo2 +2jsd +4aa1 +3ga5 +5hka +3vtd +2gm1 +4x6x +6ma2 +2ces +4ayq +1tpw +2uxi +3gc4 +5ye7 +2cbv +1v0p +2uzb +5xvq +3r7b +3umo +3b8q +2y6c +3bc5 +2bes +5wa7 +4f6x +3uvk +1hgt +6m8y +5twg +4avu +5vwi +1ms6 +5xo7 +3dcq +5km9 +2ygf +1zsr +1f40 +5m2q +1bl7 +4k7n +4hvs +4z2i +5voj +3ck8 +5d6q +3g0e +5y0f +3dpk +1bjv +6bw5 +3dow +2zif +1mzc +2nng +2xnm +3f8e +2ndg +4lh5 +3e8r +2e9o +3uih +4hlc +5moo +4epy +2i3h +6fby +6ew6 +3ama +4l0t +1vj9 +3pup +2xct +5fh8 +5ivz +3pxq +1str +2xxx +2pqj +5mra +4o28 +1d6n +1pr5 +1rpf +4tt2 +3bhx +2cma +3dct +4qt0 +6f1j +3gvu +5yy9 +2x81 +2cv3 +6fa4 +5jo0 +3l0e +3p3j +1eed +5aib +3qbc +1n7j +5fpi +6bnl +5v8q +6gxw +1rw8 +5zw6 +4e3d +2mji +6ayh +3lcv +1w2k +3zns +5mty +6c3u +4ks4 +3ds9 +4hld +3elj +4j45 +5v41 +2yne +5nad +5lvq +3h0a +3d9p +4tu4 +3ip5 +1w22 +4kbk +5lqq +5wyx +3ps1 +5ou1 +5j9x +2xru +3t4p +3uzp +2o2u +3dei +3u93 +4z8m +3lhs +2f81 +3fn0 +4iqt +5laz +1xot +2z52 +1zs0 +1oau +5d47 +3o4k +6dcg +1o4q +1csi +3tpp +5nwd +2wca +4ie5 +1u1w +2w92 +3r4n +5aae +4gja +2pj2 +5f2p +5hjq +2yme +3cbs +5u0y +5w3i +5qaw +5d3h +3sb0 +5alx +5g57 +3uhm +6afj +5jf5 +4ccd +5mhq +5etx +1clu +4k6u +6g28 +4zyz +1sj0 +4eh3 +2r5b +4pyn +5eq0 +1cil +5le1 +5nyh +6bdy +2auz +5vm0 +2ff1 +3f8f +2rka +3dda +4iu4 +5bs4 +5a0a +2qn2 +4fyo +1p2a +2uze +1x38 +4ps8 +2g19 +3nzu +3gi6 +5dtt +1gaf +3nii +2xm8 +4att +2vex +4jx7 +6h34 +4tvj +4lh3 +3pvw +1ai7 +1kc7 +2rl5 +6c0r +4rcp +1nhv +5mw2 +4mg7 +1drk +4mzh +1r6g +5xpl 
+4o74 +3bu8 +3cgf +4nie +2a3b +1xos +2wlz +1a37 +5lpj +1yvh +4xhe +2lct +3lau +3zki +1g2l +4uiv +6b4u +3jqf +3ixk +4pvt +5lpl +3sdk +5km3 +1t7f +4z2l +4tk1 +2j9a +3bbt +3tjc +2qi4 +4acg +5tuz +4lbo +5uuu +5ckr +2h2e +5qai +1qxk +2wyf +3f0r +1rd4 +2ipo +1ukt +2xx2 +3hvh +2bmz +1jqd +5d24 +4cpy +4bdk +2xu4 +2evm +6alc +4r5y +4oc5 +3kdm +4gk3 +5wvd +4pz5 +1x1z +2duv +6fnx +5u4b +4dfn +4agp +1n9a +5mny +3hqh +3o8h +4qem +3l81 +4c0r +1utp +5z68 +5tuy +5fb7 +3wv3 +3n7r +3bze +2bvd +2fx6 +5vcw +4ibf +1zom +4f6w +2fdp +1i91 +1d2s +3ikd +4w9e +3fej +4ntj +4wk7 +2a29 +3f15 +4u6c +1ing +3wq6 +4ock +2yb0 +3wtl +5yl2 +2hz4 +2gbf +3r8z +5cso +2j95 +5jxq +4ofb +2fr3 +5b4l +3ovx +6cha +4qwl +1tl9 +3mo2 +3c8e +3k5x +1uw6 +3fhr +1b3f +5a8z +5bvk +4o62 +2hb1 +3qxd +3i4y +6g9d +6gop +5ajv +1oar +5flt +2zhd +2xey +4ikr +4ckr +4ufg +5xvk +2vmd +5oci +1syo +4lq3 +1dtq +5bpp +6fg6 +4cki +5wei +3fl9 +5cnj +4os5 +6awn +3atm +1ypj +1odj +5msb +4f2w +2h5a +2w3k +5w1v +1pi4 +4kx8 +5m25 +1dhj +2qki +4b72 +1n0t +3oag +5dyo +1ql9 +5u7k +5mod +5mtx +4u6w +1yk7 +3d8z +2pnc +5em8 +5cil +3iny +5l2t +3u6a +5fow +2owb +1k1y +5nf9 +3ijg +1qan +4ad2 +2p09 +4xir +4tqn +5q11 +3zzh +3qel +5jha +3rxh +2ha0 +5xqx +3q2g +5wl0 +4bic +6ccn +1mh5 +2ai8 +1xp9 +4ef6 +5jzb +2h8h +5sxm +3dm6 +2ym8 +6fmi +5f1h +4qyg +2qtu +3vfb +1vwf +1ivp +3vf7 +5typ +5igq +4h1j +3sug +3h85 +2viy +6hf5 +2poq +5q17 +6afa +4mpc +6c0t +4zsh +5dit +6aom +3h8c +2e9v +2hfp +4u5n +5caq +4m2w +4tv3 +3oy8 +1pot +3owk +3arn +2dri +4ymj +3qto +5ix0 +4jr0 +5i5x +1nzl +5u4a +4eg4 +1wbw +3nee +5fdp +4cqe +1hvs +2w6q +1n8v +5ct7 +5dx4 +2fdd +4ydf +5c2e +3qsb +5zk3 +5vgy +5ot8 +6boe +1c29 +2pjl +2euf +6cnk +3as1 +3abu +3dpd +5aku +4k6w +4j3l +4nau +3fpm +5w7u +3c3q +3mg7 +1c12 +3buo +3rhx +5lyh +1b0h +4ynd +5y0x +5enb +2a5b +5mqe +3l5e +1tl1 +3way +3gs7 +3ex3 +4np9 +5mxq +2cnh +1swr +3t1m +4eyj +6mj7 +5f01 +3rkz +4ln2 +4jia +4l4z +4qr4 +2er9 +5vnb +5xff +4prp +2jqk +1gi8 +2ynr +1gfz +5nk2 +1rsd +5umz +3jzf +4k69 +6b5o +4o4k +4jal +3nht +4j1i +5xof 
+4ie7 +4f9w +5tvn +2qoh +2jdy +3wym +1w31 +5c6o +3nim +1vso +4z2g +1o37 +3lbz +4bqs +3tfv +5i8g +3utu +3msl +1bra +2k46 +4mg9 +2w76 +4wyp +5jcj +3b92 +3zha +2i4x +3wbl +5trk +2n8t +3ge7 +4unn +2ria +4wgi +4bds +2y06 +6bhh +4raq +5i56 +3vv8 +2xk4 +4az0 +3ov1 +4ycw +5ei4 +4u1b +3bv3 +5etp +3kcf +6fod +3eq9 +1awf +1eat +4giu +3vyd +2pvh +5aaf +2hdr +1lgw +2lsp +4l02 +6b7b +3ioe +4c1m +5kxi +5yof +4obv +3r6t +1uj6 +1a09 +4yk6 +1rzx +3cy2 +6f8v +1xor +4la7 +1qwe +5mxo +2opb +3psl +1aqi +5y5w +2j4q +5vfn +3e37 +4fzg +5qim +6da4 +1oxr +5hgc +3at3 +5ieg +6cq0 +2ioa +3lpg +4mzo +1w7x +2aia +5v2l +5abg +5x5o +2xcn +5toz +5tp0 +4z22 +4wlb +4pd5 +3zbx +3pb9 +3f5l +6bke +4p7s +6dhc +5enh +4m3p +4jmg +1k03 +4pni +4asy +1a4r +4deu +4hy9 +4g0p +5b5p +3wqv +2r3m +6q73 +5ar5 +1drv +4ua8 +4loy +5eh8 +4wy3 +3jzh +2aov +6dj2 +5t27 +1kdk +2og8 +10gs +4uw1 +4hge +5qcn +6c4d +1bai +3i5n +6epa +3dgq +5tzx +4h3b +5a4q +1e5a +5n93 +2exg +5k4l +1qpb +1f3e +4a0j +5ula +4cc3 +2oym +1dis +3ns7 +3uz5 +3ewz +5q0r +1qbo +1nd5 +2qzr +4oq3 +6e9l +2n0w +2hr6 +5vse +5jsm +1ssq +3l4u +1y2k +2r3k +3e3u +4ibi +3kgu +1tni +5hfj +1dva +5mno +1qbu +3px9 +5xpm +5q1b +4zjw +4gqp +3r21 +3lpp +4bdt +4ks2 +2v58 +3bft +1il5 +5yco +4p5e +3rhk +5fq9 +5ko1 +5bwc +4q7p +1ro7 +3lil +4mwq +3wf6 +1tft +3hpt +5t3n +4oo9 +2b5j +2bet +3nuy +5nea +3kdd +5ls6 +6f2n +4rac +4j47 +3iut +4uvv +2i4v +1a8i +5k48 +2hpa +1mrs +6fv4 +1q8u +5aqz +4ytc +1ibc +4eft +2v95 +3me9 +4o0b +4kn0 +3g2j +1ai5 +1n51 +1gnm +4zur +4zsj +3v8w +4tzm +1swn +5ih9 +3gen +4r6t +2ykb +5u7i +4e81 +3v01 +3n4b +1jmi +4dcx +3at4 +2c6c +4mzl +5ybi +5tcj +2w54 +4o13 +5ufc +2xs0 +4flp +4yqm +3o8p +3tpu +5yft +5mql +5w6e +2yjx +4pqn +4m48 +2iiv +4ohm +4a9c +6en4 +3wk4 +3p50 +3bpr +5gwz +1mai +5wys +4oel +1uy8 +4u01 +4ibc +1qti +4jpy +1q41 +2gvd +5aip +5i0b +4zyt +4yxd +4qt1 +4w4y +1c8l +3e5u +3kqd +4uj2 +3k4q +4o42 +5o1h +3b0w +3t64 +1hty +5dgm +3bjm +1b8o +4l3p +2b1z +4yyi +5ceh +3l3m +3ebl +4bo8 +4fvq +3fup +5ot9 +3eax +3vhk +3cda +5c1y +3r2a +1obx +4tzn +1d7i 
+3qd3 +3wka +3ovz +3ihz +5iis +5vo6 +5tc0 +3fqk +1dm2 +4y63 +6mvx +4ucu +5oht +4jff +1n8u +4xua +2ybp +2cgf +3d4q +4gsy +4i32 +2psv +2ych +5lck +1xxe +5wrs +5trg +5dp6 +2lnw +5kr2 +4utx +1a8t +4qw5 +6emu +2itt +4m7y +3kqo +5ku3 +4n7u +1oit +1w1t +4j26 +4nrp +4yh3 +3nzw +5zh2 +3vw9 +2hyy +1qj1 +1csr +2we3 +6mnf +4ijh +2jdk +4kcg +1kmv +3buh +4ovh +1mxu +4mg8 +3lka +4twy +5vsb +4ofl +6cdm +2f3k +5aqp +4tkg +5w4e +3zot +2yak +3qkd +2pv1 +5mz8 +5gut +3avz +2q8g +3ilq +3zw3 +4ckj +1qbr +4q08 +3zpu +5u8f +2ydv +3rtp +4b95 +2f9b +2uz9 +4ehg +4xsz +1bkj +6f22 +1tet +3f3w +4abd +3sjf +2byh +5jsg +4op3 +6fsy +3twv +5d25 +2vto +2oa0 +6csq +6dh3 +5jq9 +4xkx +4tmf +4brx +2ghg +2gvj +4wnm +5ayf +4osf +1u65 +3lzs +1i90 +3gdt +6eru +5ops +2mpm +4cc7 +3ckp +1o4d +2vvs +6ep4 +4bkz +4aom +1wc6 +1mpa +2pl9 +2p53 +4zyr +3gqz +4og8 +4mlt +1mf4 +1xxh +3p3g +3g0c +5dbm +4xhk +5lny +3g8i +5g60 +2zas +2z7h +3kfa +4a9u +4gj7 +4nwd +4ei4 +5yjf +2won +2f2c +3mg8 +3i06 +2wa4 +3hzm +3wkc +4j7e +3dxm +3m9f +1gzv +4clp +5a09 +5tg5 +4wkn +1njf +3ce0 +1tpz +6bh0 +3vfa +3su1 +2vtn +5yp6 +5twj +4i74 +5zk8 +2al5 +3bh9 +4djo +4oi6 +5m4u +1wva +3uvx +2aod +1pqc +4n4t +5fdi +1lcp +4ere +1ci7 +1ykp +2qju +1ftj +5hk9 +5y3o +5q0v +6ayn +5os1 +2j9l +2qm9 +2x6j +5ei2 +5izc +3u18 +4zek +4s1g +2wq4 +1nfy +6bnt +4jai +2jb6 +5ahw +4uit +3bmn +4gzx +6gxu +4gki +3ccb +4ezq +5v7a +6dzq +4az3 +4ucr +4pnt +2wtj +5amn +4ux6 +3t8v +1i33 +3r7n +5f00 +5nvy +3k41 +4aq3 +3tdh +3wdz +5nib +6b8y +1v2h +5jpt +1xge +4umn +2xrw +2vvn +3t07 +2vpp +1ghv +4ydq +2dbl +3ho9 +5w88 +2g71 +2lk1 +2pwg +4ai5 +1o41 +1tsv +5al3 +4fk7 +2o4z +4a51 +4gwk +3ay0 +3own +4e5g +1lcj +1hk3 +4rhy +4yoi +3prf +5z99 +3slz +4tmk +1au0 +2qwg +1bdq +5h8b +4emt +6bec +1g9d +1qf2 +2r0u +1f2o +1b9t +2c1p +1hn4 +3skg +4ee0 +6c2y +4pg3 +3uw4 +1wdn +3mho +4yl1 +6cex +3b7r +4pop +5f02 +3d7b +1xbb +2zft +4i8z +4oya +3otf +2xhm +5c20 +1zrz +4rqk +5k8o +1tvo +6fnr +1dwd +4zlo +1nyx +5aad +3kqw +3nw7 +5ti6 +6e59 +1ywr +2xn5 +4cy1 +1yfz +5e3a +4a7c +5ljq +5h9r +1gi6 
+2b1p +1kc5 +3kpw +1q5k +4aml +4ibg +6e49 +2w8y +3kmm +2w6o +3hdn +6cef +1u8t +4e8y +4gj6 +4hbm +4g3f +4eok +2rox +1t48 +2xd6 +2fjn +5ndb +4j77 +1n43 +3ui7 +5e73 +1h1h +4mhs +3ohf +3kdc +4edy +2wzs +5h9s +3cth +2j34 +1h62 +4jnm +6bil +3mhc +4zw8 +3g90 +4f7v +4eh8 +5wkh +4ycm +4mk8 +2xj0 +2tpi +4clz +4ko8 +5qaj +5dus +4xm7 +2uzl +1k1l +4g95 +1c3e +5g10 +4nnr +3e64 +1fq8 +4c71 +4b0c +3ft3 +1ybg +4y2t +5dk4 +4dds +3ggw +5axi +3ueo +2r9b +3ppj +1meu +4z7o +3n8k +2jup +3p9j +3hxe +1e06 +3ika +5yyf +4kc4 +3su3 +4z7h +4x2l +5hcv +5ir1 +1f9g +5law +3o99 +1yci +3g3d +3aje +2o5d +3b7i +6af9 +2gvz +4qmt +3qj0 +2y9q +1nw5 +5wg8 +4bzr +1k6t +5hd0 +3fui +1lek +5kql +5d21 +1iig +5ewh +2vhq +2gh6 +4x6y +4mbl +2v54 +5ug9 +3hvk +3qts +3q43 +4nvq +3co9 +4uxh +2gj4 +4dwb +5q16 +1uyf +5jbi +2il2 +1yt9 +3t6j +2zu5 +5buj +2xiy +2fs9 +1al8 +4cig +3cgy +4mdq +3st6 +4dzy +6fcl +1m0o +3spf +1lkk +4uu7 +1zky +5d9l +1zd3 +5t6f +1g2o +2i1m +3khv +3o88 +5fi2 +3idp +2pov +4x11 +2qe4 +3qtz +5alm +4pli +3fts +2x4u +4o45 +1tkx +2q8h +1mue +4eop +2j7e +5akk +5wtt +3h9o +3v4x +5b6c +5boy +5hdz +5nkg +6ajg +4dum +3whw +6b59 +5aan +6fo5 +4pmp +5i0l +2xk9 +5mon +3d4y +2xib +3c2r +5mfr +2ra6 +2xl2 +4w5a +3zm6 +3zm5 +3zhz +3ogq +4rrq +4wxi +4asd +5txy +5u0w +4xta +3t2v +2q9m +5lyw +4kon +3sv2 +2j7d +3hmv +6g92 +1z6f +5a46 +3gnw +5k03 +2bgn +3ohi +3acw +3hng +4j06 +1owj +4fbe +1gbt +4fmo +4g8v +2fzc +2vx0 +2o1c +5unp +4o7c +4rxe +5t90 +5jyo +2wzy +1tfq +1tqf +3s78 +6g0v +2ei6 +4r95 +3peq +3o9f +1mq6 +1k6v +6gfs +6q6y +5m39 +3wz7 +1nwl +3ard +5e3d +2woq +4gq6 +3iw5 +2h15 +3avh +2azr +5vsk +6ckr +4n84 +4eh2 +4cpw +5f91 +4k3r +4dgn +2yer +5ayt +4cpz +4y29 +5tq6 +3bcn +3roc +5lwm +3gjd +3k5v +4zip +4und +6exm +5qak +4u0n +5wo4 +1rrw +6b2p +5hls +3pjc +1cin +3b2w +1kmy +4ys7 +4f0c +1zgi +3vqu +3uoh +1qr3 +5i8b +4l7h +2aay +4jlm +3wnr +4en4 +5w0f +4lxz +4cjr +4jj7 +2h5i +2avi +3ezv +4j09 +3hkw +3uxm +4b5d +5vcv +2vtt +3bl9 +2y1n +5nin +3uvo +5anw +5nvw +4b9k +1j4p +4io5 +4dkp +2jdt +1iem +4lm0 +5wyq +5nr8 +6f7t 
+5a81 +3lnk +2xuf +5ort +1h1q +4twd +1e1y +5lo5 +4anm +1e1v +4tki +4wv6 +4htx +1t13 +4euo +6gy1 +4civ +3dux +5knv +5hn7 +6gi6 +5vb5 +3f5p +2kfh +4b3u +5nf5 +5mes +4w55 +1sv3 +3p7a +6frf +6eks +5afm +5z4o +3cow +5hu1 +6f9g +3ds1 +6a6w +2g8r +2ze1 +5q18 +5cau +1s38 +1zge +1me4 +3cj4 +4pr5 +4gzf +1q54 +5l9i +1z6j +2ez5 +3unk +3qwc +3o86 +4j51 +6fyi +3cj2 +2viz +5ekm +3f9y +2uy5 +2brb +5g45 +5jgd +4j1e +4ikt +5opu +4mwc +1t7d +2viq +3ocp +3wuv +4yay +3d7f +2vh6 +4foc +2ra0 +5fwg +1o3c +4bi0 +1kll +3qtq +3cl0 +3ho2 +3vw7 +6h37 +4rlw +6dak +3p3t +3m11 +6fdp +4c73 +5ksx +6bcy +1okl +4gqq +3sji +2x2i +1hkj +5hu9 +5vom +5fdo +1uu7 +5iay +5nu5 +6ghh +4q06 +3apc +1re8 +5uey +2ha6 +4aaw +1tcx +3lpj +3w0l +3s45 +3n5j +1rhj +5d0r +2qp6 +2vqj +2zaz +2zir +4mq1 +5alt +5he5 +3atp +5w5j +3ekw +4xas +5mwy +4mwy +2wzf +2z1w +1d4t +4yk0 +3hnb +4h3j +6g36 +1rhr +2r3f +6bcr +5owh +5idp +1odi +3vw1 +4n6z +3qcx +4z89 +4pkb +2ewp +3d9z +4c5d +1xoz +4meq +3so6 +3tiw +6h7u +3u0d +1ur9 +4lar +4css +2a3c +1h3a +4aqc +5l2s +5vi6 +6cmr +2wf4 +3chs +5uc4 +5tca +2x6e +4gne +5ey9 +4ih5 +1mto +4qwi +5nai +4ym4 +3prz +2nnp +1vij +5bvd +5swf +3pn3 +2pql +3ekv +3nq9 +5c7n +1els +4k5m +6bq0 +1o2k +2aux +4x69 +5mts +1e9h +4ykj +1mkd +6i3s +1zpb +5mo8 +2q92 +4u70 +4wph +4xy9 +4ith +5aph +5cy3 +3fat +4ly1 +2vti +1f0q +4wsy +3ej1 +3ine +3rux +2vvu +5a3t +4qtd +5a5s +3t7g +1xon +4gah +5flq +2hw2 +3aig +4hl5 +5wzr +2imd +3oxz +1ndv +5yji +2hxm +3l59 +4jpx +1ws5 +4uix +2iv9 +4dwk +4qvp +2c1b +5cw8 +2r6n +4cws +2d3z +2oc9 +4ehz +1ett +1fdq +2avo +4r0a +1zzz +3moe +4mmp +5muc +5zh5 +3zqi +6f08 +6elo +6dif +3mf5 +5qas +3fu5 +3rv9 +4jne +5urj +5oxk +4y3y +5hki +4tyb +2glm +2xnb +3tt0 +6gmq +4i5p +1f92 +3psd +5hcy +1q6n +4tmn +1tmn +2gg0 +3zep +1c88 +4fea +5nwb +1ppc +5myx +4ci3 +1z6d +4kww +4ixu +1leg +3a5y +4bo3 +5cyi +5uzj +4mw0 +5j18 +5ml3 +2bmk +1e66 +2brc +2fwy +5oxm +1bn1 +4qlv +4qip +3ms2 +5wxf +5yib +3ig7 +4pgb +4p73 +2y4s +5zh4 +4ch2 +5tex +3b4f +3poa +4zyx +4m0y +2ot1 +5e91 +1yc5 +3iqi +2xpc +1o2s +1pmu 
+4l58 +4mx1 +5am4 +2hs2 +4yne +1ctt +3l5c +2zm1 +1lxf +2f1b +4zcs +4zw5 +2oh0 +5n2z +3f19 +2obf +6f9t +2f01 +6euc +5vee +4oyo +5moq +3ft4 +6cbg +5wuu +5f5b +4xs2 +1ype +4rwl +4hpi +4e34 +1yc1 +5jy0 +2fxr +5e1m +5oq6 +1nhz +3pjt +5jzi +4elf +4fxy +6dpz +5a6a +2waj +2fqt +3mxy +1y91 +6biv +4j1k +3zmi +5xjm +2fv5 +5db3 +3ryj +3b95 +3i5r +2ajl +6c99 +3bx5 +5ave +3muz +2ye9 +2rf2 +2j2i +5g0q +4yuw +2r3g +4cwf +3t3h +5ajz +2bgr +4l9i +4igk +6cz4 +4qfp +5ty8 +4odm +3hs8 +4ele +5zia +2pvj +5w5k +4m5l +3wz8 +4egh +5w7j +4jfw +6eda +5tpb +5m5d +4o71 +3kag +4u93 +6eed +4zam +2jbp +4z3v +2pu0 +4zzd +4f14 +4h4d +1pxh +2hwp +1u9e +4wko +5kqx +1tog +4yqu +2yi0 +2xf0 +4uof +4io7 +4ds1 +5diq +3ti5 +4x8u +2lp8 +3avm +3kqm +4zph +2wou +4jyu +3ip8 +5ti3 +4cku +3h78 +3vdb +3chd +4i5h +5i2r +3tyv +4uwk +5j6a +3lk0 +5k13 +2hj4 +3bfu +4ajk +3n45 +3te5 +3ujb +5v9p +2bzz +3a6t +2y6o +3b28 +1hvl +5a2i +4mx9 +4g8y +4dko +3kdb +1lf9 +4qmp +5q10 +3hqr +4d2d +3w32 +4ksp +6f8b +3h6z +5k0h +3h2n +5ke0 +5w4w +5mxr +5igl +2h9p +4iuu +4d8a +3dvp +1lox +3ao4 +4h38 +4bdb +4j1p +1o4b +4mro +5eel +2ay3 +3e8n +6g8m +2pyy +4m0z +2mas +4ks3 +4p72 +6ccx +3ttn +4jsr +6aqq +6em6 +5o3r +4rqz +4hkk +4qll +2q6f +5he1 +4fe9 +2wu6 +2pj7 +1bty +1ule +6dry +5ezg +5h17 +3s7b +4x48 +3atw +1kug +2hdu +3odu +3l9m +1d1p +2ihj +5mgk +2n1g +6hk6 +1mq5 +4kp5 +1yyr +5al4 +1pzj +6axl +2z4b +5mym +4gv8 +2cni +4xub +6cjh +3ig1 +4m2r +1tt1 +5ime +5oei +6d6u +1utj +1dfo +3ixj +5anu +2ogz +5bwb +5vad +2x9e +6epz +4yv2 +2jdh +1w13 +4eos +4x1f +4gao +5c8k +4k5n +3pj1 +5hz5 +1x7a +1h9z +6gjm +3ipe +3fuj +4m3d +5g11 +4asj +5xig +4ybs +5gmi +3d04 +3dv1 +2fie +4twt +3ary +4anb +6hmg +4n9c +2lsk +2ksb +5mwd +1gfw +5gja +3u3z +2f7o +3mzc +5w9g +5yz2 +4nmq +2y2k +5lz5 +4qz7 +1bug +4zmf +1o8b +5jf3 +2qft +5jop +4jaj +3s9i +6cvy +3oaf +4hlw +5ta6 +5mae +2yg2 +5m2v +4zxy +1g9r +3iqu +5i9y +3pch +5kkt +2fqo +5mn1 +3e3b +4jlj +4tq3 +4zga +4jlg +1pf7 +5ewd +4dhf +4ywa +5fxr +4e1k +2zmd +4jyv +1fkg +2lwi +6iik +3zxe +6dgx +2vx1 +3hav +3pkn +1trd 
+3u10 +2a0c +3ly2 +2xn6 +1irs +2ntf +3rum +6iin +5hzx +1urw +2xb7 +4umr +5apr +5ngt +3q71 +3og7 +4oeg +4tte +2ow6 +3sff +3eu7 +2zju +5tx5 +3v8t +1nqc +3t2c +4afh +3vhu +2d1x +2h2h +5nvz +4qy8 +3w2s +4w9h +4r6v +5esq +5v5d +3rbu +3d51 +4q07 +4jrv +2in6 +3bki +5ov9 +6ayd +4zyc +4ybm +4xip +3ibu +5kde +1br5 +1lan +1gwq +3ggv +3cke +4av0 +4qpd +4urk +4o04 +3d4z +4uct +5uwj +4b8y +5nvc +6f23 +5w0e +4m84 +2hdq +5c6p +2amt +2rgu +4ax9 +6g9n +6hu2 +6cho +3v7d +5iz9 +2vev +3dej +2hxq +4mvx +2c3l +5c29 +1we2 +6f9u +1m6p +2f80 +2ooz +4py2 +4zcw +5kx7 +4m13 +4dhp +4kz6 +5ypw +2zdn +1qf5 +2i4w +2xu3 +1wbn +5lt9 +3zmv +4efs +7cpa +2i72 +4rpo +2h13 +4obo +3f88 +6g47 +2y6d +3o57 +5a0c +2fl6 +5mja +5jal +3aqa +3g9l +5g22 +5znp +1hi4 +1y2a +6ay5 +3cf8 +3f5k +1a0q +3et7 +4eky +6bvb +5u94 +1m1b +3ryv +2peh +3vw2 +5e1e +3i51 +5fb0 +1rtl +2i4p +1a9m +6au5 +4jyt +4k3k +2c5o +2qu6 +4hwr +6bqk +4bqh +5cs6 +4dea +4x3r +4iid +6gzm +2y2n +3fuk +2ypi +1b3l +4zsq +4o44 +2e1w +3rdo +4zme +4zz2 +4ucs +3sl0 +6ey9 +5arg +3p3r +6an1 +3eor +2uue +1n4h +3gk2 +3oot +5i5z +3hv3 +2hzi +5yc2 +1g48 +3bxg +5wbq +4a1w +5q0q +4axm +6g91 +4q0l +1d8e +1bb0 +1lpg +1gmy +6gin +5yy4 +2pvv +6ht8 +5j82 +2x5o +3orn +2nmb +1pk0 +3rik +4lkd +1v2n +4ufl +6ahi +5bsk +3uza +2w8f +4nka +6bef +5abw +6bkh +5l30 +2clx +4an1 +4ufz +5nk6 +1syi +3zsy +5ul5 +3irx +5ux4 +4r5n +4bdc +5fut +3e2m +3lgp +2ivz +1akv +2ydi +4i11 +3zxv +4m3e +4kwg +4g2w +3o95 +6gu4 +2doo +5yun +3n5e +3mqf +2y5k +2vnm +3g0g +1qfs +4o7b +3ty0 +4uil +3st5 +3f36 +6fqo +4pkw +5j6m +4xbo +1hxk +6ghj +1h8y +2pq9 +5em3 +1rlp +1yvf +4cqg +3srb +6fmk +4o6e +5xmv +4agq +2j3q +1ke9 +1mfd +1a5h +4b1j +4i0f +3up2 +2ay5 +5mqx +3lpu +3bmq +5wfw +4j5b +5alp +5ee8 +4qj0 +1ka7 +3k3h +4np2 +3bwj +5osl +5all +4jze +3b3x +3sm0 +2xa4 +3t3v +3t1n +4yp8 +4bc5 +5f08 +4qw6 +5ofv +2bdl +4y83 +4y4j +2prj +6cfc +5eth +1oky +2ban +4blb +3f38 +2uy3 +6fdq +4qmx +4bo5 +5wh5 +6cvv +2oph +3owb +1xlx +2cf9 +4av4 +2wxv +5v8p +4eqj +4aoc +5jk3 +4xtx +2aoj +1v7a +3lzv +3bum +3fxw +4qhc +1b6j 
+3f80 +4or0 +6dq5 +5lsx +4bdd +4x8v +4od9 +3vye +2yix +1i7i +4eoy +2k4i +2bv4 +4z0f +5dp7 +4xiq +3d20 +5llo +2ymd +4hn2 +5n31 +4ffs +4b5b +5jf1 +4i73 +2pqb +2i19 +1cbx +1gsz +5ws3 +4h3q +5a5q +4uvy +4guj +1i7g +3c8a +4ll3 +4pp0 +5hk1 +4rux +3coj +3d6p +1ndj +1o79 +3nsh +5hl9 +2e27 +5iui +4og5 +3wdd +1lnm +3igb +4x24 +4tsx +2gmk +5ox5 +1bqn +4e8w +5xyf +4uco +1eas +2b7f +4m5u +5evk +3ipu +4p5d +2fsa +4ay5 +4bcp +5thj +3tam +4jsc +3ug2 +5e7n +1ofz +3uo4 +5clm +3gr2 +2m0u +6f7q +3sdg +1sme +3g2i +1oyt +3h3c +1xff +4oc6 +3b3c +1xa5 +1fe3 +1g32 +5olb +5a85 +2oz2 +4d8i +5swh +1y2d +1inc +4obz +1c7f +2clh +1qkn +2etm +4i5c +3vd7 +1p28 +5j7s +1y2j +1aht +4iq6 +2pe2 +4f3c +4ih7 +1hkk +4puj +3o1g +4w9i +3qrj +1qj6 +4azg +4joo +6dxx +4sga +1gja +5k4j +4yef +1sbg +3s00 +5fcw +4txe +4mr5 +1bxo +5xxf +5jic +4abh +3nzx +5edl +3pcf +3k98 +3qqu +1hgj +2qch +2yim +4dtt +4l09 +3f07 +4hki +4cl6 +2z5o +4e7r +5amd +1c1v +1ym1 +1s64 +2w6t +3tl5 +1w1y +4eoh +5i58 +4z5w +1u2y +6hu1 +3wav +1nxy +3n4c +5c91 +2kzu +5kre +2jkk +4hkp +4y2y +4w54 +4bnu +3lp7 +3bla +1gni +2idw +2xy9 +2q6b +3uxd +5o9p +3ao1 +3v2o +1i00 +5ewj +2rgp +2xln +6ma5 +4d2w +6eww +4lkq +4o7a +5m4k +5g3n +1wm1 +4fzj +2nyr +2mc1 +2qky +6eh2 +5v1y +4glw +3arb +4gq4 +2vin +4rss +3q7j +2h4g +2x2c +1gt4 +1fo0 +1ftk +4kby +1ch8 +5fnu +3sv7 +2vvo +5uah +3k97 +2wpa +1a4h +5lhg +6eeh +4mse +3nnv +6eq4 +5f1l +3q0z +3elm +2uzd +3p2e +1r0p +1j16 +2qhz +5h0b +4nyt +5ij7 +4l2l +1afl +5vd0 +4i10 +1opi +2ves +5tyn +3gf2 +3nuo +5w2p +4uv9 +3sut +2f3f +1dmb +3wd2 +4xm8 +4kp4 +1hy7 +4xbd +5m6h +1g05 +4zh3 +5d0c +2e7l +3kba +5nhh +2pax +1p0y +2l7u +3wns +5u4d +3axk +4qpl +5jt2 +2rk7 +5lss +3sni +5aaa +5wfm +4pb1 +3oe4 +5m63 +5xg5 +3lle +2yac +4odk +2x95 +2ig0 +2zdl +4ay6 +2c1a +1nny +1nw4 +2fj0 +3kah +4db7 +1bxq +3ifl +5f9e +4p2t +4exh +6d2o +5t2b +6eee +2oi0 +3lpl +4tk3 +1o6i +6eux +2ojj +3pyy +4edz +3jy0 +3r5t +4xdo +3qo9 +4u2w +5nme +6hdn +4mre +4ryg +5ez0 +3c14 +5kzi +6bix +3uug +5ldm +1o2v +4llx +3lw0 +2op9 +4nhc +5wqj +5wi1 +4e20 +6fjf 
+2x2m +5l98 +3qsd +5lcj +5t23 +1adl +2wa8 +4q9y +2rkg +4gid +3f70 +2xk6 +5t8e +4lxb +2xp2 +3juk +5uf0 +3l1s +6evr +1lag +1awi +6ej4 +1pmx +4kln +3wgg +5g61 +5n69 +4o9v +3kqc +1him +3cd0 +2kbs +1moq +2f10 +2zb0 +2d41 +4bdh +1ebz +5lcf +5u13 +4qwk +4cd8 +4d85 +1tvr +4l6s +3pd4 +1hq5 +5tiu +1o44 +6g22 +2ckm +4btk +2cli +6cb5 +3o0u +5u4x +4lp6 +5afk +2o3p +1zpc +3qo3 +3d1z +2z3h +1hms +5ox6 +5fpp +3f3d +5t78 +5hg5 +3mxe +4i7l +1tuf +5lmk +3nu9 +5om9 +2qi3 +3l5b +4v0i +2nv7 +2wkt +4ndu +1aq1 +3ddq +1vyj +5ods +1b8n +4xhl +4jft +3n7o +4og6 +5j9z +5ufr +4qjm +3g4g +5jox +2std +4uvx +2yb9 +3qaa +5mwo +2onz +2hiw +1xnz +2agv +4qgg +5x28 +2dw7 +2ohk +4oue +1o4a +6ds0 +3t5u +3ij1 +4klv +3lgl +2wey +1qk3 +4i0d +1rti +3clp +4f8h +4qwj +1bmb +5jar +5a5z +5ofu +2yiu +2z5t +5emm +4ibm +5lzh +4xh2 +3t09 +2p95 +1fmb +1gj5 +3s72 +5ia5 +4kp6 +1s9t +2fhy +4del +3ao5 +4xii +5cp9 +3vb4 +3sha +5el2 +2uup +6bfe +4jv6 +3e8u +3ubd +1tys +4p10 +5ekj +1d5j +4bpi +3n23 +3amb +4gue +4n8q +1o0d +4d1j +1a9q +2vtq +1h1d +5uw5 +6bfp +3tfu +5xyz +2q1l +4m7j +3m8u +5kbf +3btl +4ctj +1koj +2ajb +3unz +2xgs +4pd9 +5g1c +5w19 +5vqw +5yc8 +4ufd +1w83 +4qdk +1tyn +6buv +4amw +3dnd +1d7j +1nzv +4ryl +2f6j +6bbx +5lgp +6ay3 +1fwv +2qg0 +3mg4 +3qmk +4dma +2r3c +1d6s +5dsx +1tg5 +6do3 +5n7v +1g5f +1c5q +1lb6 +4pv7 +3ii5 +4rab +4eke +2w10 +5i86 +1uho +5vsf +3g08 +2zxg +4btl +3hb4 +5j4y +5lgt +6boy +5kva +5mka +5wa8 +2i0e +3ptg +5vp9 +4mw2 +3djf +6b7d +5t1a +5hrw +5e2o +4xuc +3vg1 +6c4g +2csn +3q32 +4q9m +3vry +3fv7 +5lhh +3fh5 +2qry +4f9g +4c8r +4m6p +4riu +3dp3 +2r3i +3bwk +6ew3 +4bs5 +4g3e +2wvz +3wc5 +4nb6 +5xmr +4ps5 +4dvi +3uvm +5os7 +2xix +5alw +3iaw +2xnp +6dvo +1cgl +4xv3 +5dya +3c79 +2e9a +6dil +4caf +4tpw +5fov +5n3w +5ttv +6f3b +5cas +5aqg +3dz2 +1xhy +3t01 +5th4 +4i80 +4q3t +2ceq +4yff +3bti +5vll +4hy4 +6g3y +4ykn +5lu2 +5wg7 +3fnu +6g84 +6ap6 +6hpg +1c3x +5if4 +5wgp +3d6q +2wer +2qrq +4cfl +3uib +6fgf +5tx3 +3plu +1zzl +6db4 +5tpg +3k8o +2fxv +3c2u +1w8l +1di8 +3udh +4cu1 +5gvl +5nvx +1bt6 +6d9x 
+1vjb +6b31 +2qo1 +3daz +1v0l +5nuu +2qtb +5ewy +4i9u +4afj +5hzn +2ohq +4eqc +3p8h +5y53 +1kat +4kwo +2hiz +2cmf +4bgx +4hys +4lww +3k39 +3rdq +4ega +5fsl +3ehw +5gnk +5n8b +6h29 +1dy4 +3nu6 +5dxu +5jf8 +2qd9 +5zaj +1oif +2v8w +1ywi +5uiq +1xr8 +4yvz +2y82 +1pkx +2qbw +1ebg +3a4p +3ful +4d62 +5qb2 +6bgx +1ela +5j59 +2q63 +6cmj +2p9a +5nwk +3w54 +4rrg +2l1r +2gsu +2wva +2vpn +1qji +4gts +6gnw +4afg +5ty9 +4ps7 +3r2f +3qak +4dgg +5f67 +1kak +4mc9 +4dpu +4u5o +3h06 +2b4m +4q99 +4dgb +4hw2 +5uvc +1yxd +4cix +4ehm +5his +2ihq +4kmd +5cin +5v7i +3itz +3il5 +3brn +1h4w +2ilp +6bnk +1me3 +5fh7 +1rgk +3iub +3p4q +1njt +1ya4 +2x6i +3e81 +4yl0 +1at6 +3pr0 +5wf3 +4uzh +5f2k +4j93 +4dwg +5ti2 +4pcs +3qcj +4qjo +4dpf +4qer +2kp8 +4o2p +4pd6 +2op3 +6got +2weq +4lh7 +5l97 +6bed +5o1e +4ezt +5y8w +1err +4rj7 +2yol +5t68 +2ltx +5g17 +1mxl +4c7t +3uvu +5a3r +4yuz +3nf9 +2fix +3pgu +4kn1 +3mi3 +5dpw +3e5a +1y1z +5j8i +4e5d +3cd8 +6ccl +4x9r +3rtx +2zmj +1xp6 +6cks +3fw3 +4znx +4nyj +1q1g +2ews +5o5m +4w4v +2w1d +5d1j +5fbe +3ipq +2qk5 +2ydj +3gv6 +1mxo +5l9h +3odk +6cze +6c98 +2wmx +3qa2 +5oax +6gnm +3q6z +2oqv +5exn +3s53 +5lzg +3mo5 +6drz +5nn5 +4x1q +2mow +3ow3 +4ebw +4a4c +6ema +2h6q +2fjm +5uez +6et8 +6ep9 +1avd +2q80 +4nj3 +4mdr +5ely +5dri +1v2r +1hk4 +5xfj +4z0q +4heg +5h3q +2buv +4l7d +3nb5 +1g9b +3oay +4aj4 +1bxr +2w9h +2iws +3oy1 +3qpo +2hog +3rl8 +1zgb +1a4w +3vvz +5ia4 +6f1n +1x70 +3q5u +4do4 +2pcp +5m77 +4a50 +1elb +1rri +2ydm +3ldp +4k9g +2ptz +3uo6 +2qwd +5fdz +6fiv +1hqh +5q12 +5anv +5k5e +5aer +1jys +2zu3 +2oiq +5lwn +1i9o +1npv +4piq +2w0j +6c5f +1w8m +5gjd +5yql +1m83 +4djv +3ns9 +3zn1 +4eg7 +3u4r +6hu0 +4awq +4l6q +1ad8 +3d9k +5ucj +3piz +3ekt +1g9a +3m2w +5q1c +1mv0 +6emh +1ttv +5u12 +1lhf +1w1p +2i4t +4z68 +4gqr +3uqp +1rhq +3oxc +4mc6 +3adt +3ivi +3r8u +1udt +1o36 +4ou3 +6apz +5gmp +2l84 +2gg9 +3o64 +3uo9 +4qht +6f6r +3hy7 +1nox +5u6v +3ans +1mdl +3d2e +2fsv +5t2m +3p79 +4je7 +3gta +4q9z +6f55 +1gu1 +5y6k +3ert +3ant +5a3h +3zdv +3o1d +6ar4 +2ym4 +3s3v +3fk1 
+3pbb +3r6c +1xh8 +5lh4 +5q0w +2psu +5gty +2qi0 +2nn7 +5vb6 +4b8o +1zp8 +5uqv +4pci +5f5z +4rj4 +6dh4 +3lce +4ea2 +4y6m +5d3p +1gsf +4l7l +1tkt +3a29 +5ljj +4gpk +3ayc +3qkv +3ern +3zhf +1f4y +6bgg +5uz0 +2znt +5du4 +1utn +5ayy +1lzo +1gym +5k8s +3kb3 +3ijy +5oa2 +5wio +1yei +4hlh +2eg8 +5tkd +1kpm +3tpr +3mt9 +3tcy +4qxr +3uyt +3o9l +5ejv +6ekn +2fky +5vp0 +4rxa +2xpa +1kav +3ecn +5vlh +4lko +2g83 +4m5r +5iql +5g53 +3fhe +2xj7 +3u78 +3zt3 +4hkn +4h58 +4f1l +5q0t +2jkh +5w13 +3r88 +1yvz +4qmq +5aag +1b6m +4i47 +6d5e +6c91 +1z95 +4ca6 +4ysl +4gbz +1aj7 +1wcq +3m96 +5t92 +6gjl +4yxo +5l2m +4wov +3l2y +5v5y +3a3y +2gmx +2fo4 +2h5j +2r05 +2qtt +4jn4 +5npf +1gt5 +4jfd +2qhr +2sfp +3rx5 +4e4l +4mao +1zkl +3syr +3jzj +5h9q +5one +4io6 +3lp2 +2i4j +3rlp +5eta +4yth +3r24 +3gsg +2of4 +5inh +3ed0 +3zlr +5llp +6esj +5c8m +3dkf +4z6h +4x6o +2j47 +1usk +5w8j +4x1p +1xb7 +1lqe +4apr +5khm +2ga2 +2ly0 +4zro +4e26 +4g16 +4uvw +1f5l +4ipn +4hwt +4g90 +1kds +4bck +5k6s +5eqp +1vwn +4uye +4gtq +6fe1 +6gjj +3hb8 +2zdm +5alz +3eyg +4lkk +4os2 +1j4q +4o36 +5dtm +2wgi +6faf +1mx1 +3lfn +2i1r +3hzy +1a4k +4zl4 +1dub +3qi3 +3i90 +3qgy +6gfy +1o2u +4xyn +3rk7 +6atv +2wu7 +4tnw +2uwo +1z6p +3p8o +3my1 +3s3q +5w0l +4tw8 +6b8u +5jg1 +2azm +4qw3 +5ntk +5jq8 +3gcq +2zcs +2pvw +1o0n +1ph0 +3nu5 +5loh +1t08 +4m7b +1qpl +4ge5 +4w9s +5fat +5l2w +5nx2 +2gst +2pix +5myn +3ump +2xoi +4r7m +5ful +1kf6 +2on3 +5wa9 +5tw3 +1q4k +6fu4 +5umx +2bdy +4ezy +4crj +4pyx +5t97 +2cet +1fao +1n46 +2ooh +3dgo +4io4 +1xjd +1q72 +3mag +6euv +4g5f +4dgm +5o1c +3ken +4d2r +5fnq +4z7f +2o3z +4xgz +4i2z +6g15 +5ypp +2x09 +5lt6 +3k5g +2xej +1uyh +4tln +4mrw +3c10 +3sdi +4tw6 +3pa4 +3q4k +5l3a +3b9g +4ohp +3nq3 +5vfj +4rqi +4x7k +2q8s +1ca8 +5a3x +4wf6 +5oh4 +2qu3 +5dq8 +6ffs +4loq +4prb +5vwk +2yhw +5eob +5dgj +2i4d +5wlo +5lo6 +5aln +3p5l +4q1y +6c4u +1gi7 +2jkt +4yv1 +3ctj +4qfr +4wmy +5c84 +3ie3 +5mnn +3l8v +6eij +1o2n +4nmv +5ok6 +4yab +2vnf +3r0w +2uyq +5j19 +1y2c +3nth +5ni0 +4tpk +1vyq +4txs +2wgs +5gv2 +1b3g +3kad 
+6hgz +3dng +4u7q +5fnd +2c5x +5ho6 +2aqu +1ke6 +5g5f +3wff +3m36 +4bb4 +3ai8 +3ee2 +5u14 +4xe1 +4ggz +4av5 +3bgc +5i4v +3g0d +4bkj +2aei +4hup +4yv9 +4uwf +2ovy +2oo8 +4dlj +2v83 +1ct8 +5qan +4nwc +4msc +4iti +3guz +6i8l +2yof +1oyn +3qti +5nf6 +5n7x +3t0t +3oyw +4ab9 +3aw0 +2x2l +2lkk +3oyq +6c28 +2ofv +1b58 +4cpt +4em7 +1t5a +4xyf +3duy +4xh6 +1j15 +3b7j +2isw +3eqs +3tkz +3dgl +3qw5 +6df4 +2jjb +5auy +2rcu +5mqy +2aoe +1d8f +1kl5 +4zsg +3e0p +3su6 +5tz3 +4hco +3i6c +2ow1 +2yjw +4nue +1uyd +4hod +3oik +2viv +4wtu +4gih +1b9s +5i40 +4zb6 +5fns +4x6h +5hg9 +5naw +1h37 +1uj0 +3e9i +3i5z +1v2v +5wb3 +3lpt +3k4d +3nhi +1cbr +5uis +3f68 +5nk3 +3okv +3krr +4gnf +3zyf +3ivg +5d3n +4rs0 +5oxn +1owd +1w7h +5o4s +3hrf +2r3h +1yp9 +5oyd +4iif +4uvb +1b9j +2bgd +6giu +5eek +3tnh +3gp0 +3bxs +2ay1 +5cap +3ta0 +3mkf +4uyf +4zx6 +5a3n +3hek +6b5r +1fbm +5wou +3g76 +2rkf +5j86 +5lav +2rkn +3jzo +3kr5 +2vo4 +4lzr +2srt +4wbg +1vyz +3ebi +2qbx +3gbb +3vnt +3erd +1o34 +1lbk +6czi +4d1c +1k1p +2bmv +1jlr +5hx6 +2igw +4o0r +2isv +3kr8 +4k3m +5xiw +2z5s +2w97 +4u03 +4p4d +1pwp +5j27 +3w5e +5a3s +1zz3 +4y5i +4rx9 +4ty7 +6g4z +4j3e +1s3k +6ajh +2wei +5v5o +4p45 +4nbl +4rxd +1cp6 +1r5h +4nnw +4cga +3kb7 +4phw +3zpq +4b3c +1wv7 +5hja +1mfi +3jzk +1hsh +6aro +3vyf +2v7d +4k66 +5eeq +5mos +3ska +4bg1 +4ql8 +4lgg +4anw +6gr7 +2x7s +4ie4 +1p05 +4zbi +5c6v +3qzv +5etq +1nhw +2xx4 +1uys +3zsq +4h75 +4d0w +4pml +3gyn +2fm5 +4r93 +5usq +4e3b +2hwi +3kl8 +2vr4 +3o0e +3dab +5h8g +4ynb +1kr3 +4w9d +4msl +5k8v +5ix1 +5zaf +6cve +4h85 +3d5m +3v78 +5uox +5d1u +4fcm +2zz1 +4i1r +5hkm +4y8c +1fkn +5he2 +3n3g +3g5v +2evl +3k3j +1utr +3ixg +4u44 +4ahs +1o2y +3fcf +5dde +3bgm +2nxd +4yhq +4k19 +5y7w +3v3m +4ph4 +1nja +5i23 +3rsv +5n17 +4bt5 +2ojg +5x72 +6eq7 +3ehx +1a5v +6clv +3qcs +5cy9 +3bgp +3nf7 +4dmn +2dxs +4msk +2wmu +4hws +5c2a +3lco +4x3u +5c7a +1pa9 +5he7 +3m67 +1j7z +3s5y +3oe0 +4kql +5l8y +5cks +4cc5 +2rcb +3bi1 +3jzc +2qzx +4u73 +2an5 +2uvm +6g2n +1dzj +6b97 +5y6d +1qvu +4ymx +1yej +6f3d +5tg7 
+2w1i +3dv5 +1vyw +3sw9 +4qz3 +8a3h +3o3j +5gn9 +4b7j +2i4u +3m8q +2fu8 +4hfp +1swi +2vd0 +3pz1 +2jo9 +3q4c +4bzo +4tyl +6fuj +2x7c +3hg1 +3pty +5f3i +4q9o +3e1r +4j78 +4ucv +6h38 +3wtk +4lno +2h65 +3s4q +3lp0 +4ajo +5maj +3d7d +4xe0 +6hpw +4p0b +5fsb +4f6u +5h0g +2zdk +6hoy +6bib +3rpv +1kel +4v24 +5k1i +4j4v +3kai +3c9e +2v59 +5cs5 +5jqb +6as8 +5dw2 +1mq1 +4gw1 +4gj9 +6e2n +4qlu +5jog +4iu1 +3tyq +3r2b +5alr +5c4s +3pa8 +2qq7 +4qn9 +2uwd +1yye +1oir +1gpy +6ert +3d7z +6dq8 +4lsj +2wxm +3pj2 +3nwb +3qxc +4hwo +3afk +2rk8 +6acb +6g85 +1kna +4pvx +5hex +3jqa +5xms +4an2 +3lcu +3rdh +2r3y +4w7t +6few +4cd4 +2gg2 +4ga3 +6anl +1yyy +3r16 +4al4 +4uru +2ovv +4ya8 +4re3 +3bgq +5lpr +5fap +5cei +3v5q +1maw +4p4f +4jx9 +4xqa +3ghe +3fci +1zfp +2l12 +4lil +6eq2 +5vdv +6b0v +4l0i +4zsl +5yu9 +1p04 +1rnt +2chm +1nhx +3ckb +2vot +5n1p +2jke +5ztn +4j82 +3hxb +3qvu +1hk5 +4pyo +3atl +3jzi +4ad3 +4b6r +3g8o +4kc2 +7upj +5eyz +6do5 +5wfj +2j4a +3kmx +5oy3 +4o55 +2qi5 +4jfs +4h3g +2y68 +3fv2 +3ioc +5jyp +2gde +4mm8 +4jjm +5ccl +5umy +4n7e +4f64 +3p9h +4hz5 +4kin +6cqf +3feg +3aaq +3sl1 +1uxb +4oee +4i7d +5ll5 +4qlk +3s9y +3t3g +2p3b +2jbj +2x8i +4ui3 +3po1 +5ji6 +3u90 +4lts +4xrq +4jzr +5nkb +1km3 +2qnx +3myg +2va7 +4ymh +4nms +5ar7 +2h3e +4lp0 +2ay9 +5trs +4fse +6cdg +6ful +4x8n +4w4x +5m29 +4ocx +3sov +4fxp +3ex2 +5uwl +4wpn +3bi0 +5nwh +1vik +4uj9 +5tg4 +5hrx +3wz6 +1mm7 +5v37 +5eol +4egi +5n21 +3vhe +3cj3 +4w9q +2qtg +4z93 +3jqb +5f3c +5ll9 +4zts +5aei +1o9d +2iw9 +5b5g +1g54 +3v5p +2yk9 +1ez9 +4c6x +2g24 +1m9n +5f60 +1vzq +1rt9 +5jq5 +3ha6 +2yj9 +4nra +2h9t +6afi +3el8 +4kni +5v79 +4fxf +3ncg +4x3s +1dif +4aa7 +4d1b +5av0 +2clm +4l10 +1exv +2ltv +6g1v +3rwh +5yfs +5xkm +5mjb +2i03 +2v0z +3udm +3i3b +1h1b +6esa +4ai8 +5ak3 +5j20 +5jjm +3wto +1std +1jjt +4kxb +1jdj +2chw +3ll8 +5j79 +5adq +4r74 +5n53 +3ljg +2lsr +3rk9 +1add +5ja0 +5x02 +5cc2 +3u9c +5d12 +1o6q +1xn0 +5f20 +4lgh +5etr +3mj5 +4km2 +4f9u +6el5 +1hmr +5n9k +3are +1lv8 +5n99 +4gly +1sh9 +4uns +4cpu +2or4 +2rkd +4psq 
+5wcm +5vsc +4nl1 +4um9 +4wa9 +3fxv +2obj +2ohm +4zw3 +5jao +6htz +3n2p +5dda +2vwy +4ge2 +5d7e +4wx4 +3sgv +5w12 +4gmc +4dij +1r5g +4r3w +5hn8 +4fk6 +5ggl +2ks9 +1tq4 +2qu2 +4oyk +1my8 +6mv3 +3kqs +3gz9 +3eta +1uv5 +2bok +1b42 +5mlw +5d29 +4qr5 +1r6n +6f6u +4pio +6cee +3mp1 +4pz8 +6fgy +5moe +5yfz +5zwh +4kb9 +2wmw +1nax +1o4n +2znu +5gvp +3ma3 +3k15 +5qae +1v48 +2psx +4d8e +1m5d +2v2c +2ql5 +3h0y +3wpn +2qa8 +1rs4 +5tgy +2b7d +4pl6 +2w8j +3tdz +2wxj +5g2b +4nrk +3gwv +1kne +3kqe +5khg +4uiu +4kyh +4e93 +2qg2 +1a4m +4byi +5ngz +4c1c +6fam +5lgq +2hb3 +4c1h +3ms9 +2pnx +4qmw +1lzq +3aid +3psb +5w6r +4c4e +4jxw +5ngr +1utz +3r7o +3umq +3hw1 +1tkc +1abt +3mbp +3i9g +1rwq +4iz0 +6dh2 +4rx7 +4kwf +2cn8 +3bar +1cny +1npz +2bys +4y6p +4y6s +5q0s +2uxx +6eaa +5cs2 +4c1e +4mgv +2wxf +5q1e +5o83 +3r4o +6fyk +1x8s +5b25 +4f39 +4tk5 +5nhp +4lov +1kui +3art +4d2v +1bnv +4kov +6cyc +4fkk +6hvj +2j4z +2p3c +1f0s +3dy6 +5lz7 +3mww +1hvk +1x76 +2aez +1bio +1lhe +3k48 +2krd +4bbg +5otc +3kqp +4esi +2rku +2kpl +1erb +2xx5 +2fl2 +4a4v +1lqd +1lfo +3hf6 +3bm8 +1a99 +3zly +6cwn +3d8y +5icz +1hgi +4qgf +6fug +3tn8 +6f3g +5kq5 +3tz2 +1xws +5uov +2o9j +3coh +3h98 +1mes +3ot3 +6f1x +5gs9 +2hha +1pfy +4j3j +4rn1 +4ze6 +3zpt +4f9y +4lbu +2nno +5v42 +2fyv +4j8m +6hjk +4ha5 +3tu7 +1izh +3o9d +6g4m +2r2b +2xc0 +1g30 +3anq +5uv5 +4ty8 +2vew +3br9 +1df8 +3fgc +1utt +6cd8 +6f4w +3l8x +4fe6 +5adt +5nzq +6htt +5k0t +3pn1 +4ryc +4nga +4crc +4xcu +3f3c +3obx +6agg +6cq5 +3w2r +3dsu +3zs0 +4wkv +3w07 +1ntk +4dk8 +3kgq +4anv +4ery +4jzd +3tjh +4jwk +4hyi +4kne +3u9q +5ih2 +2h02 +4c6u +1i8z +4bid +3lmp +5ohj +4j81 +5dey +5k8n +1pz5 +1shd +3hhm +5n0f +5kh3 +3nba +4u5s +2ndo +3shc +3vid +1ik4 +1al7 +1fq7 +2x4r +4kz3 +6cen +3s8l +2aoc +2xh5 +2p93 +4qfg +5huy +3ebo +1agm +1fv9 +4jnj +4c72 +1vfn +4xg3 +3hl5 +2xah +5n58 +3mof +3r22 +5o45 +1utm +3p8e +1kl3 +5o5h +5lpk +2zdx +6q74 +3miy +5bpe +2vta +1bnq +6f90 +4myh +5llm +4poj +1q6s +1jws +5xmp +4ahv +4e4n +4qgi +4o75 +3ogp +3dk1 +4bdf +2ybs +4ght +1e5j +1veb 
+4wrq +5cqu +1ow7 +5nsp +2wtx +2rqu +5kpl +3g19 +3gy7 +4bhi +3rjm +3f3v +4h42 +4y16 +1o3g +1utc +3wzu +6b5q +1u2r +3sx9 +1nju +3drp +6azk +3r9d +4awi +1ce5 +5vd2 +5kzq +2bal +1y2f +2xxt +5hlp +2y8c +5auu +1bwa +6gbe +2zv2 +3f7b +1e4h +6arn +3ryw +6fr0 +5wbz +1c8v +5kau +1v41 +4oew +5fbo +3l9h +5hi7 +3igg +4hy0 +5enf +1cwc +5yjo +2e92 +186l +6epy +3qk0 +6cq4 +2jkq +3gb2 +5xmt +2ybu +3arz +2o5k +5c13 +2amv +4std +4gfm +2fm2 +4pv0 +3qx9 +1hnn +3bl2 +5dhp +6c7i +5pzn +5ur9 +1pye +3upf +3ooz +3zcl +2vkm +2jdl +2uyi +1usi +1ezq +4qz0 +3g5d +3ry8 +2vnt +5ehy +1mqj +5gs4 +5xg4 +3h59 +1bv7 +1br6 +1mns +2ybk +1bcj +3ktr +1e02 +1ykr +5vja +3r5j +5edu +3kga +6b5t +5f74 +5qaq +1jpl +5hmi +3mjl +1eei +4n8e +1hrn +2o4s +1agw +4yz5 +4wn1 +3tc5 +2pjb +6ea1 +5epy +3vc4 +2i0g +3ah8 +1ogg +1onh +1tmb +6bau +4ad6 +1a3e +4drn +1aw1 +5fl1 +2h2j +4prg +4ibe +2yem +3fql +5zwf +1x0n +3pi5 +6ehh +1cps +5akl +3g2t +3vsx +4dhl +1hsg +3vhv +3ro4 +5myl +4lyw +3sl5 +3e92 +4o91 +3ipa +1imx +1u71 +5dx3 +4hvi +1h5v +3p7b +5v8o +2rg5 +5nx9 +4i8x +4rcf +4yrd +4bcn +4d09 +5f8y +5mwp +3n76 +3gba +4lyn +1unl +3e6v +4dce +3rsr +1nlp +5dxg +3rcd +2cji +5zwi +3o9a +2x2k +4n5d +5l4f +5gx7 +5yqo +4gzt +3tki +5dd0 +5ivt +5mnr +3asl +2oc0 +3l3n +5izf +1i6v +2y56 +4x0f +2h4k +3ivh +4yy6 +1drj +3hku +3kfn +6gmd +2c3k +1w5v +5oha +4myq +3new +1o5c +4k5y +5kbi +5of0 +5dhs +1y19 +3hky +5nka +1wax +6h9b +3i7i +4b6c +1h3h +5xw6 +4jql +4cra +3d4l +2nwn +2zyb +3rm9 +3q6s +3g4i +3zqt +4bbe +3olf +6gxa +5h4j +4ljh +4a4f +4nw5 +2vi5 +3eqy +4tpp +1pi5 +4bt3 +3kiv +1pb8 +4xyc +1zd2 +2vur +2k2r +5j1x +3pfp +6g37 +3nsq +6eq8 +2wi6 +1bxl +5eci +5jrq +5ehr +4qz1 +1nmk +5zkb +4f65 +5a7b +5sym +3wti +1j1a +4nwk +5l8t +5k0f +1mqh +1h3b +1wxz +3tt4 +5k0x +2ez7 +5wbp +4ddl +2ygu +3e7a +1aqj +5m7s +2odd +4eon +3fq7 +3vf9 +1pxm +4umt +4rlt +4f8j +5dp9 +1ung +4r5w +5ais +4btu +4hyb +3nyx +3nif +5hmk +2qyk +3tz4 +3qxm +6isd +6g97 +5ccm +5u0z +2wbd +3aav +5vzu +2hai +6evn +2yga +1p17 +5fus +5elw +4azt +6ea2 +1xbo +2yhy +4u0m +6b3e +5hff 
+3iu8 +1dqx +4o0v +5t1u +4gs6 +2xei +6gn1 +2pze +2j87 +4hy1 +3udn +5nmg +2pgz +1wdq +4er1 +2l6j +4cmo +4gbd +3itu +5a7j +3su4 +3r6g +5ea7 +6eiq +5qb1 +4o61 +3grj +4xjs +2x85 +3b66 +3le6 +5t19 +3itc +5ett +2esm +3dc2 +3p2k +3u7s +3c6w +4fcr +2clo +3d0e +2jqi +1ekb +4ovz +3soq +1hef +5os0 +4htp +4tk2 +4j22 +4etz +3gus +3s76 +4pyv +3c49 +1ktt +2yz3 +1xnx +1elc +5i24 +6eif +5t9z +2qt5 +5h1e +5ekh +5vdw +3fbr +2za0 +5wej +2yel +4anx +1rtf +1r5y +1pzo +2ydw +5kam +5i13 +5exw +2xyr +3qs1 +2x9f +2np8 +5ci7 +1m7y +1fcx +5ok3 +1hp0 +3l79 +4jq8 +4bhn +1i9p +4hyh +1jil +5f2s +1bwn +1xh7 +5kbg +4bg6 +3qfy +4ycu +3lxk +4w57 +4kxl +5mk9 +6er4 +2rcw +4jpc +6h1u +3iae +3fnm +3mxr +3c2o +5eva +4kbi +3ogx +5owa +6bmr +4mgb +5ka9 +2bza +3sad +4wj5 +4o1d +4pnc +4mzf +1f9e +4whl +2imb +4llk +4ih6 +1vjd +3stj +3usn +5tnt +4i4e +4hg7 +5dxe +1hc9 +3dcc +3d27 +5azf +5c5h +4m7x +1ysg +5nve +4dcy +2qrh +1r9l +3l0k +4iuv +5j31 +5ulg +2wc3 +2jxr +4qwx +4joe +4djh +3sm1 +4prd +5etn +1ihy +3ut5 +4gjc +5c1x +2oxy +5can +5hz9 +4uxq +3nzi +4hfz +4mn3 +1jif +1svh +4zsa +5y24 +5f9b +4mka +5wf7 +4o0y +5w8h +1t1s +5w7i +4o2e +3g58 +4jmu +3dg8 +5u7o +5owt +2oic +3mt7 +5ka3 +2wgj +1orw +1vea +1j01 +4yec +6b7c +5wg9 +2fv9 +1e55 +3cfs +5nhy +4x6s +5ak0 +4q1w +4o5b +1b32 +2fzk +2p3g +1h24 +3qfd +4xu0 +3pe2 +5ow1 +4uyg +3du8 +4cby +1mtr +3bug +4uj1 +4omj +2y58 +1onz +3esj +1y2b +1zyj +1fig +3cd7 +2hf8 +2v3u +1o4f +5w0q +1ql7 +3kv2 +2wez +2loz +4zg6 +5f4l +4mo4 +3mam +2h44 +3que +4y32 +5cwa +5hyr +4mnp +3wix +5tw5 +4dem +5di1 +3zlq +6en5 +4ogv +2r6y +4k8o +6c7w +6eqv +3dx4 +5qa7 +2qlm +4tn4 +5vm6 +4x5r +3k23 +2q54 +2pe1 +4k2y +4rfc +6hqy +4ie3 +2qbq +5o07 +7std +4mm7 +3axz +1a85 +5etb +4ylj +2vpe +2qln +5zfi +5wbk +4bty +1nvs +5ftq +3dyo +2c9t +2h6b +5nz4 +5ar4 +1xfv +6g6z +4e1n +2q2y +2zwz +2ozr +3fei +2xe4 +3nin +4py1 +4j52 +4mrf +1ngw +1m7q +4m8t +4qhp +1my3 +5fgk +2ff2 +5qat +3chf +4kox +2nsx +5v1b +1det +4cq0 +2iwx +4isi +4bcg +1pro +4ebv +2jh5 +4mm6 +4b82 +4m5m +4pp3 +3v31 +4yt7 +3vp4 +3imy +1rbo +4b1c 
+2w7y +1k9q +4erq +5tzh +2a3x +5f61 +5orj +3efr +5g4o +3el1 +5lkr +1mn9 +5wdw +4oym +2iko +1h39 +5ddd +5nu1 +4qz4 +1o5p +4i9c +3wmc +4ajl +4zud +3kig +3wde +6fdu +4psb +4bw2 +1hk1 +1pph +3wut +2ate +3nw9 +5ceo +1g2m +1o3i +5ald +2hjb +2cvd +5za1 +4cnh +4pnw +4uwl +5c1w +1m0n +2nsj +4imz +4x3k +4zgk +3pvu +2fah +6ex0 +6gz9 +1ke8 +3tkw +2xfj +4bo9 +4cqf +5sy2 +3fh7 +5u4f +1k27 +1msn +5ow8 +5ekx +4re2 +1dar +6iil +1bmm +3tsk +3tzm +3ttz +4ibd +4zx5 +2c4w +1qf4 +2xgo +3bm6 +2zfp +2uyw +3h8b +4acx +3ouh +4zy1 +1c1u +3btc +1azl +4ivd +1o4i +5evb +5dpx +4k9y +3wtm +6mxe +5ygf +4mpe +1o2q +3ru1 +4qtb +3qw6 +4yb5 +2hzl +1b5i +1dbb +4gj3 +6bfw +5y5n +5jdi +1kv5 +4der +2xc4 +3s3r +5lso +5cu4 +1ysi +1n2v +4jje +5akw +5f3z +6dl9 +5wgd +3muk +3uw9 +1o32 +1jq3 +2zpk +3hk1 +2f1a +2haw +4psx +4i54 +5d3s +5m56 +2bkz +5dxt +1qw7 +4abu +2ggb +5fjw +4k5p +5als +2xjg +5eqy +4g2j +2a4g +3hf8 +5em9 +4ztl +2g1q +4phv +2ylq +5tdb +6dh7 +5kz0 +3rz3 +2vww +5t4b +5xs8 +5j7g +1f4f +5mev +5cgd +3vrw +5t8j +2o48 +4cgj +4cts +4cbt +5ofw +3q2m +4ce2 +2qqs +2j75 +4yw7 +5wip +6afl +6b16 +2cgu +6exi +3mt8 +4o24 +1z34 +3fvg +1w6j +3py0 +5nkc +1os5 +3t70 +2nwl +6dq4 +1n3z +4rme +5z95 +1tx7 +4yc8 +3t0m +4e5w +4f1s +1hvr +1xzx +3fty +6au9 +4eu0 +4d1a +2p7a +4z2k +5ap7 +1sqb +3wt5 +3eqb +4o3c +1rgl +3r7q +2ay8 +5enm +2ha7 +5ak5 +6dug +3wgw +6bfn +2fx8 +4rxc +6bny +6h36 +5ugm +5trj +5xpo +1jmf +3ugc +4jhz +3p44 +1zp5 +1v2o +5tbe +1o2r +4wsk +5vdk +3rni +3kc3 +4mi3 +4pb2 +5euk +5ukk +2k2g +4a4q +3m6q +3btj +4gtv +5u0g +4zh4 +3v2p +3kqt +4im0 +1ni1 +3wp0 +3vi7 +4tkn +6bg3 +1o1s +5d3l +2qc6 +5lp6 +2br6 +5ku6 +5d3c +5ny3 +5hjb +4txc +3waw +5b1s +4trz +2pwc +3nu4 +4x3i +3djk +3qw7 +3ivx +6b8j +1y0x +1ros +4rwj +6bto +3ztc +1jlx +5h13 +3l0v +4ycn +5qam +4ke1 +3wv1 +4mep +5tzc +1bil +4fl2 +4u6r +4k6i +4g2r +4pg9 +2hzn +3r8v +5d9k +4gg7 +3d9n +6cct +2c93 +1uxa +3nfk +5f27 +2w6n +3k8d +4pl4 +4b9z +4uiy +5fl4 +2bbb +3b50 +1qon +6ce8 +1ydk +4jv8 +2bw7 +1li2 +4qag +3hyg +2qtr +2cen +4k3n +2fes +5kat +1dzp +3gcu +4nbn 
+4ctk +1okx +1nzq +5fb1 +4r5v +4b7n +4det +2xm2 +3exe +1jje +4wup +3sjt +6h8s +6bx6 +5tqf +3lpk +3h1x +4ps1 +3c72 +2dua +4w97 +5diu +2cbs +4zy4 +4alx +1y6b +1tnj +2vo7 +4qye +4no9 +5ct1 +4elb +5sz2 +4u0x +4ztq +2nnd +1xom +4u0a +4abj +3uh4 +1pop +3hv5 +3zll +4rak +5u7m +5tuo +5ur1 +4iqu +3kac +1t7r +4lqg +4um1 +5mwa +3djp +4zeb +3r91 +4o77 +4r5t +4ijq +4mxa +3d0b +5ld8 +3hdz +4bxn +3vb5 +5q0j +3rx7 +1k9s +6cjy +4zjj +5i3y +2wos +1bbz +4rx5 +4nzm +1m5c +4djs +5ect +5ejw +4fvr +5vtb +6f20 +1u9x +1zea +2zzu +3rxp +2z3z +1oeb +1gux +4g8r +1hk2 +1g4k +5dfp +5vc4 +4g55 +4o7d +1nc3 +6cdp +4gu9 +5gvn +3l8s +4bis +4lwi +2baj +5wik +5a7i +4pnq +2xde +6e4u +1b38 +3wzn +5nn0 +4fl1 +3q6k +6eab +3s68 +2dq7 +1yw7 +4x8p +5vgo +2o9k +4yo6 +4lv1 +5ose +4ogt +5e8a +4k76 +2pr9 +1z3j +2g72 +2l1b +4gdy +1lq2 +6g0q +6cv8 +5tri +1g7q +1o4g +4cg8 +2ohl +1v2u +4b83 +3wyj +4hrd +4kfq +1no6 +5lax +3egk +3f7g +4deh +5tmp +4rad +3d78 +5l72 +3avj +4rg0 +4qev +5nt4 +5svk +2w08 +3lxo +3ljj +4kif +2jdo +1v0o +1k3n +1rgj +4zt8 +6dub +1zdp +187l +6dkb +3n6k +1tnk +2h2g +4mjq +4a9i +4pre +2gni +3bgb +4ddm +2p99 +1mwt +5nhl +6b4w +3sou +3sm2 +5qik +1loq +4yqh +5mi7 +4ynl +2f9v +1qbn +2r2m +3uod +2w0z +4ewo +1fsy +1tmm +3va4 +6b0y +2adu +5dtj +3k3a +1sm2 +3ozs +3o8g +5g42 +4h3a +2fwp +6fbw +5tzw +1xkk +3tdu +3efj +3as2 +2ity +4zwy +2nmy +4amy +5kad +3qtv +3ncq +3p58 +6h1i +5x13 +4d1d +3zev +6aum +1ftm +4dsu +4hla +2qoe +5em5 +3u0p +3dx2 +4kn2 +4ce1 +3np9 +3t2w +4b7r +5o9q +1rth +3eq7 +3i0s +2pgl +2yj8 +2z6w +3v9b +6fs0 +1dhi +4hdp +5svz +4mz5 +1a1c +4p0n +3n7s +2aow +1j19 +4k3o +1m7i +3s9z +1igb +4o9s +5xyx +4mex +5jy3 +4jht +3hxc +2c3i +4itp +2fl5 +4b12 +3o6m +2vd4 +3tib +5yj8 +1aze +4yad +3w37 +5e8z +3pz4 +2p4y +2xda +3n4l +1i1e +1tbz +2bxu +1g6r +4h5e +3fuz +1f2p +1b51 +5lm6 +5gx6 +4wn0 +3h21 +3vd9 +1x39 +4bjc +6d5g +3nnu +3d32 +3l9l +5i3w +4kqo +5w38 +2p2i +4ohk +5l2i +4jlh +4whh +3u6i +2khh +4gu6 +1snk +1ogu +2p0d +1zeo +1iq1 +4knb +3lrh +5a5r +5iok +5t6p +4dx9 +2a0t +1c5x +1qxw +3d83 +3pwh +5g46 
+4wy6 +1yw2 +1bm6 +4yhm +1tka +3qpp +4g19 +1pdz +4fnz +5mby +3zt1 +1y3v +3tv4 +2qmd +2phb +1bzf +1m2r +6g46 +3hvj +3mhl +4w9c +4imq +5k4i +3abt +3vuc +4oyt +5qb3 +5yv5 +4n1b +1pip +5iu8 +3zke +5f1z +4qvx +4zlz +4qfs +5ai0 +4op2 +3tfn +5cnm +6dge +1htg +3rey +1o6r +3w5n +4l52 +4b76 +5qar +4nj9 +2kaw +4ezz +3hkt +5e1b +3ufa +4btx +5mgj +2ha2 +5cst +5ea5 +5kgn +3jq7 +1qy2 +4fpk +3hq5 +1at5 +1hge +5ty1 +4b3d +1lyb +5mrd +4zt4 +4ovg +4pp7 +2w5i +6ced +4elg +5j71 +3i6o +4mx5 +4pnm +2g0g +1x81 +4wkp +4kxm +1wht +6q3q +4gee +2f70 +2wd1 +6aah +4rfm +2wvt +3dx3 +5t4v +1owh +4k7o +4z7n +5lla +5ehw +3jsi +1ohr +4k3h +5y8y +1pu8 +2mg5 +3l5f +3tfk +1t31 +3t08 +4iho +3i91 +5fdg +5ntq +5wa4 +2iqg +5tol +4tz2 +2d1o +5ene +4mc2 +6ax4 +4aoi +1u3r +4ngq +3src +5zzw +1sw2 +6g2m +5fwa +2q7m +1xn2 +5g6u +1puq +5ha9 +3s56 +3ckt +2jaj +2qlb +1l6y +4tzq +5u9d +4nzb +3ccc +3isj +5kbr +3u92 +5duw +3ijh +1uti +5tw2 +2hd1 +4wuy +1riw +3u8n +6flg +2qzk +5w6o +2v2q +6e86 +4e0w +5jf4 +5a8y +2nq6 +1tsy +1rek +2xht +2gtk +4kom +6en6 +3zmz +3efs +4whr +5j1v +1ld8 +4o37 +5qa5 +4c9w +2zdz +1l5q +3kek +2olb +4dru +2h42 +5jnn +1w0x +6chh +3av9 +3uzc +1l83 +2osm +5i89 +5wb6 +3vdc +1b4h +5e0a +1nyy +4bqg +5otr +5axp +2w1f +4z1q +4q4q +4o07 +3g0f +4alv +4jfx +5kjn +2jal +4u0w +2k0g +3uwk +3k26 +5iq6 +6hjj +3o0g +3iu9 +4xg8 +4xya +1zfk +3jvk +5wxh +3t0w +5yg3 +1xpz +4b60 +4j2t +3ds0 +2l0i +5y1u +1pxk +4da5 +1z6e +6fi5 +4pry +8lpr +2ya7 +3zv7 +4bnz +6bh5 +4x2s +2vth +3tvc +5fky +1i43 +5jau +4qsw +1m21 +4xsy +2pvn +5htl +5o7i +3skh +5wic +1fvt +4rio +3si3 +6dnp +4c4n +4ufu +2ork +5dkr +3m59 +3tmk +3btr +5nps +2oah +5ywx +4nxs +3r8i +1z1h +4jik +3oui +4pzx +5o1g +5auw +6cjv +1t46 +1uwb +4lgu +5xvu +4x3t +5dtk +3qkl +3meu +3ove +5mw3 +2zcq +4n7y +4e4a +2avs +4e3l +1hbv +3tl0 +3g15 +6c5t +4ayu +4aba +5ddb +5mmg +5k09 +3fj7 +4y3j +3dxg +5a6n +5lvd +3c2f +1w51 +4m3m +4x6k +6h7b +5gow +5n6s +2f18 +4kvm +6d5j +2qhn +1ppm +5y2f +3fi2 +3vzv +4r73 +5h5q +5gic +1g6g +5j47 +4tmr +4inb +1w6h +1inf +5alk +3u8l +2bvr +5bnj 
+5nk4 +2c6g +5tbn +5ug8 +2wbg +6bgv +3fzc +5mgf +4ngp +4akn +3k27 +4gfn +1vjc +6ckc +3bqc +2k0x +4o0a +4m1d +3cfn +2ds1 +3wnt +2ogy +6d28 +6ffi +6eu6 +2it4 +6h78 +3s0e +3u1y +3r04 +2nsl +4knj +5q14 +1e1x +5cs3 +5hfc +5zob +4erz +5ant +6ft4 +3th0 +1ijr +4lnw +4pyq +2r3t +3ik3 +5mkr +2gz7 +1joj +1j17 +1c5o +3avf +3u0t +1c85 +5w99 +6d4o +2qic +2tmn +4cc6 +5q1h +5k5n +5n55 +3rxj +3njy +3srg +4bgm +4m0r +5nxv +5j1w +4bbf +4mot +1tl7 +6gf9 +4ivs +1bmn +1bju +1w4o +2pmn +1k6p +5akz +4ca7 +4joa +1mfa +4qqc +5e2q +1dgm +2fxu +3ejp +1a1b +5k0j +3h5u +5ha1 +2rg6 +4b77 +4hzt +5fe7 +4pgd +5uwm +4rro +4rr6 +4o70 +5cfa +4ke0 +4ju3 +2wtw +2qhm +3m3z +2vqt +5q0d +3gcv +4fmn +3s7l +5kgx +2n9e +2vmf +4x63 +3fc1 +1ok7 +4pzv +5dd9 +4nru +1udu +6fmp +5fxs +4dk7 +4e5f +5ml0 +3v6s +4eeh +5cjf +5e1o +6bw8 +3h2f +1yly +4avs +1sri +1avn +3i25 +4fm8 +4jg7 +3fu6 +1g7f +4l1u +4dr9 +3ocg +5wal +4dq2 +1uz8 +4qjp +2onb +4hgc +2lyw +5xgl +1ydr +2r0h +4q4s +5j64 +4riv +4cp7 +5d9p +1il4 +2qaf +1idb +3ft8 +1hos +2xnn +5myv +5ti4 +3gi5 +2wxh +4eo6 +3spk +1g3f +5iy4 +5h1v +1mem +1rdt +2c92 +1n7m +5dif +2py4 +4k9h +4ehr +1nfx +4cst +3h0v +5z7j +5ta2 +5nu3 +3k3i +3gpe +3zq9 +5cqj +6axk +4len +4nmp +3b25 +3ch9 +5djv +4hxw +4euc +1hvy +2jk9 +3ncz +3bu1 +4bb2 +3wqh +6alz +3qch +4jv7 +2jmj +3nan +5vds +1oth +2aie +3dln +3mv5 +5egu +6dki +5am0 +3vrt +6dpt +1ew9 +2w6u +2ggu +5jsj +2xwd +3zya +5uwp +2exc +5e28 +5k32 +3g4f +2xng +4x5y +6esm +1gi4 +3d2t +5ttw +3ekx +2uwp +1klg +4y2x +2dwx +5lrq +1od8 +1ceb +4ci2 +4m5i +1o9e +1nq7 +3ejr +4wrs +4eor +1g1d +5ofi +2kce +5eko +4mhy +1ndy +2cc7 +2nnk +5ybe +1svg +5aes +4pkr +5j5x +1cka +3n7h +3pxe +4kz7 +1tcw +3v5j +2xp8 +4bv2 +1gyx +4i6b +4oyb +1ox9 +2wf2 +3zvv +5f90 +1rdj +4azb +3o5n +4uub +3d1e +1n5z +6b1y +2o7e +3o9v +4hf4 +6ety +2qi6 +5jas +3gss +1c5c +5l9l +3ifp +1czk +4fi9 +1cs4 +5e88 +1mui +5vp1 +3vbt +3sh0 +4avw +2asu +1qf1 +3hjo +1iiq +2r2w +1ztq +2c8x +3wtj +4lys +4lv2 +1u9v +1dx6 +3mtd +5ii1 +6d5h +4nku +1w1d +1e37 +4pte +3c7p +4glx +1hvi +5f4n +5f6u +1y8p 
+3kwb +5ggj +3gx0 +3ztx +4uiz +3ueu +2vl4 +2wnl +4cl9 +5aqn +3ipx +1i72 +3dsz +3mhi +4ap7 +2xcg +7prc +4e3o +4int +1zz1 +5bjt +2xas +4ny3 +5fi7 +1owi +3qd4 +1ew8 +4hdc +4ac3 +3h52 +5nzp +1lbf +1b6p +2ylp +3qxt +3kyr +1ksn +3twp +3ti8 +6ewe +3t19 +3eu5 +4dvf +2flr +1oe7 +2or9 +1v2m +5fjx +1m2x +2c80 +3nm6 +4nak +5y12 +2vwn +5chk +4dv9 +5akg +5x9h +5m7u +6axp +1upk +1j36 +1uv6 +3f2a +1yet +1ov3 +4kwp +1eoj +1yms +4o4g +3p0g +3tsz +2hrm +4tz8 +5k4x +3hzk +5w5s +4ekg +6gh9 +5l26 +2lsv +6eqa +5fp0 +2zc9 +2jk7 +3h03 +3wb4 +1xow +4hra +4mk1 +1y20 +5yc1 +5lx6 +2er0 +5x4m +4yih +3mbl +4a7i +5koq +3cx9 +4zom +5sz6 +2vqm +4yyn +1syh +1kwr +6cyb +3pm1 +2zym +5fv7 +3shz +4dmw +6b5j +1wn6 +5vd3 +1njc +3ew2 +5ia3 +4a4e +4a9n +4iue +2ewa +1akq +5ew9 +5n8e +5a54 +4kow +5w6i +5mgg +5oh3 +4hmh +4f3i +3kd7 +1w9u +5da3 +4n6y +5jid +1juy +1f0r +5iw0 +4ui4 +3tkh +2gc8 +5qc4 +3wzj +5l7h +2kdh +1uyk +3sv9 +3uex +5os8 +4b7q +5a6i +5om2 +5lgu +5uwk +5u1q +5unh +5nw1 +4ck3 +1g42 +2e9d +5k4z +4l7b +1so2 +6esn +4pzw +1ida +3jzb +1o2h +5qby +1nde +6dne +1f8d +1nvr +5ult +1db5 +4fr3 +1g27 +5alo +4o15 +4ryd +4oar +4zy0 +4pd7 +2aq7 +6etj +2zga +5luu +2l8j +2wky +4yrg +2v22 +2ndf +1jsv +4u6z +5wdl +5nxi +6i8y +4u6x +4mw5 +4ufj +3ith +1wb0 +3arq +3dp2 +1lst +3fqh +2ivu +1s5q +6ct7 +3pma +2ay6 +5e90 +1re1 +4l32 +3g5k +4bw4 +1pig +2xne +4qme +5ugh +1cpi +3wd9 +1o5m +5wfc +4tun +2y77 +5laq +1ax2 +1b39 +3hrb +5dah +5iza +3thb +4bi1 +3arf +4gwi +6h1h +5yzc +5tg6 +3vap +4lkh +2q2a +2fle +2vwf +1g2k +2zz2 +5xhz +4zim +5fwr +6g9i +6bqa +1g45 +2ao6 +3is9 +2qmf +3wt7 +5vlr +5fnr +6cf6 +5z9e +5t1m +3uda +3ur9 +3dp4 +6c7g +5kmf +4hev +6d8e +5acx +5ouh +2fde +6c0n +4wt2 +6cgp +5zun +5dyt +4cwb +2xcs +4z1k +4b11 +2i2c +5ko5 +2ccc +1xgj +6ayt +1ao8 +1c5z +1b9v +5toe +4bup +4pox +1gnn +3a4o +5kqg +5am1 +1i37 +5uln +3gpo +2g01 +3fx6 +5nba +1ke7 +3my5 +4inr +1jp5 +5eqe +4muf +5xhr +2wmr +3ebb +4mwe +5ap4 +5nwg +4bde +4x8t +5tbp +1nl6 +4oz1 +3hvc +1flr +3pz3 +4pkv +4drp +4awk +2ww2 +4tw9 +5b4k +4o3t +4fev +1o45 +4bcj 
+3upz +1qkb +5nho +2x8d +1aq7 +5y21 +5i12 +3bsc +4wki +1ouy +4jv9 +5o9h +4ez3 +3dd8 +5h7g +1sqt +3pn4 +5aab +2ha4 +4enx +1bap +6enm +5za7 +3dbs +4mvw +3vrv +2azc +3mss +3i60 +5hld +1zkn +2wxq +4y87 +4m8x +5f6v +4fhi +5auv +4l7c +3wcl +3gqo +5dhh +5u7d +6grp +1xm6 +1lpz +2wf3 +5cvd +3zmj +1g6s +5eud +3wyl +4wh9 +2oq6 +2who +2nmz +2xiz +4hbx +6bee +3ryx +5ans +5zvw +4zwz +3q96 +4v05 +2pyi +5jf2 +1wbv +6c7x +4qsx +5m6u +1m0b +3qg6 +3nyn +1rm8 +3lhj +3g86 +1ny0 +3zj8 +3o6l +2vb8 +3p9l +4bnx +5uch +3sv6 +4ynk +223l +5orr +4o0j +3rxm +3rw9 +4po0 +4z84 +4j21 +5yjm +5n25 +4bky +5svy +5gmn +5u7l +1f1j +4aji +2oj9 +3oli +4r4o +6ayq +4ql1 +3f7h +1skj +2wfj +1wc1 +2r3o +3m5a +4lwc +4j5p +3n46 +3ttp +1jjk +5vb7 +2pj0 +3dog +2bve +5vfm +4l2x +3uvp +4wno +4y38 +2c69 +6eol +2zx9 +4q9s +2alv +3ip6 +3sio +4xoe +3sn8 +5y48 +5u7j +2g9u +5f3g +1xh4 +4j79 +5ghv +2i2b +1gah +3kr1 +5npd +5mks +1jwu +4inh +1tsl +2wq5 +6hkj +2yek +1hpo +2e9u +3mwe +6aff +6e99 +5vlp +2uxu +4u5t +2hwo +4ury +5byi +4v04 +5e2s +3lj3 +4bt9 +4jt9 +4rce +6b5a +4mk2 +4idv +4bah +5lj1 +3nal +6d1m +5kpm +5sxn +5doh +3m6f +3u81 +6bsk +5nfa +1gui +5i1q +4m4q +4ow0 +5ur6 +5iu7 +6d55 +1bma +5uxn +1oiu +4ln7 +2a5s +5u5k +5e4w +6afg +1oyq +4xtv +6eji +2xdl +3at1 +1njb +4u0g +1tou +2pj5 +4q1a +3avk +6fmj +2lto +6czc +3u8m +2y5l +3fjg +5uwi +6m9c +5m3b +3jxw +4zqt +5vc6 +4bj8 +5knx +3pix +3e6k +4f7j +3koo +5n3y +3jdw +4pl3 +4nmo +1ody +5tpc +6bsm +2cm8 +5gn7 +5hu0 +1yq7 +2gga +4wy7 +2brn +2fmb +5h14 +4zsp +3nus +5n8t +5x5g +5yls +5u3b +4x14 +4puk +5wa6 +2y2h +4nmt +3uol +4jzf +4um3 +2wi2 +5k7h +2ccu +5x74 +3ddb +1xd0 +4mm9 +1ulg +3coz +5epl +4ear +3vzg +4qac +4l5j +1ai6 +3d8w +4mi9 +2wtv +5iee +6ge0 +4q3r +4qb3 +5cdh +2vip +3pz2 +3f34 +3rin +5n2t +5oqw +3p4w +4n5g +2pvl +1b0f +5a3u +4p6x +3rsx +3rcj +5i9z +5ai6 +3c4f +1o4o +1sje +3alt +5f41 +5yid +3bgl +4rdn +4hso +4ttv +3ok9 +5y0g +5twx +1wug +4a6c +3gxt +4lzs +2zx7 +3jvr +4as9 +3f1a +4cd6 +4r06 +5h5s +7gpb +5w0i +4wp7 +5e3g +2ydt +3hec +4abb +5ji8 +4olc +5ieo +5i2z +4zy5 
+4qga +3t82 +4dff +2wr8 +3i7b +3v4v +4gw8 +2hxl +4bkt +2wj2 +4xwk +1xk9 +2fgh +3tv7 +1ajp +1tps +3ag9 +3ls4 +5wg3 +1cwb +3mea +1bjr +5auz +2y67 +1q9m +2i3i +5ivj +2vd1 +1p2g +4l7n +5efj +4jzi +5mt0 +5vlk +3rt4 +3zmm +2wn9 +2a25 +4hmq +5jim +4zyy +4bi7 +1f3j +2b52 +2xkd +1yys +3vtr +3zrl +4kao +5fnc +1ke5 +6hwz +4d0x +4mzs +5mi5 +4a6l +4eg5 +1sgu +1w11 +3dkg +1s19 +4wyo +3ppp +4rlu +2jjk +5ovp +6cd9 +3qio +4wrb +5h9p +5na0 +1wbt +4ael +5eu1 +3nw6 +1c5s +1bl4 +4ual +2vsl +5vil +4o7f +4dhr +1pw6 +1b5j +3iad +3rwg +5wev +2pjc +3vi2 +4xum +2zmm +3fv3 +3ukr +5myo +3lbl +4ppb +1q0b +4abi +6bfx +3kmy +1q6k +5ygd +4z90 +4omk +5bqh +5lrj +1ta6 +5jzn +3ad8 +5izu +5ief +4frj +2pre +4ty6 +5nut +3zps +5e2v +4k43 +5e7r +6ftz +3rjc +4uvc +3uix +5up3 +4i31 +2p15 +1ag9 +1gu3 +6cw8 +5ei8 +4psh +4i71 +6f9v +4yoz +6hu3 +5e2m +5ap5 +4fob +1uod +3e63 +3mke +3io7 +4whz +4mt9 +4x21 +1ppi +3t2p +5q0g +5jjs +5icp +4g1f +1csh +4mwr +5al1 +4qxq +3o9c +2wxo +4xmo +4ii9 +5mlj +5hm3 +4wr7 +3iqv +4xjr +4w9w +5fhm +5o2d +5osk +3bv9 +3i7e +4kz4 +2oz7 +2vzr +4d4d +6eog +1sfi +5ehq +5uy8 +4u0d +2l11 +5v2p +2vtm +5ef8 +1zgv +4b2l +3g32 +4q4e +4x50 +2oax +4r8y +4c61 +1w2g +5er2 +6b2q +3up7 +3r01 +5izq +3l6f +3p9m +6ey8 +4b74 +6fyz +5g1z +3t4h +4wkc +2o4j +4bdj +5mek +4gvd +5qil +5go2 +4m3f +2e2b +4bi2 +5l0h +5ugb +2ynd +2wzz +5wyz +5may +4eev +4kiw +2y7i +3tcg +3s71 +2qcd +1vru +1h00 +4uat +3nyd +3bkk +4n9d +1bim +5zt1 +5ml2 +3fpd +6h7o +3p8z +4ewh +4uva +5a3q +4dai +5o55 +3f69 +4puz +6f05 +3g35 +4qij +4dxg +3n1c +4dt2 +5u2e +6c7j +4cj4 +2w7x +6ccq +4qsv +5upz +5iu4 +4x0u +3ffg +1y6q +5yba +2f35 +4gsc +1jt1 +4qvl +1mwn +1jeu +1c3b +3ual +1rdn +6dh1 +5iub +5mav +3t83 +3iof +6cwf +2fpz +5ml4 +5f2f +1fpc +7kme +1erq +5t9u +1lhv +2qlf +5jmp +4s3f +6hm6 +2qt9 +4p4i +5vih +3mnu +6djc +5db0 +4ca5 +5mar +2w77 +2avq +5eqq +1tsi +2vw2 +2ggd +6f6d +1f4g +3ndm +3l17 +5fni +1c3r +5trf +1pau +3vf3 +6ewk +4re9 +4li8 +1n4k +4l7o +5kbh +2gj5 +2oh4 +4wht +1joc +1qxl +6elp +4qge +2yfe +3g8e +1cea +6czu +2fs8 +4mk5 +4i60 
+5h5f +5tcc +4dpy +3fh8 +4h1m +2anm +1uom +3oim +6ezi +3da9 +3r17 +1l6s +2uzv +4e1e +3d28 +4bcd +5kez +1bn4 +6biz +4lwt +3jpv +5fsm +4mds +4e6c +3o9p +2ylo +5fyq +4hxq +3mj1 +1zm7 +1s63 +4nk9 +4qmm +3g7l +4zyw +2q8i +3gfe +5ih6 +4owo +5o5f +4x7q +4wy1 +1qbq +5du6 +1oe0 +1g2a +4iie +4x5p +1fhr +1ntv +1o5f +3os3 +2cbz +4arb +3lxs +3lxg +2g94 +4gv1 +5n87 +1gz9 +1b6l +5kcx +2xni +3ig6 +1p4u +3r9h +5n2x +6aud +5nee +1bnw +6bo6 +3kaf +2fvd +2xaj +4a95 +5iv2 +3n2u +4q6e +1i2s +2f34 +6evo +5ksv +2e9n +5cgc +1m51 +1d4h +5ncy +5kls +3s2p +4o09 +1zaj +1f0u +4k3l +4tww +5f88 +3fcb +4arw +4o9w +2ay7 +5dcz +5dqf +3ttj +3lc3 +2xg3 +1uml +3qt6 +4oty +4wzv +1hxb +5l87 +4ah9 +5ma7 +3npa +4hs8 +4qgd +5zkc +4d2t +5elz +1jk7 +5cuh +3jqg +5w4s +4n7m +3tct +5ie1 +4n1t +5aqh +5oku +5nhf +2yge +4gs9 +6bu3 +5vqr +2am4 +5wr7 +4j58 +4zsr +1yon +2bkt +4buq +3u7m +3u51 +3w55 +2gfa +3fum +4hyu +4pra +4kyk +4gzp +3ekn +3v2w +2pog +2bb7 +3qj9 +5d3t +2w67 +5fs5 +5qaf +4crb +6fe0 +3zmq +5t1t +2xm1 +4oaz +6e8k +2fys +5egs +4aac +1akr +2wi1 +2qbs +5h2u +1lke +3smq +5eds +3wmb +4azy +3cz1 +1s39 +1n94 +5lc0 +5wg5 +5ttg +1ndz +6gmx +4cfe +3fsj +1cim +3kry +2f2h +2g9x +1o47 +6aqf +4knn +1t29 +4k5z +2ycf +5ijr +5kks +5tg2 +4yxu +4i7c +3ftv +1ke3 +4bib +3coy +5orw +5bns +4bs0 +3igp +2uw4 +4avj +3qcy +1yqj +3a73 +4uu8 +4h3f +4rwk +5j5r +2rkm +1w14 +4edu +3e73 +5o1s +4ngn +1mnc +2ypp +3wq5 +4j74 +3rz5 +5upe +4eh6 +1c84 +4fcf +1sc8 +3agm +5w10 +1gzg +4qvv +4qp6 +4why +3ip9 +5he0 +4yv5 +3ril +4uyd +1nu3 +3ozr +5qin +4utv +5x9o +1okv +5m4c +4xhv +5ll7 +3cyx +4gm3 +2w78 +5q1d +3bym +5aqo +1ga8 +1p5e +4c4f +2mps +4cxw +4w9f +3lmk +3sz1 +4tya +4qjx +2v85 +5bw4 +2ael +4pv5 +5osy +4m8h +3i02 +6fs1 +3twr +2vt3 +4rrn +1h5u +2qoa +4lk6 +3qtu +3k5d +2qhy +5jmw +3bun +1ghy +4l70 +6cd4 +4ymb +6bh2 +5h0h +1ppx +3r4p +6gnr +2x6k +3f9n +4oys +3rf5 +3hub +4fcq +5o1i +3ohh +4mqu +4gw5 +5m4f +2r9w +5yea +1xp1 +5lm4 +2csm +5t2g +3n1w +3t8s +5nhz +4zk5 +1e6q +4cg9 +4bf1 +2ycr +1c50 +3hd3 +4ehe +5tku +5wle +2r64 +5l3j +6ft8 +5ti5 
+4tyt +5lma +3oct +3zt2 +5ai4 +3zh8 +3fl8 +3old +3fqs +4xct +4mjp +3g2y +6g8n +5kkr +1fjs +6dxg +4cxy +3che +3qfv +1r5v +1pr1 +3nsn +4p1u +3szb +5u11 +4o0t +3ux0 +4q4i +2nww +2qp8 +4rsp +2p0x +1fsg +1u0g +4djx +6cco +3opp +4lg6 +2clv +1hps +6bhe +6bir +5h09 +3kwj +2bed +3e51 +3msk +6axj +3udd +1zvx +5u6d +4qnb +4eox +2rly +3ob2 +3osi +5m7t +5vqy +5ei6 +3mbz +4f7n +3ipb +1w6r +4zx9 +6gy5 +2hh5 +3nmq +1vjy +4hy5 +4ts1 +2qcg +5lwd +1kjr +3oyn +3rbq +3ujc +3ekp +3p7c +4urv +3gws +3fw4 +1ow8 +4pov +4k4j +4m0f +1ow4 +3qaq +4ovf +1ps3 +4gw6 +5a4c +2ovx +5q19 +1gwm +2kwn +4uce +4y2p +2q2c +5lgn +5oxg +3i4b +4qmo +2wyg +5eay +2b4l +2xd9 +3sus +5zu0 +3gjt +2b1v +3ctq +5tzz +1h28 +4b13 +2q15 +2aac +3kw9 +3qox +4wku +5wqk +2fjp +5eij +4mga +5ak2 +5dp4 +6f8u +4inu +4nrc +5drt +1tng +3vhc +1xh3 +1toi +2lya +3mpm +4k55 +1qb9 +5hlw +6ej2 +4i33 +4cwp +2fvc +3r02 +6ezh +4muw +3d91 +3img +4op1 +4s3e +3qxp +4f5y +4qmv +5u62 +4qtc +6tim +4h2j +1x7e +1b6h +3opr +4gjb +5u5h +5v3o +5l44 +2c6i +4ezr +5u4e +1fta +4z2j +3n3j +5eg4 +2yiq +4jmx +4q2k +2x2r +2fzg +1ajx +2yki +3ccw +5u0e +5j32 +1y2e +6gwr +6gu7 +5dxb +1hyo +3lc5 +3arw +3rz1 +5kr8 +5jv2 +4xpj +4l2f +3vi5 +5etj +2a3w +3bra +1h79 +6ed6 +5w85 +6mub +5od5 +5ejl +3ubx +2c6k +4rn0 +3pdq +4unp +5mng +4w5j +5afn +6h0b +5yc4 +1ahx +2jst +1o3h +3rz8 +4na9 +1f0t +4eym +5vew +4k3p +4zs9 +3b9s +5wcl +4mmm +2l65 +4lph +3frz +3rz9 +4kba +5a8x +5eps +2wor +5f1j +2oxd +3ldq +4g0k +1u32 +5gr9 +4fl3 +3uxl +3fud +4x68 +3k2f +5lsg +1h23 +3zrm +5v6u +4lke +1o4r +5mw4 +4fcb +4qp8 +1htf +4q0k +4yc0 +3ebp +3q4b +5dh4 +1bqo +3sn7 +4waf +3gst +5ecv +1hvj +3m3c +4ocv +2ewb +5h19 +3ryz +3a2o +4ea3 +4wx6 +1i32 +4lge +3cwe +2hmu +1h0r +5m9w +3jwr +2q70 +4iu0 +1tl3 +2gqn +3bc3 +4gk7 +2wd3 +2x8z +4ybt +5dms +1nhg +4mw1 +3cgo +5v4q +4x6n +3bh8 +3f6h +4au7 +5i3a +3l6x +5d0j +4c1u +6fzx +3ook +5mt4 +4hzx +3udp +4g8o +4mrz +4tyo +4umj +4rfz +1rxp +2zyn +5t54 +6bh3 +4cd5 +4hlk +3t0x +2az5 +5dy5 +1ft4 +5org +5to8 +6eqx +3b2q +6ez9 +6aox +2y5f +4x7o +4y4v +5oae +1h25 
+5d2r +2xch +5ndd +2cf8 +2yi7 +4nzo +3mct +5dia +1iih +3ime +3tll +2jle +1h26 +3l7b +5v88 +1o2o +3e90 +4j46 +3dp9 +5ngb +5jh6 +3rpy +2vfz +5xn3 +1e6s +4xx4 +3ppo +4qfo +1o3d +3s43 +6ar2 +3cyu +1qf0 +1ow6 +5eml +4amz +5ye9 +4mti +6f29 +5mk1 +1qsc +1uvr +2off +4whq +4d08 +6gu3 +4b6q +4agc +4uu5 +3blt +2zb1 +1g37 +3krd +3lm1 +1noj +1t32 +3i6z +5kbq +1gwr +4k6v +2vba +3zrc +4nmx +4mbc +1ie9 +3bgz +2k62 +1mqg +1qy1 +1mau +2x7u +3iqq +5h8e +2aig +5n9l +2g5p +4fri +4q3u +5flo +4bo1 +5tyr +5x26 +2drc +4bbh +3gkz +5e6o +4qy3 +1nq0 +5e8w +2gu8 +1w9v +5uci +2veu +5hvy +3ewu +1jm4 +5yas +4ojr +1h0w +5eou +3as0 +4e3g +4euv +2wly +3lir +3znr +3q3t +6ap8 +3g3n +3c52 +3g2w +4iku +5bry +4wvu +3neo +1abf +5db2 +1sjh +3hc8 +2liq +5dtw +5dhu +1qhr +2pl0 +4wcu +3exo +1jvp +4mrd +5jxn +2yjc +2o9r +5en3 +5hpm +4qf8 +3k05 +3h0q +2a4l +4xue +3ewc +1dtt +4yzm +4q4p +3d9o +3s8x +4mq2 +6ek3 +5tgc +2hds +5fwj +3kme +1zog +6ce6 +4c6v +1xdd +5bvf +1dud +3lzu +4c1d +2er6 +3f48 +2xuz +5fto +4zei +3c39 +6fqu +4fc0 +1bzc +3pj8 +2auc +4kp0 +2y7p +4ono +3nc9 +4bjb +4qo4 +3mz3 +5edc +2xzg +6bqj +1ga9 +4oq6 +4b80 +5ee7 +1bcu +4y8z +5ka1 +5lpd +3qi4 +5vii +5ele +2ew6 +1s9v +4bqw +2x6w +5mnc +5eni +2xi7 +1wqv +2b8v +6f92 +4jve +3oy0 +6h41 +4zx4 +4q15 +6czv +2wti +5j9l +1o5r +5n1v +2bq7 +4fic +2ole +3umw +1a1e +1ax0 +4ux9 +6bhi +3pj3 +5m55 +2uw6 +2bqv +2ea2 +5dl1 +6g7f +5o8t +5nze +5fkj +5t1l +4wj7 +3ibi +5h22 +4bgh +2j9h +2iyf +5m28 +2v0c +6ez6 +5b6g +2q3z +4des +3pcj +5y0z +3u6h +3dtc +3cr4 +2igv +4qaa +1yhm +3n35 +5ylj +5dls +5h1u +4qw4 +4hv3 +4jfi +2go4 +2m0o +5vzy +2h6t +2qnn +2aqb +1zd5 +3pxf +1p1n +1bnt +2hm1 +4z46 +5e2n +2p8s +1h2k +3uh2 +1b2m +5eiw +4bfy +5csx +2z92 +2rjp +5o7n +3d3p +4ban +4l0l +3zo2 +3zhx +2hah +3dx1 +4dkt +3sz9 +3n0n +4l4m +6asz +3hv4 +3imc +3eju +5t6z +4jaz +1gjb +5fl0 +3r9n +3sw8 +5ggp +1btn +5j8u +1x8t +3bva +2wtc +5lom +2vwc +6bw2 +6m9f +4n1z +3lzb +1fwe +2w16 +2i6b +5ytu +4j8r +4je8 +3t60 +2n9x +6f8t +4lwu +6ebe +6ajj +5wap +5a6k +2xp7 +3ql9 +1q7a +5dkn +2gnl +3rwc +5npr 
+2e9c +1ajv +6e4f +1o7o +5alf +2xys +6h5w +2jfz +2xdx +4bda +3rxd +1f57 +9icd +3zpr +4ciw +2zcr +2w9r +3hnz +4r1y +3olg +5lce +1kwq +5lrg +5f0c +3u8j +1bl6 +3amv +1ej4 +4ht0 +3u6w +4q09 +5jss +3qlc +2b1i +5cls +3eyf +6eln +5mrh +3nox +2qe2 +3u6j +1qka +3k5u +1ws1 +5ew0 +6ft9 +3kr4 +5iug +1ajn +3zn0 +2i3z +1y3x +2r9s +5vpm +4occ +4nym +3osw +3hy5 +4j86 +4dfg +5f1v +5l4e +2qve +4erf +1wss +1ajq +5h5r +1ym2 +1hee +3rq7 +1dxp +1vgc +3huc +5fqb +4mue +1qbs +1ujk +6fa3 +4xo8 +4ww8 +4qxt +3in3 +2wxp +5j3s +2h1h +5d7a +4lhm +2rjs +5nvf +3l0n +5b5f +4oow +3ej5 +5fum +1li6 +5swg +6eum +3fas +5klr +2vv9 +3v5t +2byr +5nr7 +3suf +2xb9 +1f4x +5v86 +3kl6 +1qng +2a8g +2j62 +3zi0 +1h6e +3ap7 +3zj6 +4hp0 +1bmk +1nj1 +5xp7 +4iwd +1if7 +4ia0 +3ms7 +5yjk +5j7f +5aba +5ake +1h2t +4mzk +4ezj +4an3 +5jfp +5v24 +2xmy +1yqy +3aox +4yjn +2i0h +4lwe +4frs +2hwg +3o4l +3ddg +4cu8 +2zfs +5wfd +4lnb +4j17 +4qyy +2r3j +4eoi +3pdh +1y3p +4g5y +3v4t +4okp +5meh +1xpc +3avl +6gzl +2pqc +4g0y +3wab +5iop +2ax9 +3bex +3cdb +6afc +1v0m +3ksl +3fvl +6c7d +1t2v +5bot +5yto +2wed +1jn2 +4k4e +1a94 +3ozj +1j81 +3e93 +1q9d +4idt +4k2f +2whp +3snc +4otg +2lty +5bnr +6c7r +3as3 +4ge9 +4xg7 +5dw1 +3ivq +2qrg +2xdk +1gno +2gg5 +1d4y +5dqe +3rwi +4x8o +3el9 +4kup +3eks +2kff +1w5y +2flh +2vwv +3rvg +4oeu +3u4o +2pv3 +4i0t +3aru +4yv0 +5ura +5uv1 +3vjc +4bam +5vc5 +2ito +1l5s +3adv +3oe6 +5mnb +5gvm +1r1j +2qe5 +2rjr +3wiz +4cfm +1gz4 +3c1n +6ma3 +1z9h +6g98 +1qm4 +4j24 +3pju +5cqx +1wvj +2yde +4mqp +2np9 +4zyu +3ua9 +3fc2 +2xp5 +1umw +5jer +3dcv +4aft +4rhu +3ahn +4ytf +5os2 +5el9 +3gjw +4oth +1lkl +6ela +5vkf +2oz6 +4q1f +5usz +2xfk +3n51 +4fxq +4oc3 +3ggu +1hiv +4d2s +2fkf +4oz3 +6fkz +3lhg +1jyq +3uvw +4f6s +2qk8 +5jat +1lvu +11gs +4cwt +4gz3 +5vkc +1wu1 +5m17 +1rne +1l2z +3s0d +4nxq +5iu2 +1x78 +6bc9 +6drx +5juz +4qvn +5i7x +5o7e +4ktc +1fl3 +1enu +2clf +4zs0 +4llp +4djy +5nrf +4z0k +4k8a +6cdc +6bin +4e1z +1a61 +2q64 +3s0o +4z0u +1ii5 +4lk7 +5ubt +5m1z +1t4j +1g7v +4q1n +3l08 +5d7r +2emt +4y2b +4j0t +2zof 
+6ecz +4gb9 +4jfl +2ow3 +1ibg +3fyz +5g5z +7lpr +4g8n +4bie +2yir +4yps +5jdc +3p4v +2fum +5c4o +1kcs +4r3s +1cj1 +6f09 +4ce3 +4ft2 +2pcu +4mo8 +2g2r +4lxk +4utn +1stc +5nxp +5t52 +3iod +1h01 +4deb +4jjs +5ts0 +3su0 +4yhf +1y4z +3heg +2g70 +3p2h +1sqq +1swk +2f4b +1igj +3fcq +2pja +4ase +2o63 +2xyf +4j0v +5ka7 +5kzp +3oob +5w6t +1h2u +5a00 +4zt3 +2cer +1w96 +2pzi +4qf7 +3c88 +4y67 +4nrl +4du8 +4qtn +3sue +6fhu +1qs4 +5u6b +5edb +2p2h +5gtr +4hbp +3tia +3cbp +2cgw +4bio +3gwt +5t8r +6fo7 +1g9t +3kr0 +1gt3 +5c83 +2r9x +3f8c +5nk7 +4cfu +5e2r +2wf1 +3uil +5fl6 +1ms7 +4csj +4yat +4f1q +4lm1 +4oc0 +3hj0 +2xwe +3ms4 +2jh0 +3oad +2y2j +4bd3 +6h7n +5exm +1ctu +5l3f +2ajd +2c94 +3g3r +2zo3 +4dro +1uou +6bsl +1uy7 +3zyb +3t0b +4o43 +5q0l +3wch +5orb +3kgp +3zeb +4pyy +4b0j +1jq8 +3u4w +2xyt +5jhd +3hmp +4dxj +5hg7 +3zlx +1zkk +1gny +4ju6 +1il3 +4daf +4o0x +5ygi +6gla +3mtw +3mg0 +3d9l +2vcj +5edq +4bci +4e4x +2qdt +3wuu +3udv +4dkq +4bfp +5i2e +3q1x +1qaq +3rm8 +4ahu +1gx4 +3wi2 +4de2 +5g2g +2wuu +5nud +4hcu +3k37 +6prc +2e7f +4bb9 +1amk +4l1a +4lqy +4a16 +5wkf +4qsm +3sxu +3uqg +5lif +4azf +5ntt +3vsw +5fos +1qin +3e7b +1ll4 +3vjm +3ny3 +6aqs +1k6c +4ngr +6fc6 +2gnj +4zy6 +3u8d +2aou +4b0b +5t2p +5efb +5u0d +5vjp +5bui +5e13 +5f5i +4ivk +7hvp +3d7k +4vgc +5g1p +6b1h +2br8 +1cqp +4w50 +4ehv +5kqf +2yfa +3ws9 +4xkb +5mrp +3str +5nkd +5mpk +5kqy +4lw1 +1avp +3qdd +5xxk +4oks +4hxr +3ur0 +4pns +4i5m +4hgl +2vtd +2gph +3rme +5fqp +5hfu +2iok +4yux +6g1u +2hvx +1n0s +4l4v +3s1y +1g7p +2rr4 +2x00 +2b55 +4wmv +4kc1 +2x39 +5joh +2w0p +4az6 +1o0f +3haw +5w44 +2xhr +1h8s +4ge1 +3phe +1evh +6b30 +1g35 +2qfo +5gjf +6bt6 +2gg3 +5trr +5ly3 +5eh0 +5ioy +1xt8 +3c3o +5e74 +3acl +5abh +5fol +5ddf +3qxv +3iwy +3ttm +5n18 +6beb +5nxw +5lxb +4ra1 +4muk +4mm4 +4hs6 +5ncq +3zmg +4hcv +4i72 +4pee +1qtn +4awg +4rfy +5dyy +3ud8 +5itf +5cuq +5f1c +5hey +2ko7 +3oil +5tks +3t3u +5o3q +4k72 +4oma +3hv6 +6df7 +5er4 +4no6 +5ugd +1ciz +1rev +6hdq +3c43 +1a7t +6fky +5n1r +3rr4 +5bqg +3b8r +2pfy +5c7f +5l6j 
+1etz +3drf +3k7f +4b6e +6asu +1fd0 +4zjc +1y3g +4joh +4c38 +4h5c +5kej +6aji +4mvy +3lpi +4pms +6fx1 +3ioi +2xbj +1bnm +4tuh +4fgt +5mhp +4hwb +6eo9 +4aje +1v2t +5h85 +1x8d +3uoj +2aog +3o5x +5vr8 +2o4p +1fh8 +4jz1 +5oq4 +3zdh +5otz +3lnz +1n9m +3rak +6ba7 +6czb +2wyj +4qyo +3mxd +2ri9 +2zq1 +3ljo +1kvo +6g2a +1gj6 +5oug +1g4o +3mvm +1iyl +2cgx +3v2x +5iv4 +4m2u +4q1b +3ggc +2c68 +3ind +1laf +4kxn +2ada +1om1 +5eh7 +4jk5 +4q93 +1b8y +1x11 +4mr6 +4c4j +4yik +1osv +4y7r +4zow +2hdx +4ui6 +4r6x +1pxi +5ni5 +5v4b +1h0a +4pf5 +5ggz +1o4m +1urc +5mqt +3ads +5gn5 +4mrg +5ylt +5k6a +4nld +2xl3 +4auy +1zoe +4pum +5lxd +5csh +3gbq +5xs2 +4ggl +5a14 +4au8 +5n49 +5x8i +6c7f +3fvk +4kqp +5y97 +6gub +5mxx +3lox +2oc7 +3g1m +6hsz +1ilq +3twj +5vcx +3svv +4na4 +3v5g +4p7e +2vwl +4umu +3v4j +6gfz +6duf +5aqf +5ezh +5nar +4yti +4zyq +4lop +4nus +2a3a +2o9i +3ia6 +5nfh +1e00 +5u8a +3n8n +4axd +3kxz +3owd +4fxj +1gx0 +4u90 +1d7x +3eq8 +3q44 +4ago +5e2p +1pxj +4p6c +1zoh +4i7j +4hlg +5j6n +3s2o +4ec0 +3wiy +4k77 +5yf1 +1hsl +4tjz +6cbh +6g38 +4efu +5wep +1jwm +3wfg +5lvl +5kqd +1fq5 +5nn6 +3b5r +5xag +3w69 +3udr +4c4h +5mgi +5ad1 +5zh3 +4i7b +6cwi +1tkb +3unj +4u54 +2x0y +3isw +2xbw +6fzu +5wgq +4kzc +1yhs +5btr +1juq +2idk +2v25 +4prh +5o5a +3r0h +1sle +2ieh +5ous +2fqy +1p7m +3fuf +1w10 +1vot +1guw +1gbq +5thn +6b41 +4xoc +5ugc +4h71 +5i43 +4oc1 +3t0d +4kw6 +4jrg +4ra5 +4azp +5sz3 +5hbj +1npa +4wi1 +4b7p +3biz +4bpj +3lxe +5qav +4ge6 +4k1e +3vje +4mic +4qp9 +2xg5 +1w5x +2rin +3omc +5ii2 +3pax +5elq +3bpc +1ec0 +4x8g +3cft +4qd6 +5n1x +2v96 +4ucc +5ocj +4mul +3lok +2e5y +3eql +5kyj +1n5r +3hfz +2pg2 +3g3m +4fs3 +1nnk +4n9a +4jp9 +2j77 +6cvd +4tkb +3tfp +5a5o +5nsx +6bbv +5myg +3ws8 +6ex1 +2jkr +5kcv +2vw5 +4ezl +3dp0 +5hyq +5iyy +2oc4 +5div +1gag +2r38 +3chp +3ot8 +2wqb +6bqh +3gxl +2pgj +6gfx +2i3v +5yqw +5n9t +6cc9 +3p3s +3oe9 +2y1o +2o4r +1dzm +3vp2 +3djo +4oz2 +1p6d +5mih +4xt2 +3tf7 +5v19 +6g9k +4ipf +3bim +4nwm +2aeb +3blu +3vtc +3nww +3mp6 +1nu8 +3wzk +4zec +3gvb +4bt4 +2f6v 
+5xwr +5m53 +4yvc +1ai4 +1z1r +1ryh +1rhu +5d1s +3g0i +1gpk +4clb +5cte +4kmu +6f9r +3pjg +1uz1 +3tww +2lbv +5w1w +1s26 +3w9r +4yes +3sl8 +5km0 +2j79 +2h03 +1qnh +4cpx +5enk +4mnv +3r5m +4tk4 +5wg6 +5uwf +4cjp +3uu1 +4bxu +3h30 +4qwr +2gyi +1cnw +4fzc +6gdg +4e5i +3oq5 +3wig +5my8 +4aj1 +3b65 +2e95 +4xcb +3wf5 +5h15 +5htb +3t3e +3h5b +1ugp +5anq +6g4n +1apb +2ai7 +4xxs +1lrh +1ove +3v7t +1xk5 +4fiv +4mcv +1mrw +4b14 +5isl +5cj6 +5un9 +3nil +1x07 +6fni +4u2y +4hbv +6h2t +4q6d +3b8z +2cnf +4isu +5u2f +2cng +4q90 +4l0b +1z6s +6g3q +4ylu +3w5t +4dei +2zjv +2x6y +3ikg +5j7p +4mcc +3vqs +3u9n +2g5t +3owj +5bnm +3hkn +3qtx +3qzt +5bpa +3p17 +6fag +5nfb +1o43 +1nok +3vby +6as6 +5vjn +3skc +3bg8 +1tc1 +5orh +5bq0 +5qac +3l16 +5kw2 +3uj9 +6byk +5kgw +4wvt +1bv9 +4ruz +1fzq +1mu8 +1srg +4y73 +3upv +4jj8 +5xvf +1xka +4ewn +4ewr +4a7b +2xab +6g9b +4u69 +1qb6 +3c6t +3qar +4k5o +2gnf +1u3q +3g31 +3oev +4oc2 +1eby +3uef +4k0o +5ehi +5h63 +3wcg +6hvi +1hii +4jhq +3sgt +4aua +4hnc +3bxh +5g5v +4j08 +4anu +3hac +2vcq +2f7p +4ycl +3s22 +3obq +1o9k +4xy2 +3k9x +1xog +5b0x +4m5g +4czs +5eyr +4lm5 +5xup +4rao +3qcl +3lkj +2xno +1hfs +1gjd +4bzs +4j7d +1fw0 +1jfh +2cgr +4pd8 +3mz6 +5obj +3gm0 +4j3u +1uvt +5ml5 +4fut +4r3c +4kfp +4jck +5xhs +5mwz +3n86 +5qab +5zeq +1fo3 +4eki +5n8w +2exm +4klb +5ikb +2qhc +5aqr +4qn7 +5nbw +4k2g +3qck +3zo1 +4knx +3m6r +6dh6 +3p8n +5f3e +2vie +4uyh +5uoy +4gr3 +6cyd +2f14 +5v2q +3h2m +3ao2 +1k1n +3gfw +4q19 +2b1g +4pdk +1oxn +2pj1 +4o3a +2y6s +1inh +4u5l +4esg +4mg6 +4de5 +3wke +6dgy +4mp7 +4de1 +4z6i +2r4b +5q0z +4b33 +2koh +4qyh +5y94 +6fo8 +3p4r +4mny +2vjx +5lj0 +5yqx +5c26 +4eb8 +3lk1 +3mg6 +4oru +2wpb +4k7i +5fsy +3ava +2z7r +3ex6 +4b8p +3l13 +5fbi +3s3k +3chc +2wf0 +4hgt +6b5i +4ayw +3gk4 +2lcs +5fas +1lyx +6bvh +6edr +1ghz +1dg9 +1egh +2x6f +5o48 +4avg +3znc +1czq +5f39 +2bpy +2vc9 +5op5 +4ory +3hu3 +1xuc +3hmo +2ybt +1v1k +5o22 +3qt7 +4eo8 +4bco +2p3i +3hii +2xdm +6axq +2xhs +5cyv +1sre +2y8o +9lpr +4l3o +1dzk +3lnj +3fsm +4ghi +2nqi +1i48 +5mo2 
+6fr2 +1bnn +2jbl +1r17 +4rrf +5jah +6apw +4trc +1lgt +2wl0 +4i0z +3oe5 +3fjz +2uyn +3hxd +6g3o +2nqg +3fc8 +2zvj +3mw1 +2c5n +3poz +5ax9 +3mhw +5tdw +5n7b +4kij +5axq +3pxy +2q96 +3kjd +5cf5 +5id0 +5e0j +4kjv +4e1m +3gy4 +5m34 +1xqc +2hzy +5brn +2d0k +3s8n +2j7f +5zc5 +3p5o +1xh5 +2vj7 +4zt2 +2b7a +1thl +1jmq +5h7h +5mz3 +3dxj +5q1g +6ays +3w2t +1bkm +3pb3 +3vjl +2a8h +4pax +5tt8 +2y2p +5lyx +1zaf +3mb6 +2fuu +4mw6 +2j6m +4rll +3qai +4kzq +2uym +5v84 +3bm9 +4egk +2bqw +2q8y +4pji +6ary +4daw +4i06 +2wxk +1r5n diff --git a/forks/DiffDockv1/data/splits/timesplit_no_lig_overlap_val b/forks/DiffDockv1/data/splits/timesplit_no_lig_overlap_val new file mode 100644 index 00000000..2eb87aa1 --- /dev/null +++ b/forks/DiffDockv1/data/splits/timesplit_no_lig_overlap_val @@ -0,0 +1,968 @@ +4lp9 +1me7 +2zv9 +2qo8 +1cw2 +3k5c +2o65 +4kqq +3rdv +1d4w +1q4l +4b5w +4bgg +4mm5 +3iej +3ftu +830c +2xye +1olu +2wk2 +4pxf +5o0j +1my2 +5czm +4jit +5mb1 +1sqp +3zlw +4xqu +3hkq +6fns +5e0l +2p8o +4gzw +3n87 +1lhc +4itj +4m7c +4olh +4q1e +5l7e +3faa +5vqx +3pka +5x54 +5a9u +4n9e +4est +1il9 +4igr +3t2t +6dar +3gol +3vbg +2ydk +4zpf +5zo7 +4xnw +1fpy +2r1y +6m8w +2jds +5icx +1hwr +6bj2 +4b4m +1zsb +4do3 +3t3i +1f8a +2ke1 +5ezx +3p78 +4rvm +3ovn +5wzv +4udb +1okz +1mpl +5npc +5ff6 +1hlf +1nvq +4bhf +4y4g +5mkz +2o0u +3bcs +1wvc +4fsl +3oz1 +6dgt +1me8 +2puy +4odp +1hpx +4nrq +1z2b +3uik +3mfv +3vqh +4w9g +4xek +4jok +2wap +1g50 +4j0p +2o9a +3m94 +4i1c +5a82 +4i9h +1k1i +4uro +2f7i +5fpk +2lgf +4l7f +1g3d +4ir5 +3mta +3jzg +5f94 +4nrt +4yax +5nhv +2xtk +4qh7 +1tok +4b6p +3rg2 +3q8d +3obu +4awj +3daj +2j50 +5l2z +5bml +2bba +5n34 +2xvn +1dpu +5fnt +1jyc +4zz1 +6hm7 +4rrv +4rww +5orv +3qo2 +3uii +6d1x +3juq +4qk4 +6mr5 +5hjc +2p4s +2hnc +1k4g +4g0c +2y5g +4u3f +3tv5 +1i3z +4mw7 +3n2c +6cvw +3v66 +3wzp +3s7m +5ujv +1p06 +3ipy +4wkt +4ie0 +5fot +5i59 +5za9 +4gii +4h2o +4yrs +5a6h +2xo8 +4e3n +4m5k +3dga +6fse +6ck6 +1sqc +4x1r +3dnj +3rvi +2a58 +4bf6 +3zlk +4mbj +4tpm +4d8c +1ejn +4yt6 +2x7x 
+4qp1 +4de3 +5yg4 +1x7b +5n9s +2fme +1ydt +2bdf +6baw +6fsd +2xn3 +4tk0 +3q4j +1u9l +1oqp +5htz +4glr +5kj0 +5ukl +3fun +4wk2 +4ht6 +5hv1 +1uze +4bcc +3ff6 +5if6 +1tsm +2r59 +3iqh +2v7a +5d10 +5nvh +3eqr +1jq9 +1u1b +6cer +5uq9 +1u3s +5icy +3exh +2oqs +1pzp +1d4i +4x6p +4mb9 +5emk +1iky +6b7f +3chq +3h5s +5zmq +4ib5 +2wej +6fjm +5ewa +2igx +2z78 +5lpm +4wet +3lxl +2xba +5wbl +5zla +2x6x +4mw9 +5t2d +4j3m +4aqh +3lbk +4djp +4odl +4x6j +1ero +5f3t +4k3q +5ta4 +1caq +2eg7 +1f73 +3rxg +6ezq +1qkt +5l3e +5c28 +4pp9 +4bgk +3iaf +5vrp +5zz4 +5ur5 +3ft2 +5ech +4jjq +5iz6 +5dhr +4l2g +4r17 +3wk6 +4h1e +2aq9 +5g1n +3zm9 +5c4l +5mfs +1fzj +2ltw +4x7i +4c94 +2cfg +2va5 +3vb6 +2hob +5ah2 +5syn +3g6g +3rwj +5sz4 +4f9v +5n2d +3n9r +5ldo +3vb7 +1sqo +3drg +5j9y +6b96 +4yz9 +1vcj +5epr +4tx6 +3dz6 +3czv +5v49 +1ahy +3wzq +1bq4 +5u8c +6bj3 +2qnb +4a9m +3d4f +5oui +5wmg +6ma4 +4x5q +5cbr +6msy +5avi +1g3b +2wi4 +3kjn +4dhn +4o7e +5kit +5y5t +3hfj +2qd8 +5vsj +2y2i +5m0m +3tcp +4bhz +1jd6 +5idn +4zzx +4kn4 +2a5c +6hly +1au2 +4jbo +5cgj +3ske +3lq2 +4pxm +2wxg +5tb6 +2vc7 +3iw4 +5hct +3skf +5lyy +3fmz +4p5z +5ktw +6e4w +1cx9 +6em7 +4mjr +4u7t +3rde +4ux4 +4i6f +3l3x +4ie6 +4j70 +1jd0 +4iaw +1szm +2afw +3ess +3sap +1olx +1bzh +5hfb +4x3h +5we9 +3zsw +5ny6 +1hn2 +3l3z +4qp2 +1d4p +4xkc +2is0 +6c7e +5zku +4fai +6g9a +4xu3 +5dry +4d8z +3zcz +3kbz +2y59 +4nal +4rpv +4yje +3vf8 +4bqx +4z9l +4ep2 +4ylk +5mme +4dht +2uy4 +6mu3 +3kx1 +5o0s +4bch +5c4k +2br1 +4ddh +2f9k +2w2i +4ogn +4up5 +5o4y +5hjd +2qw1 +5y8z +4kqr +1o2t +6e05 +3u7l +2mip +3hvg +2p59 +4d3h +4pl5 +3tzd +2vnp +4e3m +3vgc +5bqi +1b7h +1lhu +3rlr +3h22 +2wnc +2wot +5d1t +3mo0 +4wn5 +3p3u +1nfs +4e90 +5aqu +1bmq +3kwz +6f6n +4rj5 +4omd +6min +1ujj +4ppa +4uxl +5y3n +6df2 +4wvl +1xt3 +5oaj +4a9r +5mli +4p4e +3juo +1z9g +2ykc +5a0e +3g0w +5t9w +1sqa +3wci +1fkw +5u4g +4mfe +4kpx +3nti +3azb +2xog +3c3r +2buc +1hyz +4dcd +6azl +3t3d +3q4l +4few +1q95 +4u0b +3b7u +4bo4 +4o10 +5wmt +5v9t +5aok +1jtq +5uit +2vgc +2gfd +3mna +1aqc +4xtt 
+4z0d +4ty9 +2yiv +2hrp +4zh2 +2z4o +1qku +2xdw +4n7j +4yp1 +3exf +4c6z +6ccu +2wxn +1bwb +2gvf +1hiy +5c4t +2za5 +2xkf +4q18 +1o2p +5th2 +4dj7 +3eyd +4j0r +2m3o +2b53 +4m3b +2izl +2vtr +2x6d +2i0a +5ehg +6cw4 +4c37 +3cwj +1azm +2qci +5sz0 +2gkl +2z4z +6awo +1v11 +4l53 +3p55 +2ynn +2vu3 +4dli +2bcd +4l0s +4uda +3m37 +5j5t +2p16 +4gh6 +1mfg +3s3i +4j73 +2v5x +2h4n +4jsz +4wk1 +4igt +4k63 +3qqk +16pk +5aom +1hyv +5a3w +3veh +3g4l +2ph8 +5mkx +5c4u +4gto +3cj5 +4prj +2vd7 +5duc +3odi +6bg5 +1qwu +5jn8 +1v1m +1qpe +5v3r +2wc4 +2vte +1a52 +4dhq +2qta +6ccy +4jog +4bgy +5u9i +3az9 +1gt1 +2jew +3pdc +1n3i +5fyx +4f49 +4nzn +6hm2 +4a4l +5xij +5vk0 +4xsx +2aj8 +4odq +2n7b +4ygf +2a4q +2jc0 +4jsa +1inq +3dc3 +5tob +4urn +6bik +4ju4 +5nya +5oh2 +5znr +5ct2 +3u4u +4x7h +3max +3rbm +3krj +1aj6 +1pmv +5n0e +4nhy +4oem +6fi4 +4e3j +1fq4 +5myr +2hkf +1os0 +3rqg +4ivc +5c7b +3lq4 +1u6q +1qxz +1l5r +4xxh +3m40 +5or9 +4okg +4d89 +2gm9 +5x33 +4de0 +4gr8 +5lz8 +1p93 +2brp +2gg8 +6fdt +5cxh +1jvu +3wp1 +1fzm +5cxa +2gbg +2g78 +5aml +2y34 +2qnp +1v16 +1njj +2a5u +4z88 +4wmx +5vo2 +4fod +2pou +3jsw +2ow2 +5g3m +3odl +3o9e +3eyh +4ej2 +3c4e +4b6f +1pl0 +3pb8 +6fap +4iax +2bua +6fgg +2o4h +4uwh +5wbf +2yxj +1ff1 +2giu +1qbt +2ovq +4bak +2y3p +2iwu +3hvi +2w0x +3fcl +1zpa +5czb +3t1l +2cfd +3k3g +4cfw +2e91 +5op8 +3hig +6h7y +3mtb +4eb9 +4lkg +5ehv +5ier +4ode +1xoq +5d6p +3kwa +5np8 +5v82 +6ma1 +3bz3 +3myq +4j0s +4f4p +4lh6 +1uef +4j3d +4yx4 +4amx +4ptg +2c97 +4ec4 +4r1v +1zc9 +4nuf +3g2u +6hlx +5vij +2x4o +6hlz +4lkj +3s75 +2gz8 +1gvk +2yhd +3hqz +3pb7 +1thr +4ris +5twh +4gql +3n3l +3acx +5yvx +3gy2 +1xmu +5l6p +5l8n +4msn +4rz1 +3f66 +3ucj +5hcl +1t1r +3kce +3u15 +1wbg +5khi +3er5 +4qew +5mft +6eqp +5gsw +2qd7 +4cli +3f9w +3msc +1jgl +3kid +1ymx +1ui0 +3d1f +1pxl +5kos +3vzd +5fcz +3ara +4li6 +5ks7 +4wym +5j7q +4qsh +2ce9 +5vqz +3o2m +4bcm +5orx +1i41 +3c5u +4kai +6gjy +4tsz +5o0e +6drt +1y57 +3kqb +3jup +5ork +3ikc +3gwu +4wke +4x7l +3lp1 +5ivy +3f16 +4c36 +1w2x +2d06 +1hbj +1ols +1iup 
+5aix +1ydd +5w4r +3h23 +3rj7 +4ish +1ebw +1fcy +1d09 +5hdv +4x1n +5boj +2xn7 +4b6s +3f82 +4clj +4zzz +5j5d +2vts +1k08 +3u3f +4jk6 +4csy +6hth +2mnz +2vpg +2qd6 +4jkw +3ml5 +1ih0 +4at5 +5dgu +4g31 +5n0d +5aa9 +4u4s +5oa6 +2wzm +4b4q +6fi1 +6chn +1z4u +5aa8 +1lpk +3cib +5d75 +5x4o +1ydb +5dhq +5t28 +4zz0 +3evf +5vyy +6eip +1q63 +3ldw +5tq4 +5uxf +2j7x +4kil +1yda +3bc4 +2ew5 +6ee3 +4yrr +3wax +3bzf +5ody +1k06 +4j84 +5l6h +5eok +5nne +5m6m +2a4r +3p1d +2ayp +3iux +4b0g +1jr1 +4qo9 +4bh4 +4xt9 +2ok1 +2r7g +4uib +5mmn +5akj +3hs4 +5wpb +6e5x +5vnd +5evd +5wlg +5l4m +4kiu +4own +5oh9 +6arv +1xr9 +4hv7 diff --git a/forks/DiffDockv1/data/splits/timesplit_test b/forks/DiffDockv1/data/splits/timesplit_test new file mode 100644 index 00000000..3e247f07 --- /dev/null +++ b/forks/DiffDockv1/data/splits/timesplit_test @@ -0,0 +1,363 @@ +6qqw +6d08 +6jap +6np2 +6uvp +6oxq +6jsn +6hzb +6qrc +6oio +6jag +6moa +6hld +6i9a +6e4c +6g24 +6jb4 +6s55 +6seo +6dyz +5zk5 +6jid +5ze6 +6qlu +6a6k +6qgf +6e3z +6te6 +6pka +6g2o +6jsf +5zxk +6qxd +6n97 +6jt3 +6qtr +6oy1 +6n96 +6qzh +6qqz +6qmt +6ibx +6hmt +5zk7 +6k3l +6cjs +6n9l +6ibz +6ott +6gge +6hot +6e3p +6md6 +6hlb +6fe5 +6uwp +6npp +6g2f +6mo7 +6bqd +6nsv +6i76 +6n53 +6g2c +6eeb +6n0m +6uvy +6ovz +6olx +6v5l +6hhg +5zcu +6dz2 +6mjq +6efk +6s9w +6gdy +6kqi +6ueg +6oxt +6oy0 +6qr7 +6i41 +6cyg +6qmr +6g27 +6ggb +6g3c +6n4e +6fcj +6quv +6iql +6i74 +6qr4 +6rnu +6jib +6izq +6qw8 +6qto +6qrd +6hza +6e5s +6dz3 +6e6w +6cyh +5zlf +6om4 +6gga +6pgp +6qqv +6qtq +6gj6 +6os5 +6s07 +6i77 +6hhj +6ahs +6oxx +6mjj +6hor +6jb0 +6i68 +6pz4 +6mhb +6uim +6jsg +6i78 +6oxy +6gbw +6mo0 +6ggf +6qge +6cjr +6oxp +6d07 +6i63 +6ten +6uii +6qlr +6sen +6oxv +6g2b +5zr3 +6kjf +6qr9 +6g9f +6e6v +5zk9 +6pnn +6nri +6uwv +6ooz +6npi +6oip +6miv +6s57 +6p8x +6hoq +6qts +6ggd +6pnm +6oy2 +6oi8 +6mhd +6agt +6i5p +6hhr +6p8z +6c85 +6g5u +6j06 +6qsz +6jbb +6hhp +6np5 +6nlj +6qlp +6n94 +6e13 +6qls +6uil +6st3 +6n92 +6s56 +6hzd +6uhv +6k05 +6q36 +6ic0 +6hhi +6e3m +6qtx +6jse 
+5zjy +6o3y +6rpg +6rr0 +6gzy +6qlt +6ufo +6o0h +6o3x +5zjz +6i8t +6ooy +6oiq +6od6 +6nrh +6qra +6hhh +6m7h +6ufn +6qr0 +6o5u +6h14 +6jwa +6ny0 +6jan +6ftf +6oxw +6jon +6cf7 +6rtn +6jsz +6o9c +6mo8 +6qln +6qqu +6i66 +6mja +6gwe +6d3z +6oxr +6r4k +6hle +6h9v +6hou +6nv9 +6py0 +6qlq +6nv7 +6n4b +6jaq +6i8m +6dz0 +6oxs +6k2n +6cjj +6ffg +6a73 +6qqt +6a1c +6oxu +6qre +6qtw +6np4 +6hv2 +6n55 +6e3o +6kjd +6sfc +6qi7 +6hzc +6k04 +6op0 +6q38 +6n8x +6np3 +6uvv +6pgo +6jbe +6i75 +6qqq +6i62 +6j9y +6g29 +6h7d +6mo9 +6jao +6jmf +6hmy +6qfe +5zml +6i65 +6e7m +6i61 +6rz6 +6qtm +6qlo +6oie +6miy +6nrf +6gj5 +6jad +6mj4 +6h12 +6d3y +6qr2 +6qxa +6o9b +6ckl +6oir +6d40 +6e6j +6i7a +6g25 +6oin +6jam +6oxz +6hop +6rot +6uhu +6mji +6nrj +6nt2 +6op9 +6pno +6e4v +6k1s +6a87 +6oim +6cjp +6pyb +6h13 +6qrf +6mhc +6j9w +6nrg +6fff +6n93 +6jut +6g2e +6nd3 +6os6 +6dql +6inz +6i67 +6quw +6qwi +6npm +6i64 +6e3n +6qrg +6nxz +6iby +6gj7 +6qr3 +6qr1 +6s9x +6q4q +6hbn +6nw3 +6tel +6p8y +6d5w +6t6a +6o5g +6r7d +6pya +6ffe +6d3x +6gj8 +6mo2 diff --git a/forks/DiffDockv1/data/splits/timesplit_test_no_rec_overlap b/forks/DiffDockv1/data/splits/timesplit_test_no_rec_overlap new file mode 100644 index 00000000..bc8c12d8 --- /dev/null +++ b/forks/DiffDockv1/data/splits/timesplit_test_no_rec_overlap @@ -0,0 +1,144 @@ +6qqw +6jap +6np2 +6qrc +6oio +6jag +6i9a +6jb4 +6seo +6jid +5ze6 +6pka +6n97 +6qtr +6n96 +6qzh +6qqz +6k3l +6cjs +6n9l +6ott +6npp +6nsv +6n53 +6eeb +6n0m +6ovz +5zcu +6mjq +6efk +6gdy +6kqi +6ueg +6qr7 +6g3c +6iql +6qr4 +6jib +6qto +6qrd +6e5s +5zlf +6om4 +6qqv +6qtq +6os5 +6s07 +6mjj +6jb0 +6uim +6mo0 +6cjr +6uii +6sen +6kjf +6qr9 +6g9f +6npi +6oip +6miv +6qts +6oi8 +6c85 +6qsz +6jbb +6np5 +6nlj +6n94 +6e13 +6uil +6n92 +6uhv +6q36 +6qtx +6rr0 +6ufo +6oiq +6qra +6m7h +6ufn +6qr0 +6o5u +6ny0 +6jan +6ftf +6jon +6cf7 +6o9c +6qqu +6mja +6r4k +6h9v +6py0 +6jaq +6k2n +6cjj +6a73 +6qqt +6qre +6qtw +6np4 +6n55 +6kjd +6np3 +6jbe +6qqq +6j9y +6h7d +6jao +6e7m +6rz6 +6qtm +6miy +6jad +6mj4 +6qr2 +6qxa 
+6o9b +6ckl +6oir +6oin +6jam +6uhu +6mji +6nt2 +6op9 +6e4v +6a87 +6cjp +6qrf +6j9w +6n93 +6nd3 +6os6 +6dql +6qwi +6npm +6qrg +6nxz +6qr3 +6qr1 +6o5g +6r7d +6mo2 diff --git a/forks/DiffDockv1/data/testset_csv.csv b/forks/DiffDockv1/data/testset_csv.csv new file mode 100644 index 00000000..455964ad --- /dev/null +++ b/forks/DiffDockv1/data/testset_csv.csv @@ -0,0 +1,364 @@ +complex_name,protein_path,ligand_description,protein_sequence +0,data/PDBBind_processed/6qqw/6qqw_protein_processed.pdb,data/PDBBind_processed/6qqw/6qqw_ligand.mol2, +1,data/PDBBind_processed/6d08/6d08_protein_processed.pdb,data/PDBBind_processed/6d08/6d08_ligand.sdf, +2,data/PDBBind_processed/6jap/6jap_protein_processed.pdb,data/PDBBind_processed/6jap/6jap_ligand.sdf, +3,data/PDBBind_processed/6np2/6np2_protein_processed.pdb,data/PDBBind_processed/6np2/6np2_ligand.sdf, +4,data/PDBBind_processed/6uvp/6uvp_protein_processed.pdb,data/PDBBind_processed/6uvp/6uvp_ligand.sdf, +5,data/PDBBind_processed/6oxq/6oxq_protein_processed.pdb,data/PDBBind_processed/6oxq/6oxq_ligand.sdf, +6,data/PDBBind_processed/6jsn/6jsn_protein_processed.pdb,data/PDBBind_processed/6jsn/6jsn_ligand.sdf, +7,data/PDBBind_processed/6hzb/6hzb_protein_processed.pdb,data/PDBBind_processed/6hzb/6hzb_ligand.sdf, +8,data/PDBBind_processed/6qrc/6qrc_protein_processed.pdb,data/PDBBind_processed/6qrc/6qrc_ligand.mol2, +9,data/PDBBind_processed/6oio/6oio_protein_processed.pdb,data/PDBBind_processed/6oio/6oio_ligand.sdf, +10,data/PDBBind_processed/6jag/6jag_protein_processed.pdb,data/PDBBind_processed/6jag/6jag_ligand.sdf, +11,data/PDBBind_processed/6moa/6moa_protein_processed.pdb,data/PDBBind_processed/6moa/6moa_ligand.mol2, +12,data/PDBBind_processed/6hld/6hld_protein_processed.pdb,data/PDBBind_processed/6hld/6hld_ligand.sdf, +13,data/PDBBind_processed/6i9a/6i9a_protein_processed.pdb,data/PDBBind_processed/6i9a/6i9a_ligand.sdf, +14,data/PDBBind_processed/6e4c/6e4c_protein_processed.pdb,data/PDBBind_processed/6e4c/6e4c_ligand.sdf, 
+15,data/PDBBind_processed/6g24/6g24_protein_processed.pdb,data/PDBBind_processed/6g24/6g24_ligand.sdf, +16,data/PDBBind_processed/6jb4/6jb4_protein_processed.pdb,data/PDBBind_processed/6jb4/6jb4_ligand.sdf, +17,data/PDBBind_processed/6s55/6s55_protein_processed.pdb,data/PDBBind_processed/6s55/6s55_ligand.sdf, +18,data/PDBBind_processed/6seo/6seo_protein_processed.pdb,data/PDBBind_processed/6seo/6seo_ligand.sdf, +19,data/PDBBind_processed/6dyz/6dyz_protein_processed.pdb,data/PDBBind_processed/6dyz/6dyz_ligand.mol2, +20,data/PDBBind_processed/5zk5/5zk5_protein_processed.pdb,data/PDBBind_processed/5zk5/5zk5_ligand.sdf, +21,data/PDBBind_processed/6jid/6jid_protein_processed.pdb,data/PDBBind_processed/6jid/6jid_ligand.sdf, +22,data/PDBBind_processed/5ze6/5ze6_protein_processed.pdb,data/PDBBind_processed/5ze6/5ze6_ligand.sdf, +23,data/PDBBind_processed/6qlu/6qlu_protein_processed.pdb,data/PDBBind_processed/6qlu/6qlu_ligand.sdf, +24,data/PDBBind_processed/6a6k/6a6k_protein_processed.pdb,data/PDBBind_processed/6a6k/6a6k_ligand.sdf, +25,data/PDBBind_processed/6qgf/6qgf_protein_processed.pdb,data/PDBBind_processed/6qgf/6qgf_ligand.sdf, +26,data/PDBBind_processed/6e3z/6e3z_protein_processed.pdb,data/PDBBind_processed/6e3z/6e3z_ligand.sdf, +27,data/PDBBind_processed/6te6/6te6_protein_processed.pdb,data/PDBBind_processed/6te6/6te6_ligand.sdf, +28,data/PDBBind_processed/6pka/6pka_protein_processed.pdb,data/PDBBind_processed/6pka/6pka_ligand.sdf, +29,data/PDBBind_processed/6g2o/6g2o_protein_processed.pdb,data/PDBBind_processed/6g2o/6g2o_ligand.sdf, +30,data/PDBBind_processed/6jsf/6jsf_protein_processed.pdb,data/PDBBind_processed/6jsf/6jsf_ligand.sdf, +31,data/PDBBind_processed/5zxk/5zxk_protein_processed.pdb,data/PDBBind_processed/5zxk/5zxk_ligand.sdf, +32,data/PDBBind_processed/6qxd/6qxd_protein_processed.pdb,data/PDBBind_processed/6qxd/6qxd_ligand.sdf, +33,data/PDBBind_processed/6n97/6n97_protein_processed.pdb,data/PDBBind_processed/6n97/6n97_ligand.sdf, 
+34,data/PDBBind_processed/6jt3/6jt3_protein_processed.pdb,data/PDBBind_processed/6jt3/6jt3_ligand.sdf, +35,data/PDBBind_processed/6qtr/6qtr_protein_processed.pdb,data/PDBBind_processed/6qtr/6qtr_ligand.sdf, +36,data/PDBBind_processed/6oy1/6oy1_protein_processed.pdb,data/PDBBind_processed/6oy1/6oy1_ligand.sdf, +37,data/PDBBind_processed/6n96/6n96_protein_processed.pdb,data/PDBBind_processed/6n96/6n96_ligand.sdf, +38,data/PDBBind_processed/6qzh/6qzh_protein_processed.pdb,data/PDBBind_processed/6qzh/6qzh_ligand.sdf, +39,data/PDBBind_processed/6qqz/6qqz_protein_processed.pdb,data/PDBBind_processed/6qqz/6qqz_ligand.mol2, +40,data/PDBBind_processed/6qmt/6qmt_protein_processed.pdb,data/PDBBind_processed/6qmt/6qmt_ligand.sdf, +41,data/PDBBind_processed/6ibx/6ibx_protein_processed.pdb,data/PDBBind_processed/6ibx/6ibx_ligand.sdf, +42,data/PDBBind_processed/6hmt/6hmt_protein_processed.pdb,data/PDBBind_processed/6hmt/6hmt_ligand.sdf, +43,data/PDBBind_processed/5zk7/5zk7_protein_processed.pdb,data/PDBBind_processed/5zk7/5zk7_ligand.sdf, +44,data/PDBBind_processed/6k3l/6k3l_protein_processed.pdb,data/PDBBind_processed/6k3l/6k3l_ligand.sdf, +45,data/PDBBind_processed/6cjs/6cjs_protein_processed.pdb,data/PDBBind_processed/6cjs/6cjs_ligand.sdf, +46,data/PDBBind_processed/6n9l/6n9l_protein_processed.pdb,data/PDBBind_processed/6n9l/6n9l_ligand.sdf, +47,data/PDBBind_processed/6ibz/6ibz_protein_processed.pdb,data/PDBBind_processed/6ibz/6ibz_ligand.sdf, +48,data/PDBBind_processed/6ott/6ott_protein_processed.pdb,data/PDBBind_processed/6ott/6ott_ligand.sdf, +49,data/PDBBind_processed/6gge/6gge_protein_processed.pdb,data/PDBBind_processed/6gge/6gge_ligand.sdf, +50,data/PDBBind_processed/6hot/6hot_protein_processed.pdb,data/PDBBind_processed/6hot/6hot_ligand.sdf, +51,data/PDBBind_processed/6e3p/6e3p_protein_processed.pdb,data/PDBBind_processed/6e3p/6e3p_ligand.mol2, +52,data/PDBBind_processed/6md6/6md6_protein_processed.pdb,data/PDBBind_processed/6md6/6md6_ligand.sdf, 
+53,data/PDBBind_processed/6hlb/6hlb_protein_processed.pdb,data/PDBBind_processed/6hlb/6hlb_ligand.sdf, +54,data/PDBBind_processed/6fe5/6fe5_protein_processed.pdb,data/PDBBind_processed/6fe5/6fe5_ligand.sdf, +55,data/PDBBind_processed/6uwp/6uwp_protein_processed.pdb,data/PDBBind_processed/6uwp/6uwp_ligand.sdf, +56,data/PDBBind_processed/6npp/6npp_protein_processed.pdb,data/PDBBind_processed/6npp/6npp_ligand.sdf, +57,data/PDBBind_processed/6g2f/6g2f_protein_processed.pdb,data/PDBBind_processed/6g2f/6g2f_ligand.sdf, +58,data/PDBBind_processed/6mo7/6mo7_protein_processed.pdb,data/PDBBind_processed/6mo7/6mo7_ligand.sdf, +59,data/PDBBind_processed/6bqd/6bqd_protein_processed.pdb,data/PDBBind_processed/6bqd/6bqd_ligand.mol2, +60,data/PDBBind_processed/6nsv/6nsv_protein_processed.pdb,data/PDBBind_processed/6nsv/6nsv_ligand.mol2, +61,data/PDBBind_processed/6i76/6i76_protein_processed.pdb,data/PDBBind_processed/6i76/6i76_ligand.sdf, +62,data/PDBBind_processed/6n53/6n53_protein_processed.pdb,data/PDBBind_processed/6n53/6n53_ligand.sdf, +63,data/PDBBind_processed/6g2c/6g2c_protein_processed.pdb,data/PDBBind_processed/6g2c/6g2c_ligand.sdf, +64,data/PDBBind_processed/6eeb/6eeb_protein_processed.pdb,data/PDBBind_processed/6eeb/6eeb_ligand.mol2, +65,data/PDBBind_processed/6n0m/6n0m_protein_processed.pdb,data/PDBBind_processed/6n0m/6n0m_ligand.sdf, +66,data/PDBBind_processed/6uvy/6uvy_protein_processed.pdb,data/PDBBind_processed/6uvy/6uvy_ligand.sdf, +67,data/PDBBind_processed/6ovz/6ovz_protein_processed.pdb,data/PDBBind_processed/6ovz/6ovz_ligand.sdf, +68,data/PDBBind_processed/6olx/6olx_protein_processed.pdb,data/PDBBind_processed/6olx/6olx_ligand.sdf, +69,data/PDBBind_processed/6v5l/6v5l_protein_processed.pdb,data/PDBBind_processed/6v5l/6v5l_ligand.mol2, +70,data/PDBBind_processed/6hhg/6hhg_protein_processed.pdb,data/PDBBind_processed/6hhg/6hhg_ligand.sdf, +71,data/PDBBind_processed/5zcu/5zcu_protein_processed.pdb,data/PDBBind_processed/5zcu/5zcu_ligand.sdf, 
+72,data/PDBBind_processed/6dz2/6dz2_protein_processed.pdb,data/PDBBind_processed/6dz2/6dz2_ligand.mol2, +73,data/PDBBind_processed/6mjq/6mjq_protein_processed.pdb,data/PDBBind_processed/6mjq/6mjq_ligand.sdf, +74,data/PDBBind_processed/6efk/6efk_protein_processed.pdb,data/PDBBind_processed/6efk/6efk_ligand.sdf, +75,data/PDBBind_processed/6s9w/6s9w_protein_processed.pdb,data/PDBBind_processed/6s9w/6s9w_ligand.sdf, +76,data/PDBBind_processed/6gdy/6gdy_protein_processed.pdb,data/PDBBind_processed/6gdy/6gdy_ligand.sdf, +77,data/PDBBind_processed/6kqi/6kqi_protein_processed.pdb,data/PDBBind_processed/6kqi/6kqi_ligand.sdf, +78,data/PDBBind_processed/6ueg/6ueg_protein_processed.pdb,data/PDBBind_processed/6ueg/6ueg_ligand.sdf, +79,data/PDBBind_processed/6oxt/6oxt_protein_processed.pdb,data/PDBBind_processed/6oxt/6oxt_ligand.sdf, +80,data/PDBBind_processed/6oy0/6oy0_protein_processed.pdb,data/PDBBind_processed/6oy0/6oy0_ligand.sdf, +81,data/PDBBind_processed/6qr7/6qr7_protein_processed.pdb,data/PDBBind_processed/6qr7/6qr7_ligand.mol2, +82,data/PDBBind_processed/6i41/6i41_protein_processed.pdb,data/PDBBind_processed/6i41/6i41_ligand.sdf, +83,data/PDBBind_processed/6cyg/6cyg_protein_processed.pdb,data/PDBBind_processed/6cyg/6cyg_ligand.sdf, +84,data/PDBBind_processed/6qmr/6qmr_protein_processed.pdb,data/PDBBind_processed/6qmr/6qmr_ligand.sdf, +85,data/PDBBind_processed/6g27/6g27_protein_processed.pdb,data/PDBBind_processed/6g27/6g27_ligand.sdf, +86,data/PDBBind_processed/6ggb/6ggb_protein_processed.pdb,data/PDBBind_processed/6ggb/6ggb_ligand.sdf, +87,data/PDBBind_processed/6g3c/6g3c_protein_processed.pdb,data/PDBBind_processed/6g3c/6g3c_ligand.sdf, +88,data/PDBBind_processed/6n4e/6n4e_protein_processed.pdb,data/PDBBind_processed/6n4e/6n4e_ligand.sdf, +89,data/PDBBind_processed/6fcj/6fcj_protein_processed.pdb,data/PDBBind_processed/6fcj/6fcj_ligand.sdf, +90,data/PDBBind_processed/6quv/6quv_protein_processed.pdb,data/PDBBind_processed/6quv/6quv_ligand.sdf, 
+91,data/PDBBind_processed/6iql/6iql_protein_processed.pdb,data/PDBBind_processed/6iql/6iql_ligand.mol2, +92,data/PDBBind_processed/6i74/6i74_protein_processed.pdb,data/PDBBind_processed/6i74/6i74_ligand.sdf, +93,data/PDBBind_processed/6qr4/6qr4_protein_processed.pdb,data/PDBBind_processed/6qr4/6qr4_ligand.mol2, +94,data/PDBBind_processed/6rnu/6rnu_protein_processed.pdb,data/PDBBind_processed/6rnu/6rnu_ligand.sdf, +95,data/PDBBind_processed/6jib/6jib_protein_processed.pdb,data/PDBBind_processed/6jib/6jib_ligand.sdf, +96,data/PDBBind_processed/6izq/6izq_protein_processed.pdb,data/PDBBind_processed/6izq/6izq_ligand.sdf, +97,data/PDBBind_processed/6qw8/6qw8_protein_processed.pdb,data/PDBBind_processed/6qw8/6qw8_ligand.sdf, +98,data/PDBBind_processed/6qto/6qto_protein_processed.pdb,data/PDBBind_processed/6qto/6qto_ligand.sdf, +99,data/PDBBind_processed/6qrd/6qrd_protein_processed.pdb,data/PDBBind_processed/6qrd/6qrd_ligand.mol2, +100,data/PDBBind_processed/6hza/6hza_protein_processed.pdb,data/PDBBind_processed/6hza/6hza_ligand.sdf, +101,data/PDBBind_processed/6e5s/6e5s_protein_processed.pdb,data/PDBBind_processed/6e5s/6e5s_ligand.sdf, +102,data/PDBBind_processed/6dz3/6dz3_protein_processed.pdb,data/PDBBind_processed/6dz3/6dz3_ligand.mol2, +103,data/PDBBind_processed/6e6w/6e6w_protein_processed.pdb,data/PDBBind_processed/6e6w/6e6w_ligand.mol2, +104,data/PDBBind_processed/6cyh/6cyh_protein_processed.pdb,data/PDBBind_processed/6cyh/6cyh_ligand.sdf, +105,data/PDBBind_processed/5zlf/5zlf_protein_processed.pdb,data/PDBBind_processed/5zlf/5zlf_ligand.sdf, +106,data/PDBBind_processed/6om4/6om4_protein_processed.pdb,data/PDBBind_processed/6om4/6om4_ligand.sdf, +107,data/PDBBind_processed/6gga/6gga_protein_processed.pdb,data/PDBBind_processed/6gga/6gga_ligand.sdf, +108,data/PDBBind_processed/6pgp/6pgp_protein_processed.pdb,data/PDBBind_processed/6pgp/6pgp_ligand.sdf, +109,data/PDBBind_processed/6qqv/6qqv_protein_processed.pdb,data/PDBBind_processed/6qqv/6qqv_ligand.mol2, 
+110,data/PDBBind_processed/6qtq/6qtq_protein_processed.pdb,data/PDBBind_processed/6qtq/6qtq_ligand.sdf, +111,data/PDBBind_processed/6gj6/6gj6_protein_processed.pdb,data/PDBBind_processed/6gj6/6gj6_ligand.mol2, +112,data/PDBBind_processed/6os5/6os5_protein_processed.pdb,data/PDBBind_processed/6os5/6os5_ligand.mol2, +113,data/PDBBind_processed/6s07/6s07_protein_processed.pdb,data/PDBBind_processed/6s07/6s07_ligand.sdf, +114,data/PDBBind_processed/6i77/6i77_protein_processed.pdb,data/PDBBind_processed/6i77/6i77_ligand.sdf, +115,data/PDBBind_processed/6hhj/6hhj_protein_processed.pdb,data/PDBBind_processed/6hhj/6hhj_ligand.sdf, +116,data/PDBBind_processed/6ahs/6ahs_protein_processed.pdb,data/PDBBind_processed/6ahs/6ahs_ligand.sdf, +117,data/PDBBind_processed/6oxx/6oxx_protein_processed.pdb,data/PDBBind_processed/6oxx/6oxx_ligand.sdf, +118,data/PDBBind_processed/6mjj/6mjj_protein_processed.pdb,data/PDBBind_processed/6mjj/6mjj_ligand.sdf, +119,data/PDBBind_processed/6hor/6hor_protein_processed.pdb,data/PDBBind_processed/6hor/6hor_ligand.sdf, +120,data/PDBBind_processed/6jb0/6jb0_protein_processed.pdb,data/PDBBind_processed/6jb0/6jb0_ligand.sdf, +121,data/PDBBind_processed/6i68/6i68_protein_processed.pdb,data/PDBBind_processed/6i68/6i68_ligand.sdf, +122,data/PDBBind_processed/6pz4/6pz4_protein_processed.pdb,data/PDBBind_processed/6pz4/6pz4_ligand.sdf, +123,data/PDBBind_processed/6mhb/6mhb_protein_processed.pdb,data/PDBBind_processed/6mhb/6mhb_ligand.sdf, +124,data/PDBBind_processed/6uim/6uim_protein_processed.pdb,data/PDBBind_processed/6uim/6uim_ligand.sdf, +125,data/PDBBind_processed/6jsg/6jsg_protein_processed.pdb,data/PDBBind_processed/6jsg/6jsg_ligand.sdf, +126,data/PDBBind_processed/6i78/6i78_protein_processed.pdb,data/PDBBind_processed/6i78/6i78_ligand.sdf, +127,data/PDBBind_processed/6oxy/6oxy_protein_processed.pdb,data/PDBBind_processed/6oxy/6oxy_ligand.sdf, +128,data/PDBBind_processed/6gbw/6gbw_protein_processed.pdb,data/PDBBind_processed/6gbw/6gbw_ligand.sdf, 
+129,data/PDBBind_processed/6mo0/6mo0_protein_processed.pdb,data/PDBBind_processed/6mo0/6mo0_ligand.sdf, +130,data/PDBBind_processed/6ggf/6ggf_protein_processed.pdb,data/PDBBind_processed/6ggf/6ggf_ligand.sdf, +131,data/PDBBind_processed/6qge/6qge_protein_processed.pdb,data/PDBBind_processed/6qge/6qge_ligand.sdf, +132,data/PDBBind_processed/6cjr/6cjr_protein_processed.pdb,data/PDBBind_processed/6cjr/6cjr_ligand.sdf, +133,data/PDBBind_processed/6oxp/6oxp_protein_processed.pdb,data/PDBBind_processed/6oxp/6oxp_ligand.sdf, +134,data/PDBBind_processed/6d07/6d07_protein_processed.pdb,data/PDBBind_processed/6d07/6d07_ligand.sdf, +135,data/PDBBind_processed/6i63/6i63_protein_processed.pdb,data/PDBBind_processed/6i63/6i63_ligand.sdf, +136,data/PDBBind_processed/6ten/6ten_protein_processed.pdb,data/PDBBind_processed/6ten/6ten_ligand.sdf, +137,data/PDBBind_processed/6uii/6uii_protein_processed.pdb,data/PDBBind_processed/6uii/6uii_ligand.sdf, +138,data/PDBBind_processed/6qlr/6qlr_protein_processed.pdb,data/PDBBind_processed/6qlr/6qlr_ligand.sdf, +139,data/PDBBind_processed/6sen/6sen_protein_processed.pdb,data/PDBBind_processed/6sen/6sen_ligand.mol2, +140,data/PDBBind_processed/6oxv/6oxv_protein_processed.pdb,data/PDBBind_processed/6oxv/6oxv_ligand.sdf, +141,data/PDBBind_processed/6g2b/6g2b_protein_processed.pdb,data/PDBBind_processed/6g2b/6g2b_ligand.sdf, +142,data/PDBBind_processed/5zr3/5zr3_protein_processed.pdb,data/PDBBind_processed/5zr3/5zr3_ligand.sdf, +143,data/PDBBind_processed/6kjf/6kjf_protein_processed.pdb,data/PDBBind_processed/6kjf/6kjf_ligand.sdf, +144,data/PDBBind_processed/6qr9/6qr9_protein_processed.pdb,data/PDBBind_processed/6qr9/6qr9_ligand.mol2, +145,data/PDBBind_processed/6g9f/6g9f_protein_processed.pdb,data/PDBBind_processed/6g9f/6g9f_ligand.sdf, +146,data/PDBBind_processed/6e6v/6e6v_protein_processed.pdb,data/PDBBind_processed/6e6v/6e6v_ligand.sdf, +147,data/PDBBind_processed/5zk9/5zk9_protein_processed.pdb,data/PDBBind_processed/5zk9/5zk9_ligand.sdf, 
+148,data/PDBBind_processed/6pnn/6pnn_protein_processed.pdb,data/PDBBind_processed/6pnn/6pnn_ligand.sdf, +149,data/PDBBind_processed/6nri/6nri_protein_processed.pdb,data/PDBBind_processed/6nri/6nri_ligand.sdf, +150,data/PDBBind_processed/6uwv/6uwv_protein_processed.pdb,data/PDBBind_processed/6uwv/6uwv_ligand.sdf, +151,data/PDBBind_processed/6ooz/6ooz_protein_processed.pdb,data/PDBBind_processed/6ooz/6ooz_ligand.sdf, +152,data/PDBBind_processed/6npi/6npi_protein_processed.pdb,data/PDBBind_processed/6npi/6npi_ligand.sdf, +153,data/PDBBind_processed/6oip/6oip_protein_processed.pdb,data/PDBBind_processed/6oip/6oip_ligand.sdf, +154,data/PDBBind_processed/6miv/6miv_protein_processed.pdb,data/PDBBind_processed/6miv/6miv_ligand.sdf, +155,data/PDBBind_processed/6s57/6s57_protein_processed.pdb,data/PDBBind_processed/6s57/6s57_ligand.sdf, +156,data/PDBBind_processed/6p8x/6p8x_protein_processed.pdb,data/PDBBind_processed/6p8x/6p8x_ligand.sdf, +157,data/PDBBind_processed/6hoq/6hoq_protein_processed.pdb,data/PDBBind_processed/6hoq/6hoq_ligand.sdf, +158,data/PDBBind_processed/6qts/6qts_protein_processed.pdb,data/PDBBind_processed/6qts/6qts_ligand.sdf, +159,data/PDBBind_processed/6ggd/6ggd_protein_processed.pdb,data/PDBBind_processed/6ggd/6ggd_ligand.sdf, +160,data/PDBBind_processed/6pnm/6pnm_protein_processed.pdb,data/PDBBind_processed/6pnm/6pnm_ligand.sdf, +161,data/PDBBind_processed/6oy2/6oy2_protein_processed.pdb,data/PDBBind_processed/6oy2/6oy2_ligand.sdf, +162,data/PDBBind_processed/6oi8/6oi8_protein_processed.pdb,data/PDBBind_processed/6oi8/6oi8_ligand.sdf, +163,data/PDBBind_processed/6mhd/6mhd_protein_processed.pdb,data/PDBBind_processed/6mhd/6mhd_ligand.sdf, +164,data/PDBBind_processed/6agt/6agt_protein_processed.pdb,data/PDBBind_processed/6agt/6agt_ligand.sdf, +165,data/PDBBind_processed/6i5p/6i5p_protein_processed.pdb,data/PDBBind_processed/6i5p/6i5p_ligand.sdf, +166,data/PDBBind_processed/6hhr/6hhr_protein_processed.pdb,data/PDBBind_processed/6hhr/6hhr_ligand.sdf, 
+167,data/PDBBind_processed/6p8z/6p8z_protein_processed.pdb,data/PDBBind_processed/6p8z/6p8z_ligand.sdf, +168,data/PDBBind_processed/6c85/6c85_protein_processed.pdb,data/PDBBind_processed/6c85/6c85_ligand.sdf, +169,data/PDBBind_processed/6g5u/6g5u_protein_processed.pdb,data/PDBBind_processed/6g5u/6g5u_ligand.sdf, +170,data/PDBBind_processed/6j06/6j06_protein_processed.pdb,data/PDBBind_processed/6j06/6j06_ligand.sdf, +171,data/PDBBind_processed/6qsz/6qsz_protein_processed.pdb,data/PDBBind_processed/6qsz/6qsz_ligand.sdf, +172,data/PDBBind_processed/6jbb/6jbb_protein_processed.pdb,data/PDBBind_processed/6jbb/6jbb_ligand.sdf, +173,data/PDBBind_processed/6hhp/6hhp_protein_processed.pdb,data/PDBBind_processed/6hhp/6hhp_ligand.sdf, +174,data/PDBBind_processed/6np5/6np5_protein_processed.pdb,data/PDBBind_processed/6np5/6np5_ligand.sdf, +175,data/PDBBind_processed/6nlj/6nlj_protein_processed.pdb,data/PDBBind_processed/6nlj/6nlj_ligand.sdf, +176,data/PDBBind_processed/6qlp/6qlp_protein_processed.pdb,data/PDBBind_processed/6qlp/6qlp_ligand.sdf, +177,data/PDBBind_processed/6n94/6n94_protein_processed.pdb,data/PDBBind_processed/6n94/6n94_ligand.sdf, +178,data/PDBBind_processed/6e13/6e13_protein_processed.pdb,data/PDBBind_processed/6e13/6e13_ligand.sdf, +179,data/PDBBind_processed/6qls/6qls_protein_processed.pdb,data/PDBBind_processed/6qls/6qls_ligand.sdf, +180,data/PDBBind_processed/6uil/6uil_protein_processed.pdb,data/PDBBind_processed/6uil/6uil_ligand.sdf, +181,data/PDBBind_processed/6st3/6st3_protein_processed.pdb,data/PDBBind_processed/6st3/6st3_ligand.sdf, +182,data/PDBBind_processed/6n92/6n92_protein_processed.pdb,data/PDBBind_processed/6n92/6n92_ligand.sdf, +183,data/PDBBind_processed/6s56/6s56_protein_processed.pdb,data/PDBBind_processed/6s56/6s56_ligand.sdf, +184,data/PDBBind_processed/6hzd/6hzd_protein_processed.pdb,data/PDBBind_processed/6hzd/6hzd_ligand.sdf, +185,data/PDBBind_processed/6uhv/6uhv_protein_processed.pdb,data/PDBBind_processed/6uhv/6uhv_ligand.sdf, 
+186,data/PDBBind_processed/6k05/6k05_protein_processed.pdb,data/PDBBind_processed/6k05/6k05_ligand.sdf, +187,data/PDBBind_processed/6q36/6q36_protein_processed.pdb,data/PDBBind_processed/6q36/6q36_ligand.mol2, +188,data/PDBBind_processed/6ic0/6ic0_protein_processed.pdb,data/PDBBind_processed/6ic0/6ic0_ligand.sdf, +189,data/PDBBind_processed/6hhi/6hhi_protein_processed.pdb,data/PDBBind_processed/6hhi/6hhi_ligand.sdf, +190,data/PDBBind_processed/6e3m/6e3m_protein_processed.pdb,data/PDBBind_processed/6e3m/6e3m_ligand.sdf, +191,data/PDBBind_processed/6qtx/6qtx_protein_processed.pdb,data/PDBBind_processed/6qtx/6qtx_ligand.sdf, +192,data/PDBBind_processed/6jse/6jse_protein_processed.pdb,data/PDBBind_processed/6jse/6jse_ligand.sdf, +193,data/PDBBind_processed/5zjy/5zjy_protein_processed.pdb,data/PDBBind_processed/5zjy/5zjy_ligand.sdf, +194,data/PDBBind_processed/6o3y/6o3y_protein_processed.pdb,data/PDBBind_processed/6o3y/6o3y_ligand.sdf, +195,data/PDBBind_processed/6rpg/6rpg_protein_processed.pdb,data/PDBBind_processed/6rpg/6rpg_ligand.sdf, +196,data/PDBBind_processed/6rr0/6rr0_protein_processed.pdb,data/PDBBind_processed/6rr0/6rr0_ligand.sdf, +197,data/PDBBind_processed/6gzy/6gzy_protein_processed.pdb,data/PDBBind_processed/6gzy/6gzy_ligand.sdf, +198,data/PDBBind_processed/6qlt/6qlt_protein_processed.pdb,data/PDBBind_processed/6qlt/6qlt_ligand.sdf, +199,data/PDBBind_processed/6ufo/6ufo_protein_processed.pdb,data/PDBBind_processed/6ufo/6ufo_ligand.sdf, +200,data/PDBBind_processed/6o0h/6o0h_protein_processed.pdb,data/PDBBind_processed/6o0h/6o0h_ligand.sdf, +201,data/PDBBind_processed/6o3x/6o3x_protein_processed.pdb,data/PDBBind_processed/6o3x/6o3x_ligand.sdf, +202,data/PDBBind_processed/5zjz/5zjz_protein_processed.pdb,data/PDBBind_processed/5zjz/5zjz_ligand.mol2, +203,data/PDBBind_processed/6i8t/6i8t_protein_processed.pdb,data/PDBBind_processed/6i8t/6i8t_ligand.sdf, +204,data/PDBBind_processed/6ooy/6ooy_protein_processed.pdb,data/PDBBind_processed/6ooy/6ooy_ligand.sdf, 
+205,data/PDBBind_processed/6oiq/6oiq_protein_processed.pdb,data/PDBBind_processed/6oiq/6oiq_ligand.sdf, +206,data/PDBBind_processed/6od6/6od6_protein_processed.pdb,data/PDBBind_processed/6od6/6od6_ligand.sdf, +207,data/PDBBind_processed/6nrh/6nrh_protein_processed.pdb,data/PDBBind_processed/6nrh/6nrh_ligand.sdf, +208,data/PDBBind_processed/6qra/6qra_protein_processed.pdb,data/PDBBind_processed/6qra/6qra_ligand.mol2, +209,data/PDBBind_processed/6hhh/6hhh_protein_processed.pdb,data/PDBBind_processed/6hhh/6hhh_ligand.sdf, +210,data/PDBBind_processed/6m7h/6m7h_protein_processed.pdb,data/PDBBind_processed/6m7h/6m7h_ligand.sdf, +211,data/PDBBind_processed/6ufn/6ufn_protein_processed.pdb,data/PDBBind_processed/6ufn/6ufn_ligand.sdf, +212,data/PDBBind_processed/6qr0/6qr0_protein_processed.pdb,data/PDBBind_processed/6qr0/6qr0_ligand.mol2, +213,data/PDBBind_processed/6o5u/6o5u_protein_processed.pdb,data/PDBBind_processed/6o5u/6o5u_ligand.sdf, +214,data/PDBBind_processed/6h14/6h14_protein_processed.pdb,data/PDBBind_processed/6h14/6h14_ligand.sdf, +215,data/PDBBind_processed/6jwa/6jwa_protein_processed.pdb,data/PDBBind_processed/6jwa/6jwa_ligand.sdf, +216,data/PDBBind_processed/6ny0/6ny0_protein_processed.pdb,data/PDBBind_processed/6ny0/6ny0_ligand.sdf, +217,data/PDBBind_processed/6jan/6jan_protein_processed.pdb,data/PDBBind_processed/6jan/6jan_ligand.sdf, +218,data/PDBBind_processed/6ftf/6ftf_protein_processed.pdb,data/PDBBind_processed/6ftf/6ftf_ligand.sdf, +219,data/PDBBind_processed/6oxw/6oxw_protein_processed.pdb,data/PDBBind_processed/6oxw/6oxw_ligand.sdf, +220,data/PDBBind_processed/6jon/6jon_protein_processed.pdb,data/PDBBind_processed/6jon/6jon_ligand.sdf, +221,data/PDBBind_processed/6cf7/6cf7_protein_processed.pdb,data/PDBBind_processed/6cf7/6cf7_ligand.sdf, +222,data/PDBBind_processed/6rtn/6rtn_protein_processed.pdb,data/PDBBind_processed/6rtn/6rtn_ligand.mol2, +223,data/PDBBind_processed/6jsz/6jsz_protein_processed.pdb,data/PDBBind_processed/6jsz/6jsz_ligand.sdf, 
+224,data/PDBBind_processed/6o9c/6o9c_protein_processed.pdb,data/PDBBind_processed/6o9c/6o9c_ligand.sdf, +225,data/PDBBind_processed/6mo8/6mo8_protein_processed.pdb,data/PDBBind_processed/6mo8/6mo8_ligand.sdf, +226,data/PDBBind_processed/6qln/6qln_protein_processed.pdb,data/PDBBind_processed/6qln/6qln_ligand.sdf, +227,data/PDBBind_processed/6qqu/6qqu_protein_processed.pdb,data/PDBBind_processed/6qqu/6qqu_ligand.mol2, +228,data/PDBBind_processed/6i66/6i66_protein_processed.pdb,data/PDBBind_processed/6i66/6i66_ligand.sdf, +229,data/PDBBind_processed/6mja/6mja_protein_processed.pdb,data/PDBBind_processed/6mja/6mja_ligand.sdf, +230,data/PDBBind_processed/6gwe/6gwe_protein_processed.pdb,data/PDBBind_processed/6gwe/6gwe_ligand.mol2, +231,data/PDBBind_processed/6d3z/6d3z_protein_processed.pdb,data/PDBBind_processed/6d3z/6d3z_ligand.sdf, +232,data/PDBBind_processed/6oxr/6oxr_protein_processed.pdb,data/PDBBind_processed/6oxr/6oxr_ligand.sdf, +233,data/PDBBind_processed/6r4k/6r4k_protein_processed.pdb,data/PDBBind_processed/6r4k/6r4k_ligand.sdf, +234,data/PDBBind_processed/6hle/6hle_protein_processed.pdb,data/PDBBind_processed/6hle/6hle_ligand.sdf, +235,data/PDBBind_processed/6h9v/6h9v_protein_processed.pdb,data/PDBBind_processed/6h9v/6h9v_ligand.sdf, +236,data/PDBBind_processed/6hou/6hou_protein_processed.pdb,data/PDBBind_processed/6hou/6hou_ligand.sdf, +237,data/PDBBind_processed/6nv9/6nv9_protein_processed.pdb,data/PDBBind_processed/6nv9/6nv9_ligand.sdf, +238,data/PDBBind_processed/6py0/6py0_protein_processed.pdb,data/PDBBind_processed/6py0/6py0_ligand.sdf, +239,data/PDBBind_processed/6qlq/6qlq_protein_processed.pdb,data/PDBBind_processed/6qlq/6qlq_ligand.sdf, +240,data/PDBBind_processed/6nv7/6nv7_protein_processed.pdb,data/PDBBind_processed/6nv7/6nv7_ligand.sdf, +241,data/PDBBind_processed/6n4b/6n4b_protein_processed.pdb,data/PDBBind_processed/6n4b/6n4b_ligand.sdf, +242,data/PDBBind_processed/6jaq/6jaq_protein_processed.pdb,data/PDBBind_processed/6jaq/6jaq_ligand.sdf, 
+243,data/PDBBind_processed/6i8m/6i8m_protein_processed.pdb,data/PDBBind_processed/6i8m/6i8m_ligand.sdf, +244,data/PDBBind_processed/6dz0/6dz0_protein_processed.pdb,data/PDBBind_processed/6dz0/6dz0_ligand.mol2, +245,data/PDBBind_processed/6oxs/6oxs_protein_processed.pdb,data/PDBBind_processed/6oxs/6oxs_ligand.sdf, +246,data/PDBBind_processed/6k2n/6k2n_protein_processed.pdb,data/PDBBind_processed/6k2n/6k2n_ligand.sdf, +247,data/PDBBind_processed/6cjj/6cjj_protein_processed.pdb,data/PDBBind_processed/6cjj/6cjj_ligand.sdf, +248,data/PDBBind_processed/6ffg/6ffg_protein_processed.pdb,data/PDBBind_processed/6ffg/6ffg_ligand.sdf, +249,data/PDBBind_processed/6a73/6a73_protein_processed.pdb,data/PDBBind_processed/6a73/6a73_ligand.sdf, +250,data/PDBBind_processed/6qqt/6qqt_protein_processed.pdb,data/PDBBind_processed/6qqt/6qqt_ligand.mol2, +251,data/PDBBind_processed/6a1c/6a1c_protein_processed.pdb,data/PDBBind_processed/6a1c/6a1c_ligand.sdf, +252,data/PDBBind_processed/6oxu/6oxu_protein_processed.pdb,data/PDBBind_processed/6oxu/6oxu_ligand.sdf, +253,data/PDBBind_processed/6qre/6qre_protein_processed.pdb,data/PDBBind_processed/6qre/6qre_ligand.mol2, +254,data/PDBBind_processed/6qtw/6qtw_protein_processed.pdb,data/PDBBind_processed/6qtw/6qtw_ligand.sdf, +255,data/PDBBind_processed/6np4/6np4_protein_processed.pdb,data/PDBBind_processed/6np4/6np4_ligand.sdf, +256,data/PDBBind_processed/6hv2/6hv2_protein_processed.pdb,data/PDBBind_processed/6hv2/6hv2_ligand.sdf, +257,data/PDBBind_processed/6n55/6n55_protein_processed.pdb,data/PDBBind_processed/6n55/6n55_ligand.sdf, +258,data/PDBBind_processed/6e3o/6e3o_protein_processed.pdb,data/PDBBind_processed/6e3o/6e3o_ligand.sdf, +259,data/PDBBind_processed/6kjd/6kjd_protein_processed.pdb,data/PDBBind_processed/6kjd/6kjd_ligand.sdf, +260,data/PDBBind_processed/6sfc/6sfc_protein_processed.pdb,data/PDBBind_processed/6sfc/6sfc_ligand.sdf, +261,data/PDBBind_processed/6qi7/6qi7_protein_processed.pdb,data/PDBBind_processed/6qi7/6qi7_ligand.sdf, 
+262,data/PDBBind_processed/6hzc/6hzc_protein_processed.pdb,data/PDBBind_processed/6hzc/6hzc_ligand.sdf, +263,data/PDBBind_processed/6k04/6k04_protein_processed.pdb,data/PDBBind_processed/6k04/6k04_ligand.sdf, +264,data/PDBBind_processed/6op0/6op0_protein_processed.pdb,data/PDBBind_processed/6op0/6op0_ligand.sdf, +265,data/PDBBind_processed/6q38/6q38_protein_processed.pdb,data/PDBBind_processed/6q38/6q38_ligand.mol2, +266,data/PDBBind_processed/6n8x/6n8x_protein_processed.pdb,data/PDBBind_processed/6n8x/6n8x_ligand.sdf, +267,data/PDBBind_processed/6np3/6np3_protein_processed.pdb,data/PDBBind_processed/6np3/6np3_ligand.sdf, +268,data/PDBBind_processed/6uvv/6uvv_protein_processed.pdb,data/PDBBind_processed/6uvv/6uvv_ligand.sdf, +269,data/PDBBind_processed/6pgo/6pgo_protein_processed.pdb,data/PDBBind_processed/6pgo/6pgo_ligand.sdf, +270,data/PDBBind_processed/6jbe/6jbe_protein_processed.pdb,data/PDBBind_processed/6jbe/6jbe_ligand.sdf, +271,data/PDBBind_processed/6i75/6i75_protein_processed.pdb,data/PDBBind_processed/6i75/6i75_ligand.sdf, +272,data/PDBBind_processed/6qqq/6qqq_protein_processed.pdb,data/PDBBind_processed/6qqq/6qqq_ligand.mol2, +273,data/PDBBind_processed/6i62/6i62_protein_processed.pdb,data/PDBBind_processed/6i62/6i62_ligand.sdf, +274,data/PDBBind_processed/6j9y/6j9y_protein_processed.pdb,data/PDBBind_processed/6j9y/6j9y_ligand.sdf, +275,data/PDBBind_processed/6g29/6g29_protein_processed.pdb,data/PDBBind_processed/6g29/6g29_ligand.sdf, +276,data/PDBBind_processed/6h7d/6h7d_protein_processed.pdb,data/PDBBind_processed/6h7d/6h7d_ligand.sdf, +277,data/PDBBind_processed/6mo9/6mo9_protein_processed.pdb,data/PDBBind_processed/6mo9/6mo9_ligand.sdf, +278,data/PDBBind_processed/6jao/6jao_protein_processed.pdb,data/PDBBind_processed/6jao/6jao_ligand.sdf, +279,data/PDBBind_processed/6jmf/6jmf_protein_processed.pdb,data/PDBBind_processed/6jmf/6jmf_ligand.sdf, +280,data/PDBBind_processed/6hmy/6hmy_protein_processed.pdb,data/PDBBind_processed/6hmy/6hmy_ligand.sdf, 
+281,data/PDBBind_processed/6qfe/6qfe_protein_processed.pdb,data/PDBBind_processed/6qfe/6qfe_ligand.mol2, +282,data/PDBBind_processed/5zml/5zml_protein_processed.pdb,data/PDBBind_processed/5zml/5zml_ligand.sdf, +283,data/PDBBind_processed/6i65/6i65_protein_processed.pdb,data/PDBBind_processed/6i65/6i65_ligand.sdf, +284,data/PDBBind_processed/6e7m/6e7m_protein_processed.pdb,data/PDBBind_processed/6e7m/6e7m_ligand.sdf, +285,data/PDBBind_processed/6i61/6i61_protein_processed.pdb,data/PDBBind_processed/6i61/6i61_ligand.sdf, +286,data/PDBBind_processed/6rz6/6rz6_protein_processed.pdb,data/PDBBind_processed/6rz6/6rz6_ligand.sdf, +287,data/PDBBind_processed/6qtm/6qtm_protein_processed.pdb,data/PDBBind_processed/6qtm/6qtm_ligand.sdf, +288,data/PDBBind_processed/6qlo/6qlo_protein_processed.pdb,data/PDBBind_processed/6qlo/6qlo_ligand.sdf, +289,data/PDBBind_processed/6oie/6oie_protein_processed.pdb,data/PDBBind_processed/6oie/6oie_ligand.sdf, +290,data/PDBBind_processed/6miy/6miy_protein_processed.pdb,data/PDBBind_processed/6miy/6miy_ligand.sdf, +291,data/PDBBind_processed/6nrf/6nrf_protein_processed.pdb,data/PDBBind_processed/6nrf/6nrf_ligand.mol2, +292,data/PDBBind_processed/6gj5/6gj5_protein_processed.pdb,data/PDBBind_processed/6gj5/6gj5_ligand.mol2, +293,data/PDBBind_processed/6jad/6jad_protein_processed.pdb,data/PDBBind_processed/6jad/6jad_ligand.sdf, +294,data/PDBBind_processed/6mj4/6mj4_protein_processed.pdb,data/PDBBind_processed/6mj4/6mj4_ligand.sdf, +295,data/PDBBind_processed/6h12/6h12_protein_processed.pdb,data/PDBBind_processed/6h12/6h12_ligand.sdf, +296,data/PDBBind_processed/6d3y/6d3y_protein_processed.pdb,data/PDBBind_processed/6d3y/6d3y_ligand.sdf, +297,data/PDBBind_processed/6qr2/6qr2_protein_processed.pdb,data/PDBBind_processed/6qr2/6qr2_ligand.mol2, +298,data/PDBBind_processed/6qxa/6qxa_protein_processed.pdb,data/PDBBind_processed/6qxa/6qxa_ligand.mol2, +299,data/PDBBind_processed/6o9b/6o9b_protein_processed.pdb,data/PDBBind_processed/6o9b/6o9b_ligand.sdf, 
+300,data/PDBBind_processed/6ckl/6ckl_protein_processed.pdb,data/PDBBind_processed/6ckl/6ckl_ligand.sdf, +301,data/PDBBind_processed/6oir/6oir_protein_processed.pdb,data/PDBBind_processed/6oir/6oir_ligand.sdf, +302,data/PDBBind_processed/6d40/6d40_protein_processed.pdb,data/PDBBind_processed/6d40/6d40_ligand.sdf, +303,data/PDBBind_processed/6e6j/6e6j_protein_processed.pdb,data/PDBBind_processed/6e6j/6e6j_ligand.mol2, +304,data/PDBBind_processed/6i7a/6i7a_protein_processed.pdb,data/PDBBind_processed/6i7a/6i7a_ligand.sdf, +305,data/PDBBind_processed/6g25/6g25_protein_processed.pdb,data/PDBBind_processed/6g25/6g25_ligand.mol2, +306,data/PDBBind_processed/6oin/6oin_protein_processed.pdb,data/PDBBind_processed/6oin/6oin_ligand.sdf, +307,data/PDBBind_processed/6jam/6jam_protein_processed.pdb,data/PDBBind_processed/6jam/6jam_ligand.sdf, +308,data/PDBBind_processed/6oxz/6oxz_protein_processed.pdb,data/PDBBind_processed/6oxz/6oxz_ligand.sdf, +309,data/PDBBind_processed/6hop/6hop_protein_processed.pdb,data/PDBBind_processed/6hop/6hop_ligand.sdf, +310,data/PDBBind_processed/6rot/6rot_protein_processed.pdb,data/PDBBind_processed/6rot/6rot_ligand.sdf, +311,data/PDBBind_processed/6uhu/6uhu_protein_processed.pdb,data/PDBBind_processed/6uhu/6uhu_ligand.mol2, +312,data/PDBBind_processed/6mji/6mji_protein_processed.pdb,data/PDBBind_processed/6mji/6mji_ligand.sdf, +313,data/PDBBind_processed/6nrj/6nrj_protein_processed.pdb,data/PDBBind_processed/6nrj/6nrj_ligand.mol2, +314,data/PDBBind_processed/6nt2/6nt2_protein_processed.pdb,data/PDBBind_processed/6nt2/6nt2_ligand.mol2, +315,data/PDBBind_processed/6op9/6op9_protein_processed.pdb,data/PDBBind_processed/6op9/6op9_ligand.sdf, +316,data/PDBBind_processed/6pno/6pno_protein_processed.pdb,data/PDBBind_processed/6pno/6pno_ligand.sdf, +317,data/PDBBind_processed/6e4v/6e4v_protein_processed.pdb,data/PDBBind_processed/6e4v/6e4v_ligand.sdf, +318,data/PDBBind_processed/6k1s/6k1s_protein_processed.pdb,data/PDBBind_processed/6k1s/6k1s_ligand.sdf, 
+319,data/PDBBind_processed/6a87/6a87_protein_processed.pdb,data/PDBBind_processed/6a87/6a87_ligand.sdf, +320,data/PDBBind_processed/6oim/6oim_protein_processed.pdb,data/PDBBind_processed/6oim/6oim_ligand.sdf, +321,data/PDBBind_processed/6cjp/6cjp_protein_processed.pdb,data/PDBBind_processed/6cjp/6cjp_ligand.sdf, +322,data/PDBBind_processed/6pyb/6pyb_protein_processed.pdb,data/PDBBind_processed/6pyb/6pyb_ligand.sdf, +323,data/PDBBind_processed/6h13/6h13_protein_processed.pdb,data/PDBBind_processed/6h13/6h13_ligand.sdf, +324,data/PDBBind_processed/6qrf/6qrf_protein_processed.pdb,data/PDBBind_processed/6qrf/6qrf_ligand.mol2, +325,data/PDBBind_processed/6mhc/6mhc_protein_processed.pdb,data/PDBBind_processed/6mhc/6mhc_ligand.sdf, +326,data/PDBBind_processed/6j9w/6j9w_protein_processed.pdb,data/PDBBind_processed/6j9w/6j9w_ligand.sdf, +327,data/PDBBind_processed/6nrg/6nrg_protein_processed.pdb,data/PDBBind_processed/6nrg/6nrg_ligand.mol2, +328,data/PDBBind_processed/6fff/6fff_protein_processed.pdb,data/PDBBind_processed/6fff/6fff_ligand.sdf, +329,data/PDBBind_processed/6n93/6n93_protein_processed.pdb,data/PDBBind_processed/6n93/6n93_ligand.sdf, +330,data/PDBBind_processed/6jut/6jut_protein_processed.pdb,data/PDBBind_processed/6jut/6jut_ligand.mol2, +331,data/PDBBind_processed/6g2e/6g2e_protein_processed.pdb,data/PDBBind_processed/6g2e/6g2e_ligand.sdf, +332,data/PDBBind_processed/6nd3/6nd3_protein_processed.pdb,data/PDBBind_processed/6nd3/6nd3_ligand.sdf, +333,data/PDBBind_processed/6os6/6os6_protein_processed.pdb,data/PDBBind_processed/6os6/6os6_ligand.mol2, +334,data/PDBBind_processed/6dql/6dql_protein_processed.pdb,data/PDBBind_processed/6dql/6dql_ligand.mol2, +335,data/PDBBind_processed/6inz/6inz_protein_processed.pdb,data/PDBBind_processed/6inz/6inz_ligand.sdf, +336,data/PDBBind_processed/6i67/6i67_protein_processed.pdb,data/PDBBind_processed/6i67/6i67_ligand.sdf, +337,data/PDBBind_processed/6quw/6quw_protein_processed.pdb,data/PDBBind_processed/6quw/6quw_ligand.sdf, 
+338,data/PDBBind_processed/6qwi/6qwi_protein_processed.pdb,data/PDBBind_processed/6qwi/6qwi_ligand.sdf, +339,data/PDBBind_processed/6npm/6npm_protein_processed.pdb,data/PDBBind_processed/6npm/6npm_ligand.sdf, +340,data/PDBBind_processed/6i64/6i64_protein_processed.pdb,data/PDBBind_processed/6i64/6i64_ligand.sdf, +341,data/PDBBind_processed/6e3n/6e3n_protein_processed.pdb,data/PDBBind_processed/6e3n/6e3n_ligand.sdf, +342,data/PDBBind_processed/6qrg/6qrg_protein_processed.pdb,data/PDBBind_processed/6qrg/6qrg_ligand.mol2, +343,data/PDBBind_processed/6nxz/6nxz_protein_processed.pdb,data/PDBBind_processed/6nxz/6nxz_ligand.sdf, +344,data/PDBBind_processed/6iby/6iby_protein_processed.pdb,data/PDBBind_processed/6iby/6iby_ligand.sdf, +345,data/PDBBind_processed/6gj7/6gj7_protein_processed.pdb,data/PDBBind_processed/6gj7/6gj7_ligand.mol2, +346,data/PDBBind_processed/6qr3/6qr3_protein_processed.pdb,data/PDBBind_processed/6qr3/6qr3_ligand.mol2, +347,data/PDBBind_processed/6qr1/6qr1_protein_processed.pdb,data/PDBBind_processed/6qr1/6qr1_ligand.mol2, +348,data/PDBBind_processed/6s9x/6s9x_protein_processed.pdb,data/PDBBind_processed/6s9x/6s9x_ligand.sdf, +349,data/PDBBind_processed/6q4q/6q4q_protein_processed.pdb,data/PDBBind_processed/6q4q/6q4q_ligand.mol2, +350,data/PDBBind_processed/6hbn/6hbn_protein_processed.pdb,data/PDBBind_processed/6hbn/6hbn_ligand.sdf, +351,data/PDBBind_processed/6nw3/6nw3_protein_processed.pdb,data/PDBBind_processed/6nw3/6nw3_ligand.sdf, +352,data/PDBBind_processed/6tel/6tel_protein_processed.pdb,data/PDBBind_processed/6tel/6tel_ligand.sdf, +353,data/PDBBind_processed/6p8y/6p8y_protein_processed.pdb,data/PDBBind_processed/6p8y/6p8y_ligand.sdf, +354,data/PDBBind_processed/6d5w/6d5w_protein_processed.pdb,data/PDBBind_processed/6d5w/6d5w_ligand.sdf, +355,data/PDBBind_processed/6t6a/6t6a_protein_processed.pdb,data/PDBBind_processed/6t6a/6t6a_ligand.mol2, 
+356,data/PDBBind_processed/6o5g/6o5g_protein_processed.pdb,data/PDBBind_processed/6o5g/6o5g_ligand.mol2, +357,data/PDBBind_processed/6r7d/6r7d_protein_processed.pdb,data/PDBBind_processed/6r7d/6r7d_ligand.sdf, +358,data/PDBBind_processed/6pya/6pya_protein_processed.pdb,data/PDBBind_processed/6pya/6pya_ligand.mol2, +359,data/PDBBind_processed/6ffe/6ffe_protein_processed.pdb,data/PDBBind_processed/6ffe/6ffe_ligand.sdf, +360,data/PDBBind_processed/6d3x/6d3x_protein_processed.pdb,data/PDBBind_processed/6d3x/6d3x_ligand.sdf, +361,data/PDBBind_processed/6gj8/6gj8_protein_processed.pdb,data/PDBBind_processed/6gj8/6gj8_ligand.mol2, +362,data/PDBBind_processed/6mo2/6mo2_protein_processed.pdb,data/PDBBind_processed/6mo2/6mo2_ligand.mol2, diff --git a/forks/DiffDockv1/datasets/conformer_matching.py b/forks/DiffDockv1/datasets/conformer_matching.py new file mode 100644 index 00000000..fc889439 --- /dev/null +++ b/forks/DiffDockv1/datasets/conformer_matching.py @@ -0,0 +1,196 @@ +import copy, time +import numpy as np +from collections import defaultdict +from rdkit import Chem, RDLogger +from rdkit.Chem import AllChem, rdMolTransforms +from rdkit import Geometry +import networkx as nx +from scipy.optimize import differential_evolution + +RDLogger.DisableLog('rdApp.*') + +""" + Conformer matching routines from Torsional Diffusion +""" + +def GetDihedral(conf, atom_idx): + return rdMolTransforms.GetDihedralRad(conf, atom_idx[0], atom_idx[1], atom_idx[2], atom_idx[3]) + + +def SetDihedral(conf, atom_idx, new_vale): + rdMolTransforms.SetDihedralRad(conf, atom_idx[0], atom_idx[1], atom_idx[2], atom_idx[3], new_vale) + + +def apply_changes(mol, values, rotable_bonds, conf_id): + opt_mol = copy.copy(mol) + [SetDihedral(opt_mol.GetConformer(conf_id), rotable_bonds[r], values[r]) for r in range(len(rotable_bonds))] + return opt_mol + + +def optimize_rotatable_bonds(mol, true_mol, rotable_bonds, probe_id=-1, ref_id=-1, seed=0, popsize=15, maxiter=500, + mutation=(0.5, 1), 
recombination=0.8): + opt = OptimizeConformer(mol, true_mol, rotable_bonds, seed=seed, probe_id=probe_id, ref_id=ref_id) + max_bound = [np.pi] * len(opt.rotable_bonds) + min_bound = [-np.pi] * len(opt.rotable_bonds) + bounds = (min_bound, max_bound) + bounds = list(zip(bounds[0], bounds[1])) + + # Optimize conformations + result = differential_evolution(opt.score_conformation, bounds, + maxiter=maxiter, popsize=popsize, + mutation=mutation, recombination=recombination, disp=False, seed=seed) + opt_mol = apply_changes(opt.mol, result['x'], opt.rotable_bonds, conf_id=probe_id) + + return opt_mol + + +class OptimizeConformer: + def __init__(self, mol, true_mol, rotable_bonds, probe_id=-1, ref_id=-1, seed=None): + super(OptimizeConformer, self).__init__() + if seed: + np.random.seed(seed) + self.rotable_bonds = rotable_bonds + self.mol = mol + self.true_mol = true_mol + self.probe_id = probe_id + self.ref_id = ref_id + + def score_conformation(self, values): + for i, r in enumerate(self.rotable_bonds): + SetDihedral(self.mol.GetConformer(self.probe_id), r, values[i]) + return RMSD(self.mol, self.true_mol, self.probe_id, self.ref_id) + + +def get_torsion_angles(mol): + torsions_list = [] + G = nx.Graph() + for i, atom in enumerate(mol.GetAtoms()): + G.add_node(i) + nodes = set(G.nodes()) + for bond in mol.GetBonds(): + start, end = bond.GetBeginAtomIdx(), bond.GetEndAtomIdx() + G.add_edge(start, end) + for e in G.edges(): + G2 = copy.deepcopy(G) + G2.remove_edge(*e) + if nx.is_connected(G2): continue + l = list(sorted(nx.connected_components(G2), key=len)[0]) + if len(l) < 2: continue + n0 = list(G2.neighbors(e[0])) + n1 = list(G2.neighbors(e[1])) + torsions_list.append( + (n0[0], e[0], e[1], n1[0]) + ) + return torsions_list + + +# GeoMol +def get_torsions(mol_list): + print('USING GEOMOL GET TORSIONS FUNCTION') + atom_counter = 0 + torsionList = [] + for m in mol_list: + torsionSmarts = '[!$(*#*)&!D1]-&!@[!$(*#*)&!D1]' + torsionQuery = 
Chem.MolFromSmarts(torsionSmarts) + matches = m.GetSubstructMatches(torsionQuery) + for match in matches: + idx2 = match[0] + idx3 = match[1] + bond = m.GetBondBetweenAtoms(idx2, idx3) + jAtom = m.GetAtomWithIdx(idx2) + kAtom = m.GetAtomWithIdx(idx3) + for b1 in jAtom.GetBonds(): + if (b1.GetIdx() == bond.GetIdx()): + continue + idx1 = b1.GetOtherAtomIdx(idx2) + for b2 in kAtom.GetBonds(): + if ((b2.GetIdx() == bond.GetIdx()) + or (b2.GetIdx() == b1.GetIdx())): + continue + idx4 = b2.GetOtherAtomIdx(idx3) + # skip 3-membered rings + if (idx4 == idx1): + continue + if m.GetAtomWithIdx(idx4).IsInRing(): + torsionList.append( + (idx4 + atom_counter, idx3 + atom_counter, idx2 + atom_counter, idx1 + atom_counter)) + break + else: + torsionList.append( + (idx1 + atom_counter, idx2 + atom_counter, idx3 + atom_counter, idx4 + atom_counter)) + break + break + + atom_counter += m.GetNumAtoms() + return torsionList + + +def A_transpose_matrix(alpha): + return np.array([[np.cos(alpha), np.sin(alpha)], [-np.sin(alpha), np.cos(alpha)]], dtype=np.double) + + +def S_vec(alpha): + return np.array([[np.cos(alpha)], [np.sin(alpha)]], dtype=np.double) + + +def GetDihedralFromPointCloud(Z, atom_idx): + p = Z[list(atom_idx)] + b = p[:-1] - p[1:] + b[0] *= -1 + v = np.array([v - (v.dot(b[1]) / b[1].dot(b[1])) * b[1] for v in [b[0], b[2]]]) + # Normalize vectors + v /= np.sqrt(np.einsum('...i,...i', v, v)).reshape(-1, 1) + b1 = b[1] / np.linalg.norm(b[1]) + x = np.dot(v[0], v[1]) + m = np.cross(v[0], b1) + y = np.dot(m, v[1]) + return np.arctan2(y, x) + + +def get_dihedral_vonMises(mol, conf, atom_idx, Z): + Z = np.array(Z) + v = np.zeros((2, 1)) + iAtom = mol.GetAtomWithIdx(atom_idx[1]) + jAtom = mol.GetAtomWithIdx(atom_idx[2]) + k_0 = atom_idx[0] + i = atom_idx[1] + j = atom_idx[2] + l_0 = atom_idx[3] + for b1 in iAtom.GetBonds(): + k = b1.GetOtherAtomIdx(i) + if k == j: + continue + for b2 in jAtom.GetBonds(): + l = b2.GetOtherAtomIdx(j) + if l == i: + continue + assert k != l + s_star 
= S_vec(GetDihedralFromPointCloud(Z, (k, i, j, l))) + a_mat = A_transpose_matrix(GetDihedral(conf, (k, i, j, k_0)) + GetDihedral(conf, (l_0, i, j, l))) + v = v + np.matmul(a_mat, s_star) + v = v / np.linalg.norm(v) + v = v.reshape(-1) + return np.arctan2(v[1], v[0]) + + +def get_von_mises_rms(mol, mol_rdkit, rotable_bonds, conf_id): + new_dihedrals = np.zeros(len(rotable_bonds)) + for idx, r in enumerate(rotable_bonds): + new_dihedrals[idx] = get_dihedral_vonMises(mol_rdkit, + mol_rdkit.GetConformer(conf_id), r, + mol.GetConformer().GetPositions()) + mol_rdkit = apply_changes(mol_rdkit, new_dihedrals, rotable_bonds, conf_id) + return RMSD(mol_rdkit, mol, conf_id) + + +def mmff_func(mol): + mol_mmff = copy.deepcopy(mol) + AllChem.MMFFOptimizeMoleculeConfs(mol_mmff, mmffVariant='MMFF94s') + for i in range(mol.GetNumConformers()): + coords = mol_mmff.GetConformers()[i].GetPositions() + for j in range(coords.shape[0]): + mol.GetConformer(i).SetAtomPosition(j, + Geometry.Point3D(*coords[j])) + + +RMSD = AllChem.AlignMol diff --git a/forks/DiffDockv1/datasets/esm_embedding_preparation.py b/forks/DiffDockv1/datasets/esm_embedding_preparation.py new file mode 100644 index 00000000..d4075638 --- /dev/null +++ b/forks/DiffDockv1/datasets/esm_embedding_preparation.py @@ -0,0 +1,87 @@ +import os +from argparse import FileType, ArgumentParser + +import numpy as np +import pandas as pd +from Bio.PDB import PDBParser +from Bio.Seq import Seq +from Bio.SeqRecord import SeqRecord +from tqdm import tqdm +from Bio import SeqIO + +parser = ArgumentParser() +parser.add_argument('--out_file', type=str, default="data/prepared_for_esm.fasta") +parser.add_argument('--protein_ligand_csv', type=str, default='data/protein_ligand_example_csv.csv', help='Path to a .csv specifying the input as described in the main README') +parser.add_argument('--protein_path', type=str, default=None, help='Path to a single PDB file. 
If this is not None then it will be used instead of the --protein_ligand_csv') +args = parser.parse_args() + +biopython_parser = PDBParser() + +three_to_one = {'ALA': 'A', +'ARG': 'R', +'ASN': 'N', +'ASP': 'D', +'CYS': 'C', +'GLN': 'Q', +'GLU': 'E', +'GLY': 'G', +'HIS': 'H', +'ILE': 'I', +'LEU': 'L', +'LYS': 'K', +'MET': 'M', +'MSE': 'M', # MSE this is almost the same AA as MET. The sulfur is just replaced by Selen +'PHE': 'F', +'PRO': 'P', +'PYL': 'O', +'SER': 'S', +'SEC': 'U', +'THR': 'T', +'TRP': 'W', +'TYR': 'Y', +'VAL': 'V', +'ASX': 'B', +'GLX': 'Z', +'XAA': 'X', +'XLE': 'J'} + +if args.protein_path is not None: + file_paths = [args.protein_path] +else: + df = pd.read_csv(args.protein_ligand_csv) + file_paths = list(set(df['protein_path'].tolist())) +sequences = [] +ids = [] +for file_path in tqdm(file_paths): + structure = biopython_parser.get_structure('random_id', file_path) + structure = structure[0] + for i, chain in enumerate(structure): + seq = '' + for res_idx, residue in enumerate(chain): + if residue.get_resname() == 'HOH': + continue + residue_coords = [] + c_alpha, n, c = None, None, None + for atom in residue: + if atom.name == 'CA': + c_alpha = list(atom.get_vector()) + if atom.name == 'N': + n = list(atom.get_vector()) + if atom.name == 'C': + c = list(atom.get_vector()) + if c_alpha != None and n != None and c != None: # only append residue if it is an amino acid + try: + seq += three_to_one[residue.get_resname()] + except Exception as e: + seq += '-' + print("encountered unknown AA: ", residue.get_resname(), ' in the complex ', file_path, '. 
Replacing it with a dash - .') + sequences.append(seq) + ids.append(f'{os.path.basename(file_path)}_chain_{i}') +records = [] +for (index, seq) in zip(ids,sequences): + record = SeqRecord(Seq(seq), str(index)) + record.description = '' + records.append(record) +SeqIO.write(records, args.out_file, "fasta") + + diff --git a/forks/DiffDockv1/datasets/esm_embeddings_to_pt.py b/forks/DiffDockv1/datasets/esm_embeddings_to_pt.py new file mode 100644 index 00000000..3f95414c --- /dev/null +++ b/forks/DiffDockv1/datasets/esm_embeddings_to_pt.py @@ -0,0 +1,17 @@ + +import os +from argparse import ArgumentParser + +import torch +from tqdm import tqdm + + +parser = ArgumentParser() +parser.add_argument('--esm_embeddings_path', type=str, default='data/embeddings_output', help='') +parser.add_argument('--output_path', type=str, default='data/esm2_3billion_embeddings.pt', help='') +args = parser.parse_args() + +dict = {} +for filename in tqdm(os.listdir(args.esm_embeddings_path)): + dict[filename.split('.')[0]] = torch.load(os.path.join(args.esm_embeddings_path,filename))['representations'][33] +torch.save(dict,args.output_path) \ No newline at end of file diff --git a/forks/DiffDockv1/datasets/pdbbind.py b/forks/DiffDockv1/datasets/pdbbind.py new file mode 100644 index 00000000..1a78cbe0 --- /dev/null +++ b/forks/DiffDockv1/datasets/pdbbind.py @@ -0,0 +1,423 @@ +import binascii +import glob +import hashlib +import os +import pickle +from collections import defaultdict +from multiprocessing import Pool +import random +import copy + +import numpy as np +import torch +from rdkit.Chem import MolToSmiles, MolFromSmiles, AddHs +from torch_geometric.data import Dataset, HeteroData +from torch_geometric.loader import DataLoader, DataListLoader +from torch_geometric.transforms import BaseTransform +from tqdm import tqdm + +from datasets.process_mols import read_molecule, get_rec_graph, generate_conformer, \ + get_lig_graph_with_matching, extract_receptor_structure, parse_receptor, 
parse_pdb_from_path +from utils.diffusion_utils import modify_conformer, set_time +from utils.utils import read_strings_from_txt +from utils import so3, torus + + +class NoiseTransform(BaseTransform): + def __init__(self, t_to_sigma, no_torsion, all_atom): + self.t_to_sigma = t_to_sigma + self.no_torsion = no_torsion + self.all_atom = all_atom + + def __call__(self, data): + t = np.random.uniform() + t_tr, t_rot, t_tor = t, t, t + return self.apply_noise(data, t_tr, t_rot, t_tor) + + def apply_noise(self, data, t_tr, t_rot, t_tor, tr_update = None, rot_update=None, torsion_updates=None): + if not torch.is_tensor(data['ligand'].pos): + data['ligand'].pos = random.choice(data['ligand'].pos) + + tr_sigma, rot_sigma, tor_sigma = self.t_to_sigma(t_tr, t_rot, t_tor) + set_time(data, t_tr, t_rot, t_tor, 1, self.all_atom, device=None) + + tr_update = torch.normal(mean=0, std=tr_sigma, size=(1, 3)) if tr_update is None else tr_update + rot_update = so3.sample_vec(eps=rot_sigma) if rot_update is None else rot_update + torsion_updates = np.random.normal(loc=0.0, scale=tor_sigma, size=data['ligand'].edge_mask.sum()) if torsion_updates is None else torsion_updates + torsion_updates = None if self.no_torsion else torsion_updates + modify_conformer(data, tr_update, torch.from_numpy(rot_update).float(), torsion_updates) + + data.tr_score = -tr_update / tr_sigma ** 2 + data.rot_score = torch.from_numpy(so3.score_vec(vec=rot_update, eps=rot_sigma)).float().unsqueeze(0) + data.tor_score = None if self.no_torsion else torch.from_numpy(torus.score(torsion_updates, tor_sigma)).float() + data.tor_sigma_edge = None if self.no_torsion else np.ones(data['ligand'].edge_mask.sum()) * tor_sigma + return data + + +class PDBBind(Dataset): + def __init__(self, root, transform=None, cache_path='data/cache', split_path='data/', limit_complexes=0, + receptor_radius=30, num_workers=1, c_alpha_max_neighbors=None, popsize=15, maxiter=15, + matching=True, keep_original=False, max_lig_size=None, 
remove_hs=False, num_conformers=1, all_atoms=False, + atom_radius=5, atom_max_neighbors=None, esm_embeddings_path=None, require_ligand=False, + ligands_list=None, protein_path_list=None, ligand_descriptions=None, keep_local_structures=False): + + super(PDBBind, self).__init__(root, transform) + self.pdbbind_dir = root + self.max_lig_size = max_lig_size + self.split_path = split_path + self.limit_complexes = limit_complexes + self.receptor_radius = receptor_radius + self.num_workers = num_workers + self.c_alpha_max_neighbors = c_alpha_max_neighbors + self.remove_hs = remove_hs + self.esm_embeddings_path = esm_embeddings_path + self.require_ligand = require_ligand + self.protein_path_list = protein_path_list + self.ligand_descriptions = ligand_descriptions + self.keep_local_structures = keep_local_structures + if matching or protein_path_list is not None and ligand_descriptions is not None: + cache_path += '_torsion' + if all_atoms: + cache_path += '_allatoms' + self.full_cache_path = os.path.join(cache_path, f'limit{self.limit_complexes}' + f'_INDEX{os.path.splitext(os.path.basename(self.split_path))[0]}' + f'_maxLigSize{self.max_lig_size}_H{int(not self.remove_hs)}' + f'_recRad{self.receptor_radius}_recMax{self.c_alpha_max_neighbors}' + + ('' if not all_atoms else f'_atomRad{atom_radius}_atomMax{atom_max_neighbors}') + + ('' if not matching or num_conformers == 1 else f'_confs{num_conformers}') + + ('' if self.esm_embeddings_path is None else f'_esmEmbeddings') + + ('' if not keep_local_structures else f'_keptLocalStruct') + + ('' if protein_path_list is None or ligand_descriptions is None else str(binascii.crc32(''.join(ligand_descriptions + protein_path_list).encode())))) + self.popsize, self.maxiter = popsize, maxiter + self.matching, self.keep_original = matching, keep_original + self.num_conformers = num_conformers + self.all_atoms = all_atoms + self.atom_radius, self.atom_max_neighbors = atom_radius, atom_max_neighbors + if not 
os.path.exists(os.path.join(self.full_cache_path, "heterographs.pkl"))\ + or (require_ligand and not os.path.exists(os.path.join(self.full_cache_path, "rdkit_ligands.pkl"))): + os.makedirs(self.full_cache_path, exist_ok=True) + if protein_path_list is None or ligand_descriptions is None: + self.preprocessing() + else: + self.inference_preprocessing() + + print('loading data from memory: ', os.path.join(self.full_cache_path, "heterographs.pkl")) + with open(os.path.join(self.full_cache_path, "heterographs.pkl"), 'rb') as f: + self.complex_graphs = pickle.load(f) + if require_ligand: + with open(os.path.join(self.full_cache_path, "rdkit_ligands.pkl"), 'rb') as f: + self.rdkit_ligands = pickle.load(f) + + print_statistics(self.complex_graphs) + + def len(self): + return len(self.complex_graphs) + + def get(self, idx): + if self.require_ligand: + complex_graph = copy.deepcopy(self.complex_graphs[idx]) + complex_graph.mol = copy.deepcopy(self.rdkit_ligands[idx]) + return complex_graph + else: + return copy.deepcopy(self.complex_graphs[idx]) + + def preprocessing(self): + print(f'Processing complexes from [{self.split_path}] and saving it to [{self.full_cache_path}]') + + complex_names_all = read_strings_from_txt(self.split_path) + if self.limit_complexes is not None and self.limit_complexes != 0: + complex_names_all = complex_names_all[:self.limit_complexes] + print(f'Loading {len(complex_names_all)} complexes.') + + if self.esm_embeddings_path is not None: + id_to_embeddings = torch.load(self.esm_embeddings_path) + chain_embeddings_dictlist = defaultdict(list) + for key, embedding in id_to_embeddings.items(): + key_name = key.split('_')[0] + if key_name in complex_names_all: + chain_embeddings_dictlist[key_name].append(embedding) + lm_embeddings_chains_all = [] + for name in complex_names_all: + lm_embeddings_chains_all.append(chain_embeddings_dictlist[name]) + else: + lm_embeddings_chains_all = [None] * len(complex_names_all) + + if self.num_workers > 1: + # running 
preprocessing in parallel on multiple workers and saving the progress every 1000 complexes + for i in range(len(complex_names_all)//1000+1): + if os.path.exists(os.path.join(self.full_cache_path, f"heterographs{i}.pkl")): + continue + complex_names = complex_names_all[1000*i:1000*(i+1)] + lm_embeddings_chains = lm_embeddings_chains_all[1000*i:1000*(i+1)] + complex_graphs, rdkit_ligands = [], [] + if self.num_workers > 1: + p = Pool(self.num_workers, maxtasksperchild=1) + p.__enter__() + with tqdm(total=len(complex_names), desc=f'loading complexes {i}/{len(complex_names_all)//1000+1}') as pbar: + map_fn = p.imap_unordered if self.num_workers > 1 else map + for t in map_fn(self.get_complex, zip(complex_names, lm_embeddings_chains, [None] * len(complex_names), [None] * len(complex_names))): + complex_graphs.extend(t[0]) + rdkit_ligands.extend(t[1]) + pbar.update() + if self.num_workers > 1: p.__exit__(None, None, None) + + with open(os.path.join(self.full_cache_path, f"heterographs{i}.pkl"), 'wb') as f: + pickle.dump((complex_graphs), f) + with open(os.path.join(self.full_cache_path, f"rdkit_ligands{i}.pkl"), 'wb') as f: + pickle.dump((rdkit_ligands), f) + + complex_graphs_all = [] + for i in range(len(complex_names_all)//1000+1): + with open(os.path.join(self.full_cache_path, f"heterographs{i}.pkl"), 'rb') as f: + l = pickle.load(f) + complex_graphs_all.extend(l) + with open(os.path.join(self.full_cache_path, f"heterographs.pkl"), 'wb') as f: + pickle.dump((complex_graphs_all), f) + + rdkit_ligands_all = [] + for i in range(len(complex_names_all) // 1000 + 1): + with open(os.path.join(self.full_cache_path, f"rdkit_ligands{i}.pkl"), 'rb') as f: + l = pickle.load(f) + rdkit_ligands_all.extend(l) + with open(os.path.join(self.full_cache_path, f"rdkit_ligands.pkl"), 'wb') as f: + pickle.dump((rdkit_ligands_all), f) + else: + complex_graphs, rdkit_ligands = [], [] + with tqdm(total=len(complex_names_all), desc='loading complexes') as pbar: + for t in map(self.get_complex, 
zip(complex_names_all, lm_embeddings_chains_all, [None] * len(complex_names_all), [None] * len(complex_names_all))): + complex_graphs.extend(t[0]) + rdkit_ligands.extend(t[1]) + pbar.update() + with open(os.path.join(self.full_cache_path, "heterographs.pkl"), 'wb') as f: + pickle.dump((complex_graphs), f) + with open(os.path.join(self.full_cache_path, "rdkit_ligands.pkl"), 'wb') as f: + pickle.dump((rdkit_ligands), f) + + def inference_preprocessing(self): + ligands_list = [] + print('Reading molecules and generating local structures with RDKit (unless --keep_local_structures is turned on).') + failed_ligand_indices = [] + for idx, ligand_description in tqdm(enumerate(self.ligand_descriptions)): + try: + mol = MolFromSmiles(ligand_description) # check if it is a smiles or a path + if mol is not None: + mol = AddHs(mol) + generate_conformer(mol) + ligands_list.append(mol) + else: + mol = read_molecule(ligand_description, remove_hs=False, sanitize=True) + if mol is None: + raise Exception('RDKit could not read the molecule ', ligand_description) + if not self.keep_local_structures: + mol.RemoveAllConformers() + mol = AddHs(mol) + generate_conformer(mol) + ligands_list.append(mol) + except Exception as e: + + print('Failed to read molecule ', ligand_description, ' We are skipping it. 
The reason is the exception: ', e) + failed_ligand_indices.append(idx) + for index in sorted(failed_ligand_indices, reverse=True): + del self.protein_path_list[index] + del self.ligand_descriptions[index] + + if self.esm_embeddings_path is not None: + print('Reading language model embeddings.') + lm_embeddings_chains_all = [] + if not os.path.exists(self.esm_embeddings_path): raise Exception('ESM embeddings path does not exist: ',self.esm_embeddings_path) + for protein_path in self.protein_path_list: + embeddings_paths = sorted(glob.glob(os.path.join(self.esm_embeddings_path, os.path.basename(protein_path)) + '*')) + lm_embeddings_chains = [] + for embeddings_path in embeddings_paths: + lm_embeddings_chains.append(torch.load(embeddings_path)['representations'][33]) + lm_embeddings_chains_all.append(lm_embeddings_chains) + else: + lm_embeddings_chains_all = [None] * len(self.protein_path_list) + + print('Generating graphs for ligands and proteins') + if self.num_workers > 1: + # running preprocessing in parallel on multiple workers and saving the progress every 1000 complexes + for i in range(len(self.protein_path_list)//1000+1): + if os.path.exists(os.path.join(self.full_cache_path, f"heterographs{i}.pkl")): + continue + protein_paths_chunk = self.protein_path_list[1000*i:1000*(i+1)] + ligand_description_chunk = self.ligand_descriptions[1000*i:1000*(i+1)] + ligands_chunk = ligands_list[1000 * i:1000 * (i + 1)] + lm_embeddings_chains = lm_embeddings_chains_all[1000*i:1000*(i+1)] + complex_graphs, rdkit_ligands = [], [] + if self.num_workers > 1: + p = Pool(self.num_workers, maxtasksperchild=1) + p.__enter__() + with tqdm(total=len(protein_paths_chunk), desc=f'loading complexes {i}/{len(protein_paths_chunk)//1000+1}') as pbar: + map_fn = p.imap_unordered if self.num_workers > 1 else map + for t in map_fn(self.get_complex, zip(protein_paths_chunk, lm_embeddings_chains, ligands_chunk,ligand_description_chunk)): + complex_graphs.extend(t[0]) + rdkit_ligands.extend(t[1]) 
+ pbar.update() + if self.num_workers > 1: p.__exit__(None, None, None) + + with open(os.path.join(self.full_cache_path, f"heterographs{i}.pkl"), 'wb') as f: + pickle.dump((complex_graphs), f) + with open(os.path.join(self.full_cache_path, f"rdkit_ligands{i}.pkl"), 'wb') as f: + pickle.dump((rdkit_ligands), f) + + complex_graphs_all = [] + for i in range(len(self.protein_path_list)//1000+1): + with open(os.path.join(self.full_cache_path, f"heterographs{i}.pkl"), 'rb') as f: + l = pickle.load(f) + complex_graphs_all.extend(l) + with open(os.path.join(self.full_cache_path, f"heterographs.pkl"), 'wb') as f: + pickle.dump((complex_graphs_all), f) + + rdkit_ligands_all = [] + for i in range(len(self.protein_path_list) // 1000 + 1): + with open(os.path.join(self.full_cache_path, f"rdkit_ligands{i}.pkl"), 'rb') as f: + l = pickle.load(f) + rdkit_ligands_all.extend(l) + with open(os.path.join(self.full_cache_path, f"rdkit_ligands.pkl"), 'wb') as f: + pickle.dump((rdkit_ligands_all), f) + else: + complex_graphs, rdkit_ligands = [], [] + with tqdm(total=len(self.protein_path_list), desc='loading complexes') as pbar: + for t in map(self.get_complex, zip(self.protein_path_list, lm_embeddings_chains_all, ligands_list, self.ligand_descriptions)): + complex_graphs.extend(t[0]) + rdkit_ligands.extend(t[1]) + pbar.update() + if complex_graphs == []: raise Exception('Preprocessing did not succeed for any complex') + with open(os.path.join(self.full_cache_path, "heterographs.pkl"), 'wb') as f: + pickle.dump((complex_graphs), f) + with open(os.path.join(self.full_cache_path, "rdkit_ligands.pkl"), 'wb') as f: + pickle.dump((rdkit_ligands), f) + + def get_complex(self, par): + name, lm_embedding_chains, ligand, ligand_description = par + if not os.path.exists(os.path.join(self.pdbbind_dir, name)) and ligand is None: + print("Folder not found", name) + return [], [] + + if ligand is not None: + rec_model = parse_pdb_from_path(name) + name = f'{name}____{ligand_description}' + ligs = 
[ligand] + else: + try: + rec_model = parse_receptor(name, self.pdbbind_dir) + except Exception as e: + print(f'Skipping {name} because of the error:') + print(e) + return [], [] + + ligs = read_mols(self.pdbbind_dir, name, remove_hs=False) + complex_graphs = [] + failed_indices = [] + for i, lig in enumerate(ligs): + if self.max_lig_size is not None and lig.GetNumHeavyAtoms() > self.max_lig_size: + print(f'Ligand with {lig.GetNumHeavyAtoms()} heavy atoms is larger than max_lig_size {self.max_lig_size}. Not including {name} in preprocessed data.') + continue + complex_graph = HeteroData() + complex_graph['name'] = name + try: + get_lig_graph_with_matching(lig, complex_graph, self.popsize, self.maxiter, self.matching, self.keep_original, + self.num_conformers, remove_hs=self.remove_hs) + rec, rec_coords, c_alpha_coords, n_coords, c_coords, lm_embeddings = extract_receptor_structure(copy.deepcopy(rec_model), lig, lm_embedding_chains=lm_embedding_chains) + if lm_embeddings is not None and len(c_alpha_coords) != len(lm_embeddings): + print(f'LM embeddings for complex {name} did not have the right length for the protein. 
Skipping {name}.') + failed_indices.append(i) + continue + + get_rec_graph(rec, rec_coords, c_alpha_coords, n_coords, c_coords, complex_graph, rec_radius=self.receptor_radius, + c_alpha_max_neighbors=self.c_alpha_max_neighbors, all_atoms=self.all_atoms, + atom_radius=self.atom_radius, atom_max_neighbors=self.atom_max_neighbors, remove_hs=self.remove_hs, lm_embeddings=lm_embeddings) + + except Exception as e: + print(f'Skipping {name} because of the error:') + print(e) + failed_indices.append(i) + continue + + protein_center = torch.mean(complex_graph['receptor'].pos, dim=0, keepdim=True) + complex_graph['receptor'].pos -= protein_center + if self.all_atoms: + complex_graph['atom'].pos -= protein_center + + if (not self.matching) or self.num_conformers == 1: + complex_graph['ligand'].pos -= protein_center + else: + for p in complex_graph['ligand'].pos: + p -= protein_center + + complex_graph.original_center = protein_center + complex_graphs.append(complex_graph) + for idx_to_delete in sorted(failed_indices, reverse=True): + del ligs[idx_to_delete] + return complex_graphs, ligs + + +def print_statistics(complex_graphs): + statistics = ([], [], [], []) + + for complex_graph in complex_graphs: + lig_pos = complex_graph['ligand'].pos if torch.is_tensor(complex_graph['ligand'].pos) else complex_graph['ligand'].pos[0] + radius_protein = torch.max(torch.linalg.vector_norm(complex_graph['receptor'].pos, dim=1)) + molecule_center = torch.mean(lig_pos, dim=0) + radius_molecule = torch.max( + torch.linalg.vector_norm(lig_pos - molecule_center.unsqueeze(0), dim=1)) + distance_center = torch.linalg.vector_norm(molecule_center) + statistics[0].append(radius_protein) + statistics[1].append(radius_molecule) + statistics[2].append(distance_center) + if "rmsd_matching" in complex_graph: + statistics[3].append(complex_graph.rmsd_matching) + else: + statistics[3].append(0) + + name = ['radius protein', 'radius molecule', 'distance protein-mol', 'rmsd matching'] + print('Number of 
complexes: ', len(complex_graphs)) + for i in range(4): + array = np.asarray(statistics[i]) + print(f"{name[i]}: mean {np.mean(array)}, std {np.std(array)}, max {np.max(array)}") + + +def construct_loader(args, t_to_sigma): + transform = NoiseTransform(t_to_sigma=t_to_sigma, no_torsion=args.no_torsion, + all_atom=args.all_atoms) + + common_args = {'transform': transform, 'root': args.data_dir, 'limit_complexes': args.limit_complexes, + 'receptor_radius': args.receptor_radius, + 'c_alpha_max_neighbors': args.c_alpha_max_neighbors, + 'remove_hs': args.remove_hs, 'max_lig_size': args.max_lig_size, + 'matching': not args.no_torsion, 'popsize': args.matching_popsize, 'maxiter': args.matching_maxiter, + 'num_workers': args.num_workers, 'all_atoms': args.all_atoms, + 'atom_radius': args.atom_radius, 'atom_max_neighbors': args.atom_max_neighbors, + 'esm_embeddings_path': args.esm_embeddings_path} + + train_dataset = PDBBind(cache_path=args.cache_path, split_path=args.split_train, keep_original=True, + num_conformers=args.num_conformers, **common_args) + val_dataset = PDBBind(cache_path=args.cache_path, split_path=args.split_val, keep_original=True, **common_args) + + loader_class = DataListLoader if torch.cuda.is_available() else DataLoader + train_loader = loader_class(dataset=train_dataset, batch_size=args.batch_size, num_workers=args.num_dataloader_workers, shuffle=True, pin_memory=args.pin_memory) + val_loader = loader_class(dataset=val_dataset, batch_size=args.batch_size, num_workers=args.num_dataloader_workers, shuffle=True, pin_memory=args.pin_memory) + + return train_loader, val_loader + + +def read_mol(pdbbind_dir, name, remove_hs=False): + lig = read_molecule(os.path.join(pdbbind_dir, name, f'{name}_ligand.sdf'), remove_hs=remove_hs, sanitize=True) + if lig is None: # read mol2 file if sdf file cannot be sanitized + print('Using the .sdf file failed. 
We found a .mol2 file instead and are trying to use that.') + lig = read_molecule(os.path.join(pdbbind_dir, name, f'{name}_ligand.mol2'), remove_hs=remove_hs, sanitize=True) + return lig + + +def read_mols(pdbbind_dir, name, remove_hs=False): + ligs = [] + for file in os.listdir(os.path.join(pdbbind_dir, name)): + if file.endswith(".sdf") and 'rdkit' not in file: + lig = read_molecule(os.path.join(pdbbind_dir, name, file), remove_hs=remove_hs, sanitize=True) + if lig is None and os.path.exists(os.path.join(pdbbind_dir, name, file[:-4] + ".mol2")): # read mol2 file if sdf file cannot be sanitized + print('Using the .sdf file failed. We found a .mol2 file instead and are trying to use that.') + lig = read_molecule(os.path.join(pdbbind_dir, name, file[:-4] + ".mol2"), remove_hs=remove_hs, sanitize=True) + if lig is not None: + ligs.append(lig) + return ligs \ No newline at end of file diff --git a/forks/DiffDockv1/datasets/pdbbind_lm_embedding_preparation.py b/forks/DiffDockv1/datasets/pdbbind_lm_embedding_preparation.py new file mode 100644 index 00000000..ddef4f2b --- /dev/null +++ b/forks/DiffDockv1/datasets/pdbbind_lm_embedding_preparation.py @@ -0,0 +1,96 @@ +import os +from argparse import FileType, ArgumentParser + +import numpy as np +from Bio.PDB import PDBParser +from Bio.Seq import Seq +from Bio.SeqRecord import SeqRecord +from tqdm import tqdm + +parser = ArgumentParser() +parser.add_argument('--data_dir', type=str, default='data/PDBBind_processed', help='') +parser.add_argument('--chain_cutoff', type=int, default=10, help='') +parser.add_argument('--out_file', type=str, default="data/pdbbind_sequences.fasta") +args = parser.parse_args() + +cutoff = args.chain_cutoff +data_dir = args.data_dir +names = os.listdir(data_dir) +#%% +from Bio import SeqIO +biopython_parser = PDBParser() + +three_to_one = {'ALA': 'A', +'ARG': 'R', +'ASN': 'N', +'ASP': 'D', +'CYS': 'C', +'GLN': 'Q', +'GLU': 'E', +'GLY': 'G', +'HIS': 'H', +'ILE': 'I', +'LEU': 'L', +'LYS': 'K', 
+'MET': 'M', +'MSE': 'M', # this is almost the same AA as MET. The sulfur is just replaced by Selen +'PHE': 'F', +'PRO': 'P', +'PYL': 'O', +'SER': 'S', +'SEC': 'U', +'THR': 'T', +'TRP': 'W', +'TYR': 'Y', +'VAL': 'V', +'ASX': 'B', +'GLX': 'Z', +'XAA': 'X', +'XLE': 'J'} + +sequences = [] +ids = [] +for name in tqdm(names): + if name == '.DS_Store': continue + if os.path.exists(os.path.join(data_dir, name, f'{name}_protein_processed.pdb')): + rec_path = os.path.join(data_dir, name, f'{name}_protein_processed.pdb') + elif os.path.exists(os.path.join(data_dir, name, f'{name}_protein.pdb')): + rec_path = os.path.join(data_dir, name, f'{name}_protein.pdb') + else: + continue + if cutoff > 10: + rec_path = os.path.join(data_dir, name, f'{name}_protein_obabel_reduce.pdb') + if not os.path.exists(rec_path): + rec_path = os.path.join(data_dir, name, f'{name}_protein.pdb') + structure = biopython_parser.get_structure('random_id', rec_path) + structure = structure[0] + for i, chain in enumerate(structure): + seq = '' + for res_idx, residue in enumerate(chain): + if residue.get_resname() == 'HOH': + continue + residue_coords = [] + c_alpha, n, c = None, None, None + for atom in residue: + if atom.name == 'CA': + c_alpha = list(atom.get_vector()) + if atom.name == 'N': + n = list(atom.get_vector()) + if atom.name == 'C': + c = list(atom.get_vector()) + if c_alpha != None and n != None and c != None: # only append residue if it is an amino acid and not + try: + seq += three_to_one[residue.get_resname()] + except Exception as e: + seq += '-' + print("encountered unknown AA: ", residue.get_resname(), ' in the complex ', name, '. 
Replacing it with a dash - .') + sequences.append(seq) + ids.append(f'{name}_chain_{i}') +records = [] +for (index, seq) in zip(ids,sequences): + record = SeqRecord(Seq(seq), str(index)) + record.description = '' + records.append(record) +SeqIO.write(records, args.out_file, "fasta") + + diff --git a/forks/DiffDockv1/datasets/process_mols.py b/forks/DiffDockv1/datasets/process_mols.py new file mode 100644 index 00000000..10900944 --- /dev/null +++ b/forks/DiffDockv1/datasets/process_mols.py @@ -0,0 +1,547 @@ +import copy +import os +import warnings + +import numpy as np +import scipy.spatial as spa +import torch +from Bio.PDB import PDBParser +from Bio.PDB.PDBExceptions import PDBConstructionWarning +from rdkit import Chem +from rdkit.Chem.rdchem import BondType as BT +from rdkit.Chem import AllChem, GetPeriodicTable, RemoveHs +from rdkit.Geometry import Point3D +from scipy import spatial +from scipy.special import softmax +from torch_cluster import radius_graph + + +import torch.nn.functional as F + +from datasets.conformer_matching import get_torsion_angles, optimize_rotatable_bonds +from utils.torsion import get_transformation_mask + + +biopython_parser = PDBParser() +periodic_table = GetPeriodicTable() +allowable_features = { + 'possible_atomic_num_list': list(range(1, 119)) + ['misc'], + 'possible_chirality_list': [ + 'CHI_UNSPECIFIED', + 'CHI_TETRAHEDRAL_CW', + 'CHI_TETRAHEDRAL_CCW', + 'CHI_OTHER' + ], + 'possible_degree_list': [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 'misc'], + 'possible_numring_list': [0, 1, 2, 3, 4, 5, 6, 'misc'], + 'possible_implicit_valence_list': [0, 1, 2, 3, 4, 5, 6, 'misc'], + 'possible_formal_charge_list': [-5, -4, -3, -2, -1, 0, 1, 2, 3, 4, 5, 'misc'], + 'possible_numH_list': [0, 1, 2, 3, 4, 5, 6, 7, 8, 'misc'], + 'possible_number_radical_e_list': [0, 1, 2, 3, 4, 'misc'], + 'possible_hybridization_list': [ + 'SP', 'SP2', 'SP3', 'SP3D', 'SP3D2', 'misc' + ], + 'possible_is_aromatic_list': [False, True], + 'possible_is_in_ring3_list': [False, 
True], + 'possible_is_in_ring4_list': [False, True], + 'possible_is_in_ring5_list': [False, True], + 'possible_is_in_ring6_list': [False, True], + 'possible_is_in_ring7_list': [False, True], + 'possible_is_in_ring8_list': [False, True], + 'possible_amino_acids': ['ALA', 'ARG', 'ASN', 'ASP', 'CYS', 'GLN', 'GLU', 'GLY', 'HIS', 'ILE', 'LEU', 'LYS', 'MET', + 'PHE', 'PRO', 'SER', 'THR', 'TRP', 'TYR', 'VAL', 'HIP', 'HIE', 'TPO', 'HID', 'LEV', 'MEU', + 'PTR', 'GLV', 'CYT', 'SEP', 'HIZ', 'CYM', 'GLM', 'ASQ', 'TYS', 'CYX', 'GLZ', 'misc'], + 'possible_atom_type_2': ['C*', 'CA', 'CB', 'CD', 'CE', 'CG', 'CH', 'CZ', 'N*', 'ND', 'NE', 'NH', 'NZ', 'O*', 'OD', + 'OE', 'OG', 'OH', 'OX', 'S*', 'SD', 'SG', 'misc'], + 'possible_atom_type_3': ['C', 'CA', 'CB', 'CD', 'CD1', 'CD2', 'CE', 'CE1', 'CE2', 'CE3', 'CG', 'CG1', 'CG2', 'CH2', + 'CZ', 'CZ2', 'CZ3', 'N', 'ND1', 'ND2', 'NE', 'NE1', 'NE2', 'NH1', 'NH2', 'NZ', 'O', 'OD1', + 'OD2', 'OE1', 'OE2', 'OG', 'OG1', 'OH', 'OXT', 'SD', 'SG', 'misc'], +} +bonds = {BT.SINGLE: 0, BT.DOUBLE: 1, BT.TRIPLE: 2, BT.AROMATIC: 3} + +lig_feature_dims = (list(map(len, [ + allowable_features['possible_atomic_num_list'], + allowable_features['possible_chirality_list'], + allowable_features['possible_degree_list'], + allowable_features['possible_formal_charge_list'], + allowable_features['possible_implicit_valence_list'], + allowable_features['possible_numH_list'], + allowable_features['possible_number_radical_e_list'], + allowable_features['possible_hybridization_list'], + allowable_features['possible_is_aromatic_list'], + allowable_features['possible_numring_list'], + allowable_features['possible_is_in_ring3_list'], + allowable_features['possible_is_in_ring4_list'], + allowable_features['possible_is_in_ring5_list'], + allowable_features['possible_is_in_ring6_list'], + allowable_features['possible_is_in_ring7_list'], + allowable_features['possible_is_in_ring8_list'], +])), 0) # number of scalar features + +rec_atom_feature_dims = (list(map(len, [ + 
def lig_atom_featurizer(mol):
    """Encode every atom of ``mol`` as a row of 16 categorical indices.

    The column order follows ``lig_feature_dims``: atomic number, chirality tag,
    total degree, formal charge, implicit valence, total hydrogen count, radical
    electrons, hybridization, aromaticity, number of rings the atom belongs to,
    and membership flags for rings of size 3 through 8.

    Columns looked up through ``safe_index`` fall back to the last entry of their
    vocabulary when the value is unknown.  Chirality, aromaticity and the
    ring-membership flags use ``list.index`` directly, so a value outside the
    vocabulary raises ``ValueError``.

    Args:
        mol: molecule object exposing ``GetRingInfo()`` and ``GetAtoms()``
            (an RDKit molecule in this file).

    Returns:
        ``torch.Tensor`` built from the per-atom index rows, one row per atom.
    """
    vocab = allowable_features
    rings = mol.GetRingInfo()
    ring_sizes = (3, 4, 5, 6, 7, 8)

    rows = []
    for atom_idx, atom in enumerate(mol.GetAtoms()):
        # Scalar atom descriptors, in the same order as lig_feature_dims.
        row = [
            safe_index(vocab['possible_atomic_num_list'], atom.GetAtomicNum()),
            vocab['possible_chirality_list'].index(str(atom.GetChiralTag())),
            safe_index(vocab['possible_degree_list'], atom.GetTotalDegree()),
            safe_index(vocab['possible_formal_charge_list'], atom.GetFormalCharge()),
            safe_index(vocab['possible_implicit_valence_list'], atom.GetImplicitValence()),
            safe_index(vocab['possible_numH_list'], atom.GetTotalNumHs()),
            safe_index(vocab['possible_number_radical_e_list'], atom.GetNumRadicalElectrons()),
            safe_index(vocab['possible_hybridization_list'], str(atom.GetHybridization())),
            vocab['possible_is_aromatic_list'].index(atom.GetIsAromatic()),
            safe_index(vocab['possible_numring_list'], rings.NumAtomRings(atom_idx)),
        ]
        # One boolean-derived flag per ring size 3..8.
        for size in ring_sizes:
            in_ring = rings.IsAtomInRingOfSize(atom_idx, size)
            row.append(vocab[f'possible_is_in_ring{size}_list'].index(in_ring))
        rows.append(row)

    return torch.tensor(rows)
def safe_index(l, e):
    """Return the index of element ``e`` in list ``l``, or the last index if absent.

    The feature vocabularies in this module end with a catch-all entry, so
    mapping an unknown value to ``len(l) - 1`` sends it to that bucket.

    Args:
        l: list to search.
        e: element to look up.

    Returns:
        ``l.index(e)`` when ``e`` is present, otherwise ``len(l) - 1``.
    """
    try:
        return l.index(e)
    except ValueError:
        # list.index reports "not found" with ValueError.  Catching only that
        # keeps KeyboardInterrupt/SystemExit and genuine bugs from being
        # silently turned into the fallback index.
        return len(l) - 1
chain_c_coords.append(c) + chain_coords.append(np.array(residue_coords)) + count += 1 + else: + invalid_res_ids.append(residue.get_id()) + for res_id in invalid_res_ids: + chain.detach_child(res_id) + if len(chain_coords) > 0: + all_chain_coords = np.concatenate(chain_coords, axis=0) + distances = spatial.distance.cdist(lig_coords, all_chain_coords) + min_distance = distances.min() + else: + min_distance = np.inf + + min_distances.append(min_distance) + lengths.append(count) + coords.append(chain_coords) + c_alpha_coords.append(np.array(chain_c_alpha_coords)) + n_coords.append(np.array(chain_n_coords)) + c_coords.append(np.array(chain_c_coords)) + if not count == 0: valid_chain_ids.append(chain.get_id()) + + min_distances = np.array(min_distances) + if len(valid_chain_ids) == 0: + valid_chain_ids.append(np.argmin(min_distances)) + valid_coords = [] + valid_c_alpha_coords = [] + valid_n_coords = [] + valid_c_coords = [] + valid_lengths = [] + invalid_chain_ids = [] + valid_lm_embeddings = [] + for i, chain in enumerate(rec): + if chain.get_id() in valid_chain_ids: + valid_coords.append(coords[i]) + valid_c_alpha_coords.append(c_alpha_coords[i]) + if lm_embedding_chains is not None: + if i >= len(lm_embedding_chains): + raise ValueError('Encountered valid chain id that was not present in the LM embeddings') + valid_lm_embeddings.append(lm_embedding_chains[i]) + valid_n_coords.append(n_coords[i]) + valid_c_coords.append(c_coords[i]) + valid_lengths.append(lengths[i]) + else: + invalid_chain_ids.append(chain.get_id()) + coords = [item for sublist in valid_coords for item in sublist] # list with n_residues arrays: [n_atoms, 3] + + c_alpha_coords = np.concatenate(valid_c_alpha_coords, axis=0) # [n_residues, 3] + n_coords = np.concatenate(valid_n_coords, axis=0) # [n_residues, 3] + c_coords = np.concatenate(valid_c_coords, axis=0) # [n_residues, 3] + lm_embeddings = np.concatenate(valid_lm_embeddings, axis=0) if lm_embedding_chains is not None else None + for invalid_id 
def get_lig_graph(mol, complex_graph):
    """Write the ligand nodes and covalent-bond edges of ``mol`` into ``complex_graph``.

    Node features come from ``lig_atom_featurizer`` and node positions from the
    molecule's conformer.  Every bond contributes two directed edges (one per
    direction).  Edge attributes are one-hot vectors over the ``bonds`` table;
    a bond whose type is ``BT.UNSPECIFIED`` is encoded as class 0.  Any other
    bond type missing from ``bonds`` raises ``KeyError``.

    Args:
        mol: molecule exposing ``GetConformer()`` and ``GetBonds()``.
        complex_graph: graph container indexed by ``'ligand'`` and by
            ``('ligand', 'lig_bond', 'ligand')``; it is modified in place.

    Returns:
        None.
    """
    positions = torch.from_numpy(mol.GetConformer().GetPositions()).float()
    node_features = lig_atom_featurizer(mol)

    sources, targets, bond_classes = [], [], []
    for bond in mol.GetBonds():
        begin_idx = bond.GetBeginAtomIdx()
        end_idx = bond.GetEndAtomIdx()
        # Store both directions so the bond graph is symmetric.
        sources.extend((begin_idx, end_idx))
        targets.extend((end_idx, begin_idx))
        if bond.GetBondType() != BT.UNSPECIFIED:
            bond_class = bonds[bond.GetBondType()]
        else:
            bond_class = 0
        bond_classes.extend((bond_class, bond_class))

    connectivity = torch.tensor([sources, targets], dtype=torch.long)
    class_ids = torch.tensor(bond_classes, dtype=torch.long)
    one_hot_bonds = F.one_hot(class_ids, num_classes=len(bonds)).to(torch.float)

    complex_graph['ligand'].x = node_features
    complex_graph['ligand'].pos = positions
    complex_graph['ligand', 'lig_bond', 'ligand'].edge_index = connectivity
    complex_graph['ligand', 'lig_bond', 'ligand'].edge_attr = one_hot_bonds
def get_lig_graph_with_matching(mol_, complex_graph, popsize, maxiter, matching, keep_original, num_conformers, remove_hs):
    """Populate the ligand part of ``complex_graph``, optionally via conformer matching.

    With ``matching`` true, ``num_conformers`` fresh conformers are generated with
    ``generate_conformer``.  Each one is passed to ``optimize_rotatable_bonds``
    together with a copy of the input pose (only when rotatable bonds were found)
    and then aligned with ``AllChem.AlignMolConformers``.  The first conformer
    builds the ligand graph through ``get_lig_graph``; every later one is appended
    to ``complex_graph['ligand'].pos``, which is turned into a list of tensors.

    With ``matching`` false, the graph is built directly from ``mol_`` and
    ``rmsd_matching`` is set to 0.

    In both cases the outputs of ``get_transformation_mask`` are stored on
    ``complex_graph['ligand']`` as ``edge_mask`` and ``mask_rotate``.

    Args:
        mol_: input ligand molecule; deep-copied before modification in the
            matching branch.
        complex_graph: graph container that is filled in place.
        popsize: forwarded to ``optimize_rotatable_bonds``.
        maxiter: forwarded to ``optimize_rotatable_bonds``.
        matching: whether to run conformer matching.
        keep_original: if true (matching branch only), store the input pose
            coordinates as ``complex_graph['ligand'].orig_pos``.
        num_conformers: number of conformers to generate in the matching branch.
        remove_hs: whether hydrogens are stripped with ``RemoveHs``.

    Returns:
        None.
    """
    if matching:
        # Work on a copy so the caller's molecule keeps its hydrogens and conformer.
        mol_maybe_noh = copy.deepcopy(mol_)
        if remove_hs:
            mol_maybe_noh = RemoveHs(mol_maybe_noh, sanitize=True)
        if keep_original:
            complex_graph['ligand'].orig_pos = mol_maybe_noh.GetConformer().GetPositions()

        rotable_bonds = get_torsion_angles(mol_maybe_noh)
        if not rotable_bonds: print("no_rotable_bonds but still using it")

        # NOTE(review): with num_conformers == 0 this loop never runs, so
        # get_lig_graph is not called before get_transformation_mask below.
        for i in range(num_conformers):
            mol_rdkit = copy.deepcopy(mol_)

            # Discard the input pose and embed a new conformer with hydrogens added.
            mol_rdkit.RemoveAllConformers()
            mol_rdkit = AllChem.AddHs(mol_rdkit)
            generate_conformer(mol_rdkit)
            if remove_hs:
                mol_rdkit = RemoveHs(mol_rdkit, sanitize=True)
            mol = copy.deepcopy(mol_maybe_noh)
            if rotable_bonds:
                # presumably fits the torsions of mol_rdkit to the pose in mol —
                # confirm in datasets.conformer_matching
                optimize_rotatable_bonds(mol_rdkit, mol, rotable_bonds, popsize=popsize, maxiter=maxiter)
            # mol now carries two conformers: the input pose and the generated one.
            mol.AddConformer(mol_rdkit.GetConformer())
            rms_list = []
            AllChem.AlignMolConformers(mol, RMSlist=rms_list)
            # Replace mol_rdkit's conformer with the second conformer of mol,
            # i.e. the generated one after the alignment call.
            mol_rdkit.RemoveAllConformers()
            mol_rdkit.AddConformer(mol.GetConformers()[1])

            if i == 0:
                # The first conformer defines the graph and the reported RMSD.
                complex_graph.rmsd_matching = rms_list[0]
                get_lig_graph(mol_rdkit, complex_graph)
            else:
                # Further conformers only add positions; pos becomes a list of tensors.
                if torch.is_tensor(complex_graph['ligand'].pos):
                    complex_graph['ligand'].pos = [complex_graph['ligand'].pos]
                complex_graph['ligand'].pos.append(torch.from_numpy(mol_rdkit.GetConformer().GetPositions()).float())

    else:  # no matching
        complex_graph.rmsd_matching = 0
        if remove_hs: mol_ = RemoveHs(mol_)
        get_lig_graph(mol_, complex_graph)

    edge_mask, mask_rotate = get_transformation_mask(complex_graph)
    complex_graph['ligand'].edge_mask = torch.tensor(edge_mask)
    complex_graph['ligand'].mask_rotate = mask_rotate

    return
cutoff=20, max_neighbor=None, lm_embeddings=None): + n_rel_pos = n_coords - c_alpha_coords + c_rel_pos = c_coords - c_alpha_coords + num_residues = len(c_alpha_coords) + if num_residues <= 1: + raise ValueError(f"rec contains only 1 residue!") + + # Build the k-NN graph + distances = spa.distance.cdist(c_alpha_coords, c_alpha_coords) + src_list = [] + dst_list = [] + mean_norm_list = [] + for i in range(num_residues): + dst = list(np.where(distances[i, :] < cutoff)[0]) + dst.remove(i) + if max_neighbor != None and len(dst) > max_neighbor: + dst = list(np.argsort(distances[i, :]))[1: max_neighbor + 1] + if len(dst) == 0: + dst = list(np.argsort(distances[i, :]))[1:2] # choose second because first is i itself + print(f'The c_alpha_cutoff {cutoff} was too small for one c_alpha such that it had no neighbors. ' + f'So we connected it to the closest other c_alpha') + assert i not in dst + src = [i] * len(dst) + src_list.extend(src) + dst_list.extend(dst) + valid_dist = list(distances[i, dst]) + valid_dist_np = distances[i, dst] + sigma = np.array([1., 2., 5., 10., 30.]).reshape((-1, 1)) + weights = softmax(- valid_dist_np.reshape((1, -1)) ** 2 / sigma, axis=1) # (sigma_num, neigh_num) + assert weights[0].sum() > 1 - 1e-2 and weights[0].sum() < 1.01 + diff_vecs = c_alpha_coords[src, :] - c_alpha_coords[dst, :] # (neigh_num, 3) + mean_vec = weights.dot(diff_vecs) # (sigma_num, 3) + denominator = weights.dot(np.linalg.norm(diff_vecs, axis=1)) # (sigma_num,) + mean_vec_ratio_norm = np.linalg.norm(mean_vec, axis=1) / denominator # (sigma_num,) + mean_norm_list.append(mean_vec_ratio_norm) + assert len(src_list) == len(dst_list) + + node_feat = rec_residue_featurizer(rec) + mu_r_norm = torch.from_numpy(np.array(mean_norm_list).astype(np.float32)) + side_chain_vecs = torch.from_numpy( + np.concatenate([np.expand_dims(n_rel_pos, axis=1), np.expand_dims(c_rel_pos, axis=1)], axis=1)) + + complex_graph['receptor'].x = torch.cat([node_feat, torch.tensor(lm_embeddings)], axis=1) if 
def rec_atom_featurizer(rec):
    """Encode every atom of the receptor as four categorical indices.

    Each row is ``[residue name, atomic number, 2-character atom type, full atom
    name]``, looked up in ``allowable_features`` through ``safe_index`` so that
    unknown values fall back to the last entry of the vocabulary.

    Args:
        rec: receptor object exposing ``get_atoms()``; every atom must provide
            ``name``, ``element`` and ``get_parent().get_resname()``.

    Returns:
        list[list[int]]: one 4-element row per atom, in ``rec.get_atoms()`` order.

    Raises:
        AssertionError: if an atom has an empty element string.
    """
    atom_feats = []
    for atom in rec.get_atoms():
        atom_name, element = atom.name, atom.element
        # An element string of 'CD' is treated as carbon.
        # NOTE(review): presumably a delta carbon whose atom name leaked into
        # the element column — confirm against the input PDB files.
        if element == 'CD':
            element = 'C'
        assert not element == ''
        try:
            atomic_num = periodic_table.GetAtomicNumber(element)
        except Exception:
            # Unknown element symbol: -1 is absent from the atomic-number
            # vocabulary, so safe_index maps it to the fallback entry.
            # Catching Exception rather than using a bare except lets
            # KeyboardInterrupt and SystemExit propagate.
            atomic_num = -1
        atom_feat = [safe_index(allowable_features['possible_amino_acids'], atom.get_parent().get_resname()),
                     safe_index(allowable_features['possible_atomic_num_list'], atomic_num),
                     safe_index(allowable_features['possible_atom_type_2'], (atom_name + '*')[:2]),
                     safe_index(allowable_features['possible_atom_type_3'], atom_name)]
        atom_feats.append(atom_feat)

    return atom_feats
and atoms + + n_rel_pos = n_coords - c_alpha_coords + c_rel_pos = c_coords - c_alpha_coords + num_residues = len(c_alpha_coords) + if num_residues <= 1: + raise ValueError(f"rec contains only 1 residue!") + + # Build the k-NN graph of residues + distances = spa.distance.cdist(c_alpha_coords, c_alpha_coords) + src_list = [] + dst_list = [] + mean_norm_list = [] + for i in range(num_residues): + dst = list(np.where(distances[i, :] < c_alpha_cutoff)[0]) + dst.remove(i) + if c_alpha_max_neighbors != None and len(dst) > c_alpha_max_neighbors: + dst = list(np.argsort(distances[i, :]))[1: c_alpha_max_neighbors + 1] + if len(dst) == 0: + dst = list(np.argsort(distances[i, :]))[1:2] # choose second because first is i itself + print(f'The c_alpha_cutoff {c_alpha_cutoff} was too small for one c_alpha such that it had no neighbors. ' + f'So we connected it to the closest other c_alpha') + assert i not in dst + src = [i] * len(dst) + src_list.extend(src) + dst_list.extend(dst) + valid_dist = list(distances[i, dst]) + valid_dist_np = distances[i, dst] + sigma = np.array([1., 2., 5., 10., 30.]).reshape((-1, 1)) + weights = softmax(- valid_dist_np.reshape((1, -1)) ** 2 / sigma, axis=1) # (sigma_num, neigh_num) + assert 1 - 1e-2 < weights[0].sum() < 1.01 + diff_vecs = c_alpha_coords[src, :] - c_alpha_coords[dst, :] # (neigh_num, 3) + mean_vec = weights.dot(diff_vecs) # (sigma_num, 3) + denominator = weights.dot(np.linalg.norm(diff_vecs, axis=1)) # (sigma_num,) + mean_vec_ratio_norm = np.linalg.norm(mean_vec, axis=1) / denominator # (sigma_num,) + mean_norm_list.append(mean_vec_ratio_norm) + assert len(src_list) == len(dst_list) + + node_feat = rec_residue_featurizer(rec) + mu_r_norm = torch.from_numpy(np.array(mean_norm_list).astype(np.float32)) + side_chain_vecs = torch.from_numpy( + np.concatenate([np.expand_dims(n_rel_pos, axis=1), np.expand_dims(c_rel_pos, axis=1)], axis=1)) + + complex_graph['receptor'].x = torch.cat([node_feat, torch.tensor(lm_embeddings)], axis=1) if 
def write_mol_with_coords(mol, new_coords, path):
    """Set the conformer coordinates of ``mol`` to ``new_coords`` and write it as SDF.

    The molecule's conformer is modified in place: atom ``i`` is moved to
    ``new_coords[i]``.

    Args:
        mol: molecule exposing ``GetConformer()`` and ``GetNumAtoms()``.
        new_coords: array with an ``astype`` method (e.g. a NumPy array) holding
            one ``(x, y, z)`` row per atom.
        path: destination file handed to ``Chem.SDWriter``.

    Returns:
        None.
    """
    w = Chem.SDWriter(path)
    try:
        conf = mol.GetConformer()
        # Cast once, outside the loop, instead of re-casting the whole array
        # for every atom.
        coords = new_coords.astype(np.double)
        for i in range(mol.GetNumAtoms()):
            x, y, z = coords[i]
            conf.SetAtomPosition(i, Point3D(x, y, z))
        w.write(mol)
    finally:
        # Release the file handle even when setting coordinates or writing fails.
        w.close()
def read_sdf_or_mol2(sdf_fileName, mol2_fileName):
    """Load a ligand from an SDF file, falling back to a MOL2 file.

    The SDF file is read without sanitization, then sanitized and stripped of
    hydrogens.  If either step raises, the same procedure is attempted on the
    MOL2 file.

    Args:
        sdf_fileName: path of the SDF file tried first.
        mol2_fileName: path of the MOL2 file tried when the SDF attempt fails.

    Returns:
        tuple ``(mol, problem)``.  ``problem`` is ``False`` when one of the two
        files was processed successfully.  When both fail it is ``True`` and
        ``mol`` is whatever ``Chem.MolFromMol2File`` returned.
    """

    def _sanitize_and_strip(candidate):
        # Sanitization happens in place; RemoveHs returns the stripped molecule.
        Chem.SanitizeMol(candidate)
        return Chem.RemoveHs(candidate)

    mol = Chem.MolFromMolFile(sdf_fileName, sanitize=False)
    try:
        return _sanitize_and_strip(mol), False
    except Exception:
        # Fall through to the MOL2 file.
        pass

    mol = Chem.MolFromMol2File(mol2_fileName, sanitize=False)
    try:
        return _sanitize_and_strip(mol), False
    except Exception:
        return mol, True
- libpng=1.6.37 + - libtasn1=4.16.0 + - libtiff=4.4.0 + - libunistring=0.9.10 + - libwebp=1.2.2 + - libwebp-base=1.2.2 + - libxml2=2.9.14 + - llvm-openmp=14.0.6 + - lz4-c=1.9.3 + - mkl=2021.4.0 + - mkl-service=2.4.0 + - mkl_fft=1.3.1 + - mkl_random=1.2.2 + - ncurses=6.3 + - nettle=3.7.3 + - numpy=1.23.1 + - numpy-base=1.23.1 + - openh264=2.1.1 + - openssl=1.1.1q + - pillow=9.2.0 + - pip=22.2.2 + - pycparser=2.21 + - pyopenssl=22.0.0 + - pysocks=1.7.1 + - python=3.9.13 + - pytorch=1.12.1 + - readline=8.1.2 + - requests=2.28.1 + - setuptools=63.4.1 + - six=1.16.0 + - sqlite=3.39.3 + - tk=8.6.12 + - torchaudio=0.12.1 + - torchvision=0.13.1 + - typing_extensions=4.3.0 + - tzdata=2022c + - urllib3=1.26.11 + - wheel=0.37.1 + - xz=5.2.6 + - zlib=1.2.12 + - zstd=1.5.2 + - pip: + - biopandas==0.4.1 + - biopython==1.79 + - e3nn==0.5.0 + - jinja2==3.1.2 + - joblib==1.2.0 + - markupsafe==2.1.1 + - mpmath==1.2.1 + - networkx==2.8.7 + - opt-einsum==3.3.0 + - opt-einsum-fx==0.1.4 + - packaging==21.3 + - pandas==1.5.0 + - pyaml==21.10.1 + - pyparsing==3.0.9 + - python-dateutil==2.8.2 + - pytz==2022.4 + - pyyaml==6.0 + - rdkit-pypi==2022.3.5 + - scikit-learn==1.1.2 + - scipy==1.9.1 + - spyrmsd==0.5.2 + - sympy==1.11.1 + - threadpoolctl==3.1.0 + - torch-cluster==1.6.0 + - torch-geometric==2.1.0.post1 + - torch-scatter==2.0.9 + - torch-sparse==0.6.15 + - torch-spline-conv==1.2.1 + - tqdm==4.64.1 diff --git a/forks/DiffDockv1/evaluate.py b/forks/DiffDockv1/evaluate.py new file mode 100644 index 00000000..ab046b94 --- /dev/null +++ b/forks/DiffDockv1/evaluate.py @@ -0,0 +1,533 @@ +import copy +import os +import torch +import time +from argparse import ArgumentParser, Namespace, FileType +from datetime import datetime +from functools import partial +import numpy as np +import wandb +from biopandas.pdb import PandasPdb +from rdkit import RDLogger +from torch_geometric.loader import DataLoader + +from datasets.pdbbind import PDBBind, read_mol +from utils.diffusion_utils import t_to_sigma 
as t_to_sigma_compl, get_t_schedule +from utils.sampling import randomize_position, sampling +from utils.utils import get_model, get_symmetry_rmsd, remove_all_hs, read_strings_from_txt, ExponentialMovingAverage +from utils.visualise import PDBFile +from tqdm import tqdm + +RDLogger.DisableLog('rdApp.*') +import yaml + +cache_name = datetime.now().strftime('date%d-%m_time%H-%M-%S.%f') +parser = ArgumentParser() +parser.add_argument('--config', type=FileType(mode='r'), default=None) +parser.add_argument('--model_dir', type=str, default='workdir', help='Path to folder with trained score model and hyperparameters') +parser.add_argument('--ckpt', type=str, default='best_model.pt', help='Checkpoint to use inside the folder') +parser.add_argument('--confidence_model_dir', type=str, default=None, help='Path to folder with trained confidence model and hyperparameters') +parser.add_argument('--confidence_ckpt', type=str, default='best_model.pt', help='Checkpoint to use inside the folder') +parser.add_argument('--num_cpu', type=int, default=None, help='if this is a number instead of none, the max number of cpus used by torch will be set to this.') +parser.add_argument('--run_name', type=str, default='test', help='') +parser.add_argument('--project', type=str, default='ligbind_inf', help='') +parser.add_argument('--out_dir', type=str, default=None, help='Where to save results to') +parser.add_argument('--batch_size', type=int, default=10, help='Number of poses to sample in parallel') +parser.add_argument('--cache_path', type=str, default='data/cacheNew', help='Folder from where to load/restore cached dataset') +parser.add_argument('--data_dir', type=str, default='data/PDBBind_processed/', help='Folder containing original structures') +parser.add_argument('--split_path', type=str, default='data/splits/timesplit_no_lig_overlap_val', help='Path of file defining the split') +parser.add_argument('--no_overlap_names_path', type=str, 
default='data/splits/timesplit_test_no_rec_overlap', help='Path text file with the folder names in the test set that have no receptor overlap with the train set') +parser.add_argument('--no_model', action='store_true', default=False, help='Whether to return seed conformer without running model') +parser.add_argument('--no_random', action='store_true', default=False, help='Whether to add randomness in diffusion steps') +parser.add_argument('--no_final_step_noise', action='store_true', default=False, help='Whether to add noise after the final step') +parser.add_argument('--ode', action='store_true', default=False, help='Whether to run the probability flow ODE') +parser.add_argument('--wandb', action='store_true', default=False, help='') +parser.add_argument('--inference_steps', type=int, default=20, help='Number of denoising steps') +parser.add_argument('--limit_complexes', type=int, default=0, help='Limit to the number of complexes') +parser.add_argument('--num_workers', type=int, default=1, help='Number of workers for dataset creation') +parser.add_argument('--tqdm', action='store_true', default=False, help='Whether to show progress bar') +parser.add_argument('--save_visualisation', action='store_true', default=False, help='Whether to save visualizations') +parser.add_argument('--samples_per_complex', type=int, default=1, help='Number of poses to sample for each complex') +parser.add_argument('--actual_steps', type=int, default=None, help='') +args = parser.parse_args() + +if args.config: + config_dict = yaml.load(args.config, Loader=yaml.FullLoader) + arg_dict = args.__dict__ + for key, value in config_dict.items(): + if isinstance(value, list): + for v in value: + arg_dict[key].append(v) + else: + arg_dict[key] = value + +if args.out_dir is None: args.out_dir = f'inference_out_dir_not_specified/{args.run_name}' +os.makedirs(args.out_dir, exist_ok=True) +with open(f'{args.model_dir}/model_parameters.yml') as f: + score_model_args = Namespace(**yaml.full_load(f)) + 
+ +if args.confidence_model_dir is not None: + with open(f'{args.confidence_model_dir}/model_parameters.yml') as f: + confidence_args = Namespace(**yaml.full_load(f)) + if not os.path.exists(confidence_args.original_model_dir): + print("Path does not exist: ", confidence_args.original_model_dir) + confidence_args.original_model_dir = os.path.join(*confidence_args.original_model_dir.split('/')[-2:]) + print('instead trying path: ', confidence_args.original_model_dir) + +device = torch.device('cuda' if torch.cuda.is_available() else 'cpu') +test_dataset = PDBBind(transform=None, root=args.data_dir, limit_complexes=args.limit_complexes, + receptor_radius=score_model_args.receptor_radius, + cache_path=args.cache_path, split_path=args.split_path, + remove_hs=score_model_args.remove_hs, max_lig_size=None, + c_alpha_max_neighbors=score_model_args.c_alpha_max_neighbors, + matching=not score_model_args.no_torsion, keep_original=True, + popsize=score_model_args.matching_popsize, + maxiter=score_model_args.matching_maxiter, + all_atoms=score_model_args.all_atoms, + atom_radius=score_model_args.atom_radius, + atom_max_neighbors=score_model_args.atom_max_neighbors, + esm_embeddings_path=score_model_args.esm_embeddings_path, + require_ligand=True, + num_workers=args.num_workers) +test_loader = DataLoader(dataset=test_dataset, batch_size=1, shuffle=False) + +if args.confidence_model_dir is not None: + if not (confidence_args.use_original_model_cache or confidence_args.transfer_weights): + # if the confidence model uses the same type of data as the original model then we do not need this dataset and can just use the complexes + print('HAPPENING | confidence model uses different type of graphs than the score model. 
Loading (or creating if not existing) the data for the confidence model now.') + confidence_test_dataset = PDBBind(transform=None, root=args.data_dir, limit_complexes=args.limit_complexes, + receptor_radius=confidence_args.receptor_radius, + cache_path=args.cache_path, split_path=args.split_path, + remove_hs=confidence_args.remove_hs, max_lig_size=None, c_alpha_max_neighbors=confidence_args.c_alpha_max_neighbors, + matching=not confidence_args.no_torsion, keep_original=True, + popsize=confidence_args.matching_popsize, + maxiter=confidence_args.matching_maxiter, + all_atoms=confidence_args.all_atoms, + atom_radius=confidence_args.atom_radius, + atom_max_neighbors=confidence_args.atom_max_neighbors, + esm_embeddings_path= confidence_args.esm_embeddings_path, require_ligand=True, + num_workers=args.num_workers) + confidence_complex_dict = {d.name: d for d in confidence_test_dataset} + +t_to_sigma = partial(t_to_sigma_compl, args=score_model_args) + +if not args.no_model: + model = get_model(score_model_args, device, t_to_sigma=t_to_sigma, no_parallel=True) + state_dict = torch.load(f'{args.model_dir}/{args.ckpt}', map_location=torch.device('cpu')) + if args.ckpt == 'last_model.pt': + model_state_dict = state_dict['model'] + ema_weights_state = state_dict['ema_weights'] + model.load_state_dict(model_state_dict, strict=True) + ema_weights = ExponentialMovingAverage(model.parameters(), decay=score_model_args.ema_rate) + ema_weights.load_state_dict(ema_weights_state, device=device) + ema_weights.copy_to(model.parameters()) + else: + model.load_state_dict(state_dict, strict=True) + model = model.to(device) + model.eval() + if args.confidence_model_dir is not None: + if confidence_args.transfer_weights: + with open(f'{confidence_args.original_model_dir}/model_parameters.yml') as f: + confidence_model_args = Namespace(**yaml.full_load(f)) + else: + confidence_model_args = confidence_args + + confidence_model = get_model(confidence_model_args, device, t_to_sigma=t_to_sigma, 
no_parallel=True, + confidence_mode=True) + state_dict = torch.load(f'{args.confidence_model_dir}/{args.confidence_ckpt}', map_location=torch.device('cpu')) + confidence_model.load_state_dict(state_dict, strict=True) + confidence_model = confidence_model.to(device) + confidence_model.eval() + else: + confidence_model = None + confidence_args = None + confidence_model_args = None + + +if args.wandb: + run = wandb.init( + entity='entity', + settings=wandb.Settings(start_method="fork"), + project=args.project, + name=args.run_name, + config=args + ) + +tr_schedule = get_t_schedule(inference_steps=args.inference_steps) +rot_schedule = tr_schedule +tor_schedule = tr_schedule +print('t schedule', tr_schedule) + +rmsds_list, obrmsds, centroid_distances_list, failures, skipped, min_cross_distances_list, base_min_cross_distances_list, confidences_list, names_list = [], [], [], 0, 0, [], [], [], [] +run_times, min_self_distances_list, without_rec_overlap_list = [], [], [] +N = args.samples_per_complex +names_no_rec_overlap = read_strings_from_txt(args.no_overlap_names_path) +print('Size of test dataset: ', len(test_dataset)) + +for idx, orig_complex_graph in tqdm(enumerate(test_loader)): + if confidence_model is not None and not (confidence_args.use_original_model_cache or + confidence_args.transfer_weights) and orig_complex_graph.name[0] not in confidence_complex_dict.keys(): + skipped += 1 + print(f"HAPPENING | The confidence dataset did not contain {orig_complex_graph.name[0]}. 
We are skipping this complex.") + continue + + success = 0 + while not success: # keep trying in case of failure (sometimes stochastic) + try: + success = 1 + data_list = [copy.deepcopy(orig_complex_graph) for _ in range(N)] + randomize_position(data_list, score_model_args.no_torsion, args.no_random, score_model_args.tr_sigma_max) + + pdb = None + if args.save_visualisation: + visualization_list = [] + for idx, graph in enumerate(data_list): + lig = read_mol(args.data_dir, graph['name'][0], remove_hs=score_model_args.remove_hs) + pdb = PDBFile(lig) + pdb.add(lig, 0, 0) + pdb.add((orig_complex_graph['ligand'].pos + orig_complex_graph.original_center).detach().cpu(), 1, 0) + pdb.add((graph['ligand'].pos + graph.original_center).detach().cpu(), part=1, order=1) + visualization_list.append(pdb) + else: + visualization_list = None + + rec_path = os.path.join(args.data_dir, data_list[0]["name"][0], f'{data_list[0]["name"][0]}_protein_processed.pdb') + if not os.path.exists(rec_path): + rec_path = os.path.join(args.data_dir, data_list[0]["name"][0], f'{data_list[0]["name"][0]}_protein_obabel_reduce.pdb') + rec = PandasPdb().read_pdb(rec_path) + rec_df = rec.df['ATOM'] + receptor_pos = rec_df[['x_coord', 'y_coord', 'z_coord']].to_numpy().squeeze().astype( + np.float32) - orig_complex_graph.original_center.cpu().numpy() + receptor_pos = np.tile(receptor_pos, (N, 1, 1)) + start_time = time.time() + if not args.no_model: + if confidence_model is not None and not ( + confidence_args.use_original_model_cache or confidence_args.transfer_weights): + confidence_data_list = [copy.deepcopy(confidence_complex_dict[orig_complex_graph.name[0]]) for _ in + range(N)] + else: + confidence_data_list = None + + data_list, confidence = sampling(data_list=data_list, model=model, + inference_steps=args.actual_steps if args.actual_steps is not None else args.inference_steps, + tr_schedule=tr_schedule, rot_schedule=rot_schedule, + tor_schedule=tor_schedule, + device=device, 
t_to_sigma=t_to_sigma, model_args=score_model_args, + no_random=args.no_random, + ode=args.ode, visualization_list=visualization_list, + confidence_model=confidence_model, + confidence_data_list=confidence_data_list, + confidence_model_args=confidence_model_args, + batch_size=args.batch_size, + no_final_step_noise=args.no_final_step_noise) + + run_times.append(time.time() - start_time) + if score_model_args.no_torsion: orig_complex_graph['ligand'].orig_pos = (orig_complex_graph['ligand'].pos.cpu().numpy() + orig_complex_graph.original_center.cpu().numpy()) + + filterHs = torch.not_equal(data_list[0]['ligand'].x[:, 0], 0).cpu().numpy() + + if isinstance(orig_complex_graph['ligand'].orig_pos, list): + orig_complex_graph['ligand'].orig_pos = orig_complex_graph['ligand'].orig_pos[0] + + ligand_pos = np.asarray( + [complex_graph['ligand'].pos.cpu().numpy()[filterHs] for complex_graph in data_list]) + orig_ligand_pos = np.expand_dims( + orig_complex_graph['ligand'].orig_pos[filterHs] - orig_complex_graph.original_center.cpu().numpy(), + axis=0) + + try: + mol = remove_all_hs(orig_complex_graph.mol[0]) + rmsd = get_symmetry_rmsd(mol, orig_ligand_pos[0], [l for l in ligand_pos]) + except Exception as e: + print("Using non corrected RMSD because of the error", e) + rmsd = np.sqrt(((ligand_pos - orig_ligand_pos) ** 2).sum(axis=2).mean(axis=1)) + rmsds_list.append(rmsd) + centroid_distance = np.linalg.norm(ligand_pos.mean(axis=1) - orig_ligand_pos.mean(axis=1), axis=1) + if confidence is not None and isinstance(confidence_args.rmsd_classification_cutoff, list): + confidence = confidence[:, 0] + if confidence is not None: + confidence = confidence.cpu().numpy() + re_order = np.argsort(confidence)[::-1] + print(orig_complex_graph['name'], ' rmsd', np.around(rmsd, 1)[re_order], ' centroid distance', + np.around(centroid_distance, 1)[re_order], ' confidences ', np.around(confidence, 4)[re_order]) + confidences_list.append(confidence) + else: + print(orig_complex_graph['name'], ' 
rmsd', np.around(rmsd, 1), ' centroid distance', + np.around(centroid_distance, 1)) + centroid_distances_list.append(centroid_distance) + + cross_distances = np.linalg.norm(receptor_pos[:, :, None, :] - ligand_pos[:, None, :, :], axis=-1) + min_cross_distances_list.append(np.min(cross_distances, axis=(1, 2))) + self_distances = np.linalg.norm(ligand_pos[:, :, None, :] - ligand_pos[:, None, :, :], axis=-1) + self_distances = np.where(np.eye(self_distances.shape[2]), np.inf, self_distances) + min_self_distances_list.append(np.min(self_distances, axis=(1, 2))) + + base_cross_distances = np.linalg.norm(receptor_pos[:, :, None, :] - orig_ligand_pos[:, None, :, :], axis=-1) + base_min_cross_distances_list.append(np.min(base_cross_distances, axis=(1, 2))) + + if args.save_visualisation: + if confidence is not None: + for rank, batch_idx in enumerate(re_order): + visualization_list[batch_idx].write( + f'{args.out_dir}/{data_list[batch_idx]["name"][0]}_{rank + 1}_{rmsd[batch_idx]:.1f}_{(confidence)[batch_idx]:.1f}.pdb') + else: + for rank, batch_idx in enumerate(np.argsort(rmsd)): + visualization_list[batch_idx].write( + f'{args.out_dir}/{data_list[batch_idx]["name"][0]}_{rank + 1}_{rmsd[batch_idx]:.1f}.pdb') + without_rec_overlap_list.append(1 if orig_complex_graph.name[0] in names_no_rec_overlap else 0) + names_list.append(orig_complex_graph.name[0]) + except Exception as e: + print("Failed on", orig_complex_graph["name"], e) + failures += 1 + success = 0 + +print('Performance without hydrogens included in the loss') +print(failures, "failures due to exceptions") +print(skipped, ' skipped because complex was not in confidence dataset') + +performance_metrics = {} +for overlap in ['', 'no_overlap_']: + if 'no_overlap_' == overlap: + without_rec_overlap = np.array(without_rec_overlap_list, dtype=bool) + if without_rec_overlap.sum() == 0: continue + rmsds = np.array(rmsds_list)[without_rec_overlap] + min_self_distances = np.array(min_self_distances_list)[without_rec_overlap] 
+ centroid_distances = np.array(centroid_distances_list)[without_rec_overlap] + # confidences_list stays empty when no confidence scores were collected; masking an empty array with a non-empty boolean mask raises IndexError, so only apply the mask when there is something to select. + confidences = np.array(confidences_list)[without_rec_overlap] if confidences_list else np.array(confidences_list) + min_cross_distances = np.array(min_cross_distances_list)[without_rec_overlap] + base_min_cross_distances = np.array(base_min_cross_distances_list)[without_rec_overlap] + names = np.array(names_list)[without_rec_overlap] + else: + rmsds = np.array(rmsds_list) + min_self_distances = np.array(min_self_distances_list) + centroid_distances = np.array(centroid_distances_list) + confidences = np.array(confidences_list) + min_cross_distances = np.array(min_cross_distances_list) + base_min_cross_distances = np.array(base_min_cross_distances_list) + names = np.array(names_list) + + run_times = np.array(run_times) + np.save(f'{args.out_dir}/{overlap}min_cross_distances.npy', min_cross_distances) + np.save(f'{args.out_dir}/{overlap}min_self_distances.npy', min_self_distances) + np.save(f'{args.out_dir}/{overlap}base_min_cross_distances.npy', base_min_cross_distances) + np.save(f'{args.out_dir}/{overlap}rmsds.npy', rmsds) + np.save(f'{args.out_dir}/{overlap}centroid_distances.npy', centroid_distances) + np.save(f'{args.out_dir}/{overlap}confidences.npy', confidences) + np.save(f'{args.out_dir}/{overlap}run_times.npy', run_times) + np.save(f'{args.out_dir}/{overlap}complex_names.npy', np.array(names)) + + # Per-sample metrics: counts are divided by both the number of complexes and N (samples per complex). + performance_metrics.update({ + f'{overlap}run_times_std': run_times.std().__round__(2), + f'{overlap}run_times_mean': run_times.mean().__round__(2), + f'{overlap}steric_clash_fraction': ( + 100 * (min_cross_distances < 0.4).sum() / len(min_cross_distances) / N).__round__(2), + f'{overlap}self_intersect_fraction': ( + 100 * (min_self_distances < 0.4).sum() / len(min_self_distances) / N).__round__(2), + f'{overlap}mean_rmsd': rmsds.mean(), + f'{overlap}rmsds_below_2': (100 * (rmsds < 2).sum() / len(rmsds) / N), + f'{overlap}rmsds_below_5': (100 * (rmsds < 5).sum() / len(rmsds) / N), + f'{overlap}rmsds_percentile_25': np.percentile(rmsds,
25).round(2), + f'{overlap}rmsds_percentile_50': np.percentile(rmsds, 50).round(2), + f'{overlap}rmsds_percentile_75': np.percentile(rmsds, 75).round(2), + + f'{overlap}mean_centroid': centroid_distances.mean().__round__(2), + f'{overlap}centroid_below_2': (100 * (centroid_distances < 2).sum() / len(centroid_distances) / N).__round__(2), + f'{overlap}centroid_below_5': (100 * (centroid_distances < 5).sum() / len(centroid_distances) / N).__round__(2), + f'{overlap}centroid_percentile_25': np.percentile(centroid_distances, 25).round(2), + f'{overlap}centroid_percentile_50': np.percentile(centroid_distances, 50).round(2), + f'{overlap}centroid_percentile_75': np.percentile(centroid_distances, 75).round(2), + }) + + if N >= 5: + top5_rmsds = np.min(rmsds[:, :5], axis=1) + top5_centroid_distances = centroid_distances[ + np.arange(rmsds.shape[0])[:, None], np.argsort(rmsds[:, :5], axis=1)][:, 0] + top5_min_cross_distances = min_cross_distances[ + np.arange(rmsds.shape[0])[:, None], np.argsort(rmsds[:, :5], axis=1)][:, 0] + top5_min_self_distances = min_self_distances[ + np.arange(rmsds.shape[0])[:, None], np.argsort(rmsds[:, :5], axis=1)][:, 0] + performance_metrics.update({ + f'{overlap}top5_steric_clash_fraction': ( + 100 * (top5_min_cross_distances < 0.4).sum() / len(top5_min_cross_distances)).__round__(2), + f'{overlap}top5_self_intersect_fraction': ( + 100 * (top5_min_self_distances < 0.4).sum() / len(top5_min_self_distances)).__round__(2), + f'{overlap}top5_rmsds_below_2': (100 * (top5_rmsds < 2).sum() / len(top5_rmsds)).__round__(2), + f'{overlap}top5_rmsds_below_5': (100 * (top5_rmsds < 5).sum() / len(top5_rmsds)).__round__(2), + f'{overlap}top5_rmsds_percentile_25': np.percentile(top5_rmsds, 25).round(2), + f'{overlap}top5_rmsds_percentile_50': np.percentile(top5_rmsds, 50).round(2), + f'{overlap}top5_rmsds_percentile_75': np.percentile(top5_rmsds, 75).round(2), + + f'{overlap}top5_centroid_below_2': ( + 100 * (top5_centroid_distances < 2).sum() / 
len(top5_centroid_distances)).__round__(2), + f'{overlap}top5_centroid_below_5': ( + 100 * (top5_centroid_distances < 5).sum() / len(top5_centroid_distances)).__round__(2), + f'{overlap}top5_centroid_percentile_25': np.percentile(top5_centroid_distances, 25).round(2), + f'{overlap}top5_centroid_percentile_50': np.percentile(top5_centroid_distances, 50).round(2), + f'{overlap}top5_centroid_percentile_75': np.percentile(top5_centroid_distances, 75).round(2), + }) + + if N >= 10: + top10_rmsds = np.min(rmsds[:, :10], axis=1) + top10_centroid_distances = centroid_distances[ + np.arange(rmsds.shape[0])[:, None], np.argsort(rmsds[:, :10], axis=1)][:, 0] + top10_min_cross_distances = min_cross_distances[ + np.arange(rmsds.shape[0])[:, None], np.argsort(rmsds[:, :10], axis=1)][:, 0] + top10_min_self_distances = min_self_distances[ + np.arange(rmsds.shape[0])[:, None], np.argsort(rmsds[:, :10], axis=1)][:, 0] + performance_metrics.update({ + f'{overlap}top10_steric_clash_fraction': ( + 100 * (top10_min_cross_distances < 0.4).sum() / len(top10_min_cross_distances)).__round__(2), + f'{overlap}top10_self_intersect_fraction': ( + 100 * (top10_min_self_distances < 0.4).sum() / len(top10_min_self_distances)).__round__(2), + f'{overlap}top10_rmsds_below_2': (100 * (top10_rmsds < 2).sum() / len(top10_rmsds)).__round__(2), + f'{overlap}top10_rmsds_below_5': (100 * (top10_rmsds < 5).sum() / len(top10_rmsds)).__round__(2), + f'{overlap}top10_rmsds_percentile_25': np.percentile(top10_rmsds, 25).round(2), + f'{overlap}top10_rmsds_percentile_50': np.percentile(top10_rmsds, 50).round(2), + f'{overlap}top10_rmsds_percentile_75': np.percentile(top10_rmsds, 75).round(2), + + f'{overlap}top10_centroid_below_2': ( + 100 * (top10_centroid_distances < 2).sum() / len(top10_centroid_distances)).__round__(2), + f'{overlap}top10_centroid_below_5': ( + 100 * (top10_centroid_distances < 5).sum() / len(top10_centroid_distances)).__round__(2), + f'{overlap}top10_centroid_percentile_25': 
np.percentile(top10_centroid_distances, 25).round(2), + f'{overlap}top10_centroid_percentile_50': np.percentile(top10_centroid_distances, 50).round(2), + f'{overlap}top10_centroid_percentile_75': np.percentile(top10_centroid_distances, 75).round(2), + }) + + if confidence_model is not None: + confidence_ordering = np.argsort(confidences, axis=1)[:, ::-1] + + filtered_rmsds = rmsds[np.arange(rmsds.shape[0])[:, None], confidence_ordering][:, 0] + filtered_centroid_distances = centroid_distances[np.arange(rmsds.shape[0])[:, None], confidence_ordering][:, 0] + filtered_min_cross_distances = min_cross_distances[np.arange(rmsds.shape[0])[:, None], confidence_ordering][:, + 0] + filtered_min_self_distances = min_self_distances[np.arange(rmsds.shape[0])[:, None], confidence_ordering][:, 0] + performance_metrics.update({ + f'{overlap}filtered_self_intersect_fraction': ( + 100 * (filtered_min_self_distances < 0.4).sum() / len(filtered_min_self_distances)).__round__( + 2), + f'{overlap}filtered_steric_clash_fraction': ( + 100 * (filtered_min_cross_distances < 0.4).sum() / len(filtered_min_cross_distances)).__round__( + 2), + f'{overlap}filtered_rmsds_below_2': (100 * (filtered_rmsds < 2).sum() / len(filtered_rmsds)).__round__(2), + f'{overlap}filtered_rmsds_below_5': (100 * (filtered_rmsds < 5).sum() / len(filtered_rmsds)).__round__(2), + f'{overlap}filtered_rmsds_percentile_25': np.percentile(filtered_rmsds, 25).round(2), + f'{overlap}filtered_rmsds_percentile_50': np.percentile(filtered_rmsds, 50).round(2), + f'{overlap}filtered_rmsds_percentile_75': np.percentile(filtered_rmsds, 75).round(2), + + f'{overlap}filtered_centroid_below_2': ( + 100 * (filtered_centroid_distances < 2).sum() / len(filtered_centroid_distances)).__round__(2), + f'{overlap}filtered_centroid_below_5': ( + 100 * (filtered_centroid_distances < 5).sum() / len(filtered_centroid_distances)).__round__(2), + f'{overlap}filtered_centroid_percentile_25': np.percentile(filtered_centroid_distances, 25).round(2), 
+ f'{overlap}filtered_centroid_percentile_50': np.percentile(filtered_centroid_distances, 50).round(2), + f'{overlap}filtered_centroid_percentile_75': np.percentile(filtered_centroid_distances, 75).round(2), + }) + + if N >= 5: + # Best-of-top-5: keep the 5 highest-confidence samples per complex and report the one with the lowest RMSD. + top5_filtered_rmsds = np.min(rmsds[np.arange(rmsds.shape[0])[:, None], confidence_ordering][:, :5], axis=1) + top5_filtered_centroid_distances = \ + centroid_distances[np.arange(rmsds.shape[0])[:, None], confidence_ordering][:, :5][ + np.arange(rmsds.shape[0])[:, None], np.argsort( + rmsds[np.arange(rmsds.shape[0])[:, None], confidence_ordering][:, :5], axis=1)][:, 0] + top5_filtered_min_cross_distances = \ + min_cross_distances[np.arange(rmsds.shape[0])[:, None], confidence_ordering][:, :5][ + np.arange(rmsds.shape[0])[:, None], np.argsort( + rmsds[np.arange(rmsds.shape[0])[:, None], confidence_ordering][:, :5], axis=1)][:, 0] + top5_filtered_min_self_distances = \ + min_self_distances[np.arange(rmsds.shape[0])[:, None], confidence_ordering][:, :5][ + np.arange(rmsds.shape[0])[:, None], np.argsort( + rmsds[np.arange(rmsds.shape[0])[:, None], confidence_ordering][:, :5], axis=1)][:, 0] + # self_intersect is measured on ligand-ligand (self) distances; steric_clash on receptor-ligand (cross) distances. + performance_metrics.update({ + f'{overlap}top5_filtered_self_intersect_fraction': ( + 100 * (top5_filtered_min_self_distances < 0.4).sum() / len( + top5_filtered_min_self_distances)).__round__(2), + f'{overlap}top5_filtered_steric_clash_fraction': ( + 100 * (top5_filtered_min_cross_distances < 0.4).sum() / len( + top5_filtered_min_cross_distances)).__round__(2), + f'{overlap}top5_filtered_rmsds_below_2': ( + 100 * (top5_filtered_rmsds < 2).sum() / len(top5_filtered_rmsds)).__round__(2), + f'{overlap}top5_filtered_rmsds_below_5': ( + 100 * (top5_filtered_rmsds < 5).sum() / len(top5_filtered_rmsds)).__round__(2), + f'{overlap}top5_filtered_rmsds_percentile_25': np.percentile(top5_filtered_rmsds, 25).round(2), + f'{overlap}top5_filtered_rmsds_percentile_50': np.percentile(top5_filtered_rmsds, 50).round(2), + f'{overlap}top5_filtered_rmsds_percentile_75':
np.percentile(top5_filtered_rmsds, 75).round(2), + + f'{overlap}top5_filtered_centroid_below_2': (100 * (top5_filtered_centroid_distances < 2).sum() / len( + top5_filtered_centroid_distances)).__round__(2), + f'{overlap}top5_filtered_centroid_below_5': (100 * (top5_filtered_centroid_distances < 5).sum() / len( + top5_filtered_centroid_distances)).__round__(2), + f'{overlap}top5_filtered_centroid_percentile_25': np.percentile(top5_filtered_centroid_distances, + 25).round(2), + f'{overlap}top5_filtered_centroid_percentile_50': np.percentile(top5_filtered_centroid_distances, + 50).round(2), + f'{overlap}top5_filtered_centroid_percentile_75': np.percentile(top5_filtered_centroid_distances, + 75).round(2), + }) + if N >= 10: + # Best-of-top-10: keep the 10 highest-confidence samples per complex and report the one with the lowest RMSD. + top10_filtered_rmsds = np.min(rmsds[np.arange(rmsds.shape[0])[:, None], confidence_ordering][:, :10], + axis=1) + top10_filtered_centroid_distances = \ + centroid_distances[np.arange(rmsds.shape[0])[:, None], confidence_ordering][:, :10][ + np.arange(rmsds.shape[0])[:, None], np.argsort( + rmsds[np.arange(rmsds.shape[0])[:, None], confidence_ordering][:, :10], axis=1)][:, 0] + top10_filtered_min_cross_distances = \ + min_cross_distances[np.arange(rmsds.shape[0])[:, None], confidence_ordering][:, :10][ + np.arange(rmsds.shape[0])[:, None], np.argsort( + rmsds[np.arange(rmsds.shape[0])[:, None], confidence_ordering][:, :10], axis=1)][:, 0] + top10_filtered_min_self_distances = \ + min_self_distances[np.arange(rmsds.shape[0])[:, None], confidence_ordering][:, :10][ + np.arange(rmsds.shape[0])[:, None], np.argsort( + rmsds[np.arange(rmsds.shape[0])[:, None], confidence_ordering][:, :10], axis=1)][:, 0] + # self_intersect is measured on ligand-ligand (self) distances; steric_clash on receptor-ligand (cross) distances. + performance_metrics.update({ + f'{overlap}top10_filtered_self_intersect_fraction': ( + 100 * (top10_filtered_min_self_distances < 0.4).sum() / len( + top10_filtered_min_self_distances)).__round__(2), + f'{overlap}top10_filtered_steric_clash_fraction': ( + 100 * (top10_filtered_min_cross_distances < 0.4).sum() / len( +
top10_filtered_min_cross_distances)).__round__(2), + f'{overlap}top10_filtered_rmsds_below_2': ( + 100 * (top10_filtered_rmsds < 2).sum() / len(top10_filtered_rmsds)).__round__(2), + f'{overlap}top10_filtered_rmsds_below_5': ( + 100 * (top10_filtered_rmsds < 5).sum() / len(top10_filtered_rmsds)).__round__(2), + f'{overlap}top10_filtered_rmsds_percentile_25': np.percentile(top10_filtered_rmsds, 25).round(2), + f'{overlap}top10_filtered_rmsds_percentile_50': np.percentile(top10_filtered_rmsds, 50).round(2), + f'{overlap}top10_filtered_rmsds_percentile_75': np.percentile(top10_filtered_rmsds, 75).round(2), + + f'{overlap}top10_filtered_centroid_below_2': (100 * (top10_filtered_centroid_distances < 2).sum() / len( + top10_filtered_centroid_distances)).__round__(2), + f'{overlap}top10_filtered_centroid_below_5': (100 * (top10_filtered_centroid_distances < 5).sum() / len( + top10_filtered_centroid_distances)).__round__(2), + f'{overlap}top10_filtered_centroid_percentile_25': np.percentile(top10_filtered_centroid_distances, + 25).round(2), + f'{overlap}top10_filtered_centroid_percentile_50': np.percentile(top10_filtered_centroid_distances, + 50).round(2), + f'{overlap}top10_filtered_centroid_percentile_75': np.percentile(top10_filtered_centroid_distances, + 75).round(2), + }) + +for k in performance_metrics: + print(k, performance_metrics[k]) + +if args.wandb: + wandb.log(performance_metrics) + histogram_metrics_list = [('rmsd', rmsds[:, 0]), + ('centroid_distance', centroid_distances[:, 0]), + ('mean_rmsd', rmsds.mean(axis=1)), + ('mean_centroid_distance', centroid_distances.mean(axis=1))] + if N >= 5: + histogram_metrics_list.append(('top5_rmsds', top5_rmsds)) + histogram_metrics_list.append(('top5_centroid_distances', top5_centroid_distances)) + if N >= 10: + histogram_metrics_list.append(('top10_rmsds', top10_rmsds)) + histogram_metrics_list.append(('top10_centroid_distances', top10_centroid_distances)) + if confidence_model is not None: + 
histogram_metrics_list.append(('filtered_rmsd', filtered_rmsds)) + histogram_metrics_list.append(('filtered_centroid_distance', filtered_centroid_distances)) + if N >= 5: + histogram_metrics_list.append(('top5_filtered_rmsds', top5_filtered_rmsds)) + histogram_metrics_list.append(('top5_filtered_centroid_distances', top5_filtered_centroid_distances)) + if N >= 10: + histogram_metrics_list.append(('top10_filtered_rmsds', top10_filtered_rmsds)) + histogram_metrics_list.append(('top10_filtered_centroid_distances', top10_filtered_centroid_distances)) diff --git a/forks/DiffDockv1/evaluate_confidence_calibration.py b/forks/DiffDockv1/evaluate_confidence_calibration.py new file mode 100644 index 00000000..8b7d2f45 --- /dev/null +++ b/forks/DiffDockv1/evaluate_confidence_calibration.py @@ -0,0 +1,361 @@ +import os +from argparse import ArgumentParser + +import pandas as pd +import plotly.express as px +import numpy as np +import scipy + +from utils.utils import read_strings_from_txt + +parser = ArgumentParser() + + +parser.add_argument('--data_dir', type=str, default='data/PDBBind_processed', help='') +parser.add_argument('--results_path', type=str, default='inference_out_dir_not_specified/TEST_top40_epoch75_FILTER_restart_cacheNewRestart_big_ema_ESM2emb_tr34_WITH_fixedSamples28_id1_FILTERFROM_temp_restart_ema_ESM2emb_tr34', help='') +parser.add_argument('--gnina_results_path', type=str, default='results/gnina_rosetta13', help='') +parser.add_argument('--smina_results_path', type=str, default='results/smina_rosetta13', help='') +parser.add_argument('--glide_results_path', type=str, default='results/glide', help='') +parser.add_argument('--qvinaw_results_path', type=str, default='results/qvinaw', help='') +parser.add_argument('--tankbind_results_path', type=str, default='results/tankbind_top5', help='') +parser.add_argument('--equibind_results_path', type=str, default='results/equibind_paper', help='') +parser.add_argument('--no_rec_overlap', action='store_true', 
default=False, help='') +args = parser.parse_args() + + + +min_cross_distances = np.load(f'{args.results_path}/min_cross_distances.npy') +#min_self_distances = np.load(f'{args.results_path}/min_self_distances.npy') +base_min_cross_distances = np.load(f'{args.results_path}/base_min_cross_distances.npy') +rmsds = np.load(f'{args.results_path}/rmsds.npy') +centroid_distances = np.load(f'{args.results_path}/centroid_distances.npy') +confidences = np.load(f'{args.results_path}/confidences.npy') +#complex_names = np.load(f'{args.results_path}/complex_names.npy') +complex_names = read_strings_from_txt('data/splits/timesplit_test') +if args.no_rec_overlap: + names_no_rec_overlap = read_strings_from_txt(f'data/splits/timesplit_test_no_rec_overlap') + without_rec_overlap_list = [] + for name in complex_names: + if name in names_no_rec_overlap: + without_rec_overlap_list.append(1) + else: + without_rec_overlap_list.append(0) + without_rec_overlap = np.array(without_rec_overlap_list, dtype=bool) + rmsds = np.array(rmsds)[without_rec_overlap] + #min_self_distances = np.array(min_self_distances)[without_rec_overlap] + centroid_distances = np.array(centroid_distances)[without_rec_overlap] + confidences = np.array(confidences)[without_rec_overlap] + min_cross_distances = np.array(min_cross_distances)[without_rec_overlap] + base_min_cross_distances = np.array(base_min_cross_distances)[without_rec_overlap] + complex_names = names_no_rec_overlap + + + + +N = rmsds.shape[1] +performance_metrics = { + 'steric_clash_fraction': (100 * (min_cross_distances < 0.4).sum() / len(min_cross_distances) / N).__round__(2), + 'mean_rmsd': rmsds.mean(), + 'rmsds_below_2': (100 * (rmsds < 2).sum() / len(rmsds) / N), + 'rmsds_below_5': (100 * (rmsds < 5).sum() / len(rmsds) / N), + 'rmsds_percentile_25': np.percentile(rmsds, 25).round(2), + 'rmsds_percentile_50': np.percentile(rmsds, 50).round(2), + 'rmsds_percentile_75': np.percentile(rmsds, 75).round(2), + + 'mean_centroid': 
centroid_distances.mean().__round__(2), + 'centroid_below_2': (100 * (centroid_distances < 2).sum() / len(centroid_distances) / N).__round__(2), + 'centroid_below_5': (100 * (centroid_distances < 5).sum() / len(centroid_distances) / N).__round__(2), + 'centroid_percentile_25': np.percentile(centroid_distances, 25).round(2), + 'centroid_percentile_50': np.percentile(centroid_distances, 50).round(2), + 'centroid_percentile_75': np.percentile(centroid_distances, 75).round(2), +} + +if N >= 5: + top5_rmsds = np.min(rmsds[:, :5], axis=1) + top5_centroid_distances = centroid_distances[np.arange(rmsds.shape[0])[:, None], np.argsort(rmsds[:, :5], axis=1)][ :, 0] + top5_min_cross_distances = min_cross_distances[ np.arange(rmsds.shape[0])[:, None], np.argsort(rmsds[:, :5], axis=1)][:, 0] + performance_metrics.update({ + 'top5_steric_clash_fraction': (100 * (top5_min_cross_distances < 0.4).sum() / len(top5_min_cross_distances)).__round__(2), + 'top5_rmsds_below_2': (100 * (top5_rmsds < 2).sum() / len(top5_rmsds)).__round__(2), + 'top5_rmsds_below_5': (100 * (top5_rmsds < 5).sum() / len(top5_rmsds)).__round__(2), + 'top5_rmsds_percentile_25': np.percentile(top5_rmsds, 25).round(2), + 'top5_rmsds_percentile_50': np.percentile(top5_rmsds, 50).round(2), + 'top5_rmsds_percentile_75': np.percentile(top5_rmsds, 75).round(2), + + 'top5_centroid_below_2': (100 * (top5_centroid_distances < 2).sum() / len(top5_centroid_distances)).__round__(2), + 'top5_centroid_below_5': (100 * (top5_centroid_distances < 5).sum() / len(top5_centroid_distances)).__round__(2), + 'top5_centroid_percentile_25': np.percentile(top5_centroid_distances, 25).round(2), + 'top5_centroid_percentile_50': np.percentile(top5_centroid_distances, 50).round(2), + 'top5_centroid_percentile_75': np.percentile(top5_centroid_distances, 75).round(2), + }) + +if N >= 10: + top10_rmsds = np.min(rmsds[:, :10], axis=1) + top10_centroid_distances = centroid_distances[np.arange(rmsds.shape[0])[:, None], np.argsort(rmsds[:, :10], 
axis=1)][:, 0] + top10_min_cross_distances = min_cross_distances[np.arange(rmsds.shape[0])[:, None], np.argsort(rmsds[:, :10], axis=1)][:, 0] + performance_metrics.update({ + 'top10_steric_clash_fraction': (100 * (top10_min_cross_distances < 0.4).sum() / len(top10_min_cross_distances)).__round__(2), + 'top10_rmsds_below_2': (100 * (top10_rmsds < 2).sum() / len(top10_rmsds)).__round__(2), + 'top10_rmsds_below_5': (100 * (top10_rmsds < 5).sum() / len(top10_rmsds)).__round__(2), + 'top10_rmsds_percentile_25': np.percentile(top10_rmsds, 25).round(2), + 'top10_rmsds_percentile_50': np.percentile(top10_rmsds, 50).round(2), + 'top10_rmsds_percentile_75': np.percentile(top10_rmsds, 75).round(2), + + 'top10_centroid_below_2': (100 * (top10_centroid_distances < 2).sum() / len(top10_centroid_distances)).__round__(2), + 'top10_centroid_below_5': (100 * (top10_centroid_distances < 5).sum() / len(top10_centroid_distances)).__round__(2), + 'top10_centroid_percentile_25': np.percentile(top10_centroid_distances, 25).round(2), + 'top10_centroid_percentile_50': np.percentile(top10_centroid_distances, 50).round(2), + 'top10_centroid_percentile_75': np.percentile(top10_centroid_distances, 75).round(2), + }) + + +confidence_ordering = np.argsort(confidences,axis=1)[:,::-1] +filtered_rmsds = rmsds[np.arange(rmsds.shape[0])[:,None],confidence_ordering][:,0] +filtered_centroid_distances = centroid_distances[np.arange(rmsds.shape[0])[:,None],confidence_ordering][:,0] +filtered_min_cross_distances = min_cross_distances[np.arange(rmsds.shape[0])[:, None], confidence_ordering][:, 0] +performance_metrics.update({ + 'filtered_steric_clash_fraction': (100 * (filtered_min_cross_distances < 0.4).sum() / len(filtered_min_cross_distances)).__round__(2), + 'filtered_rmsds_below_2': (100 * (filtered_rmsds < 2).sum() / len(filtered_rmsds)).__round__(2), + 'filtered_rmsds_below_5': (100 * (filtered_rmsds < 5).sum() / len(filtered_rmsds)).__round__(2), + 'filtered_rmsds_percentile_25': 
np.percentile(filtered_rmsds, 25).round(2), + 'filtered_rmsds_percentile_50': np.percentile(filtered_rmsds, 50).round(2), + 'filtered_rmsds_percentile_75': np.percentile(filtered_rmsds, 75).round(2), + + 'filtered_centroid_below_2': (100 * (filtered_centroid_distances < 2).sum() / len(filtered_centroid_distances)).__round__(2), + 'filtered_centroid_below_5': (100 * (filtered_centroid_distances < 5).sum() / len(filtered_centroid_distances)).__round__(2), + 'filtered_centroid_percentile_25': np.percentile(filtered_centroid_distances, 25).round(2), + 'filtered_centroid_percentile_50': np.percentile(filtered_centroid_distances, 50).round(2), + 'filtered_centroid_percentile_75': np.percentile(filtered_centroid_distances, 75).round(2), +}) + +if N >= 5: + top5_filtered_rmsds = np.min(rmsds[np.arange(rmsds.shape[0])[:,None],confidence_ordering][:,:5], axis=1) + top5_filtered_centroid_distances = centroid_distances[np.arange(rmsds.shape[0])[:,None],confidence_ordering][:,:5][ np.arange(rmsds.shape[0])[:, None], np.argsort(rmsds[np.arange(rmsds.shape[0])[:,None],confidence_ordering][:, :5], axis=1)][:, 0] + top5_filtered_min_cross_distances = min_cross_distances[np.arange(rmsds.shape[0])[:, None], confidence_ordering][:, :5][ np.arange(rmsds.shape[0])[:, None], np.argsort(rmsds[np.arange(rmsds.shape[0])[:,None],confidence_ordering][:, :5], axis=1)][:, 0] + performance_metrics.update({ + 'top5_filtered_steric_clash_fraction': (100 * (top5_filtered_min_cross_distances < 0.4).sum() / len(top5_filtered_min_cross_distances)).__round__(2), + 'top5_filtered_rmsds_below_2': (100 * (top5_filtered_rmsds < 2).sum() / len(top5_filtered_rmsds)).__round__(2), + 'top5_filtered_rmsds_below_5': (100 * (top5_filtered_rmsds < 5).sum() / len(top5_filtered_rmsds)).__round__(2), + 'top5_filtered_rmsds_percentile_25': np.percentile(top5_filtered_rmsds, 25).round(2), + 'top5_filtered_rmsds_percentile_50': np.percentile(top5_filtered_rmsds, 50).round(2), + 'top5_filtered_rmsds_percentile_75': 
np.percentile(top5_filtered_rmsds, 75).round(2), + + 'top5_filtered_centroid_below_2': (100 * (top5_filtered_centroid_distances < 2).sum() / len(top5_filtered_centroid_distances)).__round__(2), + 'top5_filtered_centroid_below_5': (100 * (top5_filtered_centroid_distances < 5).sum() / len(top5_filtered_centroid_distances)).__round__(2), + 'top5_filtered_centroid_percentile_25': np.percentile(top5_filtered_centroid_distances, 25).round(2), + 'top5_filtered_centroid_percentile_50': np.percentile(top5_filtered_centroid_distances, 50).round(2), + 'top5_filtered_centroid_percentile_75': np.percentile(top5_filtered_centroid_distances, 75).round(2), + }) +if N >= 10: + top10_filtered_rmsds = np.min(rmsds[np.arange(rmsds.shape[0])[:,None],confidence_ordering][:,:10], axis=1) + top10_filtered_centroid_distances = centroid_distances[np.arange(rmsds.shape[0])[:,None],confidence_ordering][:,:10][ np.arange(rmsds.shape[0])[:, None], np.argsort(rmsds[np.arange(rmsds.shape[0])[:,None],confidence_ordering][:, :10], axis=1)][:, 0] + top10_filtered_min_cross_distances = min_cross_distances[np.arange(rmsds.shape[0])[:, None], confidence_ordering][:, :10][ np.arange(rmsds.shape[0])[:, None], np.argsort(rmsds[np.arange(rmsds.shape[0])[:,None],confidence_ordering][:, :10], axis=1)][:, 0] + performance_metrics.update({ + 'top10_filtered_steric_clash_fraction': (100 * (top10_filtered_min_cross_distances < 0.4).sum() / len(top10_filtered_min_cross_distances)).__round__(2), + 'top10_filtered_rmsds_below_2': (100 * (top10_filtered_rmsds < 2).sum() / len(top10_filtered_rmsds)).__round__(2), + 'top10_filtered_rmsds_below_5': (100 * (top10_filtered_rmsds < 5).sum() / len(top10_filtered_rmsds)).__round__(2), + 'top10_filtered_rmsds_percentile_25': np.percentile(top10_filtered_rmsds, 25).round(2), + 'top10_filtered_rmsds_percentile_50': np.percentile(top10_filtered_rmsds, 50).round(2), + 'top10_filtered_rmsds_percentile_75': np.percentile(top10_filtered_rmsds, 75).round(2), + + 
'top10_filtered_centroid_below_2': (100 * (top10_filtered_centroid_distances < 2).sum() / len(top10_filtered_centroid_distances)).__round__(2), + 'top10_filtered_centroid_below_5': (100 * (top10_filtered_centroid_distances < 5).sum() / len(top10_filtered_centroid_distances)).__round__(2), + 'top10_filtered_centroid_percentile_25': np.percentile(top10_filtered_centroid_distances, 25).round(2), + 'top10_filtered_centroid_percentile_50': np.percentile(top10_filtered_centroid_distances, 50).round(2), + 'top10_filtered_centroid_percentile_75': np.percentile(top10_filtered_centroid_distances, 75).round(2), + }) + +reverse_confidence_ordering = np.argsort(confidences,axis=1) +reverse_filtered_rmsds = rmsds[np.arange(rmsds.shape[0])[:, None], reverse_confidence_ordering][:, 0] +reverse_filtered_centroid_distances = centroid_distances[np.arange(rmsds.shape[0])[:, None], reverse_confidence_ordering][:, 0] +reverse_filtered_min_cross_distances = min_cross_distances[np.arange(rmsds.shape[0])[:, None], reverse_confidence_ordering][:, 0] +performance_metrics.update({ + 'reversefiltered_steric_clash_fraction': (100 * (reverse_filtered_min_cross_distances < 0.4).sum() / len(reverse_filtered_min_cross_distances)).__round__(2), + 'reversefiltered_rmsds_below_2': (100 * (reverse_filtered_rmsds < 2).sum() / len(reverse_filtered_rmsds)).__round__(2), + 'reversefiltered_rmsds_below_5': (100 * (reverse_filtered_rmsds < 5).sum() / len(reverse_filtered_rmsds)).__round__(2), + 'reversefiltered_rmsds_percentile_25': np.percentile(reverse_filtered_rmsds, 25).round(2), + 'reversefiltered_rmsds_percentile_50': np.percentile(reverse_filtered_rmsds, 50).round(2), + 'reversefiltered_rmsds_percentile_75': np.percentile(reverse_filtered_rmsds, 75).round(2), + + 'reversefiltered_centroid_below_2': (100 * (reverse_filtered_centroid_distances < 2).sum() / len(reverse_filtered_centroid_distances)).__round__(2), + 'reversefiltered_centroid_below_5': (100 * (reverse_filtered_centroid_distances < 5).sum() 
/ len(reverse_filtered_centroid_distances)).__round__(2), + 'reversefiltered_centroid_percentile_25': np.percentile(reverse_filtered_centroid_distances, 25).round(2), + 'reversefiltered_centroid_percentile_50': np.percentile(reverse_filtered_centroid_distances, 50).round(2), + 'reversefiltered_centroid_percentile_75': np.percentile(reverse_filtered_centroid_distances, 75).round(2), +}) + +if N >= 5: + top5_reverse_filtered_rmsds = np.min(rmsds[np.arange(rmsds.shape[0])[:, None], reverse_confidence_ordering][:, :5], axis=1) + top5_reverse_filtered_centroid_distances = np.min(centroid_distances[np.arange(rmsds.shape[0])[:, None], reverse_confidence_ordering][:, :5], axis=1) + top5_reverse_filtered_min_cross_distances = np.max(min_cross_distances[np.arange(rmsds.shape[0])[:, None], reverse_confidence_ordering][:, :5], axis=1) + performance_metrics.update({ + 'top5_reverse_filtered_steric_clash_fraction': (100 * (top5_reverse_filtered_min_cross_distances < 0.4).sum() / len(top5_reverse_filtered_min_cross_distances)).__round__(2), + 'top5_reversefiltered_rmsds_below_2': (100 * (top5_reverse_filtered_rmsds < 2).sum() / len(top5_reverse_filtered_rmsds)).__round__(2), + 'top5_reversefiltered_rmsds_below_5': (100 * (top5_reverse_filtered_rmsds < 5).sum() / len(top5_reverse_filtered_rmsds)).__round__(2), + 'top5_reversefiltered_rmsds_percentile_25': np.percentile(top5_reverse_filtered_rmsds, 25).round(2), + 'top5_reversefiltered_rmsds_percentile_50': np.percentile(top5_reverse_filtered_rmsds, 50).round(2), + 'top5_reversefiltered_rmsds_percentile_75': np.percentile(top5_reverse_filtered_rmsds, 75).round(2), + + 'top5_reversefiltered_centroid_below_2': (100 * (top5_reverse_filtered_centroid_distances < 2).sum() / len(top5_reverse_filtered_centroid_distances)).__round__(2), + 'top5_reversefiltered_centroid_below_5': (100 * (top5_reverse_filtered_centroid_distances < 5).sum() / len(top5_reverse_filtered_centroid_distances)).__round__(2), + 
'top5_reversefiltered_centroid_percentile_25': np.percentile(top5_reverse_filtered_centroid_distances, 25).round(2), + 'top5_reversefiltered_centroid_percentile_50': np.percentile(top5_reverse_filtered_centroid_distances, 50).round(2), + 'top5_reversefiltered_centroid_percentile_75': np.percentile(top5_reverse_filtered_centroid_distances, 75).round(2), + }) + +if N >= 10: + top10_reverse_filtered_rmsds = np.min(rmsds[np.arange(rmsds.shape[0])[:, None], reverse_confidence_ordering][:, :10], axis=1) + top10_reverse_filtered_centroid_distances = np.min(centroid_distances[np.arange(rmsds.shape[0])[:, None], reverse_confidence_ordering][:, :10], axis=1) + top10_reverse_filtered_min_cross_distances = np.max(min_cross_distances[np.arange(rmsds.shape[0])[:, None], reverse_confidence_ordering][:, :10], axis=1) + performance_metrics.update({ + 'top10_reverse_filtered_steric_clash_fraction': (100 * (top10_reverse_filtered_min_cross_distances < 0.4).sum() / len(top10_reverse_filtered_min_cross_distances)).__round__(2), + 'top10_reversefiltered_rmsds_below_2': (100 * (top10_reverse_filtered_rmsds < 2).sum() / len(top10_reverse_filtered_rmsds)).__round__(2), + 'top10_reversefiltered_rmsds_below_5': (100 * (top10_reverse_filtered_rmsds < 5).sum() / len(top10_reverse_filtered_rmsds)).__round__(2), + 'top10_reversefiltered_rmsds_percentile_25': np.percentile(top10_reverse_filtered_rmsds, 25).round(2), + 'top10_reversefiltered_rmsds_percentile_50': np.percentile(top10_reverse_filtered_rmsds, 50).round(2), + 'top10_reversefiltered_rmsds_percentile_75': np.percentile(top10_reverse_filtered_rmsds, 75).round(2), + + 'top10_reversefiltered_centroid_below_2': (100 * (top10_reverse_filtered_centroid_distances < 2).sum() / len(top10_reverse_filtered_centroid_distances)).__round__(2), + 'top10_reversefiltered_centroid_below_5': (100 * (top10_reverse_filtered_centroid_distances < 5).sum() / len(top10_reverse_filtered_centroid_distances)).__round__(2), + 
'top10_reversefiltered_centroid_percentile_25': np.percentile(top10_reverse_filtered_centroid_distances, 25).round(2), + 'top10_reversefiltered_centroid_percentile_50': np.percentile(top10_reverse_filtered_centroid_distances, 50).round(2), + 'top10_reversefiltered_centroid_percentile_75': np.percentile(top10_reverse_filtered_centroid_distances, 75).round(2), + }) + +filtered_confidences = confidences[np.arange(confidences.shape[0])[:,None],confidence_ordering][:,0] + +confident_mask = filtered_confidences > 0 +confident_rmsds = filtered_rmsds[confident_mask] +confident_centroid_distances = filtered_centroid_distances[confident_mask] +confident_min_cross_distances = filtered_min_cross_distances[confident_mask] + +performance_metrics.update({ + 'fraction_confident_predictions': (100 * len(confident_rmsds) / len(rmsds)).__round__(2), + 'confident_steric_clash_fraction': (100 * (confident_min_cross_distances < 0.4).sum() / len(confident_min_cross_distances)).__round__(2), + 'confident_rmsds_below_2': (100 * (confident_rmsds < 2).sum() / len(confident_rmsds)).__round__(2), + 'confident_rmsds_below_5': (100 * (confident_rmsds < 5).sum() / len(confident_rmsds)).__round__(2), + 'confident_rmsds_percentile_25': np.percentile(confident_rmsds, 25).round(2), + 'confident_rmsds_percentile_50': np.percentile(confident_rmsds, 50).round(2), + 'confident_rmsds_percentile_75': np.percentile(confident_rmsds, 75).round(2), + + 'confident_centroid_below_2': (100 * (confident_centroid_distances < 2).sum() / len(confident_centroid_distances)).__round__(2), + 'confident_centroid_below_5': (100 * (confident_centroid_distances < 5).sum() / len(confident_centroid_distances)).__round__(2), + 'confident_centroid_percentile_25': np.percentile(confident_centroid_distances, 25).round(2), + 'confident_centroid_percentile_50': np.percentile(confident_centroid_distances, 50).round(2), + 'confident_centroid_percentile_75': np.percentile(confident_centroid_distances, 75).round(2), +}) + +for k in 
performance_metrics: + print(k, performance_metrics[k]) + +fraction_dataset_rmsds_below_2 = [] +perfect_calibration = [] +no_calibration = [] +for dataset_percentage in range(100): + dataset_percentage += 1 + dataset_fraction = (dataset_percentage)/100 + num_samples = round(len(rmsds)*dataset_fraction) + per_complex_confidence_ordering = np.argsort(filtered_confidences)[::-1] + confident_complexes_rmsds = filtered_rmsds[per_complex_confidence_ordering][:num_samples] + confident_complexes_centroid_distances = filtered_centroid_distances[per_complex_confidence_ordering][:num_samples] + confident_complexes_min_cross_distances = filtered_min_cross_distances[per_complex_confidence_ordering][:num_samples] + confident_complexes_metrics = { + 'fraction_confident_complexes_predictions': (100 * len(confident_complexes_rmsds) / len(rmsds)).__round__(2), + 'confident_complexes_steric_clash_fraction': (100 * (confident_complexes_min_cross_distances < 0.4).sum() / len(confident_complexes_min_cross_distances)).__round__(2), + 'confident_complexes_rmsds_below_2': (100 * (confident_complexes_rmsds < 2).sum() / len(confident_complexes_rmsds)).__round__(2), + 'confident_complexes_rmsds_below_5': (100 * (confident_complexes_rmsds < 5).sum() / len(confident_complexes_rmsds)).__round__(2), + 'confident_complexes_rmsds_percentile_25': np.percentile(confident_complexes_rmsds, 25).round(2), + 'confident_complexes_rmsds_percentile_50': np.percentile(confident_complexes_rmsds, 50).round(2), + 'confident_complexes_rmsds_percentile_75': np.percentile(confident_complexes_rmsds, 75).round(2), + + 'confident_complexes_centroid_below_2': (100 * (confident_complexes_centroid_distances < 2).sum() / len(confident_complexes_centroid_distances)).__round__(2), + 'confident_complexes_centroid_below_5': (100 * (confident_complexes_centroid_distances < 5).sum() / len(confident_complexes_centroid_distances)).__round__(2), + 'confident_complexes_centroid_percentile_25': 
np.percentile(confident_complexes_centroid_distances, 25).round(2), + 'confident_complexes_centroid_percentile_50': np.percentile(confident_complexes_centroid_distances, 50).round(2), + 'confident_complexes_centroid_percentile_75': np.percentile(confident_complexes_centroid_distances, 75).round(2), + } + fraction_dataset_rmsds_below_2.append(confident_complexes_metrics['confident_complexes_rmsds_below_2']) + perfect_calibration.append((100 * (np.sort(filtered_rmsds)[:num_samples] < 2).sum() / len(confident_complexes_rmsds)).__round__(2)) + no_calibration.append(performance_metrics['filtered_rmsds_below_2']) + #print('percentage: ',dataset_percentage) + #print(confident_complexes_metrics['confident_complexes_rmsds_below_2']) + +print(scipy.stats.spearmanr(filtered_rmsds, filtered_confidences)) +df = {'conf': filtered_confidences, 'rmsd': filtered_rmsds} +fig = px.scatter(df, x='rmsd',y='conf').update_layout( + xaxis_title="Percentage of datapoints that may be abstained", yaxis_title="Percentage of predictions with RMSD < 2A" +) +fig.update_layout(margin={'l': 0, 'r': 0, 't': 20, 'b': 100}, plot_bgcolor='white', + paper_bgcolor='white', legend_title_text='', legend_title_font_size=1, + legend=dict(yanchor="bottom", y=0.1, xanchor="right", x=0.99, font=dict(size=17), ), + ) +fig.update_xaxes(showgrid=True, gridcolor='lightgrey',title_font=dict(size=19),mirror=True,ticks='outside',showline=True,) +fig.update_yaxes(showgrid=True, gridcolor='lightgrey',title_font=dict(size=19),mirror=True,ticks='outside',showline=True,) +fig.show() + +df = {'Confidence Model': reversed(fraction_dataset_rmsds_below_2),'No Calibration': reversed(no_calibration),'Perfect Calibration': reversed(perfect_calibration),} +fig = px.line(df, y=list(df.keys())).update_layout( + xaxis_title="Percentage of datapoints that may be abstained", yaxis_title="Percentage of predictions with RMSD < 2A" +) +fig.update_yaxes(range = [0,103]) +fig.update_layout(margin={'l': 0, 'r': 0, 't': 20, 'b': 100}, 
plot_bgcolor='white', + paper_bgcolor='white', legend_title_text='', legend_title_font_size=1, + legend=dict(yanchor="bottom", y=0.1, xanchor="right", x=0.99, font=dict(size=17), ), + ) +fig.update_xaxes(showgrid=True, gridcolor='lightgrey',title_font=dict(size=19),mirror=True,ticks='outside',showline=True,) +fig.update_yaxes(showgrid=True, gridcolor='lightgrey',title_font=dict(size=19),mirror=True,ticks='outside',showline=True,) +fig.write_image('results/confidence_calibration.pdf') +fig.show() + +def filter_by_names(method_names, method_array, names_to_keep): + output_array = [] + output_names = [] + for method_name, array_element in zip(method_names,method_array): + if method_name in names_to_keep: + output_array.append(array_element) + output_names.append(method_name) + return np.array(output_array), np.array(output_names) + +qvinaw_rmsds = np.load(os.path.join(args.qvinaw_results_path, 'rmsds.npy')) +qvinaw_names = np.load(os.path.join(args.qvinaw_results_path, 'names.npy')) +qvinaw_rmsds, qvinaw_names = filter_by_names(qvinaw_names, qvinaw_rmsds, complex_names) +qvinaw_rmsds = np.concatenate([qvinaw_rmsds, np.random.choice(qvinaw_rmsds, size=len(complex_names) - len(qvinaw_rmsds))]) + +glide_rmsds = np.load(os.path.join(args.glide_results_path, 'rmsds.npy')) +glide_names = np.load(os.path.join(args.glide_results_path, 'names.npy')).tolist() +glide_rmsds, glide_names = filter_by_names(glide_names, glide_rmsds, complex_names) +glide_rmsds = np.concatenate([glide_rmsds, np.random.choice(glide_rmsds, size=len(complex_names) - len(glide_rmsds))]) + +smina_rmsds = np.load(os.path.join(args.smina_results_path, 'rmsds.npy'))[:,0] +smina_names = np.load(os.path.join(args.smina_results_path, 'names.npy')) +smina_rmsds, smina_names = filter_by_names(smina_names, smina_rmsds, complex_names) +smina_rmsds = np.concatenate([smina_rmsds, np.random.choice(smina_rmsds, size=len(complex_names) - len(smina_rmsds))]) + +gnina_rmsds = np.load(os.path.join(args.gnina_results_path, 
'rmsds.npy'))[:,0] +gnina_names = np.load(os.path.join(args.gnina_results_path, 'names.npy')) +gnina_rmsds, gnina_names = filter_by_names(gnina_names, gnina_rmsds, complex_names) +gnina_rmsds = np.concatenate([gnina_rmsds, np.random.choice(gnina_rmsds, size=len(complex_names) - len(gnina_rmsds))]) + +tankbind_rmsds = np.load(os.path.join(args.tankbind_results_path, 'rmsds.npy'))[:,0] +tankbind_names = np.load(os.path.join(args.tankbind_results_path, 'names.npy')) +tankbind_rmsds, tankbind_names = filter_by_names(tankbind_names, tankbind_rmsds, complex_names) + +equibind_rmsds = np.load(os.path.join(args.equibind_results_path, 'rmsds.npy')) +equibind_names = np.load(os.path.join(args.equibind_results_path, 'names.npy')) +equibind_rmsds, equibind_names = filter_by_names(equibind_names, equibind_rmsds, complex_names) + + +df = {'DiffDock': filtered_rmsds, 'GLIDE': glide_rmsds, 'GNINA': gnina_rmsds, 'SMINA': smina_rmsds, 'QVinaW':qvinaw_rmsds, 'TANKBind': tankbind_rmsds, 'EquiBind': equibind_rmsds} +fig = px.ecdf(df, range_x=[0, 5], range_y=[0.001, 0.75], width=600, height=400) +fig.add_vline(x=2, annotation_text='', annotation_font_size=20, annotation_position="top right", + line_dash='dash', line_color='firebrick', annotation_font_color='firebrick') +fig.update_xaxes(title=f'RMSD (Å)') +fig.update_yaxes(title=f'Fraction with lower RMSD') +fig.update_layout(autosize=False, margin={'l': 65, 'r': 5, 't': 5, 'b': 60}, plot_bgcolor='white', + paper_bgcolor='white', legend_title_text='', legend_title_font_size=18, + legend=dict(yanchor="top", y=0.995, xanchor="left", x=0.02, font=dict(size=18, color='black'), ), ) +fig.update_xaxes(showgrid=True, gridcolor='lightgrey',title_font=dict(size=23, color='black'),mirror=True,ticks='outside',showline=True, linewidth=1, linecolor='black', tickfont = dict(size = 18, color='black')) +fig.update_yaxes(showgrid=True, gridcolor='lightgrey',title_font=dict(size=23, color='black'),mirror=True,ticks='outside',showline=True, linewidth=1, 
linecolor='black', tickfont = dict(size = 18, color='black')) +fig.update_traces(line=dict(width=3)) +fig.write_image('results/rmsds_nooverlap.pdf') +fig.show() \ No newline at end of file diff --git a/forks/DiffDockv1/evaluate_files.py b/forks/DiffDockv1/evaluate_files.py new file mode 100644 index 00000000..c4bc8807 --- /dev/null +++ b/forks/DiffDockv1/evaluate_files.py @@ -0,0 +1,180 @@ +# small script to extract the ligand and save it in a separate file because GNINA will use the ligand position as initial pose +import os +import time +from argparse import FileType, ArgumentParser + +import numpy as np +from biopandas.pdb import PandasPdb +from rdkit import Chem + +from tqdm import tqdm + +from datasets.pdbbind import read_mol +from datasets.process_mols import read_molecule +from utils.utils import read_strings_from_txt, get_symmetry_rmsd + +parser = ArgumentParser() +parser.add_argument('--config', type=FileType(mode='r'), default=None) +parser.add_argument('--data_dir', type=str, default='data/PDBBind_processed', help='') +parser.add_argument('--results_path', type=str, default='results/user_predictions_testset', help='Path to folder with trained model and hyperparameters') +parser.add_argument('--file_suffix', type=str, default='_baseline_ligand.pdb', help='Path to folder with trained model and hyperparameters') +parser.add_argument('--project', type=str, default='ligbind_inf', help='') +parser.add_argument('--file_to_exclude', type=str, default=None, help='') +parser.add_argument('--all_dirs_in_results', action='store_true', default=True, help='Evaluate all directories in the results path instead of using directly looking for the names') +parser.add_argument('--num_predictions', type=int, default=10, help='') +parser.add_argument('--no_id_in_filename', action='store_true', default=False, help='') +parser.add_argument('--test_names_path', type=str, default='data/splits/timesplit_test', help='Path to text file with the folder names in the test set') 
+parser.add_argument('--no_overlap_names_path', type=str, default='data/splits/timesplit_test_no_rec_overlap', help='Path text file with the folder names in the test set that have no receptor overlap with the train set') +args = parser.parse_args() + +print('Reading paths and names.') +names = read_strings_from_txt(args.test_names_path) +names_no_rec_overlap = read_strings_from_txt(args.no_overlap_names_path) +results_path_containments = os.listdir(args.results_path) + +all_times = [] +successful_names_list = [] +rmsds_list = [] +centroid_distances_list = [] +min_cross_distances_list = [] +min_self_distances_list = [] +without_rec_overlap_list = [] +start_time = time.time() +for i, name in enumerate(tqdm(names)): + mol = read_mol(args.data_dir, name, remove_hs=True) + mol = Chem.RemoveAllHs(mol) + orig_ligand_pos = np.array(mol.GetConformer().GetPositions()) + + if args.all_dirs_in_results: + directory_with_name_list = [directory for directory in results_path_containments if name in directory] + if directory_with_name_list == []: + print('Did not find a directory for ', name, '. 
We are skipping that complex') + continue + else: + directory_with_name = directory_with_name_list[0] + ligand_pos = [] + debug_paths = [] + for i in range(args.num_predictions): + file_paths = sorted(os.listdir(os.path.join(args.results_path, directory_with_name))) + if args.file_to_exclude is not None: + file_paths = [path for path in file_paths if not args.file_to_exclude in path] + file_path = [path for path in file_paths if f'rank{i+1}_' in path][0] + mol_pred = read_molecule(os.path.join(args.results_path, directory_with_name, file_path),remove_hs=True, sanitize=True) + mol_pred = Chem.RemoveAllHs(mol_pred) + ligand_pos.append(mol_pred.GetConformer().GetPositions()) + debug_paths.append(file_path) + ligand_pos = np.asarray(ligand_pos) + else: + if not os.path.exists(os.path.join(args.results_path, name, f'{"" if args.no_id_in_filename else name}{args.file_suffix}')): raise Exception('path did not exists:', os.path.join(args.results_path, name, f'{"" if args.no_id_in_filename else name}{args.file_suffix}')) + mol_pred = read_molecule(os.path.join(args.results_path, name, f'{"" if args.no_id_in_filename else name}{args.file_suffix}'), remove_hs=True, sanitize=True) + if mol_pred == None: + print("Skipping ", name, ' because RDKIT could not read it.') + continue + mol_pred = Chem.RemoveAllHs(mol_pred) + ligand_pos = np.asarray([np.array(mol_pred.GetConformer(i).GetPositions()) for i in range(args.num_predictions)]) + try: + rmsd = get_symmetry_rmsd(mol, orig_ligand_pos, [l for l in ligand_pos], mol_pred) + except Exception as e: + print("Using non corrected RMSD because of the error:", e) + rmsd = np.sqrt(((ligand_pos - orig_ligand_pos) ** 2).sum(axis=2).mean(axis=1)) + + rmsds_list.append(rmsd) + centroid_distances_list.append(np.linalg.norm(ligand_pos.mean(axis=1) - orig_ligand_pos[None,:].mean(axis=1), axis=1)) + + rec_path = os.path.join(args.data_dir, name, f'{name}_protein_processed.pdb') + if not os.path.exists(rec_path): + rec_path = 
os.path.join(args.data_dir, name,f'{name}_protein_obabel_reduce.pdb') + rec = PandasPdb().read_pdb(rec_path) + rec_df = rec.df['ATOM'] + receptor_pos = rec_df[['x_coord', 'y_coord', 'z_coord']].to_numpy().squeeze().astype(np.float32) + receptor_pos = np.tile(receptor_pos, (args.num_predictions, 1, 1)) + + cross_distances = np.linalg.norm(receptor_pos[:, :, None, :] - ligand_pos[:, None, :, :], axis=-1) + self_distances = np.linalg.norm(ligand_pos[:, :, None, :] - ligand_pos[:, None, :, :], axis=-1) + self_distances = np.where(np.eye(self_distances.shape[2]), np.inf, self_distances) + min_cross_distances_list.append(np.min(cross_distances, axis=(1,2))) + min_self_distances_list.append(np.min(self_distances, axis=(1, 2))) + successful_names_list.append(name) + without_rec_overlap_list.append(1 if name in names_no_rec_overlap else 0) +performance_metrics = {} +for overlap in ['', 'no_overlap_']: + if 'no_overlap_' == overlap: + without_rec_overlap = np.array(without_rec_overlap_list, dtype=bool) + rmsds = np.array(rmsds_list)[without_rec_overlap] + centroid_distances = np.array(centroid_distances_list)[without_rec_overlap] + min_cross_distances = np.array(min_cross_distances_list)[without_rec_overlap] + min_self_distances = np.array(min_self_distances_list)[without_rec_overlap] + successful_names = np.array(successful_names_list)[without_rec_overlap] + else: + rmsds = np.array(rmsds_list) + centroid_distances = np.array(centroid_distances_list) + min_cross_distances = np.array(min_cross_distances_list) + min_self_distances = np.array(min_self_distances_list) + successful_names = np.array(successful_names_list) + + np.save(os.path.join(args.results_path, f'{overlap}rmsds.npy'), rmsds) + np.save(os.path.join(args.results_path, f'{overlap}names.npy'), successful_names) + np.save(os.path.join(args.results_path, f'{overlap}min_cross_distances.npy'), np.array(min_cross_distances)) + np.save(os.path.join(args.results_path, f'{overlap}min_self_distances.npy'), 
np.array(min_self_distances)) + + performance_metrics.update({ + f'{overlap}steric_clash_fraction': (100 * (min_cross_distances < 0.4).sum() / len(min_cross_distances) / args.num_predictions).__round__(2), + f'{overlap}self_intersect_fraction': (100 * (min_self_distances < 0.4).sum() / len(min_self_distances) / args.num_predictions).__round__(2), + f'{overlap}mean_rmsd': rmsds[:,0].mean(), + f'{overlap}rmsds_below_2': (100 * (rmsds[:,0] < 2).sum() / len(rmsds[:,0])), + f'{overlap}rmsds_below_5': (100 * (rmsds[:,0] < 5).sum() / len(rmsds[:,0])), + f'{overlap}rmsds_percentile_25': np.percentile(rmsds[:,0], 25).round(2), + f'{overlap}rmsds_percentile_50': np.percentile(rmsds[:,0], 50).round(2), + f'{overlap}rmsds_percentile_75': np.percentile(rmsds[:,0], 75).round(2), + + f'{overlap}mean_centroid': centroid_distances[:,0].mean().__round__(2), + f'{overlap}centroid_below_2': (100 * (centroid_distances[:,0] < 2).sum() / len(centroid_distances[:,0])).__round__(2), + f'{overlap}centroid_below_5': (100 * (centroid_distances[:,0] < 5).sum() / len(centroid_distances[:,0])).__round__(2), + f'{overlap}centroid_percentile_25': np.percentile(centroid_distances[:,0], 25).round(2), + f'{overlap}centroid_percentile_50': np.percentile(centroid_distances[:,0], 50).round(2), + f'{overlap}centroid_percentile_75': np.percentile(centroid_distances[:,0], 75).round(2), + }) + + top5_rmsds = np.min(rmsds[:, :5], axis=1) + top5_centroid_distances = centroid_distances[np.arange(rmsds.shape[0])[:,None],np.argsort(rmsds[:, :5], axis=1)][:,0] + top5_min_cross_distances = min_cross_distances[np.arange(rmsds.shape[0])[:,None],np.argsort(rmsds[:, :5], axis=1)][:,0] + top5_min_self_distances = min_self_distances[np.arange(rmsds.shape[0])[:,None],np.argsort(rmsds[:, :5], axis=1)][:,0] + performance_metrics.update({ + f'{overlap}top5_steric_clash_fraction': (100 * (top5_min_cross_distances < 0.4).sum() / len(top5_min_cross_distances)).__round__(2), + f'{overlap}top5_self_intersect_fraction': (100 * 
(top5_min_self_distances < 0.4).sum() / len(top5_min_self_distances)).__round__(2), + f'{overlap}top5_rmsds_below_2': (100 * (top5_rmsds < 2).sum() / len(top5_rmsds)).__round__(2), + f'{overlap}top5_rmsds_below_5': (100 * (top5_rmsds < 5).sum() / len(top5_rmsds)).__round__(2), + f'{overlap}top5_rmsds_percentile_25': np.percentile(top5_rmsds, 25).round(2), + f'{overlap}top5_rmsds_percentile_50': np.percentile(top5_rmsds, 50).round(2), + f'{overlap}top5_rmsds_percentile_75': np.percentile(top5_rmsds, 75).round(2), + + f'{overlap}top5_centroid_below_2': (100 * (top5_centroid_distances < 2).sum() / len(top5_centroid_distances)).__round__(2), + f'{overlap}top5_centroid_below_5': (100 * (top5_centroid_distances < 5).sum() / len(top5_centroid_distances)).__round__(2), + f'{overlap}top5_centroid_percentile_25': np.percentile(top5_centroid_distances, 25).round(2), + f'{overlap}top5_centroid_percentile_50': np.percentile(top5_centroid_distances, 50).round(2), + f'{overlap}top5_centroid_percentile_75': np.percentile(top5_centroid_distances, 75).round(2), + }) + + + top10_rmsds = np.min(rmsds[:, :10], axis=1) + top10_centroid_distances = centroid_distances[np.arange(rmsds.shape[0])[:,None],np.argsort(rmsds[:, :10], axis=1)][:,0] + top10_min_cross_distances = min_cross_distances[np.arange(rmsds.shape[0])[:,None],np.argsort(rmsds[:, :10], axis=1)][:,0] + top10_min_self_distances = min_self_distances[np.arange(rmsds.shape[0])[:,None],np.argsort(rmsds[:, :10], axis=1)][:,0] + performance_metrics.update({ + f'{overlap}top10_self_intersect_fraction': (100 * (top10_min_self_distances < 0.4).sum() / len(top10_min_self_distances)).__round__(2), + f'{overlap}top10_steric_clash_fraction': ( 100 * (top10_min_cross_distances < 0.4).sum() / len(top10_min_cross_distances)).__round__(2), + f'{overlap}top10_rmsds_below_2': (100 * (top10_rmsds < 2).sum() / len(top10_rmsds)).__round__(2), + f'{overlap}top10_rmsds_below_5': (100 * (top10_rmsds < 5).sum() / len(top10_rmsds)).__round__(2), + 
f'{overlap}top10_rmsds_percentile_25': np.percentile(top10_rmsds, 25).round(2), + f'{overlap}top10_rmsds_percentile_50': np.percentile(top10_rmsds, 50).round(2), + f'{overlap}top10_rmsds_percentile_75': np.percentile(top10_rmsds, 75).round(2), + + f'{overlap}top10_centroid_below_2': (100 * (top10_centroid_distances < 2).sum() / len(top10_centroid_distances)).__round__(2), + f'{overlap}top10_centroid_below_5': (100 * (top10_centroid_distances < 5).sum() / len(top10_centroid_distances)).__round__(2), + f'{overlap}top10_centroid_percentile_25': np.percentile(top10_centroid_distances, 25).round(2), + f'{overlap}top10_centroid_percentile_50': np.percentile(top10_centroid_distances, 50).round(2), + f'{overlap}top10_centroid_percentile_75': np.percentile(top10_centroid_distances, 75).round(2), + }) +for k in performance_metrics: + print(k, performance_metrics[k]) + diff --git a/forks/DiffDockv1/inference.py b/forks/DiffDockv1/inference.py new file mode 100644 index 00000000..ac988744 --- /dev/null +++ b/forks/DiffDockv1/inference.py @@ -0,0 +1,359 @@ +from collections import defaultdict +import copy +import glob +import os +import shutil +import torch +from argparse import ArgumentParser, Namespace +from rdkit.Chem import RemoveHs +from functools import partial +import numpy as np +import pandas as pd +from rdkit import Chem, RDLogger +from torch_geometric.loader import DataLoader + +from datasets.process_mols import write_mol_with_coords +from utils.diffusion_utils import t_to_sigma as t_to_sigma_compl, get_t_schedule +from utils.inference_utils import InferenceDataset, set_nones +from utils.sampling import randomize_position, sampling +from utils.utils import get_model +from utils.visualise import PDBFile +from tqdm import tqdm + +RDLogger.DisableLog('rdApp.*') +import yaml +parser = ArgumentParser() +parser.add_argument('--protein_ligand_csv', type=str, default=None, help='Path to a .csv file specifying the input as described in the README. 
If this is not None, it will be used instead of the --protein_path, --protein_sequence and --ligand parameters') +parser.add_argument('--complex_name', type=str, default='1a0q', help='Name that the complex will be saved with') +parser.add_argument('--protein_path', type=str, default=None, help='Path to the protein file') +parser.add_argument('--protein_sequence', type=str, default=None, help='Sequence of the protein for ESMFold, this is ignored if --protein_path is not None') +parser.add_argument('--ligand_description', type=str, default='CCCCC(NC(=O)CCC(=O)O)P(=O)(O)OC1=CC=CC=C1', help='Either a SMILES string or the path to a molecule file that rdkit can read') + +parser.add_argument('--out_dir', type=str, default='results/user_inference', help='Directory where the outputs will be written to') +parser.add_argument('--save_visualisation', action='store_true', default=False, help='Save a pdb file with all of the steps of the reverse diffusion') +parser.add_argument('--samples_per_complex', type=int, default=10, help='Number of samples to generate') + +parser.add_argument('--model_dir', type=str, default='workdir/paper_score_model', help='Path to folder with trained score model and hyperparameters') +parser.add_argument('--ckpt', type=str, default='best_ema_inference_epoch_model.pt', help='Checkpoint to use for the score model') +parser.add_argument('--confidence_model_dir', type=str, default='workdir/paper_confidence_model', help='Path to folder with trained confidence model and hyperparameters') +parser.add_argument('--confidence_ckpt', type=str, default='best_model_epoch75.pt', help='Checkpoint to use for the confidence model') + +parser.add_argument('--batch_size', type=int, default=32, help='') +parser.add_argument('--no_final_step_noise', action='store_true', default=False, help='Use no noise in the final step of the reverse diffusion') +parser.add_argument('--inference_steps', type=int, default=20, help='Number of denoising steps') 
parser.add_argument('--actual_steps', type=int, default=None, help='Number of denoising steps that are actually performed')

parser.add_argument('--cuda_device_index', type=int, default=None)
parser.add_argument('--skip_existing', action='store_true', default=False, help='Skip inference for complexes that already have output files')

args = parser.parse_args()


# define helper functions
def is_int(s):
    """Report whether `s` can be converted with `int()`.

    Args:
        s: Value to test, typically a string.

    Returns:
        True if `int(s)` succeeds, False if it raises `ValueError` or
        `TypeError` (e.g. for `None`).
    """
    try:
        int(s)
    except (TypeError, ValueError):
        return False
    return True


def merge_sdf_files(sdf_files, output_file):
    """Combine single-molecule SDF files into one molecule and write it out.

    Args:
        sdf_files: Paths of at least two SDF files, each holding exactly one
            molecule.
        output_file: Path of the SDF file to write the combined molecule to.

    Raises:
        AssertionError: If fewer than two files are given or a file does not
            contain exactly one molecule.
        ValueError: If any file after the first yields `None` instead of a
            molecule, or if no molecule could be assembled.
    """
    assert len(sdf_files) > 1, "There must be at least two molecules to merge."
    combined_sdf = Chem.SDMolSupplier(sdf_files[0])
    assert len(combined_sdf) == 1, "The first SDF file must contain exactly one molecule."
    combined_molecule = combined_sdf[0]
    for sdf_file in sdf_files[1:]:
        sdf = Chem.SDMolSupplier(sdf_file)
        assert len(sdf) == 1, "Each SDF file must contain exactly one molecule."
        mol = sdf[0]
        if mol is None:
            raise ValueError(f"Failed to load a valid molecule from {sdf_file} in `merge_sdf_files`.")
        if combined_molecule is None:
            # The first file yielded None: start the merge from the first
            # molecule that did load instead of failing.
            combined_molecule = mol
        else:
            combined_molecule = Chem.CombineMols(combined_molecule, mol)

    if combined_molecule is None:
        raise ValueError("Failed to merge molecules in `merge_sdf_files`.")
    w = Chem.SDWriter(output_file)
    try:
        w.write(combined_molecule)
    finally:
        # Close the writer even if writing fails so the handle is not leaked.
        w.close()


def rename_files_by_confidence(directory_path):
    """Renames files in a directory such that files with higher confidence scores
    have lower rank numbers (e.g., rank1.sdf has the highest confidence).

    Only files whose name contains "_confidence" are touched. The confidence is
    the text between the last "_confidence" and the file extension; each file is
    renamed to `rank{rank}_confidence{confidence}{extension}`.

    Renaming happens in two passes through temporary names. Renaming in place
    could overwrite a file that still holds the target name and had not been
    renamed yet, e.g. when two files share the same confidence text.

    Args:
        directory_path: Directory whose prediction files are re-ranked.

    Raises:
        ValueError: If the confidence part of a file name is not a float.
    """
    def confidence_text(filename):
        return os.path.splitext(filename.split("_confidence")[-1])[0]

    files = [file for file in os.listdir(directory_path) if "_confidence" in file]
    # Sort files by confidence in descending order
    files.sort(key=lambda filename: -float(confidence_text(filename)))

    # Work out every (old name, new name) pair first; skip files already in place.
    planned_renames = []
    for rank, filename in enumerate(files, start=1):
        extension = os.path.splitext(filename)[-1]
        new_filename = f"rank{rank}_confidence{confidence_text(filename)}{extension}"
        if new_filename != filename:
            planned_renames.append((filename, new_filename))

    # Pass 1: park each file under a unique temporary name so no final name is
    # occupied by a file that still has to move.
    staged_renames = []
    for index, (filename, new_filename) in enumerate(planned_renames):
        temporary_filename = f".rename_tmp_{os.getpid()}_{index}"
        os.rename(os.path.join(directory_path, filename), os.path.join(directory_path, temporary_filename))
        staged_renames.append((temporary_filename, new_filename))

    # Pass 2: move the parked files to their final ranked names.
    for temporary_filename, new_filename in staged_renames:
        os.rename(os.path.join(directory_path, temporary_filename), os.path.join(directory_path, new_filename))


def filename_sort_key(filepath):
    """Sort key that orders prediction files by ligand number, then by rank.

    The ligand number is the second "_"-separated token of the parent directory
    name; the rank is the integer following "rank" in the first "_"-separated
    token of the file name.

    Args:
        filepath: Path of the form `.../<name>_<ligand_number>/rank<rank>...`.

    Returns:
        Tuple `(ligand_number, rank_number)` of ints.
    """
    ligand_directory = os.path.basename(os.path.dirname(filepath))
    file_name = os.path.basename(filepath)
    ligand_number = int(ligand_directory.split('_')[1])
    rank_number = int(os.path.splitext(file_name.split('_')[0].split("rank")[1])[0])
    return ligand_number, rank_number


os.makedirs(args.out_dir, exist_ok=True)
# NOTE(review): `yaml.full_load` is kept as-is; if model directories can come
# from untrusted sources, confirm whether `yaml.safe_load` is sufficient here.
with open(f'{args.model_dir}/model_parameters.yml') as f:
    score_model_args = Namespace(**yaml.full_load(f))
if args.confidence_model_dir is not None:
    with open(f'{args.confidence_model_dir}/model_parameters.yml') as f:
        confidence_args = Namespace(**yaml.full_load(f))

# Use the requested CUDA device (or the default one) when CUDA is available, else the CPU.
if torch.cuda.is_available():
    device = torch.device(f'cuda:{args.cuda_device_index}' if args.cuda_device_index is not None else 'cuda')
else:
    device = torch.device('cpu')

if args.protein_ligand_csv is not None:
    # Batch mode: one complex per CSV row.
    df = pd.read_csv(args.protein_ligand_csv)
    complex_name_list = set_nones(df['complex_name'].tolist())
    protein_path_list = set_nones(df['protein_path'].tolist())
    protein_sequence_list = set_nones(df['protein_sequence'].tolist())
    ligand_description_list = set_nones(df['ligand_description'].tolist())
else:
    # Single-complex mode: take the inputs from the command-line arguments.
    complex_name_list = [args.complex_name]
    protein_path_list = [args.protein_path]
    protein_sequence_list = [args.protein_sequence]
    ligand_description_list = [args.ligand_description]

# organize multi-ligand inputs by
# ---------------------------------------------------------------------------------------------------
# Top-level inference driver.
#   1. load score / confidence model hyper-parameters and pick a device
#   2. read the complexes to dock (CSV or single CLI complex)
#   3. split '|'-separated multi-ligand descriptions into one entry per ligand
#   4. per group: build datasets, load models, sample poses, rank and write them
#   5. merge the per-ligand outputs of a group back into one SDF file per rank
# ---------------------------------------------------------------------------------------------------
os.makedirs(args.out_dir, exist_ok=True)
# NOTE(review): `yaml.full_load` is not PyYAML's safe loader. That is acceptable for trusted
# checkpoints; consider `yaml.safe_load` if model directories can come from untrusted sources.
with open(f'{args.model_dir}/model_parameters.yml') as f:
    score_model_args = Namespace(**yaml.full_load(f))
if args.confidence_model_dir is not None:
    with open(f'{args.confidence_model_dir}/model_parameters.yml') as f:
        confidence_args = Namespace(**yaml.full_load(f))

device = torch.device((f'cuda:{args.cuda_device_index}' if args.cuda_device_index is not None else 'cuda') if torch.cuda.is_available() else 'cpu')

if args.protein_ligand_csv is not None:
    df = pd.read_csv(args.protein_ligand_csv)
    complex_name_list = set_nones(df['complex_name'].tolist())
    protein_path_list = set_nones(df['protein_path'].tolist())
    protein_sequence_list = set_nones(df['protein_sequence'].tolist())
    ligand_description_list = set_nones(df['ligand_description'].tolist())
else:
    complex_name_list = [args.complex_name]
    protein_path_list = [args.protein_path]
    protein_sequence_list = [args.protein_sequence]
    ligand_description_list = [args.ligand_description]

# organize multi-ligand inputs by grouping complexes with multiple ligands together, predicting the ligand conformations separately, and then re-ranking and combining the ligands thereafter
ligand_description_groups = [
    {
        "ligand_descriptions": (ligand_description.split('|') if ligand_description is not None and '|' in ligand_description else [ligand_description]),
        "complex_names": ([complex_name_list[i] + f'_{lig_idx}' for lig_idx in range(len(ligand_description.split('|')))] if ligand_description is not None and '|' in ligand_description else [complex_name_list[i]]),
        "protein_paths": ([protein_path_list[i] for _ in range(len(ligand_description.split('|')))] if ligand_description is not None and '|' in ligand_description else [protein_path_list[i]]),
        "protein_sequences": ([protein_sequence_list[i] for _ in range(len(ligand_description.split('|')))] if ligand_description is not None and '|' in ligand_description else [protein_sequence_list[i]]),
    } for i, ligand_description in enumerate(ligand_description_list)
]
for ligand_description_group in ligand_description_groups:
    # the four lists are rebuilt per group and only hold the members that still need inference
    complex_name_list, protein_path_list, protein_sequence_list, ligand_description_list = [], [], [], []
    for i in range(len(ligand_description_group["ligand_descriptions"])):
        raw_name = ligand_description_group["complex_names"][i]
        # A complex name may be None (it is replaced by `complex_{i}` below); calling `.split` on it
        # unconditionally would raise AttributeError before the `name is not None` check could run.
        name = raw_name.split("_")[0] if raw_name is not None else None
        if args.skip_existing and name is not None and len(glob.glob(f'{args.out_dir}/{name}/rank1*.sdf')):
            print(f"HAPPENING | Skipping inference for {name} as it already has output files.")
            continue
        complex_name_list.append(ligand_description_group["complex_names"][i])
        protein_path_list.append(ligand_description_group["protein_paths"][i])
        protein_sequence_list.append(ligand_description_group["protein_sequences"][i])
        ligand_description_list.append(ligand_description_group["ligand_descriptions"][i])

    complex_name_list = [name if name is not None else f"complex_{i}" for i, name in enumerate(complex_name_list)]
    if not complex_name_list:
        print("With `skip_existing=True`, all complexes for the current ligand group have already been processed. Continuing...")
        continue
    for name in complex_name_list:
        write_dir = f'{args.out_dir}/{name}'
        os.makedirs(write_dir, exist_ok=True)

    # preprocessing of complexes into geometric graphs
    test_dataset = InferenceDataset(out_dir=args.out_dir, complex_names=complex_name_list, protein_files=protein_path_list,
                                    ligand_descriptions=ligand_description_list, protein_sequences=protein_sequence_list,
                                    lm_embeddings=score_model_args.esm_embeddings_path is not None,
                                    receptor_radius=score_model_args.receptor_radius, remove_hs=score_model_args.remove_hs,
                                    c_alpha_max_neighbors=score_model_args.c_alpha_max_neighbors,
                                    all_atoms=score_model_args.all_atoms, atom_radius=score_model_args.atom_radius,
                                    atom_max_neighbors=score_model_args.atom_max_neighbors)
    test_loader = DataLoader(dataset=test_dataset, batch_size=1, shuffle=False)

    if args.confidence_model_dir is not None and not confidence_args.use_original_model_cache:
        print('HAPPENING | confidence model uses different type of graphs than the score model. '
              'Loading (or creating if not existing) the data for the confidence model now.')
        confidence_test_dataset = \
            InferenceDataset(out_dir=args.out_dir, complex_names=complex_name_list, protein_files=protein_path_list,
                             ligand_descriptions=ligand_description_list, protein_sequences=protein_sequence_list,
                             lm_embeddings=confidence_args.esm_embeddings_path is not None,
                             receptor_radius=confidence_args.receptor_radius, remove_hs=confidence_args.remove_hs,
                             c_alpha_max_neighbors=confidence_args.c_alpha_max_neighbors,
                             all_atoms=confidence_args.all_atoms, atom_radius=confidence_args.atom_radius,
                             atom_max_neighbors=confidence_args.atom_max_neighbors,
                             precomputed_lm_embeddings=test_dataset.lm_embeddings)
    else:
        confidence_test_dataset = None

    t_to_sigma = partial(t_to_sigma_compl, args=score_model_args)

    model = get_model(score_model_args, device, t_to_sigma=t_to_sigma, no_parallel=True)
    state_dict = torch.load(f'{args.model_dir}/{args.ckpt}', map_location=torch.device('cpu'))
    model.load_state_dict(state_dict, strict=True)
    model = model.to(device)
    model.eval()

    if args.confidence_model_dir is not None:
        confidence_model = get_model(confidence_args, device, t_to_sigma=t_to_sigma, no_parallel=True, confidence_mode=True)
        state_dict = torch.load(f'{args.confidence_model_dir}/{args.confidence_ckpt}', map_location=torch.device('cpu'))
        confidence_model.load_state_dict(state_dict, strict=True)
        confidence_model = confidence_model.to(device)
        confidence_model.eval()
    else:
        confidence_model = None
        confidence_args = None

    tr_schedule = get_t_schedule(inference_steps=args.inference_steps)

    try:
        failures, skipped = 0, 0
        N = args.samples_per_complex
        print('Size of test dataset: ', len(test_dataset))
        for idx, orig_complex_graph in tqdm(enumerate(test_loader)):
            if not orig_complex_graph.success[0]:
                skipped += 1
                print(f"HAPPENING | The test dataset did not contain {test_dataset.complex_names[idx]} for {test_dataset.ligand_descriptions[idx]} and {test_dataset.protein_files[idx]}. We are skipping this complex.")
                continue
            try:
                if confidence_test_dataset is not None:
                    confidence_complex_graph = confidence_test_dataset[idx]
                    if not confidence_complex_graph.success:
                        skipped += 1
                        print(f"HAPPENING | The confidence dataset did not contain {orig_complex_graph.name}. We are skipping this complex.")
                        continue
                    confidence_data_list = [copy.deepcopy(confidence_complex_graph) for _ in range(N)]
                else:
                    confidence_data_list = None
                # N independent copies of the complex, each started from a randomized ligand pose
                data_list = [copy.deepcopy(orig_complex_graph) for _ in range(N)]
                randomize_position(data_list, score_model_args.no_torsion, False, score_model_args.tr_sigma_max)
                lig = orig_complex_graph.mol[0]

                # initialize visualisation
                pdb = None
                if args.save_visualisation:
                    visualization_list = []
                    for graph in data_list:
                        pdb = PDBFile(lig)
                        pdb.add(lig, 0, 0)
                        pdb.add((orig_complex_graph['ligand'].pos + orig_complex_graph.original_center).detach().cpu(), 1, 0)
                        pdb.add((graph['ligand'].pos + graph.original_center).detach().cpu(), part=1, order=1)
                        visualization_list.append(pdb)
                else:
                    visualization_list = None

                # run reverse diffusion
                data_list, confidence = sampling(data_list=data_list, model=model,
                                                 inference_steps=args.actual_steps if args.actual_steps is not None else args.inference_steps,
                                                 tr_schedule=tr_schedule, rot_schedule=tr_schedule, tor_schedule=tr_schedule,
                                                 device=device, t_to_sigma=t_to_sigma, model_args=score_model_args,
                                                 visualization_list=visualization_list, confidence_model=confidence_model,
                                                 confidence_data_list=confidence_data_list, confidence_model_args=confidence_args,
                                                 batch_size=args.batch_size, no_final_step_noise=args.no_final_step_noise)
                ligand_pos = np.asarray([complex_graph['ligand'].pos.cpu().numpy() + orig_complex_graph.original_center.cpu().numpy() for complex_graph in data_list])

                # reorder predictions based on confidence output
                if confidence is not None and isinstance(confidence_args.rmsd_classification_cutoff, list):
                    confidence = confidence[:, 0]
                if confidence is not None:
                    confidence = confidence.cpu().numpy()
                    re_order = np.argsort(confidence)[::-1]
                    confidence = confidence[re_order]
                    ligand_pos = ligand_pos[re_order]

                # save predictions
                # NOTE(review): the `_confidence` file name below indexes `confidence`, so it raises
                # TypeError when no confidence model is configured; the handler below then counts the
                # complex as failed. The later merging step parses confidences out of these file names,
                # so the naming is deliberately left unchanged here.
                write_dir = f'{args.out_dir}/{complex_name_list[idx]}'
                for rank, pos in enumerate(ligand_pos):
                    mol_pred = copy.deepcopy(lig)
                    if score_model_args.remove_hs: mol_pred = RemoveHs(mol_pred)
                    if rank == 0: write_mol_with_coords(mol_pred, pos, os.path.join(write_dir, f'rank{rank+1}.sdf'))
                    write_mol_with_coords(mol_pred, pos, os.path.join(write_dir, f'rank{rank+1}_confidence{confidence[rank]:.2f}.sdf'))

                # save visualisation frames
                if args.save_visualisation:
                    if confidence is not None:
                        for rank, batch_idx in enumerate(re_order):
                            visualization_list[batch_idx].write(os.path.join(write_dir, f'rank{rank+1}_reverseprocess.pdb'))
                    else:
                        # Without re-ordering, sample i keeps rank i. The previous code enumerated
                        # `ligand_pos` and used each coordinate array as a list index.
                        for rank in range(len(ligand_pos)):
                            visualization_list[rank].write(os.path.join(write_dir, f'rank{rank+1}_reverseprocess.pdb'))

            except Exception as e:
                print("Failed on", orig_complex_graph["name"], e)
                failures += 1

        print(f'Failed for {failures} complexes')
        print(f'Skipped {skipped} complexes')
        print(f'Results are in {args.out_dir}')

        # collect the per-ligand output directories (`<group>_<int>`) that belong to the same group
        group_member_names = defaultdict(list)
        for idx in range(len(test_dataset)):
            group_idx = complex_name_list[idx].split('_')[-1]
            if is_int(group_idx):
                group_name = complex_name_list[idx].rpartition("_")[0]
                group_member_names[group_name].append(complex_name_list[idx])
        for group_name, filenames in group_member_names.items():
            write_dir = f'{args.out_dir}/{group_name}'
            os.makedirs(write_dir, exist_ok=True)
            # find the first valid (ranked) molecule for each group member
            first_valid_rank_filenames = []
            for filename in filenames:
                first_valid_rank_filenames.append(None)
                for current_rank in range(N):
                    current_rank_filename = glob.glob(f'{args.out_dir}/{filename}/rank{current_rank+1}_*.sdf')[0]
                    sdf = Chem.SDMolSupplier(current_rank_filename)
                    assert len(sdf) == 1, "Each valid ranked SDF file must contain exactly one molecule."
                    mol = sdf[0]
                    if mol is not None:
                        first_valid_rank_filenames[-1] = current_rank_filename
                        break
            # report the average confidence value across the group members
            for rank in range(N):
                rank_filenames = sorted([glob.glob(f'{args.out_dir}/{filename}/rank{rank+1}_*.sdf')[0] for filename in filenames], key=filename_sort_key)
                # ensure that all group members have a valid molecule at this rank
                valid_rank_filenames = []
                for filename_index, rank_filename in enumerate(rank_filenames):
                    sdf = Chem.SDMolSupplier(rank_filename)
                    assert len(sdf) == 1, "Each ranked SDF file must contain exactly one molecule."
                    mol = sdf[0]
                    if mol is None:
                        print(f"Failed to load a valid ranked molecule from {rank_filename}. Replacing with the first valid molecule from another rank: {first_valid_rank_filenames[filename_index]}")
                        valid_rank_filenames.append(first_valid_rank_filenames[filename_index] if first_valid_rank_filenames[filename_index] else rank_filename)
                    else:
                        valid_rank_filenames.append(rank_filename)
                rank_filenames = valid_rank_filenames
                avg_confidence = 0
                for rank_filename in rank_filenames:
                    confidence = float(os.path.splitext(os.path.basename(rank_filename))[0].split('_confidence')[-1])
                    avg_confidence += confidence
                avg_confidence /= len(rank_filenames)
                if len(rank_filenames) > 1:
                    merge_sdf_files(rank_filenames, f'{write_dir}/rank{rank+1}_confidence{avg_confidence:.2f}.sdf')
                else:
                    shutil.move(rank_filenames[0], f'{write_dir}/rank{rank+1}_confidence{avg_confidence:.2f}.sdf')
                if rank == 0:
                    # the un-suffixed `rank1.sdf` files are merged the same way
                    rank_filenames = sorted([f'{args.out_dir}/{filename}/rank{rank+1}.sdf' for filename in filenames], key=filename_sort_key)
                    valid_rank_filenames = []
                    for filename_index, rank_filename in enumerate(rank_filenames):
                        sdf = Chem.SDMolSupplier(rank_filename)
                        assert len(sdf) == 1, "Each ranked SDF file must contain exactly one molecule."
                        mol = sdf[0]
                        if mol is None:
                            print(f"Failed to load a valid ranked molecule from {rank_filename}. Replacing with the first valid molecule from another rank: {first_valid_rank_filenames[filename_index]}")
                            valid_rank_filenames.append(first_valid_rank_filenames[filename_index] if first_valid_rank_filenames[filename_index] else rank_filename)
                        else:
                            valid_rank_filenames.append(rank_filename)
                    rank_filenames = valid_rank_filenames
                    if len(rank_filenames) > 1:
                        merge_sdf_files(rank_filenames, f'{write_dir}/rank{rank+1}.sdf')
                    else:
                        shutil.move(rank_filenames[0], f'{write_dir}/rank{rank+1}.sdf')
            # re-rank group members according to their average confidence
            rename_files_by_confidence(write_dir)
            # update solo `rank1` file
            try:
                shutil.copyfile(glob.glob(f'{write_dir}/rank1_*.sdf')[0], f'{write_dir}/rank1.sdf')
            except IndexError:
                print(f"Failed to find a valid `rank1` molecule for {group_name}. Skipping update to `rank1.sdf`.")
            # remove the individual (now-unused) file directories
            for filename in filenames:
                if os.path.exists(f'{args.out_dir}/{filename}') and os.path.isdir(f'{args.out_dir}/{filename}'):
                    shutil.rmtree(f'{args.out_dir}/{filename}', ignore_errors=True)
    except Exception as e:
        print(f"Failed on complex {complex_name_list[idx]} due to: {e}. Skipping...")
class TensorProductScoreModel(torch.nn.Module):
    """Tensor-product convolutional model over ligand atoms, receptor residues and receptor atoms.

    With ``confidence_mode=False`` the forward pass returns translational, rotational and
    torsional score predictions; with ``confidence_mode=True`` it returns the output of the
    confidence head computed from mean-pooled ligand scalar features.
    """

    def __init__(self, t_to_sigma, device, timestep_emb_func, in_lig_edge_features=4, sigma_embed_dim=32, sh_lmax=2,
                 ns=16, nv=4, num_conv_layers=2, lig_max_radius=5, rec_max_radius=30, cross_max_distance=250,
                 center_max_distance=30, distance_embed_dim=32, cross_distance_embed_dim=32, no_torsion=False,
                 scale_by_sigma=True, use_second_order_repr=False, batch_norm=True,
                 dynamic_max_cross=False, dropout=0.0, lm_embedding_type=False, confidence_mode=False,
                 confidence_dropout=0, confidence_no_batchnorm=False, num_confidence_outputs=1):
        """Build the embedding layers, the stack of convolution layers and the output heads.

        Args:
            t_to_sigma: callable mapping the (tr, rot, tor) diffusion times to sigmas.
            device: device used for the empty torsion tensor returned by ``forward``.
            timestep_emb_func: callable embedding a time tensor into ``sigma_embed_dim`` features.
            ns, nv: number of scalar / vector channels used in the irreps.
            num_conv_layers: number of interaction layers (nine convolutions each).
            lm_embedding_type: language-model embedding type forwarded to the receptor
                ``AtomEncoder``; falsy values are treated as "no LM embedding".
            confidence_mode: build the confidence head instead of the score heads.
            (remaining arguments are radii, embedding sizes, dropout and normalisation switches
            stored on the instance or passed to the sub-modules below)
        """
        super(TensorProductScoreModel, self).__init__()
        self.t_to_sigma = t_to_sigma
        self.in_lig_edge_features = in_lig_edge_features
        self.sigma_embed_dim = sigma_embed_dim
        self.lig_max_radius = lig_max_radius
        self.rec_max_radius = rec_max_radius
        self.cross_max_distance = cross_max_distance
        self.dynamic_max_cross = dynamic_max_cross
        self.center_max_distance = center_max_distance
        self.distance_embed_dim = distance_embed_dim
        self.cross_distance_embed_dim = cross_distance_embed_dim
        self.sh_irreps = o3.Irreps.spherical_harmonics(lmax=sh_lmax)
        self.ns, self.nv = ns, nv
        self.scale_by_sigma = scale_by_sigma
        self.device = device
        self.no_torsion = no_torsion
        self.num_conv_layers = num_conv_layers
        self.timestep_emb_func = timestep_emb_func
        self.confidence_mode = confidence_mode

        # NOTE(review): the signature default `False` is kept for compatibility, but the AtomEncoder
        # in models/score_model.py appears to accept only None or 'esm' and to raise ValueError for
        # anything else, so falsy values are normalised to None here — confirm against that module.
        rec_lm_embedding_type = lm_embedding_type if lm_embedding_type else None

        # embedding layers
        self.lig_node_embedding = AtomEncoder(emb_dim=ns, feature_dims=lig_feature_dims, sigma_embed_dim=sigma_embed_dim)
        self.lig_edge_embedding = nn.Sequential(nn.Linear(in_lig_edge_features + sigma_embed_dim + distance_embed_dim, ns), nn.ReLU(), nn.Dropout(dropout), nn.Linear(ns, ns))

        self.rec_node_embedding = AtomEncoder(emb_dim=ns, feature_dims=rec_residue_feature_dims, sigma_embed_dim=sigma_embed_dim, lm_embedding_type=rec_lm_embedding_type)
        self.rec_edge_embedding = nn.Sequential(nn.Linear(sigma_embed_dim + distance_embed_dim, ns), nn.ReLU(), nn.Dropout(dropout), nn.Linear(ns, ns))

        self.atom_node_embedding = AtomEncoder(emb_dim=ns, feature_dims=rec_atom_feature_dims, sigma_embed_dim=sigma_embed_dim)
        self.atom_edge_embedding = nn.Sequential(nn.Linear(sigma_embed_dim + distance_embed_dim, ns), nn.ReLU(), nn.Dropout(dropout), nn.Linear(ns, ns))

        self.lr_edge_embedding = nn.Sequential(nn.Linear(sigma_embed_dim + cross_distance_embed_dim, ns), nn.ReLU(), nn.Dropout(dropout), nn.Linear(ns, ns))
        self.ar_edge_embedding = nn.Sequential(nn.Linear(sigma_embed_dim + distance_embed_dim, ns), nn.ReLU(), nn.Dropout(dropout), nn.Linear(ns, ns))
        self.la_edge_embedding = nn.Sequential(nn.Linear(sigma_embed_dim + cross_distance_embed_dim, ns), nn.ReLU(), nn.Dropout(dropout), nn.Linear(ns, ns))

        self.lig_distance_expansion = GaussianSmearing(0.0, lig_max_radius, distance_embed_dim)
        self.rec_distance_expansion = GaussianSmearing(0.0, rec_max_radius, distance_embed_dim)
        self.cross_distance_expansion = GaussianSmearing(0.0, cross_max_distance, cross_distance_embed_dim)

        # irreps of the node features after 0, 1, 2, 3+ interaction layers
        if use_second_order_repr:
            irrep_seq = [
                f'{ns}x0e',
                f'{ns}x0e + {nv}x1o + {nv}x2e',
                f'{ns}x0e + {nv}x1o + {nv}x2e + {nv}x1e + {nv}x2o',
                f'{ns}x0e + {nv}x1o + {nv}x2e + {nv}x1e + {nv}x2o + {ns}x0o'
            ]
        else:
            irrep_seq = [
                f'{ns}x0e',
                f'{ns}x0e + {nv}x1o',
                f'{ns}x0e + {nv}x1o + {nv}x1e',
                f'{ns}x0e + {nv}x1o + {nv}x1e + {ns}x0o'
            ]

        # convolutional layers
        conv_layers = []
        for i in range(num_conv_layers):
            in_irreps = irrep_seq[min(i, len(irrep_seq) - 1)]
            out_irreps = irrep_seq[min(i + 1, len(irrep_seq) - 1)]
            parameters = {
                'in_irreps': in_irreps,
                'sh_irreps': self.sh_irreps,
                'out_irreps': out_irreps,
                'n_edge_features': 3 * ns,
                'residual': False,
                'batch_norm': batch_norm,
                'dropout': dropout
            }

            for _ in range(9):  # 3 intra & 6 inter per each layer
                conv_layers.append(TensorProductConvLayer(**parameters))

        self.conv_layers = nn.ModuleList(conv_layers)

        # confidence and affinity prediction layers
        if self.confidence_mode:
            output_confidence_dim = num_confidence_outputs

            self.confidence_predictor = nn.Sequential(
                nn.Linear(2 * self.ns if num_conv_layers >= 3 else self.ns, ns),
                nn.BatchNorm1d(ns) if not confidence_no_batchnorm else nn.Identity(),
                nn.ReLU(),
                nn.Dropout(confidence_dropout),
                nn.Linear(ns, ns),
                nn.BatchNorm1d(ns) if not confidence_no_batchnorm else nn.Identity(),
                nn.ReLU(),
                nn.Dropout(confidence_dropout),
                nn.Linear(ns, output_confidence_dim)
            )

        else:
            # convolution for translational and rotational scores
            self.center_distance_expansion = GaussianSmearing(0.0, center_max_distance, distance_embed_dim)
            self.center_edge_embedding = nn.Sequential(
                nn.Linear(distance_embed_dim + sigma_embed_dim, ns),
                nn.ReLU(),
                nn.Dropout(dropout),
                nn.Linear(ns, ns)
            )

            self.final_conv = TensorProductConvLayer(
                in_irreps=self.conv_layers[-1].out_irreps,
                sh_irreps=self.sh_irreps,
                out_irreps='2x1o + 2x1e',
                n_edge_features=2 * ns,
                residual=False,
                dropout=dropout,
                batch_norm=batch_norm
            )

            self.tr_final_layer = nn.Sequential(nn.Linear(1 + sigma_embed_dim, ns), nn.Dropout(dropout), nn.ReLU(), nn.Linear(ns, 1))
            self.rot_final_layer = nn.Sequential(nn.Linear(1 + sigma_embed_dim, ns), nn.Dropout(dropout), nn.ReLU(), nn.Linear(ns, 1))

            if not no_torsion:
                # convolution for torsional score
                self.final_edge_embedding = nn.Sequential(
                    nn.Linear(distance_embed_dim, ns),
                    nn.ReLU(),
                    nn.Dropout(dropout),
                    nn.Linear(ns, ns)
                )
                self.final_tp_tor = o3.FullTensorProduct(self.sh_irreps, "2e")
                self.tor_bond_conv = TensorProductConvLayer(
                    in_irreps=self.conv_layers[-1].out_irreps,
                    sh_irreps=self.final_tp_tor.irreps_out,
                    out_irreps=f'{ns}x0o + {ns}x0e',
                    n_edge_features=3 * ns,
                    residual=False,
                    dropout=dropout,
                    batch_norm=batch_norm
                )
                self.tor_final_layer = nn.Sequential(
                    nn.Linear(2 * ns, ns, bias=False),
                    nn.Tanh(),
                    nn.Dropout(dropout),
                    nn.Linear(ns, 1, bias=False)
                )

    def forward(self, data):
        """Run the model on a batch of heterogeneous complex graphs.

        Args:
            data: batch exposing ``'ligand'``, ``'receptor'`` and ``'atom'`` node stores, their
                edge stores, ``complex_t`` and ``num_graphs`` (as read by the ``build_*`` helpers).

        Returns:
            In confidence mode, the confidence head output (last dim squeezed).
            Otherwise a tuple ``(tr_pred, rot_pred, tor_pred)``; ``tor_pred`` is an empty tensor
            when torsion is disabled or the batch has no masked ligand edges.
        """
        if not self.confidence_mode:
            tr_sigma, rot_sigma, tor_sigma = self.t_to_sigma(*[data.complex_t[noise_type] for noise_type in ['tr', 'rot', 'tor']])
        else:
            tr_sigma, rot_sigma, tor_sigma = [data.complex_t[noise_type] for noise_type in ['tr', 'rot', 'tor']]

        # build ligand graph
        lig_node_attr, lig_edge_index, lig_edge_attr, lig_edge_sh = self.build_lig_conv_graph(data)
        lig_node_attr = self.lig_node_embedding(lig_node_attr)
        lig_edge_attr = self.lig_edge_embedding(lig_edge_attr)

        # build receptor graph
        rec_node_attr, rec_edge_index, rec_edge_attr, rec_edge_sh = self.build_rec_conv_graph(data)
        rec_node_attr = self.rec_node_embedding(rec_node_attr)
        rec_edge_attr = self.rec_edge_embedding(rec_edge_attr)

        # build atom graph
        atom_node_attr, atom_edge_index, atom_edge_attr, atom_edge_sh = self.build_atom_conv_graph(data)
        atom_node_attr = self.atom_node_embedding(atom_node_attr)
        atom_edge_attr = self.atom_edge_embedding(atom_edge_attr)

        # build cross graph
        cross_cutoff = (tr_sigma * 3 + 20).unsqueeze(1) if self.dynamic_max_cross else self.cross_max_distance
        lr_edge_index, lr_edge_attr, lr_edge_sh, la_edge_index, la_edge_attr, \
            la_edge_sh, ar_edge_index, ar_edge_attr, ar_edge_sh = self.build_cross_conv_graph(data, cross_cutoff)
        lr_edge_attr = self.lr_edge_embedding(lr_edge_attr)
        la_edge_attr = self.la_edge_embedding(la_edge_attr)
        ar_edge_attr = self.ar_edge_embedding(ar_edge_attr)

        for l in range(self.num_conv_layers):
            # LIGAND updates
            lig_edge_attr_ = torch.cat([lig_edge_attr, lig_node_attr[lig_edge_index[0], :self.ns], lig_node_attr[lig_edge_index[1], :self.ns]], -1)
            lig_update = self.conv_layers[9*l](lig_node_attr, lig_edge_index, lig_edge_attr_, lig_edge_sh)

            lr_edge_attr_ = torch.cat([lr_edge_attr, lig_node_attr[lr_edge_index[0], :self.ns], rec_node_attr[lr_edge_index[1], :self.ns]], -1)
            lr_update = self.conv_layers[9*l+1](rec_node_attr, lr_edge_index, lr_edge_attr_, lr_edge_sh,
                                                out_nodes=lig_node_attr.shape[0])

            la_edge_attr_ = torch.cat([la_edge_attr, lig_node_attr[la_edge_index[0], :self.ns], atom_node_attr[la_edge_index[1], :self.ns]], -1)
            la_update = self.conv_layers[9*l+2](atom_node_attr, la_edge_index, la_edge_attr_, la_edge_sh,
                                                out_nodes=lig_node_attr.shape[0])

            if l != self.num_conv_layers-1:  # last layer optimisation

                # ATOM UPDATES
                atom_edge_attr_ = torch.cat([atom_edge_attr, atom_node_attr[atom_edge_index[0], :self.ns], atom_node_attr[atom_edge_index[1], :self.ns]], -1)
                atom_update = self.conv_layers[9*l+3](atom_node_attr, atom_edge_index, atom_edge_attr_, atom_edge_sh)

                al_edge_attr_ = torch.cat([la_edge_attr, atom_node_attr[la_edge_index[1], :self.ns], lig_node_attr[la_edge_index[0], :self.ns]], -1)
                al_update = self.conv_layers[9*l+4](lig_node_attr, torch.flip(la_edge_index, dims=[0]), al_edge_attr_,
                                                    la_edge_sh, out_nodes=atom_node_attr.shape[0])

                ar_edge_attr_ = torch.cat([ar_edge_attr, atom_node_attr[ar_edge_index[0], :self.ns], rec_node_attr[ar_edge_index[1], :self.ns]], -1)
                ar_update = self.conv_layers[9*l+5](rec_node_attr, ar_edge_index, ar_edge_attr_, ar_edge_sh, out_nodes=atom_node_attr.shape[0])

                # RECEPTOR updates
                rec_edge_attr_ = torch.cat([rec_edge_attr, rec_node_attr[rec_edge_index[0], :self.ns], rec_node_attr[rec_edge_index[1], :self.ns]], -1)
                rec_update = self.conv_layers[9*l+6](rec_node_attr, rec_edge_index, rec_edge_attr_, rec_edge_sh)

                rl_edge_attr_ = torch.cat([lr_edge_attr, rec_node_attr[lr_edge_index[1], :self.ns], lig_node_attr[lr_edge_index[0], :self.ns]], -1)
                rl_update = self.conv_layers[9*l+7](lig_node_attr, torch.flip(lr_edge_index, dims=[0]), rl_edge_attr_,
                                                    lr_edge_sh, out_nodes=rec_node_attr.shape[0])

                ra_edge_attr_ = torch.cat([ar_edge_attr, rec_node_attr[ar_edge_index[1], :self.ns], atom_node_attr[ar_edge_index[0], :self.ns]], -1)
                ra_update = self.conv_layers[9*l+8](atom_node_attr, torch.flip(ar_edge_index, dims=[0]), ra_edge_attr_,
                                                    ar_edge_sh, out_nodes=rec_node_attr.shape[0])

            # padding original features and update features with residual updates
            lig_node_attr = F.pad(lig_node_attr, (0, lig_update.shape[-1] - lig_node_attr.shape[-1]))
            lig_node_attr = lig_node_attr + lig_update + la_update + lr_update

            if l != self.num_conv_layers - 1:  # last layer optimisation
                # pad each tensor by the gap to ITS OWN update width (the atom pad previously
                # used the receptor width, which is only correct while both widths coincide)
                atom_node_attr = F.pad(atom_node_attr, (0, atom_update.shape[-1] - atom_node_attr.shape[-1]))
                atom_node_attr = atom_node_attr + atom_update + al_update + ar_update
                rec_node_attr = F.pad(rec_node_attr, (0, rec_update.shape[-1] - rec_node_attr.shape[-1]))
                rec_node_attr = rec_node_attr + rec_update + ra_update + rl_update

        # confidence and affinity prediction
        if self.confidence_mode:
            scalar_lig_attr = torch.cat([lig_node_attr[:, :self.ns], lig_node_attr[:, -self.ns:]], dim=1) if self.num_conv_layers >= 3 else lig_node_attr[:, :self.ns]
            confidence = self.confidence_predictor(scatter_mean(scalar_lig_attr, data['ligand'].batch, dim=0)).squeeze(dim=-1)
            return confidence

        # compute translational and rotational score vectors
        center_edge_index, center_edge_attr, center_edge_sh = self.build_center_conv_graph(data)
        center_edge_attr = self.center_edge_embedding(center_edge_attr)
        center_edge_attr = torch.cat([center_edge_attr, lig_node_attr[center_edge_index[1], :self.ns]], -1)
        global_pred = self.final_conv(lig_node_attr, center_edge_index, center_edge_attr, center_edge_sh, out_nodes=data.num_graphs)

        # `final_conv` outputs '2x1o + 2x1e' (12 values): columns 0:3 + 6:9 form the translation
        # prediction, columns 3:6 + 9:12 the rotation prediction
        tr_pred = global_pred[:, :3] + global_pred[:, 6:9]
        rot_pred = global_pred[:, 3:6] + global_pred[:, 9:]
        data.graph_sigma_emb = self.timestep_emb_func(data.complex_t['tr'])

        # adjust the magnitude of the score vectors
        tr_norm = torch.linalg.vector_norm(tr_pred, dim=1).unsqueeze(1)
        tr_pred = tr_pred / tr_norm * self.tr_final_layer(torch.cat([tr_norm, data.graph_sigma_emb], dim=1))

        rot_norm = torch.linalg.vector_norm(rot_pred, dim=1).unsqueeze(1)
        rot_pred = rot_pred / rot_norm * self.rot_final_layer(torch.cat([rot_norm, data.graph_sigma_emb], dim=1))

        if self.scale_by_sigma:
            tr_pred = tr_pred / tr_sigma.unsqueeze(1)
            rot_pred = rot_pred * so3.score_norm(rot_sigma.cpu()).unsqueeze(1).to(data['ligand'].x.device)

        if self.no_torsion or data['ligand'].edge_mask.sum() == 0: return tr_pred, rot_pred, torch.empty(0, device=self.device)

        # torsional components
        tor_bonds, tor_edge_index, tor_edge_attr, tor_edge_sh = self.build_bond_conv_graph(data)
        tor_bond_vec = data['ligand'].pos[tor_bonds[1]] - data['ligand'].pos[tor_bonds[0]]
        tor_bond_attr = lig_node_attr[tor_bonds[0]] + lig_node_attr[tor_bonds[1]]

        tor_bonds_sh = o3.spherical_harmonics("2e", tor_bond_vec, normalize=True, normalization='component')
        tor_edge_sh = self.final_tp_tor(tor_edge_sh, tor_bonds_sh[tor_edge_index[0]])

        tor_edge_attr = torch.cat([tor_edge_attr, lig_node_attr[tor_edge_index[1], :self.ns],
                                   tor_bond_attr[tor_edge_index[0], :self.ns]], -1)
        tor_pred = self.tor_bond_conv(lig_node_attr, tor_edge_index, tor_edge_attr, tor_edge_sh,
                                      out_nodes=data['ligand'].edge_mask.sum(), reduce='mean')
        tor_pred = self.tor_final_layer(tor_pred).squeeze(1)
        edge_sigma = tor_sigma[data['ligand'].batch][data['ligand', 'ligand'].edge_index[0]][data['ligand'].edge_mask]

        if self.scale_by_sigma:
            tor_pred = tor_pred * torch.sqrt(torch.tensor(torus.score_norm(edge_sigma.cpu().numpy())).float()
                                             .to(data['ligand'].x.device))
        return tr_pred, rot_pred, tor_pred

    def build_lig_conv_graph(self, data):
        """Return ``(node_attr, edge_index, edge_attr, edge_sh)`` for the ligand-ligand graph.

        Edges are the stored ligand edges plus radius-graph edges within ``lig_max_radius``;
        the radius edges get all-zero edge features. Also stores ``data['ligand'].node_sigma_emb``.
        """
        # build the graph between ligand atoms
        data['ligand'].node_sigma_emb = self.timestep_emb_func(data['ligand'].node_t['tr'])

        radius_edges = radius_graph(data['ligand'].pos, self.lig_max_radius, data['ligand'].batch)
        edge_index = torch.cat([data['ligand', 'ligand'].edge_index, radius_edges], 1).long()
        edge_attr = torch.cat([
            data['ligand', 'ligand'].edge_attr,
            torch.zeros(radius_edges.shape[-1], self.in_lig_edge_features, device=data['ligand'].x.device)
        ], 0)

        edge_sigma_emb = data['ligand'].node_sigma_emb[edge_index[0].long()]
        edge_attr = torch.cat([edge_attr, edge_sigma_emb], 1)
        node_attr = torch.cat([data['ligand'].x, data['ligand'].node_sigma_emb], 1)

        src, dst = edge_index
        edge_vec = data['ligand'].pos[dst.long()] - data['ligand'].pos[src.long()]
        edge_length_emb = self.lig_distance_expansion(edge_vec.norm(dim=-1))

        edge_attr = torch.cat([edge_attr, edge_length_emb], 1)
        edge_sh = o3.spherical_harmonics(self.sh_irreps, edge_vec, normalize=True, normalization='component')

        return node_attr, edge_index, edge_attr, edge_sh

    def build_rec_conv_graph(self, data):
        """Return ``(node_attr, edge_index, edge_attr, edge_sh)`` for the receptor-residue graph.

        Uses the edges already stored on ``data``. Also stores ``data['receptor'].node_sigma_emb``.
        """
        # build the graph between receptor residues
        data['receptor'].node_sigma_emb = self.timestep_emb_func(data['receptor'].node_t['tr'])
        node_attr = torch.cat([data['receptor'].x, data['receptor'].node_sigma_emb], 1)

        # this assumes the edges were already created in preprocessing since protein's structure is fixed
        edge_index = data['receptor', 'receptor'].edge_index
        src, dst = edge_index
        edge_vec = data['receptor'].pos[dst.long()] - data['receptor'].pos[src.long()]

        edge_length_emb = self.rec_distance_expansion(edge_vec.norm(dim=-1))
        edge_sigma_emb = data['receptor'].node_sigma_emb[edge_index[0].long()]
        edge_attr = torch.cat([edge_sigma_emb, edge_length_emb], 1)
        edge_sh = o3.spherical_harmonics(self.sh_irreps, edge_vec, normalize=True, normalization='component')

        return node_attr, edge_index, edge_attr, edge_sh

    def build_atom_conv_graph(self, data):
        """Return ``(node_attr, edge_index, edge_attr, edge_sh)`` for the receptor-atom graph.

        Uses the edges already stored on ``data``; distances are expanded with the ligand
        distance expansion. Also stores ``data['atom'].node_sigma_emb``.
        """
        # build the graph between receptor atoms
        data['atom'].node_sigma_emb = self.timestep_emb_func(data['atom'].node_t['tr'])
        node_attr = torch.cat([data['atom'].x, data['atom'].node_sigma_emb], 1)

        # this assumes the edges were already created in preprocessing since protein's structure is fixed
        edge_index = data['atom', 'atom'].edge_index
        src, dst = edge_index
        edge_vec = data['atom'].pos[dst.long()] - data['atom'].pos[src.long()]

        edge_length_emb = self.lig_distance_expansion(edge_vec.norm(dim=-1))
        edge_sigma_emb = data['atom'].node_sigma_emb[edge_index[0].long()]
        edge_attr = torch.cat([edge_sigma_emb, edge_length_emb], 1)
        edge_sh = o3.spherical_harmonics(self.sh_irreps, edge_vec, normalize=True, normalization='component')

        return node_attr, edge_index, edge_attr, edge_sh

    def build_cross_conv_graph(self, data, lr_cross_distance_cutoff):
        """Build ligand-receptor, ligand-atom and atom-receptor cross edges.

        Args:
            data: batch of complex graphs; the ligand and atom ``node_sigma_emb`` must already
                be set (done by ``build_lig_conv_graph`` / ``build_atom_conv_graph``).
            lr_cross_distance_cutoff: scalar cutoff, or a tensor indexed per graph.

        Returns:
            ``(lr_edge_index, lr_edge_attr, lr_edge_sh, la_edge_index, la_edge_attr, la_edge_sh,
            ar_edge_index, ar_edge_attr, ar_edge_sh)``.
        """
        # build the cross edges between ligand atoms, receptor residues and receptor atoms

        # LIGAND to RECEPTOR
        if torch.is_tensor(lr_cross_distance_cutoff):
            # different cutoff for every graph: rescale positions so a unit radius applies to all
            lr_edge_index = radius(data['receptor'].pos / lr_cross_distance_cutoff[data['receptor'].batch],
                                   data['ligand'].pos / lr_cross_distance_cutoff[data['ligand'].batch], 1,
                                   data['receptor'].batch, data['ligand'].batch, max_num_neighbors=10000)
        else:
            lr_edge_index = radius(data['receptor'].pos, data['ligand'].pos, lr_cross_distance_cutoff,
                                   data['receptor'].batch, data['ligand'].batch, max_num_neighbors=10000)

        lr_edge_vec = data['receptor'].pos[lr_edge_index[1].long()] - data['ligand'].pos[lr_edge_index[0].long()]
        lr_edge_length_emb = self.cross_distance_expansion(lr_edge_vec.norm(dim=-1))
        lr_edge_sigma_emb = data['ligand'].node_sigma_emb[lr_edge_index[0].long()]
        lr_edge_attr = torch.cat([lr_edge_sigma_emb, lr_edge_length_emb], 1)
        lr_edge_sh = o3.spherical_harmonics(self.sh_irreps, lr_edge_vec, normalize=True, normalization='component')

        # LIGAND to ATOM
        la_edge_index = radius(data['atom'].pos, data['ligand'].pos, self.lig_max_radius,
                               data['atom'].batch, data['ligand'].batch, max_num_neighbors=10000)

        la_edge_vec = data['atom'].pos[la_edge_index[1].long()] - data['ligand'].pos[la_edge_index[0].long()]
        la_edge_length_emb = self.cross_distance_expansion(la_edge_vec.norm(dim=-1))
        la_edge_sigma_emb = data['ligand'].node_sigma_emb[la_edge_index[0].long()]
        la_edge_attr = torch.cat([la_edge_sigma_emb, la_edge_length_emb], 1)
        la_edge_sh = o3.spherical_harmonics(self.sh_irreps, la_edge_vec, normalize=True, normalization='component')

        # ATOM to RECEPTOR
        ar_edge_index = data['atom', 'receptor'].edge_index
        ar_edge_vec = data['receptor'].pos[ar_edge_index[1].long()] - data['atom'].pos[ar_edge_index[0].long()]
        ar_edge_length_emb = self.rec_distance_expansion(ar_edge_vec.norm(dim=-1))
        ar_edge_sigma_emb = data['atom'].node_sigma_emb[ar_edge_index[0].long()]
        ar_edge_attr = torch.cat([ar_edge_sigma_emb, ar_edge_length_emb], 1)
        ar_edge_sh = o3.spherical_harmonics(self.sh_irreps, ar_edge_vec, normalize=True, normalization='component')

        return lr_edge_index, lr_edge_attr, lr_edge_sh, la_edge_index, la_edge_attr, \
            la_edge_sh, ar_edge_index, ar_edge_attr, ar_edge_sh

    def build_center_conv_graph(self, data):
        """Return ``(edge_index, edge_attr, edge_sh)`` linking each graph's ligand centre to its atoms.

        ``edge_index[0]`` is the graph index and ``edge_index[1]`` the ligand atom index; the
        centre is the mean ligand atom position per graph.
        """
        # build the filter for the convolution of the center with the ligand atoms
        # for translational and rotational score
        edge_index = torch.cat([data['ligand'].batch.unsqueeze(0), torch.arange(len(data['ligand'].batch)).to(data['ligand'].x.device).unsqueeze(0)], dim=0)

        center_pos = torch.zeros((data.num_graphs, 3)).to(data['ligand'].x.device)
        center_pos.index_add_(0, index=data['ligand'].batch, source=data['ligand'].pos)
        center_pos = center_pos / torch.bincount(data['ligand'].batch).unsqueeze(1)

        edge_vec = data['ligand'].pos[edge_index[1]] - center_pos[edge_index[0]]
        edge_attr = self.center_distance_expansion(edge_vec.norm(dim=-1))
        edge_sigma_emb = data['ligand'].node_sigma_emb[edge_index[1].long()]
        edge_attr = torch.cat([edge_attr, edge_sigma_emb], 1)
        edge_sh = o3.spherical_harmonics(self.sh_irreps, edge_vec, normalize=True, normalization='component')
        return edge_index, edge_attr, edge_sh

    def build_bond_conv_graph(self, data):
        """Return ``(bonds, edge_index, edge_attr, edge_sh)`` for the torsion (pseudotorque) layer.

        ``bonds`` are the ligand edges selected by ``edge_mask``; each bond midpoint is connected
        to the ligand atoms within ``lig_max_radius`` of it.
        """
        # build graph for the pseudotorque layer
        bonds = data['ligand', 'ligand'].edge_index[:, data['ligand'].edge_mask].long()
        bond_pos = (data['ligand'].pos[bonds[0]] + data['ligand'].pos[bonds[1]]) / 2
        bond_batch = data['ligand'].batch[bonds[0]]
        edge_index = radius(data['ligand'].pos, bond_pos, self.lig_max_radius, batch_x=data['ligand'].batch, batch_y=bond_batch)

        edge_vec = data['ligand'].pos[edge_index[1]] - bond_pos[edge_index[0]]
        edge_attr = self.lig_distance_expansion(edge_vec.norm(dim=-1))

        edge_attr = self.final_edge_embedding(edge_attr)
        edge_sh = o3.spherical_harmonics(self.sh_irreps, edge_vec, normalize=True, normalization='component')

        return bonds, edge_index, edge_attr, edge_sh
class AtomEncoder(torch.nn.Module):
    """Embed per-node features into one ``emb_dim``-wide vector per node.

    The input row layout expected by ``forward`` is
    ``[categorical columns | scalar columns (+ sigma embedding) | optional LM embedding]``.
    Each categorical column gets its own ``Embedding`` table and the results are
    summed; the scalar columns go through one ``Linear`` layer and are added to
    that sum; when an LM embedding type is configured, the trailing LM columns
    are concatenated to the sum and projected back to ``emb_dim``.

    Args:
        emb_dim: width of the output embedding.
        feature_dims: tuple whose first element is a list with the length
            (number of categories) of each categorical feature and whose second
            element is the number of scalar features.
        sigma_embed_dim: number of extra scalar columns carrying the noise-level
            embedding (added to the scalar feature count).
        lm_embedding_type: ``None`` or ``'esm'`` (1280 trailing columns).

    Raises:
        ValueError: if ``lm_embedding_type`` is neither ``None`` nor ``'esm'``.
    """

    def __init__(self, emb_dim, feature_dims, sigma_embed_dim, lm_embedding_type=None):
        # first element of feature_dims tuple is a list with the length of each
        # categorical feature and the second is the number of scalar features
        super(AtomEncoder, self).__init__()
        self.atom_embedding_list = torch.nn.ModuleList()
        self.num_categorical_features = len(feature_dims[0])
        self.num_scalar_features = feature_dims[1] + sigma_embed_dim
        self.lm_embedding_type = lm_embedding_type
        for dim in feature_dims[0]:
            emb = torch.nn.Embedding(dim, emb_dim)
            torch.nn.init.xavier_uniform_(emb.weight.data)
            self.atom_embedding_list.append(emb)

        if self.num_scalar_features > 0:
            self.linear = torch.nn.Linear(self.num_scalar_features, emb_dim)
        if self.lm_embedding_type is not None:
            if self.lm_embedding_type == 'esm':
                self.lm_embedding_dim = 1280
            else:
                # Single formatted message: passing the value as a second
                # positional argument made the error render as a tuple.
                raise ValueError(
                    'LM Embedding type was not correctly determined. '
                    f'LM embedding type: {self.lm_embedding_type}'
                )
            self.lm_embedding_layer = torch.nn.Linear(self.lm_embedding_dim + emb_dim, emb_dim)

    def forward(self, x):
        """Return the summed embedding for feature matrix ``x`` (one row per node)."""
        x_embedding = 0
        expected_width = self.num_categorical_features + self.num_scalar_features
        if self.lm_embedding_type is not None:
            expected_width += self.lm_embedding_dim
        assert x.shape[1] == expected_width

        # categorical columns: one embedding table per column, summed
        for i in range(self.num_categorical_features):
            x_embedding += self.atom_embedding_list[i](x[:, i].long())

        # scalar columns (including the sigma embedding) share one linear layer
        if self.num_scalar_features > 0:
            scalar_end = self.num_categorical_features + self.num_scalar_features
            x_embedding += self.linear(x[:, self.num_categorical_features:scalar_end])

        # trailing LM columns are fused with the embedding built so far
        if self.lm_embedding_type is not None:
            x_embedding = self.lm_embedding_layer(
                torch.cat([x_embedding, x[:, -self.lm_embedding_dim:]], axis=1)
            )
        return x_embedding
node_attr.shape[-1])) + out = out + padded + + if self.batch_norm: + out = self.batch_norm(out) + return out + + +class TensorProductScoreModel(torch.nn.Module): + def __init__(self, t_to_sigma, device, timestep_emb_func, in_lig_edge_features=4, sigma_embed_dim=32, sh_lmax=2, + ns=16, nv=4, num_conv_layers=2, lig_max_radius=5, rec_max_radius=30, cross_max_distance=250, + center_max_distance=30, distance_embed_dim=32, cross_distance_embed_dim=32, no_torsion=False, + scale_by_sigma=True, use_second_order_repr=False, batch_norm=True, + dynamic_max_cross=False, dropout=0.0, lm_embedding_type=None, confidence_mode=False, + confidence_dropout=0, confidence_no_batchnorm=False, num_confidence_outputs=1): + super(TensorProductScoreModel, self).__init__() + self.t_to_sigma = t_to_sigma + self.in_lig_edge_features = in_lig_edge_features + self.sigma_embed_dim = sigma_embed_dim + self.lig_max_radius = lig_max_radius + self.rec_max_radius = rec_max_radius + self.cross_max_distance = cross_max_distance + self.dynamic_max_cross = dynamic_max_cross + self.center_max_distance = center_max_distance + self.distance_embed_dim = distance_embed_dim + self.cross_distance_embed_dim = cross_distance_embed_dim + self.sh_irreps = o3.Irreps.spherical_harmonics(lmax=sh_lmax) + self.ns, self.nv = ns, nv + self.scale_by_sigma = scale_by_sigma + self.device = device + self.no_torsion = no_torsion + self.timestep_emb_func = timestep_emb_func + self.confidence_mode = confidence_mode + self.num_conv_layers = num_conv_layers + + self.lig_node_embedding = AtomEncoder(emb_dim=ns, feature_dims=lig_feature_dims, sigma_embed_dim=sigma_embed_dim) + self.lig_edge_embedding = nn.Sequential(nn.Linear(in_lig_edge_features + sigma_embed_dim + distance_embed_dim, ns),nn.ReLU(), nn.Dropout(dropout),nn.Linear(ns, ns)) + + self.rec_node_embedding = AtomEncoder(emb_dim=ns, feature_dims=rec_residue_feature_dims, sigma_embed_dim=sigma_embed_dim, lm_embedding_type=lm_embedding_type) + self.rec_edge_embedding = 
nn.Sequential(nn.Linear(sigma_embed_dim + distance_embed_dim, ns), nn.ReLU(), nn.Dropout(dropout),nn.Linear(ns, ns)) + + self.cross_edge_embedding = nn.Sequential(nn.Linear(sigma_embed_dim + cross_distance_embed_dim, ns), nn.ReLU(), nn.Dropout(dropout),nn.Linear(ns, ns)) + + self.lig_distance_expansion = GaussianSmearing(0.0, lig_max_radius, distance_embed_dim) + self.rec_distance_expansion = GaussianSmearing(0.0, rec_max_radius, distance_embed_dim) + self.cross_distance_expansion = GaussianSmearing(0.0, cross_max_distance, cross_distance_embed_dim) + + if use_second_order_repr: + irrep_seq = [ + f'{ns}x0e', + f'{ns}x0e + {nv}x1o + {nv}x2e', + f'{ns}x0e + {nv}x1o + {nv}x2e + {nv}x1e + {nv}x2o', + f'{ns}x0e + {nv}x1o + {nv}x2e + {nv}x1e + {nv}x2o + {ns}x0o' + ] + else: + irrep_seq = [ + f'{ns}x0e', + f'{ns}x0e + {nv}x1o', + f'{ns}x0e + {nv}x1o + {nv}x1e', + f'{ns}x0e + {nv}x1o + {nv}x1e + {ns}x0o' + ] + + lig_conv_layers, rec_conv_layers, lig_to_rec_conv_layers, rec_to_lig_conv_layers = [], [], [], [] + for i in range(num_conv_layers): + in_irreps = irrep_seq[min(i, len(irrep_seq) - 1)] + out_irreps = irrep_seq[min(i + 1, len(irrep_seq) - 1)] + parameters = { + 'in_irreps': in_irreps, + 'sh_irreps': self.sh_irreps, + 'out_irreps': out_irreps, + 'n_edge_features': 3 * ns, + 'hidden_features': 3 * ns, + 'residual': False, + 'batch_norm': batch_norm, + 'dropout': dropout + } + + lig_layer = TensorProductConvLayer(**parameters) + lig_conv_layers.append(lig_layer) + rec_layer = TensorProductConvLayer(**parameters) + rec_conv_layers.append(rec_layer) + lig_to_rec_layer = TensorProductConvLayer(**parameters) + lig_to_rec_conv_layers.append(lig_to_rec_layer) + rec_to_lig_layer = TensorProductConvLayer(**parameters) + rec_to_lig_conv_layers.append(rec_to_lig_layer) + + self.lig_conv_layers = nn.ModuleList(lig_conv_layers) + self.rec_conv_layers = nn.ModuleList(rec_conv_layers) + self.lig_to_rec_conv_layers = nn.ModuleList(lig_to_rec_conv_layers) + self.rec_to_lig_conv_layers 
= nn.ModuleList(rec_to_lig_conv_layers) + + if self.confidence_mode: + self.confidence_predictor = nn.Sequential( + nn.Linear(2*self.ns if num_conv_layers >= 3 else self.ns,ns), + nn.BatchNorm1d(ns) if not confidence_no_batchnorm else nn.Identity(), + nn.ReLU(), + nn.Dropout(confidence_dropout), + nn.Linear(ns, ns), + nn.BatchNorm1d(ns) if not confidence_no_batchnorm else nn.Identity(), + nn.ReLU(), + nn.Dropout(confidence_dropout), + nn.Linear(ns, num_confidence_outputs) + ) + else: + # center of mass translation and rotation components + self.center_distance_expansion = GaussianSmearing(0.0, center_max_distance, distance_embed_dim) + self.center_edge_embedding = nn.Sequential( + nn.Linear(distance_embed_dim + sigma_embed_dim, ns), + nn.ReLU(), + nn.Dropout(dropout), + nn.Linear(ns, ns) + ) + + self.final_conv = TensorProductConvLayer( + in_irreps=self.lig_conv_layers[-1].out_irreps, + sh_irreps=self.sh_irreps, + out_irreps=f'2x1o + 2x1e', + n_edge_features=2 * ns, + residual=False, + dropout=dropout, + batch_norm=batch_norm + ) + self.tr_final_layer = nn.Sequential(nn.Linear(1 + sigma_embed_dim, ns),nn.Dropout(dropout), nn.ReLU(), nn.Linear(ns, 1)) + self.rot_final_layer = nn.Sequential(nn.Linear(1 + sigma_embed_dim, ns),nn.Dropout(dropout), nn.ReLU(), nn.Linear(ns, 1)) + + if not no_torsion: + # torsion angles components + self.final_edge_embedding = nn.Sequential( + nn.Linear(distance_embed_dim, ns), + nn.ReLU(), + nn.Dropout(dropout), + nn.Linear(ns, ns) + ) + self.final_tp_tor = o3.FullTensorProduct(self.sh_irreps, "2e") + self.tor_bond_conv = TensorProductConvLayer( + in_irreps=self.lig_conv_layers[-1].out_irreps, + sh_irreps=self.final_tp_tor.irreps_out, + out_irreps=f'{ns}x0o + {ns}x0e', + n_edge_features=3 * ns, + residual=False, + dropout=dropout, + batch_norm=batch_norm + ) + self.tor_final_layer = nn.Sequential( + nn.Linear(2 * ns, ns, bias=False), + nn.Tanh(), + nn.Dropout(dropout), + nn.Linear(ns, 1, bias=False) + ) + + def forward(self, data): + if 
not self.confidence_mode: + tr_sigma, rot_sigma, tor_sigma = self.t_to_sigma(*[data.complex_t[noise_type] for noise_type in ['tr', 'rot', 'tor']]) + else: + tr_sigma, rot_sigma, tor_sigma = [data.complex_t[noise_type] for noise_type in ['tr', 'rot', 'tor']] + + # build ligand graph + lig_node_attr, lig_edge_index, lig_edge_attr, lig_edge_sh = self.build_lig_conv_graph(data) + lig_src, lig_dst = lig_edge_index + lig_node_attr = self.lig_node_embedding(lig_node_attr) + lig_edge_attr = self.lig_edge_embedding(lig_edge_attr) + + # build receptor graph + rec_node_attr, rec_edge_index, rec_edge_attr, rec_edge_sh = self.build_rec_conv_graph(data) + rec_src, rec_dst = rec_edge_index + rec_node_attr = self.rec_node_embedding(rec_node_attr) + rec_edge_attr = self.rec_edge_embedding(rec_edge_attr) + + # build cross graph + if self.dynamic_max_cross: + cross_cutoff = (tr_sigma * 3 + 20).unsqueeze(1) + else: + cross_cutoff = self.cross_max_distance + cross_edge_index, cross_edge_attr, cross_edge_sh = self.build_cross_conv_graph(data, cross_cutoff) + cross_lig, cross_rec = cross_edge_index + cross_edge_attr = self.cross_edge_embedding(cross_edge_attr) + + for l in range(len(self.lig_conv_layers)): + # intra graph message passing + lig_edge_attr_ = torch.cat([lig_edge_attr, lig_node_attr[lig_src, :self.ns], lig_node_attr[lig_dst, :self.ns]], -1) + lig_intra_update = self.lig_conv_layers[l](lig_node_attr, lig_edge_index, lig_edge_attr_, lig_edge_sh) + + # inter graph message passing + rec_to_lig_edge_attr_ = torch.cat([cross_edge_attr, lig_node_attr[cross_lig, :self.ns], rec_node_attr[cross_rec, :self.ns]], -1) + lig_inter_update = self.rec_to_lig_conv_layers[l](rec_node_attr, cross_edge_index, rec_to_lig_edge_attr_, cross_edge_sh, + out_nodes=lig_node_attr.shape[0]) + + if l != len(self.lig_conv_layers) - 1: + rec_edge_attr_ = torch.cat([rec_edge_attr, rec_node_attr[rec_src, :self.ns], rec_node_attr[rec_dst, :self.ns]], -1) + rec_intra_update = 
self.rec_conv_layers[l](rec_node_attr, rec_edge_index, rec_edge_attr_, rec_edge_sh) + + lig_to_rec_edge_attr_ = torch.cat([cross_edge_attr, lig_node_attr[cross_lig, :self.ns], rec_node_attr[cross_rec, :self.ns]], -1) + rec_inter_update = self.lig_to_rec_conv_layers[l](lig_node_attr, torch.flip(cross_edge_index, dims=[0]), lig_to_rec_edge_attr_, + cross_edge_sh, out_nodes=rec_node_attr.shape[0]) + + # padding original features + lig_node_attr = F.pad(lig_node_attr, (0, lig_intra_update.shape[-1] - lig_node_attr.shape[-1])) + + # update features with residual updates + lig_node_attr = lig_node_attr + lig_intra_update + lig_inter_update + + if l != len(self.lig_conv_layers) - 1: + rec_node_attr = F.pad(rec_node_attr, (0, rec_intra_update.shape[-1] - rec_node_attr.shape[-1])) + rec_node_attr = rec_node_attr + rec_intra_update + rec_inter_update + + # compute confidence score + if self.confidence_mode: + scalar_lig_attr = torch.cat([lig_node_attr[:,:self.ns],lig_node_attr[:,-self.ns:] ], dim=1) if self.num_conv_layers >= 3 else lig_node_attr[:,:self.ns] + confidence = self.confidence_predictor(scatter_mean(scalar_lig_attr, data['ligand'].batch, dim=0)).squeeze(dim=-1) + return confidence + + # compute translational and rotational score vectors + center_edge_index, center_edge_attr, center_edge_sh = self.build_center_conv_graph(data) + center_edge_attr = self.center_edge_embedding(center_edge_attr) + center_edge_attr = torch.cat([center_edge_attr, lig_node_attr[center_edge_index[1], :self.ns]], -1) + global_pred = self.final_conv(lig_node_attr, center_edge_index, center_edge_attr, center_edge_sh, out_nodes=data.num_graphs) + + tr_pred = global_pred[:, :3] + global_pred[:, 6:9] + rot_pred = global_pred[:, 3:6] + global_pred[:, 9:] + data.graph_sigma_emb = self.timestep_emb_func(data.complex_t['tr']) + + # fix the magnitude of translational and rotational score vectors + tr_norm = torch.linalg.vector_norm(tr_pred, dim=1).unsqueeze(1) + tr_pred = tr_pred / tr_norm * 
self.tr_final_layer(torch.cat([tr_norm, data.graph_sigma_emb], dim=1)) + rot_norm = torch.linalg.vector_norm(rot_pred, dim=1).unsqueeze(1) + rot_pred = rot_pred / rot_norm * self.rot_final_layer(torch.cat([rot_norm, data.graph_sigma_emb], dim=1)) + + if self.scale_by_sigma: + tr_pred = tr_pred / tr_sigma.unsqueeze(1) + rot_pred = rot_pred * so3.score_norm(rot_sigma.cpu()).unsqueeze(1).to(data['ligand'].x.device) + + if self.no_torsion or data['ligand'].edge_mask.sum() == 0: return tr_pred, rot_pred, torch.empty(0, device=self.device) + + # torsional components + tor_bonds, tor_edge_index, tor_edge_attr, tor_edge_sh = self.build_bond_conv_graph(data) + tor_bond_vec = data['ligand'].pos[tor_bonds[1]] - data['ligand'].pos[tor_bonds[0]] + tor_bond_attr = lig_node_attr[tor_bonds[0]] + lig_node_attr[tor_bonds[1]] + + tor_bonds_sh = o3.spherical_harmonics("2e", tor_bond_vec, normalize=True, normalization='component') + tor_edge_sh = self.final_tp_tor(tor_edge_sh, tor_bonds_sh[tor_edge_index[0]]) + + tor_edge_attr = torch.cat([tor_edge_attr, lig_node_attr[tor_edge_index[1], :self.ns], + tor_bond_attr[tor_edge_index[0], :self.ns]], -1) + tor_pred = self.tor_bond_conv(lig_node_attr, tor_edge_index, tor_edge_attr, tor_edge_sh, + out_nodes=data['ligand'].edge_mask.sum(), reduce='mean') + tor_pred = self.tor_final_layer(tor_pred).squeeze(1) + edge_sigma = tor_sigma[data['ligand'].batch][data['ligand', 'ligand'].edge_index[0]][data['ligand'].edge_mask] + + if self.scale_by_sigma: + tor_pred = tor_pred * torch.sqrt(torch.tensor(torus.score_norm(edge_sigma.cpu().numpy())).float() + .to(data['ligand'].x.device)) + return tr_pred, rot_pred, tor_pred + + def build_lig_conv_graph(self, data): + # builds the ligand graph edges and initial node and edge features + data['ligand'].node_sigma_emb = self.timestep_emb_func(data['ligand'].node_t['tr']) + + # compute edges + radius_edges = radius_graph(data['ligand'].pos, self.lig_max_radius, data['ligand'].batch) + edge_index = 
def build_rec_conv_graph(self, data):
    """Build the receptor node features, edge features and edge harmonics.

    Side effect: stores the timestep embedding on ``data['receptor'].node_sigma_emb``.

    Returns:
        ``(node_attr, edge_index, edge_attr, edge_sh)`` where ``node_attr`` is the
        receptor features concatenated with the sigma embedding, ``edge_attr`` is
        the source-node sigma embedding concatenated with the distance expansion,
        and ``edge_sh`` are the spherical harmonics of the edge vectors.
    """
    receptor = data['receptor']

    # only the 'tr' time is embedded (tr, rot and tor noise is all the same)
    receptor.node_sigma_emb = self.timestep_emb_func(receptor.node_t['tr'])
    node_attr = torch.cat([receptor.x, receptor.node_sigma_emb], 1)

    # the receptor edges are read from the data object, i.e. they are assumed
    # to have been created in preprocessing (the protein structure is fixed)
    edge_index = data['receptor', 'receptor'].edge_index
    sources, targets = edge_index
    edge_vec = receptor.pos[targets.long()] - receptor.pos[sources.long()]

    sigma_part = receptor.node_sigma_emb[edge_index[0].long()]
    length_part = self.rec_distance_expansion(edge_vec.norm(dim=-1))
    edge_attr = torch.cat([sigma_part, length_part], 1)

    edge_sh = o3.spherical_harmonics(
        self.sh_irreps, edge_vec, normalize=True, normalization='component'
    )
    return node_attr, edge_index, edge_attr, edge_sh
def build_center_conv_graph(self, data):
    """Build the edges/filters linking each ligand atom to its graph's centre.

    Used for the convolution that generates the translational and rotational
    scores. Row 0 of ``edge_index`` is the graph (batch) index of each ligand
    atom, row 1 is the atom index, so every atom has exactly one edge to the
    mean ligand position of its own graph.

    Returns:
        ``(edge_index, edge_attr, edge_sh)`` where ``edge_attr`` is the distance
        expansion of the atom-to-centre distance concatenated with the atom's
        sigma embedding, and ``edge_sh`` are the spherical harmonics of the
        centre-to-atom vectors.
    """
    ligand = data['ligand']
    batch = ligand.batch
    device = ligand.x.device

    edge_index = torch.cat(
        [batch.unsqueeze(0), torch.arange(len(batch), device=device).unsqueeze(0)],
        dim=0,
    )

    # Per-graph mean ligand position: sum positions per graph, divide by atom count.
    # (The original also allocated an unused `count` tensor here on every call.)
    # NOTE(review): assumes bincount(batch) has data.num_graphs entries, i.e. the
    # last graph in the batch has at least one ligand atom - confirm.
    center_pos = torch.zeros((data.num_graphs, 3), device=device)
    center_pos.index_add_(0, index=batch, source=ligand.pos)
    center_pos = center_pos / torch.bincount(batch).unsqueeze(1)

    edge_vec = ligand.pos[edge_index[1]] - center_pos[edge_index[0]]
    edge_attr = self.center_distance_expansion(edge_vec.norm(dim=-1))
    edge_sigma_emb = ligand.node_sigma_emb[edge_index[1].long()]
    edge_attr = torch.cat([edge_attr, edge_sigma_emb], 1)
    edge_sh = o3.spherical_harmonics(
        self.sh_irreps, edge_vec, normalize=True, normalization='component'
    )
    return edge_index, edge_attr, edge_sh
class GaussianSmearing(torch.nn.Module):
    """Expand scalar distances into a bank of Gaussian radial basis values.

    Used to embed the edge distances. ``num_gaussians`` centres are spaced
    evenly on ``[start, stop]``; each output column is
    ``exp(coeff * (dist - centre) ** 2)`` with ``coeff = -0.5 / spacing ** 2``.

    Args:
        start: position of the first Gaussian centre.
        stop: position of the last Gaussian centre.
        num_gaussians: number of centres; must be at least 2 because the width
            is derived from the spacing between the first two centres.

    Raises:
        ValueError: if ``num_gaussians`` is smaller than 2.
    """

    def __init__(self, start=0.0, stop=5.0, num_gaussians=50):
        super().__init__()
        if num_gaussians < 2:
            # Previously this surfaced as an opaque IndexError on offset[1].
            raise ValueError(
                f'GaussianSmearing needs num_gaussians >= 2, got {num_gaussians}'
            )
        offset = torch.linspace(start, stop, num_gaussians)
        self.coeff = -0.5 / (offset[1] - offset[0]).item() ** 2
        # buffer (not parameter): moves with .to(device) and is saved in the state dict
        self.register_buffer('offset', offset)

    def forward(self, dist):
        """Return a ``(dist.numel(), num_gaussians)`` tensor of basis values."""
        dist = dist.view(-1, 1) - self.offset.view(1, -1)
        return torch.exp(self.coeff * torch.pow(dist, 2))
def train(args, model, optimizer, scheduler, ema_weights, train_loader, val_loader, t_to_sigma, run_dir):
    """Run the training loop, validating and checkpointing after every epoch.

    Relies on a module-level ``device`` (set in the ``__main__`` guard of this
    script) rather than taking the device as a parameter.

    Per epoch: train, swap in EMA weights (if ``args.use_ema``) for validation
    and optional inference, capture an EMA state dict, restore the raw weights,
    log to wandb (if enabled), save best/last checkpoints into ``run_dir`` and
    step the scheduler.

    Args:
        args: parsed training arguments (weights, frequencies, flags).
        model: the score model; on CUDA it is accessed through ``model.module``.
        optimizer: optimizer whose state is saved in ``last_model.pt``.
        scheduler: optional scheduler stepped with the monitored metric.
        ema_weights: exponential-moving-average tracker of the parameters.
        train_loader: training data loader.
        val_loader: validation data loader.
        t_to_sigma: callable mapping diffusion times to noise levels.
        run_dir: directory where checkpoints are written.
    """
    best_val_loss = math.inf
    best_val_inference_value = math.inf if args.inference_earlystop_goal == 'min' else 0
    best_epoch = 0
    best_val_inference_epoch = 0
    loss_fn = partial(loss_function, tr_weight=args.tr_weight, rot_weight=args.rot_weight,
                      tor_weight=args.tor_weight, no_torsion=args.no_torsion)

    print("Starting training...")
    for epoch in range(args.n_epochs):
        if epoch % 5 == 0:
            print("Run name: ", args.run_name)
        logs = {}
        train_losses = train_epoch(model, train_loader, optimizer, device, t_to_sigma, loss_fn, ema_weights)
        print("Epoch {}: Training loss {:.4f}  tr {:.4f}   rot {:.4f}   tor {:.4f}"
              .format(epoch, train_losses['loss'], train_losses['tr_loss'], train_losses['rot_loss'],
                      train_losses['tor_loss']))

        # stash the raw weights so they can be restored after evaluation
        ema_weights.store(model.parameters())
        if args.use_ema:
            # load ema parameters into model for running validation and inference
            ema_weights.copy_to(model.parameters())
        val_losses = test_epoch(model, val_loader, device, t_to_sigma, loss_fn, args.test_sigma_intervals)
        print("Epoch {}: Validation loss {:.4f}  tr {:.4f}   rot {:.4f}   tor {:.4f}"
              .format(epoch, val_losses['loss'], val_losses['tr_loss'], val_losses['rot_loss'], val_losses['tor_loss']))

        if args.val_inference_freq is not None and (epoch + 1) % args.val_inference_freq == 0:
            inf_metrics = inference_epoch(model, val_loader.dataset.complex_graphs[:args.num_inference_complexes], device, t_to_sigma, args)
            print("Epoch {}: Val inference rmsds_lt2 {:.3f} rmsds_lt5 {:.3f}"
                  .format(epoch, inf_metrics['rmsds_lt2'], inf_metrics['rmsds_lt5']))
            # dict.update treats keyword arguments as extra items, so the former
            # `step=epoch + 1` argument injected a bogus 'step' metric into logs.
            logs.update({'valinf_' + k: v for k, v in inf_metrics.items()})

        # make sure the model holds the EMA weights before snapshotting them
        if not args.use_ema:
            ema_weights.copy_to(model.parameters())
        ema_state_dict = copy.deepcopy(model.module.state_dict() if device.type == 'cuda' else model.state_dict())
        ema_weights.restore(model.parameters())

        if args.wandb:
            logs.update({'train_' + k: v for k, v in train_losses.items()})
            logs.update({'val_' + k: v for k, v in val_losses.items()})
            logs['current_lr'] = optimizer.param_groups[0]['lr']
            wandb.log(logs, step=epoch + 1)

        state_dict = model.module.state_dict() if device.type == 'cuda' else model.state_dict()
        if args.inference_earlystop_metric in logs.keys() and \
                (args.inference_earlystop_goal == 'min' and logs[args.inference_earlystop_metric] <= best_val_inference_value or
                 args.inference_earlystop_goal == 'max' and logs[args.inference_earlystop_metric] >= best_val_inference_value):
            best_val_inference_value = logs[args.inference_earlystop_metric]
            best_val_inference_epoch = epoch
            torch.save(state_dict, os.path.join(run_dir, 'best_inference_epoch_model.pt'))
            torch.save(ema_state_dict, os.path.join(run_dir, 'best_ema_inference_epoch_model.pt'))
        if val_losses['loss'] <= best_val_loss:
            best_val_loss = val_losses['loss']
            best_epoch = epoch
            torch.save(state_dict, os.path.join(run_dir, 'best_model.pt'))
            torch.save(ema_state_dict, os.path.join(run_dir, 'best_ema_model.pt'))

        if scheduler:
            # the scheduler monitors the inference metric when inference runs are enabled
            if args.val_inference_freq is not None:
                scheduler.step(best_val_inference_value)
            else:
                scheduler.step(val_losses['loss'])

        torch.save({
            'epoch': epoch,
            'model': state_dict,
            'optimizer': optimizer.state_dict(),
            'ema_weights': ema_weights.state_dict(),
        }, os.path.join(run_dir, 'last_model.pt'))

    print("Best Validation Loss {} on Epoch {}".format(best_val_loss, best_epoch))
    print("Best inference metric {} on Epoch {}".format(best_val_inference_value, best_val_inference_epoch))
args.config = args.config.name + assert (args.inference_earlystop_goal == 'max' or args.inference_earlystop_goal == 'min') + if args.val_inference_freq is not None and args.scheduler is not None: + assert (args.scheduler_patience > args.val_inference_freq) # otherwise we will just stop training after args.scheduler_patience epochs + if args.cudnn_benchmark: + torch.backends.cudnn.benchmark = True + + # construct loader + t_to_sigma = partial(t_to_sigma_compl, args=args) + train_loader, val_loader = construct_loader(args, t_to_sigma) + + model = get_model(args, device, t_to_sigma=t_to_sigma) + optimizer, scheduler = get_optimizer_and_scheduler(args, model, scheduler_mode=args.inference_earlystop_goal if args.val_inference_freq is not None else 'min') + ema_weights = ExponentialMovingAverage(model.parameters(),decay=args.ema_rate) + + if args.restart_dir: + try: + dict = torch.load(f'{args.restart_dir}/last_model.pt', map_location=torch.device('cpu')) + if args.restart_lr is not None: dict['optimizer']['param_groups'][0]['lr'] = args.restart_lr + optimizer.load_state_dict(dict['optimizer']) + model.module.load_state_dict(dict['model'], strict=True) + if hasattr(args, 'ema_rate'): + ema_weights.load_state_dict(dict['ema_weights'], device=device) + print("Restarting from epoch", dict['epoch']) + except Exception as e: + print("Exception", e) + dict = torch.load(f'{args.restart_dir}/best_model.pt', map_location=torch.device('cpu')) + model.module.load_state_dict(dict, strict=True) + print("Due to exception had to take the best epoch and no optimiser") + + numel = sum([p.numel() for p in model.parameters()]) + print('Model with', numel, 'parameters') + + if args.wandb: + wandb.init( + entity='entity', + settings=wandb.Settings(start_method="fork"), + project=args.project, + name=args.run_name, + config=args + ) + wandb.log({'numel': numel}) + + # record parameters + run_dir = os.path.join(args.log_dir, args.run_name) + yaml_file_name = os.path.join(run_dir, 
def t_to_sigma(t_tr, t_rot, t_tor, args):
    """Map diffusion times to noise levels by geometric interpolation.

    For each of translation, rotation and torsion the result is
    ``sigma_min ** (1 - t) * sigma_max ** t`` using the matching
    ``args.<kind>_sigma_min`` / ``args.<kind>_sigma_max`` values.

    Returns:
        ``(tr_sigma, rot_sigma, tor_sigma)``.
    """
    def _geometric(sigma_min, sigma_max, t):
        return sigma_min ** (1 - t) * sigma_max ** t

    return (
        _geometric(args.tr_sigma_min, args.tr_sigma_max, t_tr),
        _geometric(args.rot_sigma_min, args.rot_sigma_max, t_rot),
        _geometric(args.tor_sigma_min, args.tor_sigma_max, t_tor),
    )
def get_timestep_embedding(embedding_type, embedding_dim, embedding_scale=10000):
    """Return a callable that embeds a 1-D batch of timesteps.

    Args:
        embedding_type: ``'sinusoidal'`` or ``'fourier'``.
        embedding_dim: width of the produced embedding.
        embedding_scale: for ``'sinusoidal'`` the timesteps are multiplied by
            this value before embedding; for ``'fourier'`` it is passed as the
            ``scale`` of ``GaussianFourierProjection``.

    Returns:
        A function (sinusoidal) or ``nn.Module`` (fourier) mapping timesteps to
        embeddings.

    Raises:
        NotImplementedError: for any other ``embedding_type``.
    """
    if embedding_type == 'sinusoidal':
        emb_func = (lambda x: sinusoidal_embedding(embedding_scale * x, embedding_dim))
    elif embedding_type == 'fourier':
        emb_func = GaussianFourierProjection(embedding_size=embedding_dim, scale=embedding_scale)
    else:
        # `NotImplemented` is a comparison sentinel, not an exception; raising it
        # produced a confusing TypeError instead of the intended error.
        raise NotImplementedError(f'Unknown timestep embedding type: {embedding_type!r}')
    return emb_func
torch.ones(complex_graphs['ligand'].num_nodes).to(device)} + complex_graphs['receptor'].node_t = { + 'tr': t_tr * torch.ones(complex_graphs['receptor'].num_nodes).to(device), + 'rot': t_rot * torch.ones(complex_graphs['receptor'].num_nodes).to(device), + 'tor': t_tor * torch.ones(complex_graphs['receptor'].num_nodes).to(device)} + complex_graphs.complex_t = {'tr': t_tr * torch.ones(batchsize).to(device), + 'rot': t_rot * torch.ones(batchsize).to(device), + 'tor': t_tor * torch.ones(batchsize).to(device)} + if all_atoms: + complex_graphs['atom'].node_t = { + 'tr': t_tr * torch.ones(complex_graphs['atom'].num_nodes).to(device), + 'rot': t_rot * torch.ones(complex_graphs['atom'].num_nodes).to(device), + 'tor': t_tor * torch.ones(complex_graphs['atom'].num_nodes).to(device)} \ No newline at end of file diff --git a/forks/DiffDockv1/utils/geometry.py b/forks/DiffDockv1/utils/geometry.py new file mode 100644 index 00000000..0b54bbea --- /dev/null +++ b/forks/DiffDockv1/utils/geometry.py @@ -0,0 +1,123 @@ +import math + +import torch + + +def quaternion_to_matrix(quaternions): + """ + From https://pytorch3d.readthedocs.io/en/latest/_modules/pytorch3d/transforms/rotation_conversions.html + Convert rotations given as quaternions to rotation matrices. + + Args: + quaternions: quaternions with real part first, + as tensor of shape (..., 4). + + Returns: + Rotation matrices as tensor of shape (..., 3, 3). 
+ """ + r, i, j, k = torch.unbind(quaternions, -1) + two_s = 2.0 / (quaternions * quaternions).sum(-1) + + o = torch.stack( + ( + 1 - two_s * (j * j + k * k), + two_s * (i * j - k * r), + two_s * (i * k + j * r), + two_s * (i * j + k * r), + 1 - two_s * (i * i + k * k), + two_s * (j * k - i * r), + two_s * (i * k - j * r), + two_s * (j * k + i * r), + 1 - two_s * (i * i + j * j), + ), + -1, + ) + return o.reshape(quaternions.shape[:-1] + (3, 3)) + + +def axis_angle_to_quaternion(axis_angle): + """ + From https://pytorch3d.readthedocs.io/en/latest/_modules/pytorch3d/transforms/rotation_conversions.html + Convert rotations given as axis/angle to quaternions. + + Args: + axis_angle: Rotations given as a vector in axis angle form, + as a tensor of shape (..., 3), where the magnitude is + the angle turned anticlockwise in radians around the + vector's direction. + + Returns: + quaternions with real part first, as tensor of shape (..., 4). + """ + angles = torch.norm(axis_angle, p=2, dim=-1, keepdim=True) + half_angles = 0.5 * angles + eps = 1e-6 + small_angles = angles.abs() < eps + sin_half_angles_over_angles = torch.empty_like(angles) + sin_half_angles_over_angles[~small_angles] = ( + torch.sin(half_angles[~small_angles]) / angles[~small_angles] + ) + # for x small, sin(x/2) is about x/2 - (x/2)^3/6 + # so sin(x/2)/x is about 1/2 - (x*x)/48 + sin_half_angles_over_angles[small_angles] = ( + 0.5 - (angles[small_angles] * angles[small_angles]) / 48 + ) + quaternions = torch.cat( + [torch.cos(half_angles), axis_angle * sin_half_angles_over_angles], dim=-1 + ) + return quaternions + + +def axis_angle_to_matrix(axis_angle): + """ + From https://pytorch3d.readthedocs.io/en/latest/_modules/pytorch3d/transforms/rotation_conversions.html + Convert rotations given as axis/angle to rotation matrices. 
+ + Args: + axis_angle: Rotations given as a vector in axis angle form, + as a tensor of shape (..., 3), where the magnitude is + the angle turned anticlockwise in radians around the + vector's direction. + + Returns: + Rotation matrices as tensor of shape (..., 3, 3). + """ + return quaternion_to_matrix(axis_angle_to_quaternion(axis_angle)) + + +def rigid_transform_Kabsch_3D_torch(A, B): + # R = 3x3 rotation matrix, t = 3x1 column vector + # This already takes residue identity into account. + + assert A.shape[1] == B.shape[1] + num_rows, num_cols = A.shape + if num_rows != 3: + raise Exception(f"matrix A is not 3xN, it is {num_rows}x{num_cols}") + num_rows, num_cols = B.shape + if num_rows != 3: + raise Exception(f"matrix B is not 3xN, it is {num_rows}x{num_cols}") + + + # find mean column wise: 3 x 1 + centroid_A = torch.mean(A, axis=1, keepdims=True) + centroid_B = torch.mean(B, axis=1, keepdims=True) + + # subtract mean + Am = A - centroid_A + Bm = B - centroid_B + + H = Am @ Bm.T + + # find rotation + U, S, Vt = torch.linalg.svd(H) + + R = Vt.T @ U.T + # special reflection case + if torch.linalg.det(R) < 0: + # print("det(R) < R, reflection detected!, correcting for it ...") + SS = torch.diag(torch.tensor([1.,1.,-1.], device=A.device)) + R = (Vt.T @ SS) @ U.T + assert math.fabs(torch.linalg.det(R) - 1) < 3e-3 # note I had to change this error bound to be higher + + t = -R @ centroid_A + centroid_B + return R, t diff --git a/forks/DiffDockv1/utils/inference_utils.py b/forks/DiffDockv1/utils/inference_utils.py new file mode 100644 index 00000000..956a4456 --- /dev/null +++ b/forks/DiffDockv1/utils/inference_utils.py @@ -0,0 +1,275 @@ +import os + +import torch +from Bio.PDB import PDBParser +from esm import FastaBatchedDataset, pretrained +from rdkit.Chem import AddHs, MolFromSmiles +from torch_geometric.data import Dataset, HeteroData +import esm + +from datasets.process_mols import parse_pdb_from_path, generate_conformer, read_molecule, 
get_lig_graph_with_matching, \ + extract_receptor_structure, get_rec_graph + + +three_to_one = {'ALA': 'A', +'ARG': 'R', +'ASN': 'N', +'ASP': 'D', +'CYS': 'C', +'GLN': 'Q', +'GLU': 'E', +'GLY': 'G', +'HIS': 'H', +'ILE': 'I', +'LEU': 'L', +'LYS': 'K', +'MET': 'M', +'MSE': 'M', # MSE this is almost the same AA as MET. The sulfur is just replaced by Selen +'PHE': 'F', +'PRO': 'P', +'PYL': 'O', +'SER': 'S', +'SEC': 'U', +'THR': 'T', +'TRP': 'W', +'TYR': 'Y', +'VAL': 'V', +'ASX': 'B', +'GLX': 'Z', +'XAA': 'X', +'XLE': 'J'} + +def get_sequences_from_pdbfile(file_path): + biopython_parser = PDBParser() + structure = biopython_parser.get_structure('random_id', file_path) + structure = structure[0] + sequence = None + for i, chain in enumerate(structure): + seq = '' + for res_idx, residue in enumerate(chain): + if residue.get_resname() == 'HOH': + continue + residue_coords = [] + c_alpha, n, c = None, None, None + for atom in residue: + if atom.name == 'CA': + c_alpha = list(atom.get_vector()) + if atom.name == 'N': + n = list(atom.get_vector()) + if atom.name == 'C': + c = list(atom.get_vector()) + if c_alpha != None and n != None and c != None: # only append residue if it is an amino acid + try: + seq += three_to_one[residue.get_resname()] + except Exception as e: + seq += '-' + print("encountered unknown AA: ", residue.get_resname(), ' in the complex. 
Replacing it with a dash - .') + + if sequence is None: + sequence = seq + else: + sequence += (":" + seq) + + return sequence + + +def set_nones(l): + return [s if str(s) != 'nan' else None for s in l] + + +def get_sequences(protein_files, protein_sequences): + new_sequences = [] + for i in range(len(protein_files)): + if protein_files[i] is not None: + new_sequences.append(get_sequences_from_pdbfile(protein_files[i])) + else: + new_sequences.append(protein_sequences[i]) + return new_sequences + + +def compute_ESM_embeddings(model, alphabet, labels, sequences): + # settings used + toks_per_batch = 4096 + repr_layers = [33] + include = "per_tok" + truncation_seq_length = 1022 + + dataset = FastaBatchedDataset(labels, sequences) + batches = dataset.get_batch_indices(toks_per_batch, extra_toks_per_seq=1) + data_loader = torch.utils.data.DataLoader( + dataset, collate_fn=alphabet.get_batch_converter(truncation_seq_length), batch_sampler=batches + ) + + assert all(-(model.num_layers + 1) <= i <= model.num_layers for i in repr_layers) + repr_layers = [(i + model.num_layers + 1) % (model.num_layers + 1) for i in repr_layers] + embeddings = {} + + with torch.no_grad(): + for batch_idx, (labels, strs, toks) in enumerate(data_loader): + print(f"Processing {batch_idx + 1} of {len(batches)} batches ({toks.size(0)} sequences)") + if torch.cuda.is_available(): + toks = toks.to(device="cuda", non_blocking=True) + + out = model(toks, repr_layers=repr_layers, return_contacts=False) + representations = {layer: t.to(device="cpu") for layer, t in out["representations"].items()} + + for i, label in enumerate(labels): + truncate_len = min(truncation_seq_length, len(strs[i])) + embeddings[label] = representations[33][i, 1: truncate_len + 1].clone() + return embeddings + + +def generate_ESM_structure(model, filename, sequence): + model.set_chunk_size(256) + chunk_size = 256 + output = None + + while output is None: + try: + with torch.no_grad(): + output = model.infer_pdb(sequence) + + 
with open(filename, "w") as f: + f.write(output) + print("saved", filename) + except RuntimeError as e: + if 'out of memory' in str(e): + print('| WARNING: ran out of memory on chunk_size', chunk_size) + for p in model.parameters(): + if p.grad is not None: + del p.grad # free some memory + torch.cuda.empty_cache() + chunk_size = chunk_size // 2 + if chunk_size > 2: + model.set_chunk_size(chunk_size) + else: + print("Not enough memory for ESMFold") + break + else: + raise e + return output is not None + + +class InferenceDataset(Dataset): + def __init__(self, out_dir, complex_names, protein_files, ligand_descriptions, protein_sequences, lm_embeddings, + receptor_radius=30, c_alpha_max_neighbors=None, precomputed_lm_embeddings=None, + remove_hs=False, all_atoms=False, atom_radius=5, atom_max_neighbors=None): + + super(InferenceDataset, self).__init__() + self.receptor_radius = receptor_radius + self.c_alpha_max_neighbors = c_alpha_max_neighbors + self.remove_hs = remove_hs + self.all_atoms = all_atoms + self.atom_radius, self.atom_max_neighbors = atom_radius, atom_max_neighbors + + self.complex_names = complex_names + self.protein_files = protein_files + self.ligand_descriptions = ligand_descriptions + self.protein_sequences = protein_sequences + + # generate LM embeddings + if lm_embeddings and (precomputed_lm_embeddings is None or precomputed_lm_embeddings[0] is None): + print("Generating ESM language model embeddings") + model_location = "esm2_t33_650M_UR50D" + model, alphabet = pretrained.load_model_and_alphabet(model_location) + model.eval() + if torch.cuda.is_available(): + model = model.cuda() + + protein_sequences = get_sequences(protein_files, protein_sequences) + labels, sequences = [], [] + for i in range(len(protein_sequences)): + s = protein_sequences[i].split(':') + sequences.extend(s) + labels.extend([complex_names[i] + '_chain_' + str(j) for j in range(len(s))]) + + lm_embeddings = compute_ESM_embeddings(model, alphabet, labels, sequences) + + 
self.lm_embeddings = [] + for i in range(len(protein_sequences)): + s = protein_sequences[i].split(':') + self.lm_embeddings.append([lm_embeddings[f'{complex_names[i]}_chain_{j}'] for j in range(len(s))]) + + elif not lm_embeddings: + self.lm_embeddings = [None] * len(self.complex_names) + + else: + self.lm_embeddings = precomputed_lm_embeddings + + # generate structures with ESMFold + if None in protein_files: + print("generating missing structures with ESMFold") + model = esm.pretrained.esmfold_v1() + model = model.eval().cuda() + + for i in range(len(protein_files)): + if protein_files[i] is None: + self.protein_files[i] = f"{out_dir}/{complex_names[i]}/{complex_names[i]}_esmfold.pdb" + if not os.path.exists(self.protein_files[i]): + print("generating", self.protein_files[i]) + generate_ESM_structure(model, self.protein_files[i], protein_sequences[i]) + + def len(self): + return len(self.complex_names) + + def get(self, idx): + + name, protein_file, ligand_description, lm_embedding = \ + self.complex_names[idx], self.protein_files[idx], self.ligand_descriptions[idx], self.lm_embeddings[idx] + + # build the pytorch geometric heterogeneous graph + complex_graph = HeteroData() + complex_graph['name'] = name + + # parse the ligand, either from file or smile + try: + mol = MolFromSmiles(ligand_description) # check if it is a smiles or a path + + if mol is not None: + mol = AddHs(mol) + generate_conformer(mol) + else: + mol = read_molecule(ligand_description, remove_hs=False, sanitize=True) + if mol is None: + raise Exception('RDKit could not read the molecule ', ligand_description) + mol.RemoveAllConformers() + mol = AddHs(mol) + generate_conformer(mol) + except Exception as e: + print('Failed to read molecule ', ligand_description, ' We are skipping it. 
The reason is the exception: ', e) + complex_graph['success'] = False + return complex_graph + + try: + # parse the receptor from the pdb file + rec_model = parse_pdb_from_path(protein_file) + get_lig_graph_with_matching(mol, complex_graph, popsize=None, maxiter=None, matching=False, keep_original=False, + num_conformers=1, remove_hs=self.remove_hs) + rec, rec_coords, c_alpha_coords, n_coords, c_coords, lm_embeddings = extract_receptor_structure(rec_model, mol, lm_embedding_chains=lm_embedding) + if lm_embeddings is not None and len(c_alpha_coords) != len(lm_embeddings): + print(f'LM embeddings for complex {name} did not have the right length for the protein. Skipping {name}.') + complex_graph['success'] = False + return complex_graph + + get_rec_graph(rec, rec_coords, c_alpha_coords, n_coords, c_coords, complex_graph, rec_radius=self.receptor_radius, + c_alpha_max_neighbors=self.c_alpha_max_neighbors, all_atoms=self.all_atoms, + atom_radius=self.atom_radius, atom_max_neighbors=self.atom_max_neighbors, remove_hs=self.remove_hs, lm_embeddings=lm_embeddings) + + except Exception as e: + print(f'Skipping {name} because of the error:') + print(e) + complex_graph['success'] = False + return complex_graph + + protein_center = torch.mean(complex_graph['receptor'].pos, dim=0, keepdim=True) + complex_graph['receptor'].pos -= protein_center + if self.all_atoms: + complex_graph['atom'].pos -= protein_center + + ligand_center = torch.mean(complex_graph['ligand'].pos, dim=0, keepdim=True) + complex_graph['ligand'].pos -= ligand_center + + complex_graph.original_center = protein_center + complex_graph.mol = mol + complex_graph['success'] = True + return complex_graph diff --git a/forks/DiffDockv1/utils/parsing.py b/forks/DiffDockv1/utils/parsing.py new file mode 100644 index 00000000..8f76d757 --- /dev/null +++ b/forks/DiffDockv1/utils/parsing.py @@ -0,0 +1,86 @@ + +from argparse import ArgumentParser,FileType + +def parse_train_args(): + + # General arguments + parser = 
ArgumentParser() + parser.add_argument('--config', type=FileType(mode='r'), default=None) + parser.add_argument('--log_dir', type=str, default='workdir', help='Folder in which to save model and logs') + parser.add_argument('--restart_dir', type=str, help='Folder of previous training model from which to restart') + parser.add_argument('--cache_path', type=str, default='data/cache', help='Folder from where to load/restore cached dataset') + parser.add_argument('--data_dir', type=str, default='data/PDBBind_processed/', help='Folder containing original structures') + parser.add_argument('--split_train', type=str, default='data/splits/timesplit_no_lig_overlap_train', help='Path of file defining the split') + parser.add_argument('--split_val', type=str, default='data/splits/timesplit_no_lig_overlap_val', help='Path of file defining the split') + parser.add_argument('--split_test', type=str, default='data/splits/timesplit_test', help='Path of file defining the split') + parser.add_argument('--test_sigma_intervals', action='store_true', default=False, help='Whether to log loss per noise interval') + parser.add_argument('--val_inference_freq', type=int, default=5, help='Frequency of epochs for which to run expensive inference on val data') + parser.add_argument('--train_inference_freq', type=int, default=None, help='Frequency of epochs for which to run expensive inference on train data') + parser.add_argument('--inference_steps', type=int, default=20, help='Number of denoising steps for inference on val') + parser.add_argument('--num_inference_complexes', type=int, default=100, help='Number of complexes for which inference is run every val/train_inference_freq epochs (None will run it on all)') + parser.add_argument('--inference_earlystop_metric', type=str, default='valinf_rmsds_lt2', help='This is the metric that is addionally used when val_inference_freq is not None') + parser.add_argument('--inference_earlystop_goal', type=str, default='max', help='Whether to maximize or 
minimize metric') + parser.add_argument('--wandb', action='store_true', default=False, help='') + parser.add_argument('--project', type=str, default='difdock_train', help='') + parser.add_argument('--run_name', type=str, default='', help='') + parser.add_argument('--cudnn_benchmark', action='store_true', default=False, help='CUDA optimization parameter for faster training') + parser.add_argument('--num_dataloader_workers', type=int, default=0, help='Number of workers for dataloader') + parser.add_argument('--pin_memory', action='store_true', default=False, help='pin_memory arg of dataloader') + + # Training arguments + parser.add_argument('--n_epochs', type=int, default=400, help='Number of epochs for training') + parser.add_argument('--batch_size', type=int, default=32, help='Batch size') + parser.add_argument('--scheduler', type=str, default=None, help='LR scheduler') + parser.add_argument('--scheduler_patience', type=int, default=20, help='Patience of the LR scheduler') + parser.add_argument('--lr', type=float, default=1e-3, help='Initial learning rate') + parser.add_argument('--restart_lr', type=float, default=None, help='If this is not none, the lr of the optimizer will be overwritten with this value when restarting from a checkpoint.') + parser.add_argument('--w_decay', type=float, default=0.0, help='Weight decay added to loss') + parser.add_argument('--num_workers', type=int, default=1, help='Number of workers for preprocessing') + parser.add_argument('--use_ema', action='store_true', default=False, help='Whether or not to use ema for the model weights') + parser.add_argument('--ema_rate', type=float, default=0.999, help='decay rate for the exponential moving average model parameters ') + + # Dataset + parser.add_argument('--limit_complexes', type=int, default=0, help='If positive, the number of training and validation complexes is capped') + parser.add_argument('--all_atoms', action='store_true', default=False, help='Whether to use the all atoms model') + 
parser.add_argument('--receptor_radius', type=float, default=30, help='Cutoff on distances for receptor edges') + parser.add_argument('--c_alpha_max_neighbors', type=int, default=10, help='Maximum number of neighbors for each residue') + parser.add_argument('--atom_radius', type=float, default=5, help='Cutoff on distances for atom connections') + parser.add_argument('--atom_max_neighbors', type=int, default=8, help='Maximum number of atom neighbours for receptor') + parser.add_argument('--matching_popsize', type=int, default=20, help='Differential evolution popsize parameter in matching') + parser.add_argument('--matching_maxiter', type=int, default=20, help='Differential evolution maxiter parameter in matching') + parser.add_argument('--max_lig_size', type=int, default=None, help='Maximum number of heavy atoms in ligand') + parser.add_argument('--remove_hs', action='store_true', default=False, help='remove Hs') + parser.add_argument('--num_conformers', type=int, default=1, help='Number of conformers to match to each ligand') + parser.add_argument('--esm_embeddings_path', type=str, default=None, help='If this is set then the LM embeddings at that path will be used for the receptor features') + + # Diffusion + parser.add_argument('--tr_weight', type=float, default=0.33, help='Weight of translation loss') + parser.add_argument('--rot_weight', type=float, default=0.33, help='Weight of rotation loss') + parser.add_argument('--tor_weight', type=float, default=0.33, help='Weight of torsional loss') + parser.add_argument('--rot_sigma_min', type=float, default=0.1, help='Minimum sigma for rotational component') + parser.add_argument('--rot_sigma_max', type=float, default=1.65, help='Maximum sigma for rotational component') + parser.add_argument('--tr_sigma_min', type=float, default=0.1, help='Minimum sigma for translational component') + parser.add_argument('--tr_sigma_max', type=float, default=30, help='Maximum sigma for translational component') + 
parser.add_argument('--tor_sigma_min', type=float, default=0.0314, help='Minimum sigma for torsional component') + parser.add_argument('--tor_sigma_max', type=float, default=3.14, help='Maximum sigma for torsional component') + parser.add_argument('--no_torsion', action='store_true', default=False, help='If set only rigid matching') + + # Model + parser.add_argument('--num_conv_layers', type=int, default=2, help='Number of interaction layers') + parser.add_argument('--max_radius', type=float, default=5.0, help='Radius cutoff for geometric graph') + parser.add_argument('--scale_by_sigma', action='store_true', default=True, help='Whether to normalise the score') + parser.add_argument('--ns', type=int, default=16, help='Number of hidden features per node of order 0') + parser.add_argument('--nv', type=int, default=4, help='Number of hidden features per node of order >0') + parser.add_argument('--distance_embed_dim', type=int, default=32, help='Embedding size for the distance') + parser.add_argument('--cross_distance_embed_dim', type=int, default=32, help='Embeddings size for the cross distance') + parser.add_argument('--no_batch_norm', action='store_true', default=False, help='If set, it removes the batch norm') + parser.add_argument('--use_second_order_repr', action='store_true', default=False, help='Whether to use only up to first order representations or also second') + parser.add_argument('--cross_max_distance', type=float, default=80, help='Maximum cross distance in case not dynamic') + parser.add_argument('--dynamic_max_cross', action='store_true', default=False, help='Whether to use the dynamic distance cutoff') + parser.add_argument('--dropout', type=float, default=0.0, help='MLP dropout') + parser.add_argument('--embedding_type', type=str, default="sinusoidal", help='Type of diffusion time embedding') + parser.add_argument('--sigma_embed_dim', type=int, default=32, help='Size of the embedding of the diffusion time') + parser.add_argument('--embedding_scale', 
type=int, default=1000, help='Parameter of the diffusion time embedding') + + args = parser.parse_args() + return args diff --git a/forks/DiffDockv1/utils/sampling.py b/forks/DiffDockv1/utils/sampling.py new file mode 100644 index 00000000..c764eeb7 --- /dev/null +++ b/forks/DiffDockv1/utils/sampling.py @@ -0,0 +1,114 @@ +import numpy as np +import torch +from torch_geometric.loader import DataLoader + +from utils.diffusion_utils import modify_conformer, set_time +from utils.torsion import modify_conformer_torsion_angles +from scipy.spatial.transform import Rotation as R + + +def randomize_position(data_list, no_torsion, no_random, tr_sigma_max): + # in place modification of the list + if not no_torsion: + # randomize torsion angles + for complex_graph in data_list: + torsion_updates = np.random.uniform(low=-np.pi, high=np.pi, size=complex_graph['ligand'].edge_mask.sum()) + complex_graph['ligand'].pos = \ + modify_conformer_torsion_angles(complex_graph['ligand'].pos, + complex_graph['ligand', 'ligand'].edge_index.T[ + complex_graph['ligand'].edge_mask], + complex_graph['ligand'].mask_rotate[0], torsion_updates) + + for complex_graph in data_list: + # randomize position + molecule_center = torch.mean(complex_graph['ligand'].pos, dim=0, keepdim=True) + random_rotation = torch.from_numpy(R.random().as_matrix()).float() + complex_graph['ligand'].pos = (complex_graph['ligand'].pos - molecule_center) @ random_rotation.T + # base_rmsd = np.sqrt(np.sum((complex_graph['ligand'].pos.cpu().numpy() - orig_complex_graph['ligand'].pos.numpy()) ** 2, axis=1).mean()) + + if not no_random: # note for now the torsion angles are still randomised + tr_update = torch.normal(mean=0, std=tr_sigma_max, size=(1, 3)) + complex_graph['ligand'].pos += tr_update + + +def sampling(data_list, model, inference_steps, tr_schedule, rot_schedule, tor_schedule, device, t_to_sigma, model_args, + no_random=False, ode=False, visualization_list=None, confidence_model=None, confidence_data_list=None, + 
confidence_model_args=None, batch_size=32, no_final_step_noise=False): + N = len(data_list) + + for t_idx in range(inference_steps): + t_tr, t_rot, t_tor = tr_schedule[t_idx], rot_schedule[t_idx], tor_schedule[t_idx] + dt_tr = tr_schedule[t_idx] - tr_schedule[t_idx + 1] if t_idx < inference_steps - 1 else tr_schedule[t_idx] + dt_rot = rot_schedule[t_idx] - rot_schedule[t_idx + 1] if t_idx < inference_steps - 1 else rot_schedule[t_idx] + dt_tor = tor_schedule[t_idx] - tor_schedule[t_idx + 1] if t_idx < inference_steps - 1 else tor_schedule[t_idx] + + loader = DataLoader(data_list, batch_size=batch_size) + new_data_list = [] + + for complex_graph_batch in loader: + b = complex_graph_batch.num_graphs + complex_graph_batch = complex_graph_batch.to(device) + + tr_sigma, rot_sigma, tor_sigma = t_to_sigma(t_tr, t_rot, t_tor) + set_time(complex_graph_batch, t_tr, t_rot, t_tor, b, model_args.all_atoms, device) + + with torch.no_grad(): + tr_score, rot_score, tor_score = model(complex_graph_batch) + + tr_g = tr_sigma * torch.sqrt(torch.tensor(2 * np.log(model_args.tr_sigma_max / model_args.tr_sigma_min))) + rot_g = 2 * rot_sigma * torch.sqrt(torch.tensor(np.log(model_args.rot_sigma_max / model_args.rot_sigma_min))) + + if ode: + tr_perturb = (0.5 * tr_g ** 2 * dt_tr * tr_score.cpu()).cpu() + rot_perturb = (0.5 * rot_score.cpu() * dt_rot * rot_g ** 2).cpu() + else: + tr_z = torch.zeros((b, 3)) if no_random or (no_final_step_noise and t_idx == inference_steps - 1) \ + else torch.normal(mean=0, std=1, size=(b, 3)) + tr_perturb = (tr_g ** 2 * dt_tr * tr_score.cpu() + tr_g * np.sqrt(dt_tr) * tr_z).cpu() + + rot_z = torch.zeros((b, 3)) if no_random or (no_final_step_noise and t_idx == inference_steps - 1) \ + else torch.normal(mean=0, std=1, size=(b, 3)) + rot_perturb = (rot_score.cpu() * dt_rot * rot_g ** 2 + rot_g * np.sqrt(dt_rot) * rot_z).cpu() + + if not model_args.no_torsion: + tor_g = tor_sigma * torch.sqrt(torch.tensor(2 * np.log(model_args.tor_sigma_max / 
model_args.tor_sigma_min))) + if ode: + tor_perturb = (0.5 * tor_g ** 2 * dt_tor * tor_score.cpu()).numpy() + else: + tor_z = torch.zeros(tor_score.shape) if no_random or (no_final_step_noise and t_idx == inference_steps - 1) \ + else torch.normal(mean=0, std=1, size=tor_score.shape) + tor_perturb = (tor_g ** 2 * dt_tor * tor_score.cpu() + tor_g * np.sqrt(dt_tor) * tor_z).numpy() + torsions_per_molecule = tor_perturb.shape[0] // b + else: + tor_perturb = None + + # Apply noise + new_data_list.extend([modify_conformer(complex_graph, tr_perturb[i:i + 1], rot_perturb[i:i + 1].squeeze(0), + tor_perturb[i * torsions_per_molecule:(i + 1) * torsions_per_molecule] if not model_args.no_torsion else None) + for i, complex_graph in enumerate(complex_graph_batch.to('cpu').to_data_list())]) + data_list = new_data_list + + if visualization_list is not None: + for idx, visualization in enumerate(visualization_list): + visualization.add((data_list[idx]['ligand'].pos + data_list[idx].original_center).detach().cpu(), + part=1, order=t_idx + 2) + + with torch.no_grad(): + if confidence_model is not None: + loader = DataLoader(data_list, batch_size=batch_size) + confidence_loader = iter(DataLoader(confidence_data_list, batch_size=batch_size)) + confidence = [] + for complex_graph_batch in loader: + complex_graph_batch = complex_graph_batch.to(device) + if confidence_data_list is not None: + confidence_complex_graph_batch = next(confidence_loader).to(device) + confidence_complex_graph_batch['ligand'].pos = complex_graph_batch['ligand'].pos + set_time(confidence_complex_graph_batch, 0, 0, 0, N, confidence_model_args.all_atoms, device) + confidence.append(confidence_model(confidence_complex_graph_batch)) + else: + confidence.append(confidence_model(complex_graph_batch)) + confidence = torch.cat(confidence, dim=0) + else: + confidence = None + + return data_list, confidence diff --git a/forks/DiffDockv1/utils/so3.py b/forks/DiffDockv1/utils/so3.py new file mode 100644 index 
00000000..f0153235 --- /dev/null +++ b/forks/DiffDockv1/utils/so3.py @@ -0,0 +1,96 @@ +import os +import numpy as np +import torch +from scipy.spatial.transform import Rotation + +MIN_EPS, MAX_EPS, N_EPS = 0.01, 2, 1000 +X_N = 2000 + +""" + Preprocessing for the SO(3) sampling and score computations, truncated infinite series are computed and then + cached to memory, therefore the precomputation is only run the first time the repository is run on a machine +""" + +omegas = np.linspace(0, np.pi, X_N + 1)[1:] + + +def _compose(r1, r2): # R1 @ R2 but for Euler vecs + return Rotation.from_matrix(Rotation.from_rotvec(r1).as_matrix() @ Rotation.from_rotvec(r2).as_matrix()).as_rotvec() + + +def _expansion(omega, eps, L=2000): # the summation term only + p = 0 + for l in range(L): + p += (2 * l + 1) * np.exp(-l * (l + 1) * eps**2) * np.sin(omega * (l + 1 / 2)) / np.sin(omega / 2) + return p + + +def _density(expansion, omega, marginal=True): # if marginal, density over [0, pi], else over SO(3) + if marginal: + return expansion * (1 - np.cos(omega)) / np.pi + else: + return expansion / 8 / np.pi ** 2 # the constant factor doesn't affect any actual calculations though + + +def _score(exp, omega, eps, L=2000): # score of density over SO(3) + dSigma = 0 + for l in range(L): + hi = np.sin(omega * (l + 1 / 2)) + dhi = (l + 1 / 2) * np.cos(omega * (l + 1 / 2)) + lo = np.sin(omega / 2) + dlo = 1 / 2 * np.cos(omega / 2) + dSigma += (2 * l + 1) * np.exp(-l * (l + 1) * eps**2) * (lo * dhi - hi * dlo) / lo ** 2 + return dSigma / exp + + +if os.path.exists('.so3_omegas_array2.npy'): + _omegas_array = np.load('.so3_omegas_array2.npy') + _cdf_vals = np.load('.so3_cdf_vals2.npy') + _score_norms = np.load('.so3_score_norms2.npy') + _exp_score_norms = np.load('.so3_exp_score_norms2.npy') +else: + print("Precomputing and saving to cache SO(3) distribution table") + _eps_array = 10 ** np.linspace(np.log10(MIN_EPS), np.log10(MAX_EPS), N_EPS) + _omegas_array = np.linspace(0, np.pi, X_N + 1)[1:] 
+ + _exp_vals = np.asarray([_expansion(_omegas_array, eps) for eps in _eps_array]) + _pdf_vals = np.asarray([_density(_exp, _omegas_array, marginal=True) for _exp in _exp_vals]) + _cdf_vals = np.asarray([_pdf.cumsum() / X_N * np.pi for _pdf in _pdf_vals]) + _score_norms = np.asarray([_score(_exp_vals[i], _omegas_array, _eps_array[i]) for i in range(len(_eps_array))]) + + _exp_score_norms = np.sqrt(np.sum(_score_norms**2 * _pdf_vals, axis=1) / np.sum(_pdf_vals, axis=1) / np.pi) + + np.save('.so3_omegas_array2.npy', _omegas_array) + np.save('.so3_cdf_vals2.npy', _cdf_vals) + np.save('.so3_score_norms2.npy', _score_norms) + np.save('.so3_exp_score_norms2.npy', _exp_score_norms) + + +def sample(eps): + eps_idx = (np.log10(eps) - np.log10(MIN_EPS)) / (np.log10(MAX_EPS) - np.log10(MIN_EPS)) * N_EPS + eps_idx = np.clip(np.around(eps_idx).astype(int), a_min=0, a_max=N_EPS - 1) + + x = np.random.rand() + return np.interp(x, _cdf_vals[eps_idx], _omegas_array) + + +def sample_vec(eps): + x = np.random.randn(3) + x /= np.linalg.norm(x) + return x * sample(eps) + + +def score_vec(eps, vec): + eps_idx = (np.log10(eps) - np.log10(MIN_EPS)) / (np.log10(MAX_EPS) - np.log10(MIN_EPS)) * N_EPS + eps_idx = np.clip(np.around(eps_idx).astype(int), a_min=0, a_max=N_EPS - 1) + + om = np.linalg.norm(vec) + return np.interp(om, _omegas_array, _score_norms[eps_idx]) * vec / om + + +def score_norm(eps): + eps = eps.numpy() + eps_idx = (np.log10(eps) - np.log10(MIN_EPS)) / (np.log10(MAX_EPS) - np.log10(MIN_EPS)) * N_EPS + eps_idx = np.clip(np.around(eps_idx).astype(int), a_min=0, a_max=N_EPS-1) + return torch.from_numpy(_exp_score_norms[eps_idx]).float() + diff --git a/forks/DiffDockv1/utils/torsion.py b/forks/DiffDockv1/utils/torsion.py new file mode 100644 index 00000000..e25ca42d --- /dev/null +++ b/forks/DiffDockv1/utils/torsion.py @@ -0,0 +1,94 @@ +import networkx as nx +import numpy as np +import torch, copy +from scipy.spatial.transform import Rotation as R +from torch_geometric.utils 
import to_networkx +from torch_geometric.data import Data + +""" + Preprocessing and computation for torsional updates to conformers +""" + + +def get_transformation_mask(pyg_data): + G = to_networkx(pyg_data.to_homogeneous(), to_undirected=False) + to_rotate = [] + edges = pyg_data['ligand', 'ligand'].edge_index.T.numpy() + for i in range(0, edges.shape[0], 2): + assert edges[i, 0] == edges[i+1, 1] + + G2 = G.to_undirected() + G2.remove_edge(*edges[i]) + if not nx.is_connected(G2): + l = list(sorted(nx.connected_components(G2), key=len)[0]) + if len(l) > 1: + if edges[i, 0] in l: + to_rotate.append([]) + to_rotate.append(l) + else: + to_rotate.append(l) + to_rotate.append([]) + continue + to_rotate.append([]) + to_rotate.append([]) + + mask_edges = np.asarray([0 if len(l) == 0 else 1 for l in to_rotate], dtype=bool) + mask_rotate = np.zeros((np.sum(mask_edges), len(G.nodes())), dtype=bool) + idx = 0 + for i in range(len(G.edges())): + if mask_edges[i]: + mask_rotate[idx][np.asarray(to_rotate[i], dtype=int)] = True + idx += 1 + + return mask_edges, mask_rotate + + +def modify_conformer_torsion_angles(pos, edge_index, mask_rotate, torsion_updates, as_numpy=False): + pos = copy.deepcopy(pos) + if type(pos) != np.ndarray: pos = pos.cpu().numpy() + + for idx_edge, e in enumerate(edge_index.cpu().numpy()): + if torsion_updates[idx_edge] == 0: + continue + u, v = e[0], e[1] + + # check if need to reverse the edge, v should be connected to the part that gets rotated + assert not mask_rotate[idx_edge, u] + assert mask_rotate[idx_edge, v] + + rot_vec = pos[u] - pos[v] # convention: positive rotation if pointing inwards + rot_vec = rot_vec * torsion_updates[idx_edge] / np.linalg.norm(rot_vec) # idx_edge! 
+ rot_mat = R.from_rotvec(rot_vec).as_matrix() + + pos[mask_rotate[idx_edge]] = (pos[mask_rotate[idx_edge]] - pos[v]) @ rot_mat.T + pos[v] + + if not as_numpy: pos = torch.from_numpy(pos.astype(np.float32)) + return pos + + +def perturb_batch(data, torsion_updates, split=False, return_updates=False): + if type(data) is Data: + return modify_conformer_torsion_angles(data.pos, + data.edge_index.T[data.edge_mask], + data.mask_rotate, torsion_updates) + pos_new = [] if split else copy.deepcopy(data.pos) + edges_of_interest = data.edge_index.T[data.edge_mask] + idx_node = 0 + idx_edges = 0 + torsion_update_list = [] + for i, mask_rotate in enumerate(data.mask_rotate): + pos = data.pos[idx_node:idx_node + mask_rotate.shape[1]] + edges = edges_of_interest[idx_edges:idx_edges + mask_rotate.shape[0]] - idx_node + torsion_update = torsion_updates[idx_edges:idx_edges + mask_rotate.shape[0]] + torsion_update_list.append(torsion_update) + pos_new_ = modify_conformer_torsion_angles(pos, edges, mask_rotate, torsion_update) + if split: + pos_new.append(pos_new_) + else: + pos_new[idx_node:idx_node + mask_rotate.shape[1]] = pos_new_ + + idx_node += mask_rotate.shape[1] + idx_edges += mask_rotate.shape[0] + if return_updates: + return pos_new, torsion_update_list + return pos_new \ No newline at end of file diff --git a/forks/DiffDockv1/utils/torus.py b/forks/DiffDockv1/utils/torus.py new file mode 100644 index 00000000..a5294575 --- /dev/null +++ b/forks/DiffDockv1/utils/torus.py @@ -0,0 +1,83 @@ +import numpy as np +import tqdm +import os + +""" + Preprocessing for the SO(2)/torus sampling and score computations, truncated infinite series are computed and then + cached to memory, therefore the precomputation is only run the first time the repository is run on a machine +""" + + +def p(x, sigma, N=10): + p_ = 0 + for i in tqdm.trange(-N, N + 1): + p_ += np.exp(-(x + 2 * np.pi * i) ** 2 / 2 / sigma ** 2) + return p_ + + +def grad(x, sigma, N=10): + p_ = 0 + for i in tqdm.trange(-N, 
N + 1): + p_ += (x + 2 * np.pi * i) / sigma ** 2 * np.exp(-(x + 2 * np.pi * i) ** 2 / 2 / sigma ** 2) + return p_ + + +X_MIN, X_N = 1e-5, 5000 # relative to pi +SIGMA_MIN, SIGMA_MAX, SIGMA_N = 3e-3, 2, 5000 # relative to pi + +x = 10 ** np.linspace(np.log10(X_MIN), 0, X_N + 1) * np.pi +sigma = 10 ** np.linspace(np.log10(SIGMA_MIN), np.log10(SIGMA_MAX), SIGMA_N + 1) * np.pi + +if os.path.exists('.p.npy'): + p_ = np.load('.p.npy') + score_ = np.load('.score.npy') +else: + print("Precomputing and saving to cache torus distribution table") + p_ = p(x, sigma[:, None], N=100) + np.save('.p.npy', p_) + + score_ = grad(x, sigma[:, None], N=100) / p_ + np.save('.score.npy', score_) + + +def score(x, sigma): + x = (x + np.pi) % (2 * np.pi) - np.pi + sign = np.sign(x) + x = np.log(np.abs(x) / np.pi) + x = (x - np.log(X_MIN)) / (0 - np.log(X_MIN)) * X_N + x = np.round(np.clip(x, 0, X_N)).astype(int) + sigma = np.log(sigma / np.pi) + sigma = (sigma - np.log(SIGMA_MIN)) / (np.log(SIGMA_MAX) - np.log(SIGMA_MIN)) * SIGMA_N + sigma = np.round(np.clip(sigma, 0, SIGMA_N)).astype(int) + return -sign * score_[sigma, x] + + +def p(x, sigma): + x = (x + np.pi) % (2 * np.pi) - np.pi + x = np.log(np.abs(x) / np.pi) + x = (x - np.log(X_MIN)) / (0 - np.log(X_MIN)) * X_N + x = np.round(np.clip(x, 0, X_N)).astype(int) + sigma = np.log(sigma / np.pi) + sigma = (sigma - np.log(SIGMA_MIN)) / (np.log(SIGMA_MAX) - np.log(SIGMA_MIN)) * SIGMA_N + sigma = np.round(np.clip(sigma, 0, SIGMA_N)).astype(int) + return p_[sigma, x] + + +def sample(sigma): + out = sigma * np.random.randn(*sigma.shape) + out = (out + np.pi) % (2 * np.pi) - np.pi + return out + + +score_norm_ = score( + sample(sigma[None].repeat(10000, 0).flatten()), + sigma[None].repeat(10000, 0).flatten() +).reshape(10000, -1) +score_norm_ = (score_norm_ ** 2).mean(0) + + +def score_norm(sigma): + sigma = np.log(sigma / np.pi) + sigma = (sigma - np.log(SIGMA_MIN)) / (np.log(SIGMA_MAX) - np.log(SIGMA_MIN)) * SIGMA_N + sigma = 
np.round(np.clip(sigma, 0, SIGMA_N)).astype(int) + return score_norm_[sigma] diff --git a/forks/DiffDockv1/utils/training.py b/forks/DiffDockv1/utils/training.py new file mode 100644 index 00000000..83d10434 --- /dev/null +++ b/forks/DiffDockv1/utils/training.py @@ -0,0 +1,236 @@ +import copy + +import numpy as np +from torch_geometric.loader import DataLoader +from tqdm import tqdm + +from confidence.dataset import ListDataset +from utils import so3, torus +from utils.sampling import randomize_position, sampling +import torch +from utils.diffusion_utils import get_t_schedule + + +def loss_function(tr_pred, rot_pred, tor_pred, data, t_to_sigma, device, tr_weight=1, rot_weight=1, + tor_weight=1, apply_mean=True, no_torsion=False): + tr_sigma, rot_sigma, tor_sigma = t_to_sigma( + *[torch.cat([d.complex_t[noise_type] for d in data]) if device.type == 'cuda' else data.complex_t[noise_type] + for noise_type in ['tr', 'rot', 'tor']]) + mean_dims = (0, 1) if apply_mean else 1 + + # translation component + tr_score = torch.cat([d.tr_score for d in data], dim=0) if device.type == 'cuda' else data.tr_score + tr_sigma = tr_sigma.unsqueeze(-1) + tr_loss = ((tr_pred.cpu() - tr_score) ** 2 * tr_sigma ** 2).mean(dim=mean_dims) + tr_base_loss = (tr_score ** 2 * tr_sigma ** 2).mean(dim=mean_dims).detach() + + # rotation component + rot_score = torch.cat([d.rot_score for d in data], dim=0) if device.type == 'cuda' else data.rot_score + rot_score_norm = so3.score_norm(rot_sigma.cpu()).unsqueeze(-1) + rot_loss = (((rot_pred.cpu() - rot_score) / rot_score_norm) ** 2).mean(dim=mean_dims) + rot_base_loss = ((rot_score / rot_score_norm) ** 2).mean(dim=mean_dims).detach() + + # torsion component + if not no_torsion: + edge_tor_sigma = torch.from_numpy( + np.concatenate([d.tor_sigma_edge for d in data] if device.type == 'cuda' else data.tor_sigma_edge)) + tor_score = torch.cat([d.tor_score for d in data], dim=0) if device.type == 'cuda' else data.tor_score + tor_score_norm2 = 
torch.tensor(torus.score_norm(edge_tor_sigma.cpu().numpy())).float() + tor_loss = ((tor_pred.cpu() - tor_score) ** 2 / tor_score_norm2) + tor_base_loss = ((tor_score ** 2 / tor_score_norm2)).detach() + if apply_mean: + tor_loss, tor_base_loss = tor_loss.mean() * torch.ones(1, dtype=torch.float), tor_base_loss.mean() * torch.ones(1, dtype=torch.float) + else: + index = torch.cat([torch.ones(d['ligand'].edge_mask.sum()) * i for i, d in + enumerate(data)]).long() if device.type == 'cuda' else data['ligand'].batch[ + data['ligand', 'ligand'].edge_index[0][data['ligand'].edge_mask]] + num_graphs = len(data) if device.type == 'cuda' else data.num_graphs + t_l, t_b_l, c = torch.zeros(num_graphs), torch.zeros(num_graphs), torch.zeros(num_graphs) + c.index_add_(0, index, torch.ones(tor_loss.shape)) + c = c + 0.0001 + t_l.index_add_(0, index, tor_loss) + t_b_l.index_add_(0, index, tor_base_loss) + tor_loss, tor_base_loss = t_l / c, t_b_l / c + else: + if apply_mean: + tor_loss, tor_base_loss = torch.zeros(1, dtype=torch.float), torch.zeros(1, dtype=torch.float) + else: + tor_loss, tor_base_loss = torch.zeros(len(rot_loss), dtype=torch.float), torch.zeros(len(rot_loss), dtype=torch.float) + + loss = tr_loss * tr_weight + rot_loss * rot_weight + tor_loss * tor_weight + return loss, tr_loss.detach(), rot_loss.detach(), tor_loss.detach(), tr_base_loss, rot_base_loss, tor_base_loss + + +class AverageMeter(): + def __init__(self, types, unpooled_metrics=False, intervals=1): + self.types = types + self.intervals = intervals + self.count = 0 if intervals == 1 else torch.zeros(len(types), intervals) + self.acc = {t: torch.zeros(intervals) for t in types} + self.unpooled_metrics = unpooled_metrics + + def add(self, vals, interval_idx=None): + if self.intervals == 1: + self.count += 1 if vals[0].dim() == 0 else len(vals[0]) + for type_idx, v in enumerate(vals): + self.acc[self.types[type_idx]] += v.sum() if self.unpooled_metrics else v + else: + for type_idx, v in enumerate(vals): + 
self.count[type_idx].index_add_(0, interval_idx[type_idx], torch.ones(len(v))) + if not torch.allclose(v, torch.tensor(0.0)): + self.acc[self.types[type_idx]].index_add_(0, interval_idx[type_idx], v) + + def summary(self): + if self.intervals == 1: + out = {k: v.item() / self.count for k, v in self.acc.items()} + return out + else: + out = {} + for i in range(self.intervals): + for type_idx, k in enumerate(self.types): + out['int' + str(i) + '_' + k] = ( + list(self.acc.values())[type_idx][i] / self.count[type_idx][i]).item() + return out + + +def train_epoch(model, loader, optimizer, device, t_to_sigma, loss_fn, ema_weigths): + model.train() + meter = AverageMeter(['loss', 'tr_loss', 'rot_loss', 'tor_loss', 'tr_base_loss', 'rot_base_loss', 'tor_base_loss']) + + for data in tqdm(loader, total=len(loader)): + if device.type == 'cuda' and len(data) == 1 or device.type == 'cpu' and data.num_graphs == 1: + print("Skipping batch of size 1 since otherwise batchnorm would not work.") + optimizer.zero_grad() + try: + tr_pred, rot_pred, tor_pred = model(data) + loss, tr_loss, rot_loss, tor_loss, tr_base_loss, rot_base_loss, tor_base_loss = \ + loss_fn(tr_pred, rot_pred, tor_pred, data=data, t_to_sigma=t_to_sigma, device=device) + loss.backward() + optimizer.step() + ema_weigths.update(model.parameters()) + meter.add([loss.cpu().detach(), tr_loss, rot_loss, tor_loss, tr_base_loss, rot_base_loss, tor_base_loss]) + except RuntimeError as e: + if 'out of memory' in str(e): + print('| WARNING: ran out of memory, skipping batch') + for p in model.parameters(): + if p.grad is not None: + del p.grad # free some memory + torch.cuda.empty_cache() + continue + elif 'Input mismatch' in str(e): + print('| WARNING: weird torch_cluster error, skipping batch') + for p in model.parameters(): + if p.grad is not None: + del p.grad # free some memory + torch.cuda.empty_cache() + continue + else: + raise e + + return meter.summary() + + +def test_epoch(model, loader, device, t_to_sigma, 
loss_fn, test_sigma_intervals=False): + model.eval() + meter = AverageMeter(['loss', 'tr_loss', 'rot_loss', 'tor_loss', 'tr_base_loss', 'rot_base_loss', 'tor_base_loss'], + unpooled_metrics=True) + + if test_sigma_intervals: + meter_all = AverageMeter( + ['loss', 'tr_loss', 'rot_loss', 'tor_loss', 'tr_base_loss', 'rot_base_loss', 'tor_base_loss'], + unpooled_metrics=True, intervals=10) + + for data in tqdm(loader, total=len(loader)): + try: + with torch.no_grad(): + tr_pred, rot_pred, tor_pred = model(data) + + loss, tr_loss, rot_loss, tor_loss, tr_base_loss, rot_base_loss, tor_base_loss = \ + loss_fn(tr_pred, rot_pred, tor_pred, data=data, t_to_sigma=t_to_sigma, apply_mean=False, device=device) + meter.add([loss.cpu().detach(), tr_loss, rot_loss, tor_loss, tr_base_loss, rot_base_loss, tor_base_loss]) + + if test_sigma_intervals > 0: + complex_t_tr, complex_t_rot, complex_t_tor = [torch.cat([d.complex_t[noise_type] for d in data]) for + noise_type in ['tr', 'rot', 'tor']] + sigma_index_tr = torch.round(complex_t_tr.cpu() * (10 - 1)).long() + sigma_index_rot = torch.round(complex_t_rot.cpu() * (10 - 1)).long() + sigma_index_tor = torch.round(complex_t_tor.cpu() * (10 - 1)).long() + meter_all.add( + [loss.cpu().detach(), tr_loss, rot_loss, tor_loss, tr_base_loss, rot_base_loss, tor_base_loss], + [sigma_index_tr, sigma_index_tr, sigma_index_rot, sigma_index_tor, sigma_index_tr, sigma_index_rot, + sigma_index_tor, sigma_index_tr]) + + except RuntimeError as e: + if 'out of memory' in str(e): + print('| WARNING: ran out of memory, skipping batch') + for p in model.parameters(): + if p.grad is not None: + del p.grad # free some memory + torch.cuda.empty_cache() + continue + elif 'Input mismatch' in str(e): + print('| WARNING: weird torch_cluster error, skipping batch') + for p in model.parameters(): + if p.grad is not None: + del p.grad # free some memory + torch.cuda.empty_cache() + continue + else: + raise e + + out = meter.summary() + if test_sigma_intervals > 0: 
out.update(meter_all.summary()) + return out + + +def inference_epoch(model, complex_graphs, device, t_to_sigma, args): + t_schedule = get_t_schedule(inference_steps=args.inference_steps) + tr_schedule, rot_schedule, tor_schedule = t_schedule, t_schedule, t_schedule + + dataset = ListDataset(complex_graphs) + loader = DataLoader(dataset=dataset, batch_size=1, shuffle=False) + rmsds = [] + + for orig_complex_graph in tqdm(loader): + data_list = [copy.deepcopy(orig_complex_graph)] + randomize_position(data_list, args.no_torsion, False, args.tr_sigma_max) + + predictions_list = None + failed_convergence_counter = 0 + while predictions_list == None: + try: + predictions_list, confidences = sampling(data_list=data_list, model=model.module if device.type=='cuda' else model, + inference_steps=args.inference_steps, + tr_schedule=tr_schedule, rot_schedule=rot_schedule, + tor_schedule=tor_schedule, + device=device, t_to_sigma=t_to_sigma, model_args=args) + except Exception as e: + if 'failed to converge' in str(e): + failed_convergence_counter += 1 + if failed_convergence_counter > 5: + print('| WARNING: SVD failed to converge 5 times - skipping the complex') + break + print('| WARNING: SVD failed to converge - trying again with a new sample') + else: + raise e + if failed_convergence_counter > 5: continue + if args.no_torsion: + orig_complex_graph['ligand'].orig_pos = (orig_complex_graph['ligand'].pos.cpu().numpy() + + orig_complex_graph.original_center.cpu().numpy()) + + filterHs = torch.not_equal(predictions_list[0]['ligand'].x[:, 0], 0).cpu().numpy() + + if isinstance(orig_complex_graph['ligand'].orig_pos, list): + orig_complex_graph['ligand'].orig_pos = orig_complex_graph['ligand'].orig_pos[0] + + ligand_pos = np.asarray( + [complex_graph['ligand'].pos.cpu().numpy()[filterHs] for complex_graph in predictions_list]) + orig_ligand_pos = np.expand_dims( + orig_complex_graph['ligand'].orig_pos[filterHs] - orig_complex_graph.original_center.cpu().numpy(), axis=0) + rmsd = 
np.sqrt(((ligand_pos - orig_ligand_pos) ** 2).sum(axis=2).mean(axis=1)) + rmsds.append(rmsd) + + rmsds = np.array(rmsds) + losses = {'rmsds_lt2': (100 * (rmsds < 2).sum() / len(rmsds)), + 'rmsds_lt5': (100 * (rmsds < 5).sum() / len(rmsds))} + return losses diff --git a/forks/DiffDockv1/utils/utils.py b/forks/DiffDockv1/utils/utils.py new file mode 100644 index 00000000..975319f9 --- /dev/null +++ b/forks/DiffDockv1/utils/utils.py @@ -0,0 +1,243 @@ +import os +import subprocess +import warnings +from datetime import datetime +import signal +from contextlib import contextmanager +import numpy as np +import torch +import yaml +from rdkit import Chem +from rdkit.Chem import RemoveHs, MolToPDBFile +from torch_geometric.nn.data_parallel import DataParallel + +from models.all_atom_score_model import TensorProductScoreModel as AAScoreModel +from models.score_model import TensorProductScoreModel as CGScoreModel +from utils.diffusion_utils import get_timestep_embedding +from spyrmsd import rmsd, molecule + + +def get_obrmsd(mol1_path, mol2_path, cache_name=None): + cache_name = datetime.now().strftime('date%d-%m_time%H-%M-%S.%f') if cache_name is None else cache_name + os.makedirs(".openbabel_cache", exist_ok=True) + if not isinstance(mol1_path, str): + MolToPDBFile(mol1_path, '.openbabel_cache/obrmsd_mol1_cache.pdb') + mol1_path = '.openbabel_cache/obrmsd_mol1_cache.pdb' + if not isinstance(mol2_path, str): + MolToPDBFile(mol2_path, '.openbabel_cache/obrmsd_mol2_cache.pdb') + mol2_path = '.openbabel_cache/obrmsd_mol2_cache.pdb' + with warnings.catch_warnings(): + warnings.simplefilter("ignore") + return_code = subprocess.run(f"obrms {mol1_path} {mol2_path} > .openbabel_cache/obrmsd_{cache_name}.rmsd", + shell=True) + print(return_code) + obrms_output = read_strings_from_txt(f".openbabel_cache/obrmsd_{cache_name}.rmsd") + rmsds = [line.split(" ")[-1] for line in obrms_output] + return np.array(rmsds, dtype=np.float) + + +def remove_all_hs(mol): + params = 
Chem.RemoveHsParameters() + params.removeAndTrackIsotopes = True + params.removeDefiningBondStereo = True + params.removeDegreeZero = True + params.removeDummyNeighbors = True + params.removeHigherDegrees = True + params.removeHydrides = True + params.removeInSGroups = True + params.removeIsotopes = True + params.removeMapped = True + params.removeNonimplicit = True + params.removeOnlyHNeighbors = True + params.removeWithQuery = True + params.removeWithWedgedBond = True + return RemoveHs(mol, params) + + +def read_strings_from_txt(path): + # every line will be one element of the returned list + with open(path) as file: + lines = file.readlines() + return [line.rstrip() for line in lines] + + +def save_yaml_file(path, content): + assert isinstance(path, str), f'path must be a string, got {path} which is a {type(path)}' + content = yaml.dump(data=content) + if '/' in path and os.path.dirname(path) and not os.path.exists(os.path.dirname(path)): + os.makedirs(os.path.dirname(path)) + with open(path, 'w') as f: + f.write(content) + + +def get_optimizer_and_scheduler(args, model, scheduler_mode='min'): + optimizer = torch.optim.Adam(filter(lambda p: p.requires_grad, model.parameters()), lr=args.lr, weight_decay=args.w_decay) + + if args.scheduler == 'plateau': + scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(optimizer, mode=scheduler_mode, factor=0.7, + patience=args.scheduler_patience, min_lr=args.lr / 100) + else: + print('No scheduler') + scheduler = None + + return optimizer, scheduler + + +def get_model(args, device, t_to_sigma, no_parallel=False, confidence_mode=False): + if 'all_atoms' in args and args.all_atoms: + model_class = AAScoreModel + else: + model_class = CGScoreModel + + timestep_emb_func = get_timestep_embedding( + embedding_type=args.embedding_type, + embedding_dim=args.sigma_embed_dim, + embedding_scale=args.embedding_scale) + + lm_embedding_type = None + if args.esm_embeddings_path is not None: lm_embedding_type = 'esm' + + model = 
model_class(t_to_sigma=t_to_sigma, + device=device, + no_torsion=args.no_torsion, + timestep_emb_func=timestep_emb_func, + num_conv_layers=args.num_conv_layers, + lig_max_radius=args.max_radius, + scale_by_sigma=args.scale_by_sigma, + sigma_embed_dim=args.sigma_embed_dim, + ns=args.ns, nv=args.nv, + distance_embed_dim=args.distance_embed_dim, + cross_distance_embed_dim=args.cross_distance_embed_dim, + batch_norm=not args.no_batch_norm, + dropout=args.dropout, + use_second_order_repr=args.use_second_order_repr, + cross_max_distance=args.cross_max_distance, + dynamic_max_cross=args.dynamic_max_cross, + lm_embedding_type=lm_embedding_type, + confidence_mode=confidence_mode, + num_confidence_outputs=len( + args.rmsd_classification_cutoff) + 1 if 'rmsd_classification_cutoff' in args and isinstance( + args.rmsd_classification_cutoff, list) else 1) + + if device.type == 'cuda' and not no_parallel: + model = DataParallel(model) + model.to(device) + return model + + +def get_symmetry_rmsd(mol, coords1, coords2, mol2=None): + with time_limit(10): + mol = molecule.Molecule.from_rdkit(mol) + mol2 = molecule.Molecule.from_rdkit(mol2) if mol2 is not None else mol2 + mol2_atomicnums = mol2.atomicnums if mol2 is not None else mol.atomicnums + mol2_adjacency_matrix = mol2.adjacency_matrix if mol2 is not None else mol.adjacency_matrix + RMSD = rmsd.symmrmsd( + coords1, + coords2, + mol.atomicnums, + mol2_atomicnums, + mol.adjacency_matrix, + mol2_adjacency_matrix, + ) + return RMSD + + +class TimeoutException(Exception): pass + + +@contextmanager +def time_limit(seconds): + def signal_handler(signum, frame): + raise TimeoutException("Timed out!") + + signal.signal(signal.SIGALRM, signal_handler) + signal.alarm(seconds) + try: + yield + finally: + signal.alarm(0) + + +class ExponentialMovingAverage: + """ from https://github.com/yang-song/score_sde_pytorch/blob/main/models/ema.py + Maintains (exponential) moving average of a set of parameters. 
""" + + def __init__(self, parameters, decay, use_num_updates=True): + """ + Args: + parameters: Iterable of `torch.nn.Parameter`; usually the result of + `model.parameters()`. + decay: The exponential decay. + use_num_updates: Whether to use number of updates when computing + averages. + """ + if decay < 0.0 or decay > 1.0: + raise ValueError('Decay must be between 0 and 1') + self.decay = decay + self.num_updates = 0 if use_num_updates else None + self.shadow_params = [p.clone().detach() + for p in parameters if p.requires_grad] + self.collected_params = [] + + def update(self, parameters): + """ + Update currently maintained parameters. + Call this every time the parameters are updated, such as the result of + the `optimizer.step()` call. + Args: + parameters: Iterable of `torch.nn.Parameter`; usually the same set of + parameters used to initialize this object. + """ + decay = self.decay + if self.num_updates is not None: + self.num_updates += 1 + decay = min(decay, (1 + self.num_updates) / (10 + self.num_updates)) + one_minus_decay = 1.0 - decay + with torch.no_grad(): + parameters = [p for p in parameters if p.requires_grad] + for s_param, param in zip(self.shadow_params, parameters): + s_param.sub_(one_minus_decay * (s_param - param)) + + def copy_to(self, parameters): + """ + Copy current parameters into given collection of parameters. + Args: + parameters: Iterable of `torch.nn.Parameter`; the parameters to be + updated with the stored moving averages. + """ + parameters = [p for p in parameters if p.requires_grad] + for s_param, param in zip(self.shadow_params, parameters): + if param.requires_grad: + param.data.copy_(s_param.data) + + def store(self, parameters): + """ + Save the current parameters for restoring later. + Args: + parameters: Iterable of `torch.nn.Parameter`; the parameters to be + temporarily stored. 
+ """ + self.collected_params = [param.clone() for param in parameters] + + def restore(self, parameters): + """ + Restore the parameters stored with the `store` method. + Useful to validate the model with EMA parameters without affecting the + original optimization process. Store the parameters before the + `copy_to` method. After validation (or model saving), use this to + restore the former parameters. + Args: + parameters: Iterable of `torch.nn.Parameter`; the parameters to be + updated with the stored parameters. + """ + for c_param, param in zip(self.collected_params, parameters): + param.data.copy_(c_param.data) + + def state_dict(self): + return dict(decay=self.decay, num_updates=self.num_updates, + shadow_params=self.shadow_params) + + def load_state_dict(self, state_dict, device): + self.decay = state_dict['decay'] + self.num_updates = state_dict['num_updates'] + self.shadow_params = [tensor.to(device) for tensor in state_dict['shadow_params']] diff --git a/forks/DiffDockv1/utils/visualise.py b/forks/DiffDockv1/utils/visualise.py new file mode 100644 index 00000000..fc655429 --- /dev/null +++ b/forks/DiffDockv1/utils/visualise.py @@ -0,0 +1,52 @@ +from rdkit.Chem.rdmolfiles import MolToPDBBlock, MolToPDBFile +import rdkit.Chem +from rdkit import Geometry +from collections import defaultdict +import copy +import numpy as np +import torch + + +class PDBFile: + def __init__(self, mol): + self.parts = defaultdict(dict) + self.mol = copy.deepcopy(mol) + [self.mol.RemoveConformer(j) for j in range(mol.GetNumConformers()) if j] + def add(self, coords, order, part=0, repeat=1): + if type(coords) in [rdkit.Chem.Mol, rdkit.Chem.RWMol]: + block = MolToPDBBlock(coords).split('\n')[:-2] + self.parts[part][order] = {'block': block, 'repeat': repeat} + return + elif type(coords) is np.ndarray: + coords = coords.astype(np.float64) + elif type(coords) is torch.Tensor: + coords = coords.double().numpy() + for i in range(coords.shape[0]): + 
self.mol.GetConformer(0).SetAtomPosition(i, Geometry.Point3D(coords[i, 0], coords[i, 1], coords[i, 2])) + block = MolToPDBBlock(self.mol).split('\n')[:-2] + self.parts[part][order] = {'block': block, 'repeat': repeat} + + def write(self, path=None, limit_parts=None): + is_first = True + str_ = '' + for part in sorted(self.parts.keys()): + if limit_parts and part >= limit_parts: + break + part = self.parts[part] + keys_positive = sorted(filter(lambda x: x >=0, part.keys())) + keys_negative = sorted(filter(lambda x: x < 0, part.keys())) + keys = list(keys_positive) + list(keys_negative) + for key in keys: + block = part[key]['block'] + times = part[key]['repeat'] + for _ in range(times): + if not is_first: + block = [line for line in block if 'CONECT' not in line] + is_first = False + str_ += 'MODEL\n' + str_ += '\n'.join(block) + str_ += '\nENDMDL\n' + if not path: + return str_ + with open(path, 'w') as f: + f.write(str_) \ No newline at end of file diff --git a/forks/DiffDockv1/visualizations/README.md b/forks/DiffDockv1/visualizations/README.md new file mode 100644 index 00000000..0675fb01 --- /dev/null +++ b/forks/DiffDockv1/visualizations/README.md @@ -0,0 +1,14 @@ +## Visualizations of complexes that were unseen during training. EquiBind (cyan), DockDiff highest confidence sample (red), all other DockDiff samples (orange), and the crystal structure (green). 
+ +Complex 6agt: +![Alt Text](example_6agt_symmetric.gif) + +Complex 6dz3: +![Alt Text](example_6dz3_symmetric.gif) + +Complex 6gdy: +![Alt Text](example_6gdy_symmetric.gif) + +Complex 6ckl: +![Alt Text](example_6ckl_symmetric.gif) + diff --git a/forks/DiffDockv1/visualizations/example_6agt_symmetric.gif b/forks/DiffDockv1/visualizations/example_6agt_symmetric.gif new file mode 100644 index 00000000..cd568e68 Binary files /dev/null and b/forks/DiffDockv1/visualizations/example_6agt_symmetric.gif differ diff --git a/forks/DiffDockv1/visualizations/example_6ckl_symmetric.gif b/forks/DiffDockv1/visualizations/example_6ckl_symmetric.gif new file mode 100644 index 00000000..07586468 Binary files /dev/null and b/forks/DiffDockv1/visualizations/example_6ckl_symmetric.gif differ diff --git a/forks/DiffDockv1/visualizations/example_6dz3_symmetric.gif b/forks/DiffDockv1/visualizations/example_6dz3_symmetric.gif new file mode 100644 index 00000000..0b43816b Binary files /dev/null and b/forks/DiffDockv1/visualizations/example_6dz3_symmetric.gif differ diff --git a/forks/DiffDockv1/visualizations/example_6gdy_symmetric.gif b/forks/DiffDockv1/visualizations/example_6gdy_symmetric.gif new file mode 100644 index 00000000..28786211 Binary files /dev/null and b/forks/DiffDockv1/visualizations/example_6gdy_symmetric.gif differ diff --git a/forks/DiffDockv1/visualizations/overview.png b/forks/DiffDockv1/visualizations/overview.png new file mode 100644 index 00000000..e89749c7 Binary files /dev/null and b/forks/DiffDockv1/visualizations/overview.png differ diff --git a/forks/DiffDockv1/workdir/paper_confidence_model/best_model_epoch75.pt b/forks/DiffDockv1/workdir/paper_confidence_model/best_model_epoch75.pt new file mode 100644 index 00000000..c7d8b64d Binary files /dev/null and b/forks/DiffDockv1/workdir/paper_confidence_model/best_model_epoch75.pt differ diff --git a/forks/DiffDockv1/workdir/paper_confidence_model/model_parameters.yml 
b/forks/DiffDockv1/workdir/paper_confidence_model/model_parameters.yml new file mode 100644 index 00000000..2b6cd415 --- /dev/null +++ b/forks/DiffDockv1/workdir/paper_confidence_model/model_parameters.yml @@ -0,0 +1,85 @@ +all_atoms: true +atom_max_neighbors: 8 +atom_radius: 5 +balance: false +batch_size: 16 +best_model_save_frequency: 5 +c_alpha_max_neighbors: 24 +cache_creation_id: 1 +cache_ids_to_combine: +- '1' +- '2' +- '3' +- '4' +cache_path: data/cache +ckpt: best_model.pt +confidence_dropout: 0.0 +confidence_loss_weigth: 1 +confidence_no_batchnorm: false +confidence_weight: 0.33 +config: null +cross_distance_embed_dim: 32 +cross_max_distance: 80 +data_dir: data/PDBBind_processed/ +distance_embed_dim: 32 +dropout: 0.1 +dynamic_max_cross: true +embedding_scale: 10000 +embedding_type: sinusoidal +esm_embeddings_path: data/esm2_3billion_embeddings.pt +high_confidence_threshold: 5.0 +include_confidence_prediction: false +inference_steps: 20 +limit_complexes: 0 +lm_embeddings_path: null +log_dir: workdir +lr: 0.0003 +main_metric: loss +main_metric_goal: min +matching_maxiter: 20 +matching_popsize: 20 +max_lig_size: null +max_radius: 5.0 +model_save_frequency: 0 +n_epochs: 100 +no_batch_norm: false +no_torsion: false +ns: 24 +num_conformers: 1 +num_conv_layers: 5 +num_workers: 1 +nv: 6 +original_model_dir: workdir/temp_restart_ema_ESM2emb_tr34 +project: diffdock_confidence +receptor_radius: 15.0 +remove_hs: true +restart_dir: null +rmsd_classification_cutoff: +- 2.0 +rmsd_prediction: false +rot_sigma_max: 1.55 +rot_sigma_min: 0.03 +rot_weight: 0.33 +run_name: confidencetrain_samples28_FILTERFROM_ema_ESM2emb_tr34 +samples_per_complex: 7 +scale_by_sigma: true +scheduler: plateau +scheduler_patience: 50 +sigma_embed_dim: 32 +split_test: data/splits/timesplit_test +split_train: data/splits/timesplit_no_lig_overlap_train +split_val: data/splits/timesplit_no_lig_overlap_val +tor_sigma_max: 3.14 +tor_sigma_min: 0.0314 +tor_sigma_schedule: expbeta +tor_weight: 0.33 
+tr_only_confidence: true +tr_sigma_max: 34.0 +tr_sigma_min: 0.1 +tr_weight: 0.33 +train_sampling: linear +transfer_weights: false +use_original_model_cache: false +use_second_order_repr: false +w_decay: 0.0 +wandb: true diff --git a/forks/DiffDockv1/workdir/paper_score_model/best_ema_inference_epoch_model.pt b/forks/DiffDockv1/workdir/paper_score_model/best_ema_inference_epoch_model.pt new file mode 100644 index 00000000..f61f4ac3 Binary files /dev/null and b/forks/DiffDockv1/workdir/paper_score_model/best_ema_inference_epoch_model.pt differ diff --git a/forks/DiffDockv1/workdir/paper_score_model/model_parameters.yml b/forks/DiffDockv1/workdir/paper_score_model/model_parameters.yml new file mode 100644 index 00000000..7b0dd3fd --- /dev/null +++ b/forks/DiffDockv1/workdir/paper_score_model/model_parameters.yml @@ -0,0 +1,83 @@ +all_atoms: false +atom_max_neighbors: 8 +atom_radius: 5 +batch_size: 16 +c_alpha_max_neighbors: 24 +cache_path: data/cacheNew +confidence_dropout: 0.0 +confidence_no_batchnorm: false +config: null +cross_distance_embed_dim: 64 +cross_max_distance: 80 +cudnn_benchmark: true +data_dir: data/PDBBind_processed/ +dataset: pdbbind +distance_embed_dim: 64 +dropout: 0.1 +dynamic_max_cross: true +ema_rate: 0.999 +embedding_scale: 10000 +embedding_type: sinusoidal +esm_embeddings_path: data/esm2_3billion_embeddings.pt +high_confidence_threshold: 5.0 +include_confidence_prediction: false +inf_pocket_cutoff: 5 +inf_pocket_knowledge: false +inference_earlystop_goal: max +inference_earlystop_metric: valinf_rmsds_lt2 +inference_steps: 20 +limit_complexes: 0 +lm_embeddings_path: null +log_dir: workdir +lr: 0.001 +matching_maxiter: 20 +matching_popsize: 20 +max_lig_size: null +max_radius: 5.0 +multiplicity: 1 +n_epochs: 850 +no_batch_norm: false +no_torsion: false +norm_by_sigma: false +not_full_dataset: false +ns: 48 +num_conformers: 1 +num_conv_layers: 6 +num_dataloader_workers: 1 +num_gpus: 1 +num_inference_complexes: 500 +num_workers: 1 +nv: 10 
+odd_parity: false +pin_memory: true +pretrained_model: null +project: diffdock_train +receptor_radius: 15.0 +remove_hs: true +restart_dir: null +rot_sigma_max: 1.55 +rot_sigma_min: 0.03 +rot_weight: 0.33 +run_name: big_ema_ESM2emb +scale_by_sigma: true +scheduler: plateau +scheduler_patience: 30 +sigma_embed_dim: 64 +split_test: data/splits/timesplit_test +split_train: data/splits/timesplit_no_lig_overlap_train +split_val: data/splits/timesplit_no_lig_overlap_val +test_sigma_intervals: true +tor_sigma_max: 3.14 +tor_sigma_min: 0.0314 +tor_weight: 0.33 +tr_only_confidence: true +tr_sigma_max: 19.0 +tr_sigma_min: 0.1 +tr_weight: 0.33 +train_inference_freq: null +train_sampling: linear +use_ema: true +use_second_order_repr: false +val_inference_freq: 5 +w_decay: 0.0 +wandb: true diff --git a/forks/DynamicBind/run_single_protein_inference.py b/forks/DynamicBind/run_single_protein_inference.py index 0470025b..79c0340a 100755 --- a/forks/DynamicBind/run_single_protein_inference.py +++ b/forks/DynamicBind/run_single_protein_inference.py @@ -10,6 +10,7 @@ import rdkit.Chem as Chem import glob import shutil +import uuid from typing import Literal import argparse @@ -21,6 +22,7 @@ parser.add_argument('--savings_per_complex', type=int, default=1, help='num of samples data saved for movie generation.') parser.add_argument('--inference_steps', type=int, default=20, help='num of coordinate updates. 
(movie frames)') parser.add_argument('--batch_size', type=int, default=5, help='chunk size for inference batches.') +parser.add_argument('--cache_path', type=str, default='data/cache', help='Folder from where to load/restore cached dataset') parser.add_argument('--header', type=str, default='test', help='informative name used to name result folder') parser.add_argument('--results', type=str, default='results', help='result folder.') parser.add_argument('--device', type=int, default=0, help='CUDA_VISIBLE_DEVICES') @@ -173,7 +175,8 @@ def ref_filename_sort_key(filepath): return ligand_number -timestamp = datetime.now().strftime("%Y_%m_%d_%H_%M") +unique_id = str(uuid.uuid4()) +timestamp = datetime.now().strftime("%Y_%m_%d_%H_%M") + f"_{unique_id}" logging.basicConfig(level=logging.INFO) handler = logging.FileHandler(f'run.log') @@ -217,7 +220,7 @@ def ref_filename_sort_key(filepath): elif args.ligand_is_sdf: # clean protein file os.system(f"mkdir -p {outputs_dir}") - cleaned_proteinFile = os.path.join(outputs_dir, "cleaned_input_proteinFile.pdb") + cleaned_proteinFile = os.path.join(outputs_dir, f"cleaned_input_proteinFile_{timestamp}_{unique_id}.pdb") ligandFile_with_protein_path = os.path.join(outputs_dir, f"ligandFile_with_protein_path_{timestamp}.csv") # if os.path.exists(ligandFile_with_protein_path): # os.system(f"rm {ligandFile_with_protein_path}") @@ -245,7 +248,7 @@ def ref_filename_sort_key(filepath): ligands.to_csv(ligandFile_with_protein_path, index=False) else: # clean protein file - cleaned_proteinFile = os.path.join(outputs_dir, "cleaned_input_proteinFile.pdb") + cleaned_proteinFile = os.path.join(outputs_dir, f"cleaned_input_proteinFile_{timestamp}_{unique_id}.pdb") ligandFile_with_protein_path = os.path.join(outputs_dir, f"ligandFile_with_protein_path_{timestamp}.csv") cmd = f"{relax_python} {script_folder}/clean_pdb.py {args.proteinFile} {cleaned_proteinFile}" do(cmd) @@ -277,17 +280,18 @@ def ref_filename_sort_key(filepath): protein_dynamic = "" 
results_dir = f'{outputs_dir}/results/{args.header}' + if multi_ligand_inputs: if args.hts: raise NotImplementedError("High-throughput mode is not yet supported when using multi-ligand inputs.") os.system(f"mkdir -p {outputs_dir}") - cmd = f"{python} {script_folder}/datasets/esm_embedding_preparation.py --protein_ligand_csv {ligandFile_with_protein_path} --out_file {os.path.join(outputs_dir, f'prepared_for_esm_{header}.fasta')}" + cmd = f"{python} {script_folder}/datasets/esm_embedding_preparation.py --protein_ligand_csv {ligandFile_with_protein_path} --out_file {os.path.join(outputs_dir, f'prepared_for_esm_{header}_{unique_id}.fasta')}" do(cmd) - cmd = f"CUDA_VISIBLE_DEVICES={args.device} {python} {script_folder}/esm/scripts/extract.py esm2_t33_650M_UR50D {os.path.join(outputs_dir, f'prepared_for_esm_{header}.fasta')} {os.path.join(outputs_dir, 'esm2_output')} --repr_layers 33 --include per_tok --truncation_seq_length 10000 --model_dir {script_folder}/esm_models" + cmd = f"CUDA_VISIBLE_DEVICES={args.device} {python} {script_folder}/esm/scripts/extract.py esm2_t33_650M_UR50D {os.path.join(outputs_dir, f'prepared_for_esm_{header}_{unique_id}.fasta')} {os.path.join(outputs_dir, 'esm2_output' + unique_id)} --repr_layers 33 --include per_tok --truncation_seq_length 10000 --model_dir {script_folder}/esm_models" do(cmd) cmd = f"CUDA_VISIBLE_DEVICES={args.device} {python} {script_folder}/screening.py --seed {args.seed} --ckpt {ckpt} {protein_dynamic}" cmd += f" --save_visualisation --model_dir {model_workdir} --protein_ligand_csv {ligandFile_with_protein_path} " - cmd += f" --esm_embeddings_path {os.path.join(outputs_dir, 'esm2_output')} --out_dir {args.results}/{header} --inference_steps {args.inference_steps} --samples_per_complex {args.samples_per_complex} --savings_per_complex {args.savings_per_complex} --batch_size {args.batch_size} --actual_steps {args.inference_steps} --no_final_step_noise" + cmd += f" --esm_embeddings_path {os.path.join(outputs_dir, 'esm2_output' 
+ unique_id)} --out_dir {args.results}/{header} --inference_steps {args.inference_steps} --samples_per_complex {args.samples_per_complex} --savings_per_complex {args.savings_per_complex} --batch_size {args.batch_size} --actual_steps {args.inference_steps} --no_final_step_noise" do(cmd) print("hts complete.") else: @@ -295,13 +299,13 @@ def ref_filename_sort_key(filepath): os.system(f"mkdir -p {outputs_dir}") for ligand_idx in range(len(ligands)): ligands.iloc[ligand_idx:ligand_idx + 1].to_csv(ligandFile_with_protein_path, index=False) - cmd = f"{python} {script_folder}/datasets/esm_embedding_preparation.py --protein_ligand_csv {ligandFile_with_protein_path} --out_file {os.path.join(outputs_dir, f'prepared_for_esm_{header}.fasta')}" + cmd = f"{python} {script_folder}/datasets/esm_embedding_preparation.py --protein_ligand_csv {ligandFile_with_protein_path} --out_file {os.path.join(outputs_dir, f'prepared_for_esm_{header}_{unique_id}.fasta')}" do(cmd) - cmd = f"CUDA_VISIBLE_DEVICES={args.device} {python} {script_folder}/esm/scripts/extract.py esm2_t33_650M_UR50D {os.path.join(outputs_dir, f'prepared_for_esm_{header}.fasta')} {os.path.join(outputs_dir, 'esm2_output')} --repr_layers 33 --include per_tok --truncation_seq_length 10000 --model_dir {script_folder}/esm_models" + cmd = f"CUDA_VISIBLE_DEVICES={args.device} {python} {script_folder}/esm/scripts/extract.py esm2_t33_650M_UR50D {os.path.join(outputs_dir, f'prepared_for_esm_{header}_{unique_id}.fasta')} {os.path.join(outputs_dir, 'esm2_output' + unique_id)} --repr_layers 33 --include per_tok --truncation_seq_length 10000 --model_dir {script_folder}/esm_models" do(cmd) - cmd = f"{python} {script_folder}/inference.py --seed {args.seed} --ckpt {ckpt} {protein_dynamic}" + cmd = f"{python} {script_folder}/inference.py --cache_path {args.cache_path} --seed {args.seed} --ckpt {ckpt} {protein_dynamic}" cmd += f" --save_visualisation --model_dir {model_workdir} --protein_ligand_csv {ligandFile_with_protein_path} " - cmd += 
f" --esm_embeddings_path {os.path.join(outputs_dir, 'esm2_output')} --out_dir {args.results}/{header} --inference_steps {args.inference_steps} --samples_per_complex {args.samples_per_complex} --savings_per_complex {args.savings_per_complex} --batch_size {args.batch_size} --actual_steps {args.inference_steps} --no_final_step_noise" + cmd += f" --esm_embeddings_path {os.path.join(outputs_dir, 'esm2_output' + unique_id)} --out_dir {args.results}/{header} --inference_steps {args.inference_steps} --samples_per_complex {args.samples_per_complex} --savings_per_complex {args.savings_per_complex} --batch_size {args.batch_size} --actual_steps {args.inference_steps} --no_final_step_noise" os.environ['CUDA_VISIBLE_DEVICES'] = str(args.device) do(cmd) print(f"inference for ligand {ligand_idx + 1}/{len(ligands)} complete.") @@ -372,25 +376,25 @@ def ref_filename_sort_key(filepath): else: if args.hts: os.system(f"mkdir -p {outputs_dir}") - cmd = f"{python} {script_folder}/datasets/esm_embedding_preparation.py --protein_ligand_csv {ligandFile_with_protein_path} --out_file {os.path.join(outputs_dir, f'prepared_for_esm_{header}.fasta')}" + cmd = f"{python} {script_folder}/datasets/esm_embedding_preparation.py --protein_ligand_csv {ligandFile_with_protein_path} --out_file {os.path.join(outputs_dir, f'prepared_for_esm_{header}_{unique_id}.fasta')}" do(cmd) - cmd = f"CUDA_VISIBLE_DEVICES={args.device} {python} {script_folder}/esm/scripts/extract.py esm2_t33_650M_UR50D {os.path.join(outputs_dir, f'prepared_for_esm_{header}.fasta')} {os.path.join(outputs_dir, 'esm2_output')} --repr_layers 33 --include per_tok --truncation_seq_length 10000 --model_dir {script_folder}/esm_models" + cmd = f"CUDA_VISIBLE_DEVICES={args.device} {python} {script_folder}/esm/scripts/extract.py esm2_t33_650M_UR50D {os.path.join(outputs_dir, f'prepared_for_esm_{header}_{unique_id}.fasta')} {os.path.join(outputs_dir, 'esm2_output' + unique_id)} --repr_layers 33 --include per_tok --truncation_seq_length 10000 
--model_dir {script_folder}/esm_models" do(cmd) cmd = f"CUDA_VISIBLE_DEVICES={args.device} {python} {script_folder}/screening.py --seed {args.seed} --ckpt {ckpt} {protein_dynamic}" cmd += f" --save_visualisation --model_dir {model_workdir} --protein_ligand_csv {ligandFile_with_protein_path} " - cmd += f" --esm_embeddings_path {os.path.join(outputs_dir, 'esm2_output')} --out_dir {args.results}/{header} --inference_steps {args.inference_steps} --samples_per_complex {args.samples_per_complex} --savings_per_complex {args.savings_per_complex} --batch_size {args.batch_size} --actual_steps {args.inference_steps} --no_final_step_noise" + cmd += f" --esm_embeddings_path {os.path.join(outputs_dir, 'esm2_output' + unique_id)} --out_dir {args.results}/{header} --inference_steps {args.inference_steps} --samples_per_complex {args.samples_per_complex} --savings_per_complex {args.savings_per_complex} --batch_size {args.batch_size} --actual_steps {args.inference_steps} --no_final_step_noise" do(cmd) print("hts complete.") else: if not args.no_inference: os.system(f"mkdir -p {outputs_dir}") - cmd = f"{python} {script_folder}/datasets/esm_embedding_preparation.py --protein_ligand_csv {ligandFile_with_protein_path} --out_file {os.path.join(outputs_dir, f'prepared_for_esm_{header}.fasta')}" + cmd = f"{python} {script_folder}/datasets/esm_embedding_preparation.py --protein_ligand_csv {ligandFile_with_protein_path} --out_file {os.path.join(outputs_dir, f'prepared_for_esm_{header}_{unique_id}.fasta')}" do(cmd) - cmd = f"CUDA_VISIBLE_DEVICES={args.device} {python} {script_folder}/esm/scripts/extract.py esm2_t33_650M_UR50D {os.path.join(outputs_dir, f'prepared_for_esm_{header}.fasta')} {os.path.join(outputs_dir, 'esm2_output')} --repr_layers 33 --include per_tok --truncation_seq_length 10000 --model_dir {script_folder}/esm_models" + cmd = f"CUDA_VISIBLE_DEVICES={args.device} {python} {script_folder}/esm/scripts/extract.py esm2_t33_650M_UR50D {os.path.join(outputs_dir, 
f'prepared_for_esm_{header}_{unique_id}.fasta')} {os.path.join(outputs_dir, 'esm2_output' + unique_id)} --repr_layers 33 --include per_tok --truncation_seq_length 10000 --model_dir {script_folder}/esm_models" do(cmd) - cmd = f"CUDA_VISIBLE_DEVICES={args.device} {python} {script_folder}/inference.py --seed {args.seed} --ckpt {ckpt} {protein_dynamic}" + cmd = f"CUDA_VISIBLE_DEVICES={args.device} {python} {script_folder}/inference.py --cache_path {args.cache_path} --seed {args.seed} --ckpt {ckpt} {protein_dynamic}" cmd += f" --save_visualisation --model_dir {model_workdir} --protein_ligand_csv {ligandFile_with_protein_path} " - cmd += f" --esm_embeddings_path {os.path.join(outputs_dir, 'esm2_output')} --out_dir {args.results}/{header} --inference_steps {args.inference_steps} --samples_per_complex {args.samples_per_complex} --savings_per_complex {args.savings_per_complex} --batch_size {args.batch_size} --actual_steps {args.inference_steps} --no_final_step_noise" + cmd += f" --esm_embeddings_path {os.path.join(outputs_dir, 'esm2_output' + unique_id)} --out_dir {args.results}/{header} --inference_steps {args.inference_steps} --samples_per_complex {args.samples_per_complex} --savings_per_complex {args.savings_per_complex} --batch_size {args.batch_size} --actual_steps {args.inference_steps} --no_final_step_noise" do(cmd) print("inference complete.") diff --git a/forks/FABind/inference/fabind_astex_diverse_inputs.csv b/forks/FABind/inference/fabind_astex_diverse_inputs.csv new file mode 100644 index 00000000..cc83e0af --- /dev/null +++ b/forks/FABind/inference/fabind_astex_diverse_inputs.csv @@ -0,0 +1,86 @@ +Cleaned_SMILES,pdb_id +CCCNC(=[NH2+])NCCC[C@H](N)C(=O)O,1MMV_3AR +O=C(O)c1sccc1S(=O)(=O)Nc1ccc(Cl)cc1,1L2S_STC +Cc1cc(-n2ncc(=O)[nH]c2=O)cc(C)c1Oc1ccc(O)c(C(C)C)c1,1N46_PFA +COc1ccc(S(=O)(=O)NCC2CC2)cc1Nc1ncc(-c2cccc(-c3cccnc3)c2)o1,1Y6B_AAX +C[C@H](Nc1nccc(-c2c(-c3ccc(F)cc3)c(=O)n(C3CCNCC3)n2C)n1)c1ccccc1,1YWR_LI9 +Cc1ccc2c(c1)C(=O)[C@]1(O)CCN(c3ccccc3)C1=N2,1YV3_BIT 
+C[C@H](C(=O)O)c1ccc(-c2ccccc2)c(F)c1,1R9O_FLP +Cc1ncc(C[n+]2csc(CCO)c2C)c(N)n1,1IG3_VIB +CC(C)SCC[C@@H](N)[C@H](O)C(=O)NNC(=O)c1cccc(Cl)c1,1R58_AO5 +CCOc1cc(OCC)c(F)c([C@@H](Nc2ccc(C(=N)N)c(O)c2)C(=O)NS(=O)(=O)c2cccc(N)c2)c1,1YGC_905 +C/C=C1\[C@@H]2C=C(C)C[C@@]1(N)c1ccc(=O)[nH]c1C2,1GPK_HUP +NS(=O)(=O)c1ccc2c(c1)CNCC2,1HNN_SKF +O=C(O)C[C@H](NC(=O)Cc1c[nH]c2ccccc12)C(=O)O,1K3U_IAD +CSC[C@H]1[NH2+][C@@H](c2c[nH]c3c(=O)[nH]cnc23)[C@H](O)[C@@H]1O,1Q1G_MTI +CC1(C)[C@H](C(=O)O)N2C(=O)[C@@H](NC(=O)Cc3ccccc3)[C@H]2[S@H]1O,1GM8_SOX +Cn1c(=O)c(-c2c(Cl)cccc2Cl)cc2cnc(Nc3cccc(CO)c3)nc21,1OPK_P16 +O=c1[nH]c(=O)n(COCCO)cc1Cc1ccccc1,1U1C_BAU +Cc1ccc(NC(=O)c2ccc(CN3CCN(C)CC3)cc2)cc1Nc1nccc(-c2cccnc2)n1,1T46_STI +Nc1nc2c(c(=O)[nH]1)N=C(CO)CN2,1HQ2_PH2 +CCCCc1nc2c(=O)[nH][nH]c(=O)c2[nH]1,1N2V_BDI +C=C1/C(=C\C=C2/CCC[C@]3(C)[C@@H]([C@H](C)/C=C/[C@@H](O)C4CC4)CC[C@@H]23)C[C@@H](O)C[C@@H]1O,1S19_MC9 +NC(=[NH2+])c1ccc([C@H]2[C@H]3C(=O)N(Cc4ccc(F)cc4)C(=O)[C@H]3[C@@H]3CCCN32)cc1,1OYT_FSN +CCCOc1cc([C@]2(C)CNC(=O)O2)ccc1OC,1XM6_5RM +CC/C(=C(/CC)c1ccc(O)cc1)c1ccc(O)cc1,1TZ8_DES +CCNC(=O)c1n[nH]c(-c2cc(Cl)c(O)cc2O)c1-c1ccc(OC)cc1,2BSM_BSM +CN1CCC[C@H]1c1cccnc1,1P2Y_NCT +CC(C)[C@H](CO)Nc1nc(Nc2ccc(C(=O)O)c(Cl)c2)c2ncn(C(C)C)c2n1,1V0P_PVB +Cc1ccccc1CNC(=O)[C@H]1N(C(=O)[C@@H](O)[C@H](Cc2ccccc2)NC(=O)c2cccc(O)c2C)CSC1(C)C,1KZK_JE2 +CNC(=O)[C@@H](NC(=O)[C@H](CC(C)C)[C@H](O)C(=O)NO)C(C)(C)C,1R55_097 +Oc1ccc(/C=C/c2cc(O)cc(O)c2)cc1,1SG0_STL +CCC(CC)[C@H](NC(C)=O)[C@@H]1[C@H](O)[C@@H](C(=O)O)C[C@H]1NC(=N)N,1L7F_BCZ +CC(C)c1cc(Oc2c(Cl)cc(CC(=O)O)cc2Cl)ccc1O,1NAV_IH5 +Cc1cn([C@H]2C[C@H](O)[C@@H](CO)O2)c(=O)[nH]c1=O,1W2G_THM +C[C@](O)(CS(=O)(=O)c1ccc(F)cc1)C(=O)Nc1ccc(C#N)c(C(F)(F)F)c1,1Z95_198 +O=C(O)/C(=C/c1ccc(Oc2ccccc2Br)cc1)NC(=O)c1ccccc1,1YVF_PH7 +N[C@@H](Cc1ccc(O)cc1)C(=O)O,1X8X_TYR +CC(C)n1c(/C=C/[C@@H](O)C[C@@H](O)CC(=O)O)c(-c2ccc(F)cc2)c2ccccc21,1HWI_115 +COc1cc(N(C)C[C@@H]2CCC3=C(C2)C(N)=N[C@@H](N)N3)cc(OC)c1OC,1S3V_TQD +O=C1NCC(=O)N2CCC[C@@H]12,1W1P_GIO 
+NC1=N/C(=C2/CCNC(=O)c3[nH]ccc32)C(=O)N1,1U4D_DBQ +Cn1ccnc1Sc1cc(C(=O)Nc2nccs2)c(N)cc1F,1V4S_MRK +Cc1cn([C@@]23C[C@H](O)[C@@H](CO)[C@@H]2C3)c(=O)[nH]c1=O,1OF1_SCT +Nc1nc2c(ncn2CCCCC(F)(F)P(=O)(O)O)c(=O)[nH]1,1V48_HA1 +C[C@H](C(=O)O)c1ccc(-c2ccccc2)cc1,1Q4G_BFL +O=C(O)[C@@H](Cc1ccccc1)[C@H](Cc1ccc2c(c1)OCO2)C(=O)O,1JJE_BYS +O=C(Nc1c(Cl)cncc1Cl)c1ccc(OC(F)F)c(OCC2CC2)c1,1XOQ_ROF +C[C@@H]1C[C@H]2[C@@H]3CCC4=CC(=O)C=C[C@]4(C)[C@@]3(F)[C@@H](O)C[C@]2(C)[C@@]1(O)C(=O)CO,1M2Z_DEX +Nc1ccn([C@@H]2O[C@H](CO)[C@@H](O)C2(F)F)c(=O)n1,1P62_GEO +O=C(O)Cc1cccc2ccccc12,1LRH_NLA +Oc1ccc([C@H]2Sc3cc(O)ccc3O[C@H]2c2ccc(OCCN3CCCCC3)cc2)cc1,1SJ0_E4D +COc1c(C)c2c(c(O)c1C/C=C(\C)CCC(=O)O)C(=O)OC2,1MEH_MOA +CCCn1c(C2CCN(C)CC2)nc(-c2ccc(Cl)c(Cl)c2)c1-c1ccnc(NC2CC2)n1,1PMN_984 +C#C[C@]1(O)CC[C@H]2[C@@H]3CCC4=CC(=O)CC[C@@H]4[C@H]3CC[C@@]21C,1SQN_NDR +CNC(=O)[C@@H](NC(=O)[C@H](CC(C)C)CN(O)C=O)C(C)(C)C,1GKC_NFH +O=C(O)COc1cc(F)ccc1C(=O)NCc1nc2c(F)c(F)cc(F)c2s1,1T40_ID5 +NC(=O)c1cn([C@@H](CO)CCn2ccc3ccc(NC(=O)CCc4ccccc4)cc32)cn1,1UML_FR4 +CC[C@H](CO)Nc1nc(NCc2ccccc2)c2ncn(C(C)C)c2n1,1UNL_RRC +N[C@@H](Cc1ccc(O)cc1)C(=O)O,1OF6_DTY +CC(=O)Nc1nnc(S(N)(=O)=O)s1,1JD0_AZM +CC(C)(CO)[C@@H](O)C(=O)[O-],1N2J_PAF +CCc1nc(N)nc(N)c1-c1ccc(Cl)cc1,1J3J_CP6 +CC[C@@]1(c2cccc(Oc3cc([C@](C)(N)c4cncn4C)ccc3C#N)c2)CCCCN(C)C1=O,1MZC_BNE +N=C(N)c1ccc2cc(C(=O)Nc3ccccc3)ccc2c1,1OWE_675 +COc1nc(C)nc(NC(=O)NS(=O)(=O)c2ccccc2Cl)n1,1T9B_1CS +CC(C)c1c(Cc2ccccc2)n(COCc2ccccc2)c(=O)[nH]c1=O,1JLA_TNK +COc1ccc(-c2oc3ncnc(NCCO)c3c2-c2ccc(OC)cc2)cc1,2BR1_PFP +Cc1cc(C)cc(NC(=O)Cc2ccc(OC(C)(C)C(=O)O)cc2)c1,1G9V_RQ3 +NCc1cccc(C2CCN(C(=O)c3cncc(CCc4ccccc4)c3)CC2)c1,2BM2_PM2 +CN1CC(=O)N2[C@H](c3ccc4c(c3)OCO4)c3[nH]c4ccccc4c3C[C@@H]2C1=O,1XOZ_CIA +CC(C)(CO)[C@@H](O)C(=O)NCCC(=O)O,1SQ5_PAU +Cc1cc(S(=O)(=O)N[C@@H](C(=O)NO)C2CCOCC2)ccc1F,1YQY_915 +Nc1nc(N)c2c(Sc3ccccc3)cccc2n1,1IA1_TQ3 +Cc1nc(=O)c2cc(CN(C)c3ccc(C(=O)N[C@@H](CCC(=O)O)C(=O)O)s3)ccc2[nH]1,1HVY_D16 +O[C@H]1[C@H]2[C@H](O)CCCN2C[C@H]1O,1HWW_SWA 
+C=C(C)[C@H]1CN[C@H](C(=O)O)[C@H]1CC(=O)O,1TT1_KAI +O=C1Nc2ccccc2/C1=C1/Nc2ccccc2/C1=N\O,1Q41_IXM +CC(C)[C@H](N)C(=O)N1CCCC1,1N1M_A3M +CNS(=O)(=O)c1ccc(N/C=C2\C(=O)Nc3ccccc32)cc1,1KE5_LS1 +Nc1nccc2c1ncn2[C@@H]1O[C@H](CO)[C@@H](O)[C@H]1O,1HP0_AD3 +N=C1CCCN1Cc1[nH]c(=O)[nH]c(=O)c1Cl,1UOU_CMU +O=C(O)CCCn1c2ccccc2c2ccccc21,1TOW_CRZ +Cc1cccc2c1cc(C(=O)NCc1cc(Cl)cc(Cl)c1)n2Cc1cccc(C(=N)N)c1,1LPZ_CMB +CCC(CC)Nc1cc(C(=O)O)ccc1N1C(=O)CC[C@@]1(CN)CO,1VCJ_IBA +C[C@H](NC(=O)[C@H](Cc1ccc(-c2ccccc2)cc1)C[P@](=O)(O)[C@H](C)N)C(=O)O,1R1H_BIR +Cc1ccc(-c2cc(C(F)(F)F)nn2-c2ccc(S(N)(=O)=O)cc2)cc1,1OQ5_CEL diff --git a/forks/FABind/inference/fabind_dockgen_inputs.csv b/forks/FABind/inference/fabind_dockgen_inputs.csv index c613f50f..2d3d8898 100644 --- a/forks/FABind/inference/fabind_dockgen_inputs.csv +++ b/forks/FABind/inference/fabind_dockgen_inputs.csv @@ -1,190 +1,190 @@ Cleaned_SMILES,pdb_id -CC(O)N[C@H]1[C@H]([C@H](O)[C@H](O)CO)O[C@](O)(C(O)O)C[C@@H]1O,3gvl_1_SLB_2 -OC[C@H]1O[C@H](O[PH](O)(O)O[PH](O)(O)OC[C@H]2O[C@@H](N3CCC(O)NC3O)[C@H](O)[C@@H]2O)[C@H](O)[C@@H](O)[C@H]1O,3inr_1_GDU_0 -NC1NC(O)C2NCN([C@@H]3O[C@H](CO[PH](O)(O)O[PH](O)(O)O[PH](O)(O)O)[C@@H](O)[C@H]3O)C2N1,3jqm_1_GTP_5 -CC(O)N[C@H]1[C@H]([C@H](O)[C@H](O)CO)O[C@](O)(C(O)O)C[C@@H]1O,3ju4_1_SLB_2 -C[N+](C)(C)CCO,4cnl_1_CHT_1 -OC(O)CCC(O)O,1hg0_1_SIN_1 -CC1CC2NC3C(O)NC(O)NC3N(C[C@H](O)[C@H](O)[C@H](O)CO[PH](O)(O)O[PH](O)(O)OC[C@H]3O[C@@H](N4CNC5C(N)NCNC54)[C@H](O)[C@@H]3O)C2CC1C,1i8t_1_FAD_1 -OC1NC(O)N([C@H]2C[C@H](O)[C@@H](CO[PH](O)(O)O)O2)CC1F,1o28_1_UFP_2 -C[N+](C)(C)CCO[PH](O)(O)O,1o72_2_PC_0 -OC(O)CCC(O)O,1pj2_1_FUM_0 -OC(O)CCC(O)O,1pj4_1_FUM_1 -N[C@@H](CC1CNC2CCCCC12)C(O)O,1qaw_1_TRP_7 -C[S@@H](CC[C@H](N)C(O)O)C[C@H]1O[C@@H](N2CNC3C(N)NCNC32)[C@H](O)[C@@H]1O,1rqp_1_SAM_0 -C[C@@H]1CC2NC3C(O)NC(O)NC3N(C[C@H](O)[C@H](O)[C@H](O)CO[PH](O)(O)O)C2C[C@@H]1C,1sbz_1_FMN_3 +CC[SnH](CC)CC,5u82_2_ZN0_0 +CC(C)CCN1C2CC(C)[C@@H](C)CC2N(C[C@H](O)[C@H](O)[C@H](O)CO[PH](O)(O)O)[C@H]2NC(O)N[C@@H](O)C21,4zav_1_4LS_8 
+NC1NC(O)C2N[C@@H]3[C@H](NC2N1)O[C@H](CO[PH](O)(O)O)C(S)[C@@H]3S,3nvv_1_MTE_1 +N[C@@H](CCC(O)O)C(O)O,5k45_2_GLU_1 +CCC(O)NC1CCC(C([O-])O)C(C([O-])O)C1,6n19_2_K8V_0 +N[C@@H](CC(O)O)C(O)O,6paa_1_ASP_2 +OC1CCN([C@@H]2O[C@H](CO[PH](O)(O)O[PH](O)(O)O)[C@@H](O)[C@H]2O)C(O)N1,4rpj_1_UDP_0 +NC1NCNC2C1NCN2[C@@H]1O[C@H](CO[PH](O)(O)O[PH](O)(O)O[PH](O)(O)O)[C@@H](O)[C@H]1O,4zqx_1_ATP_2 +OC1CCN([C@@H]2O[C@H](CO[PH](O)(O)O[PH](O)(O)O)[C@@H](O)[C@H]2O)C(O)N1,3he3_5_UDP_0 +NC1CCN([C@@H]2O[C@H](CO[PH](O)(O)O[PH](O)(O)O[PH](O)(O)O)[C@@H](O)[C@H]2O)C(O)N1,4kgx_1_CTP_5 +CC1CC2NC3C(O)NC(O)NC3N(C[C@H](O)[C@H](O)[C@H](O)CO[PH](O)(O)O)C2CC1C,6jls_1_FMN_6 NC1CCN([C@@H]2O[C@H](CO[PH](O)(O)O[PH](O)(O)OC[C@H]3O[C@H]4NC5NC(N)NC(O)C5N[C@H]4[C@@H]4S[Mo](O)(O)SC43)[C@@H](O)[C@H]2O)C(O)N1,1sij_1_PCD_0 -N[C@@H](CO)C(O)O,1tke_1_SER_0 +OC[C@H]1O[C@H](OC[C@H]2O[C@H](OC[C@H]3O[C@@H](O)[C@@H](O)[C@@H](O[C@H]4O[C@H](CO)[C@@H](O)[C@H](O)[C@@H]4O)[C@@H]3O)[C@@H](O)[C@@H](O[C@H]3O[C@H](CO)[C@@H](O)[C@H](O)[C@@H]3O)[C@@H]2O)[C@@H](O)[C@@H](O)[C@@H]1O,4gk9_1_BMA-MAN-MAN-MAN-MAN_3 +CC(C)CCN1C2CC(C)[C@@H](C)CC2N(C[C@H](O)[C@H](O)[C@H](O)CO[PH](O)(O)O)C2N[C@@H](O)N[C@@H](O)C21,4zaz_1_4LS_6 +OC1CCN([C@@H]2O[C@H](CO[PH](O)([OH2+])O[PH](O)(O)[OH2+])[C@@H](O)[C@H]2O)C(O)N1,4phs_1_UDP_0 NC1NCNC2C1NCN2[C@@H]1O[C@H](COS(O)(O)NC(O)[C@@H](N)CO)[C@@H](O)[C@H]1O,1tkg_1_SSA_0 +NC(O)C[C@H](N)C(O)NCCO,5k63_1_ASN-GLY_0 CCCCCCCC(O)O,1u8u_1_OCA_0 -CSCC[C@@H](NC(N)O)C(O)O,1uf5_1_CDT_0 -CC(C)[C@@H](NC(N)O)C(O)O,1uf7_1_CDV_0 -NC(O)N[C@H](CC1CCCCC1)C(O)O,1uf8_1_ING_2 +OC[C@H]1O[C@@H](O)[C@@H](O)[C@@H](O)[C@@H]1O,5mh1_1_BMA_0 +CC1CC2NC3C(O)NC(O)NC3N(C[C@H](O)[C@H](O)[C@H](O)CO[PH](O)(O)O[PH](O)(O)OC[C@H]3O[C@@H](N4CNC5C(N)NCNC54)[C@H](O)[C@@H]3O)C2CC1C,1i8t_1_FAD_1 +NC1NCNC2C1NCN2[C@@H]1O[C@H](CO[PH](O)(O)O)[C@@H](O)[C@H]1O,6rms_1_AMP_1 CCCCCCCC(O)O,1v2g_1_OCA_0 -NC1NC(O)[C@@H]2N[C@H]3C(S)C(S)[C@@H](CO[PH](O)(O)O)O[C@H]3NC2N1,1v97_1_MTE_1 -NC1CCN([C@@H]2O[C@H](CO[PH](O)(O)O[PH](O)(O)O[PH](O)(O)O)[C@@H](O)[C@H]2O)C(O)N1,1za2_1_CTP_4 
+NCC(O)O,4o0d_1_GLY_3 +NC1NCNC2[C@H]1NCN2[C@@H]1O[C@H](CCl)[C@@H](O)[C@H]1O,6rz2_1_5CD_2 +OCC[C@H]1CCCC[C@@H]1NC(O)N[C@H]1CNNS1,5hqx_1_EDZ_0 +C[S@@H](CC[C@H](N)C(O)O)C[C@H]1O[C@@H](N2CNC3C(N)NCNC32)[C@H](O)[C@@H]1O,2v7u_1_SAM_2 +NC1CCN([C@H]2C[C@H](O)[C@@H](CO[PH](O)(O)O[PH](O)(O)O[PH](O)(O)O)O2)C(O)N1,4fyv_1_DCP_2 +CC(C)C[C@H](N)C(=O)N[C@@H](Cc1ccc(O)cc1)C(=O)N[C@@H](Cc1ccccc1)C(=O)N[C@@H](CCC(N)=O)C(=O)O,5d9g_1_GLU-ASN-LEU-TYR-PHE-GLN_0 +NC1NCNC2C1NCN2[C@@H]1O[C@H](CO[PH](O)(O)C[PH](O)(O)O)[C@@H](O)[C@H]1O,6tvg_1_AP2_1 +OC1CCN([C@@H]2O[C@H](CO[PH](O)(O)O[PH](O)(O)O)[C@@H](O)[C@H]2O)C(O)N1,4pfx_1_UDP_0 +OC(O)C1CCCN([C@@H]2O[C@H](CO[PH](O)(O)O)[C@@H](O)[C@H]2O)C1,4uoc_1_NCN_1 +NC1NC(O)C2N[C@H]3N([C@@H]2N1)[C@@H]1O[C@H](CO[PH](O)(O)O[PH](O)(O)O[PH](O)(O)O)[C@@]3(O)[C@H]1O,4pya_1_2X3_0 +NC(O)NC1NC(O)NC1O,3o7j_1_2AL_0 +OC[C@H]1O[C@H](O)[C@H](F)[C@@H](O)[C@H]1O,4mos_1_GAF_1 +N[C@@H](CC(O)O)C(O)O,5k3o_2_ASP_0 +CC(O)NC1CCC(CCSC2NC(N3C[C@@H](C)O[C@@H](C)C3)[C@@H]3COC(C)(C)CC3C2CN)CC1,5enq_1_5QE_0 +NC1NCNC2C1NCN2[C@@H]1O[C@H](CO[PH](O)(O)O[PH](O)(O)N[PH](O)(O)O)[C@@H](O)[C@H]1O,3se5_1_ANP_2 O[C@@H]1[C@@H](O)[C@@H](O)OC[C@H]1O,2cdc_1_XYS_5 -N[C@@H](CC1CNC2CCCCC12)C(O)O,2ext_1_TRP_0 +CCC[C@H]1CCC(O)C(OC)C1,5fxd_1_H7Y_1 +CC1CC2NC3C(O)NC(O)NC3N(C[C@H](O)[C@H](O)[C@H](O)CO[PH](O)(O)O[PH](O)(O)OC[C@H]3O[C@@H](N4CNC5C(N)NCNC54)[C@H](O)[C@@H]3O)C2C[C@H]1C,4cdn_2_FAD_0 +NC(O)C[C@H](N)C(O)O,6pa6_2_ASN_0 +CC1CCC(O)CC1,3q14_1_PCR_3 +OC[C@H]1O[C@H](O)[C@H](O)[C@@H](O)[C@@H]1O,4tvd_1_GLC_0 +OCC[C@H]1CCCC[C@@H]1NC(O)N[C@H]1CC(Cl)C[C@H](OC(F)(F)F)C1,6yap_1_OHZ_0 +NC1CCN([C@@H]2O[C@H](CO[PH](O)(O)O[PH](O)(O)O[PH](O)(O)O)[C@@H](O)[C@H]2O)C(O)N1,4fyw_1_CTP_4 +N[C@@H]1NCN[C@H]2C1NCN2[C@@H]1O[C@H](CO[PH](O)(O)O[PH](O)(O)O[PH](O)(O)O)[C@@H](O)[C@H]1O,5gqi_1_ATP_7 +NC1NCNC2[C@H]1NCN2[C@@H]1O[C@H](CO[PH](O)(O)O)[C@@H](O)[C@H]1O,6etf_1_AMP_0 +NC1NCNC2C1NCN2[C@@H]1O[C@H](CO)[C@H]2O[PH]([O-])(O)O[C@H]21,6o70_1_ACK_1 
CC(O)N[C@@H]1[C@@H](O)[C@H](O[C@@H]2O[C@H](CO)[C@H](O)[C@H](O[C@H]3O[C@H](CO)[C@H](O)[C@H](O)[C@H]3O)[C@H]2O)[C@@H](CO)O[C@H]1O,2g7c_1_NAG-GAL-GLA_1 +NC1NCNC2C1NCN2[C@@H]1O[C@H](CO[PH](O)(O)O[PH](O)(O)O[PH](O)(O)O)[C@@H](O)[C@H]1O,5gql_1_ATP_4 +N[C@H](O)C1CCCN([C@@H]2O[C@H](CO[PH](O)(O)O[PH](O)(O)OC[C@H]3O[C@@H](N4CNC5[C@H](N)NCN[C@H]54)[C@H](O)[C@@H]3O)[C@@H](O)[C@H]2O)C1,3ad7_1_NAD_0 +C[N+](C)(C)CCO,4cnl_1_CHT_1 +OS(O)(O)OC[C@H]1O[C@@](COS(O)(O)O)(O[C@H]2O[C@H](COS(O)(O)O)[C@@H](OS(O)(O)O)[C@H](OS(O)(O)O)[C@H]2OS(O)(O)O)[C@@H](OS(O)(O)O)[C@@H]1OS(O)(O)O,3qrc_2_GU4-YYJ_0 OC(O)C1CCCO1,2gag_1_FOA_0 -NC1NCNC2C1NCN2[C@@H]1O[C@H](CO[PH](O)(O)O[PH](O)(O)OC[C@H]2O[C@@H](N3CCCC([C@H](N)O)C3)[C@H](O)[C@@H]2O)[C@@H](O)[C@H]1O,2gag_1_NAD_0 -NC(O)C1CCCN([C@@H]2O[C@H](CO[PH](O)(O)O[PH](O)(O)OC[C@H]3O[C@@H](N4CNC5C(N)NCNC54)[C@H](O)[C@@H]3O)[C@@H](O)[C@H]2O)C1,2gah_1_NAD_0 -OC(O)C1CCCO1,2gf3_2_FOA_1 -NC(O)C[C@H](N)C(O)O,2him_1_ASN_3 -OC(O)[C@H]1C[C@@H](O)[C@@H](O)[C@H](O)C1,2hk9_1_SKM_0 +C[C@@H](O)SCC[N+](C)(C)C,6ur1_2_AT3_0 +CC1CC2NC3C(O)NC(O)NC3N(C[C@H](O)[C@H](O)[C@H](O)CO[PH](O)(O)O[PH](O)(O)OC[C@H]3O[C@@H](N4CNC5C4NCN[C@H]5N)[C@H](O)[C@@H]3O)C2CC1C,3gf4_1_FAD_1 +CC(N)O,3ub7_1_ACM_1 +NCC(O)NC1CCC2NC(O)NC2C1,4idk_1_1FE_0 +OCCC1CCCC[C@H]1N[C@H](O)N[C@H]1C[C@@H](Cl)C[C@@H](OC(F)(F)F)C1,6yaq_1_OHZ_0 +OC1NC(O)N([C@H]2C[C@H](O)[C@@H](CO[PH](O)(O)O)O2)CC1F,1o28_1_UFP_2 +NC1NCNC2[C@@H]1NCN2[C@@H]1O[C@H](CO[PH](O)(O)O[PH](O)(O)OC[C@H]2O[C@@H](N3CCCC([C@H](N)O)C3)[C@H](O)[C@@H]2O)[C@@H](O)[C@H]1O,3ad9_1_NAD_0 OCNCC(O)N[C@@H]1O[C@H](CO[PH](O)(O)O)[C@@H](O)[C@H]1O,2hs3_1_FGR_0 -CCC1C(C)C2C[C@@H]3[C@H](C)[C@H](CCC(O)O)C4CC5[C@@H](CCC(O)O)C(C)C6CC7[C@@H](CC)[C@H](C)C8CC1N2[Mn@SP2](N65)(N87)N43,2o5m_1_MNR_0 +COCCN1CCN([C@@H]2NC(SCC[C@H]3CCC(OC)[C@@H](OC)C3)[C@H](CN)[C@H]3CC(C)(C)OCC23)CC1,5enp_1_5QF_0 +C[C@H]1CC2NC3[C@H](O)N[C@H](O)N[C@@H]3N(C[C@H](O)[C@H](O)[C@H](O)CO[PH](O)(O)O)[C@@H]2C[C@@H]1C,3zqu_1_FNR_5 +O[Mo@]12S[Mo@@]1(O)S2,6a71_1_9UX_0 
+CC(C)(O)C1CCC([C@H]2CC(Cl)C[C@@H](C34(C(N)N)CC3C4)C2)CC1,6nco_1_KQP_0 +CCCCCCCC,2vdf_1_OCT_0 +NC1NCNC2C1NCN2[C@@H]1O[C@H](CO[PH](O)(O)O)[C@@H](O)[C@H]1O,4uuw_1_AMP_0 +NC(O)CCCC1CCCCC1,4n4l_1_HG1_0 NC(O)N[C@H]1NC(O)NC1O,2q37_1_3AL_0 -NC1NCNC2C1NCN2[C@@H]1O[C@H](CO)[C@@H](O)[C@H]1O,2q6k_1_ADN_1 -OC[C@@H](O)CO[PH](O)(O)O,2r4e_1_13P_0 -OS(O)(O)OC[C@H]1O[C@@](COS(O)(O)O)(O[C@H]2O[C@H](COS(O)(O)O)[C@@H](OS(O)(O)O)[C@H](OS(O)(O)O)[C@H]2OS(O)(O)O)[C@@H](OS(O)(O)O)[C@@H]1OS(O)(O)O,2v5e_1_GU4-YYJ_0 -NC1NCNC2C1NCN2[C@@H]1O[C@H](CSCC[C@H](N)C(O)O)[C@@H](O)[C@H]1O,2v7t_1_SAH_4 -C[S@@H](CC[C@H](N)C(O)O)C[C@H]1O[C@@H](N2CNC3C(N)NCNC32)[C@H](O)[C@@H]1O,2v7u_1_SAM_2 +CC(C)[C@@H](CO)NC(O)[C@@H](N)CC(N)O,5k62_1_ASN-VAL_0 +OC[C@H]1O[C@H](O[PH](O)(O)O[PH](O)(O)OC[C@H]2O[C@@H](N3CCC(O)NC3O)[C@H](O)[C@@H]2O)[C@H](O)[C@@H](O)[C@H]1O,3inr_1_GDU_0 NC1NCNC2C1NCN2[C@@H]1O[C@H](CF)[C@@H](O)[C@H]1O,2v7v_1_5FD_0 -NC1NCNC2C1NCN2[C@@H]1O[C@H](CF)[C@@H](O)[C@H]1O,2v7w_1_5FD_0 -CCCCCCCC,2vdf_1_OCT_0 -OC[C@@H](O)[C@@H](O)[C@H](O)[C@H](O)CO,2vfu_1_MTL_0 -OC[C@H]1O[C@@H](O[C@H]2[C@H](O)[C@@H](O)[C@H](O[C@H]3[C@H](O)[C@@H](O)[C@H](O[C@H]4[C@H](O)[C@@H](O)[C@H](O[C@H]5[C@H](O)[C@@H](O)[C@H](O)O[C@@H]5CO)O[C@@H]4CO)O[C@@H]3CO)O[C@@H]2CO)[C@H](O)[C@@H](O)[C@@H]1O,2wab_1_BGC-BGC-BGC-BGC-BGC_0 -OC[C@H]1O[C@@H](O[C@H]2[C@H](O)[C@@H](O)[C@H](O[C@H]3[C@H](O)[C@@H](O)[C@H](O[C@H]4[C@H](O)[C@@H](O)[C@H](O[C@H]5[C@H](O)[C@@H](O)[C@H](O)O[C@@H]5CO)O[C@@H]4CO)O[C@@H]3CO)O[C@@H]2CO)[C@H](O)[C@@H](O)[C@@H]1O,2wao_1_BGC-BGC-BGC-BGC-BGC_0 -NC1NCNC2C1NCN2[C@@H]1O[C@H](CSCC[C@H](N)C(O)O)[C@@H](O)[C@H]1O,2wr8_1_SAH_1 -C[N+](C)(C)CCO,2wwc_1_CHT_2 -COC1C(O)C(C)C(CCC(C)CCCC(C)CCCC(C)CCC[C@H](C)CCCC(C)CCC[C@H](C)CCCC(C)CCCC(C)C)C(O)C1OC,2x34_2_UQ8_0 -OC(O)[C@H]1CCCNC1,2xrh_1_NIO_1 -CCO[PH](O)(O)O[PH](O)(O)OC[C@H]1O[C@@H](N2CNC3C(N)NCNC32)[C@H](O)[C@@H]1O[PH](O)(O)O,2xta_1_ACO_0 -N[C@@H](CC1CNC2CCCCC12)C(O)O,2zcz_2_TRP_3 -N[C@@H](CC1CNC2CCCCC12)C(O)O,2zd0_1_TRP_9 +OC(O)CCC(O)O,1hg0_1_SIN_1 
CCCCCCC(O)OC[C@H](CO[PH](O)(O)O)OC(O)CCCCCC,2ze9_1_PD7_0 -N[C@H](O)C1CCCN([C@@H]2O[C@H](CO[PH](O)(O)O[PH](O)(O)OC[C@H]3O[C@@H](N4CNC5[C@H](N)NCN[C@H]54)[C@H](O)[C@@H]3O)[C@@H](O)[C@H]2O)C1,3ad7_1_NAD_0 -NC1NCNC2[C@@H]1NCN2[C@@H]1O[C@H](CO[PH](O)(O)O[PH](O)(O)OC[C@H]2O[C@@H](N3CCCC([C@H](N)O)C3)[C@H](O)[C@@H]2O)[C@@H](O)[C@H]1O,3ad9_1_NAD_0 -N[C@H](O)C1CCCN([C@@H]2O[C@H](CO[PH](O)(O)O[PH](O)(O)OC[C@H]3O[C@@H](N4CN[C@@H]5[C@H](N)NCN[C@H]54)[C@H](O)[C@@H]3O)[C@@H](O)[C@H]2O)C1,3ada_1_NAD_0 -N[C@@H](CC(O)O)C(O)O,3eca_1_ASP_3 -CC1CC2NC3C(O)NC(O)NC3N(C[C@H](O)[C@H](O)[C@H](O)CO[PH](O)(O)O[PH](O)(O)OC[C@H]3O[C@@H](N4CNC5C4NCN[C@H]5N)[C@H](O)[C@@H]3O)C2CC1C,3gf4_1_FAD_1 OC1CCN([C@@H]2O[C@H](CO[PH](O)(O)O)[C@@H](O)[C@H]2O)C(O)N1,3gf4_1_U5P_0 -OC1CCN([C@@H]2O[C@H](CO[PH](O)(O)O[PH](O)(O)O)[C@@H](O)[C@H]2O)C(O)N1,3he3_5_UDP_0 -NCCC[C@H](N)C(O)O,3it6_1_ORN_1 -OC[C@H]1O[C@H](O[C@H]2[C@H](O)[C@@H](O)[C@@H](O[C@H]3[C@H](O)[C@@H](O)[C@@H](O[C@H]4[C@H](O)[C@@H](O)[C@@H](O[C@H]5[C@H](O)[C@@H](O)[C@@H](O)O[C@@H]5CO)O[C@@H]4CO)O[C@@H]3CO)O[C@@H]2CO)[C@H](O)[C@@H](O)[C@@H]1O,3k8l_1_GLC-GLC-GLC-GLC-GLC_0 -C[C@H]1O[C@H](O[C@H]2[C@H](O)[C@@H](O)[C@@H](O[C@H]3[C@H](O)[C@@H](O)[C@@H](O)O[C@@H]3CO)O[C@@H]2CO)[C@H](O)[C@@H](O)[C@@H]1N[C@H]1CC(CO)[C@@H](O)[C@H](O)[C@H]1O,3k8m_1_GLC-GLC-AC1_0 -NC1NC(O)C2N[C@@H]3[C@H](NC2N1)O[C@H](CO[PH](O)(O)O)C(S)[C@@H]3S,3nvv_1_MTE_1 -C[C@H](CCC(O)O)[C@H]1CC[C@H]2[C@@H]3CC[C@@H]4C[C@H](O)CC[C@]4(C)[C@H]3C[C@H](O)[C@]12C,3o01_2_DXC_0 -C[C@H](CCC(O)O)[C@H]1CC[C@H]2[C@@H]3[C@H](O)C[C@@H]4C[C@H](O)CC[C@]4(C)[C@H]3CC[C@]12C,3o02_2_JN3_0 -NC(O)NC1NC(O)NC1O,3o7j_1_2AL_0 -CC1CCC(O)CC1,3q14_1_PCR_3 -OS(O)(O)OC[C@H]1O[C@@](COS(O)(O)O)(O[C@H]2O[C@H](COS(O)(O)O)[C@@H](OS(O)(O)O)[C@H](OS(O)(O)O)[C@H]2OS(O)(O)O)[C@@H](OS(O)(O)O)[C@@H]1OS(O)(O)O,3qrc_2_GU4-YYJ_0 -OC[C@H]1O[C@H](O[C@@H]2[C@H](O)[C@@H](OC[C@H]3O[C@@H](O)[C@@H](O)[C@@H](O[C@H]4O[C@H](CO)[C@@H](O)[C@H](O)[C@@H]4O)[C@@H]3O)O[C@H](CO)[C@H]2O)[C@@H](O)[C@@H](O)[C@@H]1O,3s5x_1_BMA-MAN-MAN-MAN-MAN_0 
-NC1NCNC2C1NCN2[C@@H]1O[C@H](CO[PH](O)(O)O[PH](N)(O)O)[C@@H](O)[C@H]1O,3s6a_1_ANP_0 -NC1NCNC2C1NCN2[C@@H]1O[C@H](CO[PH](O)(O)O[PH](O)(O)N[PH](O)(O)O)[C@@H](O)[C@H]1O,3se5_1_ANP_2 +NC(O)C[C@H](N)C(O)O,2him_1_ASN_3 +OC1CCN([C@@H]2O[C@H](CO[PH](O)(O)O[PH](O)(O)O)[C@@H](O)[C@H]2O)C(O)N1,4phr_1_UDP_0 +N[C@@H](CC1CNC2CCCCC12)C(O)O,2zd0_1_TRP_9 +NCC(O)O,4o0f_1_GLY_3 +NC1NCNC2C1NCN2[C@@H]1O[C@H](CO[PH](O)(O)O)[C@@H](O)[C@H]1O,5ers_1_AMP_1 +NC(O)N[C@H](CC1CCCCC1)C(O)O,1uf8_1_ING_2 NC1NC(O)[C@@H]2N[C@H]3C(S)C(S)[C@@H](CO[PH](O)(O)O)O[C@H]3NC2N1,3sr6_1_MTE_1 -CC(N)O,3ub7_1_ACM_1 -N[C@@H](O)NO,3ub9_1_NHY_1 -OC(O)C1CCCCC1O,3uni_1_SAL_0 -OC(O)C1CC(O)C(O)C(O)C1,3wrb_1_GDE_0 -CC1C(CC(O)O)NC(O)[C@@H](C)[C@@H]1O[PH](O)(O)OC[C@H]1O[C@@H](N2CNC3C(O)NC(N)NC32)[C@H](O)[C@@H]1O,3wvc_1_FEG_0 -NC1NCNC2C1NCN2[C@@H]1O[C@H](CO[PH](O)(O)O[PH](O)(O)N[PH](O)(O)O)[C@@H](O)[C@H]1O,3zec_1_ANP_0 -CCCCCCCCO[C@@H]1O[C@H](CO)[C@@H](O)[C@H](O)[C@H]1O,3zjx_1_BOG_0 -C[C@H]1CC2NC3[C@H](O)N[C@H](O)N[C@@H]3N(C[C@H](O)[C@H](O)[C@H](O)CO[PH](O)(O)O)[C@@H]2C[C@@H]1C,3zqu_1_FNR_5 +C[C@H](CCC(O)O)[C@H]1CC[C@H]2[C@@H]3CC[C@@H]4C[C@H](O)CC[C@]4(C)[C@H]3C[C@H](O)[C@]12C,3o01_2_DXC_0 +CCCCCCCCCCCCCCCC(O)O,5f2t_1_PLM_0 +CCCCCCCCO[C@@H]1O[C@H](CO)[C@@H](O)[C@H](O)[C@H]1O,5b5s_1_BOG_0 +CC(O)N[C@H]1[C@H]([C@H](O)[C@H](O)CO)O[C@](O)(C(O)O)C[C@@H]1O,3gvl_1_SLB_2 +OC[C@H]1O[C@@H](O[C@H]2[C@H](O)[C@@H](O)[C@H](O[C@H]3[C@H](O)[C@@H](O)[C@H](O[C@H]4[C@H](O)[C@@H](O)[C@H](O[C@H]5[C@H](O)[C@@H](O)[C@H](O)O[C@@H]5CO)O[C@@H]4CO)O[C@@H]3CO)O[C@@H]2CO)[C@H](O)[C@@H](O)[C@@H]1O,2wao_1_BGC-BGC-BGC-BGC-BGC_0 +OC(NC1CNNS1)N[C@H]1CCC[C@H](OC(F)(F)F)C1,5hmr_1_FDZ_0 +OC(O)CCC(O)O,1pj4_1_FUM_1 +C[N+](C)(C)CCO,2wwc_1_CHT_2 N[C@@H](CC1CNC2CCCCC12)C(O)O,3zzs_1_TRP_3 +OS(O)(O)OC[C@H]1O[C@@](COS(O)(O)O)(O[C@H]2O[C@H](COS(O)(O)O)[C@@H](OS(O)(O)O)[C@H](OS(O)(O)O)[C@H]2OS(O)(O)O)[C@@H](OS(O)(O)O)[C@@H]1OS(O)(O)O,2v5e_1_GU4-YYJ_0 NC1NC(O)C2C(NC[C@@H]3CN(C4CCC([C@H](O)N[C@@H](CCC(O)O)C(O)O)CC4)C(O)N23)N1,4b4v_1_L34_0 
-CC1CC2[C@H](CC1C)N(C(C)CN)C1C(O)NC(O)NC1N2C[C@H](O)[C@H](O)[C@H](O)CO[PH](O)(O)O[PH](O)(O)OC[C@H]1O[C@@H](N2CNC3C(N)NCN[C@H]32)[C@H](O)[C@@H]1O,4bc9_1_CNV-FAD_0 -CC1CC2NC3C(O)NC(O)NC3N(C[C@H](O)[C@H](O)[C@H](O)CO[PH](O)(O)O[PH](O)(O)OC[C@H]3O[C@@H](N4CNC5C(N)NCNC54)[C@H](O)[C@@H]3O)C2C[C@H]1C,4cdn_2_FAD_0 -NC1CCN([C@H]2C[C@H](O)[C@@H](CO[PH](O)(O)O[PH](O)(O)O[PH](O)(O)O)O2)C(O)N1,4fyv_1_DCP_2 -NC1CCN([C@@H]2O[C@H](CO[PH](O)(O)O[PH](O)(O)O[PH](O)(O)O)[C@@H](O)[C@H]2O)C(O)N1,4fyw_1_CTP_4 -OC[C@H]1O[C@H](OC[C@H]2O[C@H](OC[C@H]3O[C@@H](O)[C@@H](O)[C@@H](O[C@H]4O[C@H](CO)[C@@H](O)[C@H](O)[C@@H]4O)[C@@H]3O)[C@@H](O)[C@@H](O[C@H]3O[C@H](CO)[C@@H](O)[C@H](O)[C@@H]3O)[C@@H]2O)[C@@H](O)[C@@H](O)[C@@H]1O,4gk9_1_BMA-MAN-MAN-MAN-MAN_3 -NC1NCNC2C1NCN2[C@@H]1O[C@H](CO)[C@@H](O)[C@H]1O,4h2f_1_ADN_0 -NCC(O)NC1CCC2NC(O)NC2C1,4idk_1_1FE_0 -NC1CCN([C@@H]2O[C@H](CO[PH](O)(O)O[PH](O)(O)O[PH](O)(O)O)[C@@H](O)[C@H]2O)C(O)N1,4kgx_1_CTP_5 -OC[C@H]1O[C@@H](O)[C@H](O)[C@@H](F)[C@@H]1O,4mig_1_G3F_2 -CC1CC2[C@@H](CC1C)N[C@H]1C(O)NC(O)N[C@H]1N2C[C@H](O)[C@H](O)[C@H](O)CO[PH](O)(O)O[PH](O)(O)OC[C@H]1O[C@@H](N2CNC3C(N)NCNC32)[C@H](O)[C@@H]1O,4mo2_2_FDA_0 -OC[C@H]1O[C@H](O)[C@H](F)[C@@H](O)[C@H]1O,4mos_1_GAF_1 -NC(O)CCCC1CCCCC1,4n4l_1_HG1_0 -NCC(O)O,4o0d_1_GLY_3 -NCC(O)O,4o0f_1_GLY_3 -OC(NC1CCCCC1)N[C@H]1CCNC(Cl)C1,4o95_1_245_0 -OC(NC1CCCCC1)N[C@H]1CCNC(Cl)C1,4oal_2_245_0 -NCC(O)O,4osx_1_GLY_2 -NCC(O)O,4osy_1_GLY_3 -OC1CCN([C@@H]2O[C@H](CO[PH](O)(O)O[PH](O)(O)O)[C@@H](O)[C@H]2O)C(O)N1,4pfx_1_UDP_0 -OC1CCN([C@@H]2O[C@H](CO[PH](O)(O)O[PH](O)(O)O)[C@@H](O)[C@H]2O)C(O)N1,4phr_1_UDP_0 -OC1CCN([C@@H]2O[C@H](CO[PH](O)([OH2+])O[PH](O)(O)[OH2+])[C@@H](O)[C@H]2O)C(O)N1,4phs_1_UDP_0 -NC1NC(O)C2N[C@H]3N([C@@H]2N1)[C@@H]1O[C@H](CO[PH](O)(O)O[PH](O)(O)O[PH](O)(O)O)[C@@]3(O)[C@H]1O,4pya_1_2X3_0 -CCCCCCCCCCCCCC[C@@H](O)OC[C@@H](C)O[C@@H](O)CCCCCCCCCCC,4qa8_1_PJZ_0 -CC(O)N[C@H]1CO[C@H](CO)[C@@H](O)[C@@H]1O,4qo5_1_NAG_2 
-CC1C[C@@H]2[C@@H](C[C@@H]1C)NC1[C@H](O)NC(O)N[C@@H]1N2C[C@H](O)[C@H](O)[C@H](O)CO[PH]([O-])(O)O,4rhe_1_FMN_6 -OC1CCN([C@@H]2O[C@H](CO[PH](O)(O)O[PH](O)(O)O)[C@@H](O)[C@H]2O)C(O)N1,4rpj_1_UDP_0 +CCCCCCCCO[C@@H]1O[C@H](CO)[C@@H](O)[C@H](O)[C@H]1O,3zjx_1_BOG_0 +OC(O)C1CCCCC1,5fxf_1_BEZ_0 +NC1NCNC2C1NCN2[C@@H]1O[C@H](CO[PH](O)(O)O[PH](O)(O)O)[C@@H](O)[C@H]1O,6fgc_1_ADP_1 +C[C@@H](O)SCC[N+](C)(C)C,6uqy_2_AT3_0 +C[C@@H]1CC2NC3C(O)NC(O)NC3N(C[C@H](O)[C@H](O)[C@H](O)CO[PH](O)(O)O)C2C[C@@H]1C,1sbz_1_FMN_3 +CC1CCN(C2NCNC3NCNC32)C1,5hhz_1_ZME_0 +N[C@H](CCC(O)O)C(O)O,6wyz_1_DGL_1 +NC1NCNC2C1NCN2[C@@H]1O[C@H](CO)[C@@H](O)[C@H]1O,4xdr_1_ADN_0 +N[C@@H](O)NO,3ub9_1_NHY_1 +[O-]C(O)C1CCCC(CCC2CCCCC2)C1N1CCCC1,6npp_1_KWG_0 +OC[C@H]1O[C@@H](O[C@H]2[C@H](O)[C@@H](O)[C@H](O[C@H]3[C@H](O)[C@@H](O)[C@H](O[C@H]4[C@H](O)[C@@H](O)[C@H](O[C@H]5[C@H](O)[C@@H](O)[C@H](O)O[C@@H]5CO)O[C@@H]4CO)O[C@@H]3CO)O[C@@H]2CO)[C@H](O)[C@@H](O)[C@@H]1O,2wab_1_BGC-BGC-BGC-BGC-BGC_0 CCCCCC(O)SC,4rpm_1_HXC_0 +OC[C@H]1O[C@H](O[C@H]2[C@H](O)[C@@H](O)[C@@H](O[C@H]3[C@H](O)[C@@H](O)[C@@H](O[C@H]4[C@H](O)[C@@H](O)[C@@H](O[C@H]5[C@H](O)[C@@H](O)[C@@H](O)O[C@@H]5CO)O[C@@H]4CO)O[C@@H]3CO)O[C@@H]2CO)[C@H](O)[C@@H](O)[C@@H]1O,3k8l_1_GLC-GLC-GLC-GLC-GLC_0 OC[C@H]1O[C@@H](O)[C@H](O)[C@@H](O)[C@@H]1O,4tvd_1_BGC_4 -OC[C@H]1O[C@H](O)[C@H](O)[C@@H](O)[C@@H]1O,4tvd_1_GLC_0 -CC1CC2NC3C(O)NC(O)NC3N(C[C@H](O)[C@H](O)[C@H](O)CO[PH](O)(O)O[PH](O)(O)OC[C@H]3O[C@@H](N4CNC5C(N)NCNC54)[C@H](O)[C@@H]3O)C2CC1C,4u63_1_FAD_0 -OC(O)C1CCCN([C@@H]2O[C@H](CO[PH](O)(O)O)[C@@H](O)[C@H]2O)C1,4uoc_1_NCN_1 -NC1NCNC2C1NCN2[C@@H]1O[C@H](CO[PH](O)(O)O)[C@@H](O)[C@H]1O,4uuw_1_AMP_0 -NC1NCNC2C1NCN2[C@@H]1O[C@H](CO)[C@@H](O)[C@H]1O,4xdr_1_ADN_0 +CC(C)[C@@H](NC(N)O)C(O)O,1uf7_1_CDV_0 +NCC(O)O,4osy_1_GLY_3 +N[C@@H](CC1CNC2CCCCC12)C(O)O,1qaw_1_TRP_7 +NC1NC(O)[C@@H]2N[C@H]3C(S)C(S)[C@@H](CO[PH](O)(O)O)O[C@H]3NC2N1,1v97_1_MTE_1 +CCCCCC[C@H]1C(O)O[C@H](C)[C@H](N[C@H](O)[C@@H]2CCC[C@H](NCO)[C@H]2O)C(O)O[C@@H](C)[C@@H]1OC(O)CC(C)C,5ae3_2_AWB_1 
+N[C@H](O)C1CCCN([C@@H]2O[C@H](CO[PH](O)(O)O[PH](O)(O)OC[C@H]3O[C@@H](N4CN[C@@H]5[C@H](N)NCN[C@H]54)[C@H](O)[C@@H]3O)[C@@H](O)[C@H]2O)C1,3ada_1_NAD_0 +NC1NCNC2C1NCN2[C@@H]1O[C@H](CSCC[C@H](N)C(O)O)[C@@H](O)[C@H]1O,2v7t_1_SAH_4 +NC1NCNC2C1NCN2[C@@H]1O[C@H](CO[PH](O)(O)O)[C@@H](O)[C@H]1O,6gbf_1_AMP_0 +N[C@@H](CCC(O)O)C(O)O,5hw0_1_GLU_2 OC[C@@H](O)[C@H](O)C(O)O,4xfm_1_THE_0 -OC(O)CCP(CCC(O)O)CCC(O)O,4ydx_1_TCE_0 -CC(C)CCN1C2CC(C)[C@@H](C)CC2N(C[C@H](O)[C@H](O)[C@H](O)CO[PH](O)(O)O)[C@H]2NC(O)N[C@@H](O)C21,4zav_1_4LS_8 -C[C@H]1[C@@H](C)C[C@@H]2C3[C@@H]1C(C)(C)CCN3[C@H]1C(O)NC(O)NC1N2C[C@H](O)[C@H](O)[C@H](O)CO[PH](O)(O)O,4zaw_1_4LU_1 +NC1NCNC2C1NCN2[C@@H]1O[C@H](CO[PH](O)(O)O[PH](N)(O)O)[C@@H](O)[C@H]1O,3s6a_1_ANP_0 +CCC1C(C)C2C[C@@H]3[C@H](C)[C@H](CCC(O)O)C4CC5[C@@H](CCC(O)O)C(C)C6CC7[C@@H](CC)[C@H](C)C8CC1N2[Mn@SP2](N65)(N87)N43,2o5m_1_MNR_0 +N[C@@H](CCC(O)O)C(O)O,5k4h_2_GLU_3 +N[C@@H](CC1CNC2CCCCC12)C(O)O,2ext_1_TRP_0 +NC1NCNC2C1NCN2[C@@H]1O[C@H](CO[PH](O)(O)O[PH](O)(O)O)[C@@H](O)[C@H]1O,6ep5_1_ADP_1 CC(C)CCN1[C@H]2[C@H](O)N[C@H](O)N[C@@H]2N(C[C@H](O)[C@H](O)[C@H](O)CO[PH](O)(O)O)[C@@H]2CC(C)C(C)C[C@H]21,4zay_1_4LS_6 -CC(C)CCN1C2CC(C)[C@@H](C)CC2N(C[C@H](O)[C@H](O)[C@H](O)CO[PH](O)(O)O)C2N[C@@H](O)N[C@@H](O)C21,4zaz_1_4LS_6 -NC1NCNC2C1NCN2[C@@H]1O[C@H](CO[PH](O)(O)O[PH](O)(O)O[PH](O)(O)O)[C@@H](O)[C@H]1O,4zqx_1_ATP_2 -NC1NCNC2C1NCN2[C@@H]1O[C@H](CO[PH](O)(O)O[PH](O)(O)O[PH](O)(O)O)[C@@H](O)[C@H]1O,5a98_1_ATP_0 -CCCCCC[C@H]1C(O)O[C@H](C)[C@H](N[C@H](O)[C@@H]2CCC[C@H](NCO)[C@H]2O)C(O)O[C@@H](C)[C@@H]1OC(O)CC(C)C,5ae3_2_AWB_1 -CCCCCCCCO[C@@H]1O[C@H](CO)[C@@H](O)[C@H](O)[C@H]1O,5b5s_1_BOG_0 -CC(C)C[C@H](N)C(=O)N[C@@H](Cc1ccc(O)cc1)C(=O)N[C@@H](Cc1ccccc1)C(=O)N[C@@H](CCC(N)=O)C(=O)O,5d9g_1_GLU-ASN-LEU-TYR-PHE-GLN_0 -NC(O)C[C@H](N)C(O)O,5dnc_1_ASN_2 +CC1CCC[C@H](N2NCC[C@H]2C2CC(Cl)C3NNN(C4CC[C@H]5CNNC5C4)C3C2)C1,6xug_1_O1Q_0 +OC(O)[C@H]1CCCNC1,2xrh_1_NIO_1 +OC(O)C1CCCO1,2gf3_2_FOA_1 +NCCC[C@H](N)C(O)O,3it6_1_ORN_1 
+CCO[PH](O)(O)O[PH](O)(O)OC[C@H]1O[C@@H](N2CNC3C(N)NCNC32)[C@H](O)[C@@H]1O[PH](O)(O)O,2xta_1_ACO_0 +CC1C(CC(O)O)NC(O)[C@@H](C)[C@@H]1O[PH](O)(O)OC[C@H]1O[C@@H](N2CNC3C(O)NC(N)NC32)[C@H](O)[C@@H]1O,3wvc_1_FEG_0 CC1(C)CC2C(CO1)C(N1CCOCC1)NC(SCCC1CCCCC1)[C@@H]2CN,5eno_1_5QG_0 -COCCN1CCN([C@@H]2NC(SCC[C@H]3CCC(OC)[C@@H](OC)C3)[C@H](CN)[C@H]3CC(C)(C)OCC23)CC1,5enp_1_5QF_0 -CC(O)NC1CCC(CCSC2NC(N3C[C@@H](C)O[C@@H](C)C3)[C@@H]3COC(C)(C)CC3C2CN)CC1,5enq_1_5QE_0 -CCC(O)NC1CCC(CCS[C@@H]2NC(N3C[C@H](C)O[C@@H](C)C3)C3COC(C)(C)CC3C2CN)CC1,5enr_1_MBX_0 -CN(C)C1CCC(O)C2C1C[C@H]1C[C@H]3[C@H](N(C)C)C(O)C(C(N)O)[C@@H](O)[C@@]3(O)C(O)[C@@H]1C2O,5ent_1_MIY_0 -NC1NCNC2C1NCN2[C@@H]1O[C@H](CO[PH](O)(O)O)[C@@H](O)[C@H]1O,5ers_1_AMP_1 -CCCCCCCCCCCCCCCC(O)O,5f2t_1_PLM_0 -N[C@@H](CC(O)O)C(O)O,5f52_1_ASP_2 -NC1NCNC2C1NCN2[C@@H]1O[C@H](C(F)F)[C@@H](O)[C@@H]1O,5fiu_1_Y3J_3 -CCC[C@H]1CCC(O)C(OC)C1,5fxd_1_H7Y_1 +NC1NCNC2C1NCN2[C@@H]1O[C@H](CF)[C@@H](O)[C@H]1O,2v7w_1_5FD_0 +NC1NCNC2C1NCN2[C@@H]1O[C@H](CO[PH](O)(O)O[PH](O)(O)OC[C@H]2O[C@@H](N3CCCC([C@H](N)O)C3)[C@H](O)[C@@H]2O)[C@@H](O)[C@H]1O,2gag_1_NAD_0 +C[N+](C)(C)CCO[PH](O)(O)O,1o72_2_PC_0 +CC1CC2NC3C(O)NC(O)NC3N(C[C@H](O)[C@H](O)[C@H](O)CO[PH](O)(O)O[PH](O)(O)OC[C@H]3O[C@@H](N4CNC5C(N)NCNC54)[C@H](O)[C@@H]3O)C2CC1C,4u63_1_FAD_0 +O[Mo@]12S[Mo@@]1(O)S2,6a72_1_9UX_0 +CC1C[C@@H]2[C@@H](C[C@@H]1C)NC1[C@H](O)NC(O)N[C@@H]1N2C[C@H](O)[C@H](O)[C@H](O)CO[PH]([O-])(O)O,4rhe_1_FMN_6 +OC(O)C1CC(O)C(O)C(O)C1,3wrb_1_GDE_0 +NC1CCN([C@@H]2O[C@H](CO[PH](O)(O)O[PH](O)(O)O[PH](O)(O)O)[C@@H](O)[C@H]2O)C(O)N1,1za2_1_CTP_4 CO[C@H]1CC(CCCO)CCC1O,5fxe_1_CIY_1 -OC(O)C1CCCCC1,5fxf_1_BEZ_0 -N[C@@H]1NCN[C@H]2C1NCN2[C@@H]1O[C@H](CO[PH](O)(O)O[PH](O)(O)O[PH](O)(O)O)[C@@H](O)[C@H]1O,5gqi_1_ATP_7 -NC1NCNC2C1NCN2[C@@H]1O[C@H](CO[PH](O)(O)O[PH](O)(O)O[PH](O)(O)O)[C@@H](O)[C@H]1O,5gql_1_ATP_4 -CC1CCN(C2NCNC3NCNC32)C1,5hhz_1_ZME_0 -OC(NC1CNNS1)N[C@H]1CCC[C@H](OC(F)(F)F)C1,5hmr_1_FDZ_0 -OCC[C@H]1CCCC[C@@H]1NC(O)N[C@H]1CNNS1,5hqx_1_EDZ_0 -N[C@@H](CCC(O)O)C(O)O,5hw0_1_GLU_2 
+OC[C@@H](O)CO[PH](O)(O)O,2r4e_1_13P_0 +NC1NCNC2C1NCN2[C@@H]1O[C@H](CSCC[C@H](N)C(O)O)[C@@H](O)[C@H]1O,2wr8_1_SAH_1 +OC[C@@H](O)[C@@H](O)[C@H](O)[C@H](O)CO,2vfu_1_MTL_0 +OC(NC1CCCCC1)N[C@H]1CCNC(Cl)C1,4oal_2_245_0 +NC1NCNC2C1NCN2[C@@H]1O[C@H](CO[PH](O)(O)O[PH](O)(O)O[PH](O)(O)O)[C@@H](O)[C@H]1O,5a98_1_ATP_0 +COC1C(O)C(C)C(CCC(C)CCCC(C)CCCC(C)CCC[C@H](C)CCCC(C)CCC[C@H](C)CCCC(C)CCCC(C)C)C(O)C1OC,2x34_2_UQ8_0 +OC(O)C1CCCCC1O,3uni_1_SAL_0 +N[C@@H](CC(O)O)C(O)O,6pa2_1_ASP_2 +NCC(O)O,4osx_1_GLY_2 +N[C@@H](CC(O)O)C(O)O,3eca_1_ASP_3 +NC1NC(O)C2NCN([C@@H]3O[C@H](CO)[C@@H](O)[C@H]3O[PH]([O-])(O)OC[C@H]3O[C@@H](N4CNC5C(N)NCNC54)[C@H](O)[C@@H]3O[PH]([O-])([O-])O)C2N1,6ea9_1_9BG_0 +OC(O)CCC(O)O,1pj2_1_FUM_0 +NC1NC(O)C2NCN([C@@H]3O[C@H](CO)[C@@H](O)[C@H]3O[PH]([O-])(O)OC[C@H]3O[C@@H](N4CNC5C(N)NCNC54)[C@H](O)[C@@H]3O[PH]([O-])(O)O)C2N1,6xb3_3_9BG_1 +NC(O)C[C@H](N)C(O)O,6v2a_1_ASN_3 +C[C@H](CCC(O)O)[C@H]1CC[C@H]2[C@@H]3[C@H](O)C[C@@H]4C[C@H](O)CC[C@]4(C)[C@H]3CC[C@]12C,3o02_2_JN3_0 +CC1CC2[C@@H](CC1C)N[C@H]1C(O)NC(O)N[C@H]1N2C[C@H](O)[C@H](O)[C@H](O)CO[PH](O)(O)O[PH](O)(O)OC[C@H]1O[C@@H](N2CNC3C(N)NCNC32)[C@H](O)[C@@H]1O,4mo2_2_FDA_0 +NC1NCNC2C1NCN2[C@@H]1O[C@H](CO[PH](O)(O)O[PH](O)(O)N[PH](O)(O)O)[C@@H](O)[C@H]1O,3zec_1_ANP_0 +N[C@@H](CC1CNC2CCCCC12)C(O)O,2zcz_2_TRP_3 +CN(C)C1CCC(O)C2C1C[C@H]1C[C@H]3[C@H](N(C)C)C(O)C(C(N)O)[C@@H](O)[C@@]3(O)C(O)[C@@H]1C2O,5ent_1_MIY_0 OC[C@H]1O[C@@H](O)[C@@H](O)[C@@H](O)[C@@H]1O,5ida_1_BMA_0 -N[C@@H](CC(O)O)C(O)O,5k3o_2_ASP_0 -N[C@@H](CCC(O)O)C(O)O,5k45_2_GLU_1 -N[C@@H](CCC(O)O)C(O)O,5k4h_2_GLU_3 -CC(C)[C@@H](CO)NC(O)[C@@H](N)CC(N)O,5k62_1_ASN-VAL_0 -NC(O)C[C@H](N)C(O)NCCO,5k63_1_ASN-GLY_0 NC(O)C[C@H](N)C(O)N[C@H](CO)CCC(O)O,5k66_1_ASN-GLU_0 -OC[C@H]1O[C@@H](O)[C@@H](O)[C@@H](O)[C@@H]1O,5mh1_1_BMA_0 -CC[SnH](CC)CC,5u82_2_ZN0_0 -O[Mo@]12S[Mo@@]1(O)S2,6a71_1_9UX_0 -O[Mo@]12S[Mo@@]1(O)S2,6a72_1_9UX_0 -C[N+](C)(C)O,6b1b_1_TMO_15 
-NC1NC(O)C2NCN([C@@H]3O[C@H](CO)[C@@H](O)[C@H]3O[PH]([O-])(O)OC[C@H]3O[C@@H](N4CNC5C(N)NCNC54)[C@H](O)[C@@H]3O[PH]([O-])([O-])O)C2N1,6ea9_1_9BG_0 -NC1NCNC2C1NCN2[C@@H]1O[C@H](CO[PH](O)(O)O[PH](O)(O)O)[C@@H](O)[C@H]1O,6ep5_1_ADP_1 -NC1NCNC2[C@H]1NCN2[C@@H]1O[C@H](CO[PH](O)(O)O)[C@@H](O)[C@H]1O,6etf_1_AMP_0 -NC1NCNC2C1NCN2[C@@H]1O[C@H](CO[PH](O)(O)O[PH](O)(O)O)[C@@H](O)[C@H]1O,6fgc_1_ADP_1 +C[C@H]1[C@@H](C)C[C@@H]2C3[C@@H]1C(C)(C)CCN3[C@H]1C(O)NC(O)NC1N2C[C@H](O)[C@H](O)[C@H](O)CO[PH](O)(O)O,4zaw_1_4LU_1 +C[S@@H](CC[C@H](N)C(O)O)C[C@H]1O[C@@H](N2CNC3C(N)NCNC32)[C@H](O)[C@@H]1O,1rqp_1_SAM_0 +NC1NCNC2C1NCN2[C@@H]1O[C@H](CO)[C@@H](O)[C@H]1O,2q6k_1_ADN_1 C[C@H]1[C@H](OC(O)CCC(O)O)O[C@@H]2O[C@@]3(C)CC[C@H]4[C@H](C)CC[C@@H]1[C@@]24OO3,6fgc_1_D95_1 -NC1NCNC2C1NCN2[C@@H]1O[C@H](CO[PH](O)(O)O)[C@@H](O)[C@H]1O,6gbf_1_AMP_0 -CC1CC2NC3C(O)NC(O)NC3N(C[C@H](O)[C@H](O)[C@H](O)CO[PH](O)(O)O)C2CC1C,6jls_1_FMN_6 -CCC(O)NC1CCC(C([O-])O)C(C([O-])O)C1,6n19_2_K8V_0 -CC(C)(O)C1CCC([C@H]2CC(Cl)C[C@@H](C34(C(N)N)CC3C4)C2)CC1,6nco_1_KQP_0 -[O-]C(O)C1CCCC(CCC2CCCCC2)C1N1CCCC1,6npp_1_KWG_0 -NC1NCNC2C1NCN2[C@@H]1O[C@H](CO)[C@H]2O[PH]([O-])(O)O[C@H]21,6o6y_1_ACK_0 -NC1NCNC2C1NCN2[C@@H]1O[C@H](CO)[C@H]2O[PH]([O-])(O)O[C@H]21,6o70_1_ACK_1 -N[C@@H](CC(O)O)C(O)O,6pa2_1_ASP_2 -NC(O)C[C@H](N)C(O)O,6pa6_2_ASN_0 -N[C@@H](CC(O)O)C(O)O,6paa_1_ASP_2 +OCC[C@H]1CCCC[C@@H]1NC(O)N[C@H]1CCC[C@@H](OC(F)(F)F)C1,6yao_1_OJ2_0 +NC1NCNC2C1NCN2[C@@H]1O[C@H](C(F)F)[C@@H](O)[C@@H]1O,5fiu_1_Y3J_3 CC1CC2NC3C(NC(O)N[C@H]3O)N(C[C@H](O)[C@H](O)[C@H](O)CO[PH](O)(O)O[PH](O)(O)OC[C@H]3O[C@@H](N4CNC5C4NCN[C@H]5N)[C@H](O)[C@@H]3O)C2CC1C,6qkr_1_FAD_0 -NC1NCNC2C1NCN2[C@@H]1O[C@H](CO[PH](O)(O)O)[C@@H](O)[C@H]1O,6rms_1_AMP_1 +CSCC[C@@H](NC(N)O)C(O)O,1uf5_1_CDT_0 +CCC(O)NC1CCC(CCS[C@@H]2NC(N3C[C@H](C)O[C@@H](C)C3)C3COC(C)(C)CC3C2CN)CC1,5enr_1_MBX_0 +NC1NC(O)C2NCN([C@@H]3O[C@H](CO[PH](O)(O)O[PH](O)(O)O[PH](O)(O)O)[C@@H](O)[C@H]3O)C2N1,3jqm_1_GTP_5 
+C[C@H]1O[C@H](O[C@H]2[C@H](O)[C@@H](O)[C@@H](O[C@H]3[C@H](O)[C@@H](O)[C@@H](O)O[C@@H]3CO)O[C@@H]2CO)[C@H](O)[C@@H](O)[C@@H]1N[C@H]1CC(CO)[C@@H](O)[C@H](O)[C@H]1O,3k8m_1_GLC-GLC-AC1_0 +CC1CC2[C@H](CC1C)N(C(C)CN)C1C(O)NC(O)NC1N2C[C@H](O)[C@H](O)[C@H](O)CO[PH](O)(O)O[PH](O)(O)OC[C@H]1O[C@@H](N2CNC3C(N)NCN[C@H]32)[C@H](O)[C@@H]1O,4bc9_1_CNV-FAD_0 +NC(O)C[C@H](N)C(O)O,5dnc_1_ASN_2 +NC1NCNC2C1NCN2[C@@H]1O[C@H](CO)[C@@H](O)[C@H]1O,4h2f_1_ADN_0 +N[C@@H](CC(O)O)C(O)O,5f52_1_ASP_2 +OC[C@H]1O[C@H](O[C@@H]2[C@H](O)[C@@H](OC[C@H]3O[C@@H](O)[C@@H](O)[C@@H](O[C@H]4O[C@H](CO)[C@@H](O)[C@H](O)[C@@H]4O)[C@@H]3O)O[C@H](CO)[C@H]2O)[C@@H](O)[C@@H](O)[C@@H]1O,3s5x_1_BMA-MAN-MAN-MAN-MAN_0 +C[N+](C)(C)O,6b1b_1_TMO_15 +OC[C@H]1O[C@@H](O)[C@H](O)[C@@H](F)[C@@H]1O,4mig_1_G3F_2 +N[C@@H](CO)C(O)O,1tke_1_SER_0 C[S@@H](CC[C@H](N)C(O)O)C[C@H]1O[C@@H](N2CNC3C2NCN[C@@H]3N)[C@H](O)[C@@H]1O,6ryz_1_SAM_2 -NC1NCNC2[C@H]1NCN2[C@@H]1O[C@H](CCl)[C@@H](O)[C@H]1O,6rz2_1_5CD_2 -NC1NCNC2C1NCN2[C@@H]1O[C@H](CO[PH](O)(O)C[PH](O)(O)O)[C@@H](O)[C@H]1O,6tvg_1_AP2_1 -C[C@@H](O)SCC[N+](C)(C)C,6uqy_2_AT3_0 -C[C@@H](O)SCC[N+](C)(C)C,6ur1_2_AT3_0 -NC(O)C[C@H](N)C(O)O,6v2a_1_ASN_3 -N[C@H](CCC(O)O)C(O)O,6wyz_1_DGL_1 -NC1NC(O)C2NCN([C@@H]3O[C@H](CO)[C@@H](O)[C@H]3O[PH]([O-])(O)OC[C@H]3O[C@@H](N4CNC5C(N)NCNC54)[C@H](O)[C@@H]3O[PH]([O-])(O)O)C2N1,6xb3_3_9BG_1 -CC1CCC[C@H](N2NCC[C@H]2C2CC(Cl)C3NNN(C4CC[C@H]5CNNC5C4)C3C2)C1,6xug_1_O1Q_0 -OCC[C@H]1CCCC[C@@H]1NC(O)N[C@H]1CCC[C@@H](OC(F)(F)F)C1,6yao_1_OJ2_0 -OCC[C@H]1CCCC[C@@H]1NC(O)N[C@H]1CC(Cl)C[C@H](OC(F)(F)F)C1,6yap_1_OHZ_0 -OCCC1CCCC[C@H]1N[C@H](O)N[C@H]1C[C@@H](Cl)C[C@@H](OC(F)(F)F)C1,6yaq_1_OHZ_0 +OC(NC1CCCCC1)N[C@H]1CCNC(Cl)C1,4o95_1_245_0 +NC(O)C1CCCN([C@@H]2O[C@H](CO[PH](O)(O)O[PH](O)(O)OC[C@H]3O[C@@H](N4CNC5C(N)NCNC54)[C@H](O)[C@@H]3O)[C@@H](O)[C@H]2O)C1,2gah_1_NAD_0 +CC(O)N[C@H]1[C@H]([C@H](O)[C@H](O)CO)O[C@](O)(C(O)O)C[C@@H]1O,3ju4_1_SLB_2 +NC1NCNC2C1NCN2[C@@H]1O[C@H](CO)[C@H]2O[PH]([O-])(O)O[C@H]21,6o6y_1_ACK_0 
+CC(O)N[C@H]1CO[C@H](CO)[C@@H](O)[C@@H]1O,4qo5_1_NAG_2 +OC(O)CCP(CCC(O)O)CCC(O)O,4ydx_1_TCE_0 +CCCCCCCCCCCCCC[C@@H](O)OC[C@@H](C)O[C@@H](O)CCCCCCCCCCC,4qa8_1_PJZ_0 +OC(O)[C@H]1C[C@@H](O)[C@@H](O)[C@H](O)C1,2hk9_1_SKM_0 diff --git a/forks/FABind/inference/fabind_pocket_only_posebusters_benchmark_inputs.csv b/forks/FABind/inference/fabind_pocket_only_posebusters_benchmark_inputs.csv new file mode 100644 index 00000000..f00d31ac --- /dev/null +++ b/forks/FABind/inference/fabind_pocket_only_posebusters_benchmark_inputs.csv @@ -0,0 +1,309 @@ +Cleaned_SMILES,pdb_id +O=C(O)[C@H]1NCC[C@H]1O,7CNQ_G8X +Nc1nc2c(ncn2[C@@H]2O[C@H](CO[P@](=O)(O)O[C@H]3[C@@H](O)[C@H](n4ccc(=O)[nH]c4=O)O[C@@H]3CO)[C@H]3CC(O)(O)C[C@H]32)c(=O)[nH]1,7C0U_FGO +CNc1cccc2c1NC(=O)C2(C)C,7EBG_J0L +C[C@@H]1C=C[C@@H]2CCCC[C@@H]2[C@H]1C(=O)c1c(O)c([C@]2(O)CC[C@H](O)[C@@H]3O[C@@H]32)c[nH]c1=O,7WUY_76N +CC(C)(O)C(=O)SCCNC(=O)CCNC(=O)[C@H](O)C(C)(C)CO[P@@](=O)(O)O[P@](=O)(O)OC[C@H]1O[C@@H](n2cnc3c(N)ncnc32)[C@H](O)[C@@H]1OP(=O)(O)O,7PT3_3KK +Nc1nc(O)c2[nH]nnc2n1,7C3U_AZG +Cc1cc2nc3c(=O)[nH]c(=O)nc-3n(C[C@H](O)[C@H](O)[C@H](O)COP(=O)(O)O)c2cc1C,7PGX_FMN +Cc1cc(C)c(N)cn1,7FB7_8NF +CNCCN(C)C(=O)c1cnn(C)c1C(=O)Nc1ccn2cc(-c3ccccc3)nc2c1,5SIS_JSM +Cc1ncc(COP(=O)(O)O)c(C/N=C(\CON)C(=O)O)c1O,8AIE_M7L +O=C(O)[C@H]1O[C@@H](O)[C@H](O)[C@@H](O)[C@@H]1O,7CTM_BDP +CN1CCN(c2ncc(Oc3cc(CN4CCC(CC(=O)O)CC4)cc(-c4cc(Cl)cc(Cl)c4)n3)cn2)CC1,7LCU_XTA +CC(C)C[C@H](NC(=O)CN[P@@](=O)(O)[C@@H](Cc1ccccc1)NC(=O)OCc1ccccc1)C(=O)O,6YMS_OZH +CC(C)(CO[P@@](=O)(O)O[P@](=O)(O)OC[C@H]1O[C@@H](n2cnc3c(N)ncnc32)[C@H](O)[C@@H]1OP(=O)(O)O)[C@@H](O)C(=O)NCCC(=O)NCCSC(=O)c1ccccc1,7PK0_BYC +COC(=O)c1ccccc1Cc1cn(C)nc1-c1cc(Cl)nc(N)n1,8CNH_V6U +O=C(NCc1ccc[n+]([C@@H]2O[C@H](COP(=O)(O)O)[C@@H](O)[C@H]2O)c1)Nc1ccc([N+](=O)[O-])cc1,7M6K_YRJ +Oc1cccc(CNc2nc(Cl)nc3scc(-c4ccccc4)c23)c1,7SFO_98L +O=C(N[C@H](C(=O)Nc1cnccc1-c1ccc(C(F)(F)F)cc1)c1cccc(C(F)(F)F)c1)c1cnccc1-c1ccc(C(F)(F)F)cc1,8EAB_VN2 +O=C(O)CN(CCO)CCO,7ZTL_BCN +O=C(O)C1CCC1,7MWU_ZPM 
+Cc1nccnc1-c1nn2c(=O)cc(-c3ccc(C4CCCCC4)cc3)[nH]c2c1C(=O)N1CC(CF)C1,7TYP_KUR +NS(=O)(=O)c1ccc(Cl)s1,6YQV_8K2 +COc1cc(C=O)ccc1O,6XM9_V55 +N[C@@H]1CCN(Cc2cccc(-c3ccc4c(O)nccc4c3)c2)C1,7PIH_7QW +CC(C)=CCC/C(C)=C/CC/C(C)=C/CO[P@](=O)(O)OP(=O)(O)O,7T0D_FPP +CCCCCCc1nc(N)nc(N)c1-c1ccccc1,7XI7_4RI +Nc1cc(C(Cl)=C(Cl)Cl)c(S(N)(=O)=O)cc1S(N)(=O)=O,7PRI_7TI +CN(c1ncnc2[nH]ccc12)[C@@H]1CCCN(C(=O)CNc2cc(Cl)cc(Cl)c2)C1,8FLV_ZB9 +COc1cc2c(cc1OC)[C@H]1Cc3ccc(OC)c(OC)c3CN1CC2,7N4W_P4V +CNC(=O)CN(CC(c1ccccc1)c1ccccc1)C(=O)c1cc(C)c(OC)c(C)c1,7OEO_V9Z +O=C(N[C@@H]1C[C@H]1c1ccccc1)c1cc(Cl)cc(COc2cnc3[nH]ccc3c2)c1,5SB2_1K2 +N[C@H]1C=C(CO)[C@@H](O[C@H]2O[C@H](CO)[C@@H](O)[C@H](O)[C@H]2O)[C@H](O)[C@H]1O,7MGY_ZD1 +CCCNc1nn2c(-c3ccc(O)cc3)cnc2s1,7OPG_06N +Nc1nc2c([nH]c(=O)n2[C@H]2C[C@H](O)[C@@H](CO)O2)c(=O)[nH]1,7OLI_8HG +O=C(O)c1ccc(-c2nn(C(=O)c3c(Cl)cccc3C(F)(F)F)c3cccc(F)c23)c(F)c1,8FAV_4Y5 +Oc1ccc2c(c1)sc1nncn12,7R59_I5F +Nc1ccn([C@@H]2O[C@H](CO[P@](=O)(O)O[P@@](=O)(O)OC[C@@H](O)[C@@H](O)[C@@H](O)CO)[C@@H](O)[C@H]2O)c(=O)n1,6XHT_V2V +O=C1CC(c2ccccc2)=Nc2c(-c3ccccc3)c(C(F)(F)F)nn21,8EX2_Q2Q +OC[C@H]1CNC[C@@H](O)[C@@H]1O,7LOU_IFM +Cc1cn([C@@H]2O[C@H](COP(=O)(O)O)[C@@H](O)[C@H]2O)c(=O)nc1N,6XBO_5MC +CN(C)Cc1[nH]nc2ccc(-c3ccc(F)cc3OCCc3cccnc3)cc12,6TW7_NZB +CNC(=O)[C@H](CCc1ccccc1)NC(=O)[C@H](NC(=O)CNCc1ccc(OC)cc1OC)c1cccs1,7U3J_L6U +NS(=O)(=O)c1c(C(F)(F)F)ccc(-c2ccc(C3CCNCC3)cc2)c1-c1nnn[nH]1,7UYB_OK0 +OC[C@H]1O[C@@H](O)[C@H](O)[C@@H](O)[C@@H]1O,7WJB_BGC +Nc1ncnc2c1ncn2[C@@H]1O[C@H](CSCC[C@H](N)C(=O)O)[C@@H](O)[C@H]1O,7RC3_SAH +O=[N+]([O-])c1ccc(O)cc1,7RZL_NPO +N#CC[C@H](C1CCCC1)n1cc(-c2ncnc3[nH]ccc23)cn1,6WTN_RXT +C=C(C)CCO[P@@](=O)(O)OP(=O)(O)O,7MY1_IPE +C=C(Oc1cccc(C(=O)O)c1)C(=O)O,7AN5_RDH +C[C@H](Oc1cc2cc(F)ccc2nc1N)c1[nH]c(=O)ccc1-n1cccn1,7JY3_VUD +CC[C@H](C)[C@H](N)C(=O)O[P@](=O)(O)OC[C@H]1O[C@@H](n2cnc3c(N)ncnc32)[C@H](O)[C@@H]1O,7D5C_GV6 +N[C@@H](CC[C@H](O)[C@@H](N)COS(=O)(=O)O)C(=O)O,7WUX_6OI +NC(=O)C[C@H](NC(=O)c1c[nH]nc1-c1ccc(Cl)cc1F)c1ccc(F)c(Cl)c1,8F8E_XJI 
+C=Cc1c(C)c2cc3nc(c4c5[nH]c(cc6nc(cc1[nH]2)C(C)=C6CC)c(C)c5C(=O)[C@@H]4C(=O)OC)[C@@H](CCC(=O)OC/C=C(\C)CCC[C@H](C)CCC[C@H](C)CCCC(C)C)[C@@H]3C,8F4J_PHO +Nc1ncnc2c1ncn2[C@@H]1O[C@H](C[C@@H](N)CC[C@H](N)C(=O)O)[C@@H](O)[C@H]1O,7PL1_SFG +CC1=[NH+][C@H]2[C@@H](O1)O[C@H](COS(=O)(=O)[O-])[C@@H](O)[C@@H]2O,6Z14_Q4Z +Nc1nc2c(ncn2[C@H]2C[C@H](O)[C@@H](CO[P@@](=O)(O)OP(=O)(O)O)O2)c(=O)[nH]1,7ODY_DGI +O=C(O[C@H](Cn1ccnc1)c1ccc(F)cc1)c1cc(Cl)cc(Cl)c1,7RKW_5TV +N=C1N/C(=N\Nc2ccccc2)c2ccccc21,5SAK_ZRY +CCOC(=O)/C(=N\O)C(C)=O,8AUH_L9I +CSC[C@H]1O[C@@H](n2cnc3c(N)ncnc32)[C@H](O)[C@@H]1O,8C5M_MTA +CCCCCCNc1ccc2ncc(C(=O)NC)c(Nc3ccccc3)c2c1,7N03_ZRP +C=CC[C@@H]1/C=C(\C)C[C@H](C)C[C@H](OC)[C@H]2O[C@@](O)(C(=O)C(=O)N3CCCC[C@H]3C(=O)O[C@H](/C(C)=C/[C@@H]3CC[C@@H](O)[C@H](OC)C3)[C@H](C)[C@@H](O)CC1=O)[C@H](C)C[C@@H]2OC,7U0U_FK5 +CC(=O)N1Cc2cc(S(C)(=O)=O)ccc2[C@@H]1C(=O)Nc1ccc(C(O)(C(F)(F)F)C(F)(F)F)cc1,7OFK_VCH +Nc1nc2c(c(CN[C@H]3C=C[C@H](O)[C@@H]3O)cn2[C@@H]2O[C@H](COP(=O)(O)O)[C@@H](O)[C@H]2O)c(=O)[nH]1,7ULC_56B +CNCc1cccc(-c2cc(C)cc(N)n2)c1,7TS6_KMI +Cc1cc2c3c(c1C)C(C)(C)CC=[N+]3c1c([nH]c(=O)[nH]c1=O)N2C[C@H](O)[C@H](O)[C@H](O)COP(=O)(O)O,7NF3_4LU +O=C(O)c1cccnc1,7Z1Q_NIO +C=C1/C(=C\C=C2/CCC[C@]3(C)[C@@H]([C@H](C)CCCC(C)(C)O)CC[C@@H]23)C[C@@H](O)C[C@@H]1O,7QPP_VDX +NCCSc1nc(NCc2ccc(-n3cccn3)cc2)c2[nH]nc(C3CCC3)c2n1,7QHL_D5P +O=S1(=O)N[C@@H]2[C@H](O)[C@@H](O)[C@H](O)[C@@H](CO)[C@@H]2O1,7P4C_5OV +C=C(C)CCS[P@@](=O)(O)OP(=O)(O)O,7VQ9_ISY +NCC[C@H](O)C(=O)N[C@@H]1C[C@H](N)[C@@H](O[C@H]2O[C@H](CN)[C@@H](O)[C@H](O)[C@H]2O)[C@H](O)[C@H]1O[C@H]1O[C@H](CO)[C@@H](O)[C@H](N)[C@H]1O,6VTA_AKN +Cc1ccc(Cl)cc1,7V43_C4O +CN(/N=C/c1ccccc1C(=O)O)c1nc(-c2ccccc2Cl)cs1,6M2B_EZO +Clc1nc(Cl)c2[nH]cnc2n1,7OP9_06K +N[C@H](CCC(=O)O)C(=O)O,7UJ5_DGL +O=S(=O)(O)OC[C@H]1O[C@@H](O)[C@H](O)[C@@H](O)[C@H]1O,7OZC_G6S +CC(=O)N[C@H]1[C@@H](O[P@@](=O)(O)O[P@@](=O)(O)OC[C@H]2O[C@@H](n3ccc(=O)[nH]c3=O)[C@H](O)[C@@H]2O)O[C@H](CO)[C@@H](O)[C@@H]1O,7TB0_UD1 
+Nc1ncnc2c1ncn2[C@@H]1O[C@H](CO[P@](=O)(O)OP(=O)(O)O)[C@@H](O)[C@H]1O,8C3N_ADP +N[C@@H]1[C@@H](O)[C@H](O)[C@@H](COP(=O)(O)O)O[C@@H]1O,7NUT_GLP +Cn1c(CO[C@@H]2C=CO[C@H](CO)[C@@H]2O)nc2ccc(C(=O)O)cc21,7P1M_4IU +Cc1cc2c(cc1C)N(C[C@H](O)[C@H](O)[C@H](O)CO[P@@](=O)(O)O[P@@](=O)(O)OC[C@H]1O[C@@H](n3cnc4c(N)ncnc43)[C@H](O)[C@@H]1O)c1[nH]c(=O)[nH]c(=O)c1N2,7EPV_FDA +N#Cc1cncnc1N,6Z0R_Q4H +O=C(O)C1=C[C@H](O)[C@@H](O)[C@H]([C@H](O)[C@H](O)CO)O1,7P1F_KFN +Oc1ccc(/C=C/c2cc(O)cc(O)c2)cc1,7VB8_STL +C[NH+](C)c1cc(NC(=O)CNC(C)(C)C)c(O)c2c1C[C@H]1C[C@H]3[C@H]([NH+](C)C)C(O)=C(C(N)=O)C(=O)[C@@]3(O)C(O)=C1C2=O,6YR2_T1C +CSCCC/N=C/c1c(COP(=O)(O)O)cnc(C)c1O,7CIJ_G0C +NC(=O)c1ccc[n+]([C@@H]2O[C@H](CO[P@@](=O)([O-])O[P@](=O)(O)OC[C@H]3O[C@@H](n4cnc5c(N)ncnc54)[C@H](O)[C@@H]3O)[C@@H](O)[C@H]2O)c1,7UMW_NAD +C[C@H]1O[C@@H](n2cnc3c(N)ncnc32)[C@H](O)[C@@H]1O,7TOM_5AD +O=S(=O)(O)CCS,7SUC_COM +Oc1ccc(Cl)cc1O,7DQL_4CL +O=C(O)c1ccc(O)cc1,7CUO_PHB +O=C1Nc2ccccc2/C1=C\c1ccc(C(=O)O)cc1,7QFM_AY3 +c1ccc(CC2NCCN2)cc1,7Q2B_M6H +Cc1c(C(=O)c2cccc3ccccc23)c2cccc3c2n1[C@H](CN1CCOCC1)CO3,7MWN_WI5 +CCCCCCCCCC(=O)NCCCC(=O)O,7L5F_XNG +NC(=O)c1cccnc1,8DSC_NCA +CO[C@@H]1OC[C@@H](O)[C@H](O)[C@H]1O,7ZDY_6MJ +Cc1oc2ccc(-c3cc(N)nc(N)c3)cc2c1C,7AKL_RK5 +O=C(O)c1ccc(OCc2c(-c3c(Cl)cccc3C(F)(F)F)noc2-c2cn[nH]c2)cc1,7NP6_UK8 +CC(C)(O)C#Cc1ccc2[nH]c3c(c2c1)-c1nc(N)ncc1CCC3,7JXX_VP7 +Fc1cccc2ccccc12,7LOE_Y84 +CCc1nc(N)nc(N)c1OCCCOc1cccc(C[C@@H](C(=O)O)C(F)F)c1,7KM8_WPD +CN[C@@H]1[C@H](O)[C@H](NC)[C@H]2O[C@]3(O)[C@H](O[C@@H]2[C@H]1O)O[C@H](C)CC3(O)O,7UY4_SMI +C[C@]12CC[C@H]3[C@@H](CCC4=CC(=O)CC[C@@]43C)[C@@H]1CC[C@@H]2O,7CL8_TES +Cc1ccc(-c2csc3ncnc(SCCC(=O)O)c23)cc1,6Z1C_7EY +COc1ccc(-c2cccc(S(=O)(=O)NC(=O)[C@@H](N)CS)c2)cn1,8HFN_XGC +Cc1nc(-c2cn3c(n2)-c2ccc(-c4cnn(C(C)(C)C(N)=O)c4)cc2OCC3)n(C(C)C)n1,8EXL_799 +CNc1nc(NC)c2ncn(C)c2n1,7Z7F_IF3 +Cc1ncc(COP(=O)(O)O)c(C=O)c1O,7XG5_PLP +CO[C@@H]1O[C@@H](C)[C@@H](O)[C@@H](O)[C@@H]1O,7P2I_MFU +O=P(O)(O)Oc1ccccc1,7MOI_HPS 
+CCN(CC)c1ccc2c(C)c(CCN3C(=O)N[C@@]4(CCN(C(=O)c5c[nH]cn5)C4)C3=O)c(=O)oc2c1,7LMO_NYO +CCNC(=O)CCNC(=O)[C@H](O)C(C)(C)CO[P@@](=O)(O)O[P@@](=O)(O)OC[C@H]1O[C@@H](n2cnc3c(N)ncnc32)[C@H](O)[C@@H]1OP(=O)(O)O,7MSR_DCA +CNC(=O)c1nn(C)c2ccc(Nc3nccc(-n4cc(N[C@@H]5CCNC5)c(C)n4)n3)cc12,6ZCY_QF8 +CS(=O)(=O)Nc1ccc(F)cc1C(=O)O,6ZC3_JOR +Cc1ccc(F)c2cc(C(=O)Nc3cccc(N4CCC(N(C)C)CC4)c3)[nH]c12,7LZD_YHY +COc1cc(C(=O)Nc2ccc(F)c([C@]3(CF)CC[C@@](C)(S(C)(=O)=O)C(N)=N3)c2)ncn1,7N4N_0BK +Nc1ncnc2c1ncn2[C@@H]1O[C@H](CO[P@@](=O)(O)O[P@](=O)(O)OP(=O)(O)O)[C@@H](O)[C@H]1O,7KRU_ATP +NC(=O)C1=CN([C@@H]2O[C@H](CO[P@](=O)(O)O[P@@](=O)(O)OC[C@H]3O[C@@H](n4cnc5c(N)ncnc54)[C@H](O)[C@@H]3O)[C@@H](O)[C@H]2O)C=CC1,7UTW_NAI +CCc1ccc(S(N)(=O)=O)cc1,7BKA_4JC +CCc1nc(N)nc(N)c1OCCCOc1cc(C)ccc1N1CC(C(=O)O)C1,5SD5_HWI +Nc1nc2c(ncn2[C@@H]2O[C@H](CO[P@](=O)(O)O[P@](=O)(O)OP(O)(O)=S)[C@@H](O)[C@H]2O)c(=O)[nH]1,7SCW_GSP +O=P(O)(O)O[C@H]1[C@H](O)[C@@H](OP(=O)(O)O)[C@H](OP(=O)(O)O)[C@@H](OP(=O)(O)O)[C@H]1O,7SDD_4IP +Nc1nc2c(c(=O)[nH]1)N(C=O)[C@@H](CNc1ccc(C(=O)N[C@@H](CCC(=O)O)C(=O)O)cc1)CN2,7TH4_FFO +N[C@@H](CCC(=O)N[C@@H](CS)C(=O)NCC(=O)O)C(=O)O,8D19_GSH +CCC[C@@H](CC(=O)C(=O)O)C(=O)O,7A1P_QW2 +C#CCO[P@](=O)(O)OC[C@H]1O[C@@H](n2cnc3c(N)ncnc32)[C@H](O)[C@@H]1O,8G0V_YHT +Cc1ncc(COP(=O)(O)O)c(/C=N/[C@H]2[C@H](O)[C@@H](O)[C@@H](O[P@](=O)(O)O[P@](=O)(O)OC[C@H]3O[C@@H](n4ccc(=O)[nH]c4=O)[C@H](O)[C@@H]3O)O[C@@H]2C)c1O,7MFP_Z7P +Nc1ccn([C@@H]2O[C@H](CO[P@](=O)(O)O[P@@](=O)(O)OP(=O)(O)O)[C@@H](O)[C@H]2O)c(=O)n1,7N7H_CTP +Nc1nc2c([nH]c(=O)n2[C@H]2C[C@H](O)[C@@H](COP(=O)(O)O)O2)c(=O)[nH]1,7X9K_8OG +O=c1c(-c2ccc(O)cc2)coc2cc(O)cc(O)c12,7NFB_GEN +OC[C@H]1O[C@H](F)[C@H](O)[C@@H](O)[C@@H]1O,7DKT_GLF +Nc1ncnc2c1ncn2[C@@H]1O[C@H](CO[P@](=O)(O)O[P@@](=O)(O)OC[C@H]2O[C@@H](O)[C@H](O)[C@@H]2O)[C@@H](O)[C@H]1O,7R3D_APR +CS(=O)(=O)N1CCN(c2ccc3nncn3n2)CC1,6YYO_Q1K +[O-][n+]1cc(-c2c(-n3cnnn3)ccc(Cl)c2F)ccc1[C@@H](CC1CC1)n1cc(-c2cncs2)cn1,7V14_ORU +Cn1cc(Nc2ncc(C3=CCC[C@@H](NC(=O)c4ccccc4)C3)nc2C(N)=O)cn1,7SIU_9ID 
+C=C(C)[C@@H]1CCC(C)=C[C@H]1c1c(O)cc(CCCCC)cc1O,7TE8_P0T +Cc1cc2nc3c(=O)[nH]c(=O)nc-3n(C[C@H](O)[C@H](O)[C@H](O)CO[P@@](=O)(O)O[P@](=O)(O)OC[C@H]3O[C@@H](n4cnc5c(N)ncnc54)[C@H](O)[C@@H]3O)c2cc1C,7F8T_FAD +Cc1cnc2c(N)c(C(=O)NCCc3ccc([C@@H]4C[C@H]5CC[C@@H](C4)N5)cc3)sc2n1,7TUO_KL9 +Nc1ncnc2c1ncn2[C@@H]1O[C@H](CO[P@@](=O)(O)O[P@](=O)(O)NP(=O)(O)O)[C@@H](O)[C@H]1O,7B94_ANP +C=C(/N=C/c1c(COP(=O)(O)O)cnc(C)c1O)C(=O)O,7LEV_0JO +Cc1nc2c(=O)[nH]c(=O)nc-2n(C[C@H](O)[C@H](O)[C@H](O)CO)c1C,8A1H_DLZ +CC(C)C[C@H](N)CO,7NU0_DCL +Cc1cc2nc3c(=O)[nH]c(=O)nc-3n(C[C@H](O)[C@H](O)[C@H](O)CO)c2cc1C,7QF4_RBF +COc1cccc2sc3nncn3c12,7Z2O_IAJ +Nc1ccn([C@@H]2O[C@H](CO[P@@](=O)(O)OP(=O)(O)O)[C@@H](O)[C@H]2O)c(=O)n1,7O0N_CDP +O=S(=O)(NCc1ccc(S(=O)(=O)c2ccc(O)c(O)c2)cc1)c1ccc(O)c(O)c1,7FRX_O88 +Cc1n[nH]c2c1[C@](c1cc(CO)cc(-c3ccccc3)c1)(C(C)C)C(C#N)=C(N)O2,7X5N_5M5 +COc1cccc(NC(=O)c2ccc(C)c(Nc3nc(-c4cccnc4)nc4c3cnn4C)c2)c1,8BOM_QU6 +Cc1cc(F)c(NC(=O)NCCC(C)(C)C)cc1Nc1ccc2ncn(C)c(=O)c2c1F,7K0V_VQP +COc1ccc2nc(C)c(O[C@@H]3C[C@H]4C(=O)N[C@]5(C(=O)NS(=O)(=O)C6(C)CC6)C[C@H]5/C=C\CCCCC[C@H](NC(=O)OCC5(C(F)(F)F)CC5)C(=O)N4C3)nc2c1,7MMH_ZJY +CNC(=S)c1cccnc1,7PJQ_OWH +Cc1c[nH]c(=O)[nH]c1=O,7M31_TDR +Nc1ncnc2c1ncn2[C@@H]1O[C@H](CO[P@@](=O)(O)OS(=O)(=O)O)[C@@H](O)[C@H]1O,7FHA_ADX +COc1cc2c(cc1Nc1nc(Nc3ccccc3S(=O)(=O)C(C)C)c3[nH]ccc3n1)N(C(=O)CN(C)C)CC2,7BTT_F8R +CC(C)C(=O)Nc1ncc(C(=O)NCCN(Cc2ccccc2)C(=O)c2ccc(S(=O)(=O)Nc3ccccc3)cc2)s1,7QHG_T3B +O=C(C1CCN(c2cncnc2-c2ccc(F)cc2)CC1)N1CC(F)C1,7N6F_0I1 +CC(C)C(=O)Nc1ncc(-c2cc(C(F)F)nn2-c2c(Cl)cccc2Cl)s1,8AAU_LH0 +CC(=O)N[C@H]1[C@H](OCCc2ccc3occc3c2)O[C@H](CO)[C@@H](O)[C@@H]1O,8GFD_ZHR +O=C(O)C1=C[C@@H](OP(=O)(O)O)[C@@H](O)[C@H](O)C1,7TBU_S3P +CNc1cnn(C)c(=O)c1Cl,6YQW_82I +C=C(CC(=O)O)C(=O)O,7W06_ITN +NS(=O)(=O)c1cc(-c2nnn[nH]2)c(NCc2cccs2)cc1Cl,7ZL5_IWE +O=P(O)(O)OCCNS(=O)(=O)c1ccc(OC(F)(F)F)cc1,7L03_F9F +NCCCNCCCN,7XJN_NSD +O=C(O)CNC(=O)C(=O)O,7ZCC_OGA +Cc1nn(C)c(C)c1CCOc1cc(F)ccc1-c1ccc2n[nH]c(CN(C)C)c2c1,6TW5_9M2 
+CCCCOc1ccc(C[C@H](CC)C(=O)O)cc1CNC(=O)c1ccc(C(F)(F)F)cc1F,7VWF_K55 +Nc1cccc2c1C(=O)N([C@@H]1CCC(=O)NC1=O)C2=O,7SZA_DUI +COC(=O)c1cc(S(N)(=O)=O)c(SC2CCCCC2)cc1Cl,7POM_7VZ +N[C@@H](Cc1ccc(O)c(F)c1)C(=O)O,7KQU_YOF +CC(=O)c1ccc(NC(=O)[C@H](C)S)cc1,7ZOC_T8E +Nc1ncnc2c1ncn2[C@@H]1O[C@H](CO[P@@](=O)(O)OC(=O)[C@@H](N)Cc2c[nH]c3ccccc23)[C@@H](O)[C@H]1O,7ELT_TYM +CCCCCCCCCCCCCCCCC,6YRV_PJ8 +C[C@@](O)(CCOP(=O)(O)O)CC(=O)O,7CNS_PMV +COc1cc(Cc2cnc(N)nc2N)cc(OC)c1OC,6XG5_TOP +O=c1cc(N2CCOCC2)oc2c(-c3ccc4c(c3)OCCO4)csc12,7USH_82V +CC(=O)N[C@@H]1[C@@H](O)[C@@H](OS(=O)(=O)O)[C@@H](CO)O[C@@H]1O,7OZ9_NGK +C[N+](C)(C)[C@@H](Cc1c[nH]c(=S)[nH]1)C(=O)O,7TXK_LW8 +CCC(O)(C#Cc1ccc2[nH]c3c(c2c1)-c1nc(N)ncc1CCC3)CC,7ZHP_IQY +Cc1ncc(COP(=O)(O)O)c(CNCC(=O)O)c1O,8AQL_PLG +CC1=C(/C=C/C(C)=C/C=C/C(C)=C/C=C/C=C(C)/C=C/C=C(C)/C=C/C2=C(C)C(=O)CCC2(C)C)C(C)(C)CCC1=O,7ZXV_45D +COCC(=O)n1ccc2c(Cl)cccc21,8BTI_RFO +CS(=O)(=O)Nc1ccc(C(=O)O)cc1,7FT9_4MB +Nc1ncnc2c1c(OC(F)F)nn2[C@@H]1O[C@H](COS(=O)(=O)NC(=O)[C@@H](N)Cc2ccc(O)cc2)[C@@H](O)[C@H]1O,7ROU_66I +CC(C)[C@@H](NC(=O)[C@H](CS)NC(=O)CCC[C@H](N)C(=O)O)C(=O)O,6ZAE_ACV +O=C(O)C[C@H](NC(=O)CP(=O)(O)O)C(=O)O,6YSP_PAL +Cc1ncc(COP(=O)(O)O)c(C[NH2+]c2conc2O)c1O,8B8H_OJQ +O=C(O)COP(=O)(O)O,7THI_PGA +O=C(N[C@@H](C(=O)O)c1ccccc1)c1cccc2c1-c1ccccc1C2=O,7OFF_VCB +CCCCCCCN1CCC[C@H]1C(=O)N[C@@H](Cc1ccccc1)C(=O)O,7WY1_D0L +O=c1ccn([C@@H]2O[C@H](CO[P@](=O)(O)OP(=O)(O)O)[C@@H](O)[C@H]2O)c(=O)[nH]1,7ES1_UDP +CC(C)CN(C[C@@H](O)[C@H](Cc1ccccc1)NC(=O)O[C@H]1CCOC1)S(=O)(=O)c1ccc(N)cc1,6XCT_478 +O=C(O)CCC(=O)C(=O)O,7V3N_AKG +O=C(O)C[NH2+]CP(=O)(O)O,7TM6_GPJ +O=C(O)CCC(=O)O,7ECR_SIN +CCS(C)(=O)=O,7A9E_R4W +CC(=O)N[C@H]1[C@@H](O[P@@](=O)(O)O[P@](=O)(O)OC[C@H]2O[C@@H](n3ccc(=O)[nH]c3=O)[C@H](O)[C@@H]2O)O[C@H](CO)[C@@H](O)[C@@H]1O[C@H](C)C(=O)N[C@@H](C)C(=O)O,8DP2_UMA +N#C[C@@H](O)c1ccc(O)cc1,7ZF0_DHR +Nc1ncnc2c1ncn2[C@@H]1O[C@H](CO[P@](=O)(O)O[P@](=O)(O)OC[C@H]2O[C@@H](O)[C@H](OC(=O)c3ccccc3)[C@@H]2O)[C@@H](O)[C@H]1O,7F51_BA7 
+Cc1nc([C@@H]2O[C@H](CO)[C@H](O)[C@H](n3cc(-c4cc(F)c(Cl)c(F)c4)nn3)[C@H]2O)n(-c2cc(Cl)ccc2C(F)(F)F)n1,7XFA_D9J +O=C(O)[C@@H]1CCCO1,8DKO_TFB +O=C(O)CCc1cnc[nH]1,6T88_MWQ +O=C(O)[C@H](O)[C@@H](O)[C@H](O)[C@H](O)CO,7BCP_GCO +Cc1cc2c3c(c1C)C(C)(C)C[C@@H](O)N3c1c(nc(O)[nH]c1=O)N2C[C@H](O)[C@H](O)[C@H](O)COP(=O)(O)O,7NF0_BYN +CC(=O)N[C@@H]1[C@@H](O)[C@@H](O)[C@@H](CO)O[C@H]1O,7QE4_NGA +CCCCc1ccc(NS(=O)(=O)c2ccc(O)c(C(=O)O)c2)cc1,7M3H_YPV +O=C(O)CCCCCN1C(=O)[C@@H]2[C@H](C1=O)[C@]1(Cl)C(Cl)=C(Cl)[C@@]2(Cl)C1(Cl)Cl,6Z2C_Q5E +Cc1c(C#Cc2ccc(CN3CCC(CO)CC3)cc2)cc(C(F)F)c2cn([C@@H](C(=O)Nc3nccs3)c3ncn4c3CCC43CC3)nc12,8A2D_KXY +CC1CCN(C(=O)Nc2ccc(O)cc2)CC1,7NGW_UAW +O=C(CNCCO)NCCO,7KZ9_XN7 +CCn1cc(-c2cc(Cn3ccnc3C)cc3c2CCN([C@H](c2cc(C)ccn2)C2CC2)C3=O)c(C(F)(F)F)n1,7UAS_MBU +CO[C@H]1O[C@H](CS(=O)(=O)O)[C@@H](O)[C@H](O)[C@H]1O,7YZU_DO7 +OC[C@H]1NC[C@H](O)[C@@H](O)[C@@H]1O,7VKZ_NOJ +Nc1ncnc2c1ncn2[C@@H]1O[C@H](CO[P@](=O)(O)OC(=O)[C@@H](N)Cc2ccc(O)cc2)[C@@H](O)[C@H]1O,7ROR_69X +CCn1c(=O)cc(C)c2cc(CNS(=O)(=O)c3ccccc3)ccc21,8AY3_OE3 +NC(=O)C[C@@H](N)C(=O)O,7C8Q_DSG +CC[C@H]1O[C@@H](n2cnc3c(N)ncnc32)[C@H](O)[C@@H]1O,7XRL_FWK +CN1C[C@H](Nc2nc3cc[nH]c3c(=O)n2C)C[C@H](c2ccc(OCc3ccccc3)cc2)C1,7CD9_FVR +CC(=O)N[C@H]1[C@H]([C@H](O)[C@H](O)CO)O[C@](O)(C(=O)O)C[C@@H]1O,7T3E_SLB +Nc1ncnc2c1ncn2[C@@H]1O[C@@H]2CO[P@@](=O)(O)O[C@H]3[C@@H](O)[C@H](n4cnc5c(N)ncnc54)O[C@@H]3CO[P@](=O)(O)O[C@H]2[C@H]1O,6YJA_2BA +Cc1ccc(Sc2cc(C(=O)N3Cc4ccccc4C3)ccc2O)cc1C,7LT0_ONJ +Nc1nc2c(c(=O)[nH]1)N[C@H]1C(S)=C(S)[C@@H](COP(=O)(O)O)O[C@H]1N2,7D6O_MTE +CCN(C(=O)c1cc(F)ccc1Oc1cncnc1N1CC2(CCN(C[C@H]3CC[C@H](NS(=O)(=O)CC)CC3)CC2)C1)C(C)C,7UJ4_OQ4 +OCC[C@H](O)CO,7OSO_0V1 +Cc1cccc2c(-c3ccccc3Cl)c(C(=O)O)[nH]c12,7AFX_R9K +Cc1cn(-c2ccc3c(c2)CN(c2ncc(Cc4ccc(-n5cccn5)cc4)s2)CC3)c(C)n1,7T1D_E7K +CC(C)n1ncc2cnc(Nc3cc([C@@H]4CCNC4)nc(N4CCC(F)(F)C4)n3)cc21,7R9N_F97 +Nc1nc(Cl)nc2c1ncn2[C@@H]1O[C@H](CO[P@](=O)(O)OP(=O)(O)O)[C@@H](O)[C@H]1O,7MGT_ZD4 +COc1cnc(C(=O)Nc2ccc(F)c([C@]34CN(c5ncc(F)cn5)C[C@H]3CSC(N)=N4)c2)cn1,7MYU_ZR7 
+CO[C@@H]1O[C@H](CO)[C@H](O)[C@H](OC(=O)c2ccc(C)cc2)[C@@H]1OC(=O)c1ccc(Cl)cc1[N+](=O)[O-],7RH3_59O +NC(=O)c1ccc[n+]([C@@H]2C[C@H](CO[P@](=O)(O)O[P@](=O)(O)OC[C@H]3O[C@@H](n4cnc5c(N)ncnc54)[C@H](O)[C@@H]3O)[C@@H](O)[C@H]2O)c1,7OMX_CNA +Cc1ccccc1Oc1cc(-n2c(=O)cc(S(=O)(=O)c3ccccc3)[nH]c2=O)c(F)cc1C#N,7NXO_UU8 +CCn1c(CO)nn(-c2nc(O[C@@H](C)C(F)(F)F)c(C(=O)Nc3c(C)ccnc3Cl)cc2F)c1=O,8DHG_T78 +CCC[C@@H](NC(=O)c1cccc2c1CC(=O)N2)[C@@H](O)c1cccc(Cl)c1C,7NPL_UKZ +O=C(c1ccco1)N1CCN([C@@H]2CC(=O)N(c3ccc(-c4ccc(F)cc4)cc3)C2)CC1,7PRM_81I +CC(=O)N[C@@H]1[C@@H](O)[C@H](O)[C@@H](COS(=O)(=O)O)O[C@H]1O,7WDT_NGS +Nc1ncnc2c1ncn2[C@@H]1O[C@@H]2CO[P@](=O)(O)O[P@](=O)(O)OC[C@H]3O[C@@H](O[C@@H]1[C@@H]2O)[C@H](O)[C@@H]3O,7UAW_MF6 +Nc1nc2c(ncn2[C@@H]2O[C@H](CO)[C@@H](O)[C@H]2O)c(=O)[nH]1,7W05_GMP +C[C@H]1CCN(CCOc2ccc([C@@H]3c4ccc(O)cc4CC[C@@H]3c3ccccc3)cc2)C1,7UJF_R3V +O=C(O)c1ccc(C(=O)c2ccccc2)cc1,8D39_QDB +CNc1cc(-c2ccc3[nH]ccc3c2)nc(S(C)(=O)=O)n1,7F5D_EUO +O=C(O)c1ccnc(C(=O)O)c1F,7BMI_U4B +C=C/C(=N\Cc1c(COP(=O)(O)O)cnc(C)c1O)C(=O)O,7KB1_WBJ +CN(C)c1ccc(/C=C(\C#N)C(=O)N[C@@H]2O[C@H](CO)[C@@H](O)[C@H](O)[C@H]2O)cc1,7Q5I_I0F +Cc1nc([C@](C)(O)CO)sc1-c1cnc(N)c(O[C@H](C)c2cc(F)ccc2N2NC=CN2)c1,7R7R_AWJ +CS(=O)(=O)c1ccc2nc(NC(=O)Cc3csc(-n4cccc4)n3)sc2c1,7L00_XCJ +Nc1ncnc2n[nH]cc12,7BJJ_TVW +O=C(O)C[C@@H]1CC(=O)N(O)C1=O,7UQ3_O2U +CC(C)=CCC/C(C)=C/CC/C(C)=C(\F)CO[P@@](=O)(O)OP(=O)(O)O,7XQZ_FPF +C[C@@H](OP(=O)(O)O)[C@H](NC(=O)CCCCCCS)C(=O)O,7B2C_TP7 +O=[N+]([O-])c1ccc(O)c(O)c1,7JMV_4NC +O=C(O)c1ccccc1,7BNH_BEZ +CC(=O)c1ccn(S(=O)(=O)c2ccccc2)c1,8FO5_Y4U +C[C@]12CCC(=O)C[C@@H]1CC[C@@H]1[C@@H]2CC[C@]2(C)[C@@H](O)CC[C@@H]12,7ZU2_DHT +Cc1ncc(C[n+]2csc(CCO[P@](=O)(O)OP(=O)(O)O)c2C)c(N)n1,7A9H_TPP +Cc1cc(NC(=O)c2cn(C3(C)CC3)c3ncnc(N)c23)n[nH]1,7DUA_HJ0 +COc1ccc(CNc2ccc(Cc3ccncc3)cc2)cc1,7P5T_5YG +Cc1nn(C)c(C)c1C(=O)N1CCN(Cc2nc3ccccc3n2CC(C)(C)C)CC1,7RNI_60I +Nc1ncnc2c1ncn2[C@@H]1O[C@H](CO[P@@](=O)(O)O[P@@](=O)(O)CP(=O)(O)O)[C@@H](O)[C@H]1O,7WCF_ACP 
+Cc1cc2c(cc1C)N(C[C@H](O)[C@H](O)[C@H](O)COP(=O)(O)O)c1[nH]c(=O)[nH]c(=O)c1N2,6M73_FNR +OC[C@H]1N[C@@H](c2c[nH]c3c(O)ncnc23)[C@H](O)[C@@H]1O,6ZK5_IMH +N[C@H](CCC[C@H](N)C(=O)O)C(=O)O,8G6P_API +O=C(C[C@H]1NCCC[C@@H]1O)Cn1cnc2ccccc2c1=O,7VC5_9SF +O=C([C@H]1C[C@@H](c2cccc(Cl)c2)CN1)N1CCN(c2nccc3ccsc23)CC1,7ZZW_KKW +Cc1cc(NCc2cccc(CN3C[C@H](O)[C@@H](O)[C@H](O)[C@H]3CO)c2)cc(-c2ncccn2)c1,7R6J_2I7 +C[C@H](C(=O)N[C@H](CO)c1cccc(N2CCN(C)CC2)n1)N1Cc2ccc(-c3nc(NC4CCOCC4)ncc3Cl)cc2C1=O,7NR8_UOE +O=C1N[C@@H](Cc2c[nH]c3c(F)cccc23)C(=O)N2CCC[C@@H]12,8HO0_3ZI +Nc1ncnc2c1ncn2[C@@H]1O[C@H](CO[P@](=O)(O)C[P@](=O)(O)OP(=O)(O)O)[C@@H](O)[C@H]1O,7XBV_APC +Nc1ncnc2c1ncn2[C@@H]1O[C@@H]2CO[P@](=O)(O)O[P@](=O)(O)OC[C@H]3O[C@@H](O[C@H]2[C@H]1O)[C@H](O)[C@@H]3O,7UXS_OJC +CCCCCCCCCCCCCCC(=O)O,7WPW_F15 +Nc1ccn([C@@H]2O[C@H](COP(=O)(O)O)[C@@H](O)[C@H]2O)c(=O)n1,8CSD_C5P +N#CCc1c[nH]c2ccc(Cl)cc12,8AEM_LVF +CCCC[C@H](N[C@@H](CCc1ccccc1)C(=O)O)C(=O)N[C@@H](Cc1ccc(O)cc1)C(=O)O,7Q25_8J9 +Nc1ncnc2c1ncn2[C@H]1C[C@H](O)[C@@H](CO)O1,6ZPB_3D1 +C[C@H](O)[C@H](O)[C@H]1CNc2nc(N)[nH]c(=O)c2N1,7TSF_H4B +Nc1nc2c(ncn2[C@@H]2O[C@H](CO[P@](=O)(O)O[P@](=O)(O)OP(=O)(O)O)[C@@H](O)[C@H]2O)c(=O)[nH]1,7LJN_GTP +O=P(O)(O)CP(=O)(O)O,7E4L_MDN +Cc1cn([C@H]2C[C@H](O)[C@@H](CO[P@@](=O)(O)O[P@](=O)(O)O[C@H]3O[C@H](C)[C@H](O)[C@H](N)[C@H]3O)O2)c(=O)[nH]c1=O,7N7B_T3F +O=C(O)c1ccc(-c2cccc(C(=O)O)c2)cc1,7JHQ_VAJ +Oc1ccccc1O,7WKL_CAQ +O=P(O)(O)OC[C@H]1O[C@H](O[P@@](=O)(O)OP(=O)(O)O)[C@H](O)[C@@H]1O,8AP0_PRP +O=C(Nc1ccc(F)cc1)C1(C(=O)Nc2ccc(Oc3ccnc4c3Oc3ccccc3N4)c(F)c2)CC1,7V3S_5I9 +O=c1ccn([C@@H]2O[C@H](CO[P@](=O)(O)O[P@](=O)(O)O[C@H]3O[C@H](CO)[C@@H](O)[C@H](O)[C@H]3O)[C@@H](O)[C@H]2O)c(=O)[nH]1,7XPO_UPG +CN1C(=O)N(c2cc(Cl)cc(Cl)c2)C(=O)[C@]12CN(c1ccc(C(=O)O)cn1)C[C@H]2c1ccc(C#N)cc1,7KC5_BJZ +O=C(O)/C=C/c1ccc(O)cc1,7NSW_HC4 +Nc1nc2c(ncn2[C@@H]2O[C@@H]3CO[P@@](=O)(O)O[C@@H]4[C@H](O)[C@@H](CO[P@](=O)(O)O[C@H]3[C@H]2O)O[C@H]4n2cnc3c(N)ncnc32)c(=O)[nH]1,7RWS_4UR +Cc1ccc2c(n1)[nH]c1c(C3CC3)cccc12,7VBU_6I4 
+O=c1ccn([C@@H]2O[C@H](CO)[C@@H](O)[C@H]2O)c(=O)[nH]1,7QTA_URI +CC(C)(C)c1cc(C(=O)/C=C/c2ccc(C(=O)O)cc2)cc(C(C)(C)C)c1,7WQQ_5Z6 +Cc1ncc(COP(=O)(O)O)c(/C=N/[C@H](CCCNC(=N)N)C(=O)O)c1O,8D5D_5DK +CCCC[C@H](N[C@@H](CCc1ccccc1)C(=O)O)C(=O)N[C@@H](Cc1c[nH]c2ccccc12)C(=O)O,7Q27_8KC +Nc1ncnc2c1ncn2[C@@H]1O[C@H](COP(=O)(O)O)[C@@H](OP(=O)(O)O)[C@H]1O,7ED2_A3P +CN(c1ncccc1CNc1ccnc(Nc2ccc3c(c2)CC(=O)N3)n1)S(C)(=O)=O,6YT6_PKE +NCC(=O)N[C@@H]1O[C@H](COP(=O)([O-])[O-])[C@@H](O)[C@H]1O,7JG0_GAR +O=C(CNc1cc(F)cc(F)c1)N[C@@H](C(=O)NO)c1ccc(-c2cc(F)c(F)c(F)c2)cc1,8EYE_X4I +Nc1ncnc2c1ncn2[C@@H]1O[C@H](CSC[C@H](N)C(=O)O)[C@@H](O)[C@H]1O,7O1T_5X8 +Cc1ccc(C[C@@]2(C(=O)O)C[C@H]2c2ccccc2)cc1,6Z4N_Q7B +CCN1C(=O)c2cc(N3CCN(C)CC3)nc3c(NS(=O)(=O)c4ccc(F)cc4F)ccc1c23,7WL4_JFU +NCC(=O)NS(=O)(=O)OC[C@H]1O[C@@H](n2cnc3c(N)ncnc32)[C@H](O)[C@@H]1O,8SLG_G5A +COc1cccc(-c2ccc3c(CC(=O)O)coc3c2)c1,7L7C_XQ1 +O=C(CCCC[C@@H]1SC[C@@H]2NC(=O)N[C@@H]21)N[C@H]1CCNC1,7NLV_UJE +NC(=O)O[P@](=O)(O)OC[C@H]1O[C@@H](n2cnc3c(N)ncnc32)[C@H](O)[C@@H]1O,7VYJ_CA0 +COC(=O)c1cc(S(N)(=O)=O)c(Cl)cc1S(=O)(=O)c1ccccc1,7PUV_84Z +C[C@@H]1COCCN1c1cc2n(n1)[C@@H]1CCC[C@@H]1NC2=O,7RSV_7IQ +Cc1cc(Cl)ccc1CNC(=O)Nc1ccc2cc1OCCOCCNc1ccn3ncc-2c3n1,7QGP_DJ8 diff --git a/forks/FABind/inference/fabind_posebusters_benchmark_inputs.csv b/forks/FABind/inference/fabind_posebusters_benchmark_inputs.csv new file mode 100644 index 00000000..f00d31ac --- /dev/null +++ b/forks/FABind/inference/fabind_posebusters_benchmark_inputs.csv @@ -0,0 +1,309 @@ +Cleaned_SMILES,pdb_id +O=C(O)[C@H]1NCC[C@H]1O,7CNQ_G8X +Nc1nc2c(ncn2[C@@H]2O[C@H](CO[P@](=O)(O)O[C@H]3[C@@H](O)[C@H](n4ccc(=O)[nH]c4=O)O[C@@H]3CO)[C@H]3CC(O)(O)C[C@H]32)c(=O)[nH]1,7C0U_FGO +CNc1cccc2c1NC(=O)C2(C)C,7EBG_J0L +C[C@@H]1C=C[C@@H]2CCCC[C@@H]2[C@H]1C(=O)c1c(O)c([C@]2(O)CC[C@H](O)[C@@H]3O[C@@H]32)c[nH]c1=O,7WUY_76N +CC(C)(O)C(=O)SCCNC(=O)CCNC(=O)[C@H](O)C(C)(C)CO[P@@](=O)(O)O[P@](=O)(O)OC[C@H]1O[C@@H](n2cnc3c(N)ncnc32)[C@H](O)[C@@H]1OP(=O)(O)O,7PT3_3KK +Nc1nc(O)c2[nH]nnc2n1,7C3U_AZG 
+Cc1cc2nc3c(=O)[nH]c(=O)nc-3n(C[C@H](O)[C@H](O)[C@H](O)COP(=O)(O)O)c2cc1C,7PGX_FMN +Cc1cc(C)c(N)cn1,7FB7_8NF +CNCCN(C)C(=O)c1cnn(C)c1C(=O)Nc1ccn2cc(-c3ccccc3)nc2c1,5SIS_JSM +Cc1ncc(COP(=O)(O)O)c(C/N=C(\CON)C(=O)O)c1O,8AIE_M7L +O=C(O)[C@H]1O[C@@H](O)[C@H](O)[C@@H](O)[C@@H]1O,7CTM_BDP +CN1CCN(c2ncc(Oc3cc(CN4CCC(CC(=O)O)CC4)cc(-c4cc(Cl)cc(Cl)c4)n3)cn2)CC1,7LCU_XTA +CC(C)C[C@H](NC(=O)CN[P@@](=O)(O)[C@@H](Cc1ccccc1)NC(=O)OCc1ccccc1)C(=O)O,6YMS_OZH +CC(C)(CO[P@@](=O)(O)O[P@](=O)(O)OC[C@H]1O[C@@H](n2cnc3c(N)ncnc32)[C@H](O)[C@@H]1OP(=O)(O)O)[C@@H](O)C(=O)NCCC(=O)NCCSC(=O)c1ccccc1,7PK0_BYC +COC(=O)c1ccccc1Cc1cn(C)nc1-c1cc(Cl)nc(N)n1,8CNH_V6U +O=C(NCc1ccc[n+]([C@@H]2O[C@H](COP(=O)(O)O)[C@@H](O)[C@H]2O)c1)Nc1ccc([N+](=O)[O-])cc1,7M6K_YRJ +Oc1cccc(CNc2nc(Cl)nc3scc(-c4ccccc4)c23)c1,7SFO_98L +O=C(N[C@H](C(=O)Nc1cnccc1-c1ccc(C(F)(F)F)cc1)c1cccc(C(F)(F)F)c1)c1cnccc1-c1ccc(C(F)(F)F)cc1,8EAB_VN2 +O=C(O)CN(CCO)CCO,7ZTL_BCN +O=C(O)C1CCC1,7MWU_ZPM +Cc1nccnc1-c1nn2c(=O)cc(-c3ccc(C4CCCCC4)cc3)[nH]c2c1C(=O)N1CC(CF)C1,7TYP_KUR +NS(=O)(=O)c1ccc(Cl)s1,6YQV_8K2 +COc1cc(C=O)ccc1O,6XM9_V55 +N[C@@H]1CCN(Cc2cccc(-c3ccc4c(O)nccc4c3)c2)C1,7PIH_7QW +CC(C)=CCC/C(C)=C/CC/C(C)=C/CO[P@](=O)(O)OP(=O)(O)O,7T0D_FPP +CCCCCCc1nc(N)nc(N)c1-c1ccccc1,7XI7_4RI +Nc1cc(C(Cl)=C(Cl)Cl)c(S(N)(=O)=O)cc1S(N)(=O)=O,7PRI_7TI +CN(c1ncnc2[nH]ccc12)[C@@H]1CCCN(C(=O)CNc2cc(Cl)cc(Cl)c2)C1,8FLV_ZB9 +COc1cc2c(cc1OC)[C@H]1Cc3ccc(OC)c(OC)c3CN1CC2,7N4W_P4V +CNC(=O)CN(CC(c1ccccc1)c1ccccc1)C(=O)c1cc(C)c(OC)c(C)c1,7OEO_V9Z +O=C(N[C@@H]1C[C@H]1c1ccccc1)c1cc(Cl)cc(COc2cnc3[nH]ccc3c2)c1,5SB2_1K2 +N[C@H]1C=C(CO)[C@@H](O[C@H]2O[C@H](CO)[C@@H](O)[C@H](O)[C@H]2O)[C@H](O)[C@H]1O,7MGY_ZD1 +CCCNc1nn2c(-c3ccc(O)cc3)cnc2s1,7OPG_06N +Nc1nc2c([nH]c(=O)n2[C@H]2C[C@H](O)[C@@H](CO)O2)c(=O)[nH]1,7OLI_8HG +O=C(O)c1ccc(-c2nn(C(=O)c3c(Cl)cccc3C(F)(F)F)c3cccc(F)c23)c(F)c1,8FAV_4Y5 +Oc1ccc2c(c1)sc1nncn12,7R59_I5F +Nc1ccn([C@@H]2O[C@H](CO[P@](=O)(O)O[P@@](=O)(O)OC[C@@H](O)[C@@H](O)[C@@H](O)CO)[C@@H](O)[C@H]2O)c(=O)n1,6XHT_V2V 
+O=C1CC(c2ccccc2)=Nc2c(-c3ccccc3)c(C(F)(F)F)nn21,8EX2_Q2Q +OC[C@H]1CNC[C@@H](O)[C@@H]1O,7LOU_IFM +Cc1cn([C@@H]2O[C@H](COP(=O)(O)O)[C@@H](O)[C@H]2O)c(=O)nc1N,6XBO_5MC +CN(C)Cc1[nH]nc2ccc(-c3ccc(F)cc3OCCc3cccnc3)cc12,6TW7_NZB +CNC(=O)[C@H](CCc1ccccc1)NC(=O)[C@H](NC(=O)CNCc1ccc(OC)cc1OC)c1cccs1,7U3J_L6U +NS(=O)(=O)c1c(C(F)(F)F)ccc(-c2ccc(C3CCNCC3)cc2)c1-c1nnn[nH]1,7UYB_OK0 +OC[C@H]1O[C@@H](O)[C@H](O)[C@@H](O)[C@@H]1O,7WJB_BGC +Nc1ncnc2c1ncn2[C@@H]1O[C@H](CSCC[C@H](N)C(=O)O)[C@@H](O)[C@H]1O,7RC3_SAH +O=[N+]([O-])c1ccc(O)cc1,7RZL_NPO +N#CC[C@H](C1CCCC1)n1cc(-c2ncnc3[nH]ccc23)cn1,6WTN_RXT +C=C(C)CCO[P@@](=O)(O)OP(=O)(O)O,7MY1_IPE +C=C(Oc1cccc(C(=O)O)c1)C(=O)O,7AN5_RDH +C[C@H](Oc1cc2cc(F)ccc2nc1N)c1[nH]c(=O)ccc1-n1cccn1,7JY3_VUD +CC[C@H](C)[C@H](N)C(=O)O[P@](=O)(O)OC[C@H]1O[C@@H](n2cnc3c(N)ncnc32)[C@H](O)[C@@H]1O,7D5C_GV6 +N[C@@H](CC[C@H](O)[C@@H](N)COS(=O)(=O)O)C(=O)O,7WUX_6OI +NC(=O)C[C@H](NC(=O)c1c[nH]nc1-c1ccc(Cl)cc1F)c1ccc(F)c(Cl)c1,8F8E_XJI +C=Cc1c(C)c2cc3nc(c4c5[nH]c(cc6nc(cc1[nH]2)C(C)=C6CC)c(C)c5C(=O)[C@@H]4C(=O)OC)[C@@H](CCC(=O)OC/C=C(\C)CCC[C@H](C)CCC[C@H](C)CCCC(C)C)[C@@H]3C,8F4J_PHO +Nc1ncnc2c1ncn2[C@@H]1O[C@H](C[C@@H](N)CC[C@H](N)C(=O)O)[C@@H](O)[C@H]1O,7PL1_SFG +CC1=[NH+][C@H]2[C@@H](O1)O[C@H](COS(=O)(=O)[O-])[C@@H](O)[C@@H]2O,6Z14_Q4Z +Nc1nc2c(ncn2[C@H]2C[C@H](O)[C@@H](CO[P@@](=O)(O)OP(=O)(O)O)O2)c(=O)[nH]1,7ODY_DGI +O=C(O[C@H](Cn1ccnc1)c1ccc(F)cc1)c1cc(Cl)cc(Cl)c1,7RKW_5TV +N=C1N/C(=N\Nc2ccccc2)c2ccccc21,5SAK_ZRY +CCOC(=O)/C(=N\O)C(C)=O,8AUH_L9I +CSC[C@H]1O[C@@H](n2cnc3c(N)ncnc32)[C@H](O)[C@@H]1O,8C5M_MTA +CCCCCCNc1ccc2ncc(C(=O)NC)c(Nc3ccccc3)c2c1,7N03_ZRP +C=CC[C@@H]1/C=C(\C)C[C@H](C)C[C@H](OC)[C@H]2O[C@@](O)(C(=O)C(=O)N3CCCC[C@H]3C(=O)O[C@H](/C(C)=C/[C@@H]3CC[C@@H](O)[C@H](OC)C3)[C@H](C)[C@@H](O)CC1=O)[C@H](C)C[C@@H]2OC,7U0U_FK5 +CC(=O)N1Cc2cc(S(C)(=O)=O)ccc2[C@@H]1C(=O)Nc1ccc(C(O)(C(F)(F)F)C(F)(F)F)cc1,7OFK_VCH +Nc1nc2c(c(CN[C@H]3C=C[C@H](O)[C@@H]3O)cn2[C@@H]2O[C@H](COP(=O)(O)O)[C@@H](O)[C@H]2O)c(=O)[nH]1,7ULC_56B +CNCc1cccc(-c2cc(C)cc(N)n2)c1,7TS6_KMI 
+Cc1cc2c3c(c1C)C(C)(C)CC=[N+]3c1c([nH]c(=O)[nH]c1=O)N2C[C@H](O)[C@H](O)[C@H](O)COP(=O)(O)O,7NF3_4LU +O=C(O)c1cccnc1,7Z1Q_NIO +C=C1/C(=C\C=C2/CCC[C@]3(C)[C@@H]([C@H](C)CCCC(C)(C)O)CC[C@@H]23)C[C@@H](O)C[C@@H]1O,7QPP_VDX +NCCSc1nc(NCc2ccc(-n3cccn3)cc2)c2[nH]nc(C3CCC3)c2n1,7QHL_D5P +O=S1(=O)N[C@@H]2[C@H](O)[C@@H](O)[C@H](O)[C@@H](CO)[C@@H]2O1,7P4C_5OV +C=C(C)CCS[P@@](=O)(O)OP(=O)(O)O,7VQ9_ISY +NCC[C@H](O)C(=O)N[C@@H]1C[C@H](N)[C@@H](O[C@H]2O[C@H](CN)[C@@H](O)[C@H](O)[C@H]2O)[C@H](O)[C@H]1O[C@H]1O[C@H](CO)[C@@H](O)[C@H](N)[C@H]1O,6VTA_AKN +Cc1ccc(Cl)cc1,7V43_C4O +CN(/N=C/c1ccccc1C(=O)O)c1nc(-c2ccccc2Cl)cs1,6M2B_EZO +Clc1nc(Cl)c2[nH]cnc2n1,7OP9_06K +N[C@H](CCC(=O)O)C(=O)O,7UJ5_DGL +O=S(=O)(O)OC[C@H]1O[C@@H](O)[C@H](O)[C@@H](O)[C@H]1O,7OZC_G6S +CC(=O)N[C@H]1[C@@H](O[P@@](=O)(O)O[P@@](=O)(O)OC[C@H]2O[C@@H](n3ccc(=O)[nH]c3=O)[C@H](O)[C@@H]2O)O[C@H](CO)[C@@H](O)[C@@H]1O,7TB0_UD1 +Nc1ncnc2c1ncn2[C@@H]1O[C@H](CO[P@](=O)(O)OP(=O)(O)O)[C@@H](O)[C@H]1O,8C3N_ADP +N[C@@H]1[C@@H](O)[C@H](O)[C@@H](COP(=O)(O)O)O[C@@H]1O,7NUT_GLP +Cn1c(CO[C@@H]2C=CO[C@H](CO)[C@@H]2O)nc2ccc(C(=O)O)cc21,7P1M_4IU +Cc1cc2c(cc1C)N(C[C@H](O)[C@H](O)[C@H](O)CO[P@@](=O)(O)O[P@@](=O)(O)OC[C@H]1O[C@@H](n3cnc4c(N)ncnc43)[C@H](O)[C@@H]1O)c1[nH]c(=O)[nH]c(=O)c1N2,7EPV_FDA +N#Cc1cncnc1N,6Z0R_Q4H +O=C(O)C1=C[C@H](O)[C@@H](O)[C@H]([C@H](O)[C@H](O)CO)O1,7P1F_KFN +Oc1ccc(/C=C/c2cc(O)cc(O)c2)cc1,7VB8_STL +C[NH+](C)c1cc(NC(=O)CNC(C)(C)C)c(O)c2c1C[C@H]1C[C@H]3[C@H]([NH+](C)C)C(O)=C(C(N)=O)C(=O)[C@@]3(O)C(O)=C1C2=O,6YR2_T1C +CSCCC/N=C/c1c(COP(=O)(O)O)cnc(C)c1O,7CIJ_G0C +NC(=O)c1ccc[n+]([C@@H]2O[C@H](CO[P@@](=O)([O-])O[P@](=O)(O)OC[C@H]3O[C@@H](n4cnc5c(N)ncnc54)[C@H](O)[C@@H]3O)[C@@H](O)[C@H]2O)c1,7UMW_NAD +C[C@H]1O[C@@H](n2cnc3c(N)ncnc32)[C@H](O)[C@@H]1O,7TOM_5AD +O=S(=O)(O)CCS,7SUC_COM +Oc1ccc(Cl)cc1O,7DQL_4CL +O=C(O)c1ccc(O)cc1,7CUO_PHB +O=C1Nc2ccccc2/C1=C\c1ccc(C(=O)O)cc1,7QFM_AY3 +c1ccc(CC2NCCN2)cc1,7Q2B_M6H +Cc1c(C(=O)c2cccc3ccccc23)c2cccc3c2n1[C@H](CN1CCOCC1)CO3,7MWN_WI5 +CCCCCCCCCC(=O)NCCCC(=O)O,7L5F_XNG 
+NC(=O)c1cccnc1,8DSC_NCA +CO[C@@H]1OC[C@@H](O)[C@H](O)[C@H]1O,7ZDY_6MJ +Cc1oc2ccc(-c3cc(N)nc(N)c3)cc2c1C,7AKL_RK5 +O=C(O)c1ccc(OCc2c(-c3c(Cl)cccc3C(F)(F)F)noc2-c2cn[nH]c2)cc1,7NP6_UK8 +CC(C)(O)C#Cc1ccc2[nH]c3c(c2c1)-c1nc(N)ncc1CCC3,7JXX_VP7 +Fc1cccc2ccccc12,7LOE_Y84 +CCc1nc(N)nc(N)c1OCCCOc1cccc(C[C@@H](C(=O)O)C(F)F)c1,7KM8_WPD +CN[C@@H]1[C@H](O)[C@H](NC)[C@H]2O[C@]3(O)[C@H](O[C@@H]2[C@H]1O)O[C@H](C)CC3(O)O,7UY4_SMI +C[C@]12CC[C@H]3[C@@H](CCC4=CC(=O)CC[C@@]43C)[C@@H]1CC[C@@H]2O,7CL8_TES +Cc1ccc(-c2csc3ncnc(SCCC(=O)O)c23)cc1,6Z1C_7EY +COc1ccc(-c2cccc(S(=O)(=O)NC(=O)[C@@H](N)CS)c2)cn1,8HFN_XGC +Cc1nc(-c2cn3c(n2)-c2ccc(-c4cnn(C(C)(C)C(N)=O)c4)cc2OCC3)n(C(C)C)n1,8EXL_799 +CNc1nc(NC)c2ncn(C)c2n1,7Z7F_IF3 +Cc1ncc(COP(=O)(O)O)c(C=O)c1O,7XG5_PLP +CO[C@@H]1O[C@@H](C)[C@@H](O)[C@@H](O)[C@@H]1O,7P2I_MFU +O=P(O)(O)Oc1ccccc1,7MOI_HPS +CCN(CC)c1ccc2c(C)c(CCN3C(=O)N[C@@]4(CCN(C(=O)c5c[nH]cn5)C4)C3=O)c(=O)oc2c1,7LMO_NYO +CCNC(=O)CCNC(=O)[C@H](O)C(C)(C)CO[P@@](=O)(O)O[P@@](=O)(O)OC[C@H]1O[C@@H](n2cnc3c(N)ncnc32)[C@H](O)[C@@H]1OP(=O)(O)O,7MSR_DCA +CNC(=O)c1nn(C)c2ccc(Nc3nccc(-n4cc(N[C@@H]5CCNC5)c(C)n4)n3)cc12,6ZCY_QF8 +CS(=O)(=O)Nc1ccc(F)cc1C(=O)O,6ZC3_JOR +Cc1ccc(F)c2cc(C(=O)Nc3cccc(N4CCC(N(C)C)CC4)c3)[nH]c12,7LZD_YHY +COc1cc(C(=O)Nc2ccc(F)c([C@]3(CF)CC[C@@](C)(S(C)(=O)=O)C(N)=N3)c2)ncn1,7N4N_0BK +Nc1ncnc2c1ncn2[C@@H]1O[C@H](CO[P@@](=O)(O)O[P@](=O)(O)OP(=O)(O)O)[C@@H](O)[C@H]1O,7KRU_ATP +NC(=O)C1=CN([C@@H]2O[C@H](CO[P@](=O)(O)O[P@@](=O)(O)OC[C@H]3O[C@@H](n4cnc5c(N)ncnc54)[C@H](O)[C@@H]3O)[C@@H](O)[C@H]2O)C=CC1,7UTW_NAI +CCc1ccc(S(N)(=O)=O)cc1,7BKA_4JC +CCc1nc(N)nc(N)c1OCCCOc1cc(C)ccc1N1CC(C(=O)O)C1,5SD5_HWI +Nc1nc2c(ncn2[C@@H]2O[C@H](CO[P@](=O)(O)O[P@](=O)(O)OP(O)(O)=S)[C@@H](O)[C@H]2O)c(=O)[nH]1,7SCW_GSP +O=P(O)(O)O[C@H]1[C@H](O)[C@@H](OP(=O)(O)O)[C@H](OP(=O)(O)O)[C@@H](OP(=O)(O)O)[C@H]1O,7SDD_4IP +Nc1nc2c(c(=O)[nH]1)N(C=O)[C@@H](CNc1ccc(C(=O)N[C@@H](CCC(=O)O)C(=O)O)cc1)CN2,7TH4_FFO +N[C@@H](CCC(=O)N[C@@H](CS)C(=O)NCC(=O)O)C(=O)O,8D19_GSH +CCC[C@@H](CC(=O)C(=O)O)C(=O)O,7A1P_QW2 
+C#CCO[P@](=O)(O)OC[C@H]1O[C@@H](n2cnc3c(N)ncnc32)[C@H](O)[C@@H]1O,8G0V_YHT +Cc1ncc(COP(=O)(O)O)c(/C=N/[C@H]2[C@H](O)[C@@H](O)[C@@H](O[P@](=O)(O)O[P@](=O)(O)OC[C@H]3O[C@@H](n4ccc(=O)[nH]c4=O)[C@H](O)[C@@H]3O)O[C@@H]2C)c1O,7MFP_Z7P +Nc1ccn([C@@H]2O[C@H](CO[P@](=O)(O)O[P@@](=O)(O)OP(=O)(O)O)[C@@H](O)[C@H]2O)c(=O)n1,7N7H_CTP +Nc1nc2c([nH]c(=O)n2[C@H]2C[C@H](O)[C@@H](COP(=O)(O)O)O2)c(=O)[nH]1,7X9K_8OG +O=c1c(-c2ccc(O)cc2)coc2cc(O)cc(O)c12,7NFB_GEN +OC[C@H]1O[C@H](F)[C@H](O)[C@@H](O)[C@@H]1O,7DKT_GLF +Nc1ncnc2c1ncn2[C@@H]1O[C@H](CO[P@](=O)(O)O[P@@](=O)(O)OC[C@H]2O[C@@H](O)[C@H](O)[C@@H]2O)[C@@H](O)[C@H]1O,7R3D_APR +CS(=O)(=O)N1CCN(c2ccc3nncn3n2)CC1,6YYO_Q1K +[O-][n+]1cc(-c2c(-n3cnnn3)ccc(Cl)c2F)ccc1[C@@H](CC1CC1)n1cc(-c2cncs2)cn1,7V14_ORU +Cn1cc(Nc2ncc(C3=CCC[C@@H](NC(=O)c4ccccc4)C3)nc2C(N)=O)cn1,7SIU_9ID +C=C(C)[C@@H]1CCC(C)=C[C@H]1c1c(O)cc(CCCCC)cc1O,7TE8_P0T +Cc1cc2nc3c(=O)[nH]c(=O)nc-3n(C[C@H](O)[C@H](O)[C@H](O)CO[P@@](=O)(O)O[P@](=O)(O)OC[C@H]3O[C@@H](n4cnc5c(N)ncnc54)[C@H](O)[C@@H]3O)c2cc1C,7F8T_FAD +Cc1cnc2c(N)c(C(=O)NCCc3ccc([C@@H]4C[C@H]5CC[C@@H](C4)N5)cc3)sc2n1,7TUO_KL9 +Nc1ncnc2c1ncn2[C@@H]1O[C@H](CO[P@@](=O)(O)O[P@](=O)(O)NP(=O)(O)O)[C@@H](O)[C@H]1O,7B94_ANP +C=C(/N=C/c1c(COP(=O)(O)O)cnc(C)c1O)C(=O)O,7LEV_0JO +Cc1nc2c(=O)[nH]c(=O)nc-2n(C[C@H](O)[C@H](O)[C@H](O)CO)c1C,8A1H_DLZ +CC(C)C[C@H](N)CO,7NU0_DCL +Cc1cc2nc3c(=O)[nH]c(=O)nc-3n(C[C@H](O)[C@H](O)[C@H](O)CO)c2cc1C,7QF4_RBF +COc1cccc2sc3nncn3c12,7Z2O_IAJ +Nc1ccn([C@@H]2O[C@H](CO[P@@](=O)(O)OP(=O)(O)O)[C@@H](O)[C@H]2O)c(=O)n1,7O0N_CDP +O=S(=O)(NCc1ccc(S(=O)(=O)c2ccc(O)c(O)c2)cc1)c1ccc(O)c(O)c1,7FRX_O88 +Cc1n[nH]c2c1[C@](c1cc(CO)cc(-c3ccccc3)c1)(C(C)C)C(C#N)=C(N)O2,7X5N_5M5 +COc1cccc(NC(=O)c2ccc(C)c(Nc3nc(-c4cccnc4)nc4c3cnn4C)c2)c1,8BOM_QU6 +Cc1cc(F)c(NC(=O)NCCC(C)(C)C)cc1Nc1ccc2ncn(C)c(=O)c2c1F,7K0V_VQP +COc1ccc2nc(C)c(O[C@@H]3C[C@H]4C(=O)N[C@]5(C(=O)NS(=O)(=O)C6(C)CC6)C[C@H]5/C=C\CCCCC[C@H](NC(=O)OCC5(C(F)(F)F)CC5)C(=O)N4C3)nc2c1,7MMH_ZJY +CNC(=S)c1cccnc1,7PJQ_OWH +Cc1c[nH]c(=O)[nH]c1=O,7M31_TDR 
+Nc1ncnc2c1ncn2[C@@H]1O[C@H](CO[P@@](=O)(O)OS(=O)(=O)O)[C@@H](O)[C@H]1O,7FHA_ADX +COc1cc2c(cc1Nc1nc(Nc3ccccc3S(=O)(=O)C(C)C)c3[nH]ccc3n1)N(C(=O)CN(C)C)CC2,7BTT_F8R +CC(C)C(=O)Nc1ncc(C(=O)NCCN(Cc2ccccc2)C(=O)c2ccc(S(=O)(=O)Nc3ccccc3)cc2)s1,7QHG_T3B +O=C(C1CCN(c2cncnc2-c2ccc(F)cc2)CC1)N1CC(F)C1,7N6F_0I1 +CC(C)C(=O)Nc1ncc(-c2cc(C(F)F)nn2-c2c(Cl)cccc2Cl)s1,8AAU_LH0 +CC(=O)N[C@H]1[C@H](OCCc2ccc3occc3c2)O[C@H](CO)[C@@H](O)[C@@H]1O,8GFD_ZHR +O=C(O)C1=C[C@@H](OP(=O)(O)O)[C@@H](O)[C@H](O)C1,7TBU_S3P +CNc1cnn(C)c(=O)c1Cl,6YQW_82I +C=C(CC(=O)O)C(=O)O,7W06_ITN +NS(=O)(=O)c1cc(-c2nnn[nH]2)c(NCc2cccs2)cc1Cl,7ZL5_IWE +O=P(O)(O)OCCNS(=O)(=O)c1ccc(OC(F)(F)F)cc1,7L03_F9F +NCCCNCCCN,7XJN_NSD +O=C(O)CNC(=O)C(=O)O,7ZCC_OGA +Cc1nn(C)c(C)c1CCOc1cc(F)ccc1-c1ccc2n[nH]c(CN(C)C)c2c1,6TW5_9M2 +CCCCOc1ccc(C[C@H](CC)C(=O)O)cc1CNC(=O)c1ccc(C(F)(F)F)cc1F,7VWF_K55 +Nc1cccc2c1C(=O)N([C@@H]1CCC(=O)NC1=O)C2=O,7SZA_DUI +COC(=O)c1cc(S(N)(=O)=O)c(SC2CCCCC2)cc1Cl,7POM_7VZ +N[C@@H](Cc1ccc(O)c(F)c1)C(=O)O,7KQU_YOF +CC(=O)c1ccc(NC(=O)[C@H](C)S)cc1,7ZOC_T8E +Nc1ncnc2c1ncn2[C@@H]1O[C@H](CO[P@@](=O)(O)OC(=O)[C@@H](N)Cc2c[nH]c3ccccc23)[C@@H](O)[C@H]1O,7ELT_TYM +CCCCCCCCCCCCCCCCC,6YRV_PJ8 +C[C@@](O)(CCOP(=O)(O)O)CC(=O)O,7CNS_PMV +COc1cc(Cc2cnc(N)nc2N)cc(OC)c1OC,6XG5_TOP +O=c1cc(N2CCOCC2)oc2c(-c3ccc4c(c3)OCCO4)csc12,7USH_82V +CC(=O)N[C@@H]1[C@@H](O)[C@@H](OS(=O)(=O)O)[C@@H](CO)O[C@@H]1O,7OZ9_NGK +C[N+](C)(C)[C@@H](Cc1c[nH]c(=S)[nH]1)C(=O)O,7TXK_LW8 +CCC(O)(C#Cc1ccc2[nH]c3c(c2c1)-c1nc(N)ncc1CCC3)CC,7ZHP_IQY +Cc1ncc(COP(=O)(O)O)c(CNCC(=O)O)c1O,8AQL_PLG +CC1=C(/C=C/C(C)=C/C=C/C(C)=C/C=C/C=C(C)/C=C/C=C(C)/C=C/C2=C(C)C(=O)CCC2(C)C)C(C)(C)CCC1=O,7ZXV_45D +COCC(=O)n1ccc2c(Cl)cccc21,8BTI_RFO +CS(=O)(=O)Nc1ccc(C(=O)O)cc1,7FT9_4MB +Nc1ncnc2c1c(OC(F)F)nn2[C@@H]1O[C@H](COS(=O)(=O)NC(=O)[C@@H](N)Cc2ccc(O)cc2)[C@@H](O)[C@H]1O,7ROU_66I +CC(C)[C@@H](NC(=O)[C@H](CS)NC(=O)CCC[C@H](N)C(=O)O)C(=O)O,6ZAE_ACV +O=C(O)C[C@H](NC(=O)CP(=O)(O)O)C(=O)O,6YSP_PAL +Cc1ncc(COP(=O)(O)O)c(C[NH2+]c2conc2O)c1O,8B8H_OJQ +O=C(O)COP(=O)(O)O,7THI_PGA 
+O=C(N[C@@H](C(=O)O)c1ccccc1)c1cccc2c1-c1ccccc1C2=O,7OFF_VCB +CCCCCCCN1CCC[C@H]1C(=O)N[C@@H](Cc1ccccc1)C(=O)O,7WY1_D0L +O=c1ccn([C@@H]2O[C@H](CO[P@](=O)(O)OP(=O)(O)O)[C@@H](O)[C@H]2O)c(=O)[nH]1,7ES1_UDP +CC(C)CN(C[C@@H](O)[C@H](Cc1ccccc1)NC(=O)O[C@H]1CCOC1)S(=O)(=O)c1ccc(N)cc1,6XCT_478 +O=C(O)CCC(=O)C(=O)O,7V3N_AKG +O=C(O)C[NH2+]CP(=O)(O)O,7TM6_GPJ +O=C(O)CCC(=O)O,7ECR_SIN +CCS(C)(=O)=O,7A9E_R4W +CC(=O)N[C@H]1[C@@H](O[P@@](=O)(O)O[P@](=O)(O)OC[C@H]2O[C@@H](n3ccc(=O)[nH]c3=O)[C@H](O)[C@@H]2O)O[C@H](CO)[C@@H](O)[C@@H]1O[C@H](C)C(=O)N[C@@H](C)C(=O)O,8DP2_UMA +N#C[C@@H](O)c1ccc(O)cc1,7ZF0_DHR +Nc1ncnc2c1ncn2[C@@H]1O[C@H](CO[P@](=O)(O)O[P@](=O)(O)OC[C@H]2O[C@@H](O)[C@H](OC(=O)c3ccccc3)[C@@H]2O)[C@@H](O)[C@H]1O,7F51_BA7 +Cc1nc([C@@H]2O[C@H](CO)[C@H](O)[C@H](n3cc(-c4cc(F)c(Cl)c(F)c4)nn3)[C@H]2O)n(-c2cc(Cl)ccc2C(F)(F)F)n1,7XFA_D9J +O=C(O)[C@@H]1CCCO1,8DKO_TFB +O=C(O)CCc1cnc[nH]1,6T88_MWQ +O=C(O)[C@H](O)[C@@H](O)[C@H](O)[C@H](O)CO,7BCP_GCO +Cc1cc2c3c(c1C)C(C)(C)C[C@@H](O)N3c1c(nc(O)[nH]c1=O)N2C[C@H](O)[C@H](O)[C@H](O)COP(=O)(O)O,7NF0_BYN +CC(=O)N[C@@H]1[C@@H](O)[C@@H](O)[C@@H](CO)O[C@H]1O,7QE4_NGA +CCCCc1ccc(NS(=O)(=O)c2ccc(O)c(C(=O)O)c2)cc1,7M3H_YPV +O=C(O)CCCCCN1C(=O)[C@@H]2[C@H](C1=O)[C@]1(Cl)C(Cl)=C(Cl)[C@@]2(Cl)C1(Cl)Cl,6Z2C_Q5E +Cc1c(C#Cc2ccc(CN3CCC(CO)CC3)cc2)cc(C(F)F)c2cn([C@@H](C(=O)Nc3nccs3)c3ncn4c3CCC43CC3)nc12,8A2D_KXY +CC1CCN(C(=O)Nc2ccc(O)cc2)CC1,7NGW_UAW +O=C(CNCCO)NCCO,7KZ9_XN7 +CCn1cc(-c2cc(Cn3ccnc3C)cc3c2CCN([C@H](c2cc(C)ccn2)C2CC2)C3=O)c(C(F)(F)F)n1,7UAS_MBU +CO[C@H]1O[C@H](CS(=O)(=O)O)[C@@H](O)[C@H](O)[C@H]1O,7YZU_DO7 +OC[C@H]1NC[C@H](O)[C@@H](O)[C@@H]1O,7VKZ_NOJ +Nc1ncnc2c1ncn2[C@@H]1O[C@H](CO[P@](=O)(O)OC(=O)[C@@H](N)Cc2ccc(O)cc2)[C@@H](O)[C@H]1O,7ROR_69X +CCn1c(=O)cc(C)c2cc(CNS(=O)(=O)c3ccccc3)ccc21,8AY3_OE3 +NC(=O)C[C@@H](N)C(=O)O,7C8Q_DSG +CC[C@H]1O[C@@H](n2cnc3c(N)ncnc32)[C@H](O)[C@@H]1O,7XRL_FWK +CN1C[C@H](Nc2nc3cc[nH]c3c(=O)n2C)C[C@H](c2ccc(OCc3ccccc3)cc2)C1,7CD9_FVR +CC(=O)N[C@H]1[C@H]([C@H](O)[C@H](O)CO)O[C@](O)(C(=O)O)C[C@@H]1O,7T3E_SLB 
+Nc1ncnc2c1ncn2[C@@H]1O[C@@H]2CO[P@@](=O)(O)O[C@H]3[C@@H](O)[C@H](n4cnc5c(N)ncnc54)O[C@@H]3CO[P@](=O)(O)O[C@H]2[C@H]1O,6YJA_2BA +Cc1ccc(Sc2cc(C(=O)N3Cc4ccccc4C3)ccc2O)cc1C,7LT0_ONJ +Nc1nc2c(c(=O)[nH]1)N[C@H]1C(S)=C(S)[C@@H](COP(=O)(O)O)O[C@H]1N2,7D6O_MTE +CCN(C(=O)c1cc(F)ccc1Oc1cncnc1N1CC2(CCN(C[C@H]3CC[C@H](NS(=O)(=O)CC)CC3)CC2)C1)C(C)C,7UJ4_OQ4 +OCC[C@H](O)CO,7OSO_0V1 +Cc1cccc2c(-c3ccccc3Cl)c(C(=O)O)[nH]c12,7AFX_R9K +Cc1cn(-c2ccc3c(c2)CN(c2ncc(Cc4ccc(-n5cccn5)cc4)s2)CC3)c(C)n1,7T1D_E7K +CC(C)n1ncc2cnc(Nc3cc([C@@H]4CCNC4)nc(N4CCC(F)(F)C4)n3)cc21,7R9N_F97 +Nc1nc(Cl)nc2c1ncn2[C@@H]1O[C@H](CO[P@](=O)(O)OP(=O)(O)O)[C@@H](O)[C@H]1O,7MGT_ZD4 +COc1cnc(C(=O)Nc2ccc(F)c([C@]34CN(c5ncc(F)cn5)C[C@H]3CSC(N)=N4)c2)cn1,7MYU_ZR7 +CO[C@@H]1O[C@H](CO)[C@H](O)[C@H](OC(=O)c2ccc(C)cc2)[C@@H]1OC(=O)c1ccc(Cl)cc1[N+](=O)[O-],7RH3_59O +NC(=O)c1ccc[n+]([C@@H]2C[C@H](CO[P@](=O)(O)O[P@](=O)(O)OC[C@H]3O[C@@H](n4cnc5c(N)ncnc54)[C@H](O)[C@@H]3O)[C@@H](O)[C@H]2O)c1,7OMX_CNA +Cc1ccccc1Oc1cc(-n2c(=O)cc(S(=O)(=O)c3ccccc3)[nH]c2=O)c(F)cc1C#N,7NXO_UU8 +CCn1c(CO)nn(-c2nc(O[C@@H](C)C(F)(F)F)c(C(=O)Nc3c(C)ccnc3Cl)cc2F)c1=O,8DHG_T78 +CCC[C@@H](NC(=O)c1cccc2c1CC(=O)N2)[C@@H](O)c1cccc(Cl)c1C,7NPL_UKZ +O=C(c1ccco1)N1CCN([C@@H]2CC(=O)N(c3ccc(-c4ccc(F)cc4)cc3)C2)CC1,7PRM_81I +CC(=O)N[C@@H]1[C@@H](O)[C@H](O)[C@@H](COS(=O)(=O)O)O[C@H]1O,7WDT_NGS +Nc1ncnc2c1ncn2[C@@H]1O[C@@H]2CO[P@](=O)(O)O[P@](=O)(O)OC[C@H]3O[C@@H](O[C@@H]1[C@@H]2O)[C@H](O)[C@@H]3O,7UAW_MF6 +Nc1nc2c(ncn2[C@@H]2O[C@H](CO)[C@@H](O)[C@H]2O)c(=O)[nH]1,7W05_GMP +C[C@H]1CCN(CCOc2ccc([C@@H]3c4ccc(O)cc4CC[C@@H]3c3ccccc3)cc2)C1,7UJF_R3V +O=C(O)c1ccc(C(=O)c2ccccc2)cc1,8D39_QDB +CNc1cc(-c2ccc3[nH]ccc3c2)nc(S(C)(=O)=O)n1,7F5D_EUO +O=C(O)c1ccnc(C(=O)O)c1F,7BMI_U4B +C=C/C(=N\Cc1c(COP(=O)(O)O)cnc(C)c1O)C(=O)O,7KB1_WBJ +CN(C)c1ccc(/C=C(\C#N)C(=O)N[C@@H]2O[C@H](CO)[C@@H](O)[C@H](O)[C@H]2O)cc1,7Q5I_I0F +Cc1nc([C@](C)(O)CO)sc1-c1cnc(N)c(O[C@H](C)c2cc(F)ccc2N2NC=CN2)c1,7R7R_AWJ +CS(=O)(=O)c1ccc2nc(NC(=O)Cc3csc(-n4cccc4)n3)sc2c1,7L00_XCJ +Nc1ncnc2n[nH]cc12,7BJJ_TVW 
+O=C(O)C[C@@H]1CC(=O)N(O)C1=O,7UQ3_O2U +CC(C)=CCC/C(C)=C/CC/C(C)=C(\F)CO[P@@](=O)(O)OP(=O)(O)O,7XQZ_FPF +C[C@@H](OP(=O)(O)O)[C@H](NC(=O)CCCCCCS)C(=O)O,7B2C_TP7 +O=[N+]([O-])c1ccc(O)c(O)c1,7JMV_4NC +O=C(O)c1ccccc1,7BNH_BEZ +CC(=O)c1ccn(S(=O)(=O)c2ccccc2)c1,8FO5_Y4U +C[C@]12CCC(=O)C[C@@H]1CC[C@@H]1[C@@H]2CC[C@]2(C)[C@@H](O)CC[C@@H]12,7ZU2_DHT +Cc1ncc(C[n+]2csc(CCO[P@](=O)(O)OP(=O)(O)O)c2C)c(N)n1,7A9H_TPP +Cc1cc(NC(=O)c2cn(C3(C)CC3)c3ncnc(N)c23)n[nH]1,7DUA_HJ0 +COc1ccc(CNc2ccc(Cc3ccncc3)cc2)cc1,7P5T_5YG +Cc1nn(C)c(C)c1C(=O)N1CCN(Cc2nc3ccccc3n2CC(C)(C)C)CC1,7RNI_60I +Nc1ncnc2c1ncn2[C@@H]1O[C@H](CO[P@@](=O)(O)O[P@@](=O)(O)CP(=O)(O)O)[C@@H](O)[C@H]1O,7WCF_ACP +Cc1cc2c(cc1C)N(C[C@H](O)[C@H](O)[C@H](O)COP(=O)(O)O)c1[nH]c(=O)[nH]c(=O)c1N2,6M73_FNR +OC[C@H]1N[C@@H](c2c[nH]c3c(O)ncnc23)[C@H](O)[C@@H]1O,6ZK5_IMH +N[C@H](CCC[C@H](N)C(=O)O)C(=O)O,8G6P_API +O=C(C[C@H]1NCCC[C@@H]1O)Cn1cnc2ccccc2c1=O,7VC5_9SF +O=C([C@H]1C[C@@H](c2cccc(Cl)c2)CN1)N1CCN(c2nccc3ccsc23)CC1,7ZZW_KKW +Cc1cc(NCc2cccc(CN3C[C@H](O)[C@@H](O)[C@H](O)[C@H]3CO)c2)cc(-c2ncccn2)c1,7R6J_2I7 +C[C@H](C(=O)N[C@H](CO)c1cccc(N2CCN(C)CC2)n1)N1Cc2ccc(-c3nc(NC4CCOCC4)ncc3Cl)cc2C1=O,7NR8_UOE +O=C1N[C@@H](Cc2c[nH]c3c(F)cccc23)C(=O)N2CCC[C@@H]12,8HO0_3ZI +Nc1ncnc2c1ncn2[C@@H]1O[C@H](CO[P@](=O)(O)C[P@](=O)(O)OP(=O)(O)O)[C@@H](O)[C@H]1O,7XBV_APC +Nc1ncnc2c1ncn2[C@@H]1O[C@@H]2CO[P@](=O)(O)O[P@](=O)(O)OC[C@H]3O[C@@H](O[C@H]2[C@H]1O)[C@H](O)[C@@H]3O,7UXS_OJC +CCCCCCCCCCCCCCC(=O)O,7WPW_F15 +Nc1ccn([C@@H]2O[C@H](COP(=O)(O)O)[C@@H](O)[C@H]2O)c(=O)n1,8CSD_C5P +N#CCc1c[nH]c2ccc(Cl)cc12,8AEM_LVF +CCCC[C@H](N[C@@H](CCc1ccccc1)C(=O)O)C(=O)N[C@@H](Cc1ccc(O)cc1)C(=O)O,7Q25_8J9 +Nc1ncnc2c1ncn2[C@H]1C[C@H](O)[C@@H](CO)O1,6ZPB_3D1 +C[C@H](O)[C@H](O)[C@H]1CNc2nc(N)[nH]c(=O)c2N1,7TSF_H4B +Nc1nc2c(ncn2[C@@H]2O[C@H](CO[P@](=O)(O)O[P@](=O)(O)OP(=O)(O)O)[C@@H](O)[C@H]2O)c(=O)[nH]1,7LJN_GTP +O=P(O)(O)CP(=O)(O)O,7E4L_MDN +Cc1cn([C@H]2C[C@H](O)[C@@H](CO[P@@](=O)(O)O[P@](=O)(O)O[C@H]3O[C@H](C)[C@H](O)[C@H](N)[C@H]3O)O2)c(=O)[nH]c1=O,7N7B_T3F 
+O=C(O)c1ccc(-c2cccc(C(=O)O)c2)cc1,7JHQ_VAJ +Oc1ccccc1O,7WKL_CAQ +O=P(O)(O)OC[C@H]1O[C@H](O[P@@](=O)(O)OP(=O)(O)O)[C@H](O)[C@@H]1O,8AP0_PRP +O=C(Nc1ccc(F)cc1)C1(C(=O)Nc2ccc(Oc3ccnc4c3Oc3ccccc3N4)c(F)c2)CC1,7V3S_5I9 +O=c1ccn([C@@H]2O[C@H](CO[P@](=O)(O)O[P@](=O)(O)O[C@H]3O[C@H](CO)[C@@H](O)[C@H](O)[C@H]3O)[C@@H](O)[C@H]2O)c(=O)[nH]1,7XPO_UPG +CN1C(=O)N(c2cc(Cl)cc(Cl)c2)C(=O)[C@]12CN(c1ccc(C(=O)O)cn1)C[C@H]2c1ccc(C#N)cc1,7KC5_BJZ +O=C(O)/C=C/c1ccc(O)cc1,7NSW_HC4 +Nc1nc2c(ncn2[C@@H]2O[C@@H]3CO[P@@](=O)(O)O[C@@H]4[C@H](O)[C@@H](CO[P@](=O)(O)O[C@H]3[C@H]2O)O[C@H]4n2cnc3c(N)ncnc32)c(=O)[nH]1,7RWS_4UR +Cc1ccc2c(n1)[nH]c1c(C3CC3)cccc12,7VBU_6I4 +O=c1ccn([C@@H]2O[C@H](CO)[C@@H](O)[C@H]2O)c(=O)[nH]1,7QTA_URI +CC(C)(C)c1cc(C(=O)/C=C/c2ccc(C(=O)O)cc2)cc(C(C)(C)C)c1,7WQQ_5Z6 +Cc1ncc(COP(=O)(O)O)c(/C=N/[C@H](CCCNC(=N)N)C(=O)O)c1O,8D5D_5DK +CCCC[C@H](N[C@@H](CCc1ccccc1)C(=O)O)C(=O)N[C@@H](Cc1c[nH]c2ccccc12)C(=O)O,7Q27_8KC +Nc1ncnc2c1ncn2[C@@H]1O[C@H](COP(=O)(O)O)[C@@H](OP(=O)(O)O)[C@H]1O,7ED2_A3P +CN(c1ncccc1CNc1ccnc(Nc2ccc3c(c2)CC(=O)N3)n1)S(C)(=O)=O,6YT6_PKE +NCC(=O)N[C@@H]1O[C@H](COP(=O)([O-])[O-])[C@@H](O)[C@H]1O,7JG0_GAR +O=C(CNc1cc(F)cc(F)c1)N[C@@H](C(=O)NO)c1ccc(-c2cc(F)c(F)c(F)c2)cc1,8EYE_X4I +Nc1ncnc2c1ncn2[C@@H]1O[C@H](CSC[C@H](N)C(=O)O)[C@@H](O)[C@H]1O,7O1T_5X8 +Cc1ccc(C[C@@]2(C(=O)O)C[C@H]2c2ccccc2)cc1,6Z4N_Q7B +CCN1C(=O)c2cc(N3CCN(C)CC3)nc3c(NS(=O)(=O)c4ccc(F)cc4F)ccc1c23,7WL4_JFU +NCC(=O)NS(=O)(=O)OC[C@H]1O[C@@H](n2cnc3c(N)ncnc32)[C@H](O)[C@@H]1O,8SLG_G5A +COc1cccc(-c2ccc3c(CC(=O)O)coc3c2)c1,7L7C_XQ1 +O=C(CCCC[C@@H]1SC[C@@H]2NC(=O)N[C@@H]21)N[C@H]1CCNC1,7NLV_UJE +NC(=O)O[P@](=O)(O)OC[C@H]1O[C@@H](n2cnc3c(N)ncnc32)[C@H](O)[C@@H]1O,7VYJ_CA0 +COC(=O)c1cc(S(N)(=O)=O)c(Cl)cc1S(=O)(=O)c1ccccc1,7PUV_84Z +C[C@@H]1COCCN1c1cc2n(n1)[C@@H]1CCC[C@@H]1NC2=O,7RSV_7IQ +Cc1cc(Cl)ccc1CNC(=O)Nc1ccc2cc1OCCOCCNc1ccn3ncc-2c3n1,7QGP_DJ8 diff --git a/forks/NeuralPLexer/inference/neuralplexer_astex_diverse_inputs.csv b/forks/NeuralPLexer/inference/neuralplexer_astex_diverse_inputs.csv new 
file mode 100644 index 00000000..88d5f2c5 --- /dev/null +++ b/forks/NeuralPLexer/inference/neuralplexer_astex_diverse_inputs.csv @@ -0,0 +1,81 @@ +id,input_receptor,input_ligand,input_template +1MMV_3AR,data/astex_diverse_set/astex_diverse_holo_aligned_predicted_structures/1MMV_3AR_holo_aligned_predicted_protein.pdb,CCCNC(=[NH2+])NCCC[C@H](N)C(=O)O,data/astex_diverse_set/astex_diverse_holo_aligned_predicted_structures/1MMV_3AR_holo_aligned_predicted_protein.pdb +1L2S_STC,data/astex_diverse_set/astex_diverse_holo_aligned_predicted_structures/1L2S_STC_holo_aligned_predicted_protein.pdb,O=C(O)c1sccc1S(=O)(=O)Nc1ccc(Cl)cc1,data/astex_diverse_set/astex_diverse_holo_aligned_predicted_structures/1L2S_STC_holo_aligned_predicted_protein.pdb +1N46_PFA,data/astex_diverse_set/astex_diverse_holo_aligned_predicted_structures/1N46_PFA_holo_aligned_predicted_protein.pdb,Cc1cc(-n2ncc(=O)[nH]c2=O)cc(C)c1Oc1ccc(O)c(C(C)C)c1,data/astex_diverse_set/astex_diverse_holo_aligned_predicted_structures/1N46_PFA_holo_aligned_predicted_protein.pdb +1Y6B_AAX,data/astex_diverse_set/astex_diverse_holo_aligned_predicted_structures/1Y6B_AAX_holo_aligned_predicted_protein.pdb,COc1ccc(S(=O)(=O)NCC2CC2)cc1Nc1ncc(-c2cccc(-c3cccnc3)c2)o1,data/astex_diverse_set/astex_diverse_holo_aligned_predicted_structures/1Y6B_AAX_holo_aligned_predicted_protein.pdb +1YWR_LI9,data/astex_diverse_set/astex_diverse_holo_aligned_predicted_structures/1YWR_LI9_holo_aligned_predicted_protein.pdb,C[C@H](Nc1nccc(-c2c(-c3ccc(F)cc3)c(=O)n(C3CCNCC3)n2C)n1)c1ccccc1,data/astex_diverse_set/astex_diverse_holo_aligned_predicted_structures/1YWR_LI9_holo_aligned_predicted_protein.pdb +1YV3_BIT,data/astex_diverse_set/astex_diverse_holo_aligned_predicted_structures/1YV3_BIT_holo_aligned_predicted_protein.pdb,Cc1ccc2c(c1)C(=O)[C@]1(O)CCN(c3ccccc3)C1=N2,data/astex_diverse_set/astex_diverse_holo_aligned_predicted_structures/1YV3_BIT_holo_aligned_predicted_protein.pdb 
+1R9O_FLP,data/astex_diverse_set/astex_diverse_holo_aligned_predicted_structures/1R9O_FLP_holo_aligned_predicted_protein.pdb,C[C@H](C(=O)O)c1ccc(-c2ccccc2)c(F)c1,data/astex_diverse_set/astex_diverse_holo_aligned_predicted_structures/1R9O_FLP_holo_aligned_predicted_protein.pdb +1IG3_VIB,data/astex_diverse_set/astex_diverse_holo_aligned_predicted_structures/1IG3_VIB_holo_aligned_predicted_protein.pdb,Cc1ncc(C[n+]2csc(CCO)c2C)c(N)n1,data/astex_diverse_set/astex_diverse_holo_aligned_predicted_structures/1IG3_VIB_holo_aligned_predicted_protein.pdb +1R58_AO5,data/astex_diverse_set/astex_diverse_holo_aligned_predicted_structures/1R58_AO5_holo_aligned_predicted_protein.pdb,CC(C)SCC[C@@H](N)[C@H](O)C(=O)NNC(=O)c1cccc(Cl)c1,data/astex_diverse_set/astex_diverse_holo_aligned_predicted_structures/1R58_AO5_holo_aligned_predicted_protein.pdb +1YGC_905,data/astex_diverse_set/astex_diverse_holo_aligned_predicted_structures/1YGC_905_holo_aligned_predicted_protein.pdb,CCOc1cc(OCC)c(F)c([C@@H](Nc2ccc(C(=N)N)c(O)c2)C(=O)NS(=O)(=O)c2cccc(N)c2)c1,data/astex_diverse_set/astex_diverse_holo_aligned_predicted_structures/1YGC_905_holo_aligned_predicted_protein.pdb +1GPK_HUP,data/astex_diverse_set/astex_diverse_holo_aligned_predicted_structures/1GPK_HUP_holo_aligned_predicted_protein.pdb,C/C=C1\[C@@H]2C=C(C)C[C@@]1(N)c1ccc(=O)[nH]c1C2,data/astex_diverse_set/astex_diverse_holo_aligned_predicted_structures/1GPK_HUP_holo_aligned_predicted_protein.pdb +1K3U_IAD,data/astex_diverse_set/astex_diverse_holo_aligned_predicted_structures/1K3U_IAD_holo_aligned_predicted_protein.pdb,O=C(O)C[C@H](NC(=O)Cc1c[nH]c2ccccc12)C(=O)O,data/astex_diverse_set/astex_diverse_holo_aligned_predicted_structures/1K3U_IAD_holo_aligned_predicted_protein.pdb 
+1Q1G_MTI,data/astex_diverse_set/astex_diverse_holo_aligned_predicted_structures/1Q1G_MTI_holo_aligned_predicted_protein.pdb,CSC[C@H]1[NH2+][C@@H](c2c[nH]c3c(=O)[nH]cnc23)[C@H](O)[C@@H]1O,data/astex_diverse_set/astex_diverse_holo_aligned_predicted_structures/1Q1G_MTI_holo_aligned_predicted_protein.pdb +1GM8_SOX,data/astex_diverse_set/astex_diverse_holo_aligned_predicted_structures/1GM8_SOX_holo_aligned_predicted_protein.pdb,CC1(C)[C@H](C(=O)O)N2C(=O)[C@@H](NC(=O)Cc3ccccc3)[C@H]2[S@H]1O,data/astex_diverse_set/astex_diverse_holo_aligned_predicted_structures/1GM8_SOX_holo_aligned_predicted_protein.pdb +1OPK_P16,data/astex_diverse_set/astex_diverse_holo_aligned_predicted_structures/1OPK_P16_holo_aligned_predicted_protein.pdb,Cn1c(=O)c(-c2c(Cl)cccc2Cl)cc2cnc(Nc3cccc(CO)c3)nc21,data/astex_diverse_set/astex_diverse_holo_aligned_predicted_structures/1OPK_P16_holo_aligned_predicted_protein.pdb +1U1C_BAU,data/astex_diverse_set/astex_diverse_holo_aligned_predicted_structures/1U1C_BAU_holo_aligned_predicted_protein.pdb,O=c1[nH]c(=O)n(COCCO)cc1Cc1ccccc1,data/astex_diverse_set/astex_diverse_holo_aligned_predicted_structures/1U1C_BAU_holo_aligned_predicted_protein.pdb +1T46_STI,data/astex_diverse_set/astex_diverse_holo_aligned_predicted_structures/1T46_STI_holo_aligned_predicted_protein.pdb,Cc1ccc(NC(=O)c2ccc(CN3CCN(C)CC3)cc2)cc1Nc1nccc(-c2cccnc2)n1,data/astex_diverse_set/astex_diverse_holo_aligned_predicted_structures/1T46_STI_holo_aligned_predicted_protein.pdb +1HQ2_PH2,data/astex_diverse_set/astex_diverse_holo_aligned_predicted_structures/1HQ2_PH2_holo_aligned_predicted_protein.pdb,Nc1nc2c(c(=O)[nH]1)N=C(CO)CN2,data/astex_diverse_set/astex_diverse_holo_aligned_predicted_structures/1HQ2_PH2_holo_aligned_predicted_protein.pdb 
+1N2V_BDI,data/astex_diverse_set/astex_diverse_holo_aligned_predicted_structures/1N2V_BDI_holo_aligned_predicted_protein.pdb,CCCCc1nc2c(=O)[nH][nH]c(=O)c2[nH]1,data/astex_diverse_set/astex_diverse_holo_aligned_predicted_structures/1N2V_BDI_holo_aligned_predicted_protein.pdb +1S19_MC9,data/astex_diverse_set/astex_diverse_holo_aligned_predicted_structures/1S19_MC9_holo_aligned_predicted_protein.pdb,C=C1/C(=C\C=C2/CCC[C@]3(C)[C@@H]([C@H](C)/C=C/[C@@H](O)C4CC4)CC[C@@H]23)C[C@@H](O)C[C@@H]1O,data/astex_diverse_set/astex_diverse_holo_aligned_predicted_structures/1S19_MC9_holo_aligned_predicted_protein.pdb +1OYT_FSN,data/astex_diverse_set/astex_diverse_holo_aligned_predicted_structures/1OYT_FSN_holo_aligned_predicted_protein.pdb,NC(=[NH2+])c1ccc([C@H]2[C@H]3C(=O)N(Cc4ccc(F)cc4)C(=O)[C@H]3[C@@H]3CCCN32)cc1,data/astex_diverse_set/astex_diverse_holo_aligned_predicted_structures/1OYT_FSN_holo_aligned_predicted_protein.pdb +1TZ8_DES,data/astex_diverse_set/astex_diverse_holo_aligned_predicted_structures/1TZ8_DES_holo_aligned_predicted_protein.pdb,CC/C(=C(/CC)c1ccc(O)cc1)c1ccc(O)cc1,data/astex_diverse_set/astex_diverse_holo_aligned_predicted_structures/1TZ8_DES_holo_aligned_predicted_protein.pdb +2BSM_BSM,data/astex_diverse_set/astex_diverse_holo_aligned_predicted_structures/2BSM_BSM_holo_aligned_predicted_protein.pdb,CCNC(=O)c1n[nH]c(-c2cc(Cl)c(O)cc2O)c1-c1ccc(OC)cc1,data/astex_diverse_set/astex_diverse_holo_aligned_predicted_structures/2BSM_BSM_holo_aligned_predicted_protein.pdb +1P2Y_NCT,data/astex_diverse_set/astex_diverse_holo_aligned_predicted_structures/1P2Y_NCT_holo_aligned_predicted_protein.pdb,CN1CCC[C@H]1c1cccnc1,data/astex_diverse_set/astex_diverse_holo_aligned_predicted_structures/1P2Y_NCT_holo_aligned_predicted_protein.pdb 
+1V0P_PVB,data/astex_diverse_set/astex_diverse_holo_aligned_predicted_structures/1V0P_PVB_holo_aligned_predicted_protein.pdb,CC(C)[C@H](CO)Nc1nc(Nc2ccc(C(=O)O)c(Cl)c2)c2ncn(C(C)C)c2n1,data/astex_diverse_set/astex_diverse_holo_aligned_predicted_structures/1V0P_PVB_holo_aligned_predicted_protein.pdb +1KZK_JE2,data/astex_diverse_set/astex_diverse_holo_aligned_predicted_structures/1KZK_JE2_holo_aligned_predicted_protein.pdb,Cc1ccccc1CNC(=O)[C@H]1N(C(=O)[C@@H](O)[C@H](Cc2ccccc2)NC(=O)c2cccc(O)c2C)CSC1(C)C,data/astex_diverse_set/astex_diverse_holo_aligned_predicted_structures/1KZK_JE2_holo_aligned_predicted_protein.pdb +1R55_097,data/astex_diverse_set/astex_diverse_holo_aligned_predicted_structures/1R55_097_holo_aligned_predicted_protein.pdb,CNC(=O)[C@@H](NC(=O)[C@H](CC(C)C)[C@H](O)C(=O)NO)C(C)(C)C,data/astex_diverse_set/astex_diverse_holo_aligned_predicted_structures/1R55_097_holo_aligned_predicted_protein.pdb +1SG0_STL,data/astex_diverse_set/astex_diverse_holo_aligned_predicted_structures/1SG0_STL_holo_aligned_predicted_protein.pdb,Oc1ccc(/C=C/c2cc(O)cc(O)c2)cc1,data/astex_diverse_set/astex_diverse_holo_aligned_predicted_structures/1SG0_STL_holo_aligned_predicted_protein.pdb +1L7F_BCZ,data/astex_diverse_set/astex_diverse_holo_aligned_predicted_structures/1L7F_BCZ_holo_aligned_predicted_protein.pdb,CCC(CC)[C@H](NC(C)=O)[C@@H]1[C@H](O)[C@@H](C(=O)O)C[C@H]1NC(=N)N,data/astex_diverse_set/astex_diverse_holo_aligned_predicted_structures/1L7F_BCZ_holo_aligned_predicted_protein.pdb +1NAV_IH5,data/astex_diverse_set/astex_diverse_holo_aligned_predicted_structures/1NAV_IH5_holo_aligned_predicted_protein.pdb,CC(C)c1cc(Oc2c(Cl)cc(CC(=O)O)cc2Cl)ccc1O,data/astex_diverse_set/astex_diverse_holo_aligned_predicted_structures/1NAV_IH5_holo_aligned_predicted_protein.pdb 
+1W2G_THM,data/astex_diverse_set/astex_diverse_holo_aligned_predicted_structures/1W2G_THM_holo_aligned_predicted_protein.pdb,Cc1cn([C@H]2C[C@H](O)[C@@H](CO)O2)c(=O)[nH]c1=O,data/astex_diverse_set/astex_diverse_holo_aligned_predicted_structures/1W2G_THM_holo_aligned_predicted_protein.pdb +1Z95_198,data/astex_diverse_set/astex_diverse_holo_aligned_predicted_structures/1Z95_198_holo_aligned_predicted_protein.pdb,C[C@](O)(CS(=O)(=O)c1ccc(F)cc1)C(=O)Nc1ccc(C#N)c(C(F)(F)F)c1,data/astex_diverse_set/astex_diverse_holo_aligned_predicted_structures/1Z95_198_holo_aligned_predicted_protein.pdb +1YVF_PH7,data/astex_diverse_set/astex_diverse_holo_aligned_predicted_structures/1YVF_PH7_holo_aligned_predicted_protein.pdb,O=C(O)/C(=C/c1ccc(Oc2ccccc2Br)cc1)NC(=O)c1ccccc1,data/astex_diverse_set/astex_diverse_holo_aligned_predicted_structures/1YVF_PH7_holo_aligned_predicted_protein.pdb +1X8X_TYR,data/astex_diverse_set/astex_diverse_holo_aligned_predicted_structures/1X8X_TYR_holo_aligned_predicted_protein.pdb,N[C@@H](Cc1ccc(O)cc1)C(=O)O,data/astex_diverse_set/astex_diverse_holo_aligned_predicted_structures/1X8X_TYR_holo_aligned_predicted_protein.pdb +1HWI_115,data/astex_diverse_set/astex_diverse_holo_aligned_predicted_structures/1HWI_115_holo_aligned_predicted_protein.pdb,CC(C)n1c(/C=C/[C@@H](O)C[C@@H](O)CC(=O)O)c(-c2ccc(F)cc2)c2ccccc21,data/astex_diverse_set/astex_diverse_holo_aligned_predicted_structures/1HWI_115_holo_aligned_predicted_protein.pdb +1S3V_TQD,data/astex_diverse_set/astex_diverse_holo_aligned_predicted_structures/1S3V_TQD_holo_aligned_predicted_protein.pdb,COc1cc(N(C)C[C@@H]2CCC3=C(C2)C(N)=N[C@@H](N)N3)cc(OC)c1OC,data/astex_diverse_set/astex_diverse_holo_aligned_predicted_structures/1S3V_TQD_holo_aligned_predicted_protein.pdb 
+1W1P_GIO,data/astex_diverse_set/astex_diverse_holo_aligned_predicted_structures/1W1P_GIO_holo_aligned_predicted_protein.pdb,O=C1NCC(=O)N2CCC[C@@H]12,data/astex_diverse_set/astex_diverse_holo_aligned_predicted_structures/1W1P_GIO_holo_aligned_predicted_protein.pdb +1U4D_DBQ,data/astex_diverse_set/astex_diverse_holo_aligned_predicted_structures/1U4D_DBQ_holo_aligned_predicted_protein.pdb,NC1=N/C(=C2/CCNC(=O)c3[nH]ccc32)C(=O)N1,data/astex_diverse_set/astex_diverse_holo_aligned_predicted_structures/1U4D_DBQ_holo_aligned_predicted_protein.pdb +1V4S_MRK,data/astex_diverse_set/astex_diverse_holo_aligned_predicted_structures/1V4S_MRK_holo_aligned_predicted_protein.pdb,Cn1ccnc1Sc1cc(C(=O)Nc2nccs2)c(N)cc1F,data/astex_diverse_set/astex_diverse_holo_aligned_predicted_structures/1V4S_MRK_holo_aligned_predicted_protein.pdb +1OF1_SCT,data/astex_diverse_set/astex_diverse_holo_aligned_predicted_structures/1OF1_SCT_holo_aligned_predicted_protein.pdb,Cc1cn([C@@]23C[C@H](O)[C@@H](CO)[C@@H]2C3)c(=O)[nH]c1=O,data/astex_diverse_set/astex_diverse_holo_aligned_predicted_structures/1OF1_SCT_holo_aligned_predicted_protein.pdb +1V48_HA1,data/astex_diverse_set/astex_diverse_holo_aligned_predicted_structures/1V48_HA1_holo_aligned_predicted_protein.pdb,Nc1nc2c(ncn2CCCCC(F)(F)P(=O)(O)O)c(=O)[nH]1,data/astex_diverse_set/astex_diverse_holo_aligned_predicted_structures/1V48_HA1_holo_aligned_predicted_protein.pdb +1Q4G_BFL,data/astex_diverse_set/astex_diverse_holo_aligned_predicted_structures/1Q4G_BFL_holo_aligned_predicted_protein.pdb,C[C@H](C(=O)O)c1ccc(-c2ccccc2)cc1,data/astex_diverse_set/astex_diverse_holo_aligned_predicted_structures/1Q4G_BFL_holo_aligned_predicted_protein.pdb +1JJE_BYS,data/astex_diverse_set/astex_diverse_holo_aligned_predicted_structures/1JJE_BYS_holo_aligned_predicted_protein.pdb,O=C(O)[C@@H](Cc1ccccc1)[C@H](Cc1ccc2c(c1)OCO2)C(=O)O,data/astex_diverse_set/astex_diverse_holo_aligned_predicted_structures/1JJE_BYS_holo_aligned_predicted_protein.pdb 
+1XOQ_ROF,data/astex_diverse_set/astex_diverse_holo_aligned_predicted_structures/1XOQ_ROF_holo_aligned_predicted_protein.pdb,O=C(Nc1c(Cl)cncc1Cl)c1ccc(OC(F)F)c(OCC2CC2)c1,data/astex_diverse_set/astex_diverse_holo_aligned_predicted_structures/1XOQ_ROF_holo_aligned_predicted_protein.pdb +1M2Z_DEX,data/astex_diverse_set/astex_diverse_holo_aligned_predicted_structures/1M2Z_DEX_holo_aligned_predicted_protein.pdb,C[C@@H]1C[C@H]2[C@@H]3CCC4=CC(=O)C=C[C@]4(C)[C@@]3(F)[C@@H](O)C[C@]2(C)[C@@]1(O)C(=O)CO,data/astex_diverse_set/astex_diverse_holo_aligned_predicted_structures/1M2Z_DEX_holo_aligned_predicted_protein.pdb +1P62_GEO,data/astex_diverse_set/astex_diverse_holo_aligned_predicted_structures/1P62_GEO_holo_aligned_predicted_protein.pdb,Nc1ccn([C@@H]2O[C@H](CO)[C@@H](O)C2(F)F)c(=O)n1,data/astex_diverse_set/astex_diverse_holo_aligned_predicted_structures/1P62_GEO_holo_aligned_predicted_protein.pdb +1LRH_NLA,data/astex_diverse_set/astex_diverse_holo_aligned_predicted_structures/1LRH_NLA_holo_aligned_predicted_protein.pdb,O=C(O)Cc1cccc2ccccc12,data/astex_diverse_set/astex_diverse_holo_aligned_predicted_structures/1LRH_NLA_holo_aligned_predicted_protein.pdb +1SJ0_E4D,data/astex_diverse_set/astex_diverse_holo_aligned_predicted_structures/1SJ0_E4D_holo_aligned_predicted_protein.pdb,Oc1ccc([C@H]2Sc3cc(O)ccc3O[C@H]2c2ccc(OCCN3CCCCC3)cc2)cc1,data/astex_diverse_set/astex_diverse_holo_aligned_predicted_structures/1SJ0_E4D_holo_aligned_predicted_protein.pdb +1PMN_984,data/astex_diverse_set/astex_diverse_holo_aligned_predicted_structures/1PMN_984_holo_aligned_predicted_protein.pdb,CCCn1c(C2CCN(C)CC2)nc(-c2ccc(Cl)c(Cl)c2)c1-c1ccnc(NC2CC2)n1,data/astex_diverse_set/astex_diverse_holo_aligned_predicted_structures/1PMN_984_holo_aligned_predicted_protein.pdb 
+1SQN_NDR,data/astex_diverse_set/astex_diverse_holo_aligned_predicted_structures/1SQN_NDR_holo_aligned_predicted_protein.pdb,C#C[C@]1(O)CC[C@H]2[C@@H]3CCC4=CC(=O)CC[C@@H]4[C@H]3CC[C@@]21C,data/astex_diverse_set/astex_diverse_holo_aligned_predicted_structures/1SQN_NDR_holo_aligned_predicted_protein.pdb +1GKC_NFH,data/astex_diverse_set/astex_diverse_holo_aligned_predicted_structures/1GKC_NFH_holo_aligned_predicted_protein.pdb,CNC(=O)[C@@H](NC(=O)[C@H](CC(C)C)CN(O)C=O)C(C)(C)C,data/astex_diverse_set/astex_diverse_holo_aligned_predicted_structures/1GKC_NFH_holo_aligned_predicted_protein.pdb +1T40_ID5,data/astex_diverse_set/astex_diverse_holo_aligned_predicted_structures/1T40_ID5_holo_aligned_predicted_protein.pdb,O=C(O)COc1cc(F)ccc1C(=O)NCc1nc2c(F)c(F)cc(F)c2s1,data/astex_diverse_set/astex_diverse_holo_aligned_predicted_structures/1T40_ID5_holo_aligned_predicted_protein.pdb +1UML_FR4,data/astex_diverse_set/astex_diverse_holo_aligned_predicted_structures/1UML_FR4_holo_aligned_predicted_protein.pdb,NC(=O)c1cn([C@@H](CO)CCn2ccc3ccc(NC(=O)CCc4ccccc4)cc32)cn1,data/astex_diverse_set/astex_diverse_holo_aligned_predicted_structures/1UML_FR4_holo_aligned_predicted_protein.pdb +1UNL_RRC,data/astex_diverse_set/astex_diverse_holo_aligned_predicted_structures/1UNL_RRC_holo_aligned_predicted_protein.pdb,CC[C@H](CO)Nc1nc(NCc2ccccc2)c2ncn(C(C)C)c2n1,data/astex_diverse_set/astex_diverse_holo_aligned_predicted_structures/1UNL_RRC_holo_aligned_predicted_protein.pdb +1OF6_DTY,data/astex_diverse_set/astex_diverse_holo_aligned_predicted_structures/1OF6_DTY_holo_aligned_predicted_protein.pdb,N[C@@H](Cc1ccc(O)cc1)C(=O)O,data/astex_diverse_set/astex_diverse_holo_aligned_predicted_structures/1OF6_DTY_holo_aligned_predicted_protein.pdb 
+1JD0_AZM,data/astex_diverse_set/astex_diverse_holo_aligned_predicted_structures/1JD0_AZM_holo_aligned_predicted_protein.pdb,CC(=O)Nc1nnc(S(N)(=O)=O)s1,data/astex_diverse_set/astex_diverse_holo_aligned_predicted_structures/1JD0_AZM_holo_aligned_predicted_protein.pdb +1N2J_PAF,data/astex_diverse_set/astex_diverse_holo_aligned_predicted_structures/1N2J_PAF_holo_aligned_predicted_protein.pdb,CC(C)(CO)[C@@H](O)C(=O)[O-],data/astex_diverse_set/astex_diverse_holo_aligned_predicted_structures/1N2J_PAF_holo_aligned_predicted_protein.pdb +1J3J_CP6,data/astex_diverse_set/astex_diverse_holo_aligned_predicted_structures/1J3J_CP6_holo_aligned_predicted_protein.pdb,CCc1nc(N)nc(N)c1-c1ccc(Cl)cc1,data/astex_diverse_set/astex_diverse_holo_aligned_predicted_structures/1J3J_CP6_holo_aligned_predicted_protein.pdb +1MZC_BNE,data/astex_diverse_set/astex_diverse_holo_aligned_predicted_structures/1MZC_BNE_holo_aligned_predicted_protein.pdb,CC[C@@]1(c2cccc(Oc3cc([C@](C)(N)c4cncn4C)ccc3C#N)c2)CCCCN(C)C1=O,data/astex_diverse_set/astex_diverse_holo_aligned_predicted_structures/1MZC_BNE_holo_aligned_predicted_protein.pdb +1OWE_675,data/astex_diverse_set/astex_diverse_holo_aligned_predicted_structures/1OWE_675_holo_aligned_predicted_protein.pdb,N=C(N)c1ccc2cc(C(=O)Nc3ccccc3)ccc2c1,data/astex_diverse_set/astex_diverse_holo_aligned_predicted_structures/1OWE_675_holo_aligned_predicted_protein.pdb +1T9B_1CS,data/astex_diverse_set/astex_diverse_holo_aligned_predicted_structures/1T9B_1CS_holo_aligned_predicted_protein.pdb,COc1nc(C)nc(NC(=O)NS(=O)(=O)c2ccccc2Cl)n1,data/astex_diverse_set/astex_diverse_holo_aligned_predicted_structures/1T9B_1CS_holo_aligned_predicted_protein.pdb +2BR1_PFP,data/astex_diverse_set/astex_diverse_holo_aligned_predicted_structures/2BR1_PFP_holo_aligned_predicted_protein.pdb,COc1ccc(-c2oc3ncnc(NCCO)c3c2-c2ccc(OC)cc2)cc1,data/astex_diverse_set/astex_diverse_holo_aligned_predicted_structures/2BR1_PFP_holo_aligned_predicted_protein.pdb 
+1G9V_RQ3,data/astex_diverse_set/astex_diverse_holo_aligned_predicted_structures/1G9V_RQ3_holo_aligned_predicted_protein.pdb,Cc1cc(C)cc(NC(=O)Cc2ccc(OC(C)(C)C(=O)O)cc2)c1,data/astex_diverse_set/astex_diverse_holo_aligned_predicted_structures/1G9V_RQ3_holo_aligned_predicted_protein.pdb +2BM2_PM2,data/astex_diverse_set/astex_diverse_holo_aligned_predicted_structures/2BM2_PM2_holo_aligned_predicted_protein.pdb,NCc1cccc(C2CCN(C(=O)c3cncc(CCc4ccccc4)c3)CC2)c1,data/astex_diverse_set/astex_diverse_holo_aligned_predicted_structures/2BM2_PM2_holo_aligned_predicted_protein.pdb +1XOZ_CIA,data/astex_diverse_set/astex_diverse_holo_aligned_predicted_structures/1XOZ_CIA_holo_aligned_predicted_protein.pdb,CN1CC(=O)N2[C@H](c3ccc4c(c3)OCO4)c3[nH]c4ccccc4c3C[C@@H]2C1=O,data/astex_diverse_set/astex_diverse_holo_aligned_predicted_structures/1XOZ_CIA_holo_aligned_predicted_protein.pdb +1SQ5_PAU,data/astex_diverse_set/astex_diverse_holo_aligned_predicted_structures/1SQ5_PAU_holo_aligned_predicted_protein.pdb,CC(C)(CO)[C@@H](O)C(=O)NCCC(=O)O,data/astex_diverse_set/astex_diverse_holo_aligned_predicted_structures/1SQ5_PAU_holo_aligned_predicted_protein.pdb +1YQY_915,data/astex_diverse_set/astex_diverse_holo_aligned_predicted_structures/1YQY_915_holo_aligned_predicted_protein.pdb,Cc1cc(S(=O)(=O)N[C@@H](C(=O)NO)C2CCOCC2)ccc1F,data/astex_diverse_set/astex_diverse_holo_aligned_predicted_structures/1YQY_915_holo_aligned_predicted_protein.pdb +1IA1_TQ3,data/astex_diverse_set/astex_diverse_holo_aligned_predicted_structures/1IA1_TQ3_holo_aligned_predicted_protein.pdb,Nc1nc(N)c2c(Sc3ccccc3)cccc2n1,data/astex_diverse_set/astex_diverse_holo_aligned_predicted_structures/1IA1_TQ3_holo_aligned_predicted_protein.pdb +1HWW_SWA,data/astex_diverse_set/astex_diverse_holo_aligned_predicted_structures/1HWW_SWA_holo_aligned_predicted_protein.pdb,O[C@H]1[C@H]2[C@H](O)CCCN2C[C@H]1O,data/astex_diverse_set/astex_diverse_holo_aligned_predicted_structures/1HWW_SWA_holo_aligned_predicted_protein.pdb 
+1TT1_KAI,data/astex_diverse_set/astex_diverse_holo_aligned_predicted_structures/1TT1_KAI_holo_aligned_predicted_protein.pdb,C=C(C)[C@H]1CN[C@H](C(=O)O)[C@H]1CC(=O)O,data/astex_diverse_set/astex_diverse_holo_aligned_predicted_structures/1TT1_KAI_holo_aligned_predicted_protein.pdb +1Q41_IXM,data/astex_diverse_set/astex_diverse_holo_aligned_predicted_structures/1Q41_IXM_holo_aligned_predicted_protein.pdb,O=C1Nc2ccccc2/C1=C1/Nc2ccccc2/C1=N\O,data/astex_diverse_set/astex_diverse_holo_aligned_predicted_structures/1Q41_IXM_holo_aligned_predicted_protein.pdb +1N1M_A3M,data/astex_diverse_set/astex_diverse_holo_aligned_predicted_structures/1N1M_A3M_holo_aligned_predicted_protein.pdb,CC(C)[C@H](N)C(=O)N1CCCC1,data/astex_diverse_set/astex_diverse_holo_aligned_predicted_structures/1N1M_A3M_holo_aligned_predicted_protein.pdb +1KE5_LS1,data/astex_diverse_set/astex_diverse_holo_aligned_predicted_structures/1KE5_LS1_holo_aligned_predicted_protein.pdb,CNS(=O)(=O)c1ccc(N/C=C2\C(=O)Nc3ccccc32)cc1,data/astex_diverse_set/astex_diverse_holo_aligned_predicted_structures/1KE5_LS1_holo_aligned_predicted_protein.pdb +1HP0_AD3,data/astex_diverse_set/astex_diverse_holo_aligned_predicted_structures/1HP0_AD3_holo_aligned_predicted_protein.pdb,Nc1nccc2c1ncn2[C@@H]1O[C@H](CO)[C@@H](O)[C@H]1O,data/astex_diverse_set/astex_diverse_holo_aligned_predicted_structures/1HP0_AD3_holo_aligned_predicted_protein.pdb +1UOU_CMU,data/astex_diverse_set/astex_diverse_holo_aligned_predicted_structures/1UOU_CMU_holo_aligned_predicted_protein.pdb,N=C1CCCN1Cc1[nH]c(=O)[nH]c(=O)c1Cl,data/astex_diverse_set/astex_diverse_holo_aligned_predicted_structures/1UOU_CMU_holo_aligned_predicted_protein.pdb +1TOW_CRZ,data/astex_diverse_set/astex_diverse_holo_aligned_predicted_structures/1TOW_CRZ_holo_aligned_predicted_protein.pdb,O=C(O)CCCn1c2ccccc2c2ccccc21,data/astex_diverse_set/astex_diverse_holo_aligned_predicted_structures/1TOW_CRZ_holo_aligned_predicted_protein.pdb 
+1LPZ_CMB,data/astex_diverse_set/astex_diverse_holo_aligned_predicted_structures/1LPZ_CMB_holo_aligned_predicted_protein.pdb,Cc1cccc2c1cc(C(=O)NCc1cc(Cl)cc(Cl)c1)n2Cc1cccc(C(=N)N)c1,data/astex_diverse_set/astex_diverse_holo_aligned_predicted_structures/1LPZ_CMB_holo_aligned_predicted_protein.pdb +1VCJ_IBA,data/astex_diverse_set/astex_diverse_holo_aligned_predicted_structures/1VCJ_IBA_holo_aligned_predicted_protein.pdb,CCC(CC)Nc1cc(C(=O)O)ccc1N1C(=O)CC[C@@]1(CN)CO,data/astex_diverse_set/astex_diverse_holo_aligned_predicted_structures/1VCJ_IBA_holo_aligned_predicted_protein.pdb +1R1H_BIR,data/astex_diverse_set/astex_diverse_holo_aligned_predicted_structures/1R1H_BIR_holo_aligned_predicted_protein.pdb,C[C@H](NC(=O)[C@H](Cc1ccc(-c2ccccc2)cc1)C[P@](=O)(O)[C@H](C)N)C(=O)O,data/astex_diverse_set/astex_diverse_holo_aligned_predicted_structures/1R1H_BIR_holo_aligned_predicted_protein.pdb +1OQ5_CEL,data/astex_diverse_set/astex_diverse_holo_aligned_predicted_structures/1OQ5_CEL_holo_aligned_predicted_protein.pdb,Cc1ccc(-c2cc(C(F)(F)F)nn2-c2ccc(S(N)(=O)=O)cc2)cc1,data/astex_diverse_set/astex_diverse_holo_aligned_predicted_structures/1OQ5_CEL_holo_aligned_predicted_protein.pdb diff --git a/forks/NeuralPLexer/inference/neuralplexer_astex_diverse_inputs_first_20.csv b/forks/NeuralPLexer/inference/neuralplexer_astex_diverse_inputs_first_20.csv index 656e4059..42f3d415 100644 --- a/forks/NeuralPLexer/inference/neuralplexer_astex_diverse_inputs_first_20.csv +++ b/forks/NeuralPLexer/inference/neuralplexer_astex_diverse_inputs_first_20.csv @@ -1,21 +1,21 @@ id,input_receptor,input_ligand,input_template 
-1VCJ_IBA,PEWTYPRLSCQGSTFQKALLISPHRFGEIKGNSAPLIIREPFVACGPKECRHFALTHYAAQPGGYYNGTRKDRNKLRHLVSVKLGKIPTVENSIFHMAAWSGSACHDGREWTYIGVDGPDNDALVKIKYGEAYTDTYHSYAHNILRTQESACNCIGGDCYLMITDGSASGISKCRFLKIREGRIIKEILPTGRVEHTEECTCGFASNKTIECACRDNSYTAKRPFVKLNVETDTAEIRLMCTKTYLDTPRPDDGSIAGPCESNGDKWLGGIKGGFVHQRMASKIGRWYSRTMSKTNRMGMELYVRYDGDPWTDSDALTLSGVMVSIEEPGWYSFGFEIKDKKCDVPCIGIEMVHDGGKDTWHSAATAIYCLMGSGQLLWDTVTGVDMAL,CCC(CC)Nc1cc(C(=O)O)ccc1N1C(=O)CC[C@@]1(CN)CO,/bml/acmwhb/Repositories/Lab_Repositories/PoseBench/data/astex_diverse_set/astex_diverse_holo_aligned_esmfold_structures/1VCJ_IBA_holo_aligned_esmfold_protein.pdb -1G9V_RQ3,VLSPADKTNVKAAWGKVGAHAGEYGAEALERMFLSFPTTKTYFPHFDLSHGSAQVKGHGKKVADALTNAVAHVDDMPNALSALSDLHAHKLRVDPVNFKLLSHCLLVTLAAHLPAEFTPAVHASLDKFLASVSTVLTSKYR|VHLTPEEKSAVTALWGKVNVDEVGGEALGRLLVVYPWTQRFFESFGDLSTPDAVMGNPKVKAHGKKVLGAFSDGLAHLDNLKGTFATLSELHCDKLHVDPENFRLLGNVLVCVLAHHFGKEFTPPVQAAYQKVVAGVANALAHKYH|VLSPADKTNVKAAWGKVGAHAGEYGAEALERMFLSFPTTKTYFPHFDLSHGSAQVKGHGKKVADALTNAVAHVDDMPNALSALSDLHAHKLRVDPVNFKLLSHCLLVTLAAHLPAEFTPAVHASLDKFLASVSTVLTSKYR|VHLTPEEKSAVTALWGKVNVDEVGGEALGRLLVVYPWTQRFFESFGDLSTPDAVMGNPKVKAHGKKVLGAFSDGLAHLDNLKGTFATLSELHCDKLHVDPENFRLLGNVLVCVLAHHFGKEFTPPVQAAYQKVVAGVANALAHKYH,Cc1cc(C)cc(NC(=O)Cc2ccc(OC(C)(C)C(=O)O)cc2)c1,/bml/acmwhb/Repositories/Lab_Repositories/PoseBench/data/astex_diverse_set/astex_diverse_holo_aligned_esmfold_structures/1G9V_RQ3_holo_aligned_esmfold_protein.pdb 
-1UOU_CMU,PKQLPELIRMKRDGGRLSEADIRGFVAAVVNGSAQGAQIGAMLMAIRLRGMDLEETSVLTQALAQSGQQLEWPEAWRQQLVDKHSTGGVGDKVSLVLAPALAACGCKVPMISGRGLGHTGGTLDKLESIPGFNVIQSPEQMQVLLDQAGCCIVGQSEQLVPADGILYAARDVTATVDSLPLITASILSKKLVEGLSALVVDVKFGAVFPNQEQARELAKTLVGVGASLGLRVAAALTAMDKPLGRCVGHALEVEEALLCMDGAGPPDLRDLVTTLGGALLWLSGHAGTQAQGAARVAAALDDGSALGRFERMLAAQGVDPGLARALCSGSPAERRQLLPRAREQEELLAPADGTVELVRALPLALVLHELGALRLGVGAELLVDVGQRLRRGTPWLRVHRDGPALSGPQSRALQEALVLSDRAPFAAPLPFAELVLPP,N=C1CCCN1Cc1[nH]c(=O)[nH]c(=O)c1Cl,/bml/acmwhb/Repositories/Lab_Repositories/PoseBench/data/astex_diverse_set/astex_diverse_holo_aligned_esmfold_structures/1UOU_CMU_holo_aligned_esmfold_protein.pdb -1TZ8_DES,PLMVKVLDAVRGSPAINVAVHVFRKAADDTWEPFASGKTSESGELHGLTTEEEFVEGIYKVEIDTKSYWKALGISPFHEHAEVVFTANDSGPRRYTIAALLSPYSYSTTAVVTN|PLMVKVLDAVRGSPAINVAVHVFRKAADDTWEPFASGKTSESGELHGLTTEEEFVEGIYKVEIDTKSYWKALGISPFHEHAEVVFTANDSGPRRYTIAALLSPYSYSTTAVVTN|CPLMVKVLDAVRGSPAINVAVHVFRKAADDTWEPFASGKTSESGELHGLTTEEEFVEGIYKVEIDTKSYWKALGISPFHEHAEVVFTANDSGPRRYTIAALLSPYSYSTTAVVTNP|CPLMVKVLDAVRGSPAINVAVHVFRKAADDTWEPFASGKTSESGELHGLTTEEEFVEGIYKVEIDTKSYWKALGISPFHEHAEVVFTANDSGPRRYTIAALLSPYSYSTTAVVTN,CC/C(=C(/CC)c1ccc(O)cc1)c1ccc(O)cc1,/bml/acmwhb/Repositories/Lab_Repositories/PoseBench/data/astex_diverse_set/astex_diverse_holo_aligned_esmfold_structures/1TZ8_DES_holo_aligned_esmfold_protein.pdb -1TOW_CRZ,CDAFVGTWKLVSSENFDDYMKEVGVGFATRKVAGMAKPNMIISVNGDVITIKSESTFKNTEISFILGQEFDEVTADDRKVKSTITLDGGVLVHVQKWDGKSTTIKRKREDDKLVVECVMKGVTSTRVYERA,O=C(O)CCCn1c2ccccc2c2ccccc21,/bml/acmwhb/Repositories/Lab_Repositories/PoseBench/data/astex_diverse_set/astex_diverse_holo_aligned_esmfold_structures/1TOW_CRZ_holo_aligned_esmfold_protein.pdb 
-1XOQ_ROF,TEQEDVLAKELEDVNKWGLHVFRIAELSGNRPLTVIMHTIFQERDLLKTFKIPVDTLITYLMTLEDHYHADVAYHNNIHAADVVQSTHVLLSTPALEAVFTDLEILAAIFASAIHDVDHPGVSNQFLINTNSELALMYNDSSVLENHHLAVGFKLLQEENCDIFQNLTKKQRQSLRKMVIDIVLATDMSKHMNLLADLKTMVETKKVTSSGVLLLDNYSDRIQVLQNMVHCADLSNPTKPLQLYRQWTDRIMEEFFRQGDRERERGMEISPMCDKHNASVEKSQVGFIDYIVHPLWETWADLVHPDAQDILDTLEDNREWYQSTIP|TEQEDVLAKELEDVNKWGLHVFRIAELSGNRPLTVIMHTIFQERDLLKTFKIPVDTLITYLMTLEDHYHADVAYHNNIHAADVVQSTHVLLSTPALEAVFTDLEILAAIFASAIHDVDHPGVSNQFLINTNSELALMYNDSSVLENHHLAVGFKLLQEENCDIFQNLTKKQRQSLRKMVIDIVLATDMSKHMNLLADLKTMVETKKVVLLLDNYSDRIQVLQNMVHCADLSNPTKPLQLYRQWTDRIMEEFFRQGDRERERGMEISPMCDKHNASVEKSQVGFIDYIVHPLWETWADLVHPDAQDILDTLEDNREWYQSTIP,O=C(Nc1c(Cl)cncc1Cl)c1ccc(OC(F)F)c(OCC2CC2)c1,/bml/acmwhb/Repositories/Lab_Repositories/PoseBench/data/astex_diverse_set/astex_diverse_holo_aligned_esmfold_structures/1XOQ_ROF_holo_aligned_esmfold_protein.pdb -1MZC_BNE,FVSLDSPSYVLYRDRAEWADIDPVPQNDGPNPVVQIIYSDKFRDVYDYFRAVLQRDERSERAFKLTRDAIELNAANYTVWHFRRVLLKSLQKDLHEEMNYITAIIEEQPKNYQVWHHRRVLVEWLRDPSQELEFIADILNQDAKNYHAWQHRQWVIQEFKLWDNELQYVDQLLKEDVRNNSVWNQRYFVISNTTGYNDRAVLEREVQYTLEMIKLVPHNESAWNYLKGILQDRGLSKYPNLLNQLLDLQPSHSSPYLIAFLVDIYEDMLENQCDNKEDILNKALELCEILAKEKDTIRKEYWRYIGRSLQSKH|PVWSEPLYSLRPEHARERLQDDSVETVTSIEQAKVEEKIQEVFSSYKFNHLVPRLVLQREKHFHYLKRGLRQLTDAYECLDASRPWLCYWILHSLELLDEPIPQIVATDVCQFLELCQSPEGGFGGGPGQYPHLAPTYAAVNALCIIGTEEAYDIINREKLLQYLYSLKQPDGSFLMHVGGEVDVRSAYCAASVASLTNIITPDLFEGTAEWIARCQNWEGGIGGVPGMEAHGGYTFCGLAALVILKRERSLNLKSLLQWVTSRQMRFEGGFQGRCNKLVDGCYSFWQAGLLPLLHRALHAQGDPALSMSHWMFHQQALQEYILMCCQCPAGGLLDKPGKSRDFYHTCYCLSGLSIAQHFGSGAMLHDVVLGVPENALQPTHPVYNIGPDKVIQATTYFLQKPVPGF,CC[C@@]1(c2cccc(Oc3cc([C@](C)(N)c4cncn4C)ccc3C#N)c2)CCCCN(C)C1=O,/bml/acmwhb/Repositories/Lab_Repositories/PoseBench/data/astex_diverse_set/astex_diverse_holo_aligned_esmfold_structures/1MZC_BNE_holo_aligned_esmfold_protein.pdb 
-1N46_PFA,KPEPTDEEWELIKTVTEAHVATNAQWKQKRKFLPEDIGQAPIVNAPEGGKVDLEAFSHFTKIITPAITRVVDFAKKLPMFCELPCEDQIILLKGCCMEIMSLRAAVRYDPESETLTLNGEMAVTRGQLKNGGLGVVSDAIFDLGMSLSSFNLDDTEVALLQAVLLMSSDRPGLACVERIEKYQDSFLLAFEHYINYRKHHVTHFWPKLLMKVTDLRMIGACHASRFLHMKVECPTELFPPLFLEVFED|KPEPTDEEWELIKTVTEAHVATNAQWKQKRKFLPEDIGQAKVDLEAFSHFTKIITPAITRVVDFAKKLPMFCELPCEDQIILLKGCCMEIMSLRAAVRYDPESETLTLNGEMAVTRGQLKNGGLGVVSDAIFDLGMSLSSFNLDDTEVALLQAVLLMSSDRPGLACVERIEKYQDSFLLAFEHYINYRKHHVTHFWPKLLMKVTDLRMIGACHASRFLHMKVECPTELFPPLFLEVFED,Cc1cc(-n2ncc(=O)[nH]c2=O)cc(C)c1Oc1ccc(O)c(C(C)C)c1,/bml/acmwhb/Repositories/Lab_Repositories/PoseBench/data/astex_diverse_set/astex_diverse_holo_aligned_esmfold_structures/1N46_PFA_holo_aligned_esmfold_protein.pdb -1R9O_FLP,RGKLPPGPTPLPLQIGIKDISKSLTNLSKVYGPVFTLYFGLKPIVVLHGYEAVKEALIDLGEEFSGRGIFPLAERANRGFGIVFSNGKKWKEIRRFSLMTLRNFGMGKRSIEDRVQEEARCLVEELRKTKASPCDPTFILGCAPCNVICSIIFHKRFDYKDQQFLNLMEKLNENIKILSSPWIPIIDYFPGTHNKLLKNVAFMKSYILEKVKEHQESMDMNNPQDFIDCFLMKMEKEKHNQPSEFTIESLENTAVDLFGAGTETTSTTLRYALLLLLKHPEVTAKVQEEIERVIGRNRSPCMQDRSHMPYTDAVVHEVQRYIDLLPTSLPHAVTCDIKFRNYLIPKGTTILISLTSVLHDNKEFPNPEMFDPHHFLDEGGNFKKSKYFMPFSAGKRICVGEALAGMELFLFLTSILQNFNLKSLVDPKNLDTTPVVNGFASVPPFYQLCFIPIHH,C[C@H](C(=O)O)c1ccc(-c2ccccc2)c(F)c1,/bml/acmwhb/Repositories/Lab_Repositories/PoseBench/data/astex_diverse_set/astex_diverse_holo_aligned_esmfold_structures/1R9O_FLP_holo_aligned_esmfold_protein.pdb 
-1K3U_IAD,MERYENLFAQLNDRREGAFVPFVTLGDPGIEQSLKIIDTLIDAGADALELGVPFSDPLADGPTIQNANLRAFAAGVTPAQCFEMLALIREKHPTIPIGLLMYANLVFNNGIDAFYARCEQVGVDSVLVADVPVEESAPFRQAALRHNIAPIFICPPNADDDLLRQVASYGRGYTYLLSRSGVTGAENRGALPLHHLIEKLKEYHAAPALQGFGISSPEQVSAAVRAGAAGAISGSAIVKIIEKNLASPKQMLAELRSFVSAMKAASRA|TTLLNPYFGEFGGMYVPQILMPALNQLEEAFVSAQKDPEFQAQFADLLKNYAGRPTALTKCQNITAGTRTTLYLKREDLLHGGAHKTNQVLGQALLAKRMGKSEIIAETGAGQHGVASALASALLGLKCRIYMGAKDVERQSPNVFRMRLMGAEVIPVHSGSATLKDACNEALRDWSGSYETAHYMLGTAAGPHPYPTIVREFQRMIGEETKAQILDKEGRLPDAVIACVGGGSNAIGMFADFINDTSVGLIGVEPGGHGIETGEHGAPLKHGRVGIYFGMKAPMMQTADGQIEESYSISAGLDFPSVGPQHAYLNSIGRADYVSITDDEALEAFKTLCRHEGIIPALESSHALAHALKMMREQPEKEQLLVVNLSGRGDKDIFTVHDILKARG,O=C(O)C[C@H](NC(=O)Cc1c[nH]c2ccccc12)C(=O)O,/bml/acmwhb/Repositories/Lab_Repositories/PoseBench/data/astex_diverse_set/astex_diverse_holo_aligned_esmfold_structures/1K3U_IAD_holo_aligned_esmfold_protein.pdb -1X8X_TYR,MASSNLIKQLQERGLVAQVTDEEALAERLAQGPIALYCGFDPTADSLHLGHLVPLLCLKRFQQAGHKPVALVGGATGLIGDPSFKAAERKLNTEETVQEWVDKIRKQVAPFLDFDCGENSAIAANNYDWFGNMNVLTFLRDIGKHFSVNQMINKEAVKQRLNREDQGISFTEFSYNLLQGYDFACLNKQYGVVLQIGGSDQWGNITSGIDLTRRLHQNQVFGLTVPLITKADGTKFGKTEGGAVWLDPKKTSPYKFYQFWINTADADVYRFLKFFTFMSIEEINALEEEDKNSGKAPRAQYVLAEQVTRLVHGEEGLQAAKR,N[C@@H](Cc1ccc(O)cc1)C(=O)O,/bml/acmwhb/Repositories/Lab_Repositories/PoseBench/data/astex_diverse_set/astex_diverse_holo_aligned_esmfold_structures/1X8X_TYR_holo_aligned_esmfold_protein.pdb -1S19_MC9,LRPKLSEEQQRIIAILLDAHHKTYDPTYSDFCQFRPPVRVNDGGGSVTLELSQLSMLPHLADLVSYSIQKVIGFAKMIPGFRDLTSEDQIVLLKSSAIEVIMLRSNESFTMDDMSWTCGNQDYKYRVSDVTKAGHSLELIEPLIKFQVGLKKLNLHEEEHVLLMAICIVSPDRPGVQDAALIEAIQDRLSNTLQTYIRCRHPPPGSHLLYAKMIQKLADLRSLNEEHSKQYRCLSFQPECSMKLTPLVLEVFG,C=C1/C(=C\C=C2/CCC[C@]3(C)[C@@H]([C@H](C)/C=C/[C@@H](O)C4CC4)CC[C@@H]23)C[C@@H](O)C[C@@H]1O,/bml/acmwhb/Repositories/Lab_Repositories/PoseBench/data/astex_diverse_set/astex_diverse_holo_aligned_esmfold_structures/1S19_MC9_holo_aligned_esmfold_protein.pdb 
-1OF1_SCT,MPTLLRVYIDGPHGMGKTTTTQLLVADDIVYVPEPMTYWRVLGASETIANIYTTQHRLDQGEISAGDAAVVMTSAQITMGMPYAVTDAVLAPHIGGEAPPPALTLIFDRHPIAALLCYPAARYLMGSMTPQAVLAFVALIPPTLPGTNIVLGALPEDRHIDRLAKRQRPGERLDLAMLAAIRRVYGLLANTVRYLQCGGSWREDWGQLSGTGPRPHIGDTLFTLFRAPELLAPNGDLYNVFAWALDVLAKRLRSMHVFILDYDQSPAGCRDALLQLTSGMVQTHVTTPGSIPTICDLARTFAREMGE|MPTLLRVYIDGPHGMGKTTTTQLLVALGSRDDIVYVPEPMTYWRVLGASETIANIYTTQHRLDQGEISAGDAAVVMTSAQITMGMPYAVTDAVLAPHIGGEAPPPALTLIFDRHPIAALLCYPAARYLMGSMTPQAVLAFVALIPPTLPGTNIVLGALPEDRHIDRLAKRGERLDLAMLAAIRRVYGLLANTVRYLQCGGSWREDWGQLSGTPQSNAGPRPHIGDTLFTLFRAPELLAPNGDLYNVFAWALDVLAKRLRSMHVFILDYDQSPAGCRDALLQLTSGMVQTHVTTPGSIPTICDLARTFAREMGE,Cc1cn([C@@]23C[C@H](O)[C@@H](CO)[C@@H]2C3)c(=O)[nH]c1=O,/bml/acmwhb/Repositories/Lab_Repositories/PoseBench/data/astex_diverse_set/astex_diverse_holo_aligned_esmfold_structures/1OF1_SCT_holo_aligned_esmfold_protein.pdb -1LPZ_CMB,RKLCSLDNGDCDQFCHEEQNSVVCSCARGYTLADNGKACIPTGPYPCGKQTLE|IVGGQECKDGECPWQALLINEENEGFCGGTILSEFYILTAAHCLYQAKRFKVRVGDRNTEQEEGGEAVHEVEVVIKHNRFTKETYDFDIAVLRLKTPITFRMNVAPACLPERDWAESTLMTQKTGIVSGFGRTHEKGRQSTRLKMLEVPYVDRNSCKLSSSFIITQNMFCAGYDTKQEDACQGDSGGPHVTRFKDTYFVTGIVSWGEGCARKGKYGIYTKVTAFLKWIDRSMKT,Cc1cccc2c1cc(C(=O)NCc1cc(Cl)cc(Cl)c1)n2Cc1cccc(C(=N)N)c1,/bml/acmwhb/Repositories/Lab_Repositories/PoseBench/data/astex_diverse_set/astex_diverse_holo_aligned_esmfold_structures/1LPZ_CMB_holo_aligned_esmfold_protein.pdb -1T46_STI,GNNYVYIDPTQLPYDHKWEFPRNRLSFGKTLGAGAFGKVVEATAYGLIKSDAAMTVAVKMLKPSAHLTEREALMSELKVLSYLGNHMNIVNLLGACTIGGPTLVITEYCCYGDLLNFLRRKRDSFLALDLEDLLSFSYQVAKGMAFLASKNCIHRDLAARNILLTHGRITKICDFGLARDIKNDSNYVVKGNARLPVKWMAPESIFNCVYTFESDVWSYGIFLWELFSLGSSPYPGMPVDSKFYKMIKEGFRMLSPEHAPAEMYDIMKTCWDADPLKRPTFKQIVQLIEKQISESTN,Cc1ccc(NC(=O)c2ccc(CN3CCN(C)CC3)cc2)cc1Nc1nccc(-c2cccnc2)n1,/bml/acmwhb/Repositories/Lab_Repositories/PoseBench/data/astex_diverse_set/astex_diverse_holo_aligned_esmfold_structures/1T46_STI_holo_aligned_esmfold_protein.pdb 
-1XOZ_CIA,EEETRELQSLAAAVVPSAQTLKITDFSFSDFELSDLETALCTIRMFTDLNLVQNFQMKHEVLCRWILSVKKNYRKNVAYHNWRHAFNTAQCMFAALKAGKIQNKLTDLEILALLIAALSHDLDHPGVSNQFLINTNSELALMYNDESVLEHHHFDQCLMILNSPGNQILSGLSIEEYKTTLKIIKQAILATDLALYIKRRGEFFELIRKNQFNLEDPHQKELFLAMLMTACDLSAITKPWPIQQRIAELVATEFFDQGDRERKELNIEPTDLMNREKKNKIPSMQVGFIDAICLQLYEALTHVSEDCFPLLDGCRKNRQKWQALAE,CN1CC(=O)N2[C@H](c3ccc4c(c3)OCO4)c3[nH]c4ccccc4c3C[C@@H]2C1=O,/bml/acmwhb/Repositories/Lab_Repositories/PoseBench/data/astex_diverse_set/astex_diverse_holo_aligned_esmfold_structures/1XOZ_CIA_holo_aligned_esmfold_protein.pdb -1UML_FR4,TPAFDKPKVELHVHLDGAIKPETILYYGKRRGIALPADTPEELQNIIGMDKPLTLPDFLAKFDYYMPAIAGCRDAIKRIAYEFVEMKAKDGVVYVEVRYSPHLLANSKVEPIPWNQAEGDLTPDEVVSLVNQGLQEGERDFGVKVRSILCCMRHQPSWSSEVVELCKKYREQTVVAIDLAGDETIEGSSLFPGHVQAYAEAVKSGVHRTVHAGEVGSANVVKEAVDTLKTERLGHGYHTLEDTTLYNRLRQENMHFEICPWSSYLTGAWKPDTEHAVIRFKNDQVNYSLNTDDPLIFKSTLDTDYQMTKKDMGFTEEEFKRLNINAAKSSFLPEDEKKELLDLLYKAYR,NC(=O)c1cn([C@@H](CO)CCn2ccc3ccc(NC(=O)CCc4ccccc4)cc32)cn1,/bml/acmwhb/Repositories/Lab_Repositories/PoseBench/data/astex_diverse_set/astex_diverse_holo_aligned_esmfold_structures/1UML_FR4_holo_aligned_esmfold_protein.pdb 
-1SQ5_PAU,MTPYLQFDRNQWAALRDMLSEDEIARLKGINEDLSLEEVAEIYLPLSRLLNFYISSNLRRQAVLEQFLGTNQRIPYIISIAGSVAVGKSTTARVLQALLSRWPEHRRVELITTDGFLHPNQVLKERGLMKKKGFPESYDMHRLVKFVSDLKSGVPNVTAPVYSHLIYDVIPDGDKTVVPDILILEGLNVLQSGMDYPHDPHHVFVSDFVDFSIYVDAPEDLLQTWYINRFLKFREGAFTDPDSYFHNYAKLTKEEAIKTAMTLWKEINWLNLKQNILPTRERASLILTKSANHAVEEVRLRK|MTPYLQFDRNQWAALRLSEDEIARLKGINEDLSLEEVAEIYLPLSRLLNFYISSNLRRQAVLEQFLGTNGQRIPYIISIAGSVAVGKSTTARVLQALLSRWPEHRRVELITTDGFLHPNQVLKERGLMKKKGFPESYDMHRLVKFVSDLKSGVPNVTAPVYSHLIYDVIPDGDKTVVQPDILILEGLNVLQSGMDYPHDPHHVFVSDFVDFSIYVDAPEDLLQTWYINRFLKFREGAFTDPDSYFHNYAKLTKEEAIKTAMTLWKEINWLNLKQNILPTRERASLILTKSANHAVEEVRLRK|MTPYLQFDRNQWAALRMTLSEDEIARLKGINEDLSLEEVAEIYLPLSRLLNFYISSNLRRQAVLEQFLGTNRPYIISIAGSVAVGKSTTARVLQALLSRWPERVELITTDGFLHPNQVLKERGLMKKKGFPESYDMHRLVKFVSDLKSGVPNVTAPVYSHLIYDVIPDGDKTVVPDILILEGLNVLQSGMDYPHDPHHVFVSDFVDFSIYVDAPEDLLQTWYINRFLKFREGAFTDPDSYFHNYAKLTKEEAIKTAMTLWKEINWLNLKQNILPTRERASLILTKSANHAVEEVRLRK|MTPYLQFDRNQWAALRDMTLSEDEIARLKGINEDLSLEEVAEIYLPLSRLLNFYISSNLRRQAVLEQFLGTNGQRIPYIISIAGSVAVGKSTTARVLQALLSRWPEHRRVELITTDGFLHPNQVLKERGLMKKKGFPESYDMHRLVKFVSDLKSGVPNVTAPVYSHLIYDVIPDGDKTVVPDILILEGLNVLQSGMDYPHDPHHVFVSDFVDFSIYVDAPEDLLQTWYINRFLKFREGAFTDPDSYFHNYAKLTKEEAIKTAMTLWKEINWLNLKQNILPTRERASLILTKSANHAVEEVRLRK,CC(C)(CO)[C@@H](O)C(=O)NCCC(=O)O,/bml/acmwhb/Repositories/Lab_Repositories/PoseBench/data/astex_diverse_set/astex_diverse_holo_aligned_esmfold_structures/1SQ5_PAU_holo_aligned_esmfold_protein.pdb 
-1MMV_3AR,RFLKVKNWETDVVLTDTLHLKSTLETGCTEHICMGSIMLPTKDQLFPLAKEFLDQYYSSIKRFGSKAHMDRLEEVNKEIESTSTYQLKDTELIYGAKHAWRNASRCVGRIQWSKLQVFDARDCTTAHGMFNYICNHVKYATNKGNLRSAITIFPQRTDGKHDFRVWNSQLIRYAGYKQPDGSTLGDPANVQFTEICIQQGWKAPRGRFDVLPLLLQANGNDPELFQIPPELVLEVPIRHPKFDWFKDLGLKWYGLPAVSNMLLEIGGLEFSACPFSGWYMGTEIGVRDYCDNSRYNILEEVAKKMDLDMRKTSSLWKDQALVEINIAVLYSFQSDKVTIVDHHSATESFIKHMENEYRCRGGCPADWVWIVPPMSGSITPVFHQEMLNYRLTPSFEYQPDPWNTHVW|RFLKVKNWETDVVLTDTLHLKSTLETGCTEHICMGSIMLPRTKDQLFPLAKEFLDQYYSSIKRFGSKAHMDRLEEVNKEIESTSTYQLKDTELIYGAKHAWRNASRCVGRIQWSKLQVFDARDCTTAHGMFNYICNHVKYATNKGNLRSAITIFPQRTDGKHDFRVWNSQLIRYAGYKQPDGSTLGDPANVQFTEICIQQGWKAPRGRFDVLPLLLQANGNDPELFQIPPELVLEVPIRHPKFDWFKDLGLKWYGLPAVSNMLLEIGGLEFSACPFSGWYMGTEIGVRDYCDNSRYNILEEVAKKMDLDMRKTSSLWKDQALVEINIAVLYSFQSDKVTIVDHHSATESFIKHMENEYRCRGGCPADWVWIVPPMSGSITPVFHQEMLNYRLTPSFEYQPDPWNTHVWK,CCCNC(=[NH2+])NCCC[C@H](N)C(=O)O,/bml/acmwhb/Repositories/Lab_Repositories/PoseBench/data/astex_diverse_set/astex_diverse_holo_aligned_esmfold_structures/1MMV_3AR_holo_aligned_esmfold_protein.pdb 
-1J3J_CP6,MMEQVCDVFDIYAICACCKVESKNEGKKNEVFNNYTFRGLGNKGVLPWKCNSLDMKYFRAVTTYVNESKYEKLKYKRCKYLNKETKKLQNVVVMGRTNWESIPKKFKPLSNRINVILSRTLKKEDFDEDVYIINKVEDLIVLLGKLNYYKCFIIGGSVVYQEFLEKKLIKKIYFTRINSTYECDVFFPEINENEYQIISVSDVYTSNNTTLDFIIYKKTNN|MMEQVCDVFDIYAICACCKVESKNEGKKNEVFNNYTFRGLGNKGVLPWKCNSLDMKYFRAVTTYVNESKYEKLKYKRCKYLKLQNVVVMGRTNWESIPKKFKPLSNRINVILSRTLKKEDFDEDVYIINKVEDLIVLLGKLNYYKCFIIGGSVVYQEFLEKKLIKKIYFTRINSTYECDVFFPEINENEYQIISVSDVYTSNNTTLDFIIYKKTNN|DDEEEDDFVYFNFNKEKEEKNKNSIHPNDFQIYNSLKYKYHPEYQYLNIIYDIMMNGNKQSDRTGVGVLSKFGYIMKFDLSQYFPLLTTKKLFLRGIIEELLWFIRGETNGNTLLNKNVRIWEANGTREFLDNRKLFHREVNDLGPIYGFQWRHFGAEYTNMYDNYENKGVDQLKNIINLIKNDPTSRRILLCAWNVKDLDQMALPPCHILCQFYVFDGKLSCIMYQRSCDLGLGVPFNIASYSIFTHMIAQVCNLQPAQFIHVLGNAHVYNNHIDSLKIQLNRIPYPFPTLKLNPDIKNIEDFTISDFTIQNYVHHEKISMDMAA|DDEEEDDFVYFNFNKEKEEKNKNSIHPNDFQIYNSLKYKYHPEYQYLNIIYDIMMNGNKQSDRTGVGVLSKFGYIMKFDLSQYFPLLTTKKLFLRGIIEELLWFIRGETNGNTLLNKNVRIWEANGTREFLDNRKLFHREVNDLGPIYGFQWRHFGAEYTNMYDNYENKGVDQLKNIINLIKNDPTSRRILLCAWNVKDLDQMALPPCHILCQFYVFDGKLSCIMYQRSCDLGLGVPFNIASYSIFTHMIAQVCNLQPAQFIHVLGNAHVYNNHIDSLKIQLNRIPYPFPTLKLNPDIKNIEDFTISDFTIQNYVHHEKISMDMAA,CCc1nc(N)nc(N)c1-c1ccc(Cl)cc1,/bml/acmwhb/Repositories/Lab_Repositories/PoseBench/data/astex_diverse_set/astex_diverse_holo_aligned_esmfold_structures/1J3J_CP6_holo_aligned_esmfold_protein.pdb \ No newline at end of file +1VCJ_IBA,PEWTYPRLSCQGSTFQKALLISPHRFGEIKGNSAPLIIREPFVACGPKECRHFALTHYAAQPGGYYNGTRKDRNKLRHLVSVKLGKIPTVENSIFHMAAWSGSACHDGREWTYIGVDGPDNDALVKIKYGEAYTDTYHSYAHNILRTQESACNCIGGDCYLMITDGSASGISKCRFLKIREGRIIKEILPTGRVEHTEECTCGFASNKTIECACRDNSYTAKRPFVKLNVETDTAEIRLMCTKTYLDTPRPDDGSIAGPCESNGDKWLGGIKGGFVHQRMASKIGRWYSRTMSKTNRMGMELYVRYDGDPWTDSDALTLSGVMVSIEEPGWYSFGFEIKDKKCDVPCIGIEMVHDGGKDTWHSAATAIYCLMGSGQLLWDTVTGVDMAL,CCC(CC)Nc1cc(C(=O)O)ccc1N1C(=O)CC[C@@]1(CN)CO,data/astex_diverse_set/astex_diverse_holo_aligned_esmfold_structures/1VCJ_IBA_holo_aligned_esmfold_protein.pdb 
+1G9V_RQ3,VLSPADKTNVKAAWGKVGAHAGEYGAEALERMFLSFPTTKTYFPHFDLSHGSAQVKGHGKKVADALTNAVAHVDDMPNALSALSDLHAHKLRVDPVNFKLLSHCLLVTLAAHLPAEFTPAVHASLDKFLASVSTVLTSKYR|VHLTPEEKSAVTALWGKVNVDEVGGEALGRLLVVYPWTQRFFESFGDLSTPDAVMGNPKVKAHGKKVLGAFSDGLAHLDNLKGTFATLSELHCDKLHVDPENFRLLGNVLVCVLAHHFGKEFTPPVQAAYQKVVAGVANALAHKYH|VLSPADKTNVKAAWGKVGAHAGEYGAEALERMFLSFPTTKTYFPHFDLSHGSAQVKGHGKKVADALTNAVAHVDDMPNALSALSDLHAHKLRVDPVNFKLLSHCLLVTLAAHLPAEFTPAVHASLDKFLASVSTVLTSKYR|VHLTPEEKSAVTALWGKVNVDEVGGEALGRLLVVYPWTQRFFESFGDLSTPDAVMGNPKVKAHGKKVLGAFSDGLAHLDNLKGTFATLSELHCDKLHVDPENFRLLGNVLVCVLAHHFGKEFTPPVQAAYQKVVAGVANALAHKYH,Cc1cc(C)cc(NC(=O)Cc2ccc(OC(C)(C)C(=O)O)cc2)c1,data/astex_diverse_set/astex_diverse_holo_aligned_esmfold_structures/1G9V_RQ3_holo_aligned_esmfold_protein.pdb +1UOU_CMU,PKQLPELIRMKRDGGRLSEADIRGFVAAVVNGSAQGAQIGAMLMAIRLRGMDLEETSVLTQALAQSGQQLEWPEAWRQQLVDKHSTGGVGDKVSLVLAPALAACGCKVPMISGRGLGHTGGTLDKLESIPGFNVIQSPEQMQVLLDQAGCCIVGQSEQLVPADGILYAARDVTATVDSLPLITASILSKKLVEGLSALVVDVKFGAVFPNQEQARELAKTLVGVGASLGLRVAAALTAMDKPLGRCVGHALEVEEALLCMDGAGPPDLRDLVTTLGGALLWLSGHAGTQAQGAARVAAALDDGSALGRFERMLAAQGVDPGLARALCSGSPAERRQLLPRAREQEELLAPADGTVELVRALPLALVLHELGALRLGVGAELLVDVGQRLRRGTPWLRVHRDGPALSGPQSRALQEALVLSDRAPFAAPLPFAELVLPP,N=C1CCCN1Cc1[nH]c(=O)[nH]c(=O)c1Cl,data/astex_diverse_set/astex_diverse_holo_aligned_esmfold_structures/1UOU_CMU_holo_aligned_esmfold_protein.pdb +1TZ8_DES,PLMVKVLDAVRGSPAINVAVHVFRKAADDTWEPFASGKTSESGELHGLTTEEEFVEGIYKVEIDTKSYWKALGISPFHEHAEVVFTANDSGPRRYTIAALLSPYSYSTTAVVTN|PLMVKVLDAVRGSPAINVAVHVFRKAADDTWEPFASGKTSESGELHGLTTEEEFVEGIYKVEIDTKSYWKALGISPFHEHAEVVFTANDSGPRRYTIAALLSPYSYSTTAVVTN|CPLMVKVLDAVRGSPAINVAVHVFRKAADDTWEPFASGKTSESGELHGLTTEEEFVEGIYKVEIDTKSYWKALGISPFHEHAEVVFTANDSGPRRYTIAALLSPYSYSTTAVVTNP|CPLMVKVLDAVRGSPAINVAVHVFRKAADDTWEPFASGKTSESGELHGLTTEEEFVEGIYKVEIDTKSYWKALGISPFHEHAEVVFTANDSGPRRYTIAALLSPYSYSTTAVVTN,CC/C(=C(/CC)c1ccc(O)cc1)c1ccc(O)cc1,data/astex_diverse_set/astex_diverse_holo_aligned_esmfold_structures/1TZ8_DES_holo_aligned_esmfold_protein.pdb 
+1TOW_CRZ,CDAFVGTWKLVSSENFDDYMKEVGVGFATRKVAGMAKPNMIISVNGDVITIKSESTFKNTEISFILGQEFDEVTADDRKVKSTITLDGGVLVHVQKWDGKSTTIKRKREDDKLVVECVMKGVTSTRVYERA,O=C(O)CCCn1c2ccccc2c2ccccc21,data/astex_diverse_set/astex_diverse_holo_aligned_esmfold_structures/1TOW_CRZ_holo_aligned_esmfold_protein.pdb +1XOQ_ROF,TEQEDVLAKELEDVNKWGLHVFRIAELSGNRPLTVIMHTIFQERDLLKTFKIPVDTLITYLMTLEDHYHADVAYHNNIHAADVVQSTHVLLSTPALEAVFTDLEILAAIFASAIHDVDHPGVSNQFLINTNSELALMYNDSSVLENHHLAVGFKLLQEENCDIFQNLTKKQRQSLRKMVIDIVLATDMSKHMNLLADLKTMVETKKVTSSGVLLLDNYSDRIQVLQNMVHCADLSNPTKPLQLYRQWTDRIMEEFFRQGDRERERGMEISPMCDKHNASVEKSQVGFIDYIVHPLWETWADLVHPDAQDILDTLEDNREWYQSTIP|TEQEDVLAKELEDVNKWGLHVFRIAELSGNRPLTVIMHTIFQERDLLKTFKIPVDTLITYLMTLEDHYHADVAYHNNIHAADVVQSTHVLLSTPALEAVFTDLEILAAIFASAIHDVDHPGVSNQFLINTNSELALMYNDSSVLENHHLAVGFKLLQEENCDIFQNLTKKQRQSLRKMVIDIVLATDMSKHMNLLADLKTMVETKKVVLLLDNYSDRIQVLQNMVHCADLSNPTKPLQLYRQWTDRIMEEFFRQGDRERERGMEISPMCDKHNASVEKSQVGFIDYIVHPLWETWADLVHPDAQDILDTLEDNREWYQSTIP,O=C(Nc1c(Cl)cncc1Cl)c1ccc(OC(F)F)c(OCC2CC2)c1,data/astex_diverse_set/astex_diverse_holo_aligned_esmfold_structures/1XOQ_ROF_holo_aligned_esmfold_protein.pdb 
+1MZC_BNE,FVSLDSPSYVLYRDRAEWADIDPVPQNDGPNPVVQIIYSDKFRDVYDYFRAVLQRDERSERAFKLTRDAIELNAANYTVWHFRRVLLKSLQKDLHEEMNYITAIIEEQPKNYQVWHHRRVLVEWLRDPSQELEFIADILNQDAKNYHAWQHRQWVIQEFKLWDNELQYVDQLLKEDVRNNSVWNQRYFVISNTTGYNDRAVLEREVQYTLEMIKLVPHNESAWNYLKGILQDRGLSKYPNLLNQLLDLQPSHSSPYLIAFLVDIYEDMLENQCDNKEDILNKALELCEILAKEKDTIRKEYWRYIGRSLQSKH|PVWSEPLYSLRPEHARERLQDDSVETVTSIEQAKVEEKIQEVFSSYKFNHLVPRLVLQREKHFHYLKRGLRQLTDAYECLDASRPWLCYWILHSLELLDEPIPQIVATDVCQFLELCQSPEGGFGGGPGQYPHLAPTYAAVNALCIIGTEEAYDIINREKLLQYLYSLKQPDGSFLMHVGGEVDVRSAYCAASVASLTNIITPDLFEGTAEWIARCQNWEGGIGGVPGMEAHGGYTFCGLAALVILKRERSLNLKSLLQWVTSRQMRFEGGFQGRCNKLVDGCYSFWQAGLLPLLHRALHAQGDPALSMSHWMFHQQALQEYILMCCQCPAGGLLDKPGKSRDFYHTCYCLSGLSIAQHFGSGAMLHDVVLGVPENALQPTHPVYNIGPDKVIQATTYFLQKPVPGF,CC[C@@]1(c2cccc(Oc3cc([C@](C)(N)c4cncn4C)ccc3C#N)c2)CCCCN(C)C1=O,data/astex_diverse_set/astex_diverse_holo_aligned_esmfold_structures/1MZC_BNE_holo_aligned_esmfold_protein.pdb +1N46_PFA,KPEPTDEEWELIKTVTEAHVATNAQWKQKRKFLPEDIGQAPIVNAPEGGKVDLEAFSHFTKIITPAITRVVDFAKKLPMFCELPCEDQIILLKGCCMEIMSLRAAVRYDPESETLTLNGEMAVTRGQLKNGGLGVVSDAIFDLGMSLSSFNLDDTEVALLQAVLLMSSDRPGLACVERIEKYQDSFLLAFEHYINYRKHHVTHFWPKLLMKVTDLRMIGACHASRFLHMKVECPTELFPPLFLEVFED|KPEPTDEEWELIKTVTEAHVATNAQWKQKRKFLPEDIGQAKVDLEAFSHFTKIITPAITRVVDFAKKLPMFCELPCEDQIILLKGCCMEIMSLRAAVRYDPESETLTLNGEMAVTRGQLKNGGLGVVSDAIFDLGMSLSSFNLDDTEVALLQAVLLMSSDRPGLACVERIEKYQDSFLLAFEHYINYRKHHVTHFWPKLLMKVTDLRMIGACHASRFLHMKVECPTELFPPLFLEVFED,Cc1cc(-n2ncc(=O)[nH]c2=O)cc(C)c1Oc1ccc(O)c(C(C)C)c1,data/astex_diverse_set/astex_diverse_holo_aligned_esmfold_structures/1N46_PFA_holo_aligned_esmfold_protein.pdb 
+1R9O_FLP,RGKLPPGPTPLPLQIGIKDISKSLTNLSKVYGPVFTLYFGLKPIVVLHGYEAVKEALIDLGEEFSGRGIFPLAERANRGFGIVFSNGKKWKEIRRFSLMTLRNFGMGKRSIEDRVQEEARCLVEELRKTKASPCDPTFILGCAPCNVICSIIFHKRFDYKDQQFLNLMEKLNENIKILSSPWIPIIDYFPGTHNKLLKNVAFMKSYILEKVKEHQESMDMNNPQDFIDCFLMKMEKEKHNQPSEFTIESLENTAVDLFGAGTETTSTTLRYALLLLLKHPEVTAKVQEEIERVIGRNRSPCMQDRSHMPYTDAVVHEVQRYIDLLPTSLPHAVTCDIKFRNYLIPKGTTILISLTSVLHDNKEFPNPEMFDPHHFLDEGGNFKKSKYFMPFSAGKRICVGEALAGMELFLFLTSILQNFNLKSLVDPKNLDTTPVVNGFASVPPFYQLCFIPIHH,C[C@H](C(=O)O)c1ccc(-c2ccccc2)c(F)c1,data/astex_diverse_set/astex_diverse_holo_aligned_esmfold_structures/1R9O_FLP_holo_aligned_esmfold_protein.pdb +1K3U_IAD,MERYENLFAQLNDRREGAFVPFVTLGDPGIEQSLKIIDTLIDAGADALELGVPFSDPLADGPTIQNANLRAFAAGVTPAQCFEMLALIREKHPTIPIGLLMYANLVFNNGIDAFYARCEQVGVDSVLVADVPVEESAPFRQAALRHNIAPIFICPPNADDDLLRQVASYGRGYTYLLSRSGVTGAENRGALPLHHLIEKLKEYHAAPALQGFGISSPEQVSAAVRAGAAGAISGSAIVKIIEKNLASPKQMLAELRSFVSAMKAASRA|TTLLNPYFGEFGGMYVPQILMPALNQLEEAFVSAQKDPEFQAQFADLLKNYAGRPTALTKCQNITAGTRTTLYLKREDLLHGGAHKTNQVLGQALLAKRMGKSEIIAETGAGQHGVASALASALLGLKCRIYMGAKDVERQSPNVFRMRLMGAEVIPVHSGSATLKDACNEALRDWSGSYETAHYMLGTAAGPHPYPTIVREFQRMIGEETKAQILDKEGRLPDAVIACVGGGSNAIGMFADFINDTSVGLIGVEPGGHGIETGEHGAPLKHGRVGIYFGMKAPMMQTADGQIEESYSISAGLDFPSVGPQHAYLNSIGRADYVSITDDEALEAFKTLCRHEGIIPALESSHALAHALKMMREQPEKEQLLVVNLSGRGDKDIFTVHDILKARG,O=C(O)C[C@H](NC(=O)Cc1c[nH]c2ccccc12)C(=O)O,data/astex_diverse_set/astex_diverse_holo_aligned_esmfold_structures/1K3U_IAD_holo_aligned_esmfold_protein.pdb +1X8X_TYR,MASSNLIKQLQERGLVAQVTDEEALAERLAQGPIALYCGFDPTADSLHLGHLVPLLCLKRFQQAGHKPVALVGGATGLIGDPSFKAAERKLNTEETVQEWVDKIRKQVAPFLDFDCGENSAIAANNYDWFGNMNVLTFLRDIGKHFSVNQMINKEAVKQRLNREDQGISFTEFSYNLLQGYDFACLNKQYGVVLQIGGSDQWGNITSGIDLTRRLHQNQVFGLTVPLITKADGTKFGKTEGGAVWLDPKKTSPYKFYQFWINTADADVYRFLKFFTFMSIEEINALEEEDKNSGKAPRAQYVLAEQVTRLVHGEEGLQAAKR,N[C@@H](Cc1ccc(O)cc1)C(=O)O,data/astex_diverse_set/astex_diverse_holo_aligned_esmfold_structures/1X8X_TYR_holo_aligned_esmfold_protein.pdb 
+1S19_MC9,LRPKLSEEQQRIIAILLDAHHKTYDPTYSDFCQFRPPVRVNDGGGSVTLELSQLSMLPHLADLVSYSIQKVIGFAKMIPGFRDLTSEDQIVLLKSSAIEVIMLRSNESFTMDDMSWTCGNQDYKYRVSDVTKAGHSLELIEPLIKFQVGLKKLNLHEEEHVLLMAICIVSPDRPGVQDAALIEAIQDRLSNTLQTYIRCRHPPPGSHLLYAKMIQKLADLRSLNEEHSKQYRCLSFQPECSMKLTPLVLEVFG,C=C1/C(=C\C=C2/CCC[C@]3(C)[C@@H]([C@H](C)/C=C/[C@@H](O)C4CC4)CC[C@@H]23)C[C@@H](O)C[C@@H]1O,data/astex_diverse_set/astex_diverse_holo_aligned_esmfold_structures/1S19_MC9_holo_aligned_esmfold_protein.pdb +1OF1_SCT,MPTLLRVYIDGPHGMGKTTTTQLLVADDIVYVPEPMTYWRVLGASETIANIYTTQHRLDQGEISAGDAAVVMTSAQITMGMPYAVTDAVLAPHIGGEAPPPALTLIFDRHPIAALLCYPAARYLMGSMTPQAVLAFVALIPPTLPGTNIVLGALPEDRHIDRLAKRQRPGERLDLAMLAAIRRVYGLLANTVRYLQCGGSWREDWGQLSGTGPRPHIGDTLFTLFRAPELLAPNGDLYNVFAWALDVLAKRLRSMHVFILDYDQSPAGCRDALLQLTSGMVQTHVTTPGSIPTICDLARTFAREMGE|MPTLLRVYIDGPHGMGKTTTTQLLVALGSRDDIVYVPEPMTYWRVLGASETIANIYTTQHRLDQGEISAGDAAVVMTSAQITMGMPYAVTDAVLAPHIGGEAPPPALTLIFDRHPIAALLCYPAARYLMGSMTPQAVLAFVALIPPTLPGTNIVLGALPEDRHIDRLAKRGERLDLAMLAAIRRVYGLLANTVRYLQCGGSWREDWGQLSGTPQSNAGPRPHIGDTLFTLFRAPELLAPNGDLYNVFAWALDVLAKRLRSMHVFILDYDQSPAGCRDALLQLTSGMVQTHVTTPGSIPTICDLARTFAREMGE,Cc1cn([C@@]23C[C@H](O)[C@@H](CO)[C@@H]2C3)c(=O)[nH]c1=O,data/astex_diverse_set/astex_diverse_holo_aligned_esmfold_structures/1OF1_SCT_holo_aligned_esmfold_protein.pdb +1LPZ_CMB,RKLCSLDNGDCDQFCHEEQNSVVCSCARGYTLADNGKACIPTGPYPCGKQTLE|IVGGQECKDGECPWQALLINEENEGFCGGTILSEFYILTAAHCLYQAKRFKVRVGDRNTEQEEGGEAVHEVEVVIKHNRFTKETYDFDIAVLRLKTPITFRMNVAPACLPERDWAESTLMTQKTGIVSGFGRTHEKGRQSTRLKMLEVPYVDRNSCKLSSSFIITQNMFCAGYDTKQEDACQGDSGGPHVTRFKDTYFVTGIVSWGEGCARKGKYGIYTKVTAFLKWIDRSMKT,Cc1cccc2c1cc(C(=O)NCc1cc(Cl)cc(Cl)c1)n2Cc1cccc(C(=N)N)c1,data/astex_diverse_set/astex_diverse_holo_aligned_esmfold_structures/1LPZ_CMB_holo_aligned_esmfold_protein.pdb 
+1T46_STI,GNNYVYIDPTQLPYDHKWEFPRNRLSFGKTLGAGAFGKVVEATAYGLIKSDAAMTVAVKMLKPSAHLTEREALMSELKVLSYLGNHMNIVNLLGACTIGGPTLVITEYCCYGDLLNFLRRKRDSFLALDLEDLLSFSYQVAKGMAFLASKNCIHRDLAARNILLTHGRITKICDFGLARDIKNDSNYVVKGNARLPVKWMAPESIFNCVYTFESDVWSYGIFLWELFSLGSSPYPGMPVDSKFYKMIKEGFRMLSPEHAPAEMYDIMKTCWDADPLKRPTFKQIVQLIEKQISESTN,Cc1ccc(NC(=O)c2ccc(CN3CCN(C)CC3)cc2)cc1Nc1nccc(-c2cccnc2)n1,data/astex_diverse_set/astex_diverse_holo_aligned_esmfold_structures/1T46_STI_holo_aligned_esmfold_protein.pdb +1XOZ_CIA,EEETRELQSLAAAVVPSAQTLKITDFSFSDFELSDLETALCTIRMFTDLNLVQNFQMKHEVLCRWILSVKKNYRKNVAYHNWRHAFNTAQCMFAALKAGKIQNKLTDLEILALLIAALSHDLDHPGVSNQFLINTNSELALMYNDESVLEHHHFDQCLMILNSPGNQILSGLSIEEYKTTLKIIKQAILATDLALYIKRRGEFFELIRKNQFNLEDPHQKELFLAMLMTACDLSAITKPWPIQQRIAELVATEFFDQGDRERKELNIEPTDLMNREKKNKIPSMQVGFIDAICLQLYEALTHVSEDCFPLLDGCRKNRQKWQALAE,CN1CC(=O)N2[C@H](c3ccc4c(c3)OCO4)c3[nH]c4ccccc4c3C[C@@H]2C1=O,data/astex_diverse_set/astex_diverse_holo_aligned_esmfold_structures/1XOZ_CIA_holo_aligned_esmfold_protein.pdb +1UML_FR4,TPAFDKPKVELHVHLDGAIKPETILYYGKRRGIALPADTPEELQNIIGMDKPLTLPDFLAKFDYYMPAIAGCRDAIKRIAYEFVEMKAKDGVVYVEVRYSPHLLANSKVEPIPWNQAEGDLTPDEVVSLVNQGLQEGERDFGVKVRSILCCMRHQPSWSSEVVELCKKYREQTVVAIDLAGDETIEGSSLFPGHVQAYAEAVKSGVHRTVHAGEVGSANVVKEAVDTLKTERLGHGYHTLEDTTLYNRLRQENMHFEICPWSSYLTGAWKPDTEHAVIRFKNDQVNYSLNTDDPLIFKSTLDTDYQMTKKDMGFTEEEFKRLNINAAKSSFLPEDEKKELLDLLYKAYR,NC(=O)c1cn([C@@H](CO)CCn2ccc3ccc(NC(=O)CCc4ccccc4)cc32)cn1,data/astex_diverse_set/astex_diverse_holo_aligned_esmfold_structures/1UML_FR4_holo_aligned_esmfold_protein.pdb 
+1SQ5_PAU,MTPYLQFDRNQWAALRDMLSEDEIARLKGINEDLSLEEVAEIYLPLSRLLNFYISSNLRRQAVLEQFLGTNQRIPYIISIAGSVAVGKSTTARVLQALLSRWPEHRRVELITTDGFLHPNQVLKERGLMKKKGFPESYDMHRLVKFVSDLKSGVPNVTAPVYSHLIYDVIPDGDKTVVPDILILEGLNVLQSGMDYPHDPHHVFVSDFVDFSIYVDAPEDLLQTWYINRFLKFREGAFTDPDSYFHNYAKLTKEEAIKTAMTLWKEINWLNLKQNILPTRERASLILTKSANHAVEEVRLRK|MTPYLQFDRNQWAALRLSEDEIARLKGINEDLSLEEVAEIYLPLSRLLNFYISSNLRRQAVLEQFLGTNGQRIPYIISIAGSVAVGKSTTARVLQALLSRWPEHRRVELITTDGFLHPNQVLKERGLMKKKGFPESYDMHRLVKFVSDLKSGVPNVTAPVYSHLIYDVIPDGDKTVVQPDILILEGLNVLQSGMDYPHDPHHVFVSDFVDFSIYVDAPEDLLQTWYINRFLKFREGAFTDPDSYFHNYAKLTKEEAIKTAMTLWKEINWLNLKQNILPTRERASLILTKSANHAVEEVRLRK|MTPYLQFDRNQWAALRMTLSEDEIARLKGINEDLSLEEVAEIYLPLSRLLNFYISSNLRRQAVLEQFLGTNRPYIISIAGSVAVGKSTTARVLQALLSRWPERVELITTDGFLHPNQVLKERGLMKKKGFPESYDMHRLVKFVSDLKSGVPNVTAPVYSHLIYDVIPDGDKTVVPDILILEGLNVLQSGMDYPHDPHHVFVSDFVDFSIYVDAPEDLLQTWYINRFLKFREGAFTDPDSYFHNYAKLTKEEAIKTAMTLWKEINWLNLKQNILPTRERASLILTKSANHAVEEVRLRK|MTPYLQFDRNQWAALRDMTLSEDEIARLKGINEDLSLEEVAEIYLPLSRLLNFYISSNLRRQAVLEQFLGTNGQRIPYIISIAGSVAVGKSTTARVLQALLSRWPEHRRVELITTDGFLHPNQVLKERGLMKKKGFPESYDMHRLVKFVSDLKSGVPNVTAPVYSHLIYDVIPDGDKTVVPDILILEGLNVLQSGMDYPHDPHHVFVSDFVDFSIYVDAPEDLLQTWYINRFLKFREGAFTDPDSYFHNYAKLTKEEAIKTAMTLWKEINWLNLKQNILPTRERASLILTKSANHAVEEVRLRK,CC(C)(CO)[C@@H](O)C(=O)NCCC(=O)O,data/astex_diverse_set/astex_diverse_holo_aligned_esmfold_structures/1SQ5_PAU_holo_aligned_esmfold_protein.pdb 
+1MMV_3AR,RFLKVKNWETDVVLTDTLHLKSTLETGCTEHICMGSIMLPTKDQLFPLAKEFLDQYYSSIKRFGSKAHMDRLEEVNKEIESTSTYQLKDTELIYGAKHAWRNASRCVGRIQWSKLQVFDARDCTTAHGMFNYICNHVKYATNKGNLRSAITIFPQRTDGKHDFRVWNSQLIRYAGYKQPDGSTLGDPANVQFTEICIQQGWKAPRGRFDVLPLLLQANGNDPELFQIPPELVLEVPIRHPKFDWFKDLGLKWYGLPAVSNMLLEIGGLEFSACPFSGWYMGTEIGVRDYCDNSRYNILEEVAKKMDLDMRKTSSLWKDQALVEINIAVLYSFQSDKVTIVDHHSATESFIKHMENEYRCRGGCPADWVWIVPPMSGSITPVFHQEMLNYRLTPSFEYQPDPWNTHVW|RFLKVKNWETDVVLTDTLHLKSTLETGCTEHICMGSIMLPRTKDQLFPLAKEFLDQYYSSIKRFGSKAHMDRLEEVNKEIESTSTYQLKDTELIYGAKHAWRNASRCVGRIQWSKLQVFDARDCTTAHGMFNYICNHVKYATNKGNLRSAITIFPQRTDGKHDFRVWNSQLIRYAGYKQPDGSTLGDPANVQFTEICIQQGWKAPRGRFDVLPLLLQANGNDPELFQIPPELVLEVPIRHPKFDWFKDLGLKWYGLPAVSNMLLEIGGLEFSACPFSGWYMGTEIGVRDYCDNSRYNILEEVAKKMDLDMRKTSSLWKDQALVEINIAVLYSFQSDKVTIVDHHSATESFIKHMENEYRCRGGCPADWVWIVPPMSGSITPVFHQEMLNYRLTPSFEYQPDPWNTHVWK,CCCNC(=[NH2+])NCCC[C@H](N)C(=O)O,data/astex_diverse_set/astex_diverse_holo_aligned_esmfold_structures/1MMV_3AR_holo_aligned_esmfold_protein.pdb +1J3J_CP6,MMEQVCDVFDIYAICACCKVESKNEGKKNEVFNNYTFRGLGNKGVLPWKCNSLDMKYFRAVTTYVNESKYEKLKYKRCKYLNKETKKLQNVVVMGRTNWESIPKKFKPLSNRINVILSRTLKKEDFDEDVYIINKVEDLIVLLGKLNYYKCFIIGGSVVYQEFLEKKLIKKIYFTRINSTYECDVFFPEINENEYQIISVSDVYTSNNTTLDFIIYKKTNN|MMEQVCDVFDIYAICACCKVESKNEGKKNEVFNNYTFRGLGNKGVLPWKCNSLDMKYFRAVTTYVNESKYEKLKYKRCKYLKLQNVVVMGRTNWESIPKKFKPLSNRINVILSRTLKKEDFDEDVYIINKVEDLIVLLGKLNYYKCFIIGGSVVYQEFLEKKLIKKIYFTRINSTYECDVFFPEINENEYQIISVSDVYTSNNTTLDFIIYKKTNN|DDEEEDDFVYFNFNKEKEEKNKNSIHPNDFQIYNSLKYKYHPEYQYLNIIYDIMMNGNKQSDRTGVGVLSKFGYIMKFDLSQYFPLLTTKKLFLRGIIEELLWFIRGETNGNTLLNKNVRIWEANGTREFLDNRKLFHREVNDLGPIYGFQWRHFGAEYTNMYDNYENKGVDQLKNIINLIKNDPTSRRILLCAWNVKDLDQMALPPCHILCQFYVFDGKLSCIMYQRSCDLGLGVPFNIASYSIFTHMIAQVCNLQPAQFIHVLGNAHVYNNHIDSLKIQLNRIPYPFPTLKLNPDIKNIEDFTISDFTIQNYVHHEKISMDMAA|DDEEEDDFVYFNFNKEKEEKNKNSIHPNDFQIYNSLKYKYHPEYQYLNIIYDIMMNGNKQSDRTGVGVLSKFGYIMKFDLSQYFPLLTTKKLFLRGIIEELLWFIRGETNGNTLLNKNVRIWEANGTREFLDNRKLFHREVNDLGPIYGFQWRHFGAEYTNMYDNYENKGVDQLKNIINLIKNDPTSRRILLCAWNVKDLDQMALPPCHILCQFYVFDGKLSCIMYQRSCDLGLGVPFNIASYSIFTHMIAQ
VCNLQPAQFIHVLGNAHVYNNHIDSLKIQLNRIPYPFPTLKLNPDIKNIEDFTISDFTIQNYVHHEKISMDMAA,CCc1nc(N)nc(N)c1-c1ccc(Cl)cc1,data/astex_diverse_set/astex_diverse_holo_aligned_esmfold_structures/1J3J_CP6_holo_aligned_esmfold_protein.pdb \ No newline at end of file diff --git a/forks/NeuralPLexer/inference/neuralplexer_casp15_inputs.csv b/forks/NeuralPLexer/inference/neuralplexer_casp15_inputs.csv new file mode 100644 index 00000000..4d156036 --- /dev/null +++ b/forks/NeuralPLexer/inference/neuralplexer_casp15_inputs.csv @@ -0,0 +1,20 @@ +id,input_receptor,input_ligand,input_template +T1124,data/casp15_set/casp15_holo_aligned_predicted_structures/T1124.pdb,Nc1ncnc2c1ncn2[C@@H]1O[C@H](CSCC[C@H](N)C(=O)O)[C@@H](O)[C@H]1O|Nc1ncnc2c1ncn2[C@@H]1O[C@H](CSCC[C@H](N)C(=O)O)[C@@H](O)[C@H]1O|N[C@@H](Cc1ccc(O)cc1)C(=O)O|N[C@@H](Cc1ccc(O)cc1)C(=O)O,data/casp15_set/casp15_holo_aligned_predicted_structures/T1124.pdb +T1186,data/casp15_set/casp15_holo_aligned_predicted_structures/T1186.pdb,Cc1onc(c1C(=O)N[C@H](C=O)[C@@H]1N[C@@H](C(O)=O)C(C)(C)S1)-c1c(Cl)cccc1Cl,data/casp15_set/casp15_holo_aligned_predicted_structures/T1186.pdb 
+T1181,data/casp15_set/casp15_holo_aligned_predicted_structures/T1181.pdb,CC(=O)N[C@H]1[C@@H](O)O[C@H](CO[C@H]2O[C@H](CO)[C@@H](O[C@@H]3O[C@H](C(=O)N[C@@H](CO)C([O-])=O)[C@@H](O[C@H]4O[C@H](CO[C@H]5O[C@H](CO)[C@@H](O[C@@H]6OC(C(=O)N[C@@H](CO)C([O-])=O)=C[C@H](O)[C@H]6O)[C@H](O)[C@H]5NC(C)=O)[C@@H](O)[C@H](O)[C@H]4NC(C)=O)[C@H](O)[C@H]3O)[C@H](O)[C@H]2NC(C)=O)[C@@H](O)[C@@H]1O|CC(=O)N[C@H]1[C@@H](O)O[C@H](CO[C@H]2O[C@H](CO)[C@@H](O[C@@H]3O[C@H](C(=O)N[C@@H](CO)C([O-])=O)[C@@H](O[C@H]4O[C@H](CO[C@H]5O[C@H](CO)[C@@H](O[C@@H]6OC(C(=O)N[C@@H](CO)C([O-])=O)=C[C@H](O)[C@H]6O)[C@H](O)[C@H]5NC(C)=O)[C@@H](O)[C@H](O)[C@H]4NC(C)=O)[C@H](O)[C@H]3O)[C@H](O)[C@H]2NC(C)=O)[C@@H](O)[C@@H]1O|CC(=O)N[C@H]1[C@@H](O)O[C@H](CO[C@H]2O[C@H](CO)[C@@H](O[C@@H]3O[C@H](C(=O)N[C@@H](CO)C([O-])=O)[C@@H](O[C@H]4O[C@H](CO[C@H]5O[C@H](CO)[C@@H](O[C@@H]6OC(C(=O)N[C@@H](CO)C([O-])=O)=C[C@H](O)[C@H]6O)[C@H](O)[C@H]5NC(C)=O)[C@@H](O)[C@H](O)[C@H]4NC(C)=O)[C@H](O)[C@H]3O)[C@H](O)[C@H]2NC(C)=O)[C@@H](O)[C@@H]1O|CC(=O)N[C@H]1[C@@H](O)O[C@H](CO[C@H]2O[C@H](CO)[C@@H](O[C@@H]3O[C@H](C(=O)N[C@@H](CO)C([O-])=O)[C@@H](O[C@H]4O[C@H](CO[C@H]5O[C@H](CO)[C@@H](O[C@@H]6OC(C(=O)N[C@@H](CO)C([O-])=O)=C[C@H](O)[C@H]6O)[C@H](O)[C@H]5NC(C)=O)[C@@H](O)[C@H](O)[C@H]4NC(C)=O)[C@H](O)[C@H]3O)[C@H](O)[C@H]2NC(C)=O)[C@@H](O)[C@@H]1O|[Zn+2]|[Zn+2]|[Zn+2]|[Ca+2]|CC(=O)N[C@H]1[C@@H](O)O[C@H](CO[C@H]2O[C@H](CO)[C@@H](O[C@@H]3O[C@H](C(=O)N[C@@H](CO)C([O-])=O)[C@@H](O[C@H]4O[C@H](CO[C@H]5O[C@H](CO)[C@@H](O[C@@H]6OC(C(=O)N[C@@H](CO)C([O-])=O)=C[C@H](O)[C@H]6O)[C@H](O)[C@H]5NC(C)=O)[C@@H](O)[C@H](O)[C@H]4NC(C)=O)[C@H](O)[C@H]3O)[C@H](O)[C@H]2NC(C)=O)[C@@H](O)[C@@H]1O,data/casp15_set/casp15_holo_aligned_predicted_structures/T1181.pdb 
+T1187,data/casp15_set/casp15_holo_aligned_predicted_structures/T1187.pdb,CC(=O)N[C@H]1[C@H](O[C@@H]2[C@@H](CO)O[C@@H](O[C@@H]3[C@@H](CO)O[C@@H](O)[C@H](NC(C)=O)[C@H]3O)[C@H](NC(C)=O)[C@H]2O)O[C@H](CO)[C@@H](O)[C@@H]1O|CC(=O)N[C@H]1[C@H](O[C@@H]2[C@@H](CO)O[C@@H](O[C@@H]3[C@@H](CO)O[C@@H](O)[C@H](NC(C)=O)[C@H]3O)[C@H](NC(C)=O)[C@H]2O)O[C@H](CO)[C@@H](O)[C@@H]1O,data/casp15_set/casp15_holo_aligned_predicted_structures/T1187.pdb +T1158v1,data/casp15_set/casp15_holo_aligned_predicted_structures/T1158v1.pdb,CCCCC[C@@H](/C=C/[C@H]1[C@@H](CC(=O)[C@@H]1CCCCCCC(=O)O)O)O,data/casp15_set/casp15_holo_aligned_predicted_structures/T1158v1.pdb +H1172v2,data/casp15_set/casp15_holo_aligned_predicted_structures/H1172v2.pdb,Nc1ncnc2c1ncn2[C@@H]1O[C@H](CO[P@](=O)([O-])OP(=O)([O-])[O-])[C@@H](O)[C@H]1O|Nc1ncnc2c1ncn2[C@@H]1O[C@H](CO[P@](=O)([O-])OP(=O)([O-])[O-])[C@@H](O)[C@H]1O|Nc1ncnc2c1ncn2[C@@H]1O[C@H](CO[P@](=O)([O-])OP(=O)([O-])[O-])[C@@H](O)[C@H]1O|Nc1ncnc2c1ncn2[C@@H]1O[C@H](CO[P@](=O)([O-])OP(=O)([O-])[O-])[C@@H](O)[C@H]1O|Nc1ncnc2c1ncn2[C@@H]1O[C@H](COP(=O)([O-])OP(=O)([O-])OP([O-])([O-])=S)[C@@H](O)[C@H]1O|Nc1ncnc2c1ncn2[C@@H]1O[C@H](COP(=O)([O-])OP(=O)([O-])OP([O-])([O-])=S)[C@@H](O)[C@H]1O|[Mg+2]|[Mg+2]|[Mg+2],data/casp15_set/casp15_holo_aligned_predicted_structures/H1172v2.pdb +T1158v2,data/casp15_set/casp15_holo_aligned_predicted_structures/T1158v2.pdb,CCCCC[C@@H](/C=C/[C@H]1[C@@H](CC(=O)[C@@H]1C/C=C\CCCC(=O)O)O)O,data/casp15_set/casp15_holo_aligned_predicted_structures/T1158v2.pdb 
+H1172v3,data/casp15_set/casp15_holo_aligned_predicted_structures/H1172v3.pdb,Nc1ncnc2c1ncn2[C@@H]1O[C@H](CO[P@](=O)([O-])OP(=O)([O-])[O-])[C@@H](O)[C@H]1O|Nc1ncnc2c1ncn2[C@@H]1O[C@H](CO[P@](=O)([O-])OP(=O)([O-])[O-])[C@@H](O)[C@H]1O|Nc1ncnc2c1ncn2[C@@H]1O[C@H](CO[P@](=O)([O-])OP(=O)([O-])[O-])[C@@H](O)[C@H]1O|Nc1ncnc2c1ncn2[C@@H]1O[C@H](CO[P@](=O)([O-])OP(=O)([O-])[O-])[C@@H](O)[C@H]1O|Nc1ncnc2c1ncn2[C@@H]1O[C@H](COP(=O)([O-])OP(=O)([O-])OP([O-])([O-])=S)[C@@H](O)[C@H]1O|Nc1ncnc2c1ncn2[C@@H]1O[C@H](COP(=O)([O-])OP(=O)([O-])OP([O-])([O-])=S)[C@@H](O)[C@H]1O|[Mg+2]|[Mg+2],data/casp15_set/casp15_holo_aligned_predicted_structures/H1172v3.pdb +H1172v1,data/casp15_set/casp15_holo_aligned_predicted_structures/H1172v1.pdb,Nc1ncnc2c1ncn2[C@@H]1O[C@H](CO[P@](=O)([O-])OP(=O)([O-])[O-])[C@@H](O)[C@H]1O|Nc1ncnc2c1ncn2[C@@H]1O[C@H](CO[P@](=O)([O-])OP(=O)([O-])[O-])[C@@H](O)[C@H]1O|Nc1ncnc2c1ncn2[C@@H]1O[C@H](CO[P@](=O)([O-])OP(=O)([O-])[O-])[C@@H](O)[C@H]1O|Nc1ncnc2c1ncn2[C@@H]1O[C@H](COP(=O)([O-])OP(=O)([O-])OP([O-])([O-])=S)[C@@H](O)[C@H]1O|Nc1ncnc2c1ncn2[C@@H]1O[C@H](COP(=O)([O-])OP(=O)([O-])OP([O-])([O-])=S)[C@@H](O)[C@H]1O|Nc1ncnc2c1ncn2[C@@H]1O[C@H](COP(=O)([O-])OP(=O)([O-])OP([O-])([O-])=S)[C@@H](O)[C@H]1O|[Mg+2]|[Mg+2]|[Mg+2],data/casp15_set/casp15_holo_aligned_predicted_structures/H1172v1.pdb +H1172v4,data/casp15_set/casp15_holo_aligned_predicted_structures/H1172v4.pdb,Nc1ncnc2c1ncn2[C@@H]1O[C@H](CO[P@](=O)([O-])OP(=O)([O-])[O-])[C@@H](O)[C@H]1O|Nc1ncnc2c1ncn2[C@@H]1O[C@H](CO[P@](=O)([O-])OP(=O)([O-])[O-])[C@@H](O)[C@H]1O|Nc1ncnc2c1ncn2[C@@H]1O[C@H](CO[P@](=O)([O-])OP(=O)([O-])[O-])[C@@H](O)[C@H]1O|Nc1ncnc2c1ncn2[C@@H]1O[C@H](COP(=O)([O-])OP(=O)([O-])OP([O-])([O-])=S)[C@@H](O)[C@H]1O|Nc1ncnc2c1ncn2[C@@H]1O[C@H](COP(=O)([O-])OP(=O)([O-])OP([O-])([O-])=S)[C@@H](O)[C@H]1O|Nc1ncnc2c1ncn2[C@@H]1O[C@H](COP(=O)([O-])OP(=O)([O-])OP([O-])([O-])=S)[C@@H](O)[C@H]1O|[Mg+2]|[Mg+2]|[Mg+2],data/casp15_set/casp15_holo_aligned_predicted_structures/H1172v4.pdb 
+T1127v2,data/casp15_set/casp15_holo_aligned_predicted_structures/T1127v2.pdb,CC(C)(CO[P@](=O)(O)O[P@@](=O)(O)OC[C@H]1O[C@@H](n2cnc3c(N)ncnc32)[C@H](O)[C@@H]1OP(=O)(O)O)[C@@H](O)C(=O)NCCC(=O)NCCS|CC(C)(CO[P@](=O)(O)O[P@@](=O)(O)OC[C@H]1O[C@@H](n2cnc3c(N)ncnc32)[C@H](O)[C@@H]1OP(=O)(O)O)[C@@H](O)C(=O)NCCC(=O)NCCS|O=S(=O)(O)CCN1CCN(CCO)CC1|O=S(=O)(O)CCN1CCN(CCO)CC1|C[C@H](O)CC(C)(C)O,data/casp15_set/casp15_holo_aligned_predicted_structures/T1127v2.pdb +H1171v1,data/casp15_set/casp15_holo_aligned_predicted_structures/H1171v1.pdb,Nc1ncnc2c1ncn2[C@@H]1O[C@H](CO[P@](=O)([O-])OP(=O)([O-])[O-])[C@@H](O)[C@H]1O|Nc1ncnc2c1ncn2[C@@H]1O[C@H](COP(=O)([O-])OP(=O)([O-])OP([O-])([O-])=S)[C@@H](O)[C@H]1O|Nc1ncnc2c1ncn2[C@@H]1O[C@H](COP(=O)([O-])OP(=O)([O-])OP([O-])([O-])=S)[C@@H](O)[C@H]1O|Nc1ncnc2c1ncn2[C@@H]1O[C@H](COP(=O)([O-])OP(=O)([O-])OP([O-])([O-])=S)[C@@H](O)[C@H]1O|Nc1ncnc2c1ncn2[C@@H]1O[C@H](COP(=O)([O-])OP(=O)([O-])OP([O-])([O-])=S)[C@@H](O)[C@H]1O|Nc1ncnc2c1ncn2[C@@H]1O[C@H](COP(=O)([O-])OP(=O)([O-])OP([O-])([O-])=S)[C@@H](O)[C@H]1O|[Mg+2]|[Mg+2]|[Mg+2]|[Mg+2]|[Mg+2],data/casp15_set/casp15_holo_aligned_predicted_structures/H1171v1.pdb +T1152,data/casp15_set/casp15_holo_aligned_predicted_structures/T1152.pdb,CC(=O)NC1C(O)OC(CO)C(OC2OC(CO)C(OC3OC(CO)C(O)C(O)C3NC(C)=O)C(O)C2NC(C)=O)C1O,data/casp15_set/casp15_holo_aligned_predicted_structures/T1152.pdb +T1188,data/casp15_set/casp15_holo_aligned_predicted_structures/T1188.pdb,Cn1cnc2c1c(=O)n(CCCn1c(=O)c3c(ncn3C)n(C)c1=O)c(=O)n2C|Cn1cnc2c1c(=O)n(CCCn1c(=O)c3c(ncn3C)n(C)c1=O)c(=O)n2C|[Cd+2]|[Cd+2]|[Co+2],data/casp15_set/casp15_holo_aligned_predicted_structures/T1188.pdb +T1146,data/casp15_set/casp15_holo_aligned_predicted_structures/T1146.pdb,CC(=O)N[C@H]1[C@H](O)O[C@H](CO)[C@@H](O)[C@@H]1O,data/casp15_set/casp15_holo_aligned_predicted_structures/T1146.pdb 
+T1158v3,data/casp15_set/casp15_holo_aligned_predicted_structures/T1158v3.pdb,C[C@]12CC[C@H]3[C@H]([C@@H]1CCC2=O)CC=C4[C@@]3(CC[C@@H](C4)OS(=O)(=O)O)C,data/casp15_set/casp15_holo_aligned_predicted_structures/T1158v3.pdb +H1171v2,data/casp15_set/casp15_holo_aligned_predicted_structures/H1171v2.pdb,Nc1ncnc2c1ncn2[C@@H]1O[C@H](CO[P@](=O)([O-])OP(=O)([O-])[O-])[C@@H](O)[C@H]1O|Nc1ncnc2c1ncn2[C@@H]1O[C@H](CO[P@](=O)([O-])OP(=O)([O-])[O-])[C@@H](O)[C@H]1O|Nc1ncnc2c1ncn2[C@@H]1O[C@H](CO[P@](=O)([O-])OP(=O)([O-])[O-])[C@@H](O)[C@H]1O|Nc1ncnc2c1ncn2[C@@H]1O[C@H](COP(=O)([O-])OP(=O)([O-])OP([O-])([O-])=S)[C@@H](O)[C@H]1O|Nc1ncnc2c1ncn2[C@@H]1O[C@H](COP(=O)([O-])OP(=O)([O-])OP([O-])([O-])=S)[C@@H](O)[C@H]1O|Nc1ncnc2c1ncn2[C@@H]1O[C@H](COP(=O)([O-])OP(=O)([O-])OP([O-])([O-])=S)[C@@H](O)[C@H]1O|[Mg+2]|[Mg+2]|[Mg+2],data/casp15_set/casp15_holo_aligned_predicted_structures/H1171v2.pdb +H1135,data/casp15_set/casp15_holo_aligned_predicted_structures/H1135.pdb,[Cl-]|[Cl-]|[Cl-]|[K+]|[K+]|[K+]|[K+]|[K+]|[K+]|[K+]|[K+]|[K+],data/casp15_set/casp15_holo_aligned_predicted_structures/H1135.pdb +T1158v4,data/casp15_set/casp15_holo_aligned_predicted_structures/T1158v4.pdb,C1=NC(=C2C(=N1)N(C=N2)[C@H]3[C@@H]([C@@H]([C@H](O3)COP(=O)(O)OP(=O)(O)OP(=O)(O)O)O)O)N|C1=NC(=C2C(=N1)N(C=N2)[C@H]3[C@@H]([C@@H]([C@H](O3)COP(=O)(O)OP(=O)(O)OP(=O)(O)O)O)O)N|[Mg+2]|[Mg+2],data/casp15_set/casp15_holo_aligned_predicted_structures/T1158v4.pdb diff --git a/forks/NeuralPLexer/inference/neuralplexer_dockgen_inputs.csv b/forks/NeuralPLexer/inference/neuralplexer_dockgen_inputs.csv index d11b1fb8..5d631f92 100644 --- a/forks/NeuralPLexer/inference/neuralplexer_dockgen_inputs.csv +++ b/forks/NeuralPLexer/inference/neuralplexer_dockgen_inputs.csv @@ -1,190 +1,92 @@ id,input_receptor,input_ligand,input_template 
-3gvl_1_SLB_2,/bml/acmwhb/Repositories/Lab_Repositories/PoseBench/data/dockgen_set/dockgen_holo_aligned_esmfold_structures/3gvl_1_SLB_2_holo_aligned_esmfold_protein.pdb,CC(O)N[C@H]1[C@H]([C@H](O)[C@H](O)CO)O[C@](O)(C(O)O)C[C@@H]1O,/bml/acmwhb/Repositories/Lab_Repositories/PoseBench/data/dockgen_set/dockgen_holo_aligned_esmfold_structures/3gvl_1_SLB_2_holo_aligned_esmfold_protein.pdb -3inr_1_GDU_0,/bml/acmwhb/Repositories/Lab_Repositories/PoseBench/data/dockgen_set/dockgen_holo_aligned_esmfold_structures/3inr_1_GDU_0_holo_aligned_esmfold_protein.pdb,OC[C@H]1O[C@H](O[PH](O)(O)O[PH](O)(O)OC[C@H]2O[C@@H](N3CCC(O)NC3O)[C@H](O)[C@@H]2O)[C@H](O)[C@@H](O)[C@H]1O,/bml/acmwhb/Repositories/Lab_Repositories/PoseBench/data/dockgen_set/dockgen_holo_aligned_esmfold_structures/3inr_1_GDU_0_holo_aligned_esmfold_protein.pdb -3jqm_1_GTP_5,/bml/acmwhb/Repositories/Lab_Repositories/PoseBench/data/dockgen_set/dockgen_holo_aligned_esmfold_structures/3jqm_1_GTP_5_holo_aligned_esmfold_protein.pdb,NC1NC(O)C2NCN([C@@H]3O[C@H](CO[PH](O)(O)O[PH](O)(O)O[PH](O)(O)O)[C@@H](O)[C@H]3O)C2N1,/bml/acmwhb/Repositories/Lab_Repositories/PoseBench/data/dockgen_set/dockgen_holo_aligned_esmfold_structures/3jqm_1_GTP_5_holo_aligned_esmfold_protein.pdb -3ju4_1_SLB_2,/bml/acmwhb/Repositories/Lab_Repositories/PoseBench/data/dockgen_set/dockgen_holo_aligned_esmfold_structures/3ju4_1_SLB_2_holo_aligned_esmfold_protein.pdb,CC(O)N[C@H]1[C@H]([C@H](O)[C@H](O)CO)O[C@](O)(C(O)O)C[C@@H]1O,/bml/acmwhb/Repositories/Lab_Repositories/PoseBench/data/dockgen_set/dockgen_holo_aligned_esmfold_structures/3ju4_1_SLB_2_holo_aligned_esmfold_protein.pdb -4cnl_1_CHT_1,/bml/acmwhb/Repositories/Lab_Repositories/PoseBench/data/dockgen_set/dockgen_holo_aligned_esmfold_structures/4cnl_1_CHT_1_holo_aligned_esmfold_protein.pdb,C[N+](C)(C)CCO,/bml/acmwhb/Repositories/Lab_Repositories/PoseBench/data/dockgen_set/dockgen_holo_aligned_esmfold_structures/4cnl_1_CHT_1_holo_aligned_esmfold_protein.pdb 
-1hg0_1_SIN_1,/bml/acmwhb/Repositories/Lab_Repositories/PoseBench/data/dockgen_set/dockgen_holo_aligned_esmfold_structures/1hg0_1_SIN_1_holo_aligned_esmfold_protein.pdb,OC(O)CCC(O)O,/bml/acmwhb/Repositories/Lab_Repositories/PoseBench/data/dockgen_set/dockgen_holo_aligned_esmfold_structures/1hg0_1_SIN_1_holo_aligned_esmfold_protein.pdb -1i8t_1_FAD_1,/bml/acmwhb/Repositories/Lab_Repositories/PoseBench/data/dockgen_set/dockgen_holo_aligned_esmfold_structures/1i8t_1_FAD_1_holo_aligned_esmfold_protein.pdb,CC1CC2NC3C(O)NC(O)NC3N(C[C@H](O)[C@H](O)[C@H](O)CO[PH](O)(O)O[PH](O)(O)OC[C@H]3O[C@@H](N4CNC5C(N)NCNC54)[C@H](O)[C@@H]3O)C2CC1C,/bml/acmwhb/Repositories/Lab_Repositories/PoseBench/data/dockgen_set/dockgen_holo_aligned_esmfold_structures/1i8t_1_FAD_1_holo_aligned_esmfold_protein.pdb -1o28_1_UFP_2,/bml/acmwhb/Repositories/Lab_Repositories/PoseBench/data/dockgen_set/dockgen_holo_aligned_esmfold_structures/1o28_1_UFP_2_holo_aligned_esmfold_protein.pdb,OC1NC(O)N([C@H]2C[C@H](O)[C@@H](CO[PH](O)(O)O)O2)CC1F,/bml/acmwhb/Repositories/Lab_Repositories/PoseBench/data/dockgen_set/dockgen_holo_aligned_esmfold_structures/1o28_1_UFP_2_holo_aligned_esmfold_protein.pdb -1o72_2_PC_0,/bml/acmwhb/Repositories/Lab_Repositories/PoseBench/data/dockgen_set/dockgen_holo_aligned_esmfold_structures/1o72_2_PC_0_holo_aligned_esmfold_protein.pdb,C[N+](C)(C)CCO[PH](O)(O)O,/bml/acmwhb/Repositories/Lab_Repositories/PoseBench/data/dockgen_set/dockgen_holo_aligned_esmfold_structures/1o72_2_PC_0_holo_aligned_esmfold_protein.pdb -1pj2_1_FUM_0,/bml/acmwhb/Repositories/Lab_Repositories/PoseBench/data/dockgen_set/dockgen_holo_aligned_esmfold_structures/1pj2_1_FUM_0_holo_aligned_esmfold_protein.pdb,OC(O)CCC(O)O,/bml/acmwhb/Repositories/Lab_Repositories/PoseBench/data/dockgen_set/dockgen_holo_aligned_esmfold_structures/1pj2_1_FUM_0_holo_aligned_esmfold_protein.pdb 
-1pj4_1_FUM_1,/bml/acmwhb/Repositories/Lab_Repositories/PoseBench/data/dockgen_set/dockgen_holo_aligned_esmfold_structures/1pj4_1_FUM_1_holo_aligned_esmfold_protein.pdb,OC(O)CCC(O)O,/bml/acmwhb/Repositories/Lab_Repositories/PoseBench/data/dockgen_set/dockgen_holo_aligned_esmfold_structures/1pj4_1_FUM_1_holo_aligned_esmfold_protein.pdb -1qaw_1_TRP_7,/bml/acmwhb/Repositories/Lab_Repositories/PoseBench/data/dockgen_set/dockgen_holo_aligned_esmfold_structures/1qaw_1_TRP_7_holo_aligned_esmfold_protein.pdb,N[C@@H](CC1CNC2CCCCC12)C(O)O,/bml/acmwhb/Repositories/Lab_Repositories/PoseBench/data/dockgen_set/dockgen_holo_aligned_esmfold_structures/1qaw_1_TRP_7_holo_aligned_esmfold_protein.pdb -1rqp_1_SAM_0,/bml/acmwhb/Repositories/Lab_Repositories/PoseBench/data/dockgen_set/dockgen_holo_aligned_esmfold_structures/1rqp_1_SAM_0_holo_aligned_esmfold_protein.pdb,C[S@@H](CC[C@H](N)C(O)O)C[C@H]1O[C@@H](N2CNC3C(N)NCNC32)[C@H](O)[C@@H]1O,/bml/acmwhb/Repositories/Lab_Repositories/PoseBench/data/dockgen_set/dockgen_holo_aligned_esmfold_structures/1rqp_1_SAM_0_holo_aligned_esmfold_protein.pdb -1sbz_1_FMN_3,/bml/acmwhb/Repositories/Lab_Repositories/PoseBench/data/dockgen_set/dockgen_holo_aligned_esmfold_structures/1sbz_1_FMN_3_holo_aligned_esmfold_protein.pdb,C[C@@H]1CC2NC3C(O)NC(O)NC3N(C[C@H](O)[C@H](O)[C@H](O)CO[PH](O)(O)O)C2C[C@@H]1C,/bml/acmwhb/Repositories/Lab_Repositories/PoseBench/data/dockgen_set/dockgen_holo_aligned_esmfold_structures/1sbz_1_FMN_3_holo_aligned_esmfold_protein.pdb -1sij_1_PCD_0,/bml/acmwhb/Repositories/Lab_Repositories/PoseBench/data/dockgen_set/dockgen_holo_aligned_esmfold_structures/1sij_1_PCD_0_holo_aligned_esmfold_protein.pdb,NC1CCN([C@@H]2O[C@H](CO[PH](O)(O)O[PH](O)(O)OC[C@H]3O[C@H]4NC5NC(N)NC(O)C5N[C@H]4[C@@H]4S[Mo](O)(O)SC43)[C@@H](O)[C@H]2O)C(O)N1,/bml/acmwhb/Repositories/Lab_Repositories/PoseBench/data/dockgen_set/dockgen_holo_aligned_esmfold_structures/1sij_1_PCD_0_holo_aligned_esmfold_protein.pdb 
-1tke_1_SER_0,/bml/acmwhb/Repositories/Lab_Repositories/PoseBench/data/dockgen_set/dockgen_holo_aligned_esmfold_structures/1tke_1_SER_0_holo_aligned_esmfold_protein.pdb,N[C@@H](CO)C(O)O,/bml/acmwhb/Repositories/Lab_Repositories/PoseBench/data/dockgen_set/dockgen_holo_aligned_esmfold_structures/1tke_1_SER_0_holo_aligned_esmfold_protein.pdb -1tkg_1_SSA_0,/bml/acmwhb/Repositories/Lab_Repositories/PoseBench/data/dockgen_set/dockgen_holo_aligned_esmfold_structures/1tkg_1_SSA_0_holo_aligned_esmfold_protein.pdb,NC1NCNC2C1NCN2[C@@H]1O[C@H](COS(O)(O)NC(O)[C@@H](N)CO)[C@@H](O)[C@H]1O,/bml/acmwhb/Repositories/Lab_Repositories/PoseBench/data/dockgen_set/dockgen_holo_aligned_esmfold_structures/1tkg_1_SSA_0_holo_aligned_esmfold_protein.pdb -1u8u_1_OCA_0,/bml/acmwhb/Repositories/Lab_Repositories/PoseBench/data/dockgen_set/dockgen_holo_aligned_esmfold_structures/1u8u_1_OCA_0_holo_aligned_esmfold_protein.pdb,CCCCCCCC(O)O,/bml/acmwhb/Repositories/Lab_Repositories/PoseBench/data/dockgen_set/dockgen_holo_aligned_esmfold_structures/1u8u_1_OCA_0_holo_aligned_esmfold_protein.pdb -1uf5_1_CDT_0,/bml/acmwhb/Repositories/Lab_Repositories/PoseBench/data/dockgen_set/dockgen_holo_aligned_esmfold_structures/1uf5_1_CDT_0_holo_aligned_esmfold_protein.pdb,CSCC[C@@H](NC(N)O)C(O)O,/bml/acmwhb/Repositories/Lab_Repositories/PoseBench/data/dockgen_set/dockgen_holo_aligned_esmfold_structures/1uf5_1_CDT_0_holo_aligned_esmfold_protein.pdb -1uf7_1_CDV_0,/bml/acmwhb/Repositories/Lab_Repositories/PoseBench/data/dockgen_set/dockgen_holo_aligned_esmfold_structures/1uf7_1_CDV_0_holo_aligned_esmfold_protein.pdb,CC(C)[C@@H](NC(N)O)C(O)O,/bml/acmwhb/Repositories/Lab_Repositories/PoseBench/data/dockgen_set/dockgen_holo_aligned_esmfold_structures/1uf7_1_CDV_0_holo_aligned_esmfold_protein.pdb 
-1uf8_1_ING_2,/bml/acmwhb/Repositories/Lab_Repositories/PoseBench/data/dockgen_set/dockgen_holo_aligned_esmfold_structures/1uf8_1_ING_2_holo_aligned_esmfold_protein.pdb,NC(O)N[C@H](CC1CCCCC1)C(O)O,/bml/acmwhb/Repositories/Lab_Repositories/PoseBench/data/dockgen_set/dockgen_holo_aligned_esmfold_structures/1uf8_1_ING_2_holo_aligned_esmfold_protein.pdb -1v2g_1_OCA_0,/bml/acmwhb/Repositories/Lab_Repositories/PoseBench/data/dockgen_set/dockgen_holo_aligned_esmfold_structures/1v2g_1_OCA_0_holo_aligned_esmfold_protein.pdb,CCCCCCCC(O)O,/bml/acmwhb/Repositories/Lab_Repositories/PoseBench/data/dockgen_set/dockgen_holo_aligned_esmfold_structures/1v2g_1_OCA_0_holo_aligned_esmfold_protein.pdb -1v97_1_MTE_1,/bml/acmwhb/Repositories/Lab_Repositories/PoseBench/data/dockgen_set/dockgen_holo_aligned_esmfold_structures/1v97_1_MTE_1_holo_aligned_esmfold_protein.pdb,NC1NC(O)[C@@H]2N[C@H]3C(S)C(S)[C@@H](CO[PH](O)(O)O)O[C@H]3NC2N1,/bml/acmwhb/Repositories/Lab_Repositories/PoseBench/data/dockgen_set/dockgen_holo_aligned_esmfold_structures/1v97_1_MTE_1_holo_aligned_esmfold_protein.pdb -1za2_1_CTP_4,/bml/acmwhb/Repositories/Lab_Repositories/PoseBench/data/dockgen_set/dockgen_holo_aligned_esmfold_structures/1za2_1_CTP_4_holo_aligned_esmfold_protein.pdb,NC1CCN([C@@H]2O[C@H](CO[PH](O)(O)O[PH](O)(O)O[PH](O)(O)O)[C@@H](O)[C@H]2O)C(O)N1,/bml/acmwhb/Repositories/Lab_Repositories/PoseBench/data/dockgen_set/dockgen_holo_aligned_esmfold_structures/1za2_1_CTP_4_holo_aligned_esmfold_protein.pdb -2cdc_1_XYS_5,/bml/acmwhb/Repositories/Lab_Repositories/PoseBench/data/dockgen_set/dockgen_holo_aligned_esmfold_structures/2cdc_1_XYS_5_holo_aligned_esmfold_protein.pdb,O[C@@H]1[C@@H](O)[C@@H](O)OC[C@H]1O,/bml/acmwhb/Repositories/Lab_Repositories/PoseBench/data/dockgen_set/dockgen_holo_aligned_esmfold_structures/2cdc_1_XYS_5_holo_aligned_esmfold_protein.pdb 
-2ext_1_TRP_0,/bml/acmwhb/Repositories/Lab_Repositories/PoseBench/data/dockgen_set/dockgen_holo_aligned_esmfold_structures/2ext_1_TRP_0_holo_aligned_esmfold_protein.pdb,N[C@@H](CC1CNC2CCCCC12)C(O)O,/bml/acmwhb/Repositories/Lab_Repositories/PoseBench/data/dockgen_set/dockgen_holo_aligned_esmfold_structures/2ext_1_TRP_0_holo_aligned_esmfold_protein.pdb -2g7c_1_NAG-GAL-GLA_1,/bml/acmwhb/Repositories/Lab_Repositories/PoseBench/data/dockgen_set/dockgen_holo_aligned_esmfold_structures/2g7c_1_NAG-GAL-GLA_1_holo_aligned_esmfold_protein.pdb,CC(O)N[C@@H]1[C@@H](O)[C@H](O[C@@H]2O[C@H](CO)[C@H](O)[C@H](O[C@H]3O[C@H](CO)[C@H](O)[C@H](O)[C@H]3O)[C@H]2O)[C@@H](CO)O[C@H]1O,/bml/acmwhb/Repositories/Lab_Repositories/PoseBench/data/dockgen_set/dockgen_holo_aligned_esmfold_structures/2g7c_1_NAG-GAL-GLA_1_holo_aligned_esmfold_protein.pdb -2gag_1_FOA_0,/bml/acmwhb/Repositories/Lab_Repositories/PoseBench/data/dockgen_set/dockgen_holo_aligned_esmfold_structures/2gag_1_FOA_0_holo_aligned_esmfold_protein.pdb,OC(O)C1CCCO1,/bml/acmwhb/Repositories/Lab_Repositories/PoseBench/data/dockgen_set/dockgen_holo_aligned_esmfold_structures/2gag_1_FOA_0_holo_aligned_esmfold_protein.pdb -2gag_1_NAD_0,/bml/acmwhb/Repositories/Lab_Repositories/PoseBench/data/dockgen_set/dockgen_holo_aligned_esmfold_structures/2gag_1_NAD_0_holo_aligned_esmfold_protein.pdb,NC1NCNC2C1NCN2[C@@H]1O[C@H](CO[PH](O)(O)O[PH](O)(O)OC[C@H]2O[C@@H](N3CCCC([C@H](N)O)C3)[C@H](O)[C@@H]2O)[C@@H](O)[C@H]1O,/bml/acmwhb/Repositories/Lab_Repositories/PoseBench/data/dockgen_set/dockgen_holo_aligned_esmfold_structures/2gag_1_NAD_0_holo_aligned_esmfold_protein.pdb 
-2gah_1_NAD_0,/bml/acmwhb/Repositories/Lab_Repositories/PoseBench/data/dockgen_set/dockgen_holo_aligned_esmfold_structures/2gah_1_NAD_0_holo_aligned_esmfold_protein.pdb,NC(O)C1CCCN([C@@H]2O[C@H](CO[PH](O)(O)O[PH](O)(O)OC[C@H]3O[C@@H](N4CNC5C(N)NCNC54)[C@H](O)[C@@H]3O)[C@@H](O)[C@H]2O)C1,/bml/acmwhb/Repositories/Lab_Repositories/PoseBench/data/dockgen_set/dockgen_holo_aligned_esmfold_structures/2gah_1_NAD_0_holo_aligned_esmfold_protein.pdb -2gf3_2_FOA_1,/bml/acmwhb/Repositories/Lab_Repositories/PoseBench/data/dockgen_set/dockgen_holo_aligned_esmfold_structures/2gf3_2_FOA_1_holo_aligned_esmfold_protein.pdb,OC(O)C1CCCO1,/bml/acmwhb/Repositories/Lab_Repositories/PoseBench/data/dockgen_set/dockgen_holo_aligned_esmfold_structures/2gf3_2_FOA_1_holo_aligned_esmfold_protein.pdb -2him_1_ASN_3,/bml/acmwhb/Repositories/Lab_Repositories/PoseBench/data/dockgen_set/dockgen_holo_aligned_esmfold_structures/2him_1_ASN_3_holo_aligned_esmfold_protein.pdb,NC(O)C[C@H](N)C(O)O,/bml/acmwhb/Repositories/Lab_Repositories/PoseBench/data/dockgen_set/dockgen_holo_aligned_esmfold_structures/2him_1_ASN_3_holo_aligned_esmfold_protein.pdb -2hk9_1_SKM_0,/bml/acmwhb/Repositories/Lab_Repositories/PoseBench/data/dockgen_set/dockgen_holo_aligned_esmfold_structures/2hk9_1_SKM_0_holo_aligned_esmfold_protein.pdb,OC(O)[C@H]1C[C@@H](O)[C@@H](O)[C@H](O)C1,/bml/acmwhb/Repositories/Lab_Repositories/PoseBench/data/dockgen_set/dockgen_holo_aligned_esmfold_structures/2hk9_1_SKM_0_holo_aligned_esmfold_protein.pdb -2hs3_1_FGR_0,/bml/acmwhb/Repositories/Lab_Repositories/PoseBench/data/dockgen_set/dockgen_holo_aligned_esmfold_structures/2hs3_1_FGR_0_holo_aligned_esmfold_protein.pdb,OCNCC(O)N[C@@H]1O[C@H](CO[PH](O)(O)O)[C@@H](O)[C@H]1O,/bml/acmwhb/Repositories/Lab_Repositories/PoseBench/data/dockgen_set/dockgen_holo_aligned_esmfold_structures/2hs3_1_FGR_0_holo_aligned_esmfold_protein.pdb 
-2o5m_1_MNR_0,/bml/acmwhb/Repositories/Lab_Repositories/PoseBench/data/dockgen_set/dockgen_holo_aligned_esmfold_structures/2o5m_1_MNR_0_holo_aligned_esmfold_protein.pdb,CCC1C(C)C2C[C@@H]3[C@H](C)[C@H](CCC(O)O)C4CC5[C@@H](CCC(O)O)C(C)C6CC7[C@@H](CC)[C@H](C)C8CC1N2[Mn@SP2](N65)(N87)N43,/bml/acmwhb/Repositories/Lab_Repositories/PoseBench/data/dockgen_set/dockgen_holo_aligned_esmfold_structures/2o5m_1_MNR_0_holo_aligned_esmfold_protein.pdb -2q37_1_3AL_0,/bml/acmwhb/Repositories/Lab_Repositories/PoseBench/data/dockgen_set/dockgen_holo_aligned_esmfold_structures/2q37_1_3AL_0_holo_aligned_esmfold_protein.pdb,NC(O)N[C@H]1NC(O)NC1O,/bml/acmwhb/Repositories/Lab_Repositories/PoseBench/data/dockgen_set/dockgen_holo_aligned_esmfold_structures/2q37_1_3AL_0_holo_aligned_esmfold_protein.pdb -2q6k_1_ADN_1,/bml/acmwhb/Repositories/Lab_Repositories/PoseBench/data/dockgen_set/dockgen_holo_aligned_esmfold_structures/2q6k_1_ADN_1_holo_aligned_esmfold_protein.pdb,NC1NCNC2C1NCN2[C@@H]1O[C@H](CO)[C@@H](O)[C@H]1O,/bml/acmwhb/Repositories/Lab_Repositories/PoseBench/data/dockgen_set/dockgen_holo_aligned_esmfold_structures/2q6k_1_ADN_1_holo_aligned_esmfold_protein.pdb -2r4e_1_13P_0,/bml/acmwhb/Repositories/Lab_Repositories/PoseBench/data/dockgen_set/dockgen_holo_aligned_esmfold_structures/2r4e_1_13P_0_holo_aligned_esmfold_protein.pdb,OC[C@@H](O)CO[PH](O)(O)O,/bml/acmwhb/Repositories/Lab_Repositories/PoseBench/data/dockgen_set/dockgen_holo_aligned_esmfold_structures/2r4e_1_13P_0_holo_aligned_esmfold_protein.pdb -2v5e_1_GU4-YYJ_0,/bml/acmwhb/Repositories/Lab_Repositories/PoseBench/data/dockgen_set/dockgen_holo_aligned_esmfold_structures/2v5e_1_GU4-YYJ_0_holo_aligned_esmfold_protein.pdb,OS(O)(O)OC[C@H]1O[C@@](COS(O)(O)O)(O[C@H]2O[C@H](COS(O)(O)O)[C@@H](OS(O)(O)O)[C@H](OS(O)(O)O)[C@H]2OS(O)(O)O)[C@@H](OS(O)(O)O)[C@@H]1OS(O)(O)O,/bml/acmwhb/Repositories/Lab_Repositories/PoseBench/data/dockgen_set/dockgen_holo_aligned_esmfold_structures/2v5e_1_GU4-YYJ_0_holo_aligned_esmfold_protein.pdb 
-2v7t_1_SAH_4,/bml/acmwhb/Repositories/Lab_Repositories/PoseBench/data/dockgen_set/dockgen_holo_aligned_esmfold_structures/2v7t_1_SAH_4_holo_aligned_esmfold_protein.pdb,NC1NCNC2C1NCN2[C@@H]1O[C@H](CSCC[C@H](N)C(O)O)[C@@H](O)[C@H]1O,/bml/acmwhb/Repositories/Lab_Repositories/PoseBench/data/dockgen_set/dockgen_holo_aligned_esmfold_structures/2v7t_1_SAH_4_holo_aligned_esmfold_protein.pdb -2v7u_1_SAM_2,/bml/acmwhb/Repositories/Lab_Repositories/PoseBench/data/dockgen_set/dockgen_holo_aligned_esmfold_structures/2v7u_1_SAM_2_holo_aligned_esmfold_protein.pdb,C[S@@H](CC[C@H](N)C(O)O)C[C@H]1O[C@@H](N2CNC3C(N)NCNC32)[C@H](O)[C@@H]1O,/bml/acmwhb/Repositories/Lab_Repositories/PoseBench/data/dockgen_set/dockgen_holo_aligned_esmfold_structures/2v7u_1_SAM_2_holo_aligned_esmfold_protein.pdb -2v7v_1_5FD_0,/bml/acmwhb/Repositories/Lab_Repositories/PoseBench/data/dockgen_set/dockgen_holo_aligned_esmfold_structures/2v7v_1_5FD_0_holo_aligned_esmfold_protein.pdb,NC1NCNC2C1NCN2[C@@H]1O[C@H](CF)[C@@H](O)[C@H]1O,/bml/acmwhb/Repositories/Lab_Repositories/PoseBench/data/dockgen_set/dockgen_holo_aligned_esmfold_structures/2v7v_1_5FD_0_holo_aligned_esmfold_protein.pdb -2v7w_1_5FD_0,/bml/acmwhb/Repositories/Lab_Repositories/PoseBench/data/dockgen_set/dockgen_holo_aligned_esmfold_structures/2v7w_1_5FD_0_holo_aligned_esmfold_protein.pdb,NC1NCNC2C1NCN2[C@@H]1O[C@H](CF)[C@@H](O)[C@H]1O,/bml/acmwhb/Repositories/Lab_Repositories/PoseBench/data/dockgen_set/dockgen_holo_aligned_esmfold_structures/2v7w_1_5FD_0_holo_aligned_esmfold_protein.pdb -2vdf_1_OCT_0,/bml/acmwhb/Repositories/Lab_Repositories/PoseBench/data/dockgen_set/dockgen_holo_aligned_esmfold_structures/2vdf_1_OCT_0_holo_aligned_esmfold_protein.pdb,CCCCCCCC,/bml/acmwhb/Repositories/Lab_Repositories/PoseBench/data/dockgen_set/dockgen_holo_aligned_esmfold_structures/2vdf_1_OCT_0_holo_aligned_esmfold_protein.pdb 
-2vfu_1_MTL_0,/bml/acmwhb/Repositories/Lab_Repositories/PoseBench/data/dockgen_set/dockgen_holo_aligned_esmfold_structures/2vfu_1_MTL_0_holo_aligned_esmfold_protein.pdb,OC[C@@H](O)[C@@H](O)[C@H](O)[C@H](O)CO,/bml/acmwhb/Repositories/Lab_Repositories/PoseBench/data/dockgen_set/dockgen_holo_aligned_esmfold_structures/2vfu_1_MTL_0_holo_aligned_esmfold_protein.pdb -2wab_1_BGC-BGC-BGC-BGC-BGC_0,/bml/acmwhb/Repositories/Lab_Repositories/PoseBench/data/dockgen_set/dockgen_holo_aligned_esmfold_structures/2wab_1_BGC-BGC-BGC-BGC-BGC_0_holo_aligned_esmfold_protein.pdb,OC[C@H]1O[C@@H](O[C@H]2[C@H](O)[C@@H](O)[C@H](O[C@H]3[C@H](O)[C@@H](O)[C@H](O[C@H]4[C@H](O)[C@@H](O)[C@H](O[C@H]5[C@H](O)[C@@H](O)[C@H](O)O[C@@H]5CO)O[C@@H]4CO)O[C@@H]3CO)O[C@@H]2CO)[C@H](O)[C@@H](O)[C@@H]1O,/bml/acmwhb/Repositories/Lab_Repositories/PoseBench/data/dockgen_set/dockgen_holo_aligned_esmfold_structures/2wab_1_BGC-BGC-BGC-BGC-BGC_0_holo_aligned_esmfold_protein.pdb -2wao_1_BGC-BGC-BGC-BGC-BGC_0,/bml/acmwhb/Repositories/Lab_Repositories/PoseBench/data/dockgen_set/dockgen_holo_aligned_esmfold_structures/2wao_1_BGC-BGC-BGC-BGC-BGC_0_holo_aligned_esmfold_protein.pdb,OC[C@H]1O[C@@H](O[C@H]2[C@H](O)[C@@H](O)[C@H](O[C@H]3[C@H](O)[C@@H](O)[C@H](O[C@H]4[C@H](O)[C@@H](O)[C@H](O[C@H]5[C@H](O)[C@@H](O)[C@H](O)O[C@@H]5CO)O[C@@H]4CO)O[C@@H]3CO)O[C@@H]2CO)[C@H](O)[C@@H](O)[C@@H]1O,/bml/acmwhb/Repositories/Lab_Repositories/PoseBench/data/dockgen_set/dockgen_holo_aligned_esmfold_structures/2wao_1_BGC-BGC-BGC-BGC-BGC_0_holo_aligned_esmfold_protein.pdb -2wr8_1_SAH_1,/bml/acmwhb/Repositories/Lab_Repositories/PoseBench/data/dockgen_set/dockgen_holo_aligned_esmfold_structures/2wr8_1_SAH_1_holo_aligned_esmfold_protein.pdb,NC1NCNC2C1NCN2[C@@H]1O[C@H](CSCC[C@H](N)C(O)O)[C@@H](O)[C@H]1O,/bml/acmwhb/Repositories/Lab_Repositories/PoseBench/data/dockgen_set/dockgen_holo_aligned_esmfold_structures/2wr8_1_SAH_1_holo_aligned_esmfold_protein.pdb 
-2wwc_1_CHT_2,/bml/acmwhb/Repositories/Lab_Repositories/PoseBench/data/dockgen_set/dockgen_holo_aligned_esmfold_structures/2wwc_1_CHT_2_holo_aligned_esmfold_protein.pdb,C[N+](C)(C)CCO,/bml/acmwhb/Repositories/Lab_Repositories/PoseBench/data/dockgen_set/dockgen_holo_aligned_esmfold_structures/2wwc_1_CHT_2_holo_aligned_esmfold_protein.pdb -2x34_2_UQ8_0,/bml/acmwhb/Repositories/Lab_Repositories/PoseBench/data/dockgen_set/dockgen_holo_aligned_esmfold_structures/2x34_2_UQ8_0_holo_aligned_esmfold_protein.pdb,COC1C(O)C(C)C(CCC(C)CCCC(C)CCCC(C)CCC[C@H](C)CCCC(C)CCC[C@H](C)CCCC(C)CCCC(C)C)C(O)C1OC,/bml/acmwhb/Repositories/Lab_Repositories/PoseBench/data/dockgen_set/dockgen_holo_aligned_esmfold_structures/2x34_2_UQ8_0_holo_aligned_esmfold_protein.pdb -2xrh_1_NIO_1,/bml/acmwhb/Repositories/Lab_Repositories/PoseBench/data/dockgen_set/dockgen_holo_aligned_esmfold_structures/2xrh_1_NIO_1_holo_aligned_esmfold_protein.pdb,OC(O)[C@H]1CCCNC1,/bml/acmwhb/Repositories/Lab_Repositories/PoseBench/data/dockgen_set/dockgen_holo_aligned_esmfold_structures/2xrh_1_NIO_1_holo_aligned_esmfold_protein.pdb -2xta_1_ACO_0,/bml/acmwhb/Repositories/Lab_Repositories/PoseBench/data/dockgen_set/dockgen_holo_aligned_esmfold_structures/2xta_1_ACO_0_holo_aligned_esmfold_protein.pdb,CCO[PH](O)(O)O[PH](O)(O)OC[C@H]1O[C@@H](N2CNC3C(N)NCNC32)[C@H](O)[C@@H]1O[PH](O)(O)O,/bml/acmwhb/Repositories/Lab_Repositories/PoseBench/data/dockgen_set/dockgen_holo_aligned_esmfold_structures/2xta_1_ACO_0_holo_aligned_esmfold_protein.pdb -2zcz_2_TRP_3,/bml/acmwhb/Repositories/Lab_Repositories/PoseBench/data/dockgen_set/dockgen_holo_aligned_esmfold_structures/2zcz_2_TRP_3_holo_aligned_esmfold_protein.pdb,N[C@@H](CC1CNC2CCCCC12)C(O)O,/bml/acmwhb/Repositories/Lab_Repositories/PoseBench/data/dockgen_set/dockgen_holo_aligned_esmfold_structures/2zcz_2_TRP_3_holo_aligned_esmfold_protein.pdb 
-2zd0_1_TRP_9,/bml/acmwhb/Repositories/Lab_Repositories/PoseBench/data/dockgen_set/dockgen_holo_aligned_esmfold_structures/2zd0_1_TRP_9_holo_aligned_esmfold_protein.pdb,N[C@@H](CC1CNC2CCCCC12)C(O)O,/bml/acmwhb/Repositories/Lab_Repositories/PoseBench/data/dockgen_set/dockgen_holo_aligned_esmfold_structures/2zd0_1_TRP_9_holo_aligned_esmfold_protein.pdb -2ze9_1_PD7_0,/bml/acmwhb/Repositories/Lab_Repositories/PoseBench/data/dockgen_set/dockgen_holo_aligned_esmfold_structures/2ze9_1_PD7_0_holo_aligned_esmfold_protein.pdb,CCCCCCC(O)OC[C@H](CO[PH](O)(O)O)OC(O)CCCCCC,/bml/acmwhb/Repositories/Lab_Repositories/PoseBench/data/dockgen_set/dockgen_holo_aligned_esmfold_structures/2ze9_1_PD7_0_holo_aligned_esmfold_protein.pdb -3ad7_1_NAD_0,/bml/acmwhb/Repositories/Lab_Repositories/PoseBench/data/dockgen_set/dockgen_holo_aligned_esmfold_structures/3ad7_1_NAD_0_holo_aligned_esmfold_protein.pdb,N[C@H](O)C1CCCN([C@@H]2O[C@H](CO[PH](O)(O)O[PH](O)(O)OC[C@H]3O[C@@H](N4CNC5[C@H](N)NCN[C@H]54)[C@H](O)[C@@H]3O)[C@@H](O)[C@H]2O)C1,/bml/acmwhb/Repositories/Lab_Repositories/PoseBench/data/dockgen_set/dockgen_holo_aligned_esmfold_structures/3ad7_1_NAD_0_holo_aligned_esmfold_protein.pdb -3ad9_1_NAD_0,/bml/acmwhb/Repositories/Lab_Repositories/PoseBench/data/dockgen_set/dockgen_holo_aligned_esmfold_structures/3ad9_1_NAD_0_holo_aligned_esmfold_protein.pdb,NC1NCNC2[C@@H]1NCN2[C@@H]1O[C@H](CO[PH](O)(O)O[PH](O)(O)OC[C@H]2O[C@@H](N3CCCC([C@H](N)O)C3)[C@H](O)[C@@H]2O)[C@@H](O)[C@H]1O,/bml/acmwhb/Repositories/Lab_Repositories/PoseBench/data/dockgen_set/dockgen_holo_aligned_esmfold_structures/3ad9_1_NAD_0_holo_aligned_esmfold_protein.pdb 
-3ada_1_NAD_0,/bml/acmwhb/Repositories/Lab_Repositories/PoseBench/data/dockgen_set/dockgen_holo_aligned_esmfold_structures/3ada_1_NAD_0_holo_aligned_esmfold_protein.pdb,N[C@H](O)C1CCCN([C@@H]2O[C@H](CO[PH](O)(O)O[PH](O)(O)OC[C@H]3O[C@@H](N4CN[C@@H]5[C@H](N)NCN[C@H]54)[C@H](O)[C@@H]3O)[C@@H](O)[C@H]2O)C1,/bml/acmwhb/Repositories/Lab_Repositories/PoseBench/data/dockgen_set/dockgen_holo_aligned_esmfold_structures/3ada_1_NAD_0_holo_aligned_esmfold_protein.pdb -3eca_1_ASP_3,/bml/acmwhb/Repositories/Lab_Repositories/PoseBench/data/dockgen_set/dockgen_holo_aligned_esmfold_structures/3eca_1_ASP_3_holo_aligned_esmfold_protein.pdb,N[C@@H](CC(O)O)C(O)O,/bml/acmwhb/Repositories/Lab_Repositories/PoseBench/data/dockgen_set/dockgen_holo_aligned_esmfold_structures/3eca_1_ASP_3_holo_aligned_esmfold_protein.pdb -3gf4_1_FAD_1,/bml/acmwhb/Repositories/Lab_Repositories/PoseBench/data/dockgen_set/dockgen_holo_aligned_esmfold_structures/3gf4_1_FAD_1_holo_aligned_esmfold_protein.pdb,CC1CC2NC3C(O)NC(O)NC3N(C[C@H](O)[C@H](O)[C@H](O)CO[PH](O)(O)O[PH](O)(O)OC[C@H]3O[C@@H](N4CNC5C4NCN[C@H]5N)[C@H](O)[C@@H]3O)C2CC1C,/bml/acmwhb/Repositories/Lab_Repositories/PoseBench/data/dockgen_set/dockgen_holo_aligned_esmfold_structures/3gf4_1_FAD_1_holo_aligned_esmfold_protein.pdb -3gf4_1_U5P_0,/bml/acmwhb/Repositories/Lab_Repositories/PoseBench/data/dockgen_set/dockgen_holo_aligned_esmfold_structures/3gf4_1_U5P_0_holo_aligned_esmfold_protein.pdb,OC1CCN([C@@H]2O[C@H](CO[PH](O)(O)O)[C@@H](O)[C@H]2O)C(O)N1,/bml/acmwhb/Repositories/Lab_Repositories/PoseBench/data/dockgen_set/dockgen_holo_aligned_esmfold_structures/3gf4_1_U5P_0_holo_aligned_esmfold_protein.pdb 
-3he3_5_UDP_0,/bml/acmwhb/Repositories/Lab_Repositories/PoseBench/data/dockgen_set/dockgen_holo_aligned_esmfold_structures/3he3_5_UDP_0_holo_aligned_esmfold_protein.pdb,OC1CCN([C@@H]2O[C@H](CO[PH](O)(O)O[PH](O)(O)O)[C@@H](O)[C@H]2O)C(O)N1,/bml/acmwhb/Repositories/Lab_Repositories/PoseBench/data/dockgen_set/dockgen_holo_aligned_esmfold_structures/3he3_5_UDP_0_holo_aligned_esmfold_protein.pdb -3it6_1_ORN_1,/bml/acmwhb/Repositories/Lab_Repositories/PoseBench/data/dockgen_set/dockgen_holo_aligned_esmfold_structures/3it6_1_ORN_1_holo_aligned_esmfold_protein.pdb,NCCC[C@H](N)C(O)O,/bml/acmwhb/Repositories/Lab_Repositories/PoseBench/data/dockgen_set/dockgen_holo_aligned_esmfold_structures/3it6_1_ORN_1_holo_aligned_esmfold_protein.pdb -3k8l_1_GLC-GLC-GLC-GLC-GLC_0,/bml/acmwhb/Repositories/Lab_Repositories/PoseBench/data/dockgen_set/dockgen_holo_aligned_esmfold_structures/3k8l_1_GLC-GLC-GLC-GLC-GLC_0_holo_aligned_esmfold_protein.pdb,OC[C@H]1O[C@H](O[C@H]2[C@H](O)[C@@H](O)[C@@H](O[C@H]3[C@H](O)[C@@H](O)[C@@H](O[C@H]4[C@H](O)[C@@H](O)[C@@H](O[C@H]5[C@H](O)[C@@H](O)[C@@H](O)O[C@@H]5CO)O[C@@H]4CO)O[C@@H]3CO)O[C@@H]2CO)[C@H](O)[C@@H](O)[C@@H]1O,/bml/acmwhb/Repositories/Lab_Repositories/PoseBench/data/dockgen_set/dockgen_holo_aligned_esmfold_structures/3k8l_1_GLC-GLC-GLC-GLC-GLC_0_holo_aligned_esmfold_protein.pdb -3k8m_1_GLC-GLC-AC1_0,/bml/acmwhb/Repositories/Lab_Repositories/PoseBench/data/dockgen_set/dockgen_holo_aligned_esmfold_structures/3k8m_1_GLC-GLC-AC1_0_holo_aligned_esmfold_protein.pdb,C[C@H]1O[C@H](O[C@H]2[C@H](O)[C@@H](O)[C@@H](O[C@H]3[C@H](O)[C@@H](O)[C@@H](O)O[C@@H]3CO)O[C@@H]2CO)[C@H](O)[C@@H](O)[C@@H]1N[C@H]1CC(CO)[C@@H](O)[C@H](O)[C@H]1O,/bml/acmwhb/Repositories/Lab_Repositories/PoseBench/data/dockgen_set/dockgen_holo_aligned_esmfold_structures/3k8m_1_GLC-GLC-AC1_0_holo_aligned_esmfold_protein.pdb 
-3nvv_1_MTE_1,/bml/acmwhb/Repositories/Lab_Repositories/PoseBench/data/dockgen_set/dockgen_holo_aligned_esmfold_structures/3nvv_1_MTE_1_holo_aligned_esmfold_protein.pdb,NC1NC(O)C2N[C@@H]3[C@H](NC2N1)O[C@H](CO[PH](O)(O)O)C(S)[C@@H]3S,/bml/acmwhb/Repositories/Lab_Repositories/PoseBench/data/dockgen_set/dockgen_holo_aligned_esmfold_structures/3nvv_1_MTE_1_holo_aligned_esmfold_protein.pdb -3o01_2_DXC_0,/bml/acmwhb/Repositories/Lab_Repositories/PoseBench/data/dockgen_set/dockgen_holo_aligned_esmfold_structures/3o01_2_DXC_0_holo_aligned_esmfold_protein.pdb,C[C@H](CCC(O)O)[C@H]1CC[C@H]2[C@@H]3CC[C@@H]4C[C@H](O)CC[C@]4(C)[C@H]3C[C@H](O)[C@]12C,/bml/acmwhb/Repositories/Lab_Repositories/PoseBench/data/dockgen_set/dockgen_holo_aligned_esmfold_structures/3o01_2_DXC_0_holo_aligned_esmfold_protein.pdb -3o02_2_JN3_0,/bml/acmwhb/Repositories/Lab_Repositories/PoseBench/data/dockgen_set/dockgen_holo_aligned_esmfold_structures/3o02_2_JN3_0_holo_aligned_esmfold_protein.pdb,C[C@H](CCC(O)O)[C@H]1CC[C@H]2[C@@H]3[C@H](O)C[C@@H]4C[C@H](O)CC[C@]4(C)[C@H]3CC[C@]12C,/bml/acmwhb/Repositories/Lab_Repositories/PoseBench/data/dockgen_set/dockgen_holo_aligned_esmfold_structures/3o02_2_JN3_0_holo_aligned_esmfold_protein.pdb -3o7j_1_2AL_0,/bml/acmwhb/Repositories/Lab_Repositories/PoseBench/data/dockgen_set/dockgen_holo_aligned_esmfold_structures/3o7j_1_2AL_0_holo_aligned_esmfold_protein.pdb,NC(O)NC1NC(O)NC1O,/bml/acmwhb/Repositories/Lab_Repositories/PoseBench/data/dockgen_set/dockgen_holo_aligned_esmfold_structures/3o7j_1_2AL_0_holo_aligned_esmfold_protein.pdb -3q14_1_PCR_3,/bml/acmwhb/Repositories/Lab_Repositories/PoseBench/data/dockgen_set/dockgen_holo_aligned_esmfold_structures/3q14_1_PCR_3_holo_aligned_esmfold_protein.pdb,CC1CCC(O)CC1,/bml/acmwhb/Repositories/Lab_Repositories/PoseBench/data/dockgen_set/dockgen_holo_aligned_esmfold_structures/3q14_1_PCR_3_holo_aligned_esmfold_protein.pdb 
-3qrc_2_GU4-YYJ_0,/bml/acmwhb/Repositories/Lab_Repositories/PoseBench/data/dockgen_set/dockgen_holo_aligned_esmfold_structures/3qrc_2_GU4-YYJ_0_holo_aligned_esmfold_protein.pdb,OS(O)(O)OC[C@H]1O[C@@](COS(O)(O)O)(O[C@H]2O[C@H](COS(O)(O)O)[C@@H](OS(O)(O)O)[C@H](OS(O)(O)O)[C@H]2OS(O)(O)O)[C@@H](OS(O)(O)O)[C@@H]1OS(O)(O)O,/bml/acmwhb/Repositories/Lab_Repositories/PoseBench/data/dockgen_set/dockgen_holo_aligned_esmfold_structures/3qrc_2_GU4-YYJ_0_holo_aligned_esmfold_protein.pdb -3s5x_1_BMA-MAN-MAN-MAN-MAN_0,/bml/acmwhb/Repositories/Lab_Repositories/PoseBench/data/dockgen_set/dockgen_holo_aligned_esmfold_structures/3s5x_1_BMA-MAN-MAN-MAN-MAN_0_holo_aligned_esmfold_protein.pdb,OC[C@H]1O[C@H](O[C@@H]2[C@H](O)[C@@H](OC[C@H]3O[C@@H](O)[C@@H](O)[C@@H](O[C@H]4O[C@H](CO)[C@@H](O)[C@H](O)[C@@H]4O)[C@@H]3O)O[C@H](CO)[C@H]2O)[C@@H](O)[C@@H](O)[C@@H]1O,/bml/acmwhb/Repositories/Lab_Repositories/PoseBench/data/dockgen_set/dockgen_holo_aligned_esmfold_structures/3s5x_1_BMA-MAN-MAN-MAN-MAN_0_holo_aligned_esmfold_protein.pdb -3s6a_1_ANP_0,/bml/acmwhb/Repositories/Lab_Repositories/PoseBench/data/dockgen_set/dockgen_holo_aligned_esmfold_structures/3s6a_1_ANP_0_holo_aligned_esmfold_protein.pdb,NC1NCNC2C1NCN2[C@@H]1O[C@H](CO[PH](O)(O)O[PH](N)(O)O)[C@@H](O)[C@H]1O,/bml/acmwhb/Repositories/Lab_Repositories/PoseBench/data/dockgen_set/dockgen_holo_aligned_esmfold_structures/3s6a_1_ANP_0_holo_aligned_esmfold_protein.pdb -3se5_1_ANP_2,/bml/acmwhb/Repositories/Lab_Repositories/PoseBench/data/dockgen_set/dockgen_holo_aligned_esmfold_structures/3se5_1_ANP_2_holo_aligned_esmfold_protein.pdb,NC1NCNC2C1NCN2[C@@H]1O[C@H](CO[PH](O)(O)O[PH](O)(O)N[PH](O)(O)O)[C@@H](O)[C@H]1O,/bml/acmwhb/Repositories/Lab_Repositories/PoseBench/data/dockgen_set/dockgen_holo_aligned_esmfold_structures/3se5_1_ANP_2_holo_aligned_esmfold_protein.pdb 
-3sr6_1_MTE_1,/bml/acmwhb/Repositories/Lab_Repositories/PoseBench/data/dockgen_set/dockgen_holo_aligned_esmfold_structures/3sr6_1_MTE_1_holo_aligned_esmfold_protein.pdb,NC1NC(O)[C@@H]2N[C@H]3C(S)C(S)[C@@H](CO[PH](O)(O)O)O[C@H]3NC2N1,/bml/acmwhb/Repositories/Lab_Repositories/PoseBench/data/dockgen_set/dockgen_holo_aligned_esmfold_structures/3sr6_1_MTE_1_holo_aligned_esmfold_protein.pdb -3ub7_1_ACM_1,/bml/acmwhb/Repositories/Lab_Repositories/PoseBench/data/dockgen_set/dockgen_holo_aligned_esmfold_structures/3ub7_1_ACM_1_holo_aligned_esmfold_protein.pdb,CC(N)O,/bml/acmwhb/Repositories/Lab_Repositories/PoseBench/data/dockgen_set/dockgen_holo_aligned_esmfold_structures/3ub7_1_ACM_1_holo_aligned_esmfold_protein.pdb -3ub9_1_NHY_1,/bml/acmwhb/Repositories/Lab_Repositories/PoseBench/data/dockgen_set/dockgen_holo_aligned_esmfold_structures/3ub9_1_NHY_1_holo_aligned_esmfold_protein.pdb,N[C@@H](O)NO,/bml/acmwhb/Repositories/Lab_Repositories/PoseBench/data/dockgen_set/dockgen_holo_aligned_esmfold_structures/3ub9_1_NHY_1_holo_aligned_esmfold_protein.pdb -3uni_1_SAL_0,/bml/acmwhb/Repositories/Lab_Repositories/PoseBench/data/dockgen_set/dockgen_holo_aligned_esmfold_structures/3uni_1_SAL_0_holo_aligned_esmfold_protein.pdb,OC(O)C1CCCCC1O,/bml/acmwhb/Repositories/Lab_Repositories/PoseBench/data/dockgen_set/dockgen_holo_aligned_esmfold_structures/3uni_1_SAL_0_holo_aligned_esmfold_protein.pdb -3wrb_1_GDE_0,/bml/acmwhb/Repositories/Lab_Repositories/PoseBench/data/dockgen_set/dockgen_holo_aligned_esmfold_structures/3wrb_1_GDE_0_holo_aligned_esmfold_protein.pdb,OC(O)C1CC(O)C(O)C(O)C1,/bml/acmwhb/Repositories/Lab_Repositories/PoseBench/data/dockgen_set/dockgen_holo_aligned_esmfold_structures/3wrb_1_GDE_0_holo_aligned_esmfold_protein.pdb 
-3wvc_1_FEG_0,/bml/acmwhb/Repositories/Lab_Repositories/PoseBench/data/dockgen_set/dockgen_holo_aligned_esmfold_structures/3wvc_1_FEG_0_holo_aligned_esmfold_protein.pdb,CC1C(CC(O)O)NC(O)[C@@H](C)[C@@H]1O[PH](O)(O)OC[C@H]1O[C@@H](N2CNC3C(O)NC(N)NC32)[C@H](O)[C@@H]1O,/bml/acmwhb/Repositories/Lab_Repositories/PoseBench/data/dockgen_set/dockgen_holo_aligned_esmfold_structures/3wvc_1_FEG_0_holo_aligned_esmfold_protein.pdb -3zec_1_ANP_0,/bml/acmwhb/Repositories/Lab_Repositories/PoseBench/data/dockgen_set/dockgen_holo_aligned_esmfold_structures/3zec_1_ANP_0_holo_aligned_esmfold_protein.pdb,NC1NCNC2C1NCN2[C@@H]1O[C@H](CO[PH](O)(O)O[PH](O)(O)N[PH](O)(O)O)[C@@H](O)[C@H]1O,/bml/acmwhb/Repositories/Lab_Repositories/PoseBench/data/dockgen_set/dockgen_holo_aligned_esmfold_structures/3zec_1_ANP_0_holo_aligned_esmfold_protein.pdb -3zjx_1_BOG_0,/bml/acmwhb/Repositories/Lab_Repositories/PoseBench/data/dockgen_set/dockgen_holo_aligned_esmfold_structures/3zjx_1_BOG_0_holo_aligned_esmfold_protein.pdb,CCCCCCCCO[C@@H]1O[C@H](CO)[C@@H](O)[C@H](O)[C@H]1O,/bml/acmwhb/Repositories/Lab_Repositories/PoseBench/data/dockgen_set/dockgen_holo_aligned_esmfold_structures/3zjx_1_BOG_0_holo_aligned_esmfold_protein.pdb -3zqu_1_FNR_5,/bml/acmwhb/Repositories/Lab_Repositories/PoseBench/data/dockgen_set/dockgen_holo_aligned_esmfold_structures/3zqu_1_FNR_5_holo_aligned_esmfold_protein.pdb,C[C@H]1CC2NC3[C@H](O)N[C@H](O)N[C@@H]3N(C[C@H](O)[C@H](O)[C@H](O)CO[PH](O)(O)O)[C@@H]2C[C@@H]1C,/bml/acmwhb/Repositories/Lab_Repositories/PoseBench/data/dockgen_set/dockgen_holo_aligned_esmfold_structures/3zqu_1_FNR_5_holo_aligned_esmfold_protein.pdb -3zzs_1_TRP_3,/bml/acmwhb/Repositories/Lab_Repositories/PoseBench/data/dockgen_set/dockgen_holo_aligned_esmfold_structures/3zzs_1_TRP_3_holo_aligned_esmfold_protein.pdb,N[C@@H](CC1CNC2CCCCC12)C(O)O,/bml/acmwhb/Repositories/Lab_Repositories/PoseBench/data/dockgen_set/dockgen_holo_aligned_esmfold_structures/3zzs_1_TRP_3_holo_aligned_esmfold_protein.pdb 
-4b4v_1_L34_0,/bml/acmwhb/Repositories/Lab_Repositories/PoseBench/data/dockgen_set/dockgen_holo_aligned_esmfold_structures/4b4v_1_L34_0_holo_aligned_esmfold_protein.pdb,NC1NC(O)C2C(NC[C@@H]3CN(C4CCC([C@H](O)N[C@@H](CCC(O)O)C(O)O)CC4)C(O)N23)N1,/bml/acmwhb/Repositories/Lab_Repositories/PoseBench/data/dockgen_set/dockgen_holo_aligned_esmfold_structures/4b4v_1_L34_0_holo_aligned_esmfold_protein.pdb -4bc9_1_CNV-FAD_0,/bml/acmwhb/Repositories/Lab_Repositories/PoseBench/data/dockgen_set/dockgen_holo_aligned_esmfold_structures/4bc9_1_CNV-FAD_0_holo_aligned_esmfold_protein.pdb,CC1CC2[C@H](CC1C)N(C(C)CN)C1C(O)NC(O)NC1N2C[C@H](O)[C@H](O)[C@H](O)CO[PH](O)(O)O[PH](O)(O)OC[C@H]1O[C@@H](N2CNC3C(N)NCN[C@H]32)[C@H](O)[C@@H]1O,/bml/acmwhb/Repositories/Lab_Repositories/PoseBench/data/dockgen_set/dockgen_holo_aligned_esmfold_structures/4bc9_1_CNV-FAD_0_holo_aligned_esmfold_protein.pdb -4cdn_2_FAD_0,/bml/acmwhb/Repositories/Lab_Repositories/PoseBench/data/dockgen_set/dockgen_holo_aligned_esmfold_structures/4cdn_2_FAD_0_holo_aligned_esmfold_protein.pdb,CC1CC2NC3C(O)NC(O)NC3N(C[C@H](O)[C@H](O)[C@H](O)CO[PH](O)(O)O[PH](O)(O)OC[C@H]3O[C@@H](N4CNC5C(N)NCNC54)[C@H](O)[C@@H]3O)C2C[C@H]1C,/bml/acmwhb/Repositories/Lab_Repositories/PoseBench/data/dockgen_set/dockgen_holo_aligned_esmfold_structures/4cdn_2_FAD_0_holo_aligned_esmfold_protein.pdb -4fyv_1_DCP_2,/bml/acmwhb/Repositories/Lab_Repositories/PoseBench/data/dockgen_set/dockgen_holo_aligned_esmfold_structures/4fyv_1_DCP_2_holo_aligned_esmfold_protein.pdb,NC1CCN([C@H]2C[C@H](O)[C@@H](CO[PH](O)(O)O[PH](O)(O)O[PH](O)(O)O)O2)C(O)N1,/bml/acmwhb/Repositories/Lab_Repositories/PoseBench/data/dockgen_set/dockgen_holo_aligned_esmfold_structures/4fyv_1_DCP_2_holo_aligned_esmfold_protein.pdb 
-4fyw_1_CTP_4,/bml/acmwhb/Repositories/Lab_Repositories/PoseBench/data/dockgen_set/dockgen_holo_aligned_esmfold_structures/4fyw_1_CTP_4_holo_aligned_esmfold_protein.pdb,NC1CCN([C@@H]2O[C@H](CO[PH](O)(O)O[PH](O)(O)O[PH](O)(O)O)[C@@H](O)[C@H]2O)C(O)N1,/bml/acmwhb/Repositories/Lab_Repositories/PoseBench/data/dockgen_set/dockgen_holo_aligned_esmfold_structures/4fyw_1_CTP_4_holo_aligned_esmfold_protein.pdb -4gk9_1_BMA-MAN-MAN-MAN-MAN_3,/bml/acmwhb/Repositories/Lab_Repositories/PoseBench/data/dockgen_set/dockgen_holo_aligned_esmfold_structures/4gk9_1_BMA-MAN-MAN-MAN-MAN_3_holo_aligned_esmfold_protein.pdb,OC[C@H]1O[C@H](OC[C@H]2O[C@H](OC[C@H]3O[C@@H](O)[C@@H](O)[C@@H](O[C@H]4O[C@H](CO)[C@@H](O)[C@H](O)[C@@H]4O)[C@@H]3O)[C@@H](O)[C@@H](O[C@H]3O[C@H](CO)[C@@H](O)[C@H](O)[C@@H]3O)[C@@H]2O)[C@@H](O)[C@@H](O)[C@@H]1O,/bml/acmwhb/Repositories/Lab_Repositories/PoseBench/data/dockgen_set/dockgen_holo_aligned_esmfold_structures/4gk9_1_BMA-MAN-MAN-MAN-MAN_3_holo_aligned_esmfold_protein.pdb -4h2f_1_ADN_0,/bml/acmwhb/Repositories/Lab_Repositories/PoseBench/data/dockgen_set/dockgen_holo_aligned_esmfold_structures/4h2f_1_ADN_0_holo_aligned_esmfold_protein.pdb,NC1NCNC2C1NCN2[C@@H]1O[C@H](CO)[C@@H](O)[C@H]1O,/bml/acmwhb/Repositories/Lab_Repositories/PoseBench/data/dockgen_set/dockgen_holo_aligned_esmfold_structures/4h2f_1_ADN_0_holo_aligned_esmfold_protein.pdb -4idk_1_1FE_0,/bml/acmwhb/Repositories/Lab_Repositories/PoseBench/data/dockgen_set/dockgen_holo_aligned_esmfold_structures/4idk_1_1FE_0_holo_aligned_esmfold_protein.pdb,NCC(O)NC1CCC2NC(O)NC2C1,/bml/acmwhb/Repositories/Lab_Repositories/PoseBench/data/dockgen_set/dockgen_holo_aligned_esmfold_structures/4idk_1_1FE_0_holo_aligned_esmfold_protein.pdb 
-4kgx_1_CTP_5,/bml/acmwhb/Repositories/Lab_Repositories/PoseBench/data/dockgen_set/dockgen_holo_aligned_esmfold_structures/4kgx_1_CTP_5_holo_aligned_esmfold_protein.pdb,NC1CCN([C@@H]2O[C@H](CO[PH](O)(O)O[PH](O)(O)O[PH](O)(O)O)[C@@H](O)[C@H]2O)C(O)N1,/bml/acmwhb/Repositories/Lab_Repositories/PoseBench/data/dockgen_set/dockgen_holo_aligned_esmfold_structures/4kgx_1_CTP_5_holo_aligned_esmfold_protein.pdb -4mig_1_G3F_2,/bml/acmwhb/Repositories/Lab_Repositories/PoseBench/data/dockgen_set/dockgen_holo_aligned_esmfold_structures/4mig_1_G3F_2_holo_aligned_esmfold_protein.pdb,OC[C@H]1O[C@@H](O)[C@H](O)[C@@H](F)[C@@H]1O,/bml/acmwhb/Repositories/Lab_Repositories/PoseBench/data/dockgen_set/dockgen_holo_aligned_esmfold_structures/4mig_1_G3F_2_holo_aligned_esmfold_protein.pdb -4mo2_2_FDA_0,/bml/acmwhb/Repositories/Lab_Repositories/PoseBench/data/dockgen_set/dockgen_holo_aligned_esmfold_structures/4mo2_2_FDA_0_holo_aligned_esmfold_protein.pdb,CC1CC2[C@@H](CC1C)N[C@H]1C(O)NC(O)N[C@H]1N2C[C@H](O)[C@H](O)[C@H](O)CO[PH](O)(O)O[PH](O)(O)OC[C@H]1O[C@@H](N2CNC3C(N)NCNC32)[C@H](O)[C@@H]1O,/bml/acmwhb/Repositories/Lab_Repositories/PoseBench/data/dockgen_set/dockgen_holo_aligned_esmfold_structures/4mo2_2_FDA_0_holo_aligned_esmfold_protein.pdb -4mos_1_GAF_1,/bml/acmwhb/Repositories/Lab_Repositories/PoseBench/data/dockgen_set/dockgen_holo_aligned_esmfold_structures/4mos_1_GAF_1_holo_aligned_esmfold_protein.pdb,OC[C@H]1O[C@H](O)[C@H](F)[C@@H](O)[C@H]1O,/bml/acmwhb/Repositories/Lab_Repositories/PoseBench/data/dockgen_set/dockgen_holo_aligned_esmfold_structures/4mos_1_GAF_1_holo_aligned_esmfold_protein.pdb -4n4l_1_HG1_0,/bml/acmwhb/Repositories/Lab_Repositories/PoseBench/data/dockgen_set/dockgen_holo_aligned_esmfold_structures/4n4l_1_HG1_0_holo_aligned_esmfold_protein.pdb,NC(O)CCCC1CCCCC1,/bml/acmwhb/Repositories/Lab_Repositories/PoseBench/data/dockgen_set/dockgen_holo_aligned_esmfold_structures/4n4l_1_HG1_0_holo_aligned_esmfold_protein.pdb 
-4o0d_1_GLY_3,/bml/acmwhb/Repositories/Lab_Repositories/PoseBench/data/dockgen_set/dockgen_holo_aligned_esmfold_structures/4o0d_1_GLY_3_holo_aligned_esmfold_protein.pdb,NCC(O)O,/bml/acmwhb/Repositories/Lab_Repositories/PoseBench/data/dockgen_set/dockgen_holo_aligned_esmfold_structures/4o0d_1_GLY_3_holo_aligned_esmfold_protein.pdb -4o0f_1_GLY_3,/bml/acmwhb/Repositories/Lab_Repositories/PoseBench/data/dockgen_set/dockgen_holo_aligned_esmfold_structures/4o0f_1_GLY_3_holo_aligned_esmfold_protein.pdb,NCC(O)O,/bml/acmwhb/Repositories/Lab_Repositories/PoseBench/data/dockgen_set/dockgen_holo_aligned_esmfold_structures/4o0f_1_GLY_3_holo_aligned_esmfold_protein.pdb -4o95_1_245_0,/bml/acmwhb/Repositories/Lab_Repositories/PoseBench/data/dockgen_set/dockgen_holo_aligned_esmfold_structures/4o95_1_245_0_holo_aligned_esmfold_protein.pdb,OC(NC1CCCCC1)N[C@H]1CCNC(Cl)C1,/bml/acmwhb/Repositories/Lab_Repositories/PoseBench/data/dockgen_set/dockgen_holo_aligned_esmfold_structures/4o95_1_245_0_holo_aligned_esmfold_protein.pdb -4oal_2_245_0,/bml/acmwhb/Repositories/Lab_Repositories/PoseBench/data/dockgen_set/dockgen_holo_aligned_esmfold_structures/4oal_2_245_0_holo_aligned_esmfold_protein.pdb,OC(NC1CCCCC1)N[C@H]1CCNC(Cl)C1,/bml/acmwhb/Repositories/Lab_Repositories/PoseBench/data/dockgen_set/dockgen_holo_aligned_esmfold_structures/4oal_2_245_0_holo_aligned_esmfold_protein.pdb -4osx_1_GLY_2,/bml/acmwhb/Repositories/Lab_Repositories/PoseBench/data/dockgen_set/dockgen_holo_aligned_esmfold_structures/4osx_1_GLY_2_holo_aligned_esmfold_protein.pdb,NCC(O)O,/bml/acmwhb/Repositories/Lab_Repositories/PoseBench/data/dockgen_set/dockgen_holo_aligned_esmfold_structures/4osx_1_GLY_2_holo_aligned_esmfold_protein.pdb 
-4osy_1_GLY_3,/bml/acmwhb/Repositories/Lab_Repositories/PoseBench/data/dockgen_set/dockgen_holo_aligned_esmfold_structures/4osy_1_GLY_3_holo_aligned_esmfold_protein.pdb,NCC(O)O,/bml/acmwhb/Repositories/Lab_Repositories/PoseBench/data/dockgen_set/dockgen_holo_aligned_esmfold_structures/4osy_1_GLY_3_holo_aligned_esmfold_protein.pdb -4pfx_1_UDP_0,/bml/acmwhb/Repositories/Lab_Repositories/PoseBench/data/dockgen_set/dockgen_holo_aligned_esmfold_structures/4pfx_1_UDP_0_holo_aligned_esmfold_protein.pdb,OC1CCN([C@@H]2O[C@H](CO[PH](O)(O)O[PH](O)(O)O)[C@@H](O)[C@H]2O)C(O)N1,/bml/acmwhb/Repositories/Lab_Repositories/PoseBench/data/dockgen_set/dockgen_holo_aligned_esmfold_structures/4pfx_1_UDP_0_holo_aligned_esmfold_protein.pdb -4phr_1_UDP_0,/bml/acmwhb/Repositories/Lab_Repositories/PoseBench/data/dockgen_set/dockgen_holo_aligned_esmfold_structures/4phr_1_UDP_0_holo_aligned_esmfold_protein.pdb,OC1CCN([C@@H]2O[C@H](CO[PH](O)(O)O[PH](O)(O)O)[C@@H](O)[C@H]2O)C(O)N1,/bml/acmwhb/Repositories/Lab_Repositories/PoseBench/data/dockgen_set/dockgen_holo_aligned_esmfold_structures/4phr_1_UDP_0_holo_aligned_esmfold_protein.pdb -4phs_1_UDP_0,/bml/acmwhb/Repositories/Lab_Repositories/PoseBench/data/dockgen_set/dockgen_holo_aligned_esmfold_structures/4phs_1_UDP_0_holo_aligned_esmfold_protein.pdb,OC1CCN([C@@H]2O[C@H](CO[PH](O)([OH2+])O[PH](O)(O)[OH2+])[C@@H](O)[C@H]2O)C(O)N1,/bml/acmwhb/Repositories/Lab_Repositories/PoseBench/data/dockgen_set/dockgen_holo_aligned_esmfold_structures/4phs_1_UDP_0_holo_aligned_esmfold_protein.pdb -4pya_1_2X3_0,/bml/acmwhb/Repositories/Lab_Repositories/PoseBench/data/dockgen_set/dockgen_holo_aligned_esmfold_structures/4pya_1_2X3_0_holo_aligned_esmfold_protein.pdb,NC1NC(O)C2N[C@H]3N([C@@H]2N1)[C@@H]1O[C@H](CO[PH](O)(O)O[PH](O)(O)O[PH](O)(O)O)[C@@]3(O)[C@H]1O,/bml/acmwhb/Repositories/Lab_Repositories/PoseBench/data/dockgen_set/dockgen_holo_aligned_esmfold_structures/4pya_1_2X3_0_holo_aligned_esmfold_protein.pdb 
-4qa8_1_PJZ_0,/bml/acmwhb/Repositories/Lab_Repositories/PoseBench/data/dockgen_set/dockgen_holo_aligned_esmfold_structures/4qa8_1_PJZ_0_holo_aligned_esmfold_protein.pdb,CCCCCCCCCCCCCC[C@@H](O)OC[C@@H](C)O[C@@H](O)CCCCCCCCCCC,/bml/acmwhb/Repositories/Lab_Repositories/PoseBench/data/dockgen_set/dockgen_holo_aligned_esmfold_structures/4qa8_1_PJZ_0_holo_aligned_esmfold_protein.pdb -4qo5_1_NAG_2,/bml/acmwhb/Repositories/Lab_Repositories/PoseBench/data/dockgen_set/dockgen_holo_aligned_esmfold_structures/4qo5_1_NAG_2_holo_aligned_esmfold_protein.pdb,CC(O)N[C@H]1CO[C@H](CO)[C@@H](O)[C@@H]1O,/bml/acmwhb/Repositories/Lab_Repositories/PoseBench/data/dockgen_set/dockgen_holo_aligned_esmfold_structures/4qo5_1_NAG_2_holo_aligned_esmfold_protein.pdb -4rhe_1_FMN_6,/bml/acmwhb/Repositories/Lab_Repositories/PoseBench/data/dockgen_set/dockgen_holo_aligned_esmfold_structures/4rhe_1_FMN_6_holo_aligned_esmfold_protein.pdb,CC1C[C@@H]2[C@@H](C[C@@H]1C)NC1[C@H](O)NC(O)N[C@@H]1N2C[C@H](O)[C@H](O)[C@H](O)CO[PH]([O-])(O)O,/bml/acmwhb/Repositories/Lab_Repositories/PoseBench/data/dockgen_set/dockgen_holo_aligned_esmfold_structures/4rhe_1_FMN_6_holo_aligned_esmfold_protein.pdb -4rpj_1_UDP_0,/bml/acmwhb/Repositories/Lab_Repositories/PoseBench/data/dockgen_set/dockgen_holo_aligned_esmfold_structures/4rpj_1_UDP_0_holo_aligned_esmfold_protein.pdb,OC1CCN([C@@H]2O[C@H](CO[PH](O)(O)O[PH](O)(O)O)[C@@H](O)[C@H]2O)C(O)N1,/bml/acmwhb/Repositories/Lab_Repositories/PoseBench/data/dockgen_set/dockgen_holo_aligned_esmfold_structures/4rpj_1_UDP_0_holo_aligned_esmfold_protein.pdb -4rpm_1_HXC_0,/bml/acmwhb/Repositories/Lab_Repositories/PoseBench/data/dockgen_set/dockgen_holo_aligned_esmfold_structures/4rpm_1_HXC_0_holo_aligned_esmfold_protein.pdb,CCCCCC(O)SC,/bml/acmwhb/Repositories/Lab_Repositories/PoseBench/data/dockgen_set/dockgen_holo_aligned_esmfold_structures/4rpm_1_HXC_0_holo_aligned_esmfold_protein.pdb 
-4tvd_1_BGC_4,/bml/acmwhb/Repositories/Lab_Repositories/PoseBench/data/dockgen_set/dockgen_holo_aligned_esmfold_structures/4tvd_1_BGC_4_holo_aligned_esmfold_protein.pdb,OC[C@H]1O[C@@H](O)[C@H](O)[C@@H](O)[C@@H]1O,/bml/acmwhb/Repositories/Lab_Repositories/PoseBench/data/dockgen_set/dockgen_holo_aligned_esmfold_structures/4tvd_1_BGC_4_holo_aligned_esmfold_protein.pdb -4tvd_1_GLC_0,/bml/acmwhb/Repositories/Lab_Repositories/PoseBench/data/dockgen_set/dockgen_holo_aligned_esmfold_structures/4tvd_1_GLC_0_holo_aligned_esmfold_protein.pdb,OC[C@H]1O[C@H](O)[C@H](O)[C@@H](O)[C@@H]1O,/bml/acmwhb/Repositories/Lab_Repositories/PoseBench/data/dockgen_set/dockgen_holo_aligned_esmfold_structures/4tvd_1_GLC_0_holo_aligned_esmfold_protein.pdb -4u63_1_FAD_0,/bml/acmwhb/Repositories/Lab_Repositories/PoseBench/data/dockgen_set/dockgen_holo_aligned_esmfold_structures/4u63_1_FAD_0_holo_aligned_esmfold_protein.pdb,CC1CC2NC3C(O)NC(O)NC3N(C[C@H](O)[C@H](O)[C@H](O)CO[PH](O)(O)O[PH](O)(O)OC[C@H]3O[C@@H](N4CNC5C(N)NCNC54)[C@H](O)[C@@H]3O)C2CC1C,/bml/acmwhb/Repositories/Lab_Repositories/PoseBench/data/dockgen_set/dockgen_holo_aligned_esmfold_structures/4u63_1_FAD_0_holo_aligned_esmfold_protein.pdb -4uoc_1_NCN_1,/bml/acmwhb/Repositories/Lab_Repositories/PoseBench/data/dockgen_set/dockgen_holo_aligned_esmfold_structures/4uoc_1_NCN_1_holo_aligned_esmfold_protein.pdb,OC(O)C1CCCN([C@@H]2O[C@H](CO[PH](O)(O)O)[C@@H](O)[C@H]2O)C1,/bml/acmwhb/Repositories/Lab_Repositories/PoseBench/data/dockgen_set/dockgen_holo_aligned_esmfold_structures/4uoc_1_NCN_1_holo_aligned_esmfold_protein.pdb -4uuw_1_AMP_0,/bml/acmwhb/Repositories/Lab_Repositories/PoseBench/data/dockgen_set/dockgen_holo_aligned_esmfold_structures/4uuw_1_AMP_0_holo_aligned_esmfold_protein.pdb,NC1NCNC2C1NCN2[C@@H]1O[C@H](CO[PH](O)(O)O)[C@@H](O)[C@H]1O,/bml/acmwhb/Repositories/Lab_Repositories/PoseBench/data/dockgen_set/dockgen_holo_aligned_esmfold_structures/4uuw_1_AMP_0_holo_aligned_esmfold_protein.pdb 
-4xdr_1_ADN_0,/bml/acmwhb/Repositories/Lab_Repositories/PoseBench/data/dockgen_set/dockgen_holo_aligned_esmfold_structures/4xdr_1_ADN_0_holo_aligned_esmfold_protein.pdb,NC1NCNC2C1NCN2[C@@H]1O[C@H](CO)[C@@H](O)[C@H]1O,/bml/acmwhb/Repositories/Lab_Repositories/PoseBench/data/dockgen_set/dockgen_holo_aligned_esmfold_structures/4xdr_1_ADN_0_holo_aligned_esmfold_protein.pdb -4xfm_1_THE_0,/bml/acmwhb/Repositories/Lab_Repositories/PoseBench/data/dockgen_set/dockgen_holo_aligned_esmfold_structures/4xfm_1_THE_0_holo_aligned_esmfold_protein.pdb,OC[C@@H](O)[C@H](O)C(O)O,/bml/acmwhb/Repositories/Lab_Repositories/PoseBench/data/dockgen_set/dockgen_holo_aligned_esmfold_structures/4xfm_1_THE_0_holo_aligned_esmfold_protein.pdb -4ydx_1_TCE_0,/bml/acmwhb/Repositories/Lab_Repositories/PoseBench/data/dockgen_set/dockgen_holo_aligned_esmfold_structures/4ydx_1_TCE_0_holo_aligned_esmfold_protein.pdb,OC(O)CCP(CCC(O)O)CCC(O)O,/bml/acmwhb/Repositories/Lab_Repositories/PoseBench/data/dockgen_set/dockgen_holo_aligned_esmfold_structures/4ydx_1_TCE_0_holo_aligned_esmfold_protein.pdb -4zav_1_4LS_8,/bml/acmwhb/Repositories/Lab_Repositories/PoseBench/data/dockgen_set/dockgen_holo_aligned_esmfold_structures/4zav_1_4LS_8_holo_aligned_esmfold_protein.pdb,CC(C)CCN1C2CC(C)[C@@H](C)CC2N(C[C@H](O)[C@H](O)[C@H](O)CO[PH](O)(O)O)[C@H]2NC(O)N[C@@H](O)C21,/bml/acmwhb/Repositories/Lab_Repositories/PoseBench/data/dockgen_set/dockgen_holo_aligned_esmfold_structures/4zav_1_4LS_8_holo_aligned_esmfold_protein.pdb -4zaw_1_4LU_1,/bml/acmwhb/Repositories/Lab_Repositories/PoseBench/data/dockgen_set/dockgen_holo_aligned_esmfold_structures/4zaw_1_4LU_1_holo_aligned_esmfold_protein.pdb,C[C@H]1[C@@H](C)C[C@@H]2C3[C@@H]1C(C)(C)CCN3[C@H]1C(O)NC(O)NC1N2C[C@H](O)[C@H](O)[C@H](O)CO[PH](O)(O)O,/bml/acmwhb/Repositories/Lab_Repositories/PoseBench/data/dockgen_set/dockgen_holo_aligned_esmfold_structures/4zaw_1_4LU_1_holo_aligned_esmfold_protein.pdb 
-4zay_1_4LS_6,/bml/acmwhb/Repositories/Lab_Repositories/PoseBench/data/dockgen_set/dockgen_holo_aligned_esmfold_structures/4zay_1_4LS_6_holo_aligned_esmfold_protein.pdb,CC(C)CCN1[C@H]2[C@H](O)N[C@H](O)N[C@@H]2N(C[C@H](O)[C@H](O)[C@H](O)CO[PH](O)(O)O)[C@@H]2CC(C)C(C)C[C@H]21,/bml/acmwhb/Repositories/Lab_Repositories/PoseBench/data/dockgen_set/dockgen_holo_aligned_esmfold_structures/4zay_1_4LS_6_holo_aligned_esmfold_protein.pdb -4zaz_1_4LS_6,/bml/acmwhb/Repositories/Lab_Repositories/PoseBench/data/dockgen_set/dockgen_holo_aligned_esmfold_structures/4zaz_1_4LS_6_holo_aligned_esmfold_protein.pdb,CC(C)CCN1C2CC(C)[C@@H](C)CC2N(C[C@H](O)[C@H](O)[C@H](O)CO[PH](O)(O)O)C2N[C@@H](O)N[C@@H](O)C21,/bml/acmwhb/Repositories/Lab_Repositories/PoseBench/data/dockgen_set/dockgen_holo_aligned_esmfold_structures/4zaz_1_4LS_6_holo_aligned_esmfold_protein.pdb -4zqx_1_ATP_2,/bml/acmwhb/Repositories/Lab_Repositories/PoseBench/data/dockgen_set/dockgen_holo_aligned_esmfold_structures/4zqx_1_ATP_2_holo_aligned_esmfold_protein.pdb,NC1NCNC2C1NCN2[C@@H]1O[C@H](CO[PH](O)(O)O[PH](O)(O)O[PH](O)(O)O)[C@@H](O)[C@H]1O,/bml/acmwhb/Repositories/Lab_Repositories/PoseBench/data/dockgen_set/dockgen_holo_aligned_esmfold_structures/4zqx_1_ATP_2_holo_aligned_esmfold_protein.pdb -5a98_1_ATP_0,/bml/acmwhb/Repositories/Lab_Repositories/PoseBench/data/dockgen_set/dockgen_holo_aligned_esmfold_structures/5a98_1_ATP_0_holo_aligned_esmfold_protein.pdb,NC1NCNC2C1NCN2[C@@H]1O[C@H](CO[PH](O)(O)O[PH](O)(O)O[PH](O)(O)O)[C@@H](O)[C@H]1O,/bml/acmwhb/Repositories/Lab_Repositories/PoseBench/data/dockgen_set/dockgen_holo_aligned_esmfold_structures/5a98_1_ATP_0_holo_aligned_esmfold_protein.pdb 
-5ae3_2_AWB_1,/bml/acmwhb/Repositories/Lab_Repositories/PoseBench/data/dockgen_set/dockgen_holo_aligned_esmfold_structures/5ae3_2_AWB_1_holo_aligned_esmfold_protein.pdb,CCCCCC[C@H]1C(O)O[C@H](C)[C@H](N[C@H](O)[C@@H]2CCC[C@H](NCO)[C@H]2O)C(O)O[C@@H](C)[C@@H]1OC(O)CC(C)C,/bml/acmwhb/Repositories/Lab_Repositories/PoseBench/data/dockgen_set/dockgen_holo_aligned_esmfold_structures/5ae3_2_AWB_1_holo_aligned_esmfold_protein.pdb -5b5s_1_BOG_0,/bml/acmwhb/Repositories/Lab_Repositories/PoseBench/data/dockgen_set/dockgen_holo_aligned_esmfold_structures/5b5s_1_BOG_0_holo_aligned_esmfold_protein.pdb,CCCCCCCCO[C@@H]1O[C@H](CO)[C@@H](O)[C@H](O)[C@H]1O,/bml/acmwhb/Repositories/Lab_Repositories/PoseBench/data/dockgen_set/dockgen_holo_aligned_esmfold_structures/5b5s_1_BOG_0_holo_aligned_esmfold_protein.pdb -5d9g_1_GLU-ASN-LEU-TYR-PHE-GLN_0,/bml/acmwhb/Repositories/Lab_Repositories/PoseBench/data/dockgen_set/dockgen_holo_aligned_esmfold_structures/5d9g_1_GLU-ASN-LEU-TYR-PHE-GLN_0_holo_aligned_esmfold_protein.pdb,CC(C)C[C@H](N)C(=O)N[C@@H](Cc1ccc(O)cc1)C(=O)N[C@@H](Cc1ccccc1)C(=O)N[C@@H](CCC(N)=O)C(=O)O,/bml/acmwhb/Repositories/Lab_Repositories/PoseBench/data/dockgen_set/dockgen_holo_aligned_esmfold_structures/5d9g_1_GLU-ASN-LEU-TYR-PHE-GLN_0_holo_aligned_esmfold_protein.pdb -5dnc_1_ASN_2,/bml/acmwhb/Repositories/Lab_Repositories/PoseBench/data/dockgen_set/dockgen_holo_aligned_esmfold_structures/5dnc_1_ASN_2_holo_aligned_esmfold_protein.pdb,NC(O)C[C@H](N)C(O)O,/bml/acmwhb/Repositories/Lab_Repositories/PoseBench/data/dockgen_set/dockgen_holo_aligned_esmfold_structures/5dnc_1_ASN_2_holo_aligned_esmfold_protein.pdb -5eno_1_5QG_0,/bml/acmwhb/Repositories/Lab_Repositories/PoseBench/data/dockgen_set/dockgen_holo_aligned_esmfold_structures/5eno_1_5QG_0_holo_aligned_esmfold_protein.pdb,CC1(C)CC2C(CO1)C(N1CCOCC1)NC(SCCC1CCCCC1)[C@@H]2CN,/bml/acmwhb/Repositories/Lab_Repositories/PoseBench/data/dockgen_set/dockgen_holo_aligned_esmfold_structures/5eno_1_5QG_0_holo_aligned_esmfold_protein.pdb 
-5enp_1_5QF_0,/bml/acmwhb/Repositories/Lab_Repositories/PoseBench/data/dockgen_set/dockgen_holo_aligned_esmfold_structures/5enp_1_5QF_0_holo_aligned_esmfold_protein.pdb,COCCN1CCN([C@@H]2NC(SCC[C@H]3CCC(OC)[C@@H](OC)C3)[C@H](CN)[C@H]3CC(C)(C)OCC23)CC1,/bml/acmwhb/Repositories/Lab_Repositories/PoseBench/data/dockgen_set/dockgen_holo_aligned_esmfold_structures/5enp_1_5QF_0_holo_aligned_esmfold_protein.pdb -5enq_1_5QE_0,/bml/acmwhb/Repositories/Lab_Repositories/PoseBench/data/dockgen_set/dockgen_holo_aligned_esmfold_structures/5enq_1_5QE_0_holo_aligned_esmfold_protein.pdb,CC(O)NC1CCC(CCSC2NC(N3C[C@@H](C)O[C@@H](C)C3)[C@@H]3COC(C)(C)CC3C2CN)CC1,/bml/acmwhb/Repositories/Lab_Repositories/PoseBench/data/dockgen_set/dockgen_holo_aligned_esmfold_structures/5enq_1_5QE_0_holo_aligned_esmfold_protein.pdb -5enr_1_MBX_0,/bml/acmwhb/Repositories/Lab_Repositories/PoseBench/data/dockgen_set/dockgen_holo_aligned_esmfold_structures/5enr_1_MBX_0_holo_aligned_esmfold_protein.pdb,CCC(O)NC1CCC(CCS[C@@H]2NC(N3C[C@H](C)O[C@@H](C)C3)C3COC(C)(C)CC3C2CN)CC1,/bml/acmwhb/Repositories/Lab_Repositories/PoseBench/data/dockgen_set/dockgen_holo_aligned_esmfold_structures/5enr_1_MBX_0_holo_aligned_esmfold_protein.pdb -5ent_1_MIY_0,/bml/acmwhb/Repositories/Lab_Repositories/PoseBench/data/dockgen_set/dockgen_holo_aligned_esmfold_structures/5ent_1_MIY_0_holo_aligned_esmfold_protein.pdb,CN(C)C1CCC(O)C2C1C[C@H]1C[C@H]3[C@H](N(C)C)C(O)C(C(N)O)[C@@H](O)[C@@]3(O)C(O)[C@@H]1C2O,/bml/acmwhb/Repositories/Lab_Repositories/PoseBench/data/dockgen_set/dockgen_holo_aligned_esmfold_structures/5ent_1_MIY_0_holo_aligned_esmfold_protein.pdb -5ers_1_AMP_1,/bml/acmwhb/Repositories/Lab_Repositories/PoseBench/data/dockgen_set/dockgen_holo_aligned_esmfold_structures/5ers_1_AMP_1_holo_aligned_esmfold_protein.pdb,NC1NCNC2C1NCN2[C@@H]1O[C@H](CO[PH](O)(O)O)[C@@H](O)[C@H]1O,/bml/acmwhb/Repositories/Lab_Repositories/PoseBench/data/dockgen_set/dockgen_holo_aligned_esmfold_structures/5ers_1_AMP_1_holo_aligned_esmfold_protein.pdb 
-5f2t_1_PLM_0,/bml/acmwhb/Repositories/Lab_Repositories/PoseBench/data/dockgen_set/dockgen_holo_aligned_esmfold_structures/5f2t_1_PLM_0_holo_aligned_esmfold_protein.pdb,CCCCCCCCCCCCCCCC(O)O,/bml/acmwhb/Repositories/Lab_Repositories/PoseBench/data/dockgen_set/dockgen_holo_aligned_esmfold_structures/5f2t_1_PLM_0_holo_aligned_esmfold_protein.pdb -5f52_1_ASP_2,/bml/acmwhb/Repositories/Lab_Repositories/PoseBench/data/dockgen_set/dockgen_holo_aligned_esmfold_structures/5f52_1_ASP_2_holo_aligned_esmfold_protein.pdb,N[C@@H](CC(O)O)C(O)O,/bml/acmwhb/Repositories/Lab_Repositories/PoseBench/data/dockgen_set/dockgen_holo_aligned_esmfold_structures/5f52_1_ASP_2_holo_aligned_esmfold_protein.pdb -5fiu_1_Y3J_3,/bml/acmwhb/Repositories/Lab_Repositories/PoseBench/data/dockgen_set/dockgen_holo_aligned_esmfold_structures/5fiu_1_Y3J_3_holo_aligned_esmfold_protein.pdb,NC1NCNC2C1NCN2[C@@H]1O[C@H](C(F)F)[C@@H](O)[C@@H]1O,/bml/acmwhb/Repositories/Lab_Repositories/PoseBench/data/dockgen_set/dockgen_holo_aligned_esmfold_structures/5fiu_1_Y3J_3_holo_aligned_esmfold_protein.pdb -5fxd_1_H7Y_1,/bml/acmwhb/Repositories/Lab_Repositories/PoseBench/data/dockgen_set/dockgen_holo_aligned_esmfold_structures/5fxd_1_H7Y_1_holo_aligned_esmfold_protein.pdb,CCC[C@H]1CCC(O)C(OC)C1,/bml/acmwhb/Repositories/Lab_Repositories/PoseBench/data/dockgen_set/dockgen_holo_aligned_esmfold_structures/5fxd_1_H7Y_1_holo_aligned_esmfold_protein.pdb -5fxe_1_CIY_1,/bml/acmwhb/Repositories/Lab_Repositories/PoseBench/data/dockgen_set/dockgen_holo_aligned_esmfold_structures/5fxe_1_CIY_1_holo_aligned_esmfold_protein.pdb,CO[C@H]1CC(CCCO)CCC1O,/bml/acmwhb/Repositories/Lab_Repositories/PoseBench/data/dockgen_set/dockgen_holo_aligned_esmfold_structures/5fxe_1_CIY_1_holo_aligned_esmfold_protein.pdb 
-5fxf_1_BEZ_0,/bml/acmwhb/Repositories/Lab_Repositories/PoseBench/data/dockgen_set/dockgen_holo_aligned_esmfold_structures/5fxf_1_BEZ_0_holo_aligned_esmfold_protein.pdb,OC(O)C1CCCCC1,/bml/acmwhb/Repositories/Lab_Repositories/PoseBench/data/dockgen_set/dockgen_holo_aligned_esmfold_structures/5fxf_1_BEZ_0_holo_aligned_esmfold_protein.pdb -5gqi_1_ATP_7,/bml/acmwhb/Repositories/Lab_Repositories/PoseBench/data/dockgen_set/dockgen_holo_aligned_esmfold_structures/5gqi_1_ATP_7_holo_aligned_esmfold_protein.pdb,N[C@@H]1NCN[C@H]2C1NCN2[C@@H]1O[C@H](CO[PH](O)(O)O[PH](O)(O)O[PH](O)(O)O)[C@@H](O)[C@H]1O,/bml/acmwhb/Repositories/Lab_Repositories/PoseBench/data/dockgen_set/dockgen_holo_aligned_esmfold_structures/5gqi_1_ATP_7_holo_aligned_esmfold_protein.pdb -5gql_1_ATP_4,/bml/acmwhb/Repositories/Lab_Repositories/PoseBench/data/dockgen_set/dockgen_holo_aligned_esmfold_structures/5gql_1_ATP_4_holo_aligned_esmfold_protein.pdb,NC1NCNC2C1NCN2[C@@H]1O[C@H](CO[PH](O)(O)O[PH](O)(O)O[PH](O)(O)O)[C@@H](O)[C@H]1O,/bml/acmwhb/Repositories/Lab_Repositories/PoseBench/data/dockgen_set/dockgen_holo_aligned_esmfold_structures/5gql_1_ATP_4_holo_aligned_esmfold_protein.pdb -5hhz_1_ZME_0,/bml/acmwhb/Repositories/Lab_Repositories/PoseBench/data/dockgen_set/dockgen_holo_aligned_esmfold_structures/5hhz_1_ZME_0_holo_aligned_esmfold_protein.pdb,CC1CCN(C2NCNC3NCNC32)C1,/bml/acmwhb/Repositories/Lab_Repositories/PoseBench/data/dockgen_set/dockgen_holo_aligned_esmfold_structures/5hhz_1_ZME_0_holo_aligned_esmfold_protein.pdb -5hmr_1_FDZ_0,/bml/acmwhb/Repositories/Lab_Repositories/PoseBench/data/dockgen_set/dockgen_holo_aligned_esmfold_structures/5hmr_1_FDZ_0_holo_aligned_esmfold_protein.pdb,OC(NC1CNNS1)N[C@H]1CCC[C@H](OC(F)(F)F)C1,/bml/acmwhb/Repositories/Lab_Repositories/PoseBench/data/dockgen_set/dockgen_holo_aligned_esmfold_structures/5hmr_1_FDZ_0_holo_aligned_esmfold_protein.pdb 
-5hqx_1_EDZ_0,/bml/acmwhb/Repositories/Lab_Repositories/PoseBench/data/dockgen_set/dockgen_holo_aligned_esmfold_structures/5hqx_1_EDZ_0_holo_aligned_esmfold_protein.pdb,OCC[C@H]1CCCC[C@@H]1NC(O)N[C@H]1CNNS1,/bml/acmwhb/Repositories/Lab_Repositories/PoseBench/data/dockgen_set/dockgen_holo_aligned_esmfold_structures/5hqx_1_EDZ_0_holo_aligned_esmfold_protein.pdb -5hw0_1_GLU_2,/bml/acmwhb/Repositories/Lab_Repositories/PoseBench/data/dockgen_set/dockgen_holo_aligned_esmfold_structures/5hw0_1_GLU_2_holo_aligned_esmfold_protein.pdb,N[C@@H](CCC(O)O)C(O)O,/bml/acmwhb/Repositories/Lab_Repositories/PoseBench/data/dockgen_set/dockgen_holo_aligned_esmfold_structures/5hw0_1_GLU_2_holo_aligned_esmfold_protein.pdb -5ida_1_BMA_0,/bml/acmwhb/Repositories/Lab_Repositories/PoseBench/data/dockgen_set/dockgen_holo_aligned_esmfold_structures/5ida_1_BMA_0_holo_aligned_esmfold_protein.pdb,OC[C@H]1O[C@@H](O)[C@@H](O)[C@@H](O)[C@@H]1O,/bml/acmwhb/Repositories/Lab_Repositories/PoseBench/data/dockgen_set/dockgen_holo_aligned_esmfold_structures/5ida_1_BMA_0_holo_aligned_esmfold_protein.pdb -5k3o_2_ASP_0,/bml/acmwhb/Repositories/Lab_Repositories/PoseBench/data/dockgen_set/dockgen_holo_aligned_esmfold_structures/5k3o_2_ASP_0_holo_aligned_esmfold_protein.pdb,N[C@@H](CC(O)O)C(O)O,/bml/acmwhb/Repositories/Lab_Repositories/PoseBench/data/dockgen_set/dockgen_holo_aligned_esmfold_structures/5k3o_2_ASP_0_holo_aligned_esmfold_protein.pdb -5k45_2_GLU_1,/bml/acmwhb/Repositories/Lab_Repositories/PoseBench/data/dockgen_set/dockgen_holo_aligned_esmfold_structures/5k45_2_GLU_1_holo_aligned_esmfold_protein.pdb,N[C@@H](CCC(O)O)C(O)O,/bml/acmwhb/Repositories/Lab_Repositories/PoseBench/data/dockgen_set/dockgen_holo_aligned_esmfold_structures/5k45_2_GLU_1_holo_aligned_esmfold_protein.pdb 
-5k4h_2_GLU_3,/bml/acmwhb/Repositories/Lab_Repositories/PoseBench/data/dockgen_set/dockgen_holo_aligned_esmfold_structures/5k4h_2_GLU_3_holo_aligned_esmfold_protein.pdb,N[C@@H](CCC(O)O)C(O)O,/bml/acmwhb/Repositories/Lab_Repositories/PoseBench/data/dockgen_set/dockgen_holo_aligned_esmfold_structures/5k4h_2_GLU_3_holo_aligned_esmfold_protein.pdb -5k62_1_ASN-VAL_0,/bml/acmwhb/Repositories/Lab_Repositories/PoseBench/data/dockgen_set/dockgen_holo_aligned_esmfold_structures/5k62_1_ASN-VAL_0_holo_aligned_esmfold_protein.pdb,CC(C)[C@@H](CO)NC(O)[C@@H](N)CC(N)O,/bml/acmwhb/Repositories/Lab_Repositories/PoseBench/data/dockgen_set/dockgen_holo_aligned_esmfold_structures/5k62_1_ASN-VAL_0_holo_aligned_esmfold_protein.pdb -5k63_1_ASN-GLY_0,/bml/acmwhb/Repositories/Lab_Repositories/PoseBench/data/dockgen_set/dockgen_holo_aligned_esmfold_structures/5k63_1_ASN-GLY_0_holo_aligned_esmfold_protein.pdb,NC(O)C[C@H](N)C(O)NCCO,/bml/acmwhb/Repositories/Lab_Repositories/PoseBench/data/dockgen_set/dockgen_holo_aligned_esmfold_structures/5k63_1_ASN-GLY_0_holo_aligned_esmfold_protein.pdb -5k66_1_ASN-GLU_0,/bml/acmwhb/Repositories/Lab_Repositories/PoseBench/data/dockgen_set/dockgen_holo_aligned_esmfold_structures/5k66_1_ASN-GLU_0_holo_aligned_esmfold_protein.pdb,NC(O)C[C@H](N)C(O)N[C@H](CO)CCC(O)O,/bml/acmwhb/Repositories/Lab_Repositories/PoseBench/data/dockgen_set/dockgen_holo_aligned_esmfold_structures/5k66_1_ASN-GLU_0_holo_aligned_esmfold_protein.pdb -5mh1_1_BMA_0,/bml/acmwhb/Repositories/Lab_Repositories/PoseBench/data/dockgen_set/dockgen_holo_aligned_esmfold_structures/5mh1_1_BMA_0_holo_aligned_esmfold_protein.pdb,OC[C@H]1O[C@@H](O)[C@@H](O)[C@@H](O)[C@@H]1O,/bml/acmwhb/Repositories/Lab_Repositories/PoseBench/data/dockgen_set/dockgen_holo_aligned_esmfold_structures/5mh1_1_BMA_0_holo_aligned_esmfold_protein.pdb 
-5u82_2_ZN0_0,/bml/acmwhb/Repositories/Lab_Repositories/PoseBench/data/dockgen_set/dockgen_holo_aligned_esmfold_structures/5u82_2_ZN0_0_holo_aligned_esmfold_protein.pdb,CC[SnH](CC)CC,/bml/acmwhb/Repositories/Lab_Repositories/PoseBench/data/dockgen_set/dockgen_holo_aligned_esmfold_structures/5u82_2_ZN0_0_holo_aligned_esmfold_protein.pdb -6a71_1_9UX_0,/bml/acmwhb/Repositories/Lab_Repositories/PoseBench/data/dockgen_set/dockgen_holo_aligned_esmfold_structures/6a71_1_9UX_0_holo_aligned_esmfold_protein.pdb,O[Mo@]12S[Mo@@]1(O)S2,/bml/acmwhb/Repositories/Lab_Repositories/PoseBench/data/dockgen_set/dockgen_holo_aligned_esmfold_structures/6a71_1_9UX_0_holo_aligned_esmfold_protein.pdb -6a72_1_9UX_0,/bml/acmwhb/Repositories/Lab_Repositories/PoseBench/data/dockgen_set/dockgen_holo_aligned_esmfold_structures/6a72_1_9UX_0_holo_aligned_esmfold_protein.pdb,O[Mo@]12S[Mo@@]1(O)S2,/bml/acmwhb/Repositories/Lab_Repositories/PoseBench/data/dockgen_set/dockgen_holo_aligned_esmfold_structures/6a72_1_9UX_0_holo_aligned_esmfold_protein.pdb -6b1b_1_TMO_15,/bml/acmwhb/Repositories/Lab_Repositories/PoseBench/data/dockgen_set/dockgen_holo_aligned_esmfold_structures/6b1b_1_TMO_15_holo_aligned_esmfold_protein.pdb,C[N+](C)(C)O,/bml/acmwhb/Repositories/Lab_Repositories/PoseBench/data/dockgen_set/dockgen_holo_aligned_esmfold_structures/6b1b_1_TMO_15_holo_aligned_esmfold_protein.pdb -6ea9_1_9BG_0,/bml/acmwhb/Repositories/Lab_Repositories/PoseBench/data/dockgen_set/dockgen_holo_aligned_esmfold_structures/6ea9_1_9BG_0_holo_aligned_esmfold_protein.pdb,NC1NC(O)C2NCN([C@@H]3O[C@H](CO)[C@@H](O)[C@H]3O[PH]([O-])(O)OC[C@H]3O[C@@H](N4CNC5C(N)NCNC54)[C@H](O)[C@@H]3O[PH]([O-])([O-])O)C2N1,/bml/acmwhb/Repositories/Lab_Repositories/PoseBench/data/dockgen_set/dockgen_holo_aligned_esmfold_structures/6ea9_1_9BG_0_holo_aligned_esmfold_protein.pdb 
-6ep5_1_ADP_1,/bml/acmwhb/Repositories/Lab_Repositories/PoseBench/data/dockgen_set/dockgen_holo_aligned_esmfold_structures/6ep5_1_ADP_1_holo_aligned_esmfold_protein.pdb,NC1NCNC2C1NCN2[C@@H]1O[C@H](CO[PH](O)(O)O[PH](O)(O)O)[C@@H](O)[C@H]1O,/bml/acmwhb/Repositories/Lab_Repositories/PoseBench/data/dockgen_set/dockgen_holo_aligned_esmfold_structures/6ep5_1_ADP_1_holo_aligned_esmfold_protein.pdb -6etf_1_AMP_0,/bml/acmwhb/Repositories/Lab_Repositories/PoseBench/data/dockgen_set/dockgen_holo_aligned_esmfold_structures/6etf_1_AMP_0_holo_aligned_esmfold_protein.pdb,NC1NCNC2[C@H]1NCN2[C@@H]1O[C@H](CO[PH](O)(O)O)[C@@H](O)[C@H]1O,/bml/acmwhb/Repositories/Lab_Repositories/PoseBench/data/dockgen_set/dockgen_holo_aligned_esmfold_structures/6etf_1_AMP_0_holo_aligned_esmfold_protein.pdb -6fgc_1_ADP_1,/bml/acmwhb/Repositories/Lab_Repositories/PoseBench/data/dockgen_set/dockgen_holo_aligned_esmfold_structures/6fgc_1_ADP_1_holo_aligned_esmfold_protein.pdb,NC1NCNC2C1NCN2[C@@H]1O[C@H](CO[PH](O)(O)O[PH](O)(O)O)[C@@H](O)[C@H]1O,/bml/acmwhb/Repositories/Lab_Repositories/PoseBench/data/dockgen_set/dockgen_holo_aligned_esmfold_structures/6fgc_1_ADP_1_holo_aligned_esmfold_protein.pdb -6fgc_1_D95_1,/bml/acmwhb/Repositories/Lab_Repositories/PoseBench/data/dockgen_set/dockgen_holo_aligned_esmfold_structures/6fgc_1_D95_1_holo_aligned_esmfold_protein.pdb,C[C@H]1[C@H](OC(O)CCC(O)O)O[C@@H]2O[C@@]3(C)CC[C@H]4[C@H](C)CC[C@@H]1[C@@]24OO3,/bml/acmwhb/Repositories/Lab_Repositories/PoseBench/data/dockgen_set/dockgen_holo_aligned_esmfold_structures/6fgc_1_D95_1_holo_aligned_esmfold_protein.pdb -6gbf_1_AMP_0,/bml/acmwhb/Repositories/Lab_Repositories/PoseBench/data/dockgen_set/dockgen_holo_aligned_esmfold_structures/6gbf_1_AMP_0_holo_aligned_esmfold_protein.pdb,NC1NCNC2C1NCN2[C@@H]1O[C@H](CO[PH](O)(O)O)[C@@H](O)[C@H]1O,/bml/acmwhb/Repositories/Lab_Repositories/PoseBench/data/dockgen_set/dockgen_holo_aligned_esmfold_structures/6gbf_1_AMP_0_holo_aligned_esmfold_protein.pdb 
-6jls_1_FMN_6,/bml/acmwhb/Repositories/Lab_Repositories/PoseBench/data/dockgen_set/dockgen_holo_aligned_esmfold_structures/6jls_1_FMN_6_holo_aligned_esmfold_protein.pdb,CC1CC2NC3C(O)NC(O)NC3N(C[C@H](O)[C@H](O)[C@H](O)CO[PH](O)(O)O)C2CC1C,/bml/acmwhb/Repositories/Lab_Repositories/PoseBench/data/dockgen_set/dockgen_holo_aligned_esmfold_structures/6jls_1_FMN_6_holo_aligned_esmfold_protein.pdb -6n19_2_K8V_0,/bml/acmwhb/Repositories/Lab_Repositories/PoseBench/data/dockgen_set/dockgen_holo_aligned_esmfold_structures/6n19_2_K8V_0_holo_aligned_esmfold_protein.pdb,CCC(O)NC1CCC(C([O-])O)C(C([O-])O)C1,/bml/acmwhb/Repositories/Lab_Repositories/PoseBench/data/dockgen_set/dockgen_holo_aligned_esmfold_structures/6n19_2_K8V_0_holo_aligned_esmfold_protein.pdb -6nco_1_KQP_0,/bml/acmwhb/Repositories/Lab_Repositories/PoseBench/data/dockgen_set/dockgen_holo_aligned_esmfold_structures/6nco_1_KQP_0_holo_aligned_esmfold_protein.pdb,CC(C)(O)C1CCC([C@H]2CC(Cl)C[C@@H](C34(C(N)N)CC3C4)C2)CC1,/bml/acmwhb/Repositories/Lab_Repositories/PoseBench/data/dockgen_set/dockgen_holo_aligned_esmfold_structures/6nco_1_KQP_0_holo_aligned_esmfold_protein.pdb -6npp_1_KWG_0,/bml/acmwhb/Repositories/Lab_Repositories/PoseBench/data/dockgen_set/dockgen_holo_aligned_esmfold_structures/6npp_1_KWG_0_holo_aligned_esmfold_protein.pdb,[O-]C(O)C1CCCC(CCC2CCCCC2)C1N1CCCC1,/bml/acmwhb/Repositories/Lab_Repositories/PoseBench/data/dockgen_set/dockgen_holo_aligned_esmfold_structures/6npp_1_KWG_0_holo_aligned_esmfold_protein.pdb -6o6y_1_ACK_0,/bml/acmwhb/Repositories/Lab_Repositories/PoseBench/data/dockgen_set/dockgen_holo_aligned_esmfold_structures/6o6y_1_ACK_0_holo_aligned_esmfold_protein.pdb,NC1NCNC2C1NCN2[C@@H]1O[C@H](CO)[C@H]2O[PH]([O-])(O)O[C@H]21,/bml/acmwhb/Repositories/Lab_Repositories/PoseBench/data/dockgen_set/dockgen_holo_aligned_esmfold_structures/6o6y_1_ACK_0_holo_aligned_esmfold_protein.pdb 
-6o70_1_ACK_1,/bml/acmwhb/Repositories/Lab_Repositories/PoseBench/data/dockgen_set/dockgen_holo_aligned_esmfold_structures/6o70_1_ACK_1_holo_aligned_esmfold_protein.pdb,NC1NCNC2C1NCN2[C@@H]1O[C@H](CO)[C@H]2O[PH]([O-])(O)O[C@H]21,/bml/acmwhb/Repositories/Lab_Repositories/PoseBench/data/dockgen_set/dockgen_holo_aligned_esmfold_structures/6o70_1_ACK_1_holo_aligned_esmfold_protein.pdb -6pa2_1_ASP_2,/bml/acmwhb/Repositories/Lab_Repositories/PoseBench/data/dockgen_set/dockgen_holo_aligned_esmfold_structures/6pa2_1_ASP_2_holo_aligned_esmfold_protein.pdb,N[C@@H](CC(O)O)C(O)O,/bml/acmwhb/Repositories/Lab_Repositories/PoseBench/data/dockgen_set/dockgen_holo_aligned_esmfold_structures/6pa2_1_ASP_2_holo_aligned_esmfold_protein.pdb -6pa6_2_ASN_0,/bml/acmwhb/Repositories/Lab_Repositories/PoseBench/data/dockgen_set/dockgen_holo_aligned_esmfold_structures/6pa6_2_ASN_0_holo_aligned_esmfold_protein.pdb,NC(O)C[C@H](N)C(O)O,/bml/acmwhb/Repositories/Lab_Repositories/PoseBench/data/dockgen_set/dockgen_holo_aligned_esmfold_structures/6pa6_2_ASN_0_holo_aligned_esmfold_protein.pdb -6paa_1_ASP_2,/bml/acmwhb/Repositories/Lab_Repositories/PoseBench/data/dockgen_set/dockgen_holo_aligned_esmfold_structures/6paa_1_ASP_2_holo_aligned_esmfold_protein.pdb,N[C@@H](CC(O)O)C(O)O,/bml/acmwhb/Repositories/Lab_Repositories/PoseBench/data/dockgen_set/dockgen_holo_aligned_esmfold_structures/6paa_1_ASP_2_holo_aligned_esmfold_protein.pdb -6qkr_1_FAD_0,/bml/acmwhb/Repositories/Lab_Repositories/PoseBench/data/dockgen_set/dockgen_holo_aligned_esmfold_structures/6qkr_1_FAD_0_holo_aligned_esmfold_protein.pdb,CC1CC2NC3C(NC(O)N[C@H]3O)N(C[C@H](O)[C@H](O)[C@H](O)CO[PH](O)(O)O[PH](O)(O)OC[C@H]3O[C@@H](N4CNC5C4NCN[C@H]5N)[C@H](O)[C@@H]3O)C2CC1C,/bml/acmwhb/Repositories/Lab_Repositories/PoseBench/data/dockgen_set/dockgen_holo_aligned_esmfold_structures/6qkr_1_FAD_0_holo_aligned_esmfold_protein.pdb 
-6rms_1_AMP_1,/bml/acmwhb/Repositories/Lab_Repositories/PoseBench/data/dockgen_set/dockgen_holo_aligned_esmfold_structures/6rms_1_AMP_1_holo_aligned_esmfold_protein.pdb,NC1NCNC2C1NCN2[C@@H]1O[C@H](CO[PH](O)(O)O)[C@@H](O)[C@H]1O,/bml/acmwhb/Repositories/Lab_Repositories/PoseBench/data/dockgen_set/dockgen_holo_aligned_esmfold_structures/6rms_1_AMP_1_holo_aligned_esmfold_protein.pdb -6ryz_1_SAM_2,/bml/acmwhb/Repositories/Lab_Repositories/PoseBench/data/dockgen_set/dockgen_holo_aligned_esmfold_structures/6ryz_1_SAM_2_holo_aligned_esmfold_protein.pdb,C[S@@H](CC[C@H](N)C(O)O)C[C@H]1O[C@@H](N2CNC3C2NCN[C@@H]3N)[C@H](O)[C@@H]1O,/bml/acmwhb/Repositories/Lab_Repositories/PoseBench/data/dockgen_set/dockgen_holo_aligned_esmfold_structures/6ryz_1_SAM_2_holo_aligned_esmfold_protein.pdb -6rz2_1_5CD_2,/bml/acmwhb/Repositories/Lab_Repositories/PoseBench/data/dockgen_set/dockgen_holo_aligned_esmfold_structures/6rz2_1_5CD_2_holo_aligned_esmfold_protein.pdb,NC1NCNC2[C@H]1NCN2[C@@H]1O[C@H](CCl)[C@@H](O)[C@H]1O,/bml/acmwhb/Repositories/Lab_Repositories/PoseBench/data/dockgen_set/dockgen_holo_aligned_esmfold_structures/6rz2_1_5CD_2_holo_aligned_esmfold_protein.pdb -6tvg_1_AP2_1,/bml/acmwhb/Repositories/Lab_Repositories/PoseBench/data/dockgen_set/dockgen_holo_aligned_esmfold_structures/6tvg_1_AP2_1_holo_aligned_esmfold_protein.pdb,NC1NCNC2C1NCN2[C@@H]1O[C@H](CO[PH](O)(O)C[PH](O)(O)O)[C@@H](O)[C@H]1O,/bml/acmwhb/Repositories/Lab_Repositories/PoseBench/data/dockgen_set/dockgen_holo_aligned_esmfold_structures/6tvg_1_AP2_1_holo_aligned_esmfold_protein.pdb -6uqy_2_AT3_0,/bml/acmwhb/Repositories/Lab_Repositories/PoseBench/data/dockgen_set/dockgen_holo_aligned_esmfold_structures/6uqy_2_AT3_0_holo_aligned_esmfold_protein.pdb,C[C@@H](O)SCC[N+](C)(C)C,/bml/acmwhb/Repositories/Lab_Repositories/PoseBench/data/dockgen_set/dockgen_holo_aligned_esmfold_structures/6uqy_2_AT3_0_holo_aligned_esmfold_protein.pdb 
-6ur1_2_AT3_0,/bml/acmwhb/Repositories/Lab_Repositories/PoseBench/data/dockgen_set/dockgen_holo_aligned_esmfold_structures/6ur1_2_AT3_0_holo_aligned_esmfold_protein.pdb,C[C@@H](O)SCC[N+](C)(C)C,/bml/acmwhb/Repositories/Lab_Repositories/PoseBench/data/dockgen_set/dockgen_holo_aligned_esmfold_structures/6ur1_2_AT3_0_holo_aligned_esmfold_protein.pdb -6v2a_1_ASN_3,/bml/acmwhb/Repositories/Lab_Repositories/PoseBench/data/dockgen_set/dockgen_holo_aligned_esmfold_structures/6v2a_1_ASN_3_holo_aligned_esmfold_protein.pdb,NC(O)C[C@H](N)C(O)O,/bml/acmwhb/Repositories/Lab_Repositories/PoseBench/data/dockgen_set/dockgen_holo_aligned_esmfold_structures/6v2a_1_ASN_3_holo_aligned_esmfold_protein.pdb -6wyz_1_DGL_1,/bml/acmwhb/Repositories/Lab_Repositories/PoseBench/data/dockgen_set/dockgen_holo_aligned_esmfold_structures/6wyz_1_DGL_1_holo_aligned_esmfold_protein.pdb,N[C@H](CCC(O)O)C(O)O,/bml/acmwhb/Repositories/Lab_Repositories/PoseBench/data/dockgen_set/dockgen_holo_aligned_esmfold_structures/6wyz_1_DGL_1_holo_aligned_esmfold_protein.pdb -6xb3_3_9BG_1,/bml/acmwhb/Repositories/Lab_Repositories/PoseBench/data/dockgen_set/dockgen_holo_aligned_esmfold_structures/6xb3_3_9BG_1_holo_aligned_esmfold_protein.pdb,NC1NC(O)C2NCN([C@@H]3O[C@H](CO)[C@@H](O)[C@H]3O[PH]([O-])(O)OC[C@H]3O[C@@H](N4CNC5C(N)NCNC54)[C@H](O)[C@@H]3O[PH]([O-])(O)O)C2N1,/bml/acmwhb/Repositories/Lab_Repositories/PoseBench/data/dockgen_set/dockgen_holo_aligned_esmfold_structures/6xb3_3_9BG_1_holo_aligned_esmfold_protein.pdb -6xug_1_O1Q_0,/bml/acmwhb/Repositories/Lab_Repositories/PoseBench/data/dockgen_set/dockgen_holo_aligned_esmfold_structures/6xug_1_O1Q_0_holo_aligned_esmfold_protein.pdb,CC1CCC[C@H](N2NCC[C@H]2C2CC(Cl)C3NNN(C4CC[C@H]5CNNC5C4)C3C2)C1,/bml/acmwhb/Repositories/Lab_Repositories/PoseBench/data/dockgen_set/dockgen_holo_aligned_esmfold_structures/6xug_1_O1Q_0_holo_aligned_esmfold_protein.pdb 
-6yao_1_OJ2_0,/bml/acmwhb/Repositories/Lab_Repositories/PoseBench/data/dockgen_set/dockgen_holo_aligned_esmfold_structures/6yao_1_OJ2_0_holo_aligned_esmfold_protein.pdb,OCC[C@H]1CCCC[C@@H]1NC(O)N[C@H]1CCC[C@@H](OC(F)(F)F)C1,/bml/acmwhb/Repositories/Lab_Repositories/PoseBench/data/dockgen_set/dockgen_holo_aligned_esmfold_structures/6yao_1_OJ2_0_holo_aligned_esmfold_protein.pdb -6yap_1_OHZ_0,/bml/acmwhb/Repositories/Lab_Repositories/PoseBench/data/dockgen_set/dockgen_holo_aligned_esmfold_structures/6yap_1_OHZ_0_holo_aligned_esmfold_protein.pdb,OCC[C@H]1CCCC[C@@H]1NC(O)N[C@H]1CC(Cl)C[C@H](OC(F)(F)F)C1,/bml/acmwhb/Repositories/Lab_Repositories/PoseBench/data/dockgen_set/dockgen_holo_aligned_esmfold_structures/6yap_1_OHZ_0_holo_aligned_esmfold_protein.pdb -6yaq_1_OHZ_0,/bml/acmwhb/Repositories/Lab_Repositories/PoseBench/data/dockgen_set/dockgen_holo_aligned_esmfold_structures/6yaq_1_OHZ_0_holo_aligned_esmfold_protein.pdb,OCCC1CCCC[C@H]1N[C@H](O)N[C@H]1C[C@@H](Cl)C[C@@H](OC(F)(F)F)C1,/bml/acmwhb/Repositories/Lab_Repositories/PoseBench/data/dockgen_set/dockgen_holo_aligned_esmfold_structures/6yaq_1_OHZ_0_holo_aligned_esmfold_protein.pdb +5u82_2_ZN0_0,data/dockgen_set/dockgen_holo_aligned_predicted_structures/5u82_2_ZN0_0_holo_aligned_predicted_protein.pdb,CC[SnH](CC)CC,data/dockgen_set/dockgen_holo_aligned_predicted_structures/5u82_2_ZN0_0_holo_aligned_predicted_protein.pdb +4zav_1_4LS_8,data/dockgen_set/dockgen_holo_aligned_predicted_structures/4zav_1_4LS_8_holo_aligned_predicted_protein.pdb,CC(C)CCN1C2CC(C)[C@@H](C)CC2N(C[C@H](O)[C@H](O)[C@H](O)CO[PH](O)(O)O)[C@H]2NC(O)N[C@@H](O)C21,data/dockgen_set/dockgen_holo_aligned_predicted_structures/4zav_1_4LS_8_holo_aligned_predicted_protein.pdb 
+4rpj_1_UDP_0,data/dockgen_set/dockgen_holo_aligned_predicted_structures/4rpj_1_UDP_0_holo_aligned_predicted_protein.pdb,OC1CCN([C@@H]2O[C@H](CO[PH](O)(O)O[PH](O)(O)O)[C@@H](O)[C@H]2O)C(O)N1,data/dockgen_set/dockgen_holo_aligned_predicted_structures/4rpj_1_UDP_0_holo_aligned_predicted_protein.pdb +3he3_5_UDP_0,data/dockgen_set/dockgen_holo_aligned_predicted_structures/3he3_5_UDP_0_holo_aligned_predicted_protein.pdb,OC1CCN([C@@H]2O[C@H](CO[PH](O)(O)O[PH](O)(O)O)[C@@H](O)[C@H]2O)C(O)N1,data/dockgen_set/dockgen_holo_aligned_predicted_structures/3he3_5_UDP_0_holo_aligned_predicted_protein.pdb +4kgx_1_CTP_5,data/dockgen_set/dockgen_holo_aligned_predicted_structures/4kgx_1_CTP_5_holo_aligned_predicted_protein.pdb,NC1CCN([C@@H]2O[C@H](CO[PH](O)(O)O[PH](O)(O)O[PH](O)(O)O)[C@@H](O)[C@H]2O)C(O)N1,data/dockgen_set/dockgen_holo_aligned_predicted_structures/4kgx_1_CTP_5_holo_aligned_predicted_protein.pdb +4phs_1_UDP_0,data/dockgen_set/dockgen_holo_aligned_predicted_structures/4phs_1_UDP_0_holo_aligned_predicted_protein.pdb,OC1CCN([C@@H]2O[C@H](CO[PH](O)([OH2+])O[PH](O)(O)[OH2+])[C@@H](O)[C@H]2O)C(O)N1,data/dockgen_set/dockgen_holo_aligned_predicted_structures/4phs_1_UDP_0_holo_aligned_predicted_protein.pdb +1tkg_1_SSA_0,data/dockgen_set/dockgen_holo_aligned_predicted_structures/1tkg_1_SSA_0_holo_aligned_predicted_protein.pdb,NC1NCNC2C1NCN2[C@@H]1O[C@H](COS(O)(O)NC(O)[C@@H](N)CO)[C@@H](O)[C@H]1O,data/dockgen_set/dockgen_holo_aligned_predicted_structures/1tkg_1_SSA_0_holo_aligned_predicted_protein.pdb +5k63_1_ASN-GLY_0,data/dockgen_set/dockgen_holo_aligned_predicted_structures/5k63_1_ASN-GLY_0_holo_aligned_predicted_protein.pdb,NC(O)C[C@H](N)C(O)NCCO,data/dockgen_set/dockgen_holo_aligned_predicted_structures/5k63_1_ASN-GLY_0_holo_aligned_predicted_protein.pdb 
+1u8u_1_OCA_0,data/dockgen_set/dockgen_holo_aligned_predicted_structures/1u8u_1_OCA_0_holo_aligned_predicted_protein.pdb,CCCCCCCC(O)O,data/dockgen_set/dockgen_holo_aligned_predicted_structures/1u8u_1_OCA_0_holo_aligned_predicted_protein.pdb +5mh1_1_BMA_0,data/dockgen_set/dockgen_holo_aligned_predicted_structures/5mh1_1_BMA_0_holo_aligned_predicted_protein.pdb,OC[C@H]1O[C@@H](O)[C@@H](O)[C@@H](O)[C@@H]1O,data/dockgen_set/dockgen_holo_aligned_predicted_structures/5mh1_1_BMA_0_holo_aligned_predicted_protein.pdb +1i8t_1_FAD_1,data/dockgen_set/dockgen_holo_aligned_predicted_structures/1i8t_1_FAD_1_holo_aligned_predicted_protein.pdb,CC1CC2NC3C(O)NC(O)NC3N(C[C@H](O)[C@H](O)[C@H](O)CO[PH](O)(O)O[PH](O)(O)OC[C@H]3O[C@@H](N4CNC5C(N)NCNC54)[C@H](O)[C@@H]3O)C2CC1C,data/dockgen_set/dockgen_holo_aligned_predicted_structures/1i8t_1_FAD_1_holo_aligned_predicted_protein.pdb +1v2g_1_OCA_0,data/dockgen_set/dockgen_holo_aligned_predicted_structures/1v2g_1_OCA_0_holo_aligned_predicted_protein.pdb,CCCCCCCC(O)O,data/dockgen_set/dockgen_holo_aligned_predicted_structures/1v2g_1_OCA_0_holo_aligned_predicted_protein.pdb +5hqx_1_EDZ_0,data/dockgen_set/dockgen_holo_aligned_predicted_structures/5hqx_1_EDZ_0_holo_aligned_predicted_protein.pdb,OCC[C@H]1CCCC[C@@H]1NC(O)N[C@H]1CNNS1,data/dockgen_set/dockgen_holo_aligned_predicted_structures/5hqx_1_EDZ_0_holo_aligned_predicted_protein.pdb +4fyv_1_DCP_2,data/dockgen_set/dockgen_holo_aligned_predicted_structures/4fyv_1_DCP_2_holo_aligned_predicted_protein.pdb,NC1CCN([C@H]2C[C@H](O)[C@@H](CO[PH](O)(O)O[PH](O)(O)O[PH](O)(O)O)O2)C(O)N1,data/dockgen_set/dockgen_holo_aligned_predicted_structures/4fyv_1_DCP_2_holo_aligned_predicted_protein.pdb +4pfx_1_UDP_0,data/dockgen_set/dockgen_holo_aligned_predicted_structures/4pfx_1_UDP_0_holo_aligned_predicted_protein.pdb,OC1CCN([C@@H]2O[C@H](CO[PH](O)(O)O[PH](O)(O)O)[C@@H](O)[C@H]2O)C(O)N1,data/dockgen_set/dockgen_holo_aligned_predicted_structures/4pfx_1_UDP_0_holo_aligned_predicted_protein.pdb 
+3o7j_1_2AL_0,data/dockgen_set/dockgen_holo_aligned_predicted_structures/3o7j_1_2AL_0_holo_aligned_predicted_protein.pdb,NC(O)NC1NC(O)NC1O,data/dockgen_set/dockgen_holo_aligned_predicted_structures/3o7j_1_2AL_0_holo_aligned_predicted_protein.pdb +4mos_1_GAF_1,data/dockgen_set/dockgen_holo_aligned_predicted_structures/4mos_1_GAF_1_holo_aligned_predicted_protein.pdb,OC[C@H]1O[C@H](O)[C@H](F)[C@@H](O)[C@H]1O,data/dockgen_set/dockgen_holo_aligned_predicted_structures/4mos_1_GAF_1_holo_aligned_predicted_protein.pdb +5enq_1_5QE_0,data/dockgen_set/dockgen_holo_aligned_predicted_structures/5enq_1_5QE_0_holo_aligned_predicted_protein.pdb,CC(O)NC1CCC(CCSC2NC(N3C[C@@H](C)O[C@@H](C)C3)[C@@H]3COC(C)(C)CC3C2CN)CC1,data/dockgen_set/dockgen_holo_aligned_predicted_structures/5enq_1_5QE_0_holo_aligned_predicted_protein.pdb +5fxd_1_H7Y_1,data/dockgen_set/dockgen_holo_aligned_predicted_structures/5fxd_1_H7Y_1_holo_aligned_predicted_protein.pdb,CCC[C@H]1CCC(O)C(OC)C1,data/dockgen_set/dockgen_holo_aligned_predicted_structures/5fxd_1_H7Y_1_holo_aligned_predicted_protein.pdb +4cdn_2_FAD_0,data/dockgen_set/dockgen_holo_aligned_predicted_structures/4cdn_2_FAD_0_holo_aligned_predicted_protein.pdb,CC1CC2NC3C(O)NC(O)NC3N(C[C@H](O)[C@H](O)[C@H](O)CO[PH](O)(O)O[PH](O)(O)OC[C@H]3O[C@@H](N4CNC5C(N)NCNC54)[C@H](O)[C@@H]3O)C2C[C@H]1C,data/dockgen_set/dockgen_holo_aligned_predicted_structures/4cdn_2_FAD_0_holo_aligned_predicted_protein.pdb +6yap_1_OHZ_0,data/dockgen_set/dockgen_holo_aligned_predicted_structures/6yap_1_OHZ_0_holo_aligned_predicted_protein.pdb,OCC[C@H]1CCCC[C@@H]1NC(O)N[C@H]1CC(Cl)C[C@H](OC(F)(F)F)C1,data/dockgen_set/dockgen_holo_aligned_predicted_structures/6yap_1_OHZ_0_holo_aligned_predicted_protein.pdb 
+4fyw_1_CTP_4,data/dockgen_set/dockgen_holo_aligned_predicted_structures/4fyw_1_CTP_4_holo_aligned_predicted_protein.pdb,NC1CCN([C@@H]2O[C@H](CO[PH](O)(O)O[PH](O)(O)O[PH](O)(O)O)[C@@H](O)[C@H]2O)C(O)N1,data/dockgen_set/dockgen_holo_aligned_predicted_structures/4fyw_1_CTP_4_holo_aligned_predicted_protein.pdb +2g7c_1_NAG-GAL-GLA_1,data/dockgen_set/dockgen_holo_aligned_predicted_structures/2g7c_1_NAG-GAL-GLA_1_holo_aligned_predicted_protein.pdb,CC(O)N[C@@H]1[C@@H](O)[C@H](O[C@@H]2O[C@H](CO)[C@H](O)[C@H](O[C@H]3O[C@H](CO)[C@H](O)[C@H](O)[C@H]3O)[C@H]2O)[C@@H](CO)O[C@H]1O,data/dockgen_set/dockgen_holo_aligned_predicted_structures/2g7c_1_NAG-GAL-GLA_1_holo_aligned_predicted_protein.pdb +3ad7_1_NAD_0,data/dockgen_set/dockgen_holo_aligned_predicted_structures/3ad7_1_NAD_0_holo_aligned_predicted_protein.pdb,N[C@H](O)C1CCCN([C@@H]2O[C@H](CO[PH](O)(O)O[PH](O)(O)OC[C@H]3O[C@@H](N4CNC5[C@H](N)NCN[C@H]54)[C@H](O)[C@@H]3O)[C@@H](O)[C@H]2O)C1,data/dockgen_set/dockgen_holo_aligned_predicted_structures/3ad7_1_NAD_0_holo_aligned_predicted_protein.pdb +3qrc_2_GU4-YYJ_0,data/dockgen_set/dockgen_holo_aligned_predicted_structures/3qrc_2_GU4-YYJ_0_holo_aligned_predicted_protein.pdb,OS(O)(O)OC[C@H]1O[C@@](COS(O)(O)O)(O[C@H]2O[C@H](COS(O)(O)O)[C@@H](OS(O)(O)O)[C@H](OS(O)(O)O)[C@H]2OS(O)(O)O)[C@@H](OS(O)(O)O)[C@@H]1OS(O)(O)O,data/dockgen_set/dockgen_holo_aligned_predicted_structures/3qrc_2_GU4-YYJ_0_holo_aligned_predicted_protein.pdb +2gag_1_FOA_0,data/dockgen_set/dockgen_holo_aligned_predicted_structures/2gag_1_FOA_0_holo_aligned_predicted_protein.pdb,OC(O)C1CCCO1,data/dockgen_set/dockgen_holo_aligned_predicted_structures/2gag_1_FOA_0_holo_aligned_predicted_protein.pdb +6ur1_2_AT3_0,data/dockgen_set/dockgen_holo_aligned_predicted_structures/6ur1_2_AT3_0_holo_aligned_predicted_protein.pdb,C[C@@H](O)SCC[N+](C)(C)C,data/dockgen_set/dockgen_holo_aligned_predicted_structures/6ur1_2_AT3_0_holo_aligned_predicted_protein.pdb 
+3gf4_1_FAD_1,data/dockgen_set/dockgen_holo_aligned_predicted_structures/3gf4_1_FAD_1_holo_aligned_predicted_protein.pdb,CC1CC2NC3C(O)NC(O)NC3N(C[C@H](O)[C@H](O)[C@H](O)CO[PH](O)(O)O[PH](O)(O)OC[C@H]3O[C@@H](N4CNC5C4NCN[C@H]5N)[C@H](O)[C@@H]3O)C2CC1C,data/dockgen_set/dockgen_holo_aligned_predicted_structures/3gf4_1_FAD_1_holo_aligned_predicted_protein.pdb +3ub7_1_ACM_1,data/dockgen_set/dockgen_holo_aligned_predicted_structures/3ub7_1_ACM_1_holo_aligned_predicted_protein.pdb,CC(N)O,data/dockgen_set/dockgen_holo_aligned_predicted_structures/3ub7_1_ACM_1_holo_aligned_predicted_protein.pdb +6yaq_1_OHZ_0,data/dockgen_set/dockgen_holo_aligned_predicted_structures/6yaq_1_OHZ_0_holo_aligned_predicted_protein.pdb,OCCC1CCCC[C@H]1N[C@H](O)N[C@H]1C[C@@H](Cl)C[C@@H](OC(F)(F)F)C1,data/dockgen_set/dockgen_holo_aligned_predicted_structures/6yaq_1_OHZ_0_holo_aligned_predicted_protein.pdb +3ad9_1_NAD_0,data/dockgen_set/dockgen_holo_aligned_predicted_structures/3ad9_1_NAD_0_holo_aligned_predicted_protein.pdb,NC1NCNC2[C@@H]1NCN2[C@@H]1O[C@H](CO[PH](O)(O)O[PH](O)(O)OC[C@H]2O[C@@H](N3CCCC([C@H](N)O)C3)[C@H](O)[C@@H]2O)[C@@H](O)[C@H]1O,data/dockgen_set/dockgen_holo_aligned_predicted_structures/3ad9_1_NAD_0_holo_aligned_predicted_protein.pdb +2hs3_1_FGR_0,data/dockgen_set/dockgen_holo_aligned_predicted_structures/2hs3_1_FGR_0_holo_aligned_predicted_protein.pdb,OCNCC(O)N[C@@H]1O[C@H](CO[PH](O)(O)O)[C@@H](O)[C@H]1O,data/dockgen_set/dockgen_holo_aligned_predicted_structures/2hs3_1_FGR_0_holo_aligned_predicted_protein.pdb +5enp_1_5QF_0,data/dockgen_set/dockgen_holo_aligned_predicted_structures/5enp_1_5QF_0_holo_aligned_predicted_protein.pdb,COCCN1CCN([C@@H]2NC(SCC[C@H]3CCC(OC)[C@@H](OC)C3)[C@H](CN)[C@H]3CC(C)(C)OCC23)CC1,data/dockgen_set/dockgen_holo_aligned_predicted_structures/5enp_1_5QF_0_holo_aligned_predicted_protein.pdb 
+6a71_1_9UX_0,data/dockgen_set/dockgen_holo_aligned_predicted_structures/6a71_1_9UX_0_holo_aligned_predicted_protein.pdb,O[Mo@]12S[Mo@@]1(O)S2,data/dockgen_set/dockgen_holo_aligned_predicted_structures/6a71_1_9UX_0_holo_aligned_predicted_protein.pdb +2vdf_1_OCT_0,data/dockgen_set/dockgen_holo_aligned_predicted_structures/2vdf_1_OCT_0_holo_aligned_predicted_protein.pdb,CCCCCCCC,data/dockgen_set/dockgen_holo_aligned_predicted_structures/2vdf_1_OCT_0_holo_aligned_predicted_protein.pdb +4uuw_1_AMP_0,data/dockgen_set/dockgen_holo_aligned_predicted_structures/4uuw_1_AMP_0_holo_aligned_predicted_protein.pdb,NC1NCNC2C1NCN2[C@@H]1O[C@H](CO[PH](O)(O)O)[C@@H](O)[C@H]1O,data/dockgen_set/dockgen_holo_aligned_predicted_structures/4uuw_1_AMP_0_holo_aligned_predicted_protein.pdb +2q37_1_3AL_0,data/dockgen_set/dockgen_holo_aligned_predicted_structures/2q37_1_3AL_0_holo_aligned_predicted_protein.pdb,NC(O)N[C@H]1NC(O)NC1O,data/dockgen_set/dockgen_holo_aligned_predicted_structures/2q37_1_3AL_0_holo_aligned_predicted_protein.pdb +5k62_1_ASN-VAL_0,data/dockgen_set/dockgen_holo_aligned_predicted_structures/5k62_1_ASN-VAL_0_holo_aligned_predicted_protein.pdb,CC(C)[C@@H](CO)NC(O)[C@@H](N)CC(N)O,data/dockgen_set/dockgen_holo_aligned_predicted_structures/5k62_1_ASN-VAL_0_holo_aligned_predicted_protein.pdb +3inr_1_GDU_0,data/dockgen_set/dockgen_holo_aligned_predicted_structures/3inr_1_GDU_0_holo_aligned_predicted_protein.pdb,OC[C@H]1O[C@H](O[PH](O)(O)O[PH](O)(O)OC[C@H]2O[C@@H](N3CCC(O)NC3O)[C@H](O)[C@@H]2O)[C@H](O)[C@@H](O)[C@H]1O,data/dockgen_set/dockgen_holo_aligned_predicted_structures/3inr_1_GDU_0_holo_aligned_predicted_protein.pdb +2ze9_1_PD7_0,data/dockgen_set/dockgen_holo_aligned_predicted_structures/2ze9_1_PD7_0_holo_aligned_predicted_protein.pdb,CCCCCCC(O)OC[C@H](CO[PH](O)(O)O)OC(O)CCCCCC,data/dockgen_set/dockgen_holo_aligned_predicted_structures/2ze9_1_PD7_0_holo_aligned_predicted_protein.pdb 
+3gf4_1_U5P_0,data/dockgen_set/dockgen_holo_aligned_predicted_structures/3gf4_1_U5P_0_holo_aligned_predicted_protein.pdb,OC1CCN([C@@H]2O[C@H](CO[PH](O)(O)O)[C@@H](O)[C@H]2O)C(O)N1,data/dockgen_set/dockgen_holo_aligned_predicted_structures/3gf4_1_U5P_0_holo_aligned_predicted_protein.pdb +4phr_1_UDP_0,data/dockgen_set/dockgen_holo_aligned_predicted_structures/4phr_1_UDP_0_holo_aligned_predicted_protein.pdb,OC1CCN([C@@H]2O[C@H](CO[PH](O)(O)O[PH](O)(O)O)[C@@H](O)[C@H]2O)C(O)N1,data/dockgen_set/dockgen_holo_aligned_predicted_structures/4phr_1_UDP_0_holo_aligned_predicted_protein.pdb +3o01_2_DXC_0,data/dockgen_set/dockgen_holo_aligned_predicted_structures/3o01_2_DXC_0_holo_aligned_predicted_protein.pdb,C[C@H](CCC(O)O)[C@H]1CC[C@H]2[C@@H]3CC[C@@H]4C[C@H](O)CC[C@]4(C)[C@H]3C[C@H](O)[C@]12C,data/dockgen_set/dockgen_holo_aligned_predicted_structures/3o01_2_DXC_0_holo_aligned_predicted_protein.pdb +5b5s_1_BOG_0,data/dockgen_set/dockgen_holo_aligned_predicted_structures/5b5s_1_BOG_0_holo_aligned_predicted_protein.pdb,CCCCCCCCO[C@@H]1O[C@H](CO)[C@@H](O)[C@H](O)[C@H]1O,data/dockgen_set/dockgen_holo_aligned_predicted_structures/5b5s_1_BOG_0_holo_aligned_predicted_protein.pdb +5hmr_1_FDZ_0,data/dockgen_set/dockgen_holo_aligned_predicted_structures/5hmr_1_FDZ_0_holo_aligned_predicted_protein.pdb,OC(NC1CNNS1)N[C@H]1CCC[C@H](OC(F)(F)F)C1,data/dockgen_set/dockgen_holo_aligned_predicted_structures/5hmr_1_FDZ_0_holo_aligned_predicted_protein.pdb +2wwc_1_CHT_2,data/dockgen_set/dockgen_holo_aligned_predicted_structures/2wwc_1_CHT_2_holo_aligned_predicted_protein.pdb,C[N+](C)(C)CCO,data/dockgen_set/dockgen_holo_aligned_predicted_structures/2wwc_1_CHT_2_holo_aligned_predicted_protein.pdb 
+2v5e_1_GU4-YYJ_0,data/dockgen_set/dockgen_holo_aligned_predicted_structures/2v5e_1_GU4-YYJ_0_holo_aligned_predicted_protein.pdb,OS(O)(O)OC[C@H]1O[C@@](COS(O)(O)O)(O[C@H]2O[C@H](COS(O)(O)O)[C@@H](OS(O)(O)O)[C@H](OS(O)(O)O)[C@H]2OS(O)(O)O)[C@@H](OS(O)(O)O)[C@@H]1OS(O)(O)O,data/dockgen_set/dockgen_holo_aligned_predicted_structures/2v5e_1_GU4-YYJ_0_holo_aligned_predicted_protein.pdb +3zjx_1_BOG_0,data/dockgen_set/dockgen_holo_aligned_predicted_structures/3zjx_1_BOG_0_holo_aligned_predicted_protein.pdb,CCCCCCCCO[C@@H]1O[C@H](CO)[C@@H](O)[C@H](O)[C@H]1O,data/dockgen_set/dockgen_holo_aligned_predicted_structures/3zjx_1_BOG_0_holo_aligned_predicted_protein.pdb +5fxf_1_BEZ_0,data/dockgen_set/dockgen_holo_aligned_predicted_structures/5fxf_1_BEZ_0_holo_aligned_predicted_protein.pdb,OC(O)C1CCCCC1,data/dockgen_set/dockgen_holo_aligned_predicted_structures/5fxf_1_BEZ_0_holo_aligned_predicted_protein.pdb +6uqy_2_AT3_0,data/dockgen_set/dockgen_holo_aligned_predicted_structures/6uqy_2_AT3_0_holo_aligned_predicted_protein.pdb,C[C@@H](O)SCC[N+](C)(C)C,data/dockgen_set/dockgen_holo_aligned_predicted_structures/6uqy_2_AT3_0_holo_aligned_predicted_protein.pdb +5hhz_1_ZME_0,data/dockgen_set/dockgen_holo_aligned_predicted_structures/5hhz_1_ZME_0_holo_aligned_predicted_protein.pdb,CC1CCN(C2NCNC3NCNC32)C1,data/dockgen_set/dockgen_holo_aligned_predicted_structures/5hhz_1_ZME_0_holo_aligned_predicted_protein.pdb +4xdr_1_ADN_0,data/dockgen_set/dockgen_holo_aligned_predicted_structures/4xdr_1_ADN_0_holo_aligned_predicted_protein.pdb,NC1NCNC2C1NCN2[C@@H]1O[C@H](CO)[C@@H](O)[C@H]1O,data/dockgen_set/dockgen_holo_aligned_predicted_structures/4xdr_1_ADN_0_holo_aligned_predicted_protein.pdb +3ub9_1_NHY_1,data/dockgen_set/dockgen_holo_aligned_predicted_structures/3ub9_1_NHY_1_holo_aligned_predicted_protein.pdb,N[C@@H](O)NO,data/dockgen_set/dockgen_holo_aligned_predicted_structures/3ub9_1_NHY_1_holo_aligned_predicted_protein.pdb 
+6npp_1_KWG_0,data/dockgen_set/dockgen_holo_aligned_predicted_structures/6npp_1_KWG_0_holo_aligned_predicted_protein.pdb,[O-]C(O)C1CCCC(CCC2CCCCC2)C1N1CCCC1,data/dockgen_set/dockgen_holo_aligned_predicted_structures/6npp_1_KWG_0_holo_aligned_predicted_protein.pdb +2wab_1_BGC-BGC-BGC-BGC-BGC_0,data/dockgen_set/dockgen_holo_aligned_predicted_structures/2wab_1_BGC-BGC-BGC-BGC-BGC_0_holo_aligned_predicted_protein.pdb,OC[C@H]1O[C@@H](O[C@H]2[C@H](O)[C@@H](O)[C@H](O[C@H]3[C@H](O)[C@@H](O)[C@H](O[C@H]4[C@H](O)[C@@H](O)[C@H](O[C@H]5[C@H](O)[C@@H](O)[C@H](O)O[C@@H]5CO)O[C@@H]4CO)O[C@@H]3CO)O[C@@H]2CO)[C@H](O)[C@@H](O)[C@@H]1O,data/dockgen_set/dockgen_holo_aligned_predicted_structures/2wab_1_BGC-BGC-BGC-BGC-BGC_0_holo_aligned_predicted_protein.pdb +4rpm_1_HXC_0,data/dockgen_set/dockgen_holo_aligned_predicted_structures/4rpm_1_HXC_0_holo_aligned_predicted_protein.pdb,CCCCCC(O)SC,data/dockgen_set/dockgen_holo_aligned_predicted_structures/4rpm_1_HXC_0_holo_aligned_predicted_protein.pdb +3k8l_1_GLC-GLC-GLC-GLC-GLC_0,data/dockgen_set/dockgen_holo_aligned_predicted_structures/3k8l_1_GLC-GLC-GLC-GLC-GLC_0_holo_aligned_predicted_protein.pdb,OC[C@H]1O[C@H](O[C@H]2[C@H](O)[C@@H](O)[C@@H](O[C@H]3[C@H](O)[C@@H](O)[C@@H](O[C@H]4[C@H](O)[C@@H](O)[C@@H](O[C@H]5[C@H](O)[C@@H](O)[C@@H](O)O[C@@H]5CO)O[C@@H]4CO)O[C@@H]3CO)O[C@@H]2CO)[C@H](O)[C@@H](O)[C@@H]1O,data/dockgen_set/dockgen_holo_aligned_predicted_structures/3k8l_1_GLC-GLC-GLC-GLC-GLC_0_holo_aligned_predicted_protein.pdb +1v97_1_MTE_1,data/dockgen_set/dockgen_holo_aligned_predicted_structures/1v97_1_MTE_1_holo_aligned_predicted_protein.pdb,NC1NC(O)[C@@H]2N[C@H]3C(S)C(S)[C@@H](CO[PH](O)(O)O)O[C@H]3NC2N1,data/dockgen_set/dockgen_holo_aligned_predicted_structures/1v97_1_MTE_1_holo_aligned_predicted_protein.pdb 
+5ae3_2_AWB_1,data/dockgen_set/dockgen_holo_aligned_predicted_structures/5ae3_2_AWB_1_holo_aligned_predicted_protein.pdb,CCCCCC[C@H]1C(O)O[C@H](C)[C@H](N[C@H](O)[C@@H]2CCC[C@H](NCO)[C@H]2O)C(O)O[C@@H](C)[C@@H]1OC(O)CC(C)C,data/dockgen_set/dockgen_holo_aligned_predicted_structures/5ae3_2_AWB_1_holo_aligned_predicted_protein.pdb +3ada_1_NAD_0,data/dockgen_set/dockgen_holo_aligned_predicted_structures/3ada_1_NAD_0_holo_aligned_predicted_protein.pdb,N[C@H](O)C1CCCN([C@@H]2O[C@H](CO[PH](O)(O)O[PH](O)(O)OC[C@H]3O[C@@H](N4CN[C@@H]5[C@H](N)NCN[C@H]54)[C@H](O)[C@@H]3O)[C@@H](O)[C@H]2O)C1,data/dockgen_set/dockgen_holo_aligned_predicted_structures/3ada_1_NAD_0_holo_aligned_predicted_protein.pdb +3s6a_1_ANP_0,data/dockgen_set/dockgen_holo_aligned_predicted_structures/3s6a_1_ANP_0_holo_aligned_predicted_protein.pdb,NC1NCNC2C1NCN2[C@@H]1O[C@H](CO[PH](O)(O)O[PH](N)(O)O)[C@@H](O)[C@H]1O,data/dockgen_set/dockgen_holo_aligned_predicted_structures/3s6a_1_ANP_0_holo_aligned_predicted_protein.pdb +2o5m_1_MNR_0,data/dockgen_set/dockgen_holo_aligned_predicted_structures/2o5m_1_MNR_0_holo_aligned_predicted_protein.pdb,CCC1C(C)C2C[C@@H]3[C@H](C)[C@H](CCC(O)O)C4CC5[C@@H](CCC(O)O)C(C)C6CC7[C@@H](CC)[C@H](C)C8CC1N2[Mn@SP2](N65)(N87)N43,data/dockgen_set/dockgen_holo_aligned_predicted_structures/2o5m_1_MNR_0_holo_aligned_predicted_protein.pdb +6xug_1_O1Q_0,data/dockgen_set/dockgen_holo_aligned_predicted_structures/6xug_1_O1Q_0_holo_aligned_predicted_protein.pdb,CC1CCC[C@H](N2NCC[C@H]2C2CC(Cl)C3NNN(C4CC[C@H]5CNNC5C4)C3C2)C1,data/dockgen_set/dockgen_holo_aligned_predicted_structures/6xug_1_O1Q_0_holo_aligned_predicted_protein.pdb +2xrh_1_NIO_1,data/dockgen_set/dockgen_holo_aligned_predicted_structures/2xrh_1_NIO_1_holo_aligned_predicted_protein.pdb,OC(O)[C@H]1CCCNC1,data/dockgen_set/dockgen_holo_aligned_predicted_structures/2xrh_1_NIO_1_holo_aligned_predicted_protein.pdb 
+2gf3_2_FOA_1,data/dockgen_set/dockgen_holo_aligned_predicted_structures/2gf3_2_FOA_1_holo_aligned_predicted_protein.pdb,OC(O)C1CCCO1,data/dockgen_set/dockgen_holo_aligned_predicted_structures/2gf3_2_FOA_1_holo_aligned_predicted_protein.pdb +2xta_1_ACO_0,data/dockgen_set/dockgen_holo_aligned_predicted_structures/2xta_1_ACO_0_holo_aligned_predicted_protein.pdb,CCO[PH](O)(O)O[PH](O)(O)OC[C@H]1O[C@@H](N2CNC3C(N)NCNC32)[C@H](O)[C@@H]1O[PH](O)(O)O,data/dockgen_set/dockgen_holo_aligned_predicted_structures/2xta_1_ACO_0_holo_aligned_predicted_protein.pdb +5eno_1_5QG_0,data/dockgen_set/dockgen_holo_aligned_predicted_structures/5eno_1_5QG_0_holo_aligned_predicted_protein.pdb,CC1(C)CC2C(CO1)C(N1CCOCC1)NC(SCCC1CCCCC1)[C@@H]2CN,data/dockgen_set/dockgen_holo_aligned_predicted_structures/5eno_1_5QG_0_holo_aligned_predicted_protein.pdb +2gag_1_NAD_0,data/dockgen_set/dockgen_holo_aligned_predicted_structures/2gag_1_NAD_0_holo_aligned_predicted_protein.pdb,NC1NCNC2C1NCN2[C@@H]1O[C@H](CO[PH](O)(O)O[PH](O)(O)OC[C@H]2O[C@@H](N3CCCC([C@H](N)O)C3)[C@H](O)[C@@H]2O)[C@@H](O)[C@H]1O,data/dockgen_set/dockgen_holo_aligned_predicted_structures/2gag_1_NAD_0_holo_aligned_predicted_protein.pdb +1o72_2_PC_0,data/dockgen_set/dockgen_holo_aligned_predicted_structures/1o72_2_PC_0_holo_aligned_predicted_protein.pdb,C[N+](C)(C)CCO[PH](O)(O)O,data/dockgen_set/dockgen_holo_aligned_predicted_structures/1o72_2_PC_0_holo_aligned_predicted_protein.pdb +1za2_1_CTP_4,data/dockgen_set/dockgen_holo_aligned_predicted_structures/1za2_1_CTP_4_holo_aligned_predicted_protein.pdb,NC1CCN([C@@H]2O[C@H](CO[PH](O)(O)O[PH](O)(O)O[PH](O)(O)O)[C@@H](O)[C@H]2O)C(O)N1,data/dockgen_set/dockgen_holo_aligned_predicted_structures/1za2_1_CTP_4_holo_aligned_predicted_protein.pdb +5fxe_1_CIY_1,data/dockgen_set/dockgen_holo_aligned_predicted_structures/5fxe_1_CIY_1_holo_aligned_predicted_protein.pdb,CO[C@H]1CC(CCCO)CCC1O,data/dockgen_set/dockgen_holo_aligned_predicted_structures/5fxe_1_CIY_1_holo_aligned_predicted_protein.pdb 
+2r4e_1_13P_0,data/dockgen_set/dockgen_holo_aligned_predicted_structures/2r4e_1_13P_0_holo_aligned_predicted_protein.pdb,OC[C@@H](O)CO[PH](O)(O)O,data/dockgen_set/dockgen_holo_aligned_predicted_structures/2r4e_1_13P_0_holo_aligned_predicted_protein.pdb +2vfu_1_MTL_0,data/dockgen_set/dockgen_holo_aligned_predicted_structures/2vfu_1_MTL_0_holo_aligned_predicted_protein.pdb,OC[C@@H](O)[C@@H](O)[C@H](O)[C@H](O)CO,data/dockgen_set/dockgen_holo_aligned_predicted_structures/2vfu_1_MTL_0_holo_aligned_predicted_protein.pdb +4oal_2_245_0,data/dockgen_set/dockgen_holo_aligned_predicted_structures/4oal_2_245_0_holo_aligned_predicted_protein.pdb,OC(NC1CCCCC1)N[C@H]1CCNC(Cl)C1,data/dockgen_set/dockgen_holo_aligned_predicted_structures/4oal_2_245_0_holo_aligned_predicted_protein.pdb +2x34_2_UQ8_0,data/dockgen_set/dockgen_holo_aligned_predicted_structures/2x34_2_UQ8_0_holo_aligned_predicted_protein.pdb,COC1C(O)C(C)C(CCC(C)CCCC(C)CCCC(C)CCC[C@H](C)CCCC(C)CCC[C@H](C)CCCC(C)CCCC(C)C)C(O)C1OC,data/dockgen_set/dockgen_holo_aligned_predicted_structures/2x34_2_UQ8_0_holo_aligned_predicted_protein.pdb +3uni_1_SAL_0,data/dockgen_set/dockgen_holo_aligned_predicted_structures/3uni_1_SAL_0_holo_aligned_predicted_protein.pdb,OC(O)C1CCCCC1O,data/dockgen_set/dockgen_holo_aligned_predicted_structures/3uni_1_SAL_0_holo_aligned_predicted_protein.pdb +3o02_2_JN3_0,data/dockgen_set/dockgen_holo_aligned_predicted_structures/3o02_2_JN3_0_holo_aligned_predicted_protein.pdb,C[C@H](CCC(O)O)[C@H]1CC[C@H]2[C@@H]3[C@H](O)C[C@@H]4C[C@H](O)CC[C@]4(C)[C@H]3CC[C@]12C,data/dockgen_set/dockgen_holo_aligned_predicted_structures/3o02_2_JN3_0_holo_aligned_predicted_protein.pdb 
+4mo2_2_FDA_0,data/dockgen_set/dockgen_holo_aligned_predicted_structures/4mo2_2_FDA_0_holo_aligned_predicted_protein.pdb,CC1CC2[C@@H](CC1C)N[C@H]1C(O)NC(O)N[C@H]1N2C[C@H](O)[C@H](O)[C@H](O)CO[PH](O)(O)O[PH](O)(O)OC[C@H]1O[C@@H](N2CNC3C(N)NCNC32)[C@H](O)[C@@H]1O,data/dockgen_set/dockgen_holo_aligned_predicted_structures/4mo2_2_FDA_0_holo_aligned_predicted_protein.pdb +5ent_1_MIY_0,data/dockgen_set/dockgen_holo_aligned_predicted_structures/5ent_1_MIY_0_holo_aligned_predicted_protein.pdb,CN(C)C1CCC(O)C2C1C[C@H]1C[C@H]3[C@H](N(C)C)C(O)C(C(N)O)[C@@H](O)[C@@]3(O)C(O)[C@@H]1C2O,data/dockgen_set/dockgen_holo_aligned_predicted_structures/5ent_1_MIY_0_holo_aligned_predicted_protein.pdb +5ida_1_BMA_0,data/dockgen_set/dockgen_holo_aligned_predicted_structures/5ida_1_BMA_0_holo_aligned_predicted_protein.pdb,OC[C@H]1O[C@@H](O)[C@@H](O)[C@@H](O)[C@@H]1O,data/dockgen_set/dockgen_holo_aligned_predicted_structures/5ida_1_BMA_0_holo_aligned_predicted_protein.pdb +5k66_1_ASN-GLU_0,data/dockgen_set/dockgen_holo_aligned_predicted_structures/5k66_1_ASN-GLU_0_holo_aligned_predicted_protein.pdb,NC(O)C[C@H](N)C(O)N[C@H](CO)CCC(O)O,data/dockgen_set/dockgen_holo_aligned_predicted_structures/5k66_1_ASN-GLU_0_holo_aligned_predicted_protein.pdb +6yao_1_OJ2_0,data/dockgen_set/dockgen_holo_aligned_predicted_structures/6yao_1_OJ2_0_holo_aligned_predicted_protein.pdb,OCC[C@H]1CCCC[C@@H]1NC(O)N[C@H]1CCC[C@@H](OC(F)(F)F)C1,data/dockgen_set/dockgen_holo_aligned_predicted_structures/6yao_1_OJ2_0_holo_aligned_predicted_protein.pdb +6qkr_1_FAD_0,data/dockgen_set/dockgen_holo_aligned_predicted_structures/6qkr_1_FAD_0_holo_aligned_predicted_protein.pdb,CC1CC2NC3C(NC(O)N[C@H]3O)N(C[C@H](O)[C@H](O)[C@H](O)CO[PH](O)(O)O[PH](O)(O)OC[C@H]3O[C@@H](N4CNC5C4NCN[C@H]5N)[C@H](O)[C@@H]3O)C2CC1C,data/dockgen_set/dockgen_holo_aligned_predicted_structures/6qkr_1_FAD_0_holo_aligned_predicted_protein.pdb 
+5enr_1_MBX_0,data/dockgen_set/dockgen_holo_aligned_predicted_structures/5enr_1_MBX_0_holo_aligned_predicted_protein.pdb,CCC(O)NC1CCC(CCS[C@@H]2NC(N3C[C@H](C)O[C@@H](C)C3)C3COC(C)(C)CC3C2CN)CC1,data/dockgen_set/dockgen_holo_aligned_predicted_structures/5enr_1_MBX_0_holo_aligned_predicted_protein.pdb +4h2f_1_ADN_0,data/dockgen_set/dockgen_holo_aligned_predicted_structures/4h2f_1_ADN_0_holo_aligned_predicted_protein.pdb,NC1NCNC2C1NCN2[C@@H]1O[C@H](CO)[C@@H](O)[C@H]1O,data/dockgen_set/dockgen_holo_aligned_predicted_structures/4h2f_1_ADN_0_holo_aligned_predicted_protein.pdb +6b1b_1_TMO_15,data/dockgen_set/dockgen_holo_aligned_predicted_structures/6b1b_1_TMO_15_holo_aligned_predicted_protein.pdb,C[N+](C)(C)O,data/dockgen_set/dockgen_holo_aligned_predicted_structures/6b1b_1_TMO_15_holo_aligned_predicted_protein.pdb +4mig_1_G3F_2,data/dockgen_set/dockgen_holo_aligned_predicted_structures/4mig_1_G3F_2_holo_aligned_predicted_protein.pdb,OC[C@H]1O[C@@H](O)[C@H](O)[C@@H](F)[C@@H]1O,data/dockgen_set/dockgen_holo_aligned_predicted_structures/4mig_1_G3F_2_holo_aligned_predicted_protein.pdb +1tke_1_SER_0,data/dockgen_set/dockgen_holo_aligned_predicted_structures/1tke_1_SER_0_holo_aligned_predicted_protein.pdb,N[C@@H](CO)C(O)O,data/dockgen_set/dockgen_holo_aligned_predicted_structures/1tke_1_SER_0_holo_aligned_predicted_protein.pdb +4o95_1_245_0,data/dockgen_set/dockgen_holo_aligned_predicted_structures/4o95_1_245_0_holo_aligned_predicted_protein.pdb,OC(NC1CCCCC1)N[C@H]1CCNC(Cl)C1,data/dockgen_set/dockgen_holo_aligned_predicted_structures/4o95_1_245_0_holo_aligned_predicted_protein.pdb +4qo5_1_NAG_2,data/dockgen_set/dockgen_holo_aligned_predicted_structures/4qo5_1_NAG_2_holo_aligned_predicted_protein.pdb,CC(O)N[C@H]1CO[C@H](CO)[C@@H](O)[C@@H]1O,data/dockgen_set/dockgen_holo_aligned_predicted_structures/4qo5_1_NAG_2_holo_aligned_predicted_protein.pdb 
+2hk9_1_SKM_0,data/dockgen_set/dockgen_holo_aligned_predicted_structures/2hk9_1_SKM_0_holo_aligned_predicted_protein.pdb,OC(O)[C@H]1C[C@@H](O)[C@@H](O)[C@H](O)C1,data/dockgen_set/dockgen_holo_aligned_predicted_structures/2hk9_1_SKM_0_holo_aligned_predicted_protein.pdb diff --git a/forks/NeuralPLexer/inference/neuralplexer_pocket_only_posebusters_benchmark_inputs.csv b/forks/NeuralPLexer/inference/neuralplexer_pocket_only_posebusters_benchmark_inputs.csv new file mode 100644 index 00000000..ddbdc35c --- /dev/null +++ b/forks/NeuralPLexer/inference/neuralplexer_pocket_only_posebusters_benchmark_inputs.csv @@ -0,0 +1,281 @@ +id,input_receptor,input_ligand,input_template +7CNQ_G8X,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures_bs_cropped/7CNQ_G8X_holo_aligned_predicted_protein.pdb,O=C(O)[C@H]1NCC[C@H]1O,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures_bs_cropped/7CNQ_G8X_holo_aligned_predicted_protein.pdb +7C0U_FGO,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures_bs_cropped/7C0U_FGO_holo_aligned_predicted_protein.pdb,Nc1nc2c(ncn2[C@@H]2O[C@H](CO[P@](=O)(O)O[C@H]3[C@@H](O)[C@H](n4ccc(=O)[nH]c4=O)O[C@@H]3CO)[C@H]3CC(O)(O)C[C@H]32)c(=O)[nH]1,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures_bs_cropped/7C0U_FGO_holo_aligned_predicted_protein.pdb +7EBG_J0L,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures_bs_cropped/7EBG_J0L_holo_aligned_predicted_protein.pdb,CNc1cccc2c1NC(=O)C2(C)C,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures_bs_cropped/7EBG_J0L_holo_aligned_predicted_protein.pdb 
+7WUY_76N,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures_bs_cropped/7WUY_76N_holo_aligned_predicted_protein.pdb,C[C@@H]1C=C[C@@H]2CCCC[C@@H]2[C@H]1C(=O)c1c(O)c([C@]2(O)CC[C@H](O)[C@@H]3O[C@@H]32)c[nH]c1=O,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures_bs_cropped/7WUY_76N_holo_aligned_predicted_protein.pdb +7PT3_3KK,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures_bs_cropped/7PT3_3KK_holo_aligned_predicted_protein.pdb,CC(C)(O)C(=O)SCCNC(=O)CCNC(=O)[C@H](O)C(C)(C)CO[P@@](=O)(O)O[P@](=O)(O)OC[C@H]1O[C@@H](n2cnc3c(N)ncnc32)[C@H](O)[C@@H]1OP(=O)(O)O,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures_bs_cropped/7PT3_3KK_holo_aligned_predicted_protein.pdb +7C3U_AZG,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures_bs_cropped/7C3U_AZG_holo_aligned_predicted_protein.pdb,Nc1nc(O)c2[nH]nnc2n1,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures_bs_cropped/7C3U_AZG_holo_aligned_predicted_protein.pdb +7PGX_FMN,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures_bs_cropped/7PGX_FMN_holo_aligned_predicted_protein.pdb,Cc1cc2nc3c(=O)[nH]c(=O)nc-3n(C[C@H](O)[C@H](O)[C@H](O)COP(=O)(O)O)c2cc1C,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures_bs_cropped/7PGX_FMN_holo_aligned_predicted_protein.pdb +7FB7_8NF,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures_bs_cropped/7FB7_8NF_holo_aligned_predicted_protein.pdb,Cc1cc(C)c(N)cn1,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures_bs_cropped/7FB7_8NF_holo_aligned_predicted_protein.pdb 
+8AIE_M7L,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures_bs_cropped/8AIE_M7L_holo_aligned_predicted_protein.pdb,Cc1ncc(COP(=O)(O)O)c(C/N=C(\CON)C(=O)O)c1O,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures_bs_cropped/8AIE_M7L_holo_aligned_predicted_protein.pdb +7LCU_XTA,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures_bs_cropped/7LCU_XTA_holo_aligned_predicted_protein.pdb,CN1CCN(c2ncc(Oc3cc(CN4CCC(CC(=O)O)CC4)cc(-c4cc(Cl)cc(Cl)c4)n3)cn2)CC1,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures_bs_cropped/7LCU_XTA_holo_aligned_predicted_protein.pdb +6YMS_OZH,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures_bs_cropped/6YMS_OZH_holo_aligned_predicted_protein.pdb,CC(C)C[C@H](NC(=O)CN[P@@](=O)(O)[C@@H](Cc1ccccc1)NC(=O)OCc1ccccc1)C(=O)O,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures_bs_cropped/6YMS_OZH_holo_aligned_predicted_protein.pdb +7PK0_BYC,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures_bs_cropped/7PK0_BYC_holo_aligned_predicted_protein.pdb,CC(C)(CO[P@@](=O)(O)O[P@](=O)(O)OC[C@H]1O[C@@H](n2cnc3c(N)ncnc32)[C@H](O)[C@@H]1OP(=O)(O)O)[C@@H](O)C(=O)NCCC(=O)NCCSC(=O)c1ccccc1,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures_bs_cropped/7PK0_BYC_holo_aligned_predicted_protein.pdb +7M6K_YRJ,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures_bs_cropped/7M6K_YRJ_holo_aligned_predicted_protein.pdb,O=C(NCc1ccc[n+]([C@@H]2O[C@H](COP(=O)(O)O)[C@@H](O)[C@H]2O)c1)Nc1ccc([N+](=O)[O-])cc1,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures_bs_cropped/7M6K_YRJ_holo_aligned_predicted_protein.pdb 
+7SFO_98L,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures_bs_cropped/7SFO_98L_holo_aligned_predicted_protein.pdb,Oc1cccc(CNc2nc(Cl)nc3scc(-c4ccccc4)c23)c1,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures_bs_cropped/7SFO_98L_holo_aligned_predicted_protein.pdb +8EAB_VN2,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures_bs_cropped/8EAB_VN2_holo_aligned_predicted_protein.pdb,O=C(N[C@H](C(=O)Nc1cnccc1-c1ccc(C(F)(F)F)cc1)c1cccc(C(F)(F)F)c1)c1cnccc1-c1ccc(C(F)(F)F)cc1,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures_bs_cropped/8EAB_VN2_holo_aligned_predicted_protein.pdb +7MWU_ZPM,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures_bs_cropped/7MWU_ZPM_holo_aligned_predicted_protein.pdb,O=C(O)C1CCC1,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures_bs_cropped/7MWU_ZPM_holo_aligned_predicted_protein.pdb +7TYP_KUR,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures_bs_cropped/7TYP_KUR_holo_aligned_predicted_protein.pdb,Cc1nccnc1-c1nn2c(=O)cc(-c3ccc(C4CCCCC4)cc3)[nH]c2c1C(=O)N1CC(CF)C1,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures_bs_cropped/7TYP_KUR_holo_aligned_predicted_protein.pdb +6XM9_V55,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures_bs_cropped/6XM9_V55_holo_aligned_predicted_protein.pdb,COc1cc(C=O)ccc1O,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures_bs_cropped/6XM9_V55_holo_aligned_predicted_protein.pdb 
+7T0D_FPP,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures_bs_cropped/7T0D_FPP_holo_aligned_predicted_protein.pdb,CC(C)=CCC/C(C)=C/CC/C(C)=C/CO[P@](=O)(O)OP(=O)(O)O,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures_bs_cropped/7T0D_FPP_holo_aligned_predicted_protein.pdb +7XI7_4RI,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures_bs_cropped/7XI7_4RI_holo_aligned_predicted_protein.pdb,CCCCCCc1nc(N)nc(N)c1-c1ccccc1,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures_bs_cropped/7XI7_4RI_holo_aligned_predicted_protein.pdb +7PRI_7TI,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures_bs_cropped/7PRI_7TI_holo_aligned_predicted_protein.pdb,Nc1cc(C(Cl)=C(Cl)Cl)c(S(N)(=O)=O)cc1S(N)(=O)=O,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures_bs_cropped/7PRI_7TI_holo_aligned_predicted_protein.pdb +8FLV_ZB9,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures_bs_cropped/8FLV_ZB9_holo_aligned_predicted_protein.pdb,CN(c1ncnc2[nH]ccc12)[C@@H]1CCCN(C(=O)CNc2cc(Cl)cc(Cl)c2)C1,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures_bs_cropped/8FLV_ZB9_holo_aligned_predicted_protein.pdb +7N4W_P4V,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures_bs_cropped/7N4W_P4V_holo_aligned_predicted_protein.pdb,COc1cc2c(cc1OC)[C@H]1Cc3ccc(OC)c(OC)c3CN1CC2,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures_bs_cropped/7N4W_P4V_holo_aligned_predicted_protein.pdb 
+7OEO_V9Z,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures_bs_cropped/7OEO_V9Z_holo_aligned_predicted_protein.pdb,CNC(=O)CN(CC(c1ccccc1)c1ccccc1)C(=O)c1cc(C)c(OC)c(C)c1,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures_bs_cropped/7OEO_V9Z_holo_aligned_predicted_protein.pdb +5SB2_1K2,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures_bs_cropped/5SB2_1K2_holo_aligned_predicted_protein.pdb,O=C(N[C@@H]1C[C@H]1c1ccccc1)c1cc(Cl)cc(COc2cnc3[nH]ccc3c2)c1,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures_bs_cropped/5SB2_1K2_holo_aligned_predicted_protein.pdb +7MGY_ZD1,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures_bs_cropped/7MGY_ZD1_holo_aligned_predicted_protein.pdb,N[C@H]1C=C(CO)[C@@H](O[C@H]2O[C@H](CO)[C@@H](O)[C@H](O)[C@H]2O)[C@H](O)[C@H]1O,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures_bs_cropped/7MGY_ZD1_holo_aligned_predicted_protein.pdb +7OPG_06N,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures_bs_cropped/7OPG_06N_holo_aligned_predicted_protein.pdb,CCCNc1nn2c(-c3ccc(O)cc3)cnc2s1,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures_bs_cropped/7OPG_06N_holo_aligned_predicted_protein.pdb +7OLI_8HG,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures_bs_cropped/7OLI_8HG_holo_aligned_predicted_protein.pdb,Nc1nc2c([nH]c(=O)n2[C@H]2C[C@H](O)[C@@H](CO)O2)c(=O)[nH]1,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures_bs_cropped/7OLI_8HG_holo_aligned_predicted_protein.pdb 
+8FAV_4Y5,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures_bs_cropped/8FAV_4Y5_holo_aligned_predicted_protein.pdb,O=C(O)c1ccc(-c2nn(C(=O)c3c(Cl)cccc3C(F)(F)F)c3cccc(F)c23)c(F)c1,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures_bs_cropped/8FAV_4Y5_holo_aligned_predicted_protein.pdb +7R59_I5F,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures_bs_cropped/7R59_I5F_holo_aligned_predicted_protein.pdb,Oc1ccc2c(c1)sc1nncn12,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures_bs_cropped/7R59_I5F_holo_aligned_predicted_protein.pdb +6XHT_V2V,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures_bs_cropped/6XHT_V2V_holo_aligned_predicted_protein.pdb,Nc1ccn([C@@H]2O[C@H](CO[P@](=O)(O)O[P@@](=O)(O)OC[C@@H](O)[C@@H](O)[C@@H](O)CO)[C@@H](O)[C@H]2O)c(=O)n1,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures_bs_cropped/6XHT_V2V_holo_aligned_predicted_protein.pdb +8EX2_Q2Q,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures_bs_cropped/8EX2_Q2Q_holo_aligned_predicted_protein.pdb,O=C1CC(c2ccccc2)=Nc2c(-c3ccccc3)c(C(F)(F)F)nn21,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures_bs_cropped/8EX2_Q2Q_holo_aligned_predicted_protein.pdb +7LOU_IFM,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures_bs_cropped/7LOU_IFM_holo_aligned_predicted_protein.pdb,OC[C@H]1CNC[C@@H](O)[C@@H]1O,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures_bs_cropped/7LOU_IFM_holo_aligned_predicted_protein.pdb 
+6XBO_5MC,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures_bs_cropped/6XBO_5MC_holo_aligned_predicted_protein.pdb,Cc1cn([C@@H]2O[C@H](COP(=O)(O)O)[C@@H](O)[C@H]2O)c(=O)nc1N,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures_bs_cropped/6XBO_5MC_holo_aligned_predicted_protein.pdb +6TW7_NZB,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures_bs_cropped/6TW7_NZB_holo_aligned_predicted_protein.pdb,CN(C)Cc1[nH]nc2ccc(-c3ccc(F)cc3OCCc3cccnc3)cc12,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures_bs_cropped/6TW7_NZB_holo_aligned_predicted_protein.pdb +7U3J_L6U,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures_bs_cropped/7U3J_L6U_holo_aligned_predicted_protein.pdb,CNC(=O)[C@H](CCc1ccccc1)NC(=O)[C@H](NC(=O)CNCc1ccc(OC)cc1OC)c1cccs1,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures_bs_cropped/7U3J_L6U_holo_aligned_predicted_protein.pdb +7UYB_OK0,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures_bs_cropped/7UYB_OK0_holo_aligned_predicted_protein.pdb,NS(=O)(=O)c1c(C(F)(F)F)ccc(-c2ccc(C3CCNCC3)cc2)c1-c1nnn[nH]1,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures_bs_cropped/7UYB_OK0_holo_aligned_predicted_protein.pdb +7WJB_BGC,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures_bs_cropped/7WJB_BGC_holo_aligned_predicted_protein.pdb,OC[C@H]1O[C@@H](O)[C@H](O)[C@@H](O)[C@@H]1O,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures_bs_cropped/7WJB_BGC_holo_aligned_predicted_protein.pdb 
+7RC3_SAH,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures_bs_cropped/7RC3_SAH_holo_aligned_predicted_protein.pdb,Nc1ncnc2c1ncn2[C@@H]1O[C@H](CSCC[C@H](N)C(=O)O)[C@@H](O)[C@H]1O,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures_bs_cropped/7RC3_SAH_holo_aligned_predicted_protein.pdb +7MY1_IPE,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures_bs_cropped/7MY1_IPE_holo_aligned_predicted_protein.pdb,C=C(C)CCO[P@@](=O)(O)OP(=O)(O)O,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures_bs_cropped/7MY1_IPE_holo_aligned_predicted_protein.pdb +7AN5_RDH,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures_bs_cropped/7AN5_RDH_holo_aligned_predicted_protein.pdb,C=C(Oc1cccc(C(=O)O)c1)C(=O)O,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures_bs_cropped/7AN5_RDH_holo_aligned_predicted_protein.pdb +7JY3_VUD,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures_bs_cropped/7JY3_VUD_holo_aligned_predicted_protein.pdb,C[C@H](Oc1cc2cc(F)ccc2nc1N)c1[nH]c(=O)ccc1-n1cccn1,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures_bs_cropped/7JY3_VUD_holo_aligned_predicted_protein.pdb +7D5C_GV6,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures_bs_cropped/7D5C_GV6_holo_aligned_predicted_protein.pdb,CC[C@H](C)[C@H](N)C(=O)O[P@](=O)(O)OC[C@H]1O[C@@H](n2cnc3c(N)ncnc32)[C@H](O)[C@@H]1O,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures_bs_cropped/7D5C_GV6_holo_aligned_predicted_protein.pdb 
+7WUX_6OI,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures_bs_cropped/7WUX_6OI_holo_aligned_predicted_protein.pdb,N[C@@H](CC[C@H](O)[C@@H](N)COS(=O)(=O)O)C(=O)O,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures_bs_cropped/7WUX_6OI_holo_aligned_predicted_protein.pdb +8F8E_XJI,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures_bs_cropped/8F8E_XJI_holo_aligned_predicted_protein.pdb,NC(=O)C[C@H](NC(=O)c1c[nH]nc1-c1ccc(Cl)cc1F)c1ccc(F)c(Cl)c1,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures_bs_cropped/8F8E_XJI_holo_aligned_predicted_protein.pdb +7PL1_SFG,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures_bs_cropped/7PL1_SFG_holo_aligned_predicted_protein.pdb,Nc1ncnc2c1ncn2[C@@H]1O[C@H](C[C@@H](N)CC[C@H](N)C(=O)O)[C@@H](O)[C@H]1O,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures_bs_cropped/7PL1_SFG_holo_aligned_predicted_protein.pdb +6Z14_Q4Z,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures_bs_cropped/6Z14_Q4Z_holo_aligned_predicted_protein.pdb,CC1=[NH+][C@H]2[C@@H](O1)O[C@H](COS(=O)(=O)[O-])[C@@H](O)[C@@H]2O,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures_bs_cropped/6Z14_Q4Z_holo_aligned_predicted_protein.pdb +7ODY_DGI,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures_bs_cropped/7ODY_DGI_holo_aligned_predicted_protein.pdb,Nc1nc2c(ncn2[C@H]2C[C@H](O)[C@@H](CO[P@@](=O)(O)OP(=O)(O)O)O2)c(=O)[nH]1,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures_bs_cropped/7ODY_DGI_holo_aligned_predicted_protein.pdb 
+7RKW_5TV,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures_bs_cropped/7RKW_5TV_holo_aligned_predicted_protein.pdb,O=C(O[C@H](Cn1ccnc1)c1ccc(F)cc1)c1cc(Cl)cc(Cl)c1,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures_bs_cropped/7RKW_5TV_holo_aligned_predicted_protein.pdb +5SAK_ZRY,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures_bs_cropped/5SAK_ZRY_holo_aligned_predicted_protein.pdb,N=C1N/C(=N\Nc2ccccc2)c2ccccc21,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures_bs_cropped/5SAK_ZRY_holo_aligned_predicted_protein.pdb +8AUH_L9I,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures_bs_cropped/8AUH_L9I_holo_aligned_predicted_protein.pdb,CCOC(=O)/C(=N\O)C(C)=O,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures_bs_cropped/8AUH_L9I_holo_aligned_predicted_protein.pdb +8C5M_MTA,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures_bs_cropped/8C5M_MTA_holo_aligned_predicted_protein.pdb,CSC[C@H]1O[C@@H](n2cnc3c(N)ncnc32)[C@H](O)[C@@H]1O,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures_bs_cropped/8C5M_MTA_holo_aligned_predicted_protein.pdb +7N03_ZRP,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures_bs_cropped/7N03_ZRP_holo_aligned_predicted_protein.pdb,CCCCCCNc1ccc2ncc(C(=O)NC)c(Nc3ccccc3)c2c1,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures_bs_cropped/7N03_ZRP_holo_aligned_predicted_protein.pdb 
+7U0U_FK5,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures_bs_cropped/7U0U_FK5_holo_aligned_predicted_protein.pdb,C=CC[C@@H]1/C=C(\C)C[C@H](C)C[C@H](OC)[C@H]2O[C@@](O)(C(=O)C(=O)N3CCCC[C@H]3C(=O)O[C@H](/C(C)=C/[C@@H]3CC[C@@H](O)[C@H](OC)C3)[C@H](C)[C@@H](O)CC1=O)[C@H](C)C[C@@H]2OC,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures_bs_cropped/7U0U_FK5_holo_aligned_predicted_protein.pdb +7OFK_VCH,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures_bs_cropped/7OFK_VCH_holo_aligned_predicted_protein.pdb,CC(=O)N1Cc2cc(S(C)(=O)=O)ccc2[C@@H]1C(=O)Nc1ccc(C(O)(C(F)(F)F)C(F)(F)F)cc1,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures_bs_cropped/7OFK_VCH_holo_aligned_predicted_protein.pdb +7ULC_56B,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures_bs_cropped/7ULC_56B_holo_aligned_predicted_protein.pdb,Nc1nc2c(c(CN[C@H]3C=C[C@H](O)[C@@H]3O)cn2[C@@H]2O[C@H](COP(=O)(O)O)[C@@H](O)[C@H]2O)c(=O)[nH]1,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures_bs_cropped/7ULC_56B_holo_aligned_predicted_protein.pdb +7TS6_KMI,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures_bs_cropped/7TS6_KMI_holo_aligned_predicted_protein.pdb,CNCc1cccc(-c2cc(C)cc(N)n2)c1,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures_bs_cropped/7TS6_KMI_holo_aligned_predicted_protein.pdb +7NF3_4LU,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures_bs_cropped/7NF3_4LU_holo_aligned_predicted_protein.pdb,Cc1cc2c3c(c1C)C(C)(C)CC=[N+]3c1c([nH]c(=O)[nH]c1=O)N2C[C@H](O)[C@H](O)[C@H](O)COP(=O)(O)O,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures_bs_cropped/7NF3_4LU_holo_aligned_predicted_protein.pdb 
+7Z1Q_NIO,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures_bs_cropped/7Z1Q_NIO_holo_aligned_predicted_protein.pdb,O=C(O)c1cccnc1,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures_bs_cropped/7Z1Q_NIO_holo_aligned_predicted_protein.pdb +7QPP_VDX,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures_bs_cropped/7QPP_VDX_holo_aligned_predicted_protein.pdb,C=C1/C(=C\C=C2/CCC[C@]3(C)[C@@H]([C@H](C)CCCC(C)(C)O)CC[C@@H]23)C[C@@H](O)C[C@@H]1O,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures_bs_cropped/7QPP_VDX_holo_aligned_predicted_protein.pdb +7P4C_5OV,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures_bs_cropped/7P4C_5OV_holo_aligned_predicted_protein.pdb,O=S1(=O)N[C@@H]2[C@H](O)[C@@H](O)[C@H](O)[C@@H](CO)[C@@H]2O1,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures_bs_cropped/7P4C_5OV_holo_aligned_predicted_protein.pdb +7VQ9_ISY,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures_bs_cropped/7VQ9_ISY_holo_aligned_predicted_protein.pdb,C=C(C)CCS[P@@](=O)(O)OP(=O)(O)O,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures_bs_cropped/7VQ9_ISY_holo_aligned_predicted_protein.pdb +6VTA_AKN,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures_bs_cropped/6VTA_AKN_holo_aligned_predicted_protein.pdb,NCC[C@H](O)C(=O)N[C@@H]1C[C@H](N)[C@@H](O[C@H]2O[C@H](CN)[C@@H](O)[C@H](O)[C@H]2O)[C@H](O)[C@H]1O[C@H]1O[C@H](CO)[C@@H](O)[C@H](N)[C@H]1O,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures_bs_cropped/6VTA_AKN_holo_aligned_predicted_protein.pdb 
+7V43_C4O,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures_bs_cropped/7V43_C4O_holo_aligned_predicted_protein.pdb,Cc1ccc(Cl)cc1,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures_bs_cropped/7V43_C4O_holo_aligned_predicted_protein.pdb +6M2B_EZO,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures_bs_cropped/6M2B_EZO_holo_aligned_predicted_protein.pdb,CN(/N=C/c1ccccc1C(=O)O)c1nc(-c2ccccc2Cl)cs1,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures_bs_cropped/6M2B_EZO_holo_aligned_predicted_protein.pdb +7UJ5_DGL,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures_bs_cropped/7UJ5_DGL_holo_aligned_predicted_protein.pdb,N[C@H](CCC(=O)O)C(=O)O,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures_bs_cropped/7UJ5_DGL_holo_aligned_predicted_protein.pdb +7OZC_G6S,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures_bs_cropped/7OZC_G6S_holo_aligned_predicted_protein.pdb,O=S(=O)(O)OC[C@H]1O[C@@H](O)[C@H](O)[C@@H](O)[C@H]1O,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures_bs_cropped/7OZC_G6S_holo_aligned_predicted_protein.pdb +7TB0_UD1,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures_bs_cropped/7TB0_UD1_holo_aligned_predicted_protein.pdb,CC(=O)N[C@H]1[C@@H](O[P@@](=O)(O)O[P@@](=O)(O)OC[C@H]2O[C@@H](n3ccc(=O)[nH]c3=O)[C@H](O)[C@@H]2O)O[C@H](CO)[C@@H](O)[C@@H]1O,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures_bs_cropped/7TB0_UD1_holo_aligned_predicted_protein.pdb 
+8C3N_ADP,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures_bs_cropped/8C3N_ADP_holo_aligned_predicted_protein.pdb,Nc1ncnc2c1ncn2[C@@H]1O[C@H](CO[P@](=O)(O)OP(=O)(O)O)[C@@H](O)[C@H]1O,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures_bs_cropped/8C3N_ADP_holo_aligned_predicted_protein.pdb +7NUT_GLP,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures_bs_cropped/7NUT_GLP_holo_aligned_predicted_protein.pdb,N[C@@H]1[C@@H](O)[C@H](O)[C@@H](COP(=O)(O)O)O[C@@H]1O,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures_bs_cropped/7NUT_GLP_holo_aligned_predicted_protein.pdb +7P1M_4IU,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures_bs_cropped/7P1M_4IU_holo_aligned_predicted_protein.pdb,Cn1c(CO[C@@H]2C=CO[C@H](CO)[C@@H]2O)nc2ccc(C(=O)O)cc21,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures_bs_cropped/7P1M_4IU_holo_aligned_predicted_protein.pdb +7EPV_FDA,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures_bs_cropped/7EPV_FDA_holo_aligned_predicted_protein.pdb,Cc1cc2c(cc1C)N(C[C@H](O)[C@H](O)[C@H](O)CO[P@@](=O)(O)O[P@@](=O)(O)OC[C@H]1O[C@@H](n3cnc4c(N)ncnc43)[C@H](O)[C@@H]1O)c1[nH]c(=O)[nH]c(=O)c1N2,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures_bs_cropped/7EPV_FDA_holo_aligned_predicted_protein.pdb +6Z0R_Q4H,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures_bs_cropped/6Z0R_Q4H_holo_aligned_predicted_protein.pdb,N#Cc1cncnc1N,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures_bs_cropped/6Z0R_Q4H_holo_aligned_predicted_protein.pdb 
+7P1F_KFN,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures_bs_cropped/7P1F_KFN_holo_aligned_predicted_protein.pdb,O=C(O)C1=C[C@H](O)[C@@H](O)[C@H]([C@H](O)[C@H](O)CO)O1,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures_bs_cropped/7P1F_KFN_holo_aligned_predicted_protein.pdb +7VB8_STL,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures_bs_cropped/7VB8_STL_holo_aligned_predicted_protein.pdb,Oc1ccc(/C=C/c2cc(O)cc(O)c2)cc1,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures_bs_cropped/7VB8_STL_holo_aligned_predicted_protein.pdb +6YR2_T1C,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures_bs_cropped/6YR2_T1C_holo_aligned_predicted_protein.pdb,C[NH+](C)c1cc(NC(=O)CNC(C)(C)C)c(O)c2c1C[C@H]1C[C@H]3[C@H]([NH+](C)C)C(O)=C(C(N)=O)C(=O)[C@@]3(O)C(O)=C1C2=O,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures_bs_cropped/6YR2_T1C_holo_aligned_predicted_protein.pdb +7CIJ_G0C,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures_bs_cropped/7CIJ_G0C_holo_aligned_predicted_protein.pdb,CSCCC/N=C/c1c(COP(=O)(O)O)cnc(C)c1O,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures_bs_cropped/7CIJ_G0C_holo_aligned_predicted_protein.pdb +7UMW_NAD,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures_bs_cropped/7UMW_NAD_holo_aligned_predicted_protein.pdb,NC(=O)c1ccc[n+]([C@@H]2O[C@H](CO[P@@](=O)([O-])O[P@](=O)(O)OC[C@H]3O[C@@H](n4cnc5c(N)ncnc54)[C@H](O)[C@@H]3O)[C@@H](O)[C@H]2O)c1,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures_bs_cropped/7UMW_NAD_holo_aligned_predicted_protein.pdb 
+7TOM_5AD,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures_bs_cropped/7TOM_5AD_holo_aligned_predicted_protein.pdb,C[C@H]1O[C@@H](n2cnc3c(N)ncnc32)[C@H](O)[C@@H]1O,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures_bs_cropped/7TOM_5AD_holo_aligned_predicted_protein.pdb +7DQL_4CL,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures_bs_cropped/7DQL_4CL_holo_aligned_predicted_protein.pdb,Oc1ccc(Cl)cc1O,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures_bs_cropped/7DQL_4CL_holo_aligned_predicted_protein.pdb +7CUO_PHB,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures_bs_cropped/7CUO_PHB_holo_aligned_predicted_protein.pdb,O=C(O)c1ccc(O)cc1,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures_bs_cropped/7CUO_PHB_holo_aligned_predicted_protein.pdb +7Q2B_M6H,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures_bs_cropped/7Q2B_M6H_holo_aligned_predicted_protein.pdb,c1ccc(CC2NCCN2)cc1,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures_bs_cropped/7Q2B_M6H_holo_aligned_predicted_protein.pdb +7MWN_WI5,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures_bs_cropped/7MWN_WI5_holo_aligned_predicted_protein.pdb,Cc1c(C(=O)c2cccc3ccccc23)c2cccc3c2n1[C@H](CN1CCOCC1)CO3,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures_bs_cropped/7MWN_WI5_holo_aligned_predicted_protein.pdb +7L5F_XNG,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures_bs_cropped/7L5F_XNG_holo_aligned_predicted_protein.pdb,CCCCCCCCCC(=O)NCCCC(=O)O,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures_bs_cropped/7L5F_XNG_holo_aligned_predicted_protein.pdb 
+8DSC_NCA,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures_bs_cropped/8DSC_NCA_holo_aligned_predicted_protein.pdb,NC(=O)c1cccnc1,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures_bs_cropped/8DSC_NCA_holo_aligned_predicted_protein.pdb +7ZDY_6MJ,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures_bs_cropped/7ZDY_6MJ_holo_aligned_predicted_protein.pdb,CO[C@@H]1OC[C@@H](O)[C@H](O)[C@H]1O,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures_bs_cropped/7ZDY_6MJ_holo_aligned_predicted_protein.pdb +7NP6_UK8,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures_bs_cropped/7NP6_UK8_holo_aligned_predicted_protein.pdb,O=C(O)c1ccc(OCc2c(-c3c(Cl)cccc3C(F)(F)F)noc2-c2cn[nH]c2)cc1,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures_bs_cropped/7NP6_UK8_holo_aligned_predicted_protein.pdb +7JXX_VP7,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures_bs_cropped/7JXX_VP7_holo_aligned_predicted_protein.pdb,CC(C)(O)C#Cc1ccc2[nH]c3c(c2c1)-c1nc(N)ncc1CCC3,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures_bs_cropped/7JXX_VP7_holo_aligned_predicted_protein.pdb +7LOE_Y84,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures_bs_cropped/7LOE_Y84_holo_aligned_predicted_protein.pdb,Fc1cccc2ccccc12,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures_bs_cropped/7LOE_Y84_holo_aligned_predicted_protein.pdb +7KM8_WPD,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures_bs_cropped/7KM8_WPD_holo_aligned_predicted_protein.pdb,CCc1nc(N)nc(N)c1OCCCOc1cccc(C[C@@H](C(=O)O)C(F)F)c1,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures_bs_cropped/7KM8_WPD_holo_aligned_predicted_protein.pdb 
+7UY4_SMI,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures_bs_cropped/7UY4_SMI_holo_aligned_predicted_protein.pdb,CN[C@@H]1[C@H](O)[C@H](NC)[C@H]2O[C@]3(O)[C@H](O[C@@H]2[C@H]1O)O[C@H](C)CC3(O)O,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures_bs_cropped/7UY4_SMI_holo_aligned_predicted_protein.pdb +7CL8_TES,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures_bs_cropped/7CL8_TES_holo_aligned_predicted_protein.pdb,C[C@]12CC[C@H]3[C@@H](CCC4=CC(=O)CC[C@@]43C)[C@@H]1CC[C@@H]2O,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures_bs_cropped/7CL8_TES_holo_aligned_predicted_protein.pdb +6Z1C_7EY,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures_bs_cropped/6Z1C_7EY_holo_aligned_predicted_protein.pdb,Cc1ccc(-c2csc3ncnc(SCCC(=O)O)c23)cc1,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures_bs_cropped/6Z1C_7EY_holo_aligned_predicted_protein.pdb +8HFN_XGC,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures_bs_cropped/8HFN_XGC_holo_aligned_predicted_protein.pdb,COc1ccc(-c2cccc(S(=O)(=O)NC(=O)[C@@H](N)CS)c2)cn1,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures_bs_cropped/8HFN_XGC_holo_aligned_predicted_protein.pdb +8EXL_799,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures_bs_cropped/8EXL_799_holo_aligned_predicted_protein.pdb,Cc1nc(-c2cn3c(n2)-c2ccc(-c4cnn(C(C)(C)C(N)=O)c4)cc2OCC3)n(C(C)C)n1,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures_bs_cropped/8EXL_799_holo_aligned_predicted_protein.pdb 
+7Z7F_IF3,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures_bs_cropped/7Z7F_IF3_holo_aligned_predicted_protein.pdb,CNc1nc(NC)c2ncn(C)c2n1,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures_bs_cropped/7Z7F_IF3_holo_aligned_predicted_protein.pdb +7XG5_PLP,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures_bs_cropped/7XG5_PLP_holo_aligned_predicted_protein.pdb,Cc1ncc(COP(=O)(O)O)c(C=O)c1O,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures_bs_cropped/7XG5_PLP_holo_aligned_predicted_protein.pdb +7MOI_HPS,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures_bs_cropped/7MOI_HPS_holo_aligned_predicted_protein.pdb,O=P(O)(O)Oc1ccccc1,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures_bs_cropped/7MOI_HPS_holo_aligned_predicted_protein.pdb +7LMO_NYO,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures_bs_cropped/7LMO_NYO_holo_aligned_predicted_protein.pdb,CCN(CC)c1ccc2c(C)c(CCN3C(=O)N[C@@]4(CCN(C(=O)c5c[nH]cn5)C4)C3=O)c(=O)oc2c1,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures_bs_cropped/7LMO_NYO_holo_aligned_predicted_protein.pdb +7MSR_DCA,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures_bs_cropped/7MSR_DCA_holo_aligned_predicted_protein.pdb,CCNC(=O)CCNC(=O)[C@H](O)C(C)(C)CO[P@@](=O)(O)O[P@@](=O)(O)OC[C@H]1O[C@@H](n2cnc3c(N)ncnc32)[C@H](O)[C@@H]1OP(=O)(O)O,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures_bs_cropped/7MSR_DCA_holo_aligned_predicted_protein.pdb 
+6ZCY_QF8,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures_bs_cropped/6ZCY_QF8_holo_aligned_predicted_protein.pdb,CNC(=O)c1nn(C)c2ccc(Nc3nccc(-n4cc(N[C@@H]5CCNC5)c(C)n4)n3)cc12,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures_bs_cropped/6ZCY_QF8_holo_aligned_predicted_protein.pdb +6ZC3_JOR,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures_bs_cropped/6ZC3_JOR_holo_aligned_predicted_protein.pdb,CS(=O)(=O)Nc1ccc(F)cc1C(=O)O,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures_bs_cropped/6ZC3_JOR_holo_aligned_predicted_protein.pdb +7LZD_YHY,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures_bs_cropped/7LZD_YHY_holo_aligned_predicted_protein.pdb,Cc1ccc(F)c2cc(C(=O)Nc3cccc(N4CCC(N(C)C)CC4)c3)[nH]c12,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures_bs_cropped/7LZD_YHY_holo_aligned_predicted_protein.pdb +7N4N_0BK,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures_bs_cropped/7N4N_0BK_holo_aligned_predicted_protein.pdb,COc1cc(C(=O)Nc2ccc(F)c([C@]3(CF)CC[C@@](C)(S(C)(=O)=O)C(N)=N3)c2)ncn1,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures_bs_cropped/7N4N_0BK_holo_aligned_predicted_protein.pdb +7KRU_ATP,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures_bs_cropped/7KRU_ATP_holo_aligned_predicted_protein.pdb,Nc1ncnc2c1ncn2[C@@H]1O[C@H](CO[P@@](=O)(O)O[P@](=O)(O)OP(=O)(O)O)[C@@H](O)[C@H]1O,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures_bs_cropped/7KRU_ATP_holo_aligned_predicted_protein.pdb 
+7UTW_NAI,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures_bs_cropped/7UTW_NAI_holo_aligned_predicted_protein.pdb,NC(=O)C1=CN([C@@H]2O[C@H](CO[P@](=O)(O)O[P@@](=O)(O)OC[C@H]3O[C@@H](n4cnc5c(N)ncnc54)[C@H](O)[C@@H]3O)[C@@H](O)[C@H]2O)C=CC1,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures_bs_cropped/7UTW_NAI_holo_aligned_predicted_protein.pdb +7BKA_4JC,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures_bs_cropped/7BKA_4JC_holo_aligned_predicted_protein.pdb,CCc1ccc(S(N)(=O)=O)cc1,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures_bs_cropped/7BKA_4JC_holo_aligned_predicted_protein.pdb +5SD5_HWI,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures_bs_cropped/5SD5_HWI_holo_aligned_predicted_protein.pdb,CCc1nc(N)nc(N)c1OCCCOc1cc(C)ccc1N1CC(C(=O)O)C1,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures_bs_cropped/5SD5_HWI_holo_aligned_predicted_protein.pdb +7SCW_GSP,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures_bs_cropped/7SCW_GSP_holo_aligned_predicted_protein.pdb,Nc1nc2c(ncn2[C@@H]2O[C@H](CO[P@](=O)(O)O[P@](=O)(O)OP(O)(O)=S)[C@@H](O)[C@H]2O)c(=O)[nH]1,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures_bs_cropped/7SCW_GSP_holo_aligned_predicted_protein.pdb +7SDD_4IP,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures_bs_cropped/7SDD_4IP_holo_aligned_predicted_protein.pdb,O=P(O)(O)O[C@H]1[C@H](O)[C@@H](OP(=O)(O)O)[C@H](OP(=O)(O)O)[C@@H](OP(=O)(O)O)[C@H]1O,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures_bs_cropped/7SDD_4IP_holo_aligned_predicted_protein.pdb 
+7TH4_FFO,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures_bs_cropped/7TH4_FFO_holo_aligned_predicted_protein.pdb,Nc1nc2c(c(=O)[nH]1)N(C=O)[C@@H](CNc1ccc(C(=O)N[C@@H](CCC(=O)O)C(=O)O)cc1)CN2,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures_bs_cropped/7TH4_FFO_holo_aligned_predicted_protein.pdb +8D19_GSH,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures_bs_cropped/8D19_GSH_holo_aligned_predicted_protein.pdb,N[C@@H](CCC(=O)N[C@@H](CS)C(=O)NCC(=O)O)C(=O)O,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures_bs_cropped/8D19_GSH_holo_aligned_predicted_protein.pdb +7A1P_QW2,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures_bs_cropped/7A1P_QW2_holo_aligned_predicted_protein.pdb,CCC[C@@H](CC(=O)C(=O)O)C(=O)O,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures_bs_cropped/7A1P_QW2_holo_aligned_predicted_protein.pdb +8G0V_YHT,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures_bs_cropped/8G0V_YHT_holo_aligned_predicted_protein.pdb,C#CCO[P@](=O)(O)OC[C@H]1O[C@@H](n2cnc3c(N)ncnc32)[C@H](O)[C@@H]1O,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures_bs_cropped/8G0V_YHT_holo_aligned_predicted_protein.pdb +7MFP_Z7P,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures_bs_cropped/7MFP_Z7P_holo_aligned_predicted_protein.pdb,Cc1ncc(COP(=O)(O)O)c(/C=N/[C@H]2[C@H](O)[C@@H](O)[C@@H](O[P@](=O)(O)O[P@](=O)(O)OC[C@H]3O[C@@H](n4ccc(=O)[nH]c4=O)[C@H](O)[C@@H]3O)O[C@@H]2C)c1O,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures_bs_cropped/7MFP_Z7P_holo_aligned_predicted_protein.pdb 
+7N7H_CTP,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures_bs_cropped/7N7H_CTP_holo_aligned_predicted_protein.pdb,Nc1ccn([C@@H]2O[C@H](CO[P@](=O)(O)O[P@@](=O)(O)OP(=O)(O)O)[C@@H](O)[C@H]2O)c(=O)n1,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures_bs_cropped/7N7H_CTP_holo_aligned_predicted_protein.pdb +7X9K_8OG,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures_bs_cropped/7X9K_8OG_holo_aligned_predicted_protein.pdb,Nc1nc2c([nH]c(=O)n2[C@H]2C[C@H](O)[C@@H](COP(=O)(O)O)O2)c(=O)[nH]1,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures_bs_cropped/7X9K_8OG_holo_aligned_predicted_protein.pdb +7NFB_GEN,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures_bs_cropped/7NFB_GEN_holo_aligned_predicted_protein.pdb,O=c1c(-c2ccc(O)cc2)coc2cc(O)cc(O)c12,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures_bs_cropped/7NFB_GEN_holo_aligned_predicted_protein.pdb +7DKT_GLF,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures_bs_cropped/7DKT_GLF_holo_aligned_predicted_protein.pdb,OC[C@H]1O[C@H](F)[C@H](O)[C@@H](O)[C@@H]1O,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures_bs_cropped/7DKT_GLF_holo_aligned_predicted_protein.pdb +7R3D_APR,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures_bs_cropped/7R3D_APR_holo_aligned_predicted_protein.pdb,Nc1ncnc2c1ncn2[C@@H]1O[C@H](CO[P@](=O)(O)O[P@@](=O)(O)OC[C@H]2O[C@@H](O)[C@H](O)[C@@H]2O)[C@@H](O)[C@H]1O,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures_bs_cropped/7R3D_APR_holo_aligned_predicted_protein.pdb 
+6YYO_Q1K,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures_bs_cropped/6YYO_Q1K_holo_aligned_predicted_protein.pdb,CS(=O)(=O)N1CCN(c2ccc3nncn3n2)CC1,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures_bs_cropped/6YYO_Q1K_holo_aligned_predicted_protein.pdb +7V14_ORU,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures_bs_cropped/7V14_ORU_holo_aligned_predicted_protein.pdb,[O-][n+]1cc(-c2c(-n3cnnn3)ccc(Cl)c2F)ccc1[C@@H](CC1CC1)n1cc(-c2cncs2)cn1,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures_bs_cropped/7V14_ORU_holo_aligned_predicted_protein.pdb +7SIU_9ID,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures_bs_cropped/7SIU_9ID_holo_aligned_predicted_protein.pdb,Cn1cc(Nc2ncc(C3=CCC[C@@H](NC(=O)c4ccccc4)C3)nc2C(N)=O)cn1,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures_bs_cropped/7SIU_9ID_holo_aligned_predicted_protein.pdb +7TE8_P0T,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures_bs_cropped/7TE8_P0T_holo_aligned_predicted_protein.pdb,C=C(C)[C@@H]1CCC(C)=C[C@H]1c1c(O)cc(CCCCC)cc1O,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures_bs_cropped/7TE8_P0T_holo_aligned_predicted_protein.pdb +7F8T_FAD,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures_bs_cropped/7F8T_FAD_holo_aligned_predicted_protein.pdb,Cc1cc2nc3c(=O)[nH]c(=O)nc-3n(C[C@H](O)[C@H](O)[C@H](O)CO[P@@](=O)(O)O[P@](=O)(O)OC[C@H]3O[C@@H](n4cnc5c(N)ncnc54)[C@H](O)[C@@H]3O)c2cc1C,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures_bs_cropped/7F8T_FAD_holo_aligned_predicted_protein.pdb 
+7TUO_KL9,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures_bs_cropped/7TUO_KL9_holo_aligned_predicted_protein.pdb,Cc1cnc2c(N)c(C(=O)NCCc3ccc([C@@H]4C[C@H]5CC[C@@H](C4)N5)cc3)sc2n1,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures_bs_cropped/7TUO_KL9_holo_aligned_predicted_protein.pdb +7B94_ANP,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures_bs_cropped/7B94_ANP_holo_aligned_predicted_protein.pdb,Nc1ncnc2c1ncn2[C@@H]1O[C@H](CO[P@@](=O)(O)O[P@](=O)(O)NP(=O)(O)O)[C@@H](O)[C@H]1O,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures_bs_cropped/7B94_ANP_holo_aligned_predicted_protein.pdb +7LEV_0JO,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures_bs_cropped/7LEV_0JO_holo_aligned_predicted_protein.pdb,C=C(/N=C/c1c(COP(=O)(O)O)cnc(C)c1O)C(=O)O,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures_bs_cropped/7LEV_0JO_holo_aligned_predicted_protein.pdb +8A1H_DLZ,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures_bs_cropped/8A1H_DLZ_holo_aligned_predicted_protein.pdb,Cc1nc2c(=O)[nH]c(=O)nc-2n(C[C@H](O)[C@H](O)[C@H](O)CO)c1C,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures_bs_cropped/8A1H_DLZ_holo_aligned_predicted_protein.pdb +7NU0_DCL,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures_bs_cropped/7NU0_DCL_holo_aligned_predicted_protein.pdb,CC(C)C[C@H](N)CO,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures_bs_cropped/7NU0_DCL_holo_aligned_predicted_protein.pdb 
+7QF4_RBF,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures_bs_cropped/7QF4_RBF_holo_aligned_predicted_protein.pdb,Cc1cc2nc3c(=O)[nH]c(=O)nc-3n(C[C@H](O)[C@H](O)[C@H](O)CO)c2cc1C,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures_bs_cropped/7QF4_RBF_holo_aligned_predicted_protein.pdb +7Z2O_IAJ,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures_bs_cropped/7Z2O_IAJ_holo_aligned_predicted_protein.pdb,COc1cccc2sc3nncn3c12,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures_bs_cropped/7Z2O_IAJ_holo_aligned_predicted_protein.pdb +7O0N_CDP,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures_bs_cropped/7O0N_CDP_holo_aligned_predicted_protein.pdb,Nc1ccn([C@@H]2O[C@H](CO[P@@](=O)(O)OP(=O)(O)O)[C@@H](O)[C@H]2O)c(=O)n1,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures_bs_cropped/7O0N_CDP_holo_aligned_predicted_protein.pdb +7X5N_5M5,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures_bs_cropped/7X5N_5M5_holo_aligned_predicted_protein.pdb,Cc1n[nH]c2c1[C@](c1cc(CO)cc(-c3ccccc3)c1)(C(C)C)C(C#N)=C(N)O2,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures_bs_cropped/7X5N_5M5_holo_aligned_predicted_protein.pdb +8BOM_QU6,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures_bs_cropped/8BOM_QU6_holo_aligned_predicted_protein.pdb,COc1cccc(NC(=O)c2ccc(C)c(Nc3nc(-c4cccnc4)nc4c3cnn4C)c2)c1,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures_bs_cropped/8BOM_QU6_holo_aligned_predicted_protein.pdb 
+7K0V_VQP,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures_bs_cropped/7K0V_VQP_holo_aligned_predicted_protein.pdb,Cc1cc(F)c(NC(=O)NCCC(C)(C)C)cc1Nc1ccc2ncn(C)c(=O)c2c1F,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures_bs_cropped/7K0V_VQP_holo_aligned_predicted_protein.pdb +7MMH_ZJY,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures_bs_cropped/7MMH_ZJY_holo_aligned_predicted_protein.pdb,COc1ccc2nc(C)c(O[C@@H]3C[C@H]4C(=O)N[C@]5(C(=O)NS(=O)(=O)C6(C)CC6)C[C@H]5/C=C\CCCCC[C@H](NC(=O)OCC5(C(F)(F)F)CC5)C(=O)N4C3)nc2c1,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures_bs_cropped/7MMH_ZJY_holo_aligned_predicted_protein.pdb +7PJQ_OWH,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures_bs_cropped/7PJQ_OWH_holo_aligned_predicted_protein.pdb,CNC(=S)c1cccnc1,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures_bs_cropped/7PJQ_OWH_holo_aligned_predicted_protein.pdb +7FHA_ADX,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures_bs_cropped/7FHA_ADX_holo_aligned_predicted_protein.pdb,Nc1ncnc2c1ncn2[C@@H]1O[C@H](CO[P@@](=O)(O)OS(=O)(=O)O)[C@@H](O)[C@H]1O,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures_bs_cropped/7FHA_ADX_holo_aligned_predicted_protein.pdb +7BTT_F8R,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures_bs_cropped/7BTT_F8R_holo_aligned_predicted_protein.pdb,COc1cc2c(cc1Nc1nc(Nc3ccccc3S(=O)(=O)C(C)C)c3[nH]ccc3n1)N(C(=O)CN(C)C)CC2,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures_bs_cropped/7BTT_F8R_holo_aligned_predicted_protein.pdb 
+7QHG_T3B,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures_bs_cropped/7QHG_T3B_holo_aligned_predicted_protein.pdb,CC(C)C(=O)Nc1ncc(C(=O)NCCN(Cc2ccccc2)C(=O)c2ccc(S(=O)(=O)Nc3ccccc3)cc2)s1,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures_bs_cropped/7QHG_T3B_holo_aligned_predicted_protein.pdb +7N6F_0I1,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures_bs_cropped/7N6F_0I1_holo_aligned_predicted_protein.pdb,O=C(C1CCN(c2cncnc2-c2ccc(F)cc2)CC1)N1CC(F)C1,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures_bs_cropped/7N6F_0I1_holo_aligned_predicted_protein.pdb +8AAU_LH0,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures_bs_cropped/8AAU_LH0_holo_aligned_predicted_protein.pdb,CC(C)C(=O)Nc1ncc(-c2cc(C(F)F)nn2-c2c(Cl)cccc2Cl)s1,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures_bs_cropped/8AAU_LH0_holo_aligned_predicted_protein.pdb +8GFD_ZHR,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures_bs_cropped/8GFD_ZHR_holo_aligned_predicted_protein.pdb,CC(=O)N[C@H]1[C@H](OCCc2ccc3occc3c2)O[C@H](CO)[C@@H](O)[C@@H]1O,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures_bs_cropped/8GFD_ZHR_holo_aligned_predicted_protein.pdb +7TBU_S3P,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures_bs_cropped/7TBU_S3P_holo_aligned_predicted_protein.pdb,O=C(O)C1=C[C@@H](OP(=O)(O)O)[C@@H](O)[C@H](O)C1,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures_bs_cropped/7TBU_S3P_holo_aligned_predicted_protein.pdb 
+6YQW_82I,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures_bs_cropped/6YQW_82I_holo_aligned_predicted_protein.pdb,CNc1cnn(C)c(=O)c1Cl,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures_bs_cropped/6YQW_82I_holo_aligned_predicted_protein.pdb +7W06_ITN,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures_bs_cropped/7W06_ITN_holo_aligned_predicted_protein.pdb,C=C(CC(=O)O)C(=O)O,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures_bs_cropped/7W06_ITN_holo_aligned_predicted_protein.pdb +7ZL5_IWE,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures_bs_cropped/7ZL5_IWE_holo_aligned_predicted_protein.pdb,NS(=O)(=O)c1cc(-c2nnn[nH]2)c(NCc2cccs2)cc1Cl,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures_bs_cropped/7ZL5_IWE_holo_aligned_predicted_protein.pdb +7L03_F9F,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures_bs_cropped/7L03_F9F_holo_aligned_predicted_protein.pdb,O=P(O)(O)OCCNS(=O)(=O)c1ccc(OC(F)(F)F)cc1,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures_bs_cropped/7L03_F9F_holo_aligned_predicted_protein.pdb +7XJN_NSD,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures_bs_cropped/7XJN_NSD_holo_aligned_predicted_protein.pdb,NCCCNCCCN,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures_bs_cropped/7XJN_NSD_holo_aligned_predicted_protein.pdb +7ZCC_OGA,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures_bs_cropped/7ZCC_OGA_holo_aligned_predicted_protein.pdb,O=C(O)CNC(=O)C(=O)O,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures_bs_cropped/7ZCC_OGA_holo_aligned_predicted_protein.pdb 
+6TW5_9M2,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures_bs_cropped/6TW5_9M2_holo_aligned_predicted_protein.pdb,Cc1nn(C)c(C)c1CCOc1cc(F)ccc1-c1ccc2n[nH]c(CN(C)C)c2c1,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures_bs_cropped/6TW5_9M2_holo_aligned_predicted_protein.pdb +7VWF_K55,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures_bs_cropped/7VWF_K55_holo_aligned_predicted_protein.pdb,CCCCOc1ccc(C[C@H](CC)C(=O)O)cc1CNC(=O)c1ccc(C(F)(F)F)cc1F,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures_bs_cropped/7VWF_K55_holo_aligned_predicted_protein.pdb +7SZA_DUI,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures_bs_cropped/7SZA_DUI_holo_aligned_predicted_protein.pdb,Nc1cccc2c1C(=O)N([C@@H]1CCC(=O)NC1=O)C2=O,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures_bs_cropped/7SZA_DUI_holo_aligned_predicted_protein.pdb +7POM_7VZ,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures_bs_cropped/7POM_7VZ_holo_aligned_predicted_protein.pdb,COC(=O)c1cc(S(N)(=O)=O)c(SC2CCCCC2)cc1Cl,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures_bs_cropped/7POM_7VZ_holo_aligned_predicted_protein.pdb +7KQU_YOF,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures_bs_cropped/7KQU_YOF_holo_aligned_predicted_protein.pdb,N[C@@H](Cc1ccc(O)c(F)c1)C(=O)O,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures_bs_cropped/7KQU_YOF_holo_aligned_predicted_protein.pdb 
+7ZOC_T8E,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures_bs_cropped/7ZOC_T8E_holo_aligned_predicted_protein.pdb,CC(=O)c1ccc(NC(=O)[C@H](C)S)cc1,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures_bs_cropped/7ZOC_T8E_holo_aligned_predicted_protein.pdb +7ELT_TYM,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures_bs_cropped/7ELT_TYM_holo_aligned_predicted_protein.pdb,Nc1ncnc2c1ncn2[C@@H]1O[C@H](CO[P@@](=O)(O)OC(=O)[C@@H](N)Cc2c[nH]c3ccccc23)[C@@H](O)[C@H]1O,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures_bs_cropped/7ELT_TYM_holo_aligned_predicted_protein.pdb +6YRV_PJ8,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures_bs_cropped/6YRV_PJ8_holo_aligned_predicted_protein.pdb,CCCCCCCCCCCCCCCCC,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures_bs_cropped/6YRV_PJ8_holo_aligned_predicted_protein.pdb +7CNS_PMV,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures_bs_cropped/7CNS_PMV_holo_aligned_predicted_protein.pdb,C[C@@](O)(CCOP(=O)(O)O)CC(=O)O,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures_bs_cropped/7CNS_PMV_holo_aligned_predicted_protein.pdb +6XG5_TOP,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures_bs_cropped/6XG5_TOP_holo_aligned_predicted_protein.pdb,COc1cc(Cc2cnc(N)nc2N)cc(OC)c1OC,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures_bs_cropped/6XG5_TOP_holo_aligned_predicted_protein.pdb 
+7USH_82V,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures_bs_cropped/7USH_82V_holo_aligned_predicted_protein.pdb,O=c1cc(N2CCOCC2)oc2c(-c3ccc4c(c3)OCCO4)csc12,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures_bs_cropped/7USH_82V_holo_aligned_predicted_protein.pdb +7OZ9_NGK,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures_bs_cropped/7OZ9_NGK_holo_aligned_predicted_protein.pdb,CC(=O)N[C@@H]1[C@@H](O)[C@@H](OS(=O)(=O)O)[C@@H](CO)O[C@@H]1O,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures_bs_cropped/7OZ9_NGK_holo_aligned_predicted_protein.pdb +7TXK_LW8,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures_bs_cropped/7TXK_LW8_holo_aligned_predicted_protein.pdb,C[N+](C)(C)[C@@H](Cc1c[nH]c(=S)[nH]1)C(=O)O,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures_bs_cropped/7TXK_LW8_holo_aligned_predicted_protein.pdb +7ZHP_IQY,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures_bs_cropped/7ZHP_IQY_holo_aligned_predicted_protein.pdb,CCC(O)(C#Cc1ccc2[nH]c3c(c2c1)-c1nc(N)ncc1CCC3)CC,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures_bs_cropped/7ZHP_IQY_holo_aligned_predicted_protein.pdb +8AQL_PLG,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures_bs_cropped/8AQL_PLG_holo_aligned_predicted_protein.pdb,Cc1ncc(COP(=O)(O)O)c(CNCC(=O)O)c1O,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures_bs_cropped/8AQL_PLG_holo_aligned_predicted_protein.pdb 
+8BTI_RFO,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures_bs_cropped/8BTI_RFO_holo_aligned_predicted_protein.pdb,COCC(=O)n1ccc2c(Cl)cccc21,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures_bs_cropped/8BTI_RFO_holo_aligned_predicted_protein.pdb +7ROU_66I,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures_bs_cropped/7ROU_66I_holo_aligned_predicted_protein.pdb,Nc1ncnc2c1c(OC(F)F)nn2[C@@H]1O[C@H](COS(=O)(=O)NC(=O)[C@@H](N)Cc2ccc(O)cc2)[C@@H](O)[C@H]1O,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures_bs_cropped/7ROU_66I_holo_aligned_predicted_protein.pdb +6ZAE_ACV,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures_bs_cropped/6ZAE_ACV_holo_aligned_predicted_protein.pdb,CC(C)[C@@H](NC(=O)[C@H](CS)NC(=O)CCC[C@H](N)C(=O)O)C(=O)O,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures_bs_cropped/6ZAE_ACV_holo_aligned_predicted_protein.pdb +6YSP_PAL,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures_bs_cropped/6YSP_PAL_holo_aligned_predicted_protein.pdb,O=C(O)C[C@H](NC(=O)CP(=O)(O)O)C(=O)O,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures_bs_cropped/6YSP_PAL_holo_aligned_predicted_protein.pdb +8B8H_OJQ,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures_bs_cropped/8B8H_OJQ_holo_aligned_predicted_protein.pdb,Cc1ncc(COP(=O)(O)O)c(C[NH2+]c2conc2O)c1O,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures_bs_cropped/8B8H_OJQ_holo_aligned_predicted_protein.pdb 
+7THI_PGA,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures_bs_cropped/7THI_PGA_holo_aligned_predicted_protein.pdb,O=C(O)COP(=O)(O)O,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures_bs_cropped/7THI_PGA_holo_aligned_predicted_protein.pdb +7OFF_VCB,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures_bs_cropped/7OFF_VCB_holo_aligned_predicted_protein.pdb,O=C(N[C@@H](C(=O)O)c1ccccc1)c1cccc2c1-c1ccccc1C2=O,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures_bs_cropped/7OFF_VCB_holo_aligned_predicted_protein.pdb +7WY1_D0L,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures_bs_cropped/7WY1_D0L_holo_aligned_predicted_protein.pdb,CCCCCCCN1CCC[C@H]1C(=O)N[C@@H](Cc1ccccc1)C(=O)O,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures_bs_cropped/7WY1_D0L_holo_aligned_predicted_protein.pdb +7ES1_UDP,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures_bs_cropped/7ES1_UDP_holo_aligned_predicted_protein.pdb,O=c1ccn([C@@H]2O[C@H](CO[P@](=O)(O)OP(=O)(O)O)[C@@H](O)[C@H]2O)c(=O)[nH]1,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures_bs_cropped/7ES1_UDP_holo_aligned_predicted_protein.pdb +7V3N_AKG,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures_bs_cropped/7V3N_AKG_holo_aligned_predicted_protein.pdb,O=C(O)CCC(=O)C(=O)O,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures_bs_cropped/7V3N_AKG_holo_aligned_predicted_protein.pdb +7TM6_GPJ,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures_bs_cropped/7TM6_GPJ_holo_aligned_predicted_protein.pdb,O=C(O)C[NH2+]CP(=O)(O)O,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures_bs_cropped/7TM6_GPJ_holo_aligned_predicted_protein.pdb 
+7ECR_SIN,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures_bs_cropped/7ECR_SIN_holo_aligned_predicted_protein.pdb,O=C(O)CCC(=O)O,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures_bs_cropped/7ECR_SIN_holo_aligned_predicted_protein.pdb +7A9E_R4W,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures_bs_cropped/7A9E_R4W_holo_aligned_predicted_protein.pdb,CCS(C)(=O)=O,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures_bs_cropped/7A9E_R4W_holo_aligned_predicted_protein.pdb +7ZF0_DHR,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures_bs_cropped/7ZF0_DHR_holo_aligned_predicted_protein.pdb,N#C[C@@H](O)c1ccc(O)cc1,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures_bs_cropped/7ZF0_DHR_holo_aligned_predicted_protein.pdb +7F51_BA7,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures_bs_cropped/7F51_BA7_holo_aligned_predicted_protein.pdb,Nc1ncnc2c1ncn2[C@@H]1O[C@H](CO[P@](=O)(O)O[P@](=O)(O)OC[C@H]2O[C@@H](O)[C@H](OC(=O)c3ccccc3)[C@@H]2O)[C@@H](O)[C@H]1O,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures_bs_cropped/7F51_BA7_holo_aligned_predicted_protein.pdb +7XFA_D9J,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures_bs_cropped/7XFA_D9J_holo_aligned_predicted_protein.pdb,Cc1nc([C@@H]2O[C@H](CO)[C@H](O)[C@H](n3cc(-c4cc(F)c(Cl)c(F)c4)nn3)[C@H]2O)n(-c2cc(Cl)ccc2C(F)(F)F)n1,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures_bs_cropped/7XFA_D9J_holo_aligned_predicted_protein.pdb 
+8DKO_TFB,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures_bs_cropped/8DKO_TFB_holo_aligned_predicted_protein.pdb,O=C(O)[C@@H]1CCCO1,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures_bs_cropped/8DKO_TFB_holo_aligned_predicted_protein.pdb +6T88_MWQ,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures_bs_cropped/6T88_MWQ_holo_aligned_predicted_protein.pdb,O=C(O)CCc1cnc[nH]1,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures_bs_cropped/6T88_MWQ_holo_aligned_predicted_protein.pdb +7BCP_GCO,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures_bs_cropped/7BCP_GCO_holo_aligned_predicted_protein.pdb,O=C(O)[C@H](O)[C@@H](O)[C@H](O)[C@H](O)CO,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures_bs_cropped/7BCP_GCO_holo_aligned_predicted_protein.pdb +7NF0_BYN,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures_bs_cropped/7NF0_BYN_holo_aligned_predicted_protein.pdb,Cc1cc2c3c(c1C)C(C)(C)C[C@@H](O)N3c1c(nc(O)[nH]c1=O)N2C[C@H](O)[C@H](O)[C@H](O)COP(=O)(O)O,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures_bs_cropped/7NF0_BYN_holo_aligned_predicted_protein.pdb +7QE4_NGA,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures_bs_cropped/7QE4_NGA_holo_aligned_predicted_protein.pdb,CC(=O)N[C@@H]1[C@@H](O)[C@@H](O)[C@@H](CO)O[C@H]1O,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures_bs_cropped/7QE4_NGA_holo_aligned_predicted_protein.pdb 
+7M3H_YPV,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures_bs_cropped/7M3H_YPV_holo_aligned_predicted_protein.pdb,CCCCc1ccc(NS(=O)(=O)c2ccc(O)c(C(=O)O)c2)cc1,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures_bs_cropped/7M3H_YPV_holo_aligned_predicted_protein.pdb +6Z2C_Q5E,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures_bs_cropped/6Z2C_Q5E_holo_aligned_predicted_protein.pdb,O=C(O)CCCCCN1C(=O)[C@@H]2[C@H](C1=O)[C@]1(Cl)C(Cl)=C(Cl)[C@@]2(Cl)C1(Cl)Cl,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures_bs_cropped/6Z2C_Q5E_holo_aligned_predicted_protein.pdb +8A2D_KXY,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures_bs_cropped/8A2D_KXY_holo_aligned_predicted_protein.pdb,Cc1c(C#Cc2ccc(CN3CCC(CO)CC3)cc2)cc(C(F)F)c2cn([C@@H](C(=O)Nc3nccs3)c3ncn4c3CCC43CC3)nc12,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures_bs_cropped/8A2D_KXY_holo_aligned_predicted_protein.pdb +7NGW_UAW,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures_bs_cropped/7NGW_UAW_holo_aligned_predicted_protein.pdb,CC1CCN(C(=O)Nc2ccc(O)cc2)CC1,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures_bs_cropped/7NGW_UAW_holo_aligned_predicted_protein.pdb +7KZ9_XN7,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures_bs_cropped/7KZ9_XN7_holo_aligned_predicted_protein.pdb,O=C(CNCCO)NCCO,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures_bs_cropped/7KZ9_XN7_holo_aligned_predicted_protein.pdb 
+7UAS_MBU,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures_bs_cropped/7UAS_MBU_holo_aligned_predicted_protein.pdb,CCn1cc(-c2cc(Cn3ccnc3C)cc3c2CCN([C@H](c2cc(C)ccn2)C2CC2)C3=O)c(C(F)(F)F)n1,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures_bs_cropped/7UAS_MBU_holo_aligned_predicted_protein.pdb +7YZU_DO7,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures_bs_cropped/7YZU_DO7_holo_aligned_predicted_protein.pdb,CO[C@H]1O[C@H](CS(=O)(=O)O)[C@@H](O)[C@H](O)[C@H]1O,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures_bs_cropped/7YZU_DO7_holo_aligned_predicted_protein.pdb +7VKZ_NOJ,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures_bs_cropped/7VKZ_NOJ_holo_aligned_predicted_protein.pdb,OC[C@H]1NC[C@H](O)[C@@H](O)[C@@H]1O,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures_bs_cropped/7VKZ_NOJ_holo_aligned_predicted_protein.pdb +7ROR_69X,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures_bs_cropped/7ROR_69X_holo_aligned_predicted_protein.pdb,Nc1ncnc2c1ncn2[C@@H]1O[C@H](CO[P@](=O)(O)OC(=O)[C@@H](N)Cc2ccc(O)cc2)[C@@H](O)[C@H]1O,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures_bs_cropped/7ROR_69X_holo_aligned_predicted_protein.pdb +8AY3_OE3,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures_bs_cropped/8AY3_OE3_holo_aligned_predicted_protein.pdb,CCn1c(=O)cc(C)c2cc(CNS(=O)(=O)c3ccccc3)ccc21,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures_bs_cropped/8AY3_OE3_holo_aligned_predicted_protein.pdb 
+7C8Q_DSG,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures_bs_cropped/7C8Q_DSG_holo_aligned_predicted_protein.pdb,NC(=O)C[C@@H](N)C(=O)O,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures_bs_cropped/7C8Q_DSG_holo_aligned_predicted_protein.pdb +7XRL_FWK,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures_bs_cropped/7XRL_FWK_holo_aligned_predicted_protein.pdb,CC[C@H]1O[C@@H](n2cnc3c(N)ncnc32)[C@H](O)[C@@H]1O,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures_bs_cropped/7XRL_FWK_holo_aligned_predicted_protein.pdb +7CD9_FVR,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures_bs_cropped/7CD9_FVR_holo_aligned_predicted_protein.pdb,CN1C[C@H](Nc2nc3cc[nH]c3c(=O)n2C)C[C@H](c2ccc(OCc3ccccc3)cc2)C1,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures_bs_cropped/7CD9_FVR_holo_aligned_predicted_protein.pdb +7T3E_SLB,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures_bs_cropped/7T3E_SLB_holo_aligned_predicted_protein.pdb,CC(=O)N[C@H]1[C@H]([C@H](O)[C@H](O)CO)O[C@](O)(C(=O)O)C[C@@H]1O,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures_bs_cropped/7T3E_SLB_holo_aligned_predicted_protein.pdb +6YJA_2BA,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures_bs_cropped/6YJA_2BA_holo_aligned_predicted_protein.pdb,Nc1ncnc2c1ncn2[C@@H]1O[C@@H]2CO[P@@](=O)(O)O[C@H]3[C@@H](O)[C@H](n4cnc5c(N)ncnc54)O[C@@H]3CO[P@](=O)(O)O[C@H]2[C@H]1O,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures_bs_cropped/6YJA_2BA_holo_aligned_predicted_protein.pdb 
+7LT0_ONJ,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures_bs_cropped/7LT0_ONJ_holo_aligned_predicted_protein.pdb,Cc1ccc(Sc2cc(C(=O)N3Cc4ccccc4C3)ccc2O)cc1C,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures_bs_cropped/7LT0_ONJ_holo_aligned_predicted_protein.pdb +7D6O_MTE,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures_bs_cropped/7D6O_MTE_holo_aligned_predicted_protein.pdb,Nc1nc2c(c(=O)[nH]1)N[C@H]1C(S)=C(S)[C@@H](COP(=O)(O)O)O[C@H]1N2,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures_bs_cropped/7D6O_MTE_holo_aligned_predicted_protein.pdb +7UJ4_OQ4,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures_bs_cropped/7UJ4_OQ4_holo_aligned_predicted_protein.pdb,CCN(C(=O)c1cc(F)ccc1Oc1cncnc1N1CC2(CCN(C[C@H]3CC[C@H](NS(=O)(=O)CC)CC3)CC2)C1)C(C)C,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures_bs_cropped/7UJ4_OQ4_holo_aligned_predicted_protein.pdb +7OSO_0V1,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures_bs_cropped/7OSO_0V1_holo_aligned_predicted_protein.pdb,OCC[C@H](O)CO,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures_bs_cropped/7OSO_0V1_holo_aligned_predicted_protein.pdb +7AFX_R9K,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures_bs_cropped/7AFX_R9K_holo_aligned_predicted_protein.pdb,Cc1cccc2c(-c3ccccc3Cl)c(C(=O)O)[nH]c12,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures_bs_cropped/7AFX_R9K_holo_aligned_predicted_protein.pdb 
+7T1D_E7K,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures_bs_cropped/7T1D_E7K_holo_aligned_predicted_protein.pdb,Cc1cn(-c2ccc3c(c2)CN(c2ncc(Cc4ccc(-n5cccn5)cc4)s2)CC3)c(C)n1,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures_bs_cropped/7T1D_E7K_holo_aligned_predicted_protein.pdb +7R9N_F97,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures_bs_cropped/7R9N_F97_holo_aligned_predicted_protein.pdb,CC(C)n1ncc2cnc(Nc3cc([C@@H]4CCNC4)nc(N4CCC(F)(F)C4)n3)cc21,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures_bs_cropped/7R9N_F97_holo_aligned_predicted_protein.pdb +7MGT_ZD4,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures_bs_cropped/7MGT_ZD4_holo_aligned_predicted_protein.pdb,Nc1nc(Cl)nc2c1ncn2[C@@H]1O[C@H](CO[P@](=O)(O)OP(=O)(O)O)[C@@H](O)[C@H]1O,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures_bs_cropped/7MGT_ZD4_holo_aligned_predicted_protein.pdb +7MYU_ZR7,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures_bs_cropped/7MYU_ZR7_holo_aligned_predicted_protein.pdb,COc1cnc(C(=O)Nc2ccc(F)c([C@]34CN(c5ncc(F)cn5)C[C@H]3CSC(N)=N4)c2)cn1,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures_bs_cropped/7MYU_ZR7_holo_aligned_predicted_protein.pdb +7RH3_59O,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures_bs_cropped/7RH3_59O_holo_aligned_predicted_protein.pdb,CO[C@@H]1O[C@H](CO)[C@H](O)[C@H](OC(=O)c2ccc(C)cc2)[C@@H]1OC(=O)c1ccc(Cl)cc1[N+](=O)[O-],data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures_bs_cropped/7RH3_59O_holo_aligned_predicted_protein.pdb 
+7OMX_CNA,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures_bs_cropped/7OMX_CNA_holo_aligned_predicted_protein.pdb,NC(=O)c1ccc[n+]([C@@H]2C[C@H](CO[P@](=O)(O)O[P@](=O)(O)OC[C@H]3O[C@@H](n4cnc5c(N)ncnc54)[C@H](O)[C@@H]3O)[C@@H](O)[C@H]2O)c1,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures_bs_cropped/7OMX_CNA_holo_aligned_predicted_protein.pdb +7NXO_UU8,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures_bs_cropped/7NXO_UU8_holo_aligned_predicted_protein.pdb,Cc1ccccc1Oc1cc(-n2c(=O)cc(S(=O)(=O)c3ccccc3)[nH]c2=O)c(F)cc1C#N,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures_bs_cropped/7NXO_UU8_holo_aligned_predicted_protein.pdb +8DHG_T78,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures_bs_cropped/8DHG_T78_holo_aligned_predicted_protein.pdb,CCn1c(CO)nn(-c2nc(O[C@@H](C)C(F)(F)F)c(C(=O)Nc3c(C)ccnc3Cl)cc2F)c1=O,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures_bs_cropped/8DHG_T78_holo_aligned_predicted_protein.pdb +7NPL_UKZ,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures_bs_cropped/7NPL_UKZ_holo_aligned_predicted_protein.pdb,CCC[C@@H](NC(=O)c1cccc2c1CC(=O)N2)[C@@H](O)c1cccc(Cl)c1C,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures_bs_cropped/7NPL_UKZ_holo_aligned_predicted_protein.pdb +7PRM_81I,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures_bs_cropped/7PRM_81I_holo_aligned_predicted_protein.pdb,O=C(c1ccco1)N1CCN([C@@H]2CC(=O)N(c3ccc(-c4ccc(F)cc4)cc3)C2)CC1,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures_bs_cropped/7PRM_81I_holo_aligned_predicted_protein.pdb 
+7WDT_NGS,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures_bs_cropped/7WDT_NGS_holo_aligned_predicted_protein.pdb,CC(=O)N[C@@H]1[C@@H](O)[C@H](O)[C@@H](COS(=O)(=O)O)O[C@H]1O,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures_bs_cropped/7WDT_NGS_holo_aligned_predicted_protein.pdb +7UAW_MF6,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures_bs_cropped/7UAW_MF6_holo_aligned_predicted_protein.pdb,Nc1ncnc2c1ncn2[C@@H]1O[C@@H]2CO[P@](=O)(O)O[P@](=O)(O)OC[C@H]3O[C@@H](O[C@@H]1[C@@H]2O)[C@H](O)[C@@H]3O,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures_bs_cropped/7UAW_MF6_holo_aligned_predicted_protein.pdb +7W05_GMP,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures_bs_cropped/7W05_GMP_holo_aligned_predicted_protein.pdb,Nc1nc2c(ncn2[C@@H]2O[C@H](CO)[C@@H](O)[C@H]2O)c(=O)[nH]1,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures_bs_cropped/7W05_GMP_holo_aligned_predicted_protein.pdb +7UJF_R3V,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures_bs_cropped/7UJF_R3V_holo_aligned_predicted_protein.pdb,C[C@H]1CCN(CCOc2ccc([C@@H]3c4ccc(O)cc4CC[C@@H]3c3ccccc3)cc2)C1,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures_bs_cropped/7UJF_R3V_holo_aligned_predicted_protein.pdb +8D39_QDB,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures_bs_cropped/8D39_QDB_holo_aligned_predicted_protein.pdb,O=C(O)c1ccc(C(=O)c2ccccc2)cc1,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures_bs_cropped/8D39_QDB_holo_aligned_predicted_protein.pdb 
+7F5D_EUO,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures_bs_cropped/7F5D_EUO_holo_aligned_predicted_protein.pdb,CNc1cc(-c2ccc3[nH]ccc3c2)nc(S(C)(=O)=O)n1,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures_bs_cropped/7F5D_EUO_holo_aligned_predicted_protein.pdb +7BMI_U4B,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures_bs_cropped/7BMI_U4B_holo_aligned_predicted_protein.pdb,O=C(O)c1ccnc(C(=O)O)c1F,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures_bs_cropped/7BMI_U4B_holo_aligned_predicted_protein.pdb +7KB1_WBJ,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures_bs_cropped/7KB1_WBJ_holo_aligned_predicted_protein.pdb,C=C/C(=N\Cc1c(COP(=O)(O)O)cnc(C)c1O)C(=O)O,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures_bs_cropped/7KB1_WBJ_holo_aligned_predicted_protein.pdb +7R7R_AWJ,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures_bs_cropped/7R7R_AWJ_holo_aligned_predicted_protein.pdb,Cc1nc([C@](C)(O)CO)sc1-c1cnc(N)c(O[C@H](C)c2cc(F)ccc2N2NC=CN2)c1,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures_bs_cropped/7R7R_AWJ_holo_aligned_predicted_protein.pdb +7L00_XCJ,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures_bs_cropped/7L00_XCJ_holo_aligned_predicted_protein.pdb,CS(=O)(=O)c1ccc2nc(NC(=O)Cc3csc(-n4cccc4)n3)sc2c1,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures_bs_cropped/7L00_XCJ_holo_aligned_predicted_protein.pdb 
+7BJJ_TVW,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures_bs_cropped/7BJJ_TVW_holo_aligned_predicted_protein.pdb,Nc1ncnc2n[nH]cc12,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures_bs_cropped/7BJJ_TVW_holo_aligned_predicted_protein.pdb +7UQ3_O2U,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures_bs_cropped/7UQ3_O2U_holo_aligned_predicted_protein.pdb,O=C(O)C[C@@H]1CC(=O)N(O)C1=O,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures_bs_cropped/7UQ3_O2U_holo_aligned_predicted_protein.pdb +7XQZ_FPF,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures_bs_cropped/7XQZ_FPF_holo_aligned_predicted_protein.pdb,CC(C)=CCC/C(C)=C/CC/C(C)=C(\F)CO[P@@](=O)(O)OP(=O)(O)O,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures_bs_cropped/7XQZ_FPF_holo_aligned_predicted_protein.pdb +7JMV_4NC,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures_bs_cropped/7JMV_4NC_holo_aligned_predicted_protein.pdb,O=[N+]([O-])c1ccc(O)c(O)c1,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures_bs_cropped/7JMV_4NC_holo_aligned_predicted_protein.pdb +7BNH_BEZ,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures_bs_cropped/7BNH_BEZ_holo_aligned_predicted_protein.pdb,O=C(O)c1ccccc1,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures_bs_cropped/7BNH_BEZ_holo_aligned_predicted_protein.pdb +8FO5_Y4U,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures_bs_cropped/8FO5_Y4U_holo_aligned_predicted_protein.pdb,CC(=O)c1ccn(S(=O)(=O)c2ccccc2)c1,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures_bs_cropped/8FO5_Y4U_holo_aligned_predicted_protein.pdb 
+7ZU2_DHT,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures_bs_cropped/7ZU2_DHT_holo_aligned_predicted_protein.pdb,C[C@]12CCC(=O)C[C@@H]1CC[C@@H]1[C@@H]2CC[C@]2(C)[C@@H](O)CC[C@@H]12,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures_bs_cropped/7ZU2_DHT_holo_aligned_predicted_protein.pdb +7A9H_TPP,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures_bs_cropped/7A9H_TPP_holo_aligned_predicted_protein.pdb,Cc1ncc(C[n+]2csc(CCO[P@](=O)(O)OP(=O)(O)O)c2C)c(N)n1,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures_bs_cropped/7A9H_TPP_holo_aligned_predicted_protein.pdb +7DUA_HJ0,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures_bs_cropped/7DUA_HJ0_holo_aligned_predicted_protein.pdb,Cc1cc(NC(=O)c2cn(C3(C)CC3)c3ncnc(N)c23)n[nH]1,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures_bs_cropped/7DUA_HJ0_holo_aligned_predicted_protein.pdb +7P5T_5YG,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures_bs_cropped/7P5T_5YG_holo_aligned_predicted_protein.pdb,COc1ccc(CNc2ccc(Cc3ccncc3)cc2)cc1,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures_bs_cropped/7P5T_5YG_holo_aligned_predicted_protein.pdb +7RNI_60I,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures_bs_cropped/7RNI_60I_holo_aligned_predicted_protein.pdb,Cc1nn(C)c(C)c1C(=O)N1CCN(Cc2nc3ccccc3n2CC(C)(C)C)CC1,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures_bs_cropped/7RNI_60I_holo_aligned_predicted_protein.pdb 
+6M73_FNR,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures_bs_cropped/6M73_FNR_holo_aligned_predicted_protein.pdb,Cc1cc2c(cc1C)N(C[C@H](O)[C@H](O)[C@H](O)COP(=O)(O)O)c1[nH]c(=O)[nH]c(=O)c1N2,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures_bs_cropped/6M73_FNR_holo_aligned_predicted_protein.pdb +6ZK5_IMH,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures_bs_cropped/6ZK5_IMH_holo_aligned_predicted_protein.pdb,OC[C@H]1N[C@@H](c2c[nH]c3c(O)ncnc23)[C@H](O)[C@@H]1O,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures_bs_cropped/6ZK5_IMH_holo_aligned_predicted_protein.pdb +7VC5_9SF,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures_bs_cropped/7VC5_9SF_holo_aligned_predicted_protein.pdb,O=C(C[C@H]1NCCC[C@@H]1O)Cn1cnc2ccccc2c1=O,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures_bs_cropped/7VC5_9SF_holo_aligned_predicted_protein.pdb +7ZZW_KKW,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures_bs_cropped/7ZZW_KKW_holo_aligned_predicted_protein.pdb,O=C([C@H]1C[C@@H](c2cccc(Cl)c2)CN1)N1CCN(c2nccc3ccsc23)CC1,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures_bs_cropped/7ZZW_KKW_holo_aligned_predicted_protein.pdb +7R6J_2I7,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures_bs_cropped/7R6J_2I7_holo_aligned_predicted_protein.pdb,Cc1cc(NCc2cccc(CN3C[C@H](O)[C@@H](O)[C@H](O)[C@H]3CO)c2)cc(-c2ncccn2)c1,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures_bs_cropped/7R6J_2I7_holo_aligned_predicted_protein.pdb 
+8HO0_3ZI,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures_bs_cropped/8HO0_3ZI_holo_aligned_predicted_protein.pdb,O=C1N[C@@H](Cc2c[nH]c3c(F)cccc23)C(=O)N2CCC[C@@H]12,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures_bs_cropped/8HO0_3ZI_holo_aligned_predicted_protein.pdb +7XBV_APC,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures_bs_cropped/7XBV_APC_holo_aligned_predicted_protein.pdb,Nc1ncnc2c1ncn2[C@@H]1O[C@H](CO[P@](=O)(O)C[P@](=O)(O)OP(=O)(O)O)[C@@H](O)[C@H]1O,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures_bs_cropped/7XBV_APC_holo_aligned_predicted_protein.pdb +7UXS_OJC,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures_bs_cropped/7UXS_OJC_holo_aligned_predicted_protein.pdb,Nc1ncnc2c1ncn2[C@@H]1O[C@@H]2CO[P@](=O)(O)O[P@](=O)(O)OC[C@H]3O[C@@H](O[C@H]2[C@H]1O)[C@H](O)[C@@H]3O,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures_bs_cropped/7UXS_OJC_holo_aligned_predicted_protein.pdb +7WPW_F15,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures_bs_cropped/7WPW_F15_holo_aligned_predicted_protein.pdb,CCCCCCCCCCCCCCC(=O)O,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures_bs_cropped/7WPW_F15_holo_aligned_predicted_protein.pdb +8AEM_LVF,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures_bs_cropped/8AEM_LVF_holo_aligned_predicted_protein.pdb,N#CCc1c[nH]c2ccc(Cl)cc12,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures_bs_cropped/8AEM_LVF_holo_aligned_predicted_protein.pdb 
+7Q25_8J9,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures_bs_cropped/7Q25_8J9_holo_aligned_predicted_protein.pdb,CCCC[C@H](N[C@@H](CCc1ccccc1)C(=O)O)C(=O)N[C@@H](Cc1ccc(O)cc1)C(=O)O,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures_bs_cropped/7Q25_8J9_holo_aligned_predicted_protein.pdb +6ZPB_3D1,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures_bs_cropped/6ZPB_3D1_holo_aligned_predicted_protein.pdb,Nc1ncnc2c1ncn2[C@H]1C[C@H](O)[C@@H](CO)O1,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures_bs_cropped/6ZPB_3D1_holo_aligned_predicted_protein.pdb +7TSF_H4B,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures_bs_cropped/7TSF_H4B_holo_aligned_predicted_protein.pdb,C[C@H](O)[C@H](O)[C@H]1CNc2nc(N)[nH]c(=O)c2N1,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures_bs_cropped/7TSF_H4B_holo_aligned_predicted_protein.pdb +7LJN_GTP,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures_bs_cropped/7LJN_GTP_holo_aligned_predicted_protein.pdb,Nc1nc2c(ncn2[C@@H]2O[C@H](CO[P@](=O)(O)O[P@](=O)(O)OP(=O)(O)O)[C@@H](O)[C@H]2O)c(=O)[nH]1,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures_bs_cropped/7LJN_GTP_holo_aligned_predicted_protein.pdb +7E4L_MDN,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures_bs_cropped/7E4L_MDN_holo_aligned_predicted_protein.pdb,O=P(O)(O)CP(=O)(O)O,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures_bs_cropped/7E4L_MDN_holo_aligned_predicted_protein.pdb 
+7N7B_T3F,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures_bs_cropped/7N7B_T3F_holo_aligned_predicted_protein.pdb,Cc1cn([C@H]2C[C@H](O)[C@@H](CO[P@@](=O)(O)O[P@](=O)(O)O[C@H]3O[C@H](C)[C@H](O)[C@H](N)[C@H]3O)O2)c(=O)[nH]c1=O,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures_bs_cropped/7N7B_T3F_holo_aligned_predicted_protein.pdb +7WKL_CAQ,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures_bs_cropped/7WKL_CAQ_holo_aligned_predicted_protein.pdb,Oc1ccccc1O,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures_bs_cropped/7WKL_CAQ_holo_aligned_predicted_protein.pdb +8AP0_PRP,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures_bs_cropped/8AP0_PRP_holo_aligned_predicted_protein.pdb,O=P(O)(O)OC[C@H]1O[C@H](O[P@@](=O)(O)OP(=O)(O)O)[C@H](O)[C@@H]1O,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures_bs_cropped/8AP0_PRP_holo_aligned_predicted_protein.pdb +7V3S_5I9,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures_bs_cropped/7V3S_5I9_holo_aligned_predicted_protein.pdb,O=C(Nc1ccc(F)cc1)C1(C(=O)Nc2ccc(Oc3ccnc4c3Oc3ccccc3N4)c(F)c2)CC1,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures_bs_cropped/7V3S_5I9_holo_aligned_predicted_protein.pdb +7XPO_UPG,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures_bs_cropped/7XPO_UPG_holo_aligned_predicted_protein.pdb,O=c1ccn([C@@H]2O[C@H](CO[P@](=O)(O)O[P@](=O)(O)O[C@H]3O[C@H](CO)[C@@H](O)[C@H](O)[C@H]3O)[C@@H](O)[C@H]2O)c(=O)[nH]1,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures_bs_cropped/7XPO_UPG_holo_aligned_predicted_protein.pdb 
+7KC5_BJZ,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures_bs_cropped/7KC5_BJZ_holo_aligned_predicted_protein.pdb,CN1C(=O)N(c2cc(Cl)cc(Cl)c2)C(=O)[C@]12CN(c1ccc(C(=O)O)cn1)C[C@H]2c1ccc(C#N)cc1,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures_bs_cropped/7KC5_BJZ_holo_aligned_predicted_protein.pdb +7NSW_HC4,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures_bs_cropped/7NSW_HC4_holo_aligned_predicted_protein.pdb,O=C(O)/C=C/c1ccc(O)cc1,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures_bs_cropped/7NSW_HC4_holo_aligned_predicted_protein.pdb +7RWS_4UR,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures_bs_cropped/7RWS_4UR_holo_aligned_predicted_protein.pdb,Nc1nc2c(ncn2[C@@H]2O[C@@H]3CO[P@@](=O)(O)O[C@@H]4[C@H](O)[C@@H](CO[P@](=O)(O)O[C@H]3[C@H]2O)O[C@H]4n2cnc3c(N)ncnc32)c(=O)[nH]1,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures_bs_cropped/7RWS_4UR_holo_aligned_predicted_protein.pdb +7VBU_6I4,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures_bs_cropped/7VBU_6I4_holo_aligned_predicted_protein.pdb,Cc1ccc2c(n1)[nH]c1c(C3CC3)cccc12,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures_bs_cropped/7VBU_6I4_holo_aligned_predicted_protein.pdb +7QTA_URI,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures_bs_cropped/7QTA_URI_holo_aligned_predicted_protein.pdb,O=c1ccn([C@@H]2O[C@H](CO)[C@@H](O)[C@H]2O)c(=O)[nH]1,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures_bs_cropped/7QTA_URI_holo_aligned_predicted_protein.pdb 
+7WQQ_5Z6,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures_bs_cropped/7WQQ_5Z6_holo_aligned_predicted_protein.pdb,CC(C)(C)c1cc(C(=O)/C=C/c2ccc(C(=O)O)cc2)cc(C(C)(C)C)c1,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures_bs_cropped/7WQQ_5Z6_holo_aligned_predicted_protein.pdb +8D5D_5DK,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures_bs_cropped/8D5D_5DK_holo_aligned_predicted_protein.pdb,Cc1ncc(COP(=O)(O)O)c(/C=N/[C@H](CCCNC(=N)N)C(=O)O)c1O,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures_bs_cropped/8D5D_5DK_holo_aligned_predicted_protein.pdb +7Q27_8KC,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures_bs_cropped/7Q27_8KC_holo_aligned_predicted_protein.pdb,CCCC[C@H](N[C@@H](CCc1ccccc1)C(=O)O)C(=O)N[C@@H](Cc1c[nH]c2ccccc12)C(=O)O,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures_bs_cropped/7Q27_8KC_holo_aligned_predicted_protein.pdb +7ED2_A3P,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures_bs_cropped/7ED2_A3P_holo_aligned_predicted_protein.pdb,Nc1ncnc2c1ncn2[C@@H]1O[C@H](COP(=O)(O)O)[C@@H](OP(=O)(O)O)[C@H]1O,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures_bs_cropped/7ED2_A3P_holo_aligned_predicted_protein.pdb +6YT6_PKE,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures_bs_cropped/6YT6_PKE_holo_aligned_predicted_protein.pdb,CN(c1ncccc1CNc1ccnc(Nc2ccc3c(c2)CC(=O)N3)n1)S(C)(=O)=O,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures_bs_cropped/6YT6_PKE_holo_aligned_predicted_protein.pdb 
+7JG0_GAR,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures_bs_cropped/7JG0_GAR_holo_aligned_predicted_protein.pdb,NCC(=O)N[C@@H]1O[C@H](COP(=O)([O-])[O-])[C@@H](O)[C@H]1O,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures_bs_cropped/7JG0_GAR_holo_aligned_predicted_protein.pdb +8EYE_X4I,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures_bs_cropped/8EYE_X4I_holo_aligned_predicted_protein.pdb,O=C(CNc1cc(F)cc(F)c1)N[C@@H](C(=O)NO)c1ccc(-c2cc(F)c(F)c(F)c2)cc1,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures_bs_cropped/8EYE_X4I_holo_aligned_predicted_protein.pdb +7O1T_5X8,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures_bs_cropped/7O1T_5X8_holo_aligned_predicted_protein.pdb,Nc1ncnc2c1ncn2[C@@H]1O[C@H](CSC[C@H](N)C(=O)O)[C@@H](O)[C@H]1O,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures_bs_cropped/7O1T_5X8_holo_aligned_predicted_protein.pdb +6Z4N_Q7B,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures_bs_cropped/6Z4N_Q7B_holo_aligned_predicted_protein.pdb,Cc1ccc(C[C@@]2(C(=O)O)C[C@H]2c2ccccc2)cc1,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures_bs_cropped/6Z4N_Q7B_holo_aligned_predicted_protein.pdb +7WL4_JFU,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures_bs_cropped/7WL4_JFU_holo_aligned_predicted_protein.pdb,CCN1C(=O)c2cc(N3CCN(C)CC3)nc3c(NS(=O)(=O)c4ccc(F)cc4F)ccc1c23,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures_bs_cropped/7WL4_JFU_holo_aligned_predicted_protein.pdb 
+8SLG_G5A,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures_bs_cropped/8SLG_G5A_holo_aligned_predicted_protein.pdb,NCC(=O)NS(=O)(=O)OC[C@H]1O[C@@H](n2cnc3c(N)ncnc32)[C@H](O)[C@@H]1O,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures_bs_cropped/8SLG_G5A_holo_aligned_predicted_protein.pdb +7L7C_XQ1,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures_bs_cropped/7L7C_XQ1_holo_aligned_predicted_protein.pdb,COc1cccc(-c2ccc3c(CC(=O)O)coc3c2)c1,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures_bs_cropped/7L7C_XQ1_holo_aligned_predicted_protein.pdb +7NLV_UJE,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures_bs_cropped/7NLV_UJE_holo_aligned_predicted_protein.pdb,O=C(CCCC[C@@H]1SC[C@@H]2NC(=O)N[C@@H]21)N[C@H]1CCNC1,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures_bs_cropped/7NLV_UJE_holo_aligned_predicted_protein.pdb +7VYJ_CA0,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures_bs_cropped/7VYJ_CA0_holo_aligned_predicted_protein.pdb,NC(=O)O[P@](=O)(O)OC[C@H]1O[C@@H](n2cnc3c(N)ncnc32)[C@H](O)[C@@H]1O,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures_bs_cropped/7VYJ_CA0_holo_aligned_predicted_protein.pdb +7PUV_84Z,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures_bs_cropped/7PUV_84Z_holo_aligned_predicted_protein.pdb,COC(=O)c1cc(S(N)(=O)=O)c(Cl)cc1S(=O)(=O)c1ccccc1,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures_bs_cropped/7PUV_84Z_holo_aligned_predicted_protein.pdb 
+7RSV_7IQ,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures_bs_cropped/7RSV_7IQ_holo_aligned_predicted_protein.pdb,C[C@@H]1COCCN1c1cc2n(n1)[C@@H]1CCC[C@@H]1NC2=O,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures_bs_cropped/7RSV_7IQ_holo_aligned_predicted_protein.pdb +7QGP_DJ8,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures_bs_cropped/7QGP_DJ8_holo_aligned_predicted_protein.pdb,Cc1cc(Cl)ccc1CNC(=O)Nc1ccc2cc1OCCOCCNc1ccn3ncc-2c3n1,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures_bs_cropped/7QGP_DJ8_holo_aligned_predicted_protein.pdb diff --git a/forks/NeuralPLexer/inference/neuralplexer_posebusters_benchmark_inputs.csv b/forks/NeuralPLexer/inference/neuralplexer_posebusters_benchmark_inputs.csv new file mode 100644 index 00000000..87dbf65b --- /dev/null +++ b/forks/NeuralPLexer/inference/neuralplexer_posebusters_benchmark_inputs.csv @@ -0,0 +1,281 @@ +id,input_receptor,input_ligand,input_template +7CNQ_G8X,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures/7CNQ_G8X_holo_aligned_predicted_protein.pdb,O=C(O)[C@H]1NCC[C@H]1O,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures/7CNQ_G8X_holo_aligned_predicted_protein.pdb +7C0U_FGO,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures/7C0U_FGO_holo_aligned_predicted_protein.pdb,Nc1nc2c(ncn2[C@@H]2O[C@H](CO[P@](=O)(O)O[C@H]3[C@@H](O)[C@H](n4ccc(=O)[nH]c4=O)O[C@@H]3CO)[C@H]3CC(O)(O)C[C@H]32)c(=O)[nH]1,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures/7C0U_FGO_holo_aligned_predicted_protein.pdb 
+7EBG_J0L,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures/7EBG_J0L_holo_aligned_predicted_protein.pdb,CNc1cccc2c1NC(=O)C2(C)C,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures/7EBG_J0L_holo_aligned_predicted_protein.pdb +7WUY_76N,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures/7WUY_76N_holo_aligned_predicted_protein.pdb,C[C@@H]1C=C[C@@H]2CCCC[C@@H]2[C@H]1C(=O)c1c(O)c([C@]2(O)CC[C@H](O)[C@@H]3O[C@@H]32)c[nH]c1=O,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures/7WUY_76N_holo_aligned_predicted_protein.pdb +7PT3_3KK,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures/7PT3_3KK_holo_aligned_predicted_protein.pdb,CC(C)(O)C(=O)SCCNC(=O)CCNC(=O)[C@H](O)C(C)(C)CO[P@@](=O)(O)O[P@](=O)(O)OC[C@H]1O[C@@H](n2cnc3c(N)ncnc32)[C@H](O)[C@@H]1OP(=O)(O)O,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures/7PT3_3KK_holo_aligned_predicted_protein.pdb +7C3U_AZG,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures/7C3U_AZG_holo_aligned_predicted_protein.pdb,Nc1nc(O)c2[nH]nnc2n1,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures/7C3U_AZG_holo_aligned_predicted_protein.pdb +7PGX_FMN,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures/7PGX_FMN_holo_aligned_predicted_protein.pdb,Cc1cc2nc3c(=O)[nH]c(=O)nc-3n(C[C@H](O)[C@H](O)[C@H](O)COP(=O)(O)O)c2cc1C,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures/7PGX_FMN_holo_aligned_predicted_protein.pdb +7FB7_8NF,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures/7FB7_8NF_holo_aligned_predicted_protein.pdb,Cc1cc(C)c(N)cn1,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures/7FB7_8NF_holo_aligned_predicted_protein.pdb 
+8AIE_M7L,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures/8AIE_M7L_holo_aligned_predicted_protein.pdb,Cc1ncc(COP(=O)(O)O)c(C/N=C(\CON)C(=O)O)c1O,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures/8AIE_M7L_holo_aligned_predicted_protein.pdb +7LCU_XTA,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures/7LCU_XTA_holo_aligned_predicted_protein.pdb,CN1CCN(c2ncc(Oc3cc(CN4CCC(CC(=O)O)CC4)cc(-c4cc(Cl)cc(Cl)c4)n3)cn2)CC1,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures/7LCU_XTA_holo_aligned_predicted_protein.pdb +6YMS_OZH,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures/6YMS_OZH_holo_aligned_predicted_protein.pdb,CC(C)C[C@H](NC(=O)CN[P@@](=O)(O)[C@@H](Cc1ccccc1)NC(=O)OCc1ccccc1)C(=O)O,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures/6YMS_OZH_holo_aligned_predicted_protein.pdb +7PK0_BYC,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures/7PK0_BYC_holo_aligned_predicted_protein.pdb,CC(C)(CO[P@@](=O)(O)O[P@](=O)(O)OC[C@H]1O[C@@H](n2cnc3c(N)ncnc32)[C@H](O)[C@@H]1OP(=O)(O)O)[C@@H](O)C(=O)NCCC(=O)NCCSC(=O)c1ccccc1,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures/7PK0_BYC_holo_aligned_predicted_protein.pdb +7M6K_YRJ,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures/7M6K_YRJ_holo_aligned_predicted_protein.pdb,O=C(NCc1ccc[n+]([C@@H]2O[C@H](COP(=O)(O)O)[C@@H](O)[C@H]2O)c1)Nc1ccc([N+](=O)[O-])cc1,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures/7M6K_YRJ_holo_aligned_predicted_protein.pdb 
+7SFO_98L,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures/7SFO_98L_holo_aligned_predicted_protein.pdb,Oc1cccc(CNc2nc(Cl)nc3scc(-c4ccccc4)c23)c1,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures/7SFO_98L_holo_aligned_predicted_protein.pdb +8EAB_VN2,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures/8EAB_VN2_holo_aligned_predicted_protein.pdb,O=C(N[C@H](C(=O)Nc1cnccc1-c1ccc(C(F)(F)F)cc1)c1cccc(C(F)(F)F)c1)c1cnccc1-c1ccc(C(F)(F)F)cc1,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures/8EAB_VN2_holo_aligned_predicted_protein.pdb +7MWU_ZPM,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures/7MWU_ZPM_holo_aligned_predicted_protein.pdb,O=C(O)C1CCC1,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures/7MWU_ZPM_holo_aligned_predicted_protein.pdb +7TYP_KUR,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures/7TYP_KUR_holo_aligned_predicted_protein.pdb,Cc1nccnc1-c1nn2c(=O)cc(-c3ccc(C4CCCCC4)cc3)[nH]c2c1C(=O)N1CC(CF)C1,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures/7TYP_KUR_holo_aligned_predicted_protein.pdb +6XM9_V55,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures/6XM9_V55_holo_aligned_predicted_protein.pdb,COc1cc(C=O)ccc1O,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures/6XM9_V55_holo_aligned_predicted_protein.pdb +7T0D_FPP,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures/7T0D_FPP_holo_aligned_predicted_protein.pdb,CC(C)=CCC/C(C)=C/CC/C(C)=C/CO[P@](=O)(O)OP(=O)(O)O,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures/7T0D_FPP_holo_aligned_predicted_protein.pdb 
+7XI7_4RI,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures/7XI7_4RI_holo_aligned_predicted_protein.pdb,CCCCCCc1nc(N)nc(N)c1-c1ccccc1,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures/7XI7_4RI_holo_aligned_predicted_protein.pdb +7PRI_7TI,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures/7PRI_7TI_holo_aligned_predicted_protein.pdb,Nc1cc(C(Cl)=C(Cl)Cl)c(S(N)(=O)=O)cc1S(N)(=O)=O,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures/7PRI_7TI_holo_aligned_predicted_protein.pdb +8FLV_ZB9,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures/8FLV_ZB9_holo_aligned_predicted_protein.pdb,CN(c1ncnc2[nH]ccc12)[C@@H]1CCCN(C(=O)CNc2cc(Cl)cc(Cl)c2)C1,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures/8FLV_ZB9_holo_aligned_predicted_protein.pdb +7N4W_P4V,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures/7N4W_P4V_holo_aligned_predicted_protein.pdb,COc1cc2c(cc1OC)[C@H]1Cc3ccc(OC)c(OC)c3CN1CC2,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures/7N4W_P4V_holo_aligned_predicted_protein.pdb +7OEO_V9Z,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures/7OEO_V9Z_holo_aligned_predicted_protein.pdb,CNC(=O)CN(CC(c1ccccc1)c1ccccc1)C(=O)c1cc(C)c(OC)c(C)c1,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures/7OEO_V9Z_holo_aligned_predicted_protein.pdb +5SB2_1K2,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures/5SB2_1K2_holo_aligned_predicted_protein.pdb,O=C(N[C@@H]1C[C@H]1c1ccccc1)c1cc(Cl)cc(COc2cnc3[nH]ccc3c2)c1,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures/5SB2_1K2_holo_aligned_predicted_protein.pdb 
+7MGY_ZD1,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures/7MGY_ZD1_holo_aligned_predicted_protein.pdb,N[C@H]1C=C(CO)[C@@H](O[C@H]2O[C@H](CO)[C@@H](O)[C@H](O)[C@H]2O)[C@H](O)[C@H]1O,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures/7MGY_ZD1_holo_aligned_predicted_protein.pdb +7OPG_06N,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures/7OPG_06N_holo_aligned_predicted_protein.pdb,CCCNc1nn2c(-c3ccc(O)cc3)cnc2s1,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures/7OPG_06N_holo_aligned_predicted_protein.pdb +7OLI_8HG,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures/7OLI_8HG_holo_aligned_predicted_protein.pdb,Nc1nc2c([nH]c(=O)n2[C@H]2C[C@H](O)[C@@H](CO)O2)c(=O)[nH]1,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures/7OLI_8HG_holo_aligned_predicted_protein.pdb +8FAV_4Y5,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures/8FAV_4Y5_holo_aligned_predicted_protein.pdb,O=C(O)c1ccc(-c2nn(C(=O)c3c(Cl)cccc3C(F)(F)F)c3cccc(F)c23)c(F)c1,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures/8FAV_4Y5_holo_aligned_predicted_protein.pdb +7R59_I5F,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures/7R59_I5F_holo_aligned_predicted_protein.pdb,Oc1ccc2c(c1)sc1nncn12,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures/7R59_I5F_holo_aligned_predicted_protein.pdb +6XHT_V2V,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures/6XHT_V2V_holo_aligned_predicted_protein.pdb,Nc1ccn([C@@H]2O[C@H](CO[P@](=O)(O)O[P@@](=O)(O)OC[C@@H](O)[C@@H](O)[C@@H](O)CO)[C@@H](O)[C@H]2O)c(=O)n1,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures/6XHT_V2V_holo_aligned_predicted_protein.pdb 
+8EX2_Q2Q,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures/8EX2_Q2Q_holo_aligned_predicted_protein.pdb,O=C1CC(c2ccccc2)=Nc2c(-c3ccccc3)c(C(F)(F)F)nn21,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures/8EX2_Q2Q_holo_aligned_predicted_protein.pdb +7LOU_IFM,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures/7LOU_IFM_holo_aligned_predicted_protein.pdb,OC[C@H]1CNC[C@@H](O)[C@@H]1O,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures/7LOU_IFM_holo_aligned_predicted_protein.pdb +6XBO_5MC,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures/6XBO_5MC_holo_aligned_predicted_protein.pdb,Cc1cn([C@@H]2O[C@H](COP(=O)(O)O)[C@@H](O)[C@H]2O)c(=O)nc1N,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures/6XBO_5MC_holo_aligned_predicted_protein.pdb +6TW7_NZB,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures/6TW7_NZB_holo_aligned_predicted_protein.pdb,CN(C)Cc1[nH]nc2ccc(-c3ccc(F)cc3OCCc3cccnc3)cc12,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures/6TW7_NZB_holo_aligned_predicted_protein.pdb +7U3J_L6U,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures/7U3J_L6U_holo_aligned_predicted_protein.pdb,CNC(=O)[C@H](CCc1ccccc1)NC(=O)[C@H](NC(=O)CNCc1ccc(OC)cc1OC)c1cccs1,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures/7U3J_L6U_holo_aligned_predicted_protein.pdb +7UYB_OK0,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures/7UYB_OK0_holo_aligned_predicted_protein.pdb,NS(=O)(=O)c1c(C(F)(F)F)ccc(-c2ccc(C3CCNCC3)cc2)c1-c1nnn[nH]1,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures/7UYB_OK0_holo_aligned_predicted_protein.pdb 
+7WJB_BGC,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures/7WJB_BGC_holo_aligned_predicted_protein.pdb,OC[C@H]1O[C@@H](O)[C@H](O)[C@@H](O)[C@@H]1O,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures/7WJB_BGC_holo_aligned_predicted_protein.pdb +7RC3_SAH,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures/7RC3_SAH_holo_aligned_predicted_protein.pdb,Nc1ncnc2c1ncn2[C@@H]1O[C@H](CSCC[C@H](N)C(=O)O)[C@@H](O)[C@H]1O,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures/7RC3_SAH_holo_aligned_predicted_protein.pdb +7MY1_IPE,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures/7MY1_IPE_holo_aligned_predicted_protein.pdb,C=C(C)CCO[P@@](=O)(O)OP(=O)(O)O,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures/7MY1_IPE_holo_aligned_predicted_protein.pdb +7AN5_RDH,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures/7AN5_RDH_holo_aligned_predicted_protein.pdb,C=C(Oc1cccc(C(=O)O)c1)C(=O)O,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures/7AN5_RDH_holo_aligned_predicted_protein.pdb +7JY3_VUD,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures/7JY3_VUD_holo_aligned_predicted_protein.pdb,C[C@H](Oc1cc2cc(F)ccc2nc1N)c1[nH]c(=O)ccc1-n1cccn1,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures/7JY3_VUD_holo_aligned_predicted_protein.pdb +7D5C_GV6,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures/7D5C_GV6_holo_aligned_predicted_protein.pdb,CC[C@H](C)[C@H](N)C(=O)O[P@](=O)(O)OC[C@H]1O[C@@H](n2cnc3c(N)ncnc32)[C@H](O)[C@@H]1O,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures/7D5C_GV6_holo_aligned_predicted_protein.pdb 
+7WUX_6OI,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures/7WUX_6OI_holo_aligned_predicted_protein.pdb,N[C@@H](CC[C@H](O)[C@@H](N)COS(=O)(=O)O)C(=O)O,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures/7WUX_6OI_holo_aligned_predicted_protein.pdb +8F8E_XJI,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures/8F8E_XJI_holo_aligned_predicted_protein.pdb,NC(=O)C[C@H](NC(=O)c1c[nH]nc1-c1ccc(Cl)cc1F)c1ccc(F)c(Cl)c1,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures/8F8E_XJI_holo_aligned_predicted_protein.pdb +7PL1_SFG,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures/7PL1_SFG_holo_aligned_predicted_protein.pdb,Nc1ncnc2c1ncn2[C@@H]1O[C@H](C[C@@H](N)CC[C@H](N)C(=O)O)[C@@H](O)[C@H]1O,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures/7PL1_SFG_holo_aligned_predicted_protein.pdb +6Z14_Q4Z,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures/6Z14_Q4Z_holo_aligned_predicted_protein.pdb,CC1=[NH+][C@H]2[C@@H](O1)O[C@H](COS(=O)(=O)[O-])[C@@H](O)[C@@H]2O,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures/6Z14_Q4Z_holo_aligned_predicted_protein.pdb +7ODY_DGI,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures/7ODY_DGI_holo_aligned_predicted_protein.pdb,Nc1nc2c(ncn2[C@H]2C[C@H](O)[C@@H](CO[P@@](=O)(O)OP(=O)(O)O)O2)c(=O)[nH]1,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures/7ODY_DGI_holo_aligned_predicted_protein.pdb +7RKW_5TV,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures/7RKW_5TV_holo_aligned_predicted_protein.pdb,O=C(O[C@H](Cn1ccnc1)c1ccc(F)cc1)c1cc(Cl)cc(Cl)c1,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures/7RKW_5TV_holo_aligned_predicted_protein.pdb 
+5SAK_ZRY,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures/5SAK_ZRY_holo_aligned_predicted_protein.pdb,N=C1N/C(=N\Nc2ccccc2)c2ccccc21,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures/5SAK_ZRY_holo_aligned_predicted_protein.pdb +8AUH_L9I,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures/8AUH_L9I_holo_aligned_predicted_protein.pdb,CCOC(=O)/C(=N\O)C(C)=O,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures/8AUH_L9I_holo_aligned_predicted_protein.pdb +8C5M_MTA,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures/8C5M_MTA_holo_aligned_predicted_protein.pdb,CSC[C@H]1O[C@@H](n2cnc3c(N)ncnc32)[C@H](O)[C@@H]1O,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures/8C5M_MTA_holo_aligned_predicted_protein.pdb +7N03_ZRP,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures/7N03_ZRP_holo_aligned_predicted_protein.pdb,CCCCCCNc1ccc2ncc(C(=O)NC)c(Nc3ccccc3)c2c1,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures/7N03_ZRP_holo_aligned_predicted_protein.pdb +7U0U_FK5,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures/7U0U_FK5_holo_aligned_predicted_protein.pdb,C=CC[C@@H]1/C=C(\C)C[C@H](C)C[C@H](OC)[C@H]2O[C@@](O)(C(=O)C(=O)N3CCCC[C@H]3C(=O)O[C@H](/C(C)=C/[C@@H]3CC[C@@H](O)[C@H](OC)C3)[C@H](C)[C@@H](O)CC1=O)[C@H](C)C[C@@H]2OC,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures/7U0U_FK5_holo_aligned_predicted_protein.pdb 
+7OFK_VCH,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures/7OFK_VCH_holo_aligned_predicted_protein.pdb,CC(=O)N1Cc2cc(S(C)(=O)=O)ccc2[C@@H]1C(=O)Nc1ccc(C(O)(C(F)(F)F)C(F)(F)F)cc1,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures/7OFK_VCH_holo_aligned_predicted_protein.pdb +7ULC_56B,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures/7ULC_56B_holo_aligned_predicted_protein.pdb,Nc1nc2c(c(CN[C@H]3C=C[C@H](O)[C@@H]3O)cn2[C@@H]2O[C@H](COP(=O)(O)O)[C@@H](O)[C@H]2O)c(=O)[nH]1,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures/7ULC_56B_holo_aligned_predicted_protein.pdb +7TS6_KMI,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures/7TS6_KMI_holo_aligned_predicted_protein.pdb,CNCc1cccc(-c2cc(C)cc(N)n2)c1,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures/7TS6_KMI_holo_aligned_predicted_protein.pdb +7NF3_4LU,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures/7NF3_4LU_holo_aligned_predicted_protein.pdb,Cc1cc2c3c(c1C)C(C)(C)CC=[N+]3c1c([nH]c(=O)[nH]c1=O)N2C[C@H](O)[C@H](O)[C@H](O)COP(=O)(O)O,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures/7NF3_4LU_holo_aligned_predicted_protein.pdb +7Z1Q_NIO,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures/7Z1Q_NIO_holo_aligned_predicted_protein.pdb,O=C(O)c1cccnc1,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures/7Z1Q_NIO_holo_aligned_predicted_protein.pdb 
+7QPP_VDX,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures/7QPP_VDX_holo_aligned_predicted_protein.pdb,C=C1/C(=C\C=C2/CCC[C@]3(C)[C@@H]([C@H](C)CCCC(C)(C)O)CC[C@@H]23)C[C@@H](O)C[C@@H]1O,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures/7QPP_VDX_holo_aligned_predicted_protein.pdb +7P4C_5OV,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures/7P4C_5OV_holo_aligned_predicted_protein.pdb,O=S1(=O)N[C@@H]2[C@H](O)[C@@H](O)[C@H](O)[C@@H](CO)[C@@H]2O1,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures/7P4C_5OV_holo_aligned_predicted_protein.pdb +7VQ9_ISY,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures/7VQ9_ISY_holo_aligned_predicted_protein.pdb,C=C(C)CCS[P@@](=O)(O)OP(=O)(O)O,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures/7VQ9_ISY_holo_aligned_predicted_protein.pdb +6VTA_AKN,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures/6VTA_AKN_holo_aligned_predicted_protein.pdb,NCC[C@H](O)C(=O)N[C@@H]1C[C@H](N)[C@@H](O[C@H]2O[C@H](CN)[C@@H](O)[C@H](O)[C@H]2O)[C@H](O)[C@H]1O[C@H]1O[C@H](CO)[C@@H](O)[C@H](N)[C@H]1O,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures/6VTA_AKN_holo_aligned_predicted_protein.pdb +7V43_C4O,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures/7V43_C4O_holo_aligned_predicted_protein.pdb,Cc1ccc(Cl)cc1,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures/7V43_C4O_holo_aligned_predicted_protein.pdb 
+6M2B_EZO,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures/6M2B_EZO_holo_aligned_predicted_protein.pdb,CN(/N=C/c1ccccc1C(=O)O)c1nc(-c2ccccc2Cl)cs1,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures/6M2B_EZO_holo_aligned_predicted_protein.pdb +7UJ5_DGL,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures/7UJ5_DGL_holo_aligned_predicted_protein.pdb,N[C@H](CCC(=O)O)C(=O)O,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures/7UJ5_DGL_holo_aligned_predicted_protein.pdb +7OZC_G6S,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures/7OZC_G6S_holo_aligned_predicted_protein.pdb,O=S(=O)(O)OC[C@H]1O[C@@H](O)[C@H](O)[C@@H](O)[C@H]1O,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures/7OZC_G6S_holo_aligned_predicted_protein.pdb +7TB0_UD1,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures/7TB0_UD1_holo_aligned_predicted_protein.pdb,CC(=O)N[C@H]1[C@@H](O[P@@](=O)(O)O[P@@](=O)(O)OC[C@H]2O[C@@H](n3ccc(=O)[nH]c3=O)[C@H](O)[C@@H]2O)O[C@H](CO)[C@@H](O)[C@@H]1O,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures/7TB0_UD1_holo_aligned_predicted_protein.pdb +8C3N_ADP,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures/8C3N_ADP_holo_aligned_predicted_protein.pdb,Nc1ncnc2c1ncn2[C@@H]1O[C@H](CO[P@](=O)(O)OP(=O)(O)O)[C@@H](O)[C@H]1O,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures/8C3N_ADP_holo_aligned_predicted_protein.pdb +7NUT_GLP,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures/7NUT_GLP_holo_aligned_predicted_protein.pdb,N[C@@H]1[C@@H](O)[C@H](O)[C@@H](COP(=O)(O)O)O[C@@H]1O,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures/7NUT_GLP_holo_aligned_predicted_protein.pdb 
+7P1M_4IU,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures/7P1M_4IU_holo_aligned_predicted_protein.pdb,Cn1c(CO[C@@H]2C=CO[C@H](CO)[C@@H]2O)nc2ccc(C(=O)O)cc21,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures/7P1M_4IU_holo_aligned_predicted_protein.pdb +7EPV_FDA,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures/7EPV_FDA_holo_aligned_predicted_protein.pdb,Cc1cc2c(cc1C)N(C[C@H](O)[C@H](O)[C@H](O)CO[P@@](=O)(O)O[P@@](=O)(O)OC[C@H]1O[C@@H](n3cnc4c(N)ncnc43)[C@H](O)[C@@H]1O)c1[nH]c(=O)[nH]c(=O)c1N2,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures/7EPV_FDA_holo_aligned_predicted_protein.pdb +6Z0R_Q4H,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures/6Z0R_Q4H_holo_aligned_predicted_protein.pdb,N#Cc1cncnc1N,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures/6Z0R_Q4H_holo_aligned_predicted_protein.pdb +7P1F_KFN,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures/7P1F_KFN_holo_aligned_predicted_protein.pdb,O=C(O)C1=C[C@H](O)[C@@H](O)[C@H]([C@H](O)[C@H](O)CO)O1,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures/7P1F_KFN_holo_aligned_predicted_protein.pdb +7VB8_STL,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures/7VB8_STL_holo_aligned_predicted_protein.pdb,Oc1ccc(/C=C/c2cc(O)cc(O)c2)cc1,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures/7VB8_STL_holo_aligned_predicted_protein.pdb 
+6YR2_T1C,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures/6YR2_T1C_holo_aligned_predicted_protein.pdb,C[NH+](C)c1cc(NC(=O)CNC(C)(C)C)c(O)c2c1C[C@H]1C[C@H]3[C@H]([NH+](C)C)C(O)=C(C(N)=O)C(=O)[C@@]3(O)C(O)=C1C2=O,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures/6YR2_T1C_holo_aligned_predicted_protein.pdb +7CIJ_G0C,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures/7CIJ_G0C_holo_aligned_predicted_protein.pdb,CSCCC/N=C/c1c(COP(=O)(O)O)cnc(C)c1O,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures/7CIJ_G0C_holo_aligned_predicted_protein.pdb +7UMW_NAD,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures/7UMW_NAD_holo_aligned_predicted_protein.pdb,NC(=O)c1ccc[n+]([C@@H]2O[C@H](CO[P@@](=O)([O-])O[P@](=O)(O)OC[C@H]3O[C@@H](n4cnc5c(N)ncnc54)[C@H](O)[C@@H]3O)[C@@H](O)[C@H]2O)c1,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures/7UMW_NAD_holo_aligned_predicted_protein.pdb +7TOM_5AD,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures/7TOM_5AD_holo_aligned_predicted_protein.pdb,C[C@H]1O[C@@H](n2cnc3c(N)ncnc32)[C@H](O)[C@@H]1O,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures/7TOM_5AD_holo_aligned_predicted_protein.pdb +7DQL_4CL,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures/7DQL_4CL_holo_aligned_predicted_protein.pdb,Oc1ccc(Cl)cc1O,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures/7DQL_4CL_holo_aligned_predicted_protein.pdb +7CUO_PHB,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures/7CUO_PHB_holo_aligned_predicted_protein.pdb,O=C(O)c1ccc(O)cc1,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures/7CUO_PHB_holo_aligned_predicted_protein.pdb 
+7Q2B_M6H,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures/7Q2B_M6H_holo_aligned_predicted_protein.pdb,c1ccc(CC2NCCN2)cc1,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures/7Q2B_M6H_holo_aligned_predicted_protein.pdb +7MWN_WI5,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures/7MWN_WI5_holo_aligned_predicted_protein.pdb,Cc1c(C(=O)c2cccc3ccccc23)c2cccc3c2n1[C@H](CN1CCOCC1)CO3,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures/7MWN_WI5_holo_aligned_predicted_protein.pdb +7L5F_XNG,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures/7L5F_XNG_holo_aligned_predicted_protein.pdb,CCCCCCCCCC(=O)NCCCC(=O)O,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures/7L5F_XNG_holo_aligned_predicted_protein.pdb +8DSC_NCA,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures/8DSC_NCA_holo_aligned_predicted_protein.pdb,NC(=O)c1cccnc1,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures/8DSC_NCA_holo_aligned_predicted_protein.pdb +7ZDY_6MJ,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures/7ZDY_6MJ_holo_aligned_predicted_protein.pdb,CO[C@@H]1OC[C@@H](O)[C@H](O)[C@H]1O,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures/7ZDY_6MJ_holo_aligned_predicted_protein.pdb +7NP6_UK8,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures/7NP6_UK8_holo_aligned_predicted_protein.pdb,O=C(O)c1ccc(OCc2c(-c3c(Cl)cccc3C(F)(F)F)noc2-c2cn[nH]c2)cc1,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures/7NP6_UK8_holo_aligned_predicted_protein.pdb 
+7JXX_VP7,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures/7JXX_VP7_holo_aligned_predicted_protein.pdb,CC(C)(O)C#Cc1ccc2[nH]c3c(c2c1)-c1nc(N)ncc1CCC3,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures/7JXX_VP7_holo_aligned_predicted_protein.pdb +7LOE_Y84,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures/7LOE_Y84_holo_aligned_predicted_protein.pdb,Fc1cccc2ccccc12,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures/7LOE_Y84_holo_aligned_predicted_protein.pdb +7KM8_WPD,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures/7KM8_WPD_holo_aligned_predicted_protein.pdb,CCc1nc(N)nc(N)c1OCCCOc1cccc(C[C@@H](C(=O)O)C(F)F)c1,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures/7KM8_WPD_holo_aligned_predicted_protein.pdb +7UY4_SMI,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures/7UY4_SMI_holo_aligned_predicted_protein.pdb,CN[C@@H]1[C@H](O)[C@H](NC)[C@H]2O[C@]3(O)[C@H](O[C@@H]2[C@H]1O)O[C@H](C)CC3(O)O,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures/7UY4_SMI_holo_aligned_predicted_protein.pdb +7CL8_TES,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures/7CL8_TES_holo_aligned_predicted_protein.pdb,C[C@]12CC[C@H]3[C@@H](CCC4=CC(=O)CC[C@@]43C)[C@@H]1CC[C@@H]2O,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures/7CL8_TES_holo_aligned_predicted_protein.pdb +6Z1C_7EY,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures/6Z1C_7EY_holo_aligned_predicted_protein.pdb,Cc1ccc(-c2csc3ncnc(SCCC(=O)O)c23)cc1,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures/6Z1C_7EY_holo_aligned_predicted_protein.pdb 
+8HFN_XGC,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures/8HFN_XGC_holo_aligned_predicted_protein.pdb,COc1ccc(-c2cccc(S(=O)(=O)NC(=O)[C@@H](N)CS)c2)cn1,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures/8HFN_XGC_holo_aligned_predicted_protein.pdb +8EXL_799,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures/8EXL_799_holo_aligned_predicted_protein.pdb,Cc1nc(-c2cn3c(n2)-c2ccc(-c4cnn(C(C)(C)C(N)=O)c4)cc2OCC3)n(C(C)C)n1,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures/8EXL_799_holo_aligned_predicted_protein.pdb +7Z7F_IF3,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures/7Z7F_IF3_holo_aligned_predicted_protein.pdb,CNc1nc(NC)c2ncn(C)c2n1,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures/7Z7F_IF3_holo_aligned_predicted_protein.pdb +7XG5_PLP,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures/7XG5_PLP_holo_aligned_predicted_protein.pdb,Cc1ncc(COP(=O)(O)O)c(C=O)c1O,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures/7XG5_PLP_holo_aligned_predicted_protein.pdb +7MOI_HPS,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures/7MOI_HPS_holo_aligned_predicted_protein.pdb,O=P(O)(O)Oc1ccccc1,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures/7MOI_HPS_holo_aligned_predicted_protein.pdb +7LMO_NYO,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures/7LMO_NYO_holo_aligned_predicted_protein.pdb,CCN(CC)c1ccc2c(C)c(CCN3C(=O)N[C@@]4(CCN(C(=O)c5c[nH]cn5)C4)C3=O)c(=O)oc2c1,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures/7LMO_NYO_holo_aligned_predicted_protein.pdb 
+7MSR_DCA,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures/7MSR_DCA_holo_aligned_predicted_protein.pdb,CCNC(=O)CCNC(=O)[C@H](O)C(C)(C)CO[P@@](=O)(O)O[P@@](=O)(O)OC[C@H]1O[C@@H](n2cnc3c(N)ncnc32)[C@H](O)[C@@H]1OP(=O)(O)O,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures/7MSR_DCA_holo_aligned_predicted_protein.pdb +6ZCY_QF8,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures/6ZCY_QF8_holo_aligned_predicted_protein.pdb,CNC(=O)c1nn(C)c2ccc(Nc3nccc(-n4cc(N[C@@H]5CCNC5)c(C)n4)n3)cc12,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures/6ZCY_QF8_holo_aligned_predicted_protein.pdb +6ZC3_JOR,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures/6ZC3_JOR_holo_aligned_predicted_protein.pdb,CS(=O)(=O)Nc1ccc(F)cc1C(=O)O,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures/6ZC3_JOR_holo_aligned_predicted_protein.pdb +7LZD_YHY,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures/7LZD_YHY_holo_aligned_predicted_protein.pdb,Cc1ccc(F)c2cc(C(=O)Nc3cccc(N4CCC(N(C)C)CC4)c3)[nH]c12,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures/7LZD_YHY_holo_aligned_predicted_protein.pdb +7N4N_0BK,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures/7N4N_0BK_holo_aligned_predicted_protein.pdb,COc1cc(C(=O)Nc2ccc(F)c([C@]3(CF)CC[C@@](C)(S(C)(=O)=O)C(N)=N3)c2)ncn1,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures/7N4N_0BK_holo_aligned_predicted_protein.pdb 
+7KRU_ATP,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures/7KRU_ATP_holo_aligned_predicted_protein.pdb,Nc1ncnc2c1ncn2[C@@H]1O[C@H](CO[P@@](=O)(O)O[P@](=O)(O)OP(=O)(O)O)[C@@H](O)[C@H]1O,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures/7KRU_ATP_holo_aligned_predicted_protein.pdb +7UTW_NAI,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures/7UTW_NAI_holo_aligned_predicted_protein.pdb,NC(=O)C1=CN([C@@H]2O[C@H](CO[P@](=O)(O)O[P@@](=O)(O)OC[C@H]3O[C@@H](n4cnc5c(N)ncnc54)[C@H](O)[C@@H]3O)[C@@H](O)[C@H]2O)C=CC1,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures/7UTW_NAI_holo_aligned_predicted_protein.pdb +7BKA_4JC,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures/7BKA_4JC_holo_aligned_predicted_protein.pdb,CCc1ccc(S(N)(=O)=O)cc1,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures/7BKA_4JC_holo_aligned_predicted_protein.pdb +5SD5_HWI,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures/5SD5_HWI_holo_aligned_predicted_protein.pdb,CCc1nc(N)nc(N)c1OCCCOc1cc(C)ccc1N1CC(C(=O)O)C1,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures/5SD5_HWI_holo_aligned_predicted_protein.pdb +7SCW_GSP,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures/7SCW_GSP_holo_aligned_predicted_protein.pdb,Nc1nc2c(ncn2[C@@H]2O[C@H](CO[P@](=O)(O)O[P@](=O)(O)OP(O)(O)=S)[C@@H](O)[C@H]2O)c(=O)[nH]1,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures/7SCW_GSP_holo_aligned_predicted_protein.pdb 
+7SDD_4IP,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures/7SDD_4IP_holo_aligned_predicted_protein.pdb,O=P(O)(O)O[C@H]1[C@H](O)[C@@H](OP(=O)(O)O)[C@H](OP(=O)(O)O)[C@@H](OP(=O)(O)O)[C@H]1O,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures/7SDD_4IP_holo_aligned_predicted_protein.pdb +7TH4_FFO,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures/7TH4_FFO_holo_aligned_predicted_protein.pdb,Nc1nc2c(c(=O)[nH]1)N(C=O)[C@@H](CNc1ccc(C(=O)N[C@@H](CCC(=O)O)C(=O)O)cc1)CN2,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures/7TH4_FFO_holo_aligned_predicted_protein.pdb +8D19_GSH,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures/8D19_GSH_holo_aligned_predicted_protein.pdb,N[C@@H](CCC(=O)N[C@@H](CS)C(=O)NCC(=O)O)C(=O)O,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures/8D19_GSH_holo_aligned_predicted_protein.pdb +7A1P_QW2,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures/7A1P_QW2_holo_aligned_predicted_protein.pdb,CCC[C@@H](CC(=O)C(=O)O)C(=O)O,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures/7A1P_QW2_holo_aligned_predicted_protein.pdb +8G0V_YHT,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures/8G0V_YHT_holo_aligned_predicted_protein.pdb,C#CCO[P@](=O)(O)OC[C@H]1O[C@@H](n2cnc3c(N)ncnc32)[C@H](O)[C@@H]1O,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures/8G0V_YHT_holo_aligned_predicted_protein.pdb 
+7MFP_Z7P,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures/7MFP_Z7P_holo_aligned_predicted_protein.pdb,Cc1ncc(COP(=O)(O)O)c(/C=N/[C@H]2[C@H](O)[C@@H](O)[C@@H](O[P@](=O)(O)O[P@](=O)(O)OC[C@H]3O[C@@H](n4ccc(=O)[nH]c4=O)[C@H](O)[C@@H]3O)O[C@@H]2C)c1O,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures/7MFP_Z7P_holo_aligned_predicted_protein.pdb +7N7H_CTP,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures/7N7H_CTP_holo_aligned_predicted_protein.pdb,Nc1ccn([C@@H]2O[C@H](CO[P@](=O)(O)O[P@@](=O)(O)OP(=O)(O)O)[C@@H](O)[C@H]2O)c(=O)n1,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures/7N7H_CTP_holo_aligned_predicted_protein.pdb +7X9K_8OG,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures/7X9K_8OG_holo_aligned_predicted_protein.pdb,Nc1nc2c([nH]c(=O)n2[C@H]2C[C@H](O)[C@@H](COP(=O)(O)O)O2)c(=O)[nH]1,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures/7X9K_8OG_holo_aligned_predicted_protein.pdb +7NFB_GEN,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures/7NFB_GEN_holo_aligned_predicted_protein.pdb,O=c1c(-c2ccc(O)cc2)coc2cc(O)cc(O)c12,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures/7NFB_GEN_holo_aligned_predicted_protein.pdb +7DKT_GLF,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures/7DKT_GLF_holo_aligned_predicted_protein.pdb,OC[C@H]1O[C@H](F)[C@H](O)[C@@H](O)[C@@H]1O,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures/7DKT_GLF_holo_aligned_predicted_protein.pdb 
+7R3D_APR,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures/7R3D_APR_holo_aligned_predicted_protein.pdb,Nc1ncnc2c1ncn2[C@@H]1O[C@H](CO[P@](=O)(O)O[P@@](=O)(O)OC[C@H]2O[C@@H](O)[C@H](O)[C@@H]2O)[C@@H](O)[C@H]1O,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures/7R3D_APR_holo_aligned_predicted_protein.pdb +6YYO_Q1K,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures/6YYO_Q1K_holo_aligned_predicted_protein.pdb,CS(=O)(=O)N1CCN(c2ccc3nncn3n2)CC1,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures/6YYO_Q1K_holo_aligned_predicted_protein.pdb +7V14_ORU,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures/7V14_ORU_holo_aligned_predicted_protein.pdb,[O-][n+]1cc(-c2c(-n3cnnn3)ccc(Cl)c2F)ccc1[C@@H](CC1CC1)n1cc(-c2cncs2)cn1,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures/7V14_ORU_holo_aligned_predicted_protein.pdb +7SIU_9ID,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures/7SIU_9ID_holo_aligned_predicted_protein.pdb,Cn1cc(Nc2ncc(C3=CCC[C@@H](NC(=O)c4ccccc4)C3)nc2C(N)=O)cn1,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures/7SIU_9ID_holo_aligned_predicted_protein.pdb +7TE8_P0T,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures/7TE8_P0T_holo_aligned_predicted_protein.pdb,C=C(C)[C@@H]1CCC(C)=C[C@H]1c1c(O)cc(CCCCC)cc1O,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures/7TE8_P0T_holo_aligned_predicted_protein.pdb 
+7F8T_FAD,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures/7F8T_FAD_holo_aligned_predicted_protein.pdb,Cc1cc2nc3c(=O)[nH]c(=O)nc-3n(C[C@H](O)[C@H](O)[C@H](O)CO[P@@](=O)(O)O[P@](=O)(O)OC[C@H]3O[C@@H](n4cnc5c(N)ncnc54)[C@H](O)[C@@H]3O)c2cc1C,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures/7F8T_FAD_holo_aligned_predicted_protein.pdb +7TUO_KL9,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures/7TUO_KL9_holo_aligned_predicted_protein.pdb,Cc1cnc2c(N)c(C(=O)NCCc3ccc([C@@H]4C[C@H]5CC[C@@H](C4)N5)cc3)sc2n1,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures/7TUO_KL9_holo_aligned_predicted_protein.pdb +7B94_ANP,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures/7B94_ANP_holo_aligned_predicted_protein.pdb,Nc1ncnc2c1ncn2[C@@H]1O[C@H](CO[P@@](=O)(O)O[P@](=O)(O)NP(=O)(O)O)[C@@H](O)[C@H]1O,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures/7B94_ANP_holo_aligned_predicted_protein.pdb +7LEV_0JO,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures/7LEV_0JO_holo_aligned_predicted_protein.pdb,C=C(/N=C/c1c(COP(=O)(O)O)cnc(C)c1O)C(=O)O,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures/7LEV_0JO_holo_aligned_predicted_protein.pdb +8A1H_DLZ,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures/8A1H_DLZ_holo_aligned_predicted_protein.pdb,Cc1nc2c(=O)[nH]c(=O)nc-2n(C[C@H](O)[C@H](O)[C@H](O)CO)c1C,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures/8A1H_DLZ_holo_aligned_predicted_protein.pdb 
+7NU0_DCL,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures/7NU0_DCL_holo_aligned_predicted_protein.pdb,CC(C)C[C@H](N)CO,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures/7NU0_DCL_holo_aligned_predicted_protein.pdb +7QF4_RBF,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures/7QF4_RBF_holo_aligned_predicted_protein.pdb,Cc1cc2nc3c(=O)[nH]c(=O)nc-3n(C[C@H](O)[C@H](O)[C@H](O)CO)c2cc1C,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures/7QF4_RBF_holo_aligned_predicted_protein.pdb +7Z2O_IAJ,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures/7Z2O_IAJ_holo_aligned_predicted_protein.pdb,COc1cccc2sc3nncn3c12,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures/7Z2O_IAJ_holo_aligned_predicted_protein.pdb +7O0N_CDP,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures/7O0N_CDP_holo_aligned_predicted_protein.pdb,Nc1ccn([C@@H]2O[C@H](CO[P@@](=O)(O)OP(=O)(O)O)[C@@H](O)[C@H]2O)c(=O)n1,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures/7O0N_CDP_holo_aligned_predicted_protein.pdb +7X5N_5M5,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures/7X5N_5M5_holo_aligned_predicted_protein.pdb,Cc1n[nH]c2c1[C@](c1cc(CO)cc(-c3ccccc3)c1)(C(C)C)C(C#N)=C(N)O2,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures/7X5N_5M5_holo_aligned_predicted_protein.pdb +8BOM_QU6,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures/8BOM_QU6_holo_aligned_predicted_protein.pdb,COc1cccc(NC(=O)c2ccc(C)c(Nc3nc(-c4cccnc4)nc4c3cnn4C)c2)c1,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures/8BOM_QU6_holo_aligned_predicted_protein.pdb 
+7K0V_VQP,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures/7K0V_VQP_holo_aligned_predicted_protein.pdb,Cc1cc(F)c(NC(=O)NCCC(C)(C)C)cc1Nc1ccc2ncn(C)c(=O)c2c1F,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures/7K0V_VQP_holo_aligned_predicted_protein.pdb +7MMH_ZJY,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures/7MMH_ZJY_holo_aligned_predicted_protein.pdb,COc1ccc2nc(C)c(O[C@@H]3C[C@H]4C(=O)N[C@]5(C(=O)NS(=O)(=O)C6(C)CC6)C[C@H]5/C=C\CCCCC[C@H](NC(=O)OCC5(C(F)(F)F)CC5)C(=O)N4C3)nc2c1,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures/7MMH_ZJY_holo_aligned_predicted_protein.pdb +7PJQ_OWH,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures/7PJQ_OWH_holo_aligned_predicted_protein.pdb,CNC(=S)c1cccnc1,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures/7PJQ_OWH_holo_aligned_predicted_protein.pdb +7FHA_ADX,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures/7FHA_ADX_holo_aligned_predicted_protein.pdb,Nc1ncnc2c1ncn2[C@@H]1O[C@H](CO[P@@](=O)(O)OS(=O)(=O)O)[C@@H](O)[C@H]1O,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures/7FHA_ADX_holo_aligned_predicted_protein.pdb +7BTT_F8R,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures/7BTT_F8R_holo_aligned_predicted_protein.pdb,COc1cc2c(cc1Nc1nc(Nc3ccccc3S(=O)(=O)C(C)C)c3[nH]ccc3n1)N(C(=O)CN(C)C)CC2,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures/7BTT_F8R_holo_aligned_predicted_protein.pdb 
+7QHG_T3B,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures/7QHG_T3B_holo_aligned_predicted_protein.pdb,CC(C)C(=O)Nc1ncc(C(=O)NCCN(Cc2ccccc2)C(=O)c2ccc(S(=O)(=O)Nc3ccccc3)cc2)s1,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures/7QHG_T3B_holo_aligned_predicted_protein.pdb +7N6F_0I1,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures/7N6F_0I1_holo_aligned_predicted_protein.pdb,O=C(C1CCN(c2cncnc2-c2ccc(F)cc2)CC1)N1CC(F)C1,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures/7N6F_0I1_holo_aligned_predicted_protein.pdb +8AAU_LH0,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures/8AAU_LH0_holo_aligned_predicted_protein.pdb,CC(C)C(=O)Nc1ncc(-c2cc(C(F)F)nn2-c2c(Cl)cccc2Cl)s1,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures/8AAU_LH0_holo_aligned_predicted_protein.pdb +8GFD_ZHR,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures/8GFD_ZHR_holo_aligned_predicted_protein.pdb,CC(=O)N[C@H]1[C@H](OCCc2ccc3occc3c2)O[C@H](CO)[C@@H](O)[C@@H]1O,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures/8GFD_ZHR_holo_aligned_predicted_protein.pdb +7TBU_S3P,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures/7TBU_S3P_holo_aligned_predicted_protein.pdb,O=C(O)C1=C[C@@H](OP(=O)(O)O)[C@@H](O)[C@H](O)C1,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures/7TBU_S3P_holo_aligned_predicted_protein.pdb +6YQW_82I,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures/6YQW_82I_holo_aligned_predicted_protein.pdb,CNc1cnn(C)c(=O)c1Cl,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures/6YQW_82I_holo_aligned_predicted_protein.pdb 
+7W06_ITN,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures/7W06_ITN_holo_aligned_predicted_protein.pdb,C=C(CC(=O)O)C(=O)O,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures/7W06_ITN_holo_aligned_predicted_protein.pdb +7ZL5_IWE,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures/7ZL5_IWE_holo_aligned_predicted_protein.pdb,NS(=O)(=O)c1cc(-c2nnn[nH]2)c(NCc2cccs2)cc1Cl,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures/7ZL5_IWE_holo_aligned_predicted_protein.pdb +7L03_F9F,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures/7L03_F9F_holo_aligned_predicted_protein.pdb,O=P(O)(O)OCCNS(=O)(=O)c1ccc(OC(F)(F)F)cc1,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures/7L03_F9F_holo_aligned_predicted_protein.pdb +7XJN_NSD,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures/7XJN_NSD_holo_aligned_predicted_protein.pdb,NCCCNCCCN,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures/7XJN_NSD_holo_aligned_predicted_protein.pdb +7ZCC_OGA,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures/7ZCC_OGA_holo_aligned_predicted_protein.pdb,O=C(O)CNC(=O)C(=O)O,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures/7ZCC_OGA_holo_aligned_predicted_protein.pdb +6TW5_9M2,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures/6TW5_9M2_holo_aligned_predicted_protein.pdb,Cc1nn(C)c(C)c1CCOc1cc(F)ccc1-c1ccc2n[nH]c(CN(C)C)c2c1,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures/6TW5_9M2_holo_aligned_predicted_protein.pdb 
+7VWF_K55,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures/7VWF_K55_holo_aligned_predicted_protein.pdb,CCCCOc1ccc(C[C@H](CC)C(=O)O)cc1CNC(=O)c1ccc(C(F)(F)F)cc1F,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures/7VWF_K55_holo_aligned_predicted_protein.pdb +7SZA_DUI,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures/7SZA_DUI_holo_aligned_predicted_protein.pdb,Nc1cccc2c1C(=O)N([C@@H]1CCC(=O)NC1=O)C2=O,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures/7SZA_DUI_holo_aligned_predicted_protein.pdb +7POM_7VZ,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures/7POM_7VZ_holo_aligned_predicted_protein.pdb,COC(=O)c1cc(S(N)(=O)=O)c(SC2CCCCC2)cc1Cl,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures/7POM_7VZ_holo_aligned_predicted_protein.pdb +7KQU_YOF,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures/7KQU_YOF_holo_aligned_predicted_protein.pdb,N[C@@H](Cc1ccc(O)c(F)c1)C(=O)O,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures/7KQU_YOF_holo_aligned_predicted_protein.pdb +7ZOC_T8E,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures/7ZOC_T8E_holo_aligned_predicted_protein.pdb,CC(=O)c1ccc(NC(=O)[C@H](C)S)cc1,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures/7ZOC_T8E_holo_aligned_predicted_protein.pdb +7ELT_TYM,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures/7ELT_TYM_holo_aligned_predicted_protein.pdb,Nc1ncnc2c1ncn2[C@@H]1O[C@H](CO[P@@](=O)(O)OC(=O)[C@@H](N)Cc2c[nH]c3ccccc23)[C@@H](O)[C@H]1O,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures/7ELT_TYM_holo_aligned_predicted_protein.pdb 
+6YRV_PJ8,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures/6YRV_PJ8_holo_aligned_predicted_protein.pdb,CCCCCCCCCCCCCCCCC,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures/6YRV_PJ8_holo_aligned_predicted_protein.pdb +7CNS_PMV,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures/7CNS_PMV_holo_aligned_predicted_protein.pdb,C[C@@](O)(CCOP(=O)(O)O)CC(=O)O,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures/7CNS_PMV_holo_aligned_predicted_protein.pdb +6XG5_TOP,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures/6XG5_TOP_holo_aligned_predicted_protein.pdb,COc1cc(Cc2cnc(N)nc2N)cc(OC)c1OC,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures/6XG5_TOP_holo_aligned_predicted_protein.pdb +7USH_82V,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures/7USH_82V_holo_aligned_predicted_protein.pdb,O=c1cc(N2CCOCC2)oc2c(-c3ccc4c(c3)OCCO4)csc12,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures/7USH_82V_holo_aligned_predicted_protein.pdb +7OZ9_NGK,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures/7OZ9_NGK_holo_aligned_predicted_protein.pdb,CC(=O)N[C@@H]1[C@@H](O)[C@@H](OS(=O)(=O)O)[C@@H](CO)O[C@@H]1O,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures/7OZ9_NGK_holo_aligned_predicted_protein.pdb +7TXK_LW8,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures/7TXK_LW8_holo_aligned_predicted_protein.pdb,C[N+](C)(C)[C@@H](Cc1c[nH]c(=S)[nH]1)C(=O)O,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures/7TXK_LW8_holo_aligned_predicted_protein.pdb 
+7ZHP_IQY,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures/7ZHP_IQY_holo_aligned_predicted_protein.pdb,CCC(O)(C#Cc1ccc2[nH]c3c(c2c1)-c1nc(N)ncc1CCC3)CC,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures/7ZHP_IQY_holo_aligned_predicted_protein.pdb +8AQL_PLG,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures/8AQL_PLG_holo_aligned_predicted_protein.pdb,Cc1ncc(COP(=O)(O)O)c(CNCC(=O)O)c1O,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures/8AQL_PLG_holo_aligned_predicted_protein.pdb +8BTI_RFO,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures/8BTI_RFO_holo_aligned_predicted_protein.pdb,COCC(=O)n1ccc2c(Cl)cccc21,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures/8BTI_RFO_holo_aligned_predicted_protein.pdb +7ROU_66I,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures/7ROU_66I_holo_aligned_predicted_protein.pdb,Nc1ncnc2c1c(OC(F)F)nn2[C@@H]1O[C@H](COS(=O)(=O)NC(=O)[C@@H](N)Cc2ccc(O)cc2)[C@@H](O)[C@H]1O,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures/7ROU_66I_holo_aligned_predicted_protein.pdb +6ZAE_ACV,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures/6ZAE_ACV_holo_aligned_predicted_protein.pdb,CC(C)[C@@H](NC(=O)[C@H](CS)NC(=O)CCC[C@H](N)C(=O)O)C(=O)O,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures/6ZAE_ACV_holo_aligned_predicted_protein.pdb +6YSP_PAL,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures/6YSP_PAL_holo_aligned_predicted_protein.pdb,O=C(O)C[C@H](NC(=O)CP(=O)(O)O)C(=O)O,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures/6YSP_PAL_holo_aligned_predicted_protein.pdb 
+8B8H_OJQ,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures/8B8H_OJQ_holo_aligned_predicted_protein.pdb,Cc1ncc(COP(=O)(O)O)c(C[NH2+]c2conc2O)c1O,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures/8B8H_OJQ_holo_aligned_predicted_protein.pdb +7THI_PGA,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures/7THI_PGA_holo_aligned_predicted_protein.pdb,O=C(O)COP(=O)(O)O,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures/7THI_PGA_holo_aligned_predicted_protein.pdb +7OFF_VCB,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures/7OFF_VCB_holo_aligned_predicted_protein.pdb,O=C(N[C@@H](C(=O)O)c1ccccc1)c1cccc2c1-c1ccccc1C2=O,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures/7OFF_VCB_holo_aligned_predicted_protein.pdb +7WY1_D0L,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures/7WY1_D0L_holo_aligned_predicted_protein.pdb,CCCCCCCN1CCC[C@H]1C(=O)N[C@@H](Cc1ccccc1)C(=O)O,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures/7WY1_D0L_holo_aligned_predicted_protein.pdb +7ES1_UDP,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures/7ES1_UDP_holo_aligned_predicted_protein.pdb,O=c1ccn([C@@H]2O[C@H](CO[P@](=O)(O)OP(=O)(O)O)[C@@H](O)[C@H]2O)c(=O)[nH]1,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures/7ES1_UDP_holo_aligned_predicted_protein.pdb +7V3N_AKG,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures/7V3N_AKG_holo_aligned_predicted_protein.pdb,O=C(O)CCC(=O)C(=O)O,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures/7V3N_AKG_holo_aligned_predicted_protein.pdb 
+7TM6_GPJ,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures/7TM6_GPJ_holo_aligned_predicted_protein.pdb,O=C(O)C[NH2+]CP(=O)(O)O,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures/7TM6_GPJ_holo_aligned_predicted_protein.pdb +7ECR_SIN,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures/7ECR_SIN_holo_aligned_predicted_protein.pdb,O=C(O)CCC(=O)O,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures/7ECR_SIN_holo_aligned_predicted_protein.pdb +7A9E_R4W,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures/7A9E_R4W_holo_aligned_predicted_protein.pdb,CCS(C)(=O)=O,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures/7A9E_R4W_holo_aligned_predicted_protein.pdb +7ZF0_DHR,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures/7ZF0_DHR_holo_aligned_predicted_protein.pdb,N#C[C@@H](O)c1ccc(O)cc1,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures/7ZF0_DHR_holo_aligned_predicted_protein.pdb +7F51_BA7,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures/7F51_BA7_holo_aligned_predicted_protein.pdb,Nc1ncnc2c1ncn2[C@@H]1O[C@H](CO[P@](=O)(O)O[P@](=O)(O)OC[C@H]2O[C@@H](O)[C@H](OC(=O)c3ccccc3)[C@@H]2O)[C@@H](O)[C@H]1O,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures/7F51_BA7_holo_aligned_predicted_protein.pdb +7XFA_D9J,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures/7XFA_D9J_holo_aligned_predicted_protein.pdb,Cc1nc([C@@H]2O[C@H](CO)[C@H](O)[C@H](n3cc(-c4cc(F)c(Cl)c(F)c4)nn3)[C@H]2O)n(-c2cc(Cl)ccc2C(F)(F)F)n1,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures/7XFA_D9J_holo_aligned_predicted_protein.pdb 
+8DKO_TFB,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures/8DKO_TFB_holo_aligned_predicted_protein.pdb,O=C(O)[C@@H]1CCCO1,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures/8DKO_TFB_holo_aligned_predicted_protein.pdb +6T88_MWQ,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures/6T88_MWQ_holo_aligned_predicted_protein.pdb,O=C(O)CCc1cnc[nH]1,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures/6T88_MWQ_holo_aligned_predicted_protein.pdb +7BCP_GCO,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures/7BCP_GCO_holo_aligned_predicted_protein.pdb,O=C(O)[C@H](O)[C@@H](O)[C@H](O)[C@H](O)CO,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures/7BCP_GCO_holo_aligned_predicted_protein.pdb +7NF0_BYN,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures/7NF0_BYN_holo_aligned_predicted_protein.pdb,Cc1cc2c3c(c1C)C(C)(C)C[C@@H](O)N3c1c(nc(O)[nH]c1=O)N2C[C@H](O)[C@H](O)[C@H](O)COP(=O)(O)O,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures/7NF0_BYN_holo_aligned_predicted_protein.pdb +7QE4_NGA,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures/7QE4_NGA_holo_aligned_predicted_protein.pdb,CC(=O)N[C@@H]1[C@@H](O)[C@@H](O)[C@@H](CO)O[C@H]1O,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures/7QE4_NGA_holo_aligned_predicted_protein.pdb +7M3H_YPV,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures/7M3H_YPV_holo_aligned_predicted_protein.pdb,CCCCc1ccc(NS(=O)(=O)c2ccc(O)c(C(=O)O)c2)cc1,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures/7M3H_YPV_holo_aligned_predicted_protein.pdb 
+6Z2C_Q5E,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures/6Z2C_Q5E_holo_aligned_predicted_protein.pdb,O=C(O)CCCCCN1C(=O)[C@@H]2[C@H](C1=O)[C@]1(Cl)C(Cl)=C(Cl)[C@@]2(Cl)C1(Cl)Cl,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures/6Z2C_Q5E_holo_aligned_predicted_protein.pdb +8A2D_KXY,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures/8A2D_KXY_holo_aligned_predicted_protein.pdb,Cc1c(C#Cc2ccc(CN3CCC(CO)CC3)cc2)cc(C(F)F)c2cn([C@@H](C(=O)Nc3nccs3)c3ncn4c3CCC43CC3)nc12,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures/8A2D_KXY_holo_aligned_predicted_protein.pdb +7NGW_UAW,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures/7NGW_UAW_holo_aligned_predicted_protein.pdb,CC1CCN(C(=O)Nc2ccc(O)cc2)CC1,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures/7NGW_UAW_holo_aligned_predicted_protein.pdb +7KZ9_XN7,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures/7KZ9_XN7_holo_aligned_predicted_protein.pdb,O=C(CNCCO)NCCO,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures/7KZ9_XN7_holo_aligned_predicted_protein.pdb +7UAS_MBU,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures/7UAS_MBU_holo_aligned_predicted_protein.pdb,CCn1cc(-c2cc(Cn3ccnc3C)cc3c2CCN([C@H](c2cc(C)ccn2)C2CC2)C3=O)c(C(F)(F)F)n1,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures/7UAS_MBU_holo_aligned_predicted_protein.pdb +7YZU_DO7,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures/7YZU_DO7_holo_aligned_predicted_protein.pdb,CO[C@H]1O[C@H](CS(=O)(=O)O)[C@@H](O)[C@H](O)[C@H]1O,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures/7YZU_DO7_holo_aligned_predicted_protein.pdb 
+7VKZ_NOJ,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures/7VKZ_NOJ_holo_aligned_predicted_protein.pdb,OC[C@H]1NC[C@H](O)[C@@H](O)[C@@H]1O,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures/7VKZ_NOJ_holo_aligned_predicted_protein.pdb +7ROR_69X,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures/7ROR_69X_holo_aligned_predicted_protein.pdb,Nc1ncnc2c1ncn2[C@@H]1O[C@H](CO[P@](=O)(O)OC(=O)[C@@H](N)Cc2ccc(O)cc2)[C@@H](O)[C@H]1O,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures/7ROR_69X_holo_aligned_predicted_protein.pdb +8AY3_OE3,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures/8AY3_OE3_holo_aligned_predicted_protein.pdb,CCn1c(=O)cc(C)c2cc(CNS(=O)(=O)c3ccccc3)ccc21,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures/8AY3_OE3_holo_aligned_predicted_protein.pdb +7C8Q_DSG,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures/7C8Q_DSG_holo_aligned_predicted_protein.pdb,NC(=O)C[C@@H](N)C(=O)O,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures/7C8Q_DSG_holo_aligned_predicted_protein.pdb +7XRL_FWK,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures/7XRL_FWK_holo_aligned_predicted_protein.pdb,CC[C@H]1O[C@@H](n2cnc3c(N)ncnc32)[C@H](O)[C@@H]1O,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures/7XRL_FWK_holo_aligned_predicted_protein.pdb +7CD9_FVR,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures/7CD9_FVR_holo_aligned_predicted_protein.pdb,CN1C[C@H](Nc2nc3cc[nH]c3c(=O)n2C)C[C@H](c2ccc(OCc3ccccc3)cc2)C1,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures/7CD9_FVR_holo_aligned_predicted_protein.pdb 
+7T3E_SLB,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures/7T3E_SLB_holo_aligned_predicted_protein.pdb,CC(=O)N[C@H]1[C@H]([C@H](O)[C@H](O)CO)O[C@](O)(C(=O)O)C[C@@H]1O,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures/7T3E_SLB_holo_aligned_predicted_protein.pdb +6YJA_2BA,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures/6YJA_2BA_holo_aligned_predicted_protein.pdb,Nc1ncnc2c1ncn2[C@@H]1O[C@@H]2CO[P@@](=O)(O)O[C@H]3[C@@H](O)[C@H](n4cnc5c(N)ncnc54)O[C@@H]3CO[P@](=O)(O)O[C@H]2[C@H]1O,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures/6YJA_2BA_holo_aligned_predicted_protein.pdb +7LT0_ONJ,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures/7LT0_ONJ_holo_aligned_predicted_protein.pdb,Cc1ccc(Sc2cc(C(=O)N3Cc4ccccc4C3)ccc2O)cc1C,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures/7LT0_ONJ_holo_aligned_predicted_protein.pdb +7D6O_MTE,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures/7D6O_MTE_holo_aligned_predicted_protein.pdb,Nc1nc2c(c(=O)[nH]1)N[C@H]1C(S)=C(S)[C@@H](COP(=O)(O)O)O[C@H]1N2,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures/7D6O_MTE_holo_aligned_predicted_protein.pdb +7UJ4_OQ4,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures/7UJ4_OQ4_holo_aligned_predicted_protein.pdb,CCN(C(=O)c1cc(F)ccc1Oc1cncnc1N1CC2(CCN(C[C@H]3CC[C@H](NS(=O)(=O)CC)CC3)CC2)C1)C(C)C,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures/7UJ4_OQ4_holo_aligned_predicted_protein.pdb 
+7OSO_0V1,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures/7OSO_0V1_holo_aligned_predicted_protein.pdb,OCC[C@H](O)CO,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures/7OSO_0V1_holo_aligned_predicted_protein.pdb +7AFX_R9K,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures/7AFX_R9K_holo_aligned_predicted_protein.pdb,Cc1cccc2c(-c3ccccc3Cl)c(C(=O)O)[nH]c12,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures/7AFX_R9K_holo_aligned_predicted_protein.pdb +7T1D_E7K,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures/7T1D_E7K_holo_aligned_predicted_protein.pdb,Cc1cn(-c2ccc3c(c2)CN(c2ncc(Cc4ccc(-n5cccn5)cc4)s2)CC3)c(C)n1,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures/7T1D_E7K_holo_aligned_predicted_protein.pdb +7R9N_F97,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures/7R9N_F97_holo_aligned_predicted_protein.pdb,CC(C)n1ncc2cnc(Nc3cc([C@@H]4CCNC4)nc(N4CCC(F)(F)C4)n3)cc21,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures/7R9N_F97_holo_aligned_predicted_protein.pdb +7MGT_ZD4,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures/7MGT_ZD4_holo_aligned_predicted_protein.pdb,Nc1nc(Cl)nc2c1ncn2[C@@H]1O[C@H](CO[P@](=O)(O)OP(=O)(O)O)[C@@H](O)[C@H]1O,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures/7MGT_ZD4_holo_aligned_predicted_protein.pdb +7MYU_ZR7,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures/7MYU_ZR7_holo_aligned_predicted_protein.pdb,COc1cnc(C(=O)Nc2ccc(F)c([C@]34CN(c5ncc(F)cn5)C[C@H]3CSC(N)=N4)c2)cn1,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures/7MYU_ZR7_holo_aligned_predicted_protein.pdb 
+7RH3_59O,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures/7RH3_59O_holo_aligned_predicted_protein.pdb,CO[C@@H]1O[C@H](CO)[C@H](O)[C@H](OC(=O)c2ccc(C)cc2)[C@@H]1OC(=O)c1ccc(Cl)cc1[N+](=O)[O-],data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures/7RH3_59O_holo_aligned_predicted_protein.pdb +7OMX_CNA,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures/7OMX_CNA_holo_aligned_predicted_protein.pdb,NC(=O)c1ccc[n+]([C@@H]2C[C@H](CO[P@](=O)(O)O[P@](=O)(O)OC[C@H]3O[C@@H](n4cnc5c(N)ncnc54)[C@H](O)[C@@H]3O)[C@@H](O)[C@H]2O)c1,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures/7OMX_CNA_holo_aligned_predicted_protein.pdb +7NXO_UU8,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures/7NXO_UU8_holo_aligned_predicted_protein.pdb,Cc1ccccc1Oc1cc(-n2c(=O)cc(S(=O)(=O)c3ccccc3)[nH]c2=O)c(F)cc1C#N,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures/7NXO_UU8_holo_aligned_predicted_protein.pdb +8DHG_T78,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures/8DHG_T78_holo_aligned_predicted_protein.pdb,CCn1c(CO)nn(-c2nc(O[C@@H](C)C(F)(F)F)c(C(=O)Nc3c(C)ccnc3Cl)cc2F)c1=O,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures/8DHG_T78_holo_aligned_predicted_protein.pdb +7NPL_UKZ,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures/7NPL_UKZ_holo_aligned_predicted_protein.pdb,CCC[C@@H](NC(=O)c1cccc2c1CC(=O)N2)[C@@H](O)c1cccc(Cl)c1C,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures/7NPL_UKZ_holo_aligned_predicted_protein.pdb 
+7PRM_81I,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures/7PRM_81I_holo_aligned_predicted_protein.pdb,O=C(c1ccco1)N1CCN([C@@H]2CC(=O)N(c3ccc(-c4ccc(F)cc4)cc3)C2)CC1,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures/7PRM_81I_holo_aligned_predicted_protein.pdb +7WDT_NGS,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures/7WDT_NGS_holo_aligned_predicted_protein.pdb,CC(=O)N[C@@H]1[C@@H](O)[C@H](O)[C@@H](COS(=O)(=O)O)O[C@H]1O,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures/7WDT_NGS_holo_aligned_predicted_protein.pdb +7UAW_MF6,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures/7UAW_MF6_holo_aligned_predicted_protein.pdb,Nc1ncnc2c1ncn2[C@@H]1O[C@@H]2CO[P@](=O)(O)O[P@](=O)(O)OC[C@H]3O[C@@H](O[C@@H]1[C@@H]2O)[C@H](O)[C@@H]3O,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures/7UAW_MF6_holo_aligned_predicted_protein.pdb +7W05_GMP,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures/7W05_GMP_holo_aligned_predicted_protein.pdb,Nc1nc2c(ncn2[C@@H]2O[C@H](CO)[C@@H](O)[C@H]2O)c(=O)[nH]1,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures/7W05_GMP_holo_aligned_predicted_protein.pdb +7UJF_R3V,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures/7UJF_R3V_holo_aligned_predicted_protein.pdb,C[C@H]1CCN(CCOc2ccc([C@@H]3c4ccc(O)cc4CC[C@@H]3c3ccccc3)cc2)C1,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures/7UJF_R3V_holo_aligned_predicted_protein.pdb 
+8D39_QDB,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures/8D39_QDB_holo_aligned_predicted_protein.pdb,O=C(O)c1ccc(C(=O)c2ccccc2)cc1,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures/8D39_QDB_holo_aligned_predicted_protein.pdb +7F5D_EUO,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures/7F5D_EUO_holo_aligned_predicted_protein.pdb,CNc1cc(-c2ccc3[nH]ccc3c2)nc(S(C)(=O)=O)n1,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures/7F5D_EUO_holo_aligned_predicted_protein.pdb +7BMI_U4B,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures/7BMI_U4B_holo_aligned_predicted_protein.pdb,O=C(O)c1ccnc(C(=O)O)c1F,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures/7BMI_U4B_holo_aligned_predicted_protein.pdb +7KB1_WBJ,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures/7KB1_WBJ_holo_aligned_predicted_protein.pdb,C=C/C(=N\Cc1c(COP(=O)(O)O)cnc(C)c1O)C(=O)O,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures/7KB1_WBJ_holo_aligned_predicted_protein.pdb +7R7R_AWJ,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures/7R7R_AWJ_holo_aligned_predicted_protein.pdb,Cc1nc([C@](C)(O)CO)sc1-c1cnc(N)c(O[C@H](C)c2cc(F)ccc2N2NC=CN2)c1,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures/7R7R_AWJ_holo_aligned_predicted_protein.pdb +7L00_XCJ,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures/7L00_XCJ_holo_aligned_predicted_protein.pdb,CS(=O)(=O)c1ccc2nc(NC(=O)Cc3csc(-n4cccc4)n3)sc2c1,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures/7L00_XCJ_holo_aligned_predicted_protein.pdb 
+7BJJ_TVW,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures/7BJJ_TVW_holo_aligned_predicted_protein.pdb,Nc1ncnc2n[nH]cc12,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures/7BJJ_TVW_holo_aligned_predicted_protein.pdb +7UQ3_O2U,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures/7UQ3_O2U_holo_aligned_predicted_protein.pdb,O=C(O)C[C@@H]1CC(=O)N(O)C1=O,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures/7UQ3_O2U_holo_aligned_predicted_protein.pdb +7XQZ_FPF,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures/7XQZ_FPF_holo_aligned_predicted_protein.pdb,CC(C)=CCC/C(C)=C/CC/C(C)=C(\F)CO[P@@](=O)(O)OP(=O)(O)O,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures/7XQZ_FPF_holo_aligned_predicted_protein.pdb +7JMV_4NC,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures/7JMV_4NC_holo_aligned_predicted_protein.pdb,O=[N+]([O-])c1ccc(O)c(O)c1,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures/7JMV_4NC_holo_aligned_predicted_protein.pdb +7BNH_BEZ,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures/7BNH_BEZ_holo_aligned_predicted_protein.pdb,O=C(O)c1ccccc1,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures/7BNH_BEZ_holo_aligned_predicted_protein.pdb +8FO5_Y4U,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures/8FO5_Y4U_holo_aligned_predicted_protein.pdb,CC(=O)c1ccn(S(=O)(=O)c2ccccc2)c1,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures/8FO5_Y4U_holo_aligned_predicted_protein.pdb 
+7ZU2_DHT,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures/7ZU2_DHT_holo_aligned_predicted_protein.pdb,C[C@]12CCC(=O)C[C@@H]1CC[C@@H]1[C@@H]2CC[C@]2(C)[C@@H](O)CC[C@@H]12,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures/7ZU2_DHT_holo_aligned_predicted_protein.pdb +7A9H_TPP,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures/7A9H_TPP_holo_aligned_predicted_protein.pdb,Cc1ncc(C[n+]2csc(CCO[P@](=O)(O)OP(=O)(O)O)c2C)c(N)n1,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures/7A9H_TPP_holo_aligned_predicted_protein.pdb +7DUA_HJ0,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures/7DUA_HJ0_holo_aligned_predicted_protein.pdb,Cc1cc(NC(=O)c2cn(C3(C)CC3)c3ncnc(N)c23)n[nH]1,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures/7DUA_HJ0_holo_aligned_predicted_protein.pdb +7P5T_5YG,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures/7P5T_5YG_holo_aligned_predicted_protein.pdb,COc1ccc(CNc2ccc(Cc3ccncc3)cc2)cc1,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures/7P5T_5YG_holo_aligned_predicted_protein.pdb +7RNI_60I,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures/7RNI_60I_holo_aligned_predicted_protein.pdb,Cc1nn(C)c(C)c1C(=O)N1CCN(Cc2nc3ccccc3n2CC(C)(C)C)CC1,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures/7RNI_60I_holo_aligned_predicted_protein.pdb +6M73_FNR,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures/6M73_FNR_holo_aligned_predicted_protein.pdb,Cc1cc2c(cc1C)N(C[C@H](O)[C@H](O)[C@H](O)COP(=O)(O)O)c1[nH]c(=O)[nH]c(=O)c1N2,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures/6M73_FNR_holo_aligned_predicted_protein.pdb 
+6ZK5_IMH,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures/6ZK5_IMH_holo_aligned_predicted_protein.pdb,OC[C@H]1N[C@@H](c2c[nH]c3c(O)ncnc23)[C@H](O)[C@@H]1O,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures/6ZK5_IMH_holo_aligned_predicted_protein.pdb +7VC5_9SF,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures/7VC5_9SF_holo_aligned_predicted_protein.pdb,O=C(C[C@H]1NCCC[C@@H]1O)Cn1cnc2ccccc2c1=O,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures/7VC5_9SF_holo_aligned_predicted_protein.pdb +7ZZW_KKW,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures/7ZZW_KKW_holo_aligned_predicted_protein.pdb,O=C([C@H]1C[C@@H](c2cccc(Cl)c2)CN1)N1CCN(c2nccc3ccsc23)CC1,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures/7ZZW_KKW_holo_aligned_predicted_protein.pdb +7R6J_2I7,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures/7R6J_2I7_holo_aligned_predicted_protein.pdb,Cc1cc(NCc2cccc(CN3C[C@H](O)[C@@H](O)[C@H](O)[C@H]3CO)c2)cc(-c2ncccn2)c1,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures/7R6J_2I7_holo_aligned_predicted_protein.pdb +8HO0_3ZI,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures/8HO0_3ZI_holo_aligned_predicted_protein.pdb,O=C1N[C@@H](Cc2c[nH]c3c(F)cccc23)C(=O)N2CCC[C@@H]12,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures/8HO0_3ZI_holo_aligned_predicted_protein.pdb +7XBV_APC,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures/7XBV_APC_holo_aligned_predicted_protein.pdb,Nc1ncnc2c1ncn2[C@@H]1O[C@H](CO[P@](=O)(O)C[P@](=O)(O)OP(=O)(O)O)[C@@H](O)[C@H]1O,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures/7XBV_APC_holo_aligned_predicted_protein.pdb 
+7UXS_OJC,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures/7UXS_OJC_holo_aligned_predicted_protein.pdb,Nc1ncnc2c1ncn2[C@@H]1O[C@@H]2CO[P@](=O)(O)O[P@](=O)(O)OC[C@H]3O[C@@H](O[C@H]2[C@H]1O)[C@H](O)[C@@H]3O,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures/7UXS_OJC_holo_aligned_predicted_protein.pdb +7WPW_F15,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures/7WPW_F15_holo_aligned_predicted_protein.pdb,CCCCCCCCCCCCCCC(=O)O,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures/7WPW_F15_holo_aligned_predicted_protein.pdb +8AEM_LVF,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures/8AEM_LVF_holo_aligned_predicted_protein.pdb,N#CCc1c[nH]c2ccc(Cl)cc12,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures/8AEM_LVF_holo_aligned_predicted_protein.pdb +7Q25_8J9,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures/7Q25_8J9_holo_aligned_predicted_protein.pdb,CCCC[C@H](N[C@@H](CCc1ccccc1)C(=O)O)C(=O)N[C@@H](Cc1ccc(O)cc1)C(=O)O,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures/7Q25_8J9_holo_aligned_predicted_protein.pdb +6ZPB_3D1,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures/6ZPB_3D1_holo_aligned_predicted_protein.pdb,Nc1ncnc2c1ncn2[C@H]1C[C@H](O)[C@@H](CO)O1,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures/6ZPB_3D1_holo_aligned_predicted_protein.pdb +7TSF_H4B,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures/7TSF_H4B_holo_aligned_predicted_protein.pdb,C[C@H](O)[C@H](O)[C@H]1CNc2nc(N)[nH]c(=O)c2N1,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures/7TSF_H4B_holo_aligned_predicted_protein.pdb 
+7LJN_GTP,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures/7LJN_GTP_holo_aligned_predicted_protein.pdb,Nc1nc2c(ncn2[C@@H]2O[C@H](CO[P@](=O)(O)O[P@](=O)(O)OP(=O)(O)O)[C@@H](O)[C@H]2O)c(=O)[nH]1,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures/7LJN_GTP_holo_aligned_predicted_protein.pdb +7E4L_MDN,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures/7E4L_MDN_holo_aligned_predicted_protein.pdb,O=P(O)(O)CP(=O)(O)O,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures/7E4L_MDN_holo_aligned_predicted_protein.pdb +7N7B_T3F,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures/7N7B_T3F_holo_aligned_predicted_protein.pdb,Cc1cn([C@H]2C[C@H](O)[C@@H](CO[P@@](=O)(O)O[P@](=O)(O)O[C@H]3O[C@H](C)[C@H](O)[C@H](N)[C@H]3O)O2)c(=O)[nH]c1=O,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures/7N7B_T3F_holo_aligned_predicted_protein.pdb +7WKL_CAQ,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures/7WKL_CAQ_holo_aligned_predicted_protein.pdb,Oc1ccccc1O,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures/7WKL_CAQ_holo_aligned_predicted_protein.pdb +8AP0_PRP,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures/8AP0_PRP_holo_aligned_predicted_protein.pdb,O=P(O)(O)OC[C@H]1O[C@H](O[P@@](=O)(O)OP(=O)(O)O)[C@H](O)[C@@H]1O,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures/8AP0_PRP_holo_aligned_predicted_protein.pdb +7V3S_5I9,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures/7V3S_5I9_holo_aligned_predicted_protein.pdb,O=C(Nc1ccc(F)cc1)C1(C(=O)Nc2ccc(Oc3ccnc4c3Oc3ccccc3N4)c(F)c2)CC1,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures/7V3S_5I9_holo_aligned_predicted_protein.pdb 
+7XPO_UPG,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures/7XPO_UPG_holo_aligned_predicted_protein.pdb,O=c1ccn([C@@H]2O[C@H](CO[P@](=O)(O)O[P@](=O)(O)O[C@H]3O[C@H](CO)[C@@H](O)[C@H](O)[C@H]3O)[C@@H](O)[C@H]2O)c(=O)[nH]1,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures/7XPO_UPG_holo_aligned_predicted_protein.pdb +7KC5_BJZ,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures/7KC5_BJZ_holo_aligned_predicted_protein.pdb,CN1C(=O)N(c2cc(Cl)cc(Cl)c2)C(=O)[C@]12CN(c1ccc(C(=O)O)cn1)C[C@H]2c1ccc(C#N)cc1,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures/7KC5_BJZ_holo_aligned_predicted_protein.pdb +7NSW_HC4,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures/7NSW_HC4_holo_aligned_predicted_protein.pdb,O=C(O)/C=C/c1ccc(O)cc1,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures/7NSW_HC4_holo_aligned_predicted_protein.pdb +7RWS_4UR,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures/7RWS_4UR_holo_aligned_predicted_protein.pdb,Nc1nc2c(ncn2[C@@H]2O[C@@H]3CO[P@@](=O)(O)O[C@@H]4[C@H](O)[C@@H](CO[P@](=O)(O)O[C@H]3[C@H]2O)O[C@H]4n2cnc3c(N)ncnc32)c(=O)[nH]1,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures/7RWS_4UR_holo_aligned_predicted_protein.pdb +7VBU_6I4,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures/7VBU_6I4_holo_aligned_predicted_protein.pdb,Cc1ccc2c(n1)[nH]c1c(C3CC3)cccc12,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures/7VBU_6I4_holo_aligned_predicted_protein.pdb 
+7QTA_URI,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures/7QTA_URI_holo_aligned_predicted_protein.pdb,O=c1ccn([C@@H]2O[C@H](CO)[C@@H](O)[C@H]2O)c(=O)[nH]1,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures/7QTA_URI_holo_aligned_predicted_protein.pdb +7WQQ_5Z6,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures/7WQQ_5Z6_holo_aligned_predicted_protein.pdb,CC(C)(C)c1cc(C(=O)/C=C/c2ccc(C(=O)O)cc2)cc(C(C)(C)C)c1,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures/7WQQ_5Z6_holo_aligned_predicted_protein.pdb +8D5D_5DK,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures/8D5D_5DK_holo_aligned_predicted_protein.pdb,Cc1ncc(COP(=O)(O)O)c(/C=N/[C@H](CCCNC(=N)N)C(=O)O)c1O,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures/8D5D_5DK_holo_aligned_predicted_protein.pdb +7Q27_8KC,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures/7Q27_8KC_holo_aligned_predicted_protein.pdb,CCCC[C@H](N[C@@H](CCc1ccccc1)C(=O)O)C(=O)N[C@@H](Cc1c[nH]c2ccccc12)C(=O)O,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures/7Q27_8KC_holo_aligned_predicted_protein.pdb +7ED2_A3P,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures/7ED2_A3P_holo_aligned_predicted_protein.pdb,Nc1ncnc2c1ncn2[C@@H]1O[C@H](COP(=O)(O)O)[C@@H](OP(=O)(O)O)[C@H]1O,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures/7ED2_A3P_holo_aligned_predicted_protein.pdb +6YT6_PKE,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures/6YT6_PKE_holo_aligned_predicted_protein.pdb,CN(c1ncccc1CNc1ccnc(Nc2ccc3c(c2)CC(=O)N3)n1)S(C)(=O)=O,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures/6YT6_PKE_holo_aligned_predicted_protein.pdb 
+7JG0_GAR,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures/7JG0_GAR_holo_aligned_predicted_protein.pdb,NCC(=O)N[C@@H]1O[C@H](COP(=O)([O-])[O-])[C@@H](O)[C@H]1O,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures/7JG0_GAR_holo_aligned_predicted_protein.pdb +8EYE_X4I,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures/8EYE_X4I_holo_aligned_predicted_protein.pdb,O=C(CNc1cc(F)cc(F)c1)N[C@@H](C(=O)NO)c1ccc(-c2cc(F)c(F)c(F)c2)cc1,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures/8EYE_X4I_holo_aligned_predicted_protein.pdb +7O1T_5X8,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures/7O1T_5X8_holo_aligned_predicted_protein.pdb,Nc1ncnc2c1ncn2[C@@H]1O[C@H](CSC[C@H](N)C(=O)O)[C@@H](O)[C@H]1O,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures/7O1T_5X8_holo_aligned_predicted_protein.pdb +6Z4N_Q7B,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures/6Z4N_Q7B_holo_aligned_predicted_protein.pdb,Cc1ccc(C[C@@]2(C(=O)O)C[C@H]2c2ccccc2)cc1,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures/6Z4N_Q7B_holo_aligned_predicted_protein.pdb +7WL4_JFU,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures/7WL4_JFU_holo_aligned_predicted_protein.pdb,CCN1C(=O)c2cc(N3CCN(C)CC3)nc3c(NS(=O)(=O)c4ccc(F)cc4F)ccc1c23,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures/7WL4_JFU_holo_aligned_predicted_protein.pdb +8SLG_G5A,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures/8SLG_G5A_holo_aligned_predicted_protein.pdb,NCC(=O)NS(=O)(=O)OC[C@H]1O[C@@H](n2cnc3c(N)ncnc32)[C@H](O)[C@@H]1O,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures/8SLG_G5A_holo_aligned_predicted_protein.pdb 
+7L7C_XQ1,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures/7L7C_XQ1_holo_aligned_predicted_protein.pdb,COc1cccc(-c2ccc3c(CC(=O)O)coc3c2)c1,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures/7L7C_XQ1_holo_aligned_predicted_protein.pdb +7NLV_UJE,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures/7NLV_UJE_holo_aligned_predicted_protein.pdb,O=C(CCCC[C@@H]1SC[C@@H]2NC(=O)N[C@@H]21)N[C@H]1CCNC1,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures/7NLV_UJE_holo_aligned_predicted_protein.pdb +7VYJ_CA0,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures/7VYJ_CA0_holo_aligned_predicted_protein.pdb,NC(=O)O[P@](=O)(O)OC[C@H]1O[C@@H](n2cnc3c(N)ncnc32)[C@H](O)[C@@H]1O,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures/7VYJ_CA0_holo_aligned_predicted_protein.pdb +7PUV_84Z,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures/7PUV_84Z_holo_aligned_predicted_protein.pdb,COC(=O)c1cc(S(N)(=O)=O)c(Cl)cc1S(=O)(=O)c1ccccc1,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures/7PUV_84Z_holo_aligned_predicted_protein.pdb +7RSV_7IQ,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures/7RSV_7IQ_holo_aligned_predicted_protein.pdb,C[C@@H]1COCCN1c1cc2n(n1)[C@@H]1CCC[C@@H]1NC2=O,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures/7RSV_7IQ_holo_aligned_predicted_protein.pdb +7QGP_DJ8,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures/7QGP_DJ8_holo_aligned_predicted_protein.pdb,Cc1cc(Cl)ccc1CNC(=O)Nc1ccc2cc1OCCOCCNc1ccn3ncc-2c3n1,data/posebusters_benchmark_set/posebusters_benchmark_holo_aligned_predicted_structures/7QGP_DJ8_holo_aligned_predicted_protein.pdb diff --git a/forks/RoseTTAFold-All-Atom/README.md 
b/forks/RoseTTAFold-All-Atom/README.md index 501ad01d..14c220a0 100644 --- a/forks/RoseTTAFold-All-Atom/README.md +++ b/forks/RoseTTAFold-All-Atom/README.md @@ -70,13 +70,13 @@ wget https://bfd.mmseqs.com/bfd_metaclust_clu_complete_id30_c90_final_seq.sorted mkdir -p bfd tar xfz bfd_metaclust_clu_complete_id30_c90_final_seq.sorted_opt.tar.gz -C ./bfd -# structure templates (including *_a3m.ffdata, *_a3m.ffindex) +# structure templates [81G] (including *_a3m.ffdata, *_a3m.ffindex) wget https://files.ipd.uw.edu/pub/RoseTTAFold/pdb100_2021Mar03.tar.gz tar xfz pdb100_2021Mar03.tar.gz ``` **NOTE:** Make sure to update `DB_UR30` and `DB_BFD` (on Lines 19 and 20 of `make_msa.sh`) as well as `database_params.hhdb` (on Line 6 of `rf2aa/config/inference/base.yaml`) to list the absolute (base) paths to these respective local databases. For example, one may set these values to `DB_UR30="/bmlfast/rfaa_databases/uniref30/UniRef30_2020_06"`, `DB_BFD="/bmlfast/rfaa_databases/bfd/bfd_metaclust_clu_complete_id30_c90_final_seq.sorted_opt"`, and `hhdb: "/bmlfast/rfaa_databases/pdb100_2021Mar03/pdb100_2021Mar03"`. -8. Download `BLAST` +8. 
Download `BLAST` [39M] ``` wget https://ftp.ncbi.nlm.nih.gov/blast/executables/legacy.NOTSUPPORTED/2.2.26/blast-2.2.26-x64-linux.tar.gz mkdir -p blast-2.2.26 diff --git a/forks/TULIP/outputs/.gitkeep b/forks/TULIP/outputs/.gitkeep new file mode 100644 index 00000000..e69de29b diff --git a/forks/Vina/inference/vina_dockgen_inputs.csv b/forks/Vina/inference/vina_dockgen_inputs.csv index 1a3cf8bd..f4d46911 100644 --- a/forks/Vina/inference/vina_dockgen_inputs.csv +++ b/forks/Vina/inference/vina_dockgen_inputs.csv @@ -1,190 +1,190 @@ complex_name,protein_path,ligand_description,protein_sequence -3gvl_1_SLB_2,/bml/acmwhb/Repositories/Lab_Repositories/PoseBench/data/dockgen_set/dockgen_holo_aligned_esmfold_structures/3gvl_1_SLB_2_holo_aligned_esmfold_protein.pdb,CC(O)N[C@H]1[C@H]([C@H](O)[C@H](O)CO)O[C@](O)(C(O)O)C[C@@H]1O, -3inr_1_GDU_0,/bml/acmwhb/Repositories/Lab_Repositories/PoseBench/data/dockgen_set/dockgen_holo_aligned_esmfold_structures/3inr_1_GDU_0_holo_aligned_esmfold_protein.pdb,OC[C@H]1O[C@H](O[PH](O)(O)O[PH](O)(O)OC[C@H]2O[C@@H](N3CCC(O)NC3O)[C@H](O)[C@@H]2O)[C@H](O)[C@@H](O)[C@H]1O, -3jqm_1_GTP_5,/bml/acmwhb/Repositories/Lab_Repositories/PoseBench/data/dockgen_set/dockgen_holo_aligned_esmfold_structures/3jqm_1_GTP_5_holo_aligned_esmfold_protein.pdb,NC1NC(O)C2NCN([C@@H]3O[C@H](CO[PH](O)(O)O[PH](O)(O)O[PH](O)(O)O)[C@@H](O)[C@H]3O)C2N1, -3ju4_1_SLB_2,/bml/acmwhb/Repositories/Lab_Repositories/PoseBench/data/dockgen_set/dockgen_holo_aligned_esmfold_structures/3ju4_1_SLB_2_holo_aligned_esmfold_protein.pdb,CC(O)N[C@H]1[C@H]([C@H](O)[C@H](O)CO)O[C@](O)(C(O)O)C[C@@H]1O, -4cnl_1_CHT_1,/bml/acmwhb/Repositories/Lab_Repositories/PoseBench/data/dockgen_set/dockgen_holo_aligned_esmfold_structures/4cnl_1_CHT_1_holo_aligned_esmfold_protein.pdb,C[N+](C)(C)CCO, -1hg0_1_SIN_1,/bml/acmwhb/Repositories/Lab_Repositories/PoseBench/data/dockgen_set/dockgen_holo_aligned_esmfold_structures/1hg0_1_SIN_1_holo_aligned_esmfold_protein.pdb,OC(O)CCC(O)O, 
-1i8t_1_FAD_1,/bml/acmwhb/Repositories/Lab_Repositories/PoseBench/data/dockgen_set/dockgen_holo_aligned_esmfold_structures/1i8t_1_FAD_1_holo_aligned_esmfold_protein.pdb,CC1CC2NC3C(O)NC(O)NC3N(C[C@H](O)[C@H](O)[C@H](O)CO[PH](O)(O)O[PH](O)(O)OC[C@H]3O[C@@H](N4CNC5C(N)NCNC54)[C@H](O)[C@@H]3O)C2CC1C, -1o28_1_UFP_2,/bml/acmwhb/Repositories/Lab_Repositories/PoseBench/data/dockgen_set/dockgen_holo_aligned_esmfold_structures/1o28_1_UFP_2_holo_aligned_esmfold_protein.pdb,OC1NC(O)N([C@H]2C[C@H](O)[C@@H](CO[PH](O)(O)O)O2)CC1F, -1o72_2_PC_0,/bml/acmwhb/Repositories/Lab_Repositories/PoseBench/data/dockgen_set/dockgen_holo_aligned_esmfold_structures/1o72_2_PC_0_holo_aligned_esmfold_protein.pdb,C[N+](C)(C)CCO[PH](O)(O)O, -1pj2_1_FUM_0,/bml/acmwhb/Repositories/Lab_Repositories/PoseBench/data/dockgen_set/dockgen_holo_aligned_esmfold_structures/1pj2_1_FUM_0_holo_aligned_esmfold_protein.pdb,OC(O)CCC(O)O, -1pj4_1_FUM_1,/bml/acmwhb/Repositories/Lab_Repositories/PoseBench/data/dockgen_set/dockgen_holo_aligned_esmfold_structures/1pj4_1_FUM_1_holo_aligned_esmfold_protein.pdb,OC(O)CCC(O)O, -1qaw_1_TRP_7,/bml/acmwhb/Repositories/Lab_Repositories/PoseBench/data/dockgen_set/dockgen_holo_aligned_esmfold_structures/1qaw_1_TRP_7_holo_aligned_esmfold_protein.pdb,N[C@@H](CC1CNC2CCCCC12)C(O)O, -1rqp_1_SAM_0,/bml/acmwhb/Repositories/Lab_Repositories/PoseBench/data/dockgen_set/dockgen_holo_aligned_esmfold_structures/1rqp_1_SAM_0_holo_aligned_esmfold_protein.pdb,C[S@@H](CC[C@H](N)C(O)O)C[C@H]1O[C@@H](N2CNC3C(N)NCNC32)[C@H](O)[C@@H]1O, -1sbz_1_FMN_3,/bml/acmwhb/Repositories/Lab_Repositories/PoseBench/data/dockgen_set/dockgen_holo_aligned_esmfold_structures/1sbz_1_FMN_3_holo_aligned_esmfold_protein.pdb,C[C@@H]1CC2NC3C(O)NC(O)NC3N(C[C@H](O)[C@H](O)[C@H](O)CO[PH](O)(O)O)C2C[C@@H]1C, 
-1sij_1_PCD_0,/bml/acmwhb/Repositories/Lab_Repositories/PoseBench/data/dockgen_set/dockgen_holo_aligned_esmfold_structures/1sij_1_PCD_0_holo_aligned_esmfold_protein.pdb,NC1CCN([C@@H]2O[C@H](CO[PH](O)(O)O[PH](O)(O)OC[C@H]3O[C@H]4NC5NC(N)NC(O)C5N[C@H]4[C@@H]4S[Mo](O)(O)SC43)[C@@H](O)[C@H]2O)C(O)N1, -1tke_1_SER_0,/bml/acmwhb/Repositories/Lab_Repositories/PoseBench/data/dockgen_set/dockgen_holo_aligned_esmfold_structures/1tke_1_SER_0_holo_aligned_esmfold_protein.pdb,N[C@@H](CO)C(O)O, -1tkg_1_SSA_0,/bml/acmwhb/Repositories/Lab_Repositories/PoseBench/data/dockgen_set/dockgen_holo_aligned_esmfold_structures/1tkg_1_SSA_0_holo_aligned_esmfold_protein.pdb,NC1NCNC2C1NCN2[C@@H]1O[C@H](COS(O)(O)NC(O)[C@@H](N)CO)[C@@H](O)[C@H]1O, -1u8u_1_OCA_0,/bml/acmwhb/Repositories/Lab_Repositories/PoseBench/data/dockgen_set/dockgen_holo_aligned_esmfold_structures/1u8u_1_OCA_0_holo_aligned_esmfold_protein.pdb,CCCCCCCC(O)O, -1uf5_1_CDT_0,/bml/acmwhb/Repositories/Lab_Repositories/PoseBench/data/dockgen_set/dockgen_holo_aligned_esmfold_structures/1uf5_1_CDT_0_holo_aligned_esmfold_protein.pdb,CSCC[C@@H](NC(N)O)C(O)O, -1uf7_1_CDV_0,/bml/acmwhb/Repositories/Lab_Repositories/PoseBench/data/dockgen_set/dockgen_holo_aligned_esmfold_structures/1uf7_1_CDV_0_holo_aligned_esmfold_protein.pdb,CC(C)[C@@H](NC(N)O)C(O)O, -1uf8_1_ING_2,/bml/acmwhb/Repositories/Lab_Repositories/PoseBench/data/dockgen_set/dockgen_holo_aligned_esmfold_structures/1uf8_1_ING_2_holo_aligned_esmfold_protein.pdb,NC(O)N[C@H](CC1CCCCC1)C(O)O, -1v2g_1_OCA_0,/bml/acmwhb/Repositories/Lab_Repositories/PoseBench/data/dockgen_set/dockgen_holo_aligned_esmfold_structures/1v2g_1_OCA_0_holo_aligned_esmfold_protein.pdb,CCCCCCCC(O)O, -1v97_1_MTE_1,/bml/acmwhb/Repositories/Lab_Repositories/PoseBench/data/dockgen_set/dockgen_holo_aligned_esmfold_structures/1v97_1_MTE_1_holo_aligned_esmfold_protein.pdb,NC1NC(O)[C@@H]2N[C@H]3C(S)C(S)[C@@H](CO[PH](O)(O)O)O[C@H]3NC2N1, 
-1za2_1_CTP_4,/bml/acmwhb/Repositories/Lab_Repositories/PoseBench/data/dockgen_set/dockgen_holo_aligned_esmfold_structures/1za2_1_CTP_4_holo_aligned_esmfold_protein.pdb,NC1CCN([C@@H]2O[C@H](CO[PH](O)(O)O[PH](O)(O)O[PH](O)(O)O)[C@@H](O)[C@H]2O)C(O)N1, -2cdc_1_XYS_5,/bml/acmwhb/Repositories/Lab_Repositories/PoseBench/data/dockgen_set/dockgen_holo_aligned_esmfold_structures/2cdc_1_XYS_5_holo_aligned_esmfold_protein.pdb,O[C@@H]1[C@@H](O)[C@@H](O)OC[C@H]1O, -2ext_1_TRP_0,/bml/acmwhb/Repositories/Lab_Repositories/PoseBench/data/dockgen_set/dockgen_holo_aligned_esmfold_structures/2ext_1_TRP_0_holo_aligned_esmfold_protein.pdb,N[C@@H](CC1CNC2CCCCC12)C(O)O, -2g7c_1_NAG-GAL-GLA_1,/bml/acmwhb/Repositories/Lab_Repositories/PoseBench/data/dockgen_set/dockgen_holo_aligned_esmfold_structures/2g7c_1_NAG-GAL-GLA_1_holo_aligned_esmfold_protein.pdb,CC(O)N[C@@H]1[C@@H](O)[C@H](O[C@@H]2O[C@H](CO)[C@H](O)[C@H](O[C@H]3O[C@H](CO)[C@H](O)[C@H](O)[C@H]3O)[C@H]2O)[C@@H](CO)O[C@H]1O, -2gag_1_FOA_0,/bml/acmwhb/Repositories/Lab_Repositories/PoseBench/data/dockgen_set/dockgen_holo_aligned_esmfold_structures/2gag_1_FOA_0_holo_aligned_esmfold_protein.pdb,OC(O)C1CCCO1, -2gag_1_NAD_0,/bml/acmwhb/Repositories/Lab_Repositories/PoseBench/data/dockgen_set/dockgen_holo_aligned_esmfold_structures/2gag_1_NAD_0_holo_aligned_esmfold_protein.pdb,NC1NCNC2C1NCN2[C@@H]1O[C@H](CO[PH](O)(O)O[PH](O)(O)OC[C@H]2O[C@@H](N3CCCC([C@H](N)O)C3)[C@H](O)[C@@H]2O)[C@@H](O)[C@H]1O, -2gah_1_NAD_0,/bml/acmwhb/Repositories/Lab_Repositories/PoseBench/data/dockgen_set/dockgen_holo_aligned_esmfold_structures/2gah_1_NAD_0_holo_aligned_esmfold_protein.pdb,NC(O)C1CCCN([C@@H]2O[C@H](CO[PH](O)(O)O[PH](O)(O)OC[C@H]3O[C@@H](N4CNC5C(N)NCNC54)[C@H](O)[C@@H]3O)[C@@H](O)[C@H]2O)C1, -2gf3_2_FOA_1,/bml/acmwhb/Repositories/Lab_Repositories/PoseBench/data/dockgen_set/dockgen_holo_aligned_esmfold_structures/2gf3_2_FOA_1_holo_aligned_esmfold_protein.pdb,OC(O)C1CCCO1, 
-2him_1_ASN_3,/bml/acmwhb/Repositories/Lab_Repositories/PoseBench/data/dockgen_set/dockgen_holo_aligned_esmfold_structures/2him_1_ASN_3_holo_aligned_esmfold_protein.pdb,NC(O)C[C@H](N)C(O)O, -2hk9_1_SKM_0,/bml/acmwhb/Repositories/Lab_Repositories/PoseBench/data/dockgen_set/dockgen_holo_aligned_esmfold_structures/2hk9_1_SKM_0_holo_aligned_esmfold_protein.pdb,OC(O)[C@H]1C[C@@H](O)[C@@H](O)[C@H](O)C1, -2hs3_1_FGR_0,/bml/acmwhb/Repositories/Lab_Repositories/PoseBench/data/dockgen_set/dockgen_holo_aligned_esmfold_structures/2hs3_1_FGR_0_holo_aligned_esmfold_protein.pdb,OCNCC(O)N[C@@H]1O[C@H](CO[PH](O)(O)O)[C@@H](O)[C@H]1O, -2o5m_1_MNR_0,/bml/acmwhb/Repositories/Lab_Repositories/PoseBench/data/dockgen_set/dockgen_holo_aligned_esmfold_structures/2o5m_1_MNR_0_holo_aligned_esmfold_protein.pdb,CCC1C(C)C2C[C@@H]3[C@H](C)[C@H](CCC(O)O)C4CC5[C@@H](CCC(O)O)C(C)C6CC7[C@@H](CC)[C@H](C)C8CC1N2[Mn@SP2](N65)(N87)N43, -2q37_1_3AL_0,/bml/acmwhb/Repositories/Lab_Repositories/PoseBench/data/dockgen_set/dockgen_holo_aligned_esmfold_structures/2q37_1_3AL_0_holo_aligned_esmfold_protein.pdb,NC(O)N[C@H]1NC(O)NC1O, -2q6k_1_ADN_1,/bml/acmwhb/Repositories/Lab_Repositories/PoseBench/data/dockgen_set/dockgen_holo_aligned_esmfold_structures/2q6k_1_ADN_1_holo_aligned_esmfold_protein.pdb,NC1NCNC2C1NCN2[C@@H]1O[C@H](CO)[C@@H](O)[C@H]1O, -2r4e_1_13P_0,/bml/acmwhb/Repositories/Lab_Repositories/PoseBench/data/dockgen_set/dockgen_holo_aligned_esmfold_structures/2r4e_1_13P_0_holo_aligned_esmfold_protein.pdb,OC[C@@H](O)CO[PH](O)(O)O, -2v5e_1_GU4-YYJ_0,/bml/acmwhb/Repositories/Lab_Repositories/PoseBench/data/dockgen_set/dockgen_holo_aligned_esmfold_structures/2v5e_1_GU4-YYJ_0_holo_aligned_esmfold_protein.pdb,OS(O)(O)OC[C@H]1O[C@@](COS(O)(O)O)(O[C@H]2O[C@H](COS(O)(O)O)[C@@H](OS(O)(O)O)[C@H](OS(O)(O)O)[C@H]2OS(O)(O)O)[C@@H](OS(O)(O)O)[C@@H]1OS(O)(O)O, 
-2v7t_1_SAH_4,/bml/acmwhb/Repositories/Lab_Repositories/PoseBench/data/dockgen_set/dockgen_holo_aligned_esmfold_structures/2v7t_1_SAH_4_holo_aligned_esmfold_protein.pdb,NC1NCNC2C1NCN2[C@@H]1O[C@H](CSCC[C@H](N)C(O)O)[C@@H](O)[C@H]1O, -2v7u_1_SAM_2,/bml/acmwhb/Repositories/Lab_Repositories/PoseBench/data/dockgen_set/dockgen_holo_aligned_esmfold_structures/2v7u_1_SAM_2_holo_aligned_esmfold_protein.pdb,C[S@@H](CC[C@H](N)C(O)O)C[C@H]1O[C@@H](N2CNC3C(N)NCNC32)[C@H](O)[C@@H]1O, -2v7v_1_5FD_0,/bml/acmwhb/Repositories/Lab_Repositories/PoseBench/data/dockgen_set/dockgen_holo_aligned_esmfold_structures/2v7v_1_5FD_0_holo_aligned_esmfold_protein.pdb,NC1NCNC2C1NCN2[C@@H]1O[C@H](CF)[C@@H](O)[C@H]1O, -2v7w_1_5FD_0,/bml/acmwhb/Repositories/Lab_Repositories/PoseBench/data/dockgen_set/dockgen_holo_aligned_esmfold_structures/2v7w_1_5FD_0_holo_aligned_esmfold_protein.pdb,NC1NCNC2C1NCN2[C@@H]1O[C@H](CF)[C@@H](O)[C@H]1O, -2vdf_1_OCT_0,/bml/acmwhb/Repositories/Lab_Repositories/PoseBench/data/dockgen_set/dockgen_holo_aligned_esmfold_structures/2vdf_1_OCT_0_holo_aligned_esmfold_protein.pdb,CCCCCCCC, -2vfu_1_MTL_0,/bml/acmwhb/Repositories/Lab_Repositories/PoseBench/data/dockgen_set/dockgen_holo_aligned_esmfold_structures/2vfu_1_MTL_0_holo_aligned_esmfold_protein.pdb,OC[C@@H](O)[C@@H](O)[C@H](O)[C@H](O)CO, -2wab_1_BGC-BGC-BGC-BGC-BGC_0,/bml/acmwhb/Repositories/Lab_Repositories/PoseBench/data/dockgen_set/dockgen_holo_aligned_esmfold_structures/2wab_1_BGC-BGC-BGC-BGC-BGC_0_holo_aligned_esmfold_protein.pdb,OC[C@H]1O[C@@H](O[C@H]2[C@H](O)[C@@H](O)[C@H](O[C@H]3[C@H](O)[C@@H](O)[C@H](O[C@H]4[C@H](O)[C@@H](O)[C@H](O[C@H]5[C@H](O)[C@@H](O)[C@H](O)O[C@@H]5CO)O[C@@H]4CO)O[C@@H]3CO)O[C@@H]2CO)[C@H](O)[C@@H](O)[C@@H]1O, 
-2wao_1_BGC-BGC-BGC-BGC-BGC_0,/bml/acmwhb/Repositories/Lab_Repositories/PoseBench/data/dockgen_set/dockgen_holo_aligned_esmfold_structures/2wao_1_BGC-BGC-BGC-BGC-BGC_0_holo_aligned_esmfold_protein.pdb,OC[C@H]1O[C@@H](O[C@H]2[C@H](O)[C@@H](O)[C@H](O[C@H]3[C@H](O)[C@@H](O)[C@H](O[C@H]4[C@H](O)[C@@H](O)[C@H](O[C@H]5[C@H](O)[C@@H](O)[C@H](O)O[C@@H]5CO)O[C@@H]4CO)O[C@@H]3CO)O[C@@H]2CO)[C@H](O)[C@@H](O)[C@@H]1O, -2wr8_1_SAH_1,/bml/acmwhb/Repositories/Lab_Repositories/PoseBench/data/dockgen_set/dockgen_holo_aligned_esmfold_structures/2wr8_1_SAH_1_holo_aligned_esmfold_protein.pdb,NC1NCNC2C1NCN2[C@@H]1O[C@H](CSCC[C@H](N)C(O)O)[C@@H](O)[C@H]1O, -2wwc_1_CHT_2,/bml/acmwhb/Repositories/Lab_Repositories/PoseBench/data/dockgen_set/dockgen_holo_aligned_esmfold_structures/2wwc_1_CHT_2_holo_aligned_esmfold_protein.pdb,C[N+](C)(C)CCO, -2x34_2_UQ8_0,/bml/acmwhb/Repositories/Lab_Repositories/PoseBench/data/dockgen_set/dockgen_holo_aligned_esmfold_structures/2x34_2_UQ8_0_holo_aligned_esmfold_protein.pdb,COC1C(O)C(C)C(CCC(C)CCCC(C)CCCC(C)CCC[C@H](C)CCCC(C)CCC[C@H](C)CCCC(C)CCCC(C)C)C(O)C1OC, -2xrh_1_NIO_1,/bml/acmwhb/Repositories/Lab_Repositories/PoseBench/data/dockgen_set/dockgen_holo_aligned_esmfold_structures/2xrh_1_NIO_1_holo_aligned_esmfold_protein.pdb,OC(O)[C@H]1CCCNC1, -2xta_1_ACO_0,/bml/acmwhb/Repositories/Lab_Repositories/PoseBench/data/dockgen_set/dockgen_holo_aligned_esmfold_structures/2xta_1_ACO_0_holo_aligned_esmfold_protein.pdb,CCO[PH](O)(O)O[PH](O)(O)OC[C@H]1O[C@@H](N2CNC3C(N)NCNC32)[C@H](O)[C@@H]1O[PH](O)(O)O, -2zcz_2_TRP_3,/bml/acmwhb/Repositories/Lab_Repositories/PoseBench/data/dockgen_set/dockgen_holo_aligned_esmfold_structures/2zcz_2_TRP_3_holo_aligned_esmfold_protein.pdb,N[C@@H](CC1CNC2CCCCC12)C(O)O, -2zd0_1_TRP_9,/bml/acmwhb/Repositories/Lab_Repositories/PoseBench/data/dockgen_set/dockgen_holo_aligned_esmfold_structures/2zd0_1_TRP_9_holo_aligned_esmfold_protein.pdb,N[C@@H](CC1CNC2CCCCC12)C(O)O, 
-2ze9_1_PD7_0,/bml/acmwhb/Repositories/Lab_Repositories/PoseBench/data/dockgen_set/dockgen_holo_aligned_esmfold_structures/2ze9_1_PD7_0_holo_aligned_esmfold_protein.pdb,CCCCCCC(O)OC[C@H](CO[PH](O)(O)O)OC(O)CCCCCC, -3ad7_1_NAD_0,/bml/acmwhb/Repositories/Lab_Repositories/PoseBench/data/dockgen_set/dockgen_holo_aligned_esmfold_structures/3ad7_1_NAD_0_holo_aligned_esmfold_protein.pdb,N[C@H](O)C1CCCN([C@@H]2O[C@H](CO[PH](O)(O)O[PH](O)(O)OC[C@H]3O[C@@H](N4CNC5[C@H](N)NCN[C@H]54)[C@H](O)[C@@H]3O)[C@@H](O)[C@H]2O)C1, -3ad9_1_NAD_0,/bml/acmwhb/Repositories/Lab_Repositories/PoseBench/data/dockgen_set/dockgen_holo_aligned_esmfold_structures/3ad9_1_NAD_0_holo_aligned_esmfold_protein.pdb,NC1NCNC2[C@@H]1NCN2[C@@H]1O[C@H](CO[PH](O)(O)O[PH](O)(O)OC[C@H]2O[C@@H](N3CCCC([C@H](N)O)C3)[C@H](O)[C@@H]2O)[C@@H](O)[C@H]1O, -3ada_1_NAD_0,/bml/acmwhb/Repositories/Lab_Repositories/PoseBench/data/dockgen_set/dockgen_holo_aligned_esmfold_structures/3ada_1_NAD_0_holo_aligned_esmfold_protein.pdb,N[C@H](O)C1CCCN([C@@H]2O[C@H](CO[PH](O)(O)O[PH](O)(O)OC[C@H]3O[C@@H](N4CN[C@@H]5[C@H](N)NCN[C@H]54)[C@H](O)[C@@H]3O)[C@@H](O)[C@H]2O)C1, -3eca_1_ASP_3,/bml/acmwhb/Repositories/Lab_Repositories/PoseBench/data/dockgen_set/dockgen_holo_aligned_esmfold_structures/3eca_1_ASP_3_holo_aligned_esmfold_protein.pdb,N[C@@H](CC(O)O)C(O)O, -3gf4_1_FAD_1,/bml/acmwhb/Repositories/Lab_Repositories/PoseBench/data/dockgen_set/dockgen_holo_aligned_esmfold_structures/3gf4_1_FAD_1_holo_aligned_esmfold_protein.pdb,CC1CC2NC3C(O)NC(O)NC3N(C[C@H](O)[C@H](O)[C@H](O)CO[PH](O)(O)O[PH](O)(O)OC[C@H]3O[C@@H](N4CNC5C4NCN[C@H]5N)[C@H](O)[C@@H]3O)C2CC1C, -3gf4_1_U5P_0,/bml/acmwhb/Repositories/Lab_Repositories/PoseBench/data/dockgen_set/dockgen_holo_aligned_esmfold_structures/3gf4_1_U5P_0_holo_aligned_esmfold_protein.pdb,OC1CCN([C@@H]2O[C@H](CO[PH](O)(O)O)[C@@H](O)[C@H]2O)C(O)N1, 
-3he3_5_UDP_0,/bml/acmwhb/Repositories/Lab_Repositories/PoseBench/data/dockgen_set/dockgen_holo_aligned_esmfold_structures/3he3_5_UDP_0_holo_aligned_esmfold_protein.pdb,OC1CCN([C@@H]2O[C@H](CO[PH](O)(O)O[PH](O)(O)O)[C@@H](O)[C@H]2O)C(O)N1, -3it6_1_ORN_1,/bml/acmwhb/Repositories/Lab_Repositories/PoseBench/data/dockgen_set/dockgen_holo_aligned_esmfold_structures/3it6_1_ORN_1_holo_aligned_esmfold_protein.pdb,NCCC[C@H](N)C(O)O, -3k8l_1_GLC-GLC-GLC-GLC-GLC_0,/bml/acmwhb/Repositories/Lab_Repositories/PoseBench/data/dockgen_set/dockgen_holo_aligned_esmfold_structures/3k8l_1_GLC-GLC-GLC-GLC-GLC_0_holo_aligned_esmfold_protein.pdb,OC[C@H]1O[C@H](O[C@H]2[C@H](O)[C@@H](O)[C@@H](O[C@H]3[C@H](O)[C@@H](O)[C@@H](O[C@H]4[C@H](O)[C@@H](O)[C@@H](O[C@H]5[C@H](O)[C@@H](O)[C@@H](O)O[C@@H]5CO)O[C@@H]4CO)O[C@@H]3CO)O[C@@H]2CO)[C@H](O)[C@@H](O)[C@@H]1O, -3k8m_1_GLC-GLC-AC1_0,/bml/acmwhb/Repositories/Lab_Repositories/PoseBench/data/dockgen_set/dockgen_holo_aligned_esmfold_structures/3k8m_1_GLC-GLC-AC1_0_holo_aligned_esmfold_protein.pdb,C[C@H]1O[C@H](O[C@H]2[C@H](O)[C@@H](O)[C@@H](O[C@H]3[C@H](O)[C@@H](O)[C@@H](O)O[C@@H]3CO)O[C@@H]2CO)[C@H](O)[C@@H](O)[C@@H]1N[C@H]1CC(CO)[C@@H](O)[C@H](O)[C@H]1O, -3nvv_1_MTE_1,/bml/acmwhb/Repositories/Lab_Repositories/PoseBench/data/dockgen_set/dockgen_holo_aligned_esmfold_structures/3nvv_1_MTE_1_holo_aligned_esmfold_protein.pdb,NC1NC(O)C2N[C@@H]3[C@H](NC2N1)O[C@H](CO[PH](O)(O)O)C(S)[C@@H]3S, -3o01_2_DXC_0,/bml/acmwhb/Repositories/Lab_Repositories/PoseBench/data/dockgen_set/dockgen_holo_aligned_esmfold_structures/3o01_2_DXC_0_holo_aligned_esmfold_protein.pdb,C[C@H](CCC(O)O)[C@H]1CC[C@H]2[C@@H]3CC[C@@H]4C[C@H](O)CC[C@]4(C)[C@H]3C[C@H](O)[C@]12C, -3o02_2_JN3_0,/bml/acmwhb/Repositories/Lab_Repositories/PoseBench/data/dockgen_set/dockgen_holo_aligned_esmfold_structures/3o02_2_JN3_0_holo_aligned_esmfold_protein.pdb,C[C@H](CCC(O)O)[C@H]1CC[C@H]2[C@@H]3[C@H](O)C[C@@H]4C[C@H](O)CC[C@]4(C)[C@H]3CC[C@]12C, 
-3o7j_1_2AL_0,/bml/acmwhb/Repositories/Lab_Repositories/PoseBench/data/dockgen_set/dockgen_holo_aligned_esmfold_structures/3o7j_1_2AL_0_holo_aligned_esmfold_protein.pdb,NC(O)NC1NC(O)NC1O, -3q14_1_PCR_3,/bml/acmwhb/Repositories/Lab_Repositories/PoseBench/data/dockgen_set/dockgen_holo_aligned_esmfold_structures/3q14_1_PCR_3_holo_aligned_esmfold_protein.pdb,CC1CCC(O)CC1, -3qrc_2_GU4-YYJ_0,/bml/acmwhb/Repositories/Lab_Repositories/PoseBench/data/dockgen_set/dockgen_holo_aligned_esmfold_structures/3qrc_2_GU4-YYJ_0_holo_aligned_esmfold_protein.pdb,OS(O)(O)OC[C@H]1O[C@@](COS(O)(O)O)(O[C@H]2O[C@H](COS(O)(O)O)[C@@H](OS(O)(O)O)[C@H](OS(O)(O)O)[C@H]2OS(O)(O)O)[C@@H](OS(O)(O)O)[C@@H]1OS(O)(O)O, -3s5x_1_BMA-MAN-MAN-MAN-MAN_0,/bml/acmwhb/Repositories/Lab_Repositories/PoseBench/data/dockgen_set/dockgen_holo_aligned_esmfold_structures/3s5x_1_BMA-MAN-MAN-MAN-MAN_0_holo_aligned_esmfold_protein.pdb,OC[C@H]1O[C@H](O[C@@H]2[C@H](O)[C@@H](OC[C@H]3O[C@@H](O)[C@@H](O)[C@@H](O[C@H]4O[C@H](CO)[C@@H](O)[C@H](O)[C@@H]4O)[C@@H]3O)O[C@H](CO)[C@H]2O)[C@@H](O)[C@@H](O)[C@@H]1O, -3s6a_1_ANP_0,/bml/acmwhb/Repositories/Lab_Repositories/PoseBench/data/dockgen_set/dockgen_holo_aligned_esmfold_structures/3s6a_1_ANP_0_holo_aligned_esmfold_protein.pdb,NC1NCNC2C1NCN2[C@@H]1O[C@H](CO[PH](O)(O)O[PH](N)(O)O)[C@@H](O)[C@H]1O, -3se5_1_ANP_2,/bml/acmwhb/Repositories/Lab_Repositories/PoseBench/data/dockgen_set/dockgen_holo_aligned_esmfold_structures/3se5_1_ANP_2_holo_aligned_esmfold_protein.pdb,NC1NCNC2C1NCN2[C@@H]1O[C@H](CO[PH](O)(O)O[PH](O)(O)N[PH](O)(O)O)[C@@H](O)[C@H]1O, -3sr6_1_MTE_1,/bml/acmwhb/Repositories/Lab_Repositories/PoseBench/data/dockgen_set/dockgen_holo_aligned_esmfold_structures/3sr6_1_MTE_1_holo_aligned_esmfold_protein.pdb,NC1NC(O)[C@@H]2N[C@H]3C(S)C(S)[C@@H](CO[PH](O)(O)O)O[C@H]3NC2N1, -3ub7_1_ACM_1,/bml/acmwhb/Repositories/Lab_Repositories/PoseBench/data/dockgen_set/dockgen_holo_aligned_esmfold_structures/3ub7_1_ACM_1_holo_aligned_esmfold_protein.pdb,CC(N)O, 
-3ub9_1_NHY_1,/bml/acmwhb/Repositories/Lab_Repositories/PoseBench/data/dockgen_set/dockgen_holo_aligned_esmfold_structures/3ub9_1_NHY_1_holo_aligned_esmfold_protein.pdb,N[C@@H](O)NO, -3uni_1_SAL_0,/bml/acmwhb/Repositories/Lab_Repositories/PoseBench/data/dockgen_set/dockgen_holo_aligned_esmfold_structures/3uni_1_SAL_0_holo_aligned_esmfold_protein.pdb,OC(O)C1CCCCC1O, -3wrb_1_GDE_0,/bml/acmwhb/Repositories/Lab_Repositories/PoseBench/data/dockgen_set/dockgen_holo_aligned_esmfold_structures/3wrb_1_GDE_0_holo_aligned_esmfold_protein.pdb,OC(O)C1CC(O)C(O)C(O)C1, -3wvc_1_FEG_0,/bml/acmwhb/Repositories/Lab_Repositories/PoseBench/data/dockgen_set/dockgen_holo_aligned_esmfold_structures/3wvc_1_FEG_0_holo_aligned_esmfold_protein.pdb,CC1C(CC(O)O)NC(O)[C@@H](C)[C@@H]1O[PH](O)(O)OC[C@H]1O[C@@H](N2CNC3C(O)NC(N)NC32)[C@H](O)[C@@H]1O, -3zec_1_ANP_0,/bml/acmwhb/Repositories/Lab_Repositories/PoseBench/data/dockgen_set/dockgen_holo_aligned_esmfold_structures/3zec_1_ANP_0_holo_aligned_esmfold_protein.pdb,NC1NCNC2C1NCN2[C@@H]1O[C@H](CO[PH](O)(O)O[PH](O)(O)N[PH](O)(O)O)[C@@H](O)[C@H]1O, -3zjx_1_BOG_0,/bml/acmwhb/Repositories/Lab_Repositories/PoseBench/data/dockgen_set/dockgen_holo_aligned_esmfold_structures/3zjx_1_BOG_0_holo_aligned_esmfold_protein.pdb,CCCCCCCCO[C@@H]1O[C@H](CO)[C@@H](O)[C@H](O)[C@H]1O, -3zqu_1_FNR_5,/bml/acmwhb/Repositories/Lab_Repositories/PoseBench/data/dockgen_set/dockgen_holo_aligned_esmfold_structures/3zqu_1_FNR_5_holo_aligned_esmfold_protein.pdb,C[C@H]1CC2NC3[C@H](O)N[C@H](O)N[C@@H]3N(C[C@H](O)[C@H](O)[C@H](O)CO[PH](O)(O)O)[C@@H]2C[C@@H]1C, -3zzs_1_TRP_3,/bml/acmwhb/Repositories/Lab_Repositories/PoseBench/data/dockgen_set/dockgen_holo_aligned_esmfold_structures/3zzs_1_TRP_3_holo_aligned_esmfold_protein.pdb,N[C@@H](CC1CNC2CCCCC12)C(O)O, 
-4b4v_1_L34_0,/bml/acmwhb/Repositories/Lab_Repositories/PoseBench/data/dockgen_set/dockgen_holo_aligned_esmfold_structures/4b4v_1_L34_0_holo_aligned_esmfold_protein.pdb,NC1NC(O)C2C(NC[C@@H]3CN(C4CCC([C@H](O)N[C@@H](CCC(O)O)C(O)O)CC4)C(O)N23)N1, -4bc9_1_CNV-FAD_0,/bml/acmwhb/Repositories/Lab_Repositories/PoseBench/data/dockgen_set/dockgen_holo_aligned_esmfold_structures/4bc9_1_CNV-FAD_0_holo_aligned_esmfold_protein.pdb,CC1CC2[C@H](CC1C)N(C(C)CN)C1C(O)NC(O)NC1N2C[C@H](O)[C@H](O)[C@H](O)CO[PH](O)(O)O[PH](O)(O)OC[C@H]1O[C@@H](N2CNC3C(N)NCN[C@H]32)[C@H](O)[C@@H]1O, -4cdn_2_FAD_0,/bml/acmwhb/Repositories/Lab_Repositories/PoseBench/data/dockgen_set/dockgen_holo_aligned_esmfold_structures/4cdn_2_FAD_0_holo_aligned_esmfold_protein.pdb,CC1CC2NC3C(O)NC(O)NC3N(C[C@H](O)[C@H](O)[C@H](O)CO[PH](O)(O)O[PH](O)(O)OC[C@H]3O[C@@H](N4CNC5C(N)NCNC54)[C@H](O)[C@@H]3O)C2C[C@H]1C, -4fyv_1_DCP_2,/bml/acmwhb/Repositories/Lab_Repositories/PoseBench/data/dockgen_set/dockgen_holo_aligned_esmfold_structures/4fyv_1_DCP_2_holo_aligned_esmfold_protein.pdb,NC1CCN([C@H]2C[C@H](O)[C@@H](CO[PH](O)(O)O[PH](O)(O)O[PH](O)(O)O)O2)C(O)N1, -4fyw_1_CTP_4,/bml/acmwhb/Repositories/Lab_Repositories/PoseBench/data/dockgen_set/dockgen_holo_aligned_esmfold_structures/4fyw_1_CTP_4_holo_aligned_esmfold_protein.pdb,NC1CCN([C@@H]2O[C@H](CO[PH](O)(O)O[PH](O)(O)O[PH](O)(O)O)[C@@H](O)[C@H]2O)C(O)N1, -4gk9_1_BMA-MAN-MAN-MAN-MAN_3,/bml/acmwhb/Repositories/Lab_Repositories/PoseBench/data/dockgen_set/dockgen_holo_aligned_esmfold_structures/4gk9_1_BMA-MAN-MAN-MAN-MAN_3_holo_aligned_esmfold_protein.pdb,OC[C@H]1O[C@H](OC[C@H]2O[C@H](OC[C@H]3O[C@@H](O)[C@@H](O)[C@@H](O[C@H]4O[C@H](CO)[C@@H](O)[C@H](O)[C@@H]4O)[C@@H]3O)[C@@H](O)[C@@H](O[C@H]3O[C@H](CO)[C@@H](O)[C@H](O)[C@@H]3O)[C@@H]2O)[C@@H](O)[C@@H](O)[C@@H]1O, -4h2f_1_ADN_0,/bml/acmwhb/Repositories/Lab_Repositories/PoseBench/data/dockgen_set/dockgen_holo_aligned_esmfold_structures/4h2f_1_ADN_0_holo_aligned_esmfold_protein.pdb,NC1NCNC2C1NCN2[C@@H]1O[C@H](CO)[C@@H](O)[C@H]1O, 
-4idk_1_1FE_0,/bml/acmwhb/Repositories/Lab_Repositories/PoseBench/data/dockgen_set/dockgen_holo_aligned_esmfold_structures/4idk_1_1FE_0_holo_aligned_esmfold_protein.pdb,NCC(O)NC1CCC2NC(O)NC2C1, -4kgx_1_CTP_5,/bml/acmwhb/Repositories/Lab_Repositories/PoseBench/data/dockgen_set/dockgen_holo_aligned_esmfold_structures/4kgx_1_CTP_5_holo_aligned_esmfold_protein.pdb,NC1CCN([C@@H]2O[C@H](CO[PH](O)(O)O[PH](O)(O)O[PH](O)(O)O)[C@@H](O)[C@H]2O)C(O)N1, -4mig_1_G3F_2,/bml/acmwhb/Repositories/Lab_Repositories/PoseBench/data/dockgen_set/dockgen_holo_aligned_esmfold_structures/4mig_1_G3F_2_holo_aligned_esmfold_protein.pdb,OC[C@H]1O[C@@H](O)[C@H](O)[C@@H](F)[C@@H]1O, -4mo2_2_FDA_0,/bml/acmwhb/Repositories/Lab_Repositories/PoseBench/data/dockgen_set/dockgen_holo_aligned_esmfold_structures/4mo2_2_FDA_0_holo_aligned_esmfold_protein.pdb,CC1CC2[C@@H](CC1C)N[C@H]1C(O)NC(O)N[C@H]1N2C[C@H](O)[C@H](O)[C@H](O)CO[PH](O)(O)O[PH](O)(O)OC[C@H]1O[C@@H](N2CNC3C(N)NCNC32)[C@H](O)[C@@H]1O, -4mos_1_GAF_1,/bml/acmwhb/Repositories/Lab_Repositories/PoseBench/data/dockgen_set/dockgen_holo_aligned_esmfold_structures/4mos_1_GAF_1_holo_aligned_esmfold_protein.pdb,OC[C@H]1O[C@H](O)[C@H](F)[C@@H](O)[C@H]1O, -4n4l_1_HG1_0,/bml/acmwhb/Repositories/Lab_Repositories/PoseBench/data/dockgen_set/dockgen_holo_aligned_esmfold_structures/4n4l_1_HG1_0_holo_aligned_esmfold_protein.pdb,NC(O)CCCC1CCCCC1, -4o0d_1_GLY_3,/bml/acmwhb/Repositories/Lab_Repositories/PoseBench/data/dockgen_set/dockgen_holo_aligned_esmfold_structures/4o0d_1_GLY_3_holo_aligned_esmfold_protein.pdb,NCC(O)O, -4o0f_1_GLY_3,/bml/acmwhb/Repositories/Lab_Repositories/PoseBench/data/dockgen_set/dockgen_holo_aligned_esmfold_structures/4o0f_1_GLY_3_holo_aligned_esmfold_protein.pdb,NCC(O)O, -4o95_1_245_0,/bml/acmwhb/Repositories/Lab_Repositories/PoseBench/data/dockgen_set/dockgen_holo_aligned_esmfold_structures/4o95_1_245_0_holo_aligned_esmfold_protein.pdb,OC(NC1CCCCC1)N[C@H]1CCNC(Cl)C1, 
-4oal_2_245_0,/bml/acmwhb/Repositories/Lab_Repositories/PoseBench/data/dockgen_set/dockgen_holo_aligned_esmfold_structures/4oal_2_245_0_holo_aligned_esmfold_protein.pdb,OC(NC1CCCCC1)N[C@H]1CCNC(Cl)C1, -4osx_1_GLY_2,/bml/acmwhb/Repositories/Lab_Repositories/PoseBench/data/dockgen_set/dockgen_holo_aligned_esmfold_structures/4osx_1_GLY_2_holo_aligned_esmfold_protein.pdb,NCC(O)O, -4osy_1_GLY_3,/bml/acmwhb/Repositories/Lab_Repositories/PoseBench/data/dockgen_set/dockgen_holo_aligned_esmfold_structures/4osy_1_GLY_3_holo_aligned_esmfold_protein.pdb,NCC(O)O, -4pfx_1_UDP_0,/bml/acmwhb/Repositories/Lab_Repositories/PoseBench/data/dockgen_set/dockgen_holo_aligned_esmfold_structures/4pfx_1_UDP_0_holo_aligned_esmfold_protein.pdb,OC1CCN([C@@H]2O[C@H](CO[PH](O)(O)O[PH](O)(O)O)[C@@H](O)[C@H]2O)C(O)N1, -4phr_1_UDP_0,/bml/acmwhb/Repositories/Lab_Repositories/PoseBench/data/dockgen_set/dockgen_holo_aligned_esmfold_structures/4phr_1_UDP_0_holo_aligned_esmfold_protein.pdb,OC1CCN([C@@H]2O[C@H](CO[PH](O)(O)O[PH](O)(O)O)[C@@H](O)[C@H]2O)C(O)N1, -4phs_1_UDP_0,/bml/acmwhb/Repositories/Lab_Repositories/PoseBench/data/dockgen_set/dockgen_holo_aligned_esmfold_structures/4phs_1_UDP_0_holo_aligned_esmfold_protein.pdb,OC1CCN([C@@H]2O[C@H](CO[PH](O)([OH2+])O[PH](O)(O)[OH2+])[C@@H](O)[C@H]2O)C(O)N1, -4pya_1_2X3_0,/bml/acmwhb/Repositories/Lab_Repositories/PoseBench/data/dockgen_set/dockgen_holo_aligned_esmfold_structures/4pya_1_2X3_0_holo_aligned_esmfold_protein.pdb,NC1NC(O)C2N[C@H]3N([C@@H]2N1)[C@@H]1O[C@H](CO[PH](O)(O)O[PH](O)(O)O[PH](O)(O)O)[C@@]3(O)[C@H]1O, -4qa8_1_PJZ_0,/bml/acmwhb/Repositories/Lab_Repositories/PoseBench/data/dockgen_set/dockgen_holo_aligned_esmfold_structures/4qa8_1_PJZ_0_holo_aligned_esmfold_protein.pdb,CCCCCCCCCCCCCC[C@@H](O)OC[C@@H](C)O[C@@H](O)CCCCCCCCCCC, -4qo5_1_NAG_2,/bml/acmwhb/Repositories/Lab_Repositories/PoseBench/data/dockgen_set/dockgen_holo_aligned_esmfold_structures/4qo5_1_NAG_2_holo_aligned_esmfold_protein.pdb,CC(O)N[C@H]1CO[C@H](CO)[C@@H](O)[C@@H]1O, 
-4rhe_1_FMN_6,/bml/acmwhb/Repositories/Lab_Repositories/PoseBench/data/dockgen_set/dockgen_holo_aligned_esmfold_structures/4rhe_1_FMN_6_holo_aligned_esmfold_protein.pdb,CC1C[C@@H]2[C@@H](C[C@@H]1C)NC1[C@H](O)NC(O)N[C@@H]1N2C[C@H](O)[C@H](O)[C@H](O)CO[PH]([O-])(O)O, -4rpj_1_UDP_0,/bml/acmwhb/Repositories/Lab_Repositories/PoseBench/data/dockgen_set/dockgen_holo_aligned_esmfold_structures/4rpj_1_UDP_0_holo_aligned_esmfold_protein.pdb,OC1CCN([C@@H]2O[C@H](CO[PH](O)(O)O[PH](O)(O)O)[C@@H](O)[C@H]2O)C(O)N1, -4rpm_1_HXC_0,/bml/acmwhb/Repositories/Lab_Repositories/PoseBench/data/dockgen_set/dockgen_holo_aligned_esmfold_structures/4rpm_1_HXC_0_holo_aligned_esmfold_protein.pdb,CCCCCC(O)SC, -4tvd_1_BGC_4,/bml/acmwhb/Repositories/Lab_Repositories/PoseBench/data/dockgen_set/dockgen_holo_aligned_esmfold_structures/4tvd_1_BGC_4_holo_aligned_esmfold_protein.pdb,OC[C@H]1O[C@@H](O)[C@H](O)[C@@H](O)[C@@H]1O, -4tvd_1_GLC_0,/bml/acmwhb/Repositories/Lab_Repositories/PoseBench/data/dockgen_set/dockgen_holo_aligned_esmfold_structures/4tvd_1_GLC_0_holo_aligned_esmfold_protein.pdb,OC[C@H]1O[C@H](O)[C@H](O)[C@@H](O)[C@@H]1O, -4u63_1_FAD_0,/bml/acmwhb/Repositories/Lab_Repositories/PoseBench/data/dockgen_set/dockgen_holo_aligned_esmfold_structures/4u63_1_FAD_0_holo_aligned_esmfold_protein.pdb,CC1CC2NC3C(O)NC(O)NC3N(C[C@H](O)[C@H](O)[C@H](O)CO[PH](O)(O)O[PH](O)(O)OC[C@H]3O[C@@H](N4CNC5C(N)NCNC54)[C@H](O)[C@@H]3O)C2CC1C, -4uoc_1_NCN_1,/bml/acmwhb/Repositories/Lab_Repositories/PoseBench/data/dockgen_set/dockgen_holo_aligned_esmfold_structures/4uoc_1_NCN_1_holo_aligned_esmfold_protein.pdb,OC(O)C1CCCN([C@@H]2O[C@H](CO[PH](O)(O)O)[C@@H](O)[C@H]2O)C1, -4uuw_1_AMP_0,/bml/acmwhb/Repositories/Lab_Repositories/PoseBench/data/dockgen_set/dockgen_holo_aligned_esmfold_structures/4uuw_1_AMP_0_holo_aligned_esmfold_protein.pdb,NC1NCNC2C1NCN2[C@@H]1O[C@H](CO[PH](O)(O)O)[C@@H](O)[C@H]1O, 
-4xdr_1_ADN_0,/bml/acmwhb/Repositories/Lab_Repositories/PoseBench/data/dockgen_set/dockgen_holo_aligned_esmfold_structures/4xdr_1_ADN_0_holo_aligned_esmfold_protein.pdb,NC1NCNC2C1NCN2[C@@H]1O[C@H](CO)[C@@H](O)[C@H]1O, -4xfm_1_THE_0,/bml/acmwhb/Repositories/Lab_Repositories/PoseBench/data/dockgen_set/dockgen_holo_aligned_esmfold_structures/4xfm_1_THE_0_holo_aligned_esmfold_protein.pdb,OC[C@@H](O)[C@H](O)C(O)O, -4ydx_1_TCE_0,/bml/acmwhb/Repositories/Lab_Repositories/PoseBench/data/dockgen_set/dockgen_holo_aligned_esmfold_structures/4ydx_1_TCE_0_holo_aligned_esmfold_protein.pdb,OC(O)CCP(CCC(O)O)CCC(O)O, -4zav_1_4LS_8,/bml/acmwhb/Repositories/Lab_Repositories/PoseBench/data/dockgen_set/dockgen_holo_aligned_esmfold_structures/4zav_1_4LS_8_holo_aligned_esmfold_protein.pdb,CC(C)CCN1C2CC(C)[C@@H](C)CC2N(C[C@H](O)[C@H](O)[C@H](O)CO[PH](O)(O)O)[C@H]2NC(O)N[C@@H](O)C21, -4zaw_1_4LU_1,/bml/acmwhb/Repositories/Lab_Repositories/PoseBench/data/dockgen_set/dockgen_holo_aligned_esmfold_structures/4zaw_1_4LU_1_holo_aligned_esmfold_protein.pdb,C[C@H]1[C@@H](C)C[C@@H]2C3[C@@H]1C(C)(C)CCN3[C@H]1C(O)NC(O)NC1N2C[C@H](O)[C@H](O)[C@H](O)CO[PH](O)(O)O, -4zay_1_4LS_6,/bml/acmwhb/Repositories/Lab_Repositories/PoseBench/data/dockgen_set/dockgen_holo_aligned_esmfold_structures/4zay_1_4LS_6_holo_aligned_esmfold_protein.pdb,CC(C)CCN1[C@H]2[C@H](O)N[C@H](O)N[C@@H]2N(C[C@H](O)[C@H](O)[C@H](O)CO[PH](O)(O)O)[C@@H]2CC(C)C(C)C[C@H]21, -4zaz_1_4LS_6,/bml/acmwhb/Repositories/Lab_Repositories/PoseBench/data/dockgen_set/dockgen_holo_aligned_esmfold_structures/4zaz_1_4LS_6_holo_aligned_esmfold_protein.pdb,CC(C)CCN1C2CC(C)[C@@H](C)CC2N(C[C@H](O)[C@H](O)[C@H](O)CO[PH](O)(O)O)C2N[C@@H](O)N[C@@H](O)C21, -4zqx_1_ATP_2,/bml/acmwhb/Repositories/Lab_Repositories/PoseBench/data/dockgen_set/dockgen_holo_aligned_esmfold_structures/4zqx_1_ATP_2_holo_aligned_esmfold_protein.pdb,NC1NCNC2C1NCN2[C@@H]1O[C@H](CO[PH](O)(O)O[PH](O)(O)O[PH](O)(O)O)[C@@H](O)[C@H]1O, 
-5a98_1_ATP_0,/bml/acmwhb/Repositories/Lab_Repositories/PoseBench/data/dockgen_set/dockgen_holo_aligned_esmfold_structures/5a98_1_ATP_0_holo_aligned_esmfold_protein.pdb,NC1NCNC2C1NCN2[C@@H]1O[C@H](CO[PH](O)(O)O[PH](O)(O)O[PH](O)(O)O)[C@@H](O)[C@H]1O, -5ae3_2_AWB_1,/bml/acmwhb/Repositories/Lab_Repositories/PoseBench/data/dockgen_set/dockgen_holo_aligned_esmfold_structures/5ae3_2_AWB_1_holo_aligned_esmfold_protein.pdb,CCCCCC[C@H]1C(O)O[C@H](C)[C@H](N[C@H](O)[C@@H]2CCC[C@H](NCO)[C@H]2O)C(O)O[C@@H](C)[C@@H]1OC(O)CC(C)C, -5b5s_1_BOG_0,/bml/acmwhb/Repositories/Lab_Repositories/PoseBench/data/dockgen_set/dockgen_holo_aligned_esmfold_structures/5b5s_1_BOG_0_holo_aligned_esmfold_protein.pdb,CCCCCCCCO[C@@H]1O[C@H](CO)[C@@H](O)[C@H](O)[C@H]1O, -5d9g_1_GLU-ASN-LEU-TYR-PHE-GLN_0,/bml/acmwhb/Repositories/Lab_Repositories/PoseBench/data/dockgen_set/dockgen_holo_aligned_esmfold_structures/5d9g_1_GLU-ASN-LEU-TYR-PHE-GLN_0_holo_aligned_esmfold_protein.pdb,CC(C)C[C@H](N)C(=O)N[C@@H](Cc1ccc(O)cc1)C(=O)N[C@@H](Cc1ccccc1)C(=O)N[C@@H](CCC(N)=O)C(=O)O, -5dnc_1_ASN_2,/bml/acmwhb/Repositories/Lab_Repositories/PoseBench/data/dockgen_set/dockgen_holo_aligned_esmfold_structures/5dnc_1_ASN_2_holo_aligned_esmfold_protein.pdb,NC(O)C[C@H](N)C(O)O, -5eno_1_5QG_0,/bml/acmwhb/Repositories/Lab_Repositories/PoseBench/data/dockgen_set/dockgen_holo_aligned_esmfold_structures/5eno_1_5QG_0_holo_aligned_esmfold_protein.pdb,CC1(C)CC2C(CO1)C(N1CCOCC1)NC(SCCC1CCCCC1)[C@@H]2CN, -5enp_1_5QF_0,/bml/acmwhb/Repositories/Lab_Repositories/PoseBench/data/dockgen_set/dockgen_holo_aligned_esmfold_structures/5enp_1_5QF_0_holo_aligned_esmfold_protein.pdb,COCCN1CCN([C@@H]2NC(SCC[C@H]3CCC(OC)[C@@H](OC)C3)[C@H](CN)[C@H]3CC(C)(C)OCC23)CC1, -5enq_1_5QE_0,/bml/acmwhb/Repositories/Lab_Repositories/PoseBench/data/dockgen_set/dockgen_holo_aligned_esmfold_structures/5enq_1_5QE_0_holo_aligned_esmfold_protein.pdb,CC(O)NC1CCC(CCSC2NC(N3C[C@@H](C)O[C@@H](C)C3)[C@@H]3COC(C)(C)CC3C2CN)CC1, 
-5enr_1_MBX_0,/bml/acmwhb/Repositories/Lab_Repositories/PoseBench/data/dockgen_set/dockgen_holo_aligned_esmfold_structures/5enr_1_MBX_0_holo_aligned_esmfold_protein.pdb,CCC(O)NC1CCC(CCS[C@@H]2NC(N3C[C@H](C)O[C@@H](C)C3)C3COC(C)(C)CC3C2CN)CC1, -5ent_1_MIY_0,/bml/acmwhb/Repositories/Lab_Repositories/PoseBench/data/dockgen_set/dockgen_holo_aligned_esmfold_structures/5ent_1_MIY_0_holo_aligned_esmfold_protein.pdb,CN(C)C1CCC(O)C2C1C[C@H]1C[C@H]3[C@H](N(C)C)C(O)C(C(N)O)[C@@H](O)[C@@]3(O)C(O)[C@@H]1C2O, -5ers_1_AMP_1,/bml/acmwhb/Repositories/Lab_Repositories/PoseBench/data/dockgen_set/dockgen_holo_aligned_esmfold_structures/5ers_1_AMP_1_holo_aligned_esmfold_protein.pdb,NC1NCNC2C1NCN2[C@@H]1O[C@H](CO[PH](O)(O)O)[C@@H](O)[C@H]1O, -5f2t_1_PLM_0,/bml/acmwhb/Repositories/Lab_Repositories/PoseBench/data/dockgen_set/dockgen_holo_aligned_esmfold_structures/5f2t_1_PLM_0_holo_aligned_esmfold_protein.pdb,CCCCCCCCCCCCCCCC(O)O, -5f52_1_ASP_2,/bml/acmwhb/Repositories/Lab_Repositories/PoseBench/data/dockgen_set/dockgen_holo_aligned_esmfold_structures/5f52_1_ASP_2_holo_aligned_esmfold_protein.pdb,N[C@@H](CC(O)O)C(O)O, -5fiu_1_Y3J_3,/bml/acmwhb/Repositories/Lab_Repositories/PoseBench/data/dockgen_set/dockgen_holo_aligned_esmfold_structures/5fiu_1_Y3J_3_holo_aligned_esmfold_protein.pdb,NC1NCNC2C1NCN2[C@@H]1O[C@H](C(F)F)[C@@H](O)[C@@H]1O, -5fxd_1_H7Y_1,/bml/acmwhb/Repositories/Lab_Repositories/PoseBench/data/dockgen_set/dockgen_holo_aligned_esmfold_structures/5fxd_1_H7Y_1_holo_aligned_esmfold_protein.pdb,CCC[C@H]1CCC(O)C(OC)C1, -5fxe_1_CIY_1,/bml/acmwhb/Repositories/Lab_Repositories/PoseBench/data/dockgen_set/dockgen_holo_aligned_esmfold_structures/5fxe_1_CIY_1_holo_aligned_esmfold_protein.pdb,CO[C@H]1CC(CCCO)CCC1O, -5fxf_1_BEZ_0,/bml/acmwhb/Repositories/Lab_Repositories/PoseBench/data/dockgen_set/dockgen_holo_aligned_esmfold_structures/5fxf_1_BEZ_0_holo_aligned_esmfold_protein.pdb,OC(O)C1CCCCC1, 
-5gqi_1_ATP_7,/bml/acmwhb/Repositories/Lab_Repositories/PoseBench/data/dockgen_set/dockgen_holo_aligned_esmfold_structures/5gqi_1_ATP_7_holo_aligned_esmfold_protein.pdb,N[C@@H]1NCN[C@H]2C1NCN2[C@@H]1O[C@H](CO[PH](O)(O)O[PH](O)(O)O[PH](O)(O)O)[C@@H](O)[C@H]1O, -5gql_1_ATP_4,/bml/acmwhb/Repositories/Lab_Repositories/PoseBench/data/dockgen_set/dockgen_holo_aligned_esmfold_structures/5gql_1_ATP_4_holo_aligned_esmfold_protein.pdb,NC1NCNC2C1NCN2[C@@H]1O[C@H](CO[PH](O)(O)O[PH](O)(O)O[PH](O)(O)O)[C@@H](O)[C@H]1O, -5hhz_1_ZME_0,/bml/acmwhb/Repositories/Lab_Repositories/PoseBench/data/dockgen_set/dockgen_holo_aligned_esmfold_structures/5hhz_1_ZME_0_holo_aligned_esmfold_protein.pdb,CC1CCN(C2NCNC3NCNC32)C1, -5hmr_1_FDZ_0,/bml/acmwhb/Repositories/Lab_Repositories/PoseBench/data/dockgen_set/dockgen_holo_aligned_esmfold_structures/5hmr_1_FDZ_0_holo_aligned_esmfold_protein.pdb,OC(NC1CNNS1)N[C@H]1CCC[C@H](OC(F)(F)F)C1, -5hqx_1_EDZ_0,/bml/acmwhb/Repositories/Lab_Repositories/PoseBench/data/dockgen_set/dockgen_holo_aligned_esmfold_structures/5hqx_1_EDZ_0_holo_aligned_esmfold_protein.pdb,OCC[C@H]1CCCC[C@@H]1NC(O)N[C@H]1CNNS1, -5hw0_1_GLU_2,/bml/acmwhb/Repositories/Lab_Repositories/PoseBench/data/dockgen_set/dockgen_holo_aligned_esmfold_structures/5hw0_1_GLU_2_holo_aligned_esmfold_protein.pdb,N[C@@H](CCC(O)O)C(O)O, -5ida_1_BMA_0,/bml/acmwhb/Repositories/Lab_Repositories/PoseBench/data/dockgen_set/dockgen_holo_aligned_esmfold_structures/5ida_1_BMA_0_holo_aligned_esmfold_protein.pdb,OC[C@H]1O[C@@H](O)[C@@H](O)[C@@H](O)[C@@H]1O, -5k3o_2_ASP_0,/bml/acmwhb/Repositories/Lab_Repositories/PoseBench/data/dockgen_set/dockgen_holo_aligned_esmfold_structures/5k3o_2_ASP_0_holo_aligned_esmfold_protein.pdb,N[C@@H](CC(O)O)C(O)O, -5k45_2_GLU_1,/bml/acmwhb/Repositories/Lab_Repositories/PoseBench/data/dockgen_set/dockgen_holo_aligned_esmfold_structures/5k45_2_GLU_1_holo_aligned_esmfold_protein.pdb,N[C@@H](CCC(O)O)C(O)O, 
-5k4h_2_GLU_3,/bml/acmwhb/Repositories/Lab_Repositories/PoseBench/data/dockgen_set/dockgen_holo_aligned_esmfold_structures/5k4h_2_GLU_3_holo_aligned_esmfold_protein.pdb,N[C@@H](CCC(O)O)C(O)O, -5k62_1_ASN-VAL_0,/bml/acmwhb/Repositories/Lab_Repositories/PoseBench/data/dockgen_set/dockgen_holo_aligned_esmfold_structures/5k62_1_ASN-VAL_0_holo_aligned_esmfold_protein.pdb,CC(C)[C@@H](CO)NC(O)[C@@H](N)CC(N)O, -5k63_1_ASN-GLY_0,/bml/acmwhb/Repositories/Lab_Repositories/PoseBench/data/dockgen_set/dockgen_holo_aligned_esmfold_structures/5k63_1_ASN-GLY_0_holo_aligned_esmfold_protein.pdb,NC(O)C[C@H](N)C(O)NCCO, -5k66_1_ASN-GLU_0,/bml/acmwhb/Repositories/Lab_Repositories/PoseBench/data/dockgen_set/dockgen_holo_aligned_esmfold_structures/5k66_1_ASN-GLU_0_holo_aligned_esmfold_protein.pdb,NC(O)C[C@H](N)C(O)N[C@H](CO)CCC(O)O, -5mh1_1_BMA_0,/bml/acmwhb/Repositories/Lab_Repositories/PoseBench/data/dockgen_set/dockgen_holo_aligned_esmfold_structures/5mh1_1_BMA_0_holo_aligned_esmfold_protein.pdb,OC[C@H]1O[C@@H](O)[C@@H](O)[C@@H](O)[C@@H]1O, -5u82_2_ZN0_0,/bml/acmwhb/Repositories/Lab_Repositories/PoseBench/data/dockgen_set/dockgen_holo_aligned_esmfold_structures/5u82_2_ZN0_0_holo_aligned_esmfold_protein.pdb,CC[SnH](CC)CC, -6a71_1_9UX_0,/bml/acmwhb/Repositories/Lab_Repositories/PoseBench/data/dockgen_set/dockgen_holo_aligned_esmfold_structures/6a71_1_9UX_0_holo_aligned_esmfold_protein.pdb,O[Mo@]12S[Mo@@]1(O)S2, -6a72_1_9UX_0,/bml/acmwhb/Repositories/Lab_Repositories/PoseBench/data/dockgen_set/dockgen_holo_aligned_esmfold_structures/6a72_1_9UX_0_holo_aligned_esmfold_protein.pdb,O[Mo@]12S[Mo@@]1(O)S2, -6b1b_1_TMO_15,/bml/acmwhb/Repositories/Lab_Repositories/PoseBench/data/dockgen_set/dockgen_holo_aligned_esmfold_structures/6b1b_1_TMO_15_holo_aligned_esmfold_protein.pdb,C[N+](C)(C)O, 
-6ea9_1_9BG_0,/bml/acmwhb/Repositories/Lab_Repositories/PoseBench/data/dockgen_set/dockgen_holo_aligned_esmfold_structures/6ea9_1_9BG_0_holo_aligned_esmfold_protein.pdb,NC1NC(O)C2NCN([C@@H]3O[C@H](CO)[C@@H](O)[C@H]3O[PH]([O-])(O)OC[C@H]3O[C@@H](N4CNC5C(N)NCNC54)[C@H](O)[C@@H]3O[PH]([O-])([O-])O)C2N1, -6ep5_1_ADP_1,/bml/acmwhb/Repositories/Lab_Repositories/PoseBench/data/dockgen_set/dockgen_holo_aligned_esmfold_structures/6ep5_1_ADP_1_holo_aligned_esmfold_protein.pdb,NC1NCNC2C1NCN2[C@@H]1O[C@H](CO[PH](O)(O)O[PH](O)(O)O)[C@@H](O)[C@H]1O, -6etf_1_AMP_0,/bml/acmwhb/Repositories/Lab_Repositories/PoseBench/data/dockgen_set/dockgen_holo_aligned_esmfold_structures/6etf_1_AMP_0_holo_aligned_esmfold_protein.pdb,NC1NCNC2[C@H]1NCN2[C@@H]1O[C@H](CO[PH](O)(O)O)[C@@H](O)[C@H]1O, -6fgc_1_ADP_1,/bml/acmwhb/Repositories/Lab_Repositories/PoseBench/data/dockgen_set/dockgen_holo_aligned_esmfold_structures/6fgc_1_ADP_1_holo_aligned_esmfold_protein.pdb,NC1NCNC2C1NCN2[C@@H]1O[C@H](CO[PH](O)(O)O[PH](O)(O)O)[C@@H](O)[C@H]1O, -6fgc_1_D95_1,/bml/acmwhb/Repositories/Lab_Repositories/PoseBench/data/dockgen_set/dockgen_holo_aligned_esmfold_structures/6fgc_1_D95_1_holo_aligned_esmfold_protein.pdb,C[C@H]1[C@H](OC(O)CCC(O)O)O[C@@H]2O[C@@]3(C)CC[C@H]4[C@H](C)CC[C@@H]1[C@@]24OO3, -6gbf_1_AMP_0,/bml/acmwhb/Repositories/Lab_Repositories/PoseBench/data/dockgen_set/dockgen_holo_aligned_esmfold_structures/6gbf_1_AMP_0_holo_aligned_esmfold_protein.pdb,NC1NCNC2C1NCN2[C@@H]1O[C@H](CO[PH](O)(O)O)[C@@H](O)[C@H]1O, -6jls_1_FMN_6,/bml/acmwhb/Repositories/Lab_Repositories/PoseBench/data/dockgen_set/dockgen_holo_aligned_esmfold_structures/6jls_1_FMN_6_holo_aligned_esmfold_protein.pdb,CC1CC2NC3C(O)NC(O)NC3N(C[C@H](O)[C@H](O)[C@H](O)CO[PH](O)(O)O)C2CC1C, -6n19_2_K8V_0,/bml/acmwhb/Repositories/Lab_Repositories/PoseBench/data/dockgen_set/dockgen_holo_aligned_esmfold_structures/6n19_2_K8V_0_holo_aligned_esmfold_protein.pdb,CCC(O)NC1CCC(C([O-])O)C(C([O-])O)C1, 
-6nco_1_KQP_0,/bml/acmwhb/Repositories/Lab_Repositories/PoseBench/data/dockgen_set/dockgen_holo_aligned_esmfold_structures/6nco_1_KQP_0_holo_aligned_esmfold_protein.pdb,CC(C)(O)C1CCC([C@H]2CC(Cl)C[C@@H](C34(C(N)N)CC3C4)C2)CC1, -6npp_1_KWG_0,/bml/acmwhb/Repositories/Lab_Repositories/PoseBench/data/dockgen_set/dockgen_holo_aligned_esmfold_structures/6npp_1_KWG_0_holo_aligned_esmfold_protein.pdb,[O-]C(O)C1CCCC(CCC2CCCCC2)C1N1CCCC1, -6o6y_1_ACK_0,/bml/acmwhb/Repositories/Lab_Repositories/PoseBench/data/dockgen_set/dockgen_holo_aligned_esmfold_structures/6o6y_1_ACK_0_holo_aligned_esmfold_protein.pdb,NC1NCNC2C1NCN2[C@@H]1O[C@H](CO)[C@H]2O[PH]([O-])(O)O[C@H]21, -6o70_1_ACK_1,/bml/acmwhb/Repositories/Lab_Repositories/PoseBench/data/dockgen_set/dockgen_holo_aligned_esmfold_structures/6o70_1_ACK_1_holo_aligned_esmfold_protein.pdb,NC1NCNC2C1NCN2[C@@H]1O[C@H](CO)[C@H]2O[PH]([O-])(O)O[C@H]21, -6pa2_1_ASP_2,/bml/acmwhb/Repositories/Lab_Repositories/PoseBench/data/dockgen_set/dockgen_holo_aligned_esmfold_structures/6pa2_1_ASP_2_holo_aligned_esmfold_protein.pdb,N[C@@H](CC(O)O)C(O)O, -6pa6_2_ASN_0,/bml/acmwhb/Repositories/Lab_Repositories/PoseBench/data/dockgen_set/dockgen_holo_aligned_esmfold_structures/6pa6_2_ASN_0_holo_aligned_esmfold_protein.pdb,NC(O)C[C@H](N)C(O)O, -6paa_1_ASP_2,/bml/acmwhb/Repositories/Lab_Repositories/PoseBench/data/dockgen_set/dockgen_holo_aligned_esmfold_structures/6paa_1_ASP_2_holo_aligned_esmfold_protein.pdb,N[C@@H](CC(O)O)C(O)O, -6qkr_1_FAD_0,/bml/acmwhb/Repositories/Lab_Repositories/PoseBench/data/dockgen_set/dockgen_holo_aligned_esmfold_structures/6qkr_1_FAD_0_holo_aligned_esmfold_protein.pdb,CC1CC2NC3C(NC(O)N[C@H]3O)N(C[C@H](O)[C@H](O)[C@H](O)CO[PH](O)(O)O[PH](O)(O)OC[C@H]3O[C@@H](N4CNC5C4NCN[C@H]5N)[C@H](O)[C@@H]3O)C2CC1C, -6rms_1_AMP_1,/bml/acmwhb/Repositories/Lab_Repositories/PoseBench/data/dockgen_set/dockgen_holo_aligned_esmfold_structures/6rms_1_AMP_1_holo_aligned_esmfold_protein.pdb,NC1NCNC2C1NCN2[C@@H]1O[C@H](CO[PH](O)(O)O)[C@@H](O)[C@H]1O, 
-6ryz_1_SAM_2,/bml/acmwhb/Repositories/Lab_Repositories/PoseBench/data/dockgen_set/dockgen_holo_aligned_esmfold_structures/6ryz_1_SAM_2_holo_aligned_esmfold_protein.pdb,C[S@@H](CC[C@H](N)C(O)O)C[C@H]1O[C@@H](N2CNC3C2NCN[C@@H]3N)[C@H](O)[C@@H]1O, -6rz2_1_5CD_2,/bml/acmwhb/Repositories/Lab_Repositories/PoseBench/data/dockgen_set/dockgen_holo_aligned_esmfold_structures/6rz2_1_5CD_2_holo_aligned_esmfold_protein.pdb,NC1NCNC2[C@H]1NCN2[C@@H]1O[C@H](CCl)[C@@H](O)[C@H]1O, -6tvg_1_AP2_1,/bml/acmwhb/Repositories/Lab_Repositories/PoseBench/data/dockgen_set/dockgen_holo_aligned_esmfold_structures/6tvg_1_AP2_1_holo_aligned_esmfold_protein.pdb,NC1NCNC2C1NCN2[C@@H]1O[C@H](CO[PH](O)(O)C[PH](O)(O)O)[C@@H](O)[C@H]1O, -6uqy_2_AT3_0,/bml/acmwhb/Repositories/Lab_Repositories/PoseBench/data/dockgen_set/dockgen_holo_aligned_esmfold_structures/6uqy_2_AT3_0_holo_aligned_esmfold_protein.pdb,C[C@@H](O)SCC[N+](C)(C)C, -6ur1_2_AT3_0,/bml/acmwhb/Repositories/Lab_Repositories/PoseBench/data/dockgen_set/dockgen_holo_aligned_esmfold_structures/6ur1_2_AT3_0_holo_aligned_esmfold_protein.pdb,C[C@@H](O)SCC[N+](C)(C)C, -6v2a_1_ASN_3,/bml/acmwhb/Repositories/Lab_Repositories/PoseBench/data/dockgen_set/dockgen_holo_aligned_esmfold_structures/6v2a_1_ASN_3_holo_aligned_esmfold_protein.pdb,NC(O)C[C@H](N)C(O)O, -6wyz_1_DGL_1,/bml/acmwhb/Repositories/Lab_Repositories/PoseBench/data/dockgen_set/dockgen_holo_aligned_esmfold_structures/6wyz_1_DGL_1_holo_aligned_esmfold_protein.pdb,N[C@H](CCC(O)O)C(O)O, -6xb3_3_9BG_1,/bml/acmwhb/Repositories/Lab_Repositories/PoseBench/data/dockgen_set/dockgen_holo_aligned_esmfold_structures/6xb3_3_9BG_1_holo_aligned_esmfold_protein.pdb,NC1NC(O)C2NCN([C@@H]3O[C@H](CO)[C@@H](O)[C@H]3O[PH]([O-])(O)OC[C@H]3O[C@@H](N4CNC5C(N)NCNC54)[C@H](O)[C@@H]3O[PH]([O-])(O)O)C2N1, 
-6xug_1_O1Q_0,/bml/acmwhb/Repositories/Lab_Repositories/PoseBench/data/dockgen_set/dockgen_holo_aligned_esmfold_structures/6xug_1_O1Q_0_holo_aligned_esmfold_protein.pdb,CC1CCC[C@H](N2NCC[C@H]2C2CC(Cl)C3NNN(C4CC[C@H]5CNNC5C4)C3C2)C1, -6yao_1_OJ2_0,/bml/acmwhb/Repositories/Lab_Repositories/PoseBench/data/dockgen_set/dockgen_holo_aligned_esmfold_structures/6yao_1_OJ2_0_holo_aligned_esmfold_protein.pdb,OCC[C@H]1CCCC[C@@H]1NC(O)N[C@H]1CCC[C@@H](OC(F)(F)F)C1, -6yap_1_OHZ_0,/bml/acmwhb/Repositories/Lab_Repositories/PoseBench/data/dockgen_set/dockgen_holo_aligned_esmfold_structures/6yap_1_OHZ_0_holo_aligned_esmfold_protein.pdb,OCC[C@H]1CCCC[C@@H]1NC(O)N[C@H]1CC(Cl)C[C@H](OC(F)(F)F)C1, -6yaq_1_OHZ_0,/bml/acmwhb/Repositories/Lab_Repositories/PoseBench/data/dockgen_set/dockgen_holo_aligned_esmfold_structures/6yaq_1_OHZ_0_holo_aligned_esmfold_protein.pdb,OCCC1CCCC[C@H]1N[C@H](O)N[C@H]1C[C@@H](Cl)C[C@@H](OC(F)(F)F)C1, +3gvl_1_SLB_2,/bml/acmwhb/Repositories/Lab_Repositories/PoseBench/data/dockgen_set/dockgen_holo_aligned_predicted_structures/3gvl_1_SLB_2_holo_aligned_predicted_protein.pdb,CC(O)N[C@H]1[C@H]([C@H](O)[C@H](O)CO)O[C@](O)(C(O)O)C[C@@H]1O, +3inr_1_GDU_0,/bml/acmwhb/Repositories/Lab_Repositories/PoseBench/data/dockgen_set/dockgen_holo_aligned_predicted_structures/3inr_1_GDU_0_holo_aligned_predicted_protein.pdb,OC[C@H]1O[C@H](O[PH](O)(O)O[PH](O)(O)OC[C@H]2O[C@@H](N3CCC(O)NC3O)[C@H](O)[C@@H]2O)[C@H](O)[C@@H](O)[C@H]1O, +3jqm_1_GTP_5,/bml/acmwhb/Repositories/Lab_Repositories/PoseBench/data/dockgen_set/dockgen_holo_aligned_predicted_structures/3jqm_1_GTP_5_holo_aligned_predicted_protein.pdb,NC1NC(O)C2NCN([C@@H]3O[C@H](CO[PH](O)(O)O[PH](O)(O)O[PH](O)(O)O)[C@@H](O)[C@H]3O)C2N1, +3ju4_1_SLB_2,/bml/acmwhb/Repositories/Lab_Repositories/PoseBench/data/dockgen_set/dockgen_holo_aligned_predicted_structures/3ju4_1_SLB_2_holo_aligned_predicted_protein.pdb,CC(O)N[C@H]1[C@H]([C@H](O)[C@H](O)CO)O[C@](O)(C(O)O)C[C@@H]1O, 
+4cnl_1_CHT_1,/bml/acmwhb/Repositories/Lab_Repositories/PoseBench/data/dockgen_set/dockgen_holo_aligned_predicted_structures/4cnl_1_CHT_1_holo_aligned_predicted_protein.pdb,C[N+](C)(C)CCO, +1hg0_1_SIN_1,/bml/acmwhb/Repositories/Lab_Repositories/PoseBench/data/dockgen_set/dockgen_holo_aligned_predicted_structures/1hg0_1_SIN_1_holo_aligned_predicted_protein.pdb,OC(O)CCC(O)O, +1i8t_1_FAD_1,/bml/acmwhb/Repositories/Lab_Repositories/PoseBench/data/dockgen_set/dockgen_holo_aligned_predicted_structures/1i8t_1_FAD_1_holo_aligned_predicted_protein.pdb,CC1CC2NC3C(O)NC(O)NC3N(C[C@H](O)[C@H](O)[C@H](O)CO[PH](O)(O)O[PH](O)(O)OC[C@H]3O[C@@H](N4CNC5C(N)NCNC54)[C@H](O)[C@@H]3O)C2CC1C, +1o28_1_UFP_2,/bml/acmwhb/Repositories/Lab_Repositories/PoseBench/data/dockgen_set/dockgen_holo_aligned_predicted_structures/1o28_1_UFP_2_holo_aligned_predicted_protein.pdb,OC1NC(O)N([C@H]2C[C@H](O)[C@@H](CO[PH](O)(O)O)O2)CC1F, +1o72_2_PC_0,/bml/acmwhb/Repositories/Lab_Repositories/PoseBench/data/dockgen_set/dockgen_holo_aligned_predicted_structures/1o72_2_PC_0_holo_aligned_predicted_protein.pdb,C[N+](C)(C)CCO[PH](O)(O)O, +1pj2_1_FUM_0,/bml/acmwhb/Repositories/Lab_Repositories/PoseBench/data/dockgen_set/dockgen_holo_aligned_predicted_structures/1pj2_1_FUM_0_holo_aligned_predicted_protein.pdb,OC(O)CCC(O)O, +1pj4_1_FUM_1,/bml/acmwhb/Repositories/Lab_Repositories/PoseBench/data/dockgen_set/dockgen_holo_aligned_predicted_structures/1pj4_1_FUM_1_holo_aligned_predicted_protein.pdb,OC(O)CCC(O)O, +1qaw_1_TRP_7,/bml/acmwhb/Repositories/Lab_Repositories/PoseBench/data/dockgen_set/dockgen_holo_aligned_predicted_structures/1qaw_1_TRP_7_holo_aligned_predicted_protein.pdb,N[C@@H](CC1CNC2CCCCC12)C(O)O, +1rqp_1_SAM_0,/bml/acmwhb/Repositories/Lab_Repositories/PoseBench/data/dockgen_set/dockgen_holo_aligned_predicted_structures/1rqp_1_SAM_0_holo_aligned_predicted_protein.pdb,C[S@@H](CC[C@H](N)C(O)O)C[C@H]1O[C@@H](N2CNC3C(N)NCNC32)[C@H](O)[C@@H]1O, 
+1sbz_1_FMN_3,/bml/acmwhb/Repositories/Lab_Repositories/PoseBench/data/dockgen_set/dockgen_holo_aligned_predicted_structures/1sbz_1_FMN_3_holo_aligned_predicted_protein.pdb,C[C@@H]1CC2NC3C(O)NC(O)NC3N(C[C@H](O)[C@H](O)[C@H](O)CO[PH](O)(O)O)C2C[C@@H]1C, +1sij_1_PCD_0,/bml/acmwhb/Repositories/Lab_Repositories/PoseBench/data/dockgen_set/dockgen_holo_aligned_predicted_structures/1sij_1_PCD_0_holo_aligned_predicted_protein.pdb,NC1CCN([C@@H]2O[C@H](CO[PH](O)(O)O[PH](O)(O)OC[C@H]3O[C@H]4NC5NC(N)NC(O)C5N[C@H]4[C@@H]4S[Mo](O)(O)SC43)[C@@H](O)[C@H]2O)C(O)N1, +1tke_1_SER_0,/bml/acmwhb/Repositories/Lab_Repositories/PoseBench/data/dockgen_set/dockgen_holo_aligned_predicted_structures/1tke_1_SER_0_holo_aligned_predicted_protein.pdb,N[C@@H](CO)C(O)O, +1tkg_1_SSA_0,/bml/acmwhb/Repositories/Lab_Repositories/PoseBench/data/dockgen_set/dockgen_holo_aligned_predicted_structures/1tkg_1_SSA_0_holo_aligned_predicted_protein.pdb,NC1NCNC2C1NCN2[C@@H]1O[C@H](COS(O)(O)NC(O)[C@@H](N)CO)[C@@H](O)[C@H]1O, +1u8u_1_OCA_0,/bml/acmwhb/Repositories/Lab_Repositories/PoseBench/data/dockgen_set/dockgen_holo_aligned_predicted_structures/1u8u_1_OCA_0_holo_aligned_predicted_protein.pdb,CCCCCCCC(O)O, +1uf5_1_CDT_0,/bml/acmwhb/Repositories/Lab_Repositories/PoseBench/data/dockgen_set/dockgen_holo_aligned_predicted_structures/1uf5_1_CDT_0_holo_aligned_predicted_protein.pdb,CSCC[C@@H](NC(N)O)C(O)O, +1uf7_1_CDV_0,/bml/acmwhb/Repositories/Lab_Repositories/PoseBench/data/dockgen_set/dockgen_holo_aligned_predicted_structures/1uf7_1_CDV_0_holo_aligned_predicted_protein.pdb,CC(C)[C@@H](NC(N)O)C(O)O, +1uf8_1_ING_2,/bml/acmwhb/Repositories/Lab_Repositories/PoseBench/data/dockgen_set/dockgen_holo_aligned_predicted_structures/1uf8_1_ING_2_holo_aligned_predicted_protein.pdb,NC(O)N[C@H](CC1CCCCC1)C(O)O, +1v2g_1_OCA_0,/bml/acmwhb/Repositories/Lab_Repositories/PoseBench/data/dockgen_set/dockgen_holo_aligned_predicted_structures/1v2g_1_OCA_0_holo_aligned_predicted_protein.pdb,CCCCCCCC(O)O, 
+1v97_1_MTE_1,/bml/acmwhb/Repositories/Lab_Repositories/PoseBench/data/dockgen_set/dockgen_holo_aligned_predicted_structures/1v97_1_MTE_1_holo_aligned_predicted_protein.pdb,NC1NC(O)[C@@H]2N[C@H]3C(S)C(S)[C@@H](CO[PH](O)(O)O)O[C@H]3NC2N1, +1za2_1_CTP_4,/bml/acmwhb/Repositories/Lab_Repositories/PoseBench/data/dockgen_set/dockgen_holo_aligned_predicted_structures/1za2_1_CTP_4_holo_aligned_predicted_protein.pdb,NC1CCN([C@@H]2O[C@H](CO[PH](O)(O)O[PH](O)(O)O[PH](O)(O)O)[C@@H](O)[C@H]2O)C(O)N1, +2cdc_1_XYS_5,/bml/acmwhb/Repositories/Lab_Repositories/PoseBench/data/dockgen_set/dockgen_holo_aligned_predicted_structures/2cdc_1_XYS_5_holo_aligned_predicted_protein.pdb,O[C@@H]1[C@@H](O)[C@@H](O)OC[C@H]1O, +2ext_1_TRP_0,/bml/acmwhb/Repositories/Lab_Repositories/PoseBench/data/dockgen_set/dockgen_holo_aligned_predicted_structures/2ext_1_TRP_0_holo_aligned_predicted_protein.pdb,N[C@@H](CC1CNC2CCCCC12)C(O)O, +2g7c_1_NAG-GAL-GLA_1,/bml/acmwhb/Repositories/Lab_Repositories/PoseBench/data/dockgen_set/dockgen_holo_aligned_predicted_structures/2g7c_1_NAG-GAL-GLA_1_holo_aligned_predicted_protein.pdb,CC(O)N[C@@H]1[C@@H](O)[C@H](O[C@@H]2O[C@H](CO)[C@H](O)[C@H](O[C@H]3O[C@H](CO)[C@H](O)[C@H](O)[C@H]3O)[C@H]2O)[C@@H](CO)O[C@H]1O, +2gag_1_FOA_0,/bml/acmwhb/Repositories/Lab_Repositories/PoseBench/data/dockgen_set/dockgen_holo_aligned_predicted_structures/2gag_1_FOA_0_holo_aligned_predicted_protein.pdb,OC(O)C1CCCO1, +2gag_1_NAD_0,/bml/acmwhb/Repositories/Lab_Repositories/PoseBench/data/dockgen_set/dockgen_holo_aligned_predicted_structures/2gag_1_NAD_0_holo_aligned_predicted_protein.pdb,NC1NCNC2C1NCN2[C@@H]1O[C@H](CO[PH](O)(O)O[PH](O)(O)OC[C@H]2O[C@@H](N3CCCC([C@H](N)O)C3)[C@H](O)[C@@H]2O)[C@@H](O)[C@H]1O, +2gah_1_NAD_0,/bml/acmwhb/Repositories/Lab_Repositories/PoseBench/data/dockgen_set/dockgen_holo_aligned_predicted_structures/2gah_1_NAD_0_holo_aligned_predicted_protein.pdb,NC(O)C1CCCN([C@@H]2O[C@H](CO[PH](O)(O)O[PH](O)(O)OC[C@H]3O[C@@H](N4CNC5C(N)NCNC54)[C@H](O)[C@@H]3O)[C@@H](O)[C@H]2O)C1, 
+2gf3_2_FOA_1,/bml/acmwhb/Repositories/Lab_Repositories/PoseBench/data/dockgen_set/dockgen_holo_aligned_predicted_structures/2gf3_2_FOA_1_holo_aligned_predicted_protein.pdb,OC(O)C1CCCO1, +2him_1_ASN_3,/bml/acmwhb/Repositories/Lab_Repositories/PoseBench/data/dockgen_set/dockgen_holo_aligned_predicted_structures/2him_1_ASN_3_holo_aligned_predicted_protein.pdb,NC(O)C[C@H](N)C(O)O, +2hk9_1_SKM_0,/bml/acmwhb/Repositories/Lab_Repositories/PoseBench/data/dockgen_set/dockgen_holo_aligned_predicted_structures/2hk9_1_SKM_0_holo_aligned_predicted_protein.pdb,OC(O)[C@H]1C[C@@H](O)[C@@H](O)[C@H](O)C1, +2hs3_1_FGR_0,/bml/acmwhb/Repositories/Lab_Repositories/PoseBench/data/dockgen_set/dockgen_holo_aligned_predicted_structures/2hs3_1_FGR_0_holo_aligned_predicted_protein.pdb,OCNCC(O)N[C@@H]1O[C@H](CO[PH](O)(O)O)[C@@H](O)[C@H]1O, +2o5m_1_MNR_0,/bml/acmwhb/Repositories/Lab_Repositories/PoseBench/data/dockgen_set/dockgen_holo_aligned_predicted_structures/2o5m_1_MNR_0_holo_aligned_predicted_protein.pdb,CCC1C(C)C2C[C@@H]3[C@H](C)[C@H](CCC(O)O)C4CC5[C@@H](CCC(O)O)C(C)C6CC7[C@@H](CC)[C@H](C)C8CC1N2[Mn@SP2](N65)(N87)N43, +2q37_1_3AL_0,/bml/acmwhb/Repositories/Lab_Repositories/PoseBench/data/dockgen_set/dockgen_holo_aligned_predicted_structures/2q37_1_3AL_0_holo_aligned_predicted_protein.pdb,NC(O)N[C@H]1NC(O)NC1O, +2q6k_1_ADN_1,/bml/acmwhb/Repositories/Lab_Repositories/PoseBench/data/dockgen_set/dockgen_holo_aligned_predicted_structures/2q6k_1_ADN_1_holo_aligned_predicted_protein.pdb,NC1NCNC2C1NCN2[C@@H]1O[C@H](CO)[C@@H](O)[C@H]1O, +2r4e_1_13P_0,/bml/acmwhb/Repositories/Lab_Repositories/PoseBench/data/dockgen_set/dockgen_holo_aligned_predicted_structures/2r4e_1_13P_0_holo_aligned_predicted_protein.pdb,OC[C@@H](O)CO[PH](O)(O)O, 
+2v5e_1_GU4-YYJ_0,/bml/acmwhb/Repositories/Lab_Repositories/PoseBench/data/dockgen_set/dockgen_holo_aligned_predicted_structures/2v5e_1_GU4-YYJ_0_holo_aligned_predicted_protein.pdb,OS(O)(O)OC[C@H]1O[C@@](COS(O)(O)O)(O[C@H]2O[C@H](COS(O)(O)O)[C@@H](OS(O)(O)O)[C@H](OS(O)(O)O)[C@H]2OS(O)(O)O)[C@@H](OS(O)(O)O)[C@@H]1OS(O)(O)O, +2v7t_1_SAH_4,/bml/acmwhb/Repositories/Lab_Repositories/PoseBench/data/dockgen_set/dockgen_holo_aligned_predicted_structures/2v7t_1_SAH_4_holo_aligned_predicted_protein.pdb,NC1NCNC2C1NCN2[C@@H]1O[C@H](CSCC[C@H](N)C(O)O)[C@@H](O)[C@H]1O, +2v7u_1_SAM_2,/bml/acmwhb/Repositories/Lab_Repositories/PoseBench/data/dockgen_set/dockgen_holo_aligned_predicted_structures/2v7u_1_SAM_2_holo_aligned_predicted_protein.pdb,C[S@@H](CC[C@H](N)C(O)O)C[C@H]1O[C@@H](N2CNC3C(N)NCNC32)[C@H](O)[C@@H]1O, +2v7v_1_5FD_0,/bml/acmwhb/Repositories/Lab_Repositories/PoseBench/data/dockgen_set/dockgen_holo_aligned_predicted_structures/2v7v_1_5FD_0_holo_aligned_predicted_protein.pdb,NC1NCNC2C1NCN2[C@@H]1O[C@H](CF)[C@@H](O)[C@H]1O, +2v7w_1_5FD_0,/bml/acmwhb/Repositories/Lab_Repositories/PoseBench/data/dockgen_set/dockgen_holo_aligned_predicted_structures/2v7w_1_5FD_0_holo_aligned_predicted_protein.pdb,NC1NCNC2C1NCN2[C@@H]1O[C@H](CF)[C@@H](O)[C@H]1O, +2vdf_1_OCT_0,/bml/acmwhb/Repositories/Lab_Repositories/PoseBench/data/dockgen_set/dockgen_holo_aligned_predicted_structures/2vdf_1_OCT_0_holo_aligned_predicted_protein.pdb,CCCCCCCC, +2vfu_1_MTL_0,/bml/acmwhb/Repositories/Lab_Repositories/PoseBench/data/dockgen_set/dockgen_holo_aligned_predicted_structures/2vfu_1_MTL_0_holo_aligned_predicted_protein.pdb,OC[C@@H](O)[C@@H](O)[C@H](O)[C@H](O)CO, 
+2wab_1_BGC-BGC-BGC-BGC-BGC_0,/bml/acmwhb/Repositories/Lab_Repositories/PoseBench/data/dockgen_set/dockgen_holo_aligned_predicted_structures/2wab_1_BGC-BGC-BGC-BGC-BGC_0_holo_aligned_predicted_protein.pdb,OC[C@H]1O[C@@H](O[C@H]2[C@H](O)[C@@H](O)[C@H](O[C@H]3[C@H](O)[C@@H](O)[C@H](O[C@H]4[C@H](O)[C@@H](O)[C@H](O[C@H]5[C@H](O)[C@@H](O)[C@H](O)O[C@@H]5CO)O[C@@H]4CO)O[C@@H]3CO)O[C@@H]2CO)[C@H](O)[C@@H](O)[C@@H]1O, +2wao_1_BGC-BGC-BGC-BGC-BGC_0,/bml/acmwhb/Repositories/Lab_Repositories/PoseBench/data/dockgen_set/dockgen_holo_aligned_predicted_structures/2wao_1_BGC-BGC-BGC-BGC-BGC_0_holo_aligned_predicted_protein.pdb,OC[C@H]1O[C@@H](O[C@H]2[C@H](O)[C@@H](O)[C@H](O[C@H]3[C@H](O)[C@@H](O)[C@H](O[C@H]4[C@H](O)[C@@H](O)[C@H](O[C@H]5[C@H](O)[C@@H](O)[C@H](O)O[C@@H]5CO)O[C@@H]4CO)O[C@@H]3CO)O[C@@H]2CO)[C@H](O)[C@@H](O)[C@@H]1O, +2wr8_1_SAH_1,/bml/acmwhb/Repositories/Lab_Repositories/PoseBench/data/dockgen_set/dockgen_holo_aligned_predicted_structures/2wr8_1_SAH_1_holo_aligned_predicted_protein.pdb,NC1NCNC2C1NCN2[C@@H]1O[C@H](CSCC[C@H](N)C(O)O)[C@@H](O)[C@H]1O, +2wwc_1_CHT_2,/bml/acmwhb/Repositories/Lab_Repositories/PoseBench/data/dockgen_set/dockgen_holo_aligned_predicted_structures/2wwc_1_CHT_2_holo_aligned_predicted_protein.pdb,C[N+](C)(C)CCO, +2x34_2_UQ8_0,/bml/acmwhb/Repositories/Lab_Repositories/PoseBench/data/dockgen_set/dockgen_holo_aligned_predicted_structures/2x34_2_UQ8_0_holo_aligned_predicted_protein.pdb,COC1C(O)C(C)C(CCC(C)CCCC(C)CCCC(C)CCC[C@H](C)CCCC(C)CCC[C@H](C)CCCC(C)CCCC(C)C)C(O)C1OC, +2xrh_1_NIO_1,/bml/acmwhb/Repositories/Lab_Repositories/PoseBench/data/dockgen_set/dockgen_holo_aligned_predicted_structures/2xrh_1_NIO_1_holo_aligned_predicted_protein.pdb,OC(O)[C@H]1CCCNC1, +2xta_1_ACO_0,/bml/acmwhb/Repositories/Lab_Repositories/PoseBench/data/dockgen_set/dockgen_holo_aligned_predicted_structures/2xta_1_ACO_0_holo_aligned_predicted_protein.pdb,CCO[PH](O)(O)O[PH](O)(O)OC[C@H]1O[C@@H](N2CNC3C(N)NCNC32)[C@H](O)[C@@H]1O[PH](O)(O)O, 
+2zcz_2_TRP_3,/bml/acmwhb/Repositories/Lab_Repositories/PoseBench/data/dockgen_set/dockgen_holo_aligned_predicted_structures/2zcz_2_TRP_3_holo_aligned_predicted_protein.pdb,N[C@@H](CC1CNC2CCCCC12)C(O)O, +2zd0_1_TRP_9,/bml/acmwhb/Repositories/Lab_Repositories/PoseBench/data/dockgen_set/dockgen_holo_aligned_predicted_structures/2zd0_1_TRP_9_holo_aligned_predicted_protein.pdb,N[C@@H](CC1CNC2CCCCC12)C(O)O, +2ze9_1_PD7_0,/bml/acmwhb/Repositories/Lab_Repositories/PoseBench/data/dockgen_set/dockgen_holo_aligned_predicted_structures/2ze9_1_PD7_0_holo_aligned_predicted_protein.pdb,CCCCCCC(O)OC[C@H](CO[PH](O)(O)O)OC(O)CCCCCC, +3ad7_1_NAD_0,/bml/acmwhb/Repositories/Lab_Repositories/PoseBench/data/dockgen_set/dockgen_holo_aligned_predicted_structures/3ad7_1_NAD_0_holo_aligned_predicted_protein.pdb,N[C@H](O)C1CCCN([C@@H]2O[C@H](CO[PH](O)(O)O[PH](O)(O)OC[C@H]3O[C@@H](N4CNC5[C@H](N)NCN[C@H]54)[C@H](O)[C@@H]3O)[C@@H](O)[C@H]2O)C1, +3ad9_1_NAD_0,/bml/acmwhb/Repositories/Lab_Repositories/PoseBench/data/dockgen_set/dockgen_holo_aligned_predicted_structures/3ad9_1_NAD_0_holo_aligned_predicted_protein.pdb,NC1NCNC2[C@@H]1NCN2[C@@H]1O[C@H](CO[PH](O)(O)O[PH](O)(O)OC[C@H]2O[C@@H](N3CCCC([C@H](N)O)C3)[C@H](O)[C@@H]2O)[C@@H](O)[C@H]1O, +3ada_1_NAD_0,/bml/acmwhb/Repositories/Lab_Repositories/PoseBench/data/dockgen_set/dockgen_holo_aligned_predicted_structures/3ada_1_NAD_0_holo_aligned_predicted_protein.pdb,N[C@H](O)C1CCCN([C@@H]2O[C@H](CO[PH](O)(O)O[PH](O)(O)OC[C@H]3O[C@@H](N4CN[C@@H]5[C@H](N)NCN[C@H]54)[C@H](O)[C@@H]3O)[C@@H](O)[C@H]2O)C1, +3eca_1_ASP_3,/bml/acmwhb/Repositories/Lab_Repositories/PoseBench/data/dockgen_set/dockgen_holo_aligned_predicted_structures/3eca_1_ASP_3_holo_aligned_predicted_protein.pdb,N[C@@H](CC(O)O)C(O)O, 
+3gf4_1_FAD_1,/bml/acmwhb/Repositories/Lab_Repositories/PoseBench/data/dockgen_set/dockgen_holo_aligned_predicted_structures/3gf4_1_FAD_1_holo_aligned_predicted_protein.pdb,CC1CC2NC3C(O)NC(O)NC3N(C[C@H](O)[C@H](O)[C@H](O)CO[PH](O)(O)O[PH](O)(O)OC[C@H]3O[C@@H](N4CNC5C4NCN[C@H]5N)[C@H](O)[C@@H]3O)C2CC1C, +3gf4_1_U5P_0,/bml/acmwhb/Repositories/Lab_Repositories/PoseBench/data/dockgen_set/dockgen_holo_aligned_predicted_structures/3gf4_1_U5P_0_holo_aligned_predicted_protein.pdb,OC1CCN([C@@H]2O[C@H](CO[PH](O)(O)O)[C@@H](O)[C@H]2O)C(O)N1, +3he3_5_UDP_0,/bml/acmwhb/Repositories/Lab_Repositories/PoseBench/data/dockgen_set/dockgen_holo_aligned_predicted_structures/3he3_5_UDP_0_holo_aligned_predicted_protein.pdb,OC1CCN([C@@H]2O[C@H](CO[PH](O)(O)O[PH](O)(O)O)[C@@H](O)[C@H]2O)C(O)N1, +3it6_1_ORN_1,/bml/acmwhb/Repositories/Lab_Repositories/PoseBench/data/dockgen_set/dockgen_holo_aligned_predicted_structures/3it6_1_ORN_1_holo_aligned_predicted_protein.pdb,NCCC[C@H](N)C(O)O, +3k8l_1_GLC-GLC-GLC-GLC-GLC_0,/bml/acmwhb/Repositories/Lab_Repositories/PoseBench/data/dockgen_set/dockgen_holo_aligned_predicted_structures/3k8l_1_GLC-GLC-GLC-GLC-GLC_0_holo_aligned_predicted_protein.pdb,OC[C@H]1O[C@H](O[C@H]2[C@H](O)[C@@H](O)[C@@H](O[C@H]3[C@H](O)[C@@H](O)[C@@H](O[C@H]4[C@H](O)[C@@H](O)[C@@H](O[C@H]5[C@H](O)[C@@H](O)[C@@H](O)O[C@@H]5CO)O[C@@H]4CO)O[C@@H]3CO)O[C@@H]2CO)[C@H](O)[C@@H](O)[C@@H]1O, +3k8m_1_GLC-GLC-AC1_0,/bml/acmwhb/Repositories/Lab_Repositories/PoseBench/data/dockgen_set/dockgen_holo_aligned_predicted_structures/3k8m_1_GLC-GLC-AC1_0_holo_aligned_predicted_protein.pdb,C[C@H]1O[C@H](O[C@H]2[C@H](O)[C@@H](O)[C@@H](O[C@H]3[C@H](O)[C@@H](O)[C@@H](O)O[C@@H]3CO)O[C@@H]2CO)[C@H](O)[C@@H](O)[C@@H]1N[C@H]1CC(CO)[C@@H](O)[C@H](O)[C@H]1O, +3nvv_1_MTE_1,/bml/acmwhb/Repositories/Lab_Repositories/PoseBench/data/dockgen_set/dockgen_holo_aligned_predicted_structures/3nvv_1_MTE_1_holo_aligned_predicted_protein.pdb,NC1NC(O)C2N[C@@H]3[C@H](NC2N1)O[C@H](CO[PH](O)(O)O)C(S)[C@@H]3S, 
+3o01_2_DXC_0,/bml/acmwhb/Repositories/Lab_Repositories/PoseBench/data/dockgen_set/dockgen_holo_aligned_predicted_structures/3o01_2_DXC_0_holo_aligned_predicted_protein.pdb,C[C@H](CCC(O)O)[C@H]1CC[C@H]2[C@@H]3CC[C@@H]4C[C@H](O)CC[C@]4(C)[C@H]3C[C@H](O)[C@]12C, +3o02_2_JN3_0,/bml/acmwhb/Repositories/Lab_Repositories/PoseBench/data/dockgen_set/dockgen_holo_aligned_predicted_structures/3o02_2_JN3_0_holo_aligned_predicted_protein.pdb,C[C@H](CCC(O)O)[C@H]1CC[C@H]2[C@@H]3[C@H](O)C[C@@H]4C[C@H](O)CC[C@]4(C)[C@H]3CC[C@]12C, +3o7j_1_2AL_0,/bml/acmwhb/Repositories/Lab_Repositories/PoseBench/data/dockgen_set/dockgen_holo_aligned_predicted_structures/3o7j_1_2AL_0_holo_aligned_predicted_protein.pdb,NC(O)NC1NC(O)NC1O, +3q14_1_PCR_3,/bml/acmwhb/Repositories/Lab_Repositories/PoseBench/data/dockgen_set/dockgen_holo_aligned_predicted_structures/3q14_1_PCR_3_holo_aligned_predicted_protein.pdb,CC1CCC(O)CC1, +3qrc_2_GU4-YYJ_0,/bml/acmwhb/Repositories/Lab_Repositories/PoseBench/data/dockgen_set/dockgen_holo_aligned_predicted_structures/3qrc_2_GU4-YYJ_0_holo_aligned_predicted_protein.pdb,OS(O)(O)OC[C@H]1O[C@@](COS(O)(O)O)(O[C@H]2O[C@H](COS(O)(O)O)[C@@H](OS(O)(O)O)[C@H](OS(O)(O)O)[C@H]2OS(O)(O)O)[C@@H](OS(O)(O)O)[C@@H]1OS(O)(O)O, +3s5x_1_BMA-MAN-MAN-MAN-MAN_0,/bml/acmwhb/Repositories/Lab_Repositories/PoseBench/data/dockgen_set/dockgen_holo_aligned_predicted_structures/3s5x_1_BMA-MAN-MAN-MAN-MAN_0_holo_aligned_predicted_protein.pdb,OC[C@H]1O[C@H](O[C@@H]2[C@H](O)[C@@H](OC[C@H]3O[C@@H](O)[C@@H](O)[C@@H](O[C@H]4O[C@H](CO)[C@@H](O)[C@H](O)[C@@H]4O)[C@@H]3O)O[C@H](CO)[C@H]2O)[C@@H](O)[C@@H](O)[C@@H]1O, +3s6a_1_ANP_0,/bml/acmwhb/Repositories/Lab_Repositories/PoseBench/data/dockgen_set/dockgen_holo_aligned_predicted_structures/3s6a_1_ANP_0_holo_aligned_predicted_protein.pdb,NC1NCNC2C1NCN2[C@@H]1O[C@H](CO[PH](O)(O)O[PH](N)(O)O)[C@@H](O)[C@H]1O, 
+3se5_1_ANP_2,/bml/acmwhb/Repositories/Lab_Repositories/PoseBench/data/dockgen_set/dockgen_holo_aligned_predicted_structures/3se5_1_ANP_2_holo_aligned_predicted_protein.pdb,NC1NCNC2C1NCN2[C@@H]1O[C@H](CO[PH](O)(O)O[PH](O)(O)N[PH](O)(O)O)[C@@H](O)[C@H]1O, +3sr6_1_MTE_1,/bml/acmwhb/Repositories/Lab_Repositories/PoseBench/data/dockgen_set/dockgen_holo_aligned_predicted_structures/3sr6_1_MTE_1_holo_aligned_predicted_protein.pdb,NC1NC(O)[C@@H]2N[C@H]3C(S)C(S)[C@@H](CO[PH](O)(O)O)O[C@H]3NC2N1, +3ub7_1_ACM_1,/bml/acmwhb/Repositories/Lab_Repositories/PoseBench/data/dockgen_set/dockgen_holo_aligned_predicted_structures/3ub7_1_ACM_1_holo_aligned_predicted_protein.pdb,CC(N)O, +3ub9_1_NHY_1,/bml/acmwhb/Repositories/Lab_Repositories/PoseBench/data/dockgen_set/dockgen_holo_aligned_predicted_structures/3ub9_1_NHY_1_holo_aligned_predicted_protein.pdb,N[C@@H](O)NO, +3uni_1_SAL_0,/bml/acmwhb/Repositories/Lab_Repositories/PoseBench/data/dockgen_set/dockgen_holo_aligned_predicted_structures/3uni_1_SAL_0_holo_aligned_predicted_protein.pdb,OC(O)C1CCCCC1O, +3wrb_1_GDE_0,/bml/acmwhb/Repositories/Lab_Repositories/PoseBench/data/dockgen_set/dockgen_holo_aligned_predicted_structures/3wrb_1_GDE_0_holo_aligned_predicted_protein.pdb,OC(O)C1CC(O)C(O)C(O)C1, +3wvc_1_FEG_0,/bml/acmwhb/Repositories/Lab_Repositories/PoseBench/data/dockgen_set/dockgen_holo_aligned_predicted_structures/3wvc_1_FEG_0_holo_aligned_predicted_protein.pdb,CC1C(CC(O)O)NC(O)[C@@H](C)[C@@H]1O[PH](O)(O)OC[C@H]1O[C@@H](N2CNC3C(O)NC(N)NC32)[C@H](O)[C@@H]1O, +3zec_1_ANP_0,/bml/acmwhb/Repositories/Lab_Repositories/PoseBench/data/dockgen_set/dockgen_holo_aligned_predicted_structures/3zec_1_ANP_0_holo_aligned_predicted_protein.pdb,NC1NCNC2C1NCN2[C@@H]1O[C@H](CO[PH](O)(O)O[PH](O)(O)N[PH](O)(O)O)[C@@H](O)[C@H]1O, +3zjx_1_BOG_0,/bml/acmwhb/Repositories/Lab_Repositories/PoseBench/data/dockgen_set/dockgen_holo_aligned_predicted_structures/3zjx_1_BOG_0_holo_aligned_predicted_protein.pdb,CCCCCCCCO[C@@H]1O[C@H](CO)[C@@H](O)[C@H](O)[C@H]1O, 
+3zqu_1_FNR_5,/bml/acmwhb/Repositories/Lab_Repositories/PoseBench/data/dockgen_set/dockgen_holo_aligned_predicted_structures/3zqu_1_FNR_5_holo_aligned_predicted_protein.pdb,C[C@H]1CC2NC3[C@H](O)N[C@H](O)N[C@@H]3N(C[C@H](O)[C@H](O)[C@H](O)CO[PH](O)(O)O)[C@@H]2C[C@@H]1C, +3zzs_1_TRP_3,/bml/acmwhb/Repositories/Lab_Repositories/PoseBench/data/dockgen_set/dockgen_holo_aligned_predicted_structures/3zzs_1_TRP_3_holo_aligned_predicted_protein.pdb,N[C@@H](CC1CNC2CCCCC12)C(O)O, +4b4v_1_L34_0,/bml/acmwhb/Repositories/Lab_Repositories/PoseBench/data/dockgen_set/dockgen_holo_aligned_predicted_structures/4b4v_1_L34_0_holo_aligned_predicted_protein.pdb,NC1NC(O)C2C(NC[C@@H]3CN(C4CCC([C@H](O)N[C@@H](CCC(O)O)C(O)O)CC4)C(O)N23)N1, +4bc9_1_CNV-FAD_0,/bml/acmwhb/Repositories/Lab_Repositories/PoseBench/data/dockgen_set/dockgen_holo_aligned_predicted_structures/4bc9_1_CNV-FAD_0_holo_aligned_predicted_protein.pdb,CC1CC2[C@H](CC1C)N(C(C)CN)C1C(O)NC(O)NC1N2C[C@H](O)[C@H](O)[C@H](O)CO[PH](O)(O)O[PH](O)(O)OC[C@H]1O[C@@H](N2CNC3C(N)NCN[C@H]32)[C@H](O)[C@@H]1O, +4cdn_2_FAD_0,/bml/acmwhb/Repositories/Lab_Repositories/PoseBench/data/dockgen_set/dockgen_holo_aligned_predicted_structures/4cdn_2_FAD_0_holo_aligned_predicted_protein.pdb,CC1CC2NC3C(O)NC(O)NC3N(C[C@H](O)[C@H](O)[C@H](O)CO[PH](O)(O)O[PH](O)(O)OC[C@H]3O[C@@H](N4CNC5C(N)NCNC54)[C@H](O)[C@@H]3O)C2C[C@H]1C, +4fyv_1_DCP_2,/bml/acmwhb/Repositories/Lab_Repositories/PoseBench/data/dockgen_set/dockgen_holo_aligned_predicted_structures/4fyv_1_DCP_2_holo_aligned_predicted_protein.pdb,NC1CCN([C@H]2C[C@H](O)[C@@H](CO[PH](O)(O)O[PH](O)(O)O[PH](O)(O)O)O2)C(O)N1, +4fyw_1_CTP_4,/bml/acmwhb/Repositories/Lab_Repositories/PoseBench/data/dockgen_set/dockgen_holo_aligned_predicted_structures/4fyw_1_CTP_4_holo_aligned_predicted_protein.pdb,NC1CCN([C@@H]2O[C@H](CO[PH](O)(O)O[PH](O)(O)O[PH](O)(O)O)[C@@H](O)[C@H]2O)C(O)N1, 
+4gk9_1_BMA-MAN-MAN-MAN-MAN_3,/bml/acmwhb/Repositories/Lab_Repositories/PoseBench/data/dockgen_set/dockgen_holo_aligned_predicted_structures/4gk9_1_BMA-MAN-MAN-MAN-MAN_3_holo_aligned_predicted_protein.pdb,OC[C@H]1O[C@H](OC[C@H]2O[C@H](OC[C@H]3O[C@@H](O)[C@@H](O)[C@@H](O[C@H]4O[C@H](CO)[C@@H](O)[C@H](O)[C@@H]4O)[C@@H]3O)[C@@H](O)[C@@H](O[C@H]3O[C@H](CO)[C@@H](O)[C@H](O)[C@@H]3O)[C@@H]2O)[C@@H](O)[C@@H](O)[C@@H]1O, +4h2f_1_ADN_0,/bml/acmwhb/Repositories/Lab_Repositories/PoseBench/data/dockgen_set/dockgen_holo_aligned_predicted_structures/4h2f_1_ADN_0_holo_aligned_predicted_protein.pdb,NC1NCNC2C1NCN2[C@@H]1O[C@H](CO)[C@@H](O)[C@H]1O, +4idk_1_1FE_0,/bml/acmwhb/Repositories/Lab_Repositories/PoseBench/data/dockgen_set/dockgen_holo_aligned_predicted_structures/4idk_1_1FE_0_holo_aligned_predicted_protein.pdb,NCC(O)NC1CCC2NC(O)NC2C1, +4kgx_1_CTP_5,/bml/acmwhb/Repositories/Lab_Repositories/PoseBench/data/dockgen_set/dockgen_holo_aligned_predicted_structures/4kgx_1_CTP_5_holo_aligned_predicted_protein.pdb,NC1CCN([C@@H]2O[C@H](CO[PH](O)(O)O[PH](O)(O)O[PH](O)(O)O)[C@@H](O)[C@H]2O)C(O)N1, +4mig_1_G3F_2,/bml/acmwhb/Repositories/Lab_Repositories/PoseBench/data/dockgen_set/dockgen_holo_aligned_predicted_structures/4mig_1_G3F_2_holo_aligned_predicted_protein.pdb,OC[C@H]1O[C@@H](O)[C@H](O)[C@@H](F)[C@@H]1O, +4mo2_2_FDA_0,/bml/acmwhb/Repositories/Lab_Repositories/PoseBench/data/dockgen_set/dockgen_holo_aligned_predicted_structures/4mo2_2_FDA_0_holo_aligned_predicted_protein.pdb,CC1CC2[C@@H](CC1C)N[C@H]1C(O)NC(O)N[C@H]1N2C[C@H](O)[C@H](O)[C@H](O)CO[PH](O)(O)O[PH](O)(O)OC[C@H]1O[C@@H](N2CNC3C(N)NCNC32)[C@H](O)[C@@H]1O, +4mos_1_GAF_1,/bml/acmwhb/Repositories/Lab_Repositories/PoseBench/data/dockgen_set/dockgen_holo_aligned_predicted_structures/4mos_1_GAF_1_holo_aligned_predicted_protein.pdb,OC[C@H]1O[C@H](O)[C@H](F)[C@@H](O)[C@H]1O, 
+4n4l_1_HG1_0,/bml/acmwhb/Repositories/Lab_Repositories/PoseBench/data/dockgen_set/dockgen_holo_aligned_predicted_structures/4n4l_1_HG1_0_holo_aligned_predicted_protein.pdb,NC(O)CCCC1CCCCC1, +4o0d_1_GLY_3,/bml/acmwhb/Repositories/Lab_Repositories/PoseBench/data/dockgen_set/dockgen_holo_aligned_predicted_structures/4o0d_1_GLY_3_holo_aligned_predicted_protein.pdb,NCC(O)O, +4o0f_1_GLY_3,/bml/acmwhb/Repositories/Lab_Repositories/PoseBench/data/dockgen_set/dockgen_holo_aligned_predicted_structures/4o0f_1_GLY_3_holo_aligned_predicted_protein.pdb,NCC(O)O, +4o95_1_245_0,/bml/acmwhb/Repositories/Lab_Repositories/PoseBench/data/dockgen_set/dockgen_holo_aligned_predicted_structures/4o95_1_245_0_holo_aligned_predicted_protein.pdb,OC(NC1CCCCC1)N[C@H]1CCNC(Cl)C1, +4oal_2_245_0,/bml/acmwhb/Repositories/Lab_Repositories/PoseBench/data/dockgen_set/dockgen_holo_aligned_predicted_structures/4oal_2_245_0_holo_aligned_predicted_protein.pdb,OC(NC1CCCCC1)N[C@H]1CCNC(Cl)C1, +4osx_1_GLY_2,/bml/acmwhb/Repositories/Lab_Repositories/PoseBench/data/dockgen_set/dockgen_holo_aligned_predicted_structures/4osx_1_GLY_2_holo_aligned_predicted_protein.pdb,NCC(O)O, +4osy_1_GLY_3,/bml/acmwhb/Repositories/Lab_Repositories/PoseBench/data/dockgen_set/dockgen_holo_aligned_predicted_structures/4osy_1_GLY_3_holo_aligned_predicted_protein.pdb,NCC(O)O, +4pfx_1_UDP_0,/bml/acmwhb/Repositories/Lab_Repositories/PoseBench/data/dockgen_set/dockgen_holo_aligned_predicted_structures/4pfx_1_UDP_0_holo_aligned_predicted_protein.pdb,OC1CCN([C@@H]2O[C@H](CO[PH](O)(O)O[PH](O)(O)O)[C@@H](O)[C@H]2O)C(O)N1, +4phr_1_UDP_0,/bml/acmwhb/Repositories/Lab_Repositories/PoseBench/data/dockgen_set/dockgen_holo_aligned_predicted_structures/4phr_1_UDP_0_holo_aligned_predicted_protein.pdb,OC1CCN([C@@H]2O[C@H](CO[PH](O)(O)O[PH](O)(O)O)[C@@H](O)[C@H]2O)C(O)N1, 
+4phs_1_UDP_0,/bml/acmwhb/Repositories/Lab_Repositories/PoseBench/data/dockgen_set/dockgen_holo_aligned_predicted_structures/4phs_1_UDP_0_holo_aligned_predicted_protein.pdb,OC1CCN([C@@H]2O[C@H](CO[PH](O)([OH2+])O[PH](O)(O)[OH2+])[C@@H](O)[C@H]2O)C(O)N1, +4pya_1_2X3_0,/bml/acmwhb/Repositories/Lab_Repositories/PoseBench/data/dockgen_set/dockgen_holo_aligned_predicted_structures/4pya_1_2X3_0_holo_aligned_predicted_protein.pdb,NC1NC(O)C2N[C@H]3N([C@@H]2N1)[C@@H]1O[C@H](CO[PH](O)(O)O[PH](O)(O)O[PH](O)(O)O)[C@@]3(O)[C@H]1O, +4qa8_1_PJZ_0,/bml/acmwhb/Repositories/Lab_Repositories/PoseBench/data/dockgen_set/dockgen_holo_aligned_predicted_structures/4qa8_1_PJZ_0_holo_aligned_predicted_protein.pdb,CCCCCCCCCCCCCC[C@@H](O)OC[C@@H](C)O[C@@H](O)CCCCCCCCCCC, +4qo5_1_NAG_2,/bml/acmwhb/Repositories/Lab_Repositories/PoseBench/data/dockgen_set/dockgen_holo_aligned_predicted_structures/4qo5_1_NAG_2_holo_aligned_predicted_protein.pdb,CC(O)N[C@H]1CO[C@H](CO)[C@@H](O)[C@@H]1O, +4rhe_1_FMN_6,/bml/acmwhb/Repositories/Lab_Repositories/PoseBench/data/dockgen_set/dockgen_holo_aligned_predicted_structures/4rhe_1_FMN_6_holo_aligned_predicted_protein.pdb,CC1C[C@@H]2[C@@H](C[C@@H]1C)NC1[C@H](O)NC(O)N[C@@H]1N2C[C@H](O)[C@H](O)[C@H](O)CO[PH]([O-])(O)O, +4rpj_1_UDP_0,/bml/acmwhb/Repositories/Lab_Repositories/PoseBench/data/dockgen_set/dockgen_holo_aligned_predicted_structures/4rpj_1_UDP_0_holo_aligned_predicted_protein.pdb,OC1CCN([C@@H]2O[C@H](CO[PH](O)(O)O[PH](O)(O)O)[C@@H](O)[C@H]2O)C(O)N1, +4rpm_1_HXC_0,/bml/acmwhb/Repositories/Lab_Repositories/PoseBench/data/dockgen_set/dockgen_holo_aligned_predicted_structures/4rpm_1_HXC_0_holo_aligned_predicted_protein.pdb,CCCCCC(O)SC, +4tvd_1_BGC_4,/bml/acmwhb/Repositories/Lab_Repositories/PoseBench/data/dockgen_set/dockgen_holo_aligned_predicted_structures/4tvd_1_BGC_4_holo_aligned_predicted_protein.pdb,OC[C@H]1O[C@@H](O)[C@H](O)[C@@H](O)[C@@H]1O, 
+4tvd_1_GLC_0,/bml/acmwhb/Repositories/Lab_Repositories/PoseBench/data/dockgen_set/dockgen_holo_aligned_predicted_structures/4tvd_1_GLC_0_holo_aligned_predicted_protein.pdb,OC[C@H]1O[C@H](O)[C@H](O)[C@@H](O)[C@@H]1O, +4u63_1_FAD_0,/bml/acmwhb/Repositories/Lab_Repositories/PoseBench/data/dockgen_set/dockgen_holo_aligned_predicted_structures/4u63_1_FAD_0_holo_aligned_predicted_protein.pdb,CC1CC2NC3C(O)NC(O)NC3N(C[C@H](O)[C@H](O)[C@H](O)CO[PH](O)(O)O[PH](O)(O)OC[C@H]3O[C@@H](N4CNC5C(N)NCNC54)[C@H](O)[C@@H]3O)C2CC1C, +4uoc_1_NCN_1,/bml/acmwhb/Repositories/Lab_Repositories/PoseBench/data/dockgen_set/dockgen_holo_aligned_predicted_structures/4uoc_1_NCN_1_holo_aligned_predicted_protein.pdb,OC(O)C1CCCN([C@@H]2O[C@H](CO[PH](O)(O)O)[C@@H](O)[C@H]2O)C1, +4uuw_1_AMP_0,/bml/acmwhb/Repositories/Lab_Repositories/PoseBench/data/dockgen_set/dockgen_holo_aligned_predicted_structures/4uuw_1_AMP_0_holo_aligned_predicted_protein.pdb,NC1NCNC2C1NCN2[C@@H]1O[C@H](CO[PH](O)(O)O)[C@@H](O)[C@H]1O, +4xdr_1_ADN_0,/bml/acmwhb/Repositories/Lab_Repositories/PoseBench/data/dockgen_set/dockgen_holo_aligned_predicted_structures/4xdr_1_ADN_0_holo_aligned_predicted_protein.pdb,NC1NCNC2C1NCN2[C@@H]1O[C@H](CO)[C@@H](O)[C@H]1O, +4xfm_1_THE_0,/bml/acmwhb/Repositories/Lab_Repositories/PoseBench/data/dockgen_set/dockgen_holo_aligned_predicted_structures/4xfm_1_THE_0_holo_aligned_predicted_protein.pdb,OC[C@@H](O)[C@H](O)C(O)O, +4ydx_1_TCE_0,/bml/acmwhb/Repositories/Lab_Repositories/PoseBench/data/dockgen_set/dockgen_holo_aligned_predicted_structures/4ydx_1_TCE_0_holo_aligned_predicted_protein.pdb,OC(O)CCP(CCC(O)O)CCC(O)O, +4zav_1_4LS_8,/bml/acmwhb/Repositories/Lab_Repositories/PoseBench/data/dockgen_set/dockgen_holo_aligned_predicted_structures/4zav_1_4LS_8_holo_aligned_predicted_protein.pdb,CC(C)CCN1C2CC(C)[C@@H](C)CC2N(C[C@H](O)[C@H](O)[C@H](O)CO[PH](O)(O)O)[C@H]2NC(O)N[C@@H](O)C21, 
+4zaw_1_4LU_1,/bml/acmwhb/Repositories/Lab_Repositories/PoseBench/data/dockgen_set/dockgen_holo_aligned_predicted_structures/4zaw_1_4LU_1_holo_aligned_predicted_protein.pdb,C[C@H]1[C@@H](C)C[C@@H]2C3[C@@H]1C(C)(C)CCN3[C@H]1C(O)NC(O)NC1N2C[C@H](O)[C@H](O)[C@H](O)CO[PH](O)(O)O, +4zay_1_4LS_6,/bml/acmwhb/Repositories/Lab_Repositories/PoseBench/data/dockgen_set/dockgen_holo_aligned_predicted_structures/4zay_1_4LS_6_holo_aligned_predicted_protein.pdb,CC(C)CCN1[C@H]2[C@H](O)N[C@H](O)N[C@@H]2N(C[C@H](O)[C@H](O)[C@H](O)CO[PH](O)(O)O)[C@@H]2CC(C)C(C)C[C@H]21, +4zaz_1_4LS_6,/bml/acmwhb/Repositories/Lab_Repositories/PoseBench/data/dockgen_set/dockgen_holo_aligned_predicted_structures/4zaz_1_4LS_6_holo_aligned_predicted_protein.pdb,CC(C)CCN1C2CC(C)[C@@H](C)CC2N(C[C@H](O)[C@H](O)[C@H](O)CO[PH](O)(O)O)C2N[C@@H](O)N[C@@H](O)C21, +4zqx_1_ATP_2,/bml/acmwhb/Repositories/Lab_Repositories/PoseBench/data/dockgen_set/dockgen_holo_aligned_predicted_structures/4zqx_1_ATP_2_holo_aligned_predicted_protein.pdb,NC1NCNC2C1NCN2[C@@H]1O[C@H](CO[PH](O)(O)O[PH](O)(O)O[PH](O)(O)O)[C@@H](O)[C@H]1O, +5a98_1_ATP_0,/bml/acmwhb/Repositories/Lab_Repositories/PoseBench/data/dockgen_set/dockgen_holo_aligned_predicted_structures/5a98_1_ATP_0_holo_aligned_predicted_protein.pdb,NC1NCNC2C1NCN2[C@@H]1O[C@H](CO[PH](O)(O)O[PH](O)(O)O[PH](O)(O)O)[C@@H](O)[C@H]1O, +5ae3_2_AWB_1,/bml/acmwhb/Repositories/Lab_Repositories/PoseBench/data/dockgen_set/dockgen_holo_aligned_predicted_structures/5ae3_2_AWB_1_holo_aligned_predicted_protein.pdb,CCCCCC[C@H]1C(O)O[C@H](C)[C@H](N[C@H](O)[C@@H]2CCC[C@H](NCO)[C@H]2O)C(O)O[C@@H](C)[C@@H]1OC(O)CC(C)C, +5b5s_1_BOG_0,/bml/acmwhb/Repositories/Lab_Repositories/PoseBench/data/dockgen_set/dockgen_holo_aligned_predicted_structures/5b5s_1_BOG_0_holo_aligned_predicted_protein.pdb,CCCCCCCCO[C@@H]1O[C@H](CO)[C@@H](O)[C@H](O)[C@H]1O, 
+5d9g_1_GLU-ASN-LEU-TYR-PHE-GLN_0,/bml/acmwhb/Repositories/Lab_Repositories/PoseBench/data/dockgen_set/dockgen_holo_aligned_predicted_structures/5d9g_1_GLU-ASN-LEU-TYR-PHE-GLN_0_holo_aligned_predicted_protein.pdb,CC(C)C[C@H](N)C(=O)N[C@@H](Cc1ccc(O)cc1)C(=O)N[C@@H](Cc1ccccc1)C(=O)N[C@@H](CCC(N)=O)C(=O)O, +5dnc_1_ASN_2,/bml/acmwhb/Repositories/Lab_Repositories/PoseBench/data/dockgen_set/dockgen_holo_aligned_predicted_structures/5dnc_1_ASN_2_holo_aligned_predicted_protein.pdb,NC(O)C[C@H](N)C(O)O, +5eno_1_5QG_0,/bml/acmwhb/Repositories/Lab_Repositories/PoseBench/data/dockgen_set/dockgen_holo_aligned_predicted_structures/5eno_1_5QG_0_holo_aligned_predicted_protein.pdb,CC1(C)CC2C(CO1)C(N1CCOCC1)NC(SCCC1CCCCC1)[C@@H]2CN, +5enp_1_5QF_0,/bml/acmwhb/Repositories/Lab_Repositories/PoseBench/data/dockgen_set/dockgen_holo_aligned_predicted_structures/5enp_1_5QF_0_holo_aligned_predicted_protein.pdb,COCCN1CCN([C@@H]2NC(SCC[C@H]3CCC(OC)[C@@H](OC)C3)[C@H](CN)[C@H]3CC(C)(C)OCC23)CC1, +5enq_1_5QE_0,/bml/acmwhb/Repositories/Lab_Repositories/PoseBench/data/dockgen_set/dockgen_holo_aligned_predicted_structures/5enq_1_5QE_0_holo_aligned_predicted_protein.pdb,CC(O)NC1CCC(CCSC2NC(N3C[C@@H](C)O[C@@H](C)C3)[C@@H]3COC(C)(C)CC3C2CN)CC1, +5enr_1_MBX_0,/bml/acmwhb/Repositories/Lab_Repositories/PoseBench/data/dockgen_set/dockgen_holo_aligned_predicted_structures/5enr_1_MBX_0_holo_aligned_predicted_protein.pdb,CCC(O)NC1CCC(CCS[C@@H]2NC(N3C[C@H](C)O[C@@H](C)C3)C3COC(C)(C)CC3C2CN)CC1, +5ent_1_MIY_0,/bml/acmwhb/Repositories/Lab_Repositories/PoseBench/data/dockgen_set/dockgen_holo_aligned_predicted_structures/5ent_1_MIY_0_holo_aligned_predicted_protein.pdb,CN(C)C1CCC(O)C2C1C[C@H]1C[C@H]3[C@H](N(C)C)C(O)C(C(N)O)[C@@H](O)[C@@]3(O)C(O)[C@@H]1C2O, +5ers_1_AMP_1,/bml/acmwhb/Repositories/Lab_Repositories/PoseBench/data/dockgen_set/dockgen_holo_aligned_predicted_structures/5ers_1_AMP_1_holo_aligned_predicted_protein.pdb,NC1NCNC2C1NCN2[C@@H]1O[C@H](CO[PH](O)(O)O)[C@@H](O)[C@H]1O, 
+5f2t_1_PLM_0,/bml/acmwhb/Repositories/Lab_Repositories/PoseBench/data/dockgen_set/dockgen_holo_aligned_predicted_structures/5f2t_1_PLM_0_holo_aligned_predicted_protein.pdb,CCCCCCCCCCCCCCCC(O)O, +5f52_1_ASP_2,/bml/acmwhb/Repositories/Lab_Repositories/PoseBench/data/dockgen_set/dockgen_holo_aligned_predicted_structures/5f52_1_ASP_2_holo_aligned_predicted_protein.pdb,N[C@@H](CC(O)O)C(O)O, +5fiu_1_Y3J_3,/bml/acmwhb/Repositories/Lab_Repositories/PoseBench/data/dockgen_set/dockgen_holo_aligned_predicted_structures/5fiu_1_Y3J_3_holo_aligned_predicted_protein.pdb,NC1NCNC2C1NCN2[C@@H]1O[C@H](C(F)F)[C@@H](O)[C@@H]1O, +5fxd_1_H7Y_1,/bml/acmwhb/Repositories/Lab_Repositories/PoseBench/data/dockgen_set/dockgen_holo_aligned_predicted_structures/5fxd_1_H7Y_1_holo_aligned_predicted_protein.pdb,CCC[C@H]1CCC(O)C(OC)C1, +5fxe_1_CIY_1,/bml/acmwhb/Repositories/Lab_Repositories/PoseBench/data/dockgen_set/dockgen_holo_aligned_predicted_structures/5fxe_1_CIY_1_holo_aligned_predicted_protein.pdb,CO[C@H]1CC(CCCO)CCC1O, +5fxf_1_BEZ_0,/bml/acmwhb/Repositories/Lab_Repositories/PoseBench/data/dockgen_set/dockgen_holo_aligned_predicted_structures/5fxf_1_BEZ_0_holo_aligned_predicted_protein.pdb,OC(O)C1CCCCC1, +5gqi_1_ATP_7,/bml/acmwhb/Repositories/Lab_Repositories/PoseBench/data/dockgen_set/dockgen_holo_aligned_predicted_structures/5gqi_1_ATP_7_holo_aligned_predicted_protein.pdb,N[C@@H]1NCN[C@H]2C1NCN2[C@@H]1O[C@H](CO[PH](O)(O)O[PH](O)(O)O[PH](O)(O)O)[C@@H](O)[C@H]1O, +5gql_1_ATP_4,/bml/acmwhb/Repositories/Lab_Repositories/PoseBench/data/dockgen_set/dockgen_holo_aligned_predicted_structures/5gql_1_ATP_4_holo_aligned_predicted_protein.pdb,NC1NCNC2C1NCN2[C@@H]1O[C@H](CO[PH](O)(O)O[PH](O)(O)O[PH](O)(O)O)[C@@H](O)[C@H]1O, +5hhz_1_ZME_0,/bml/acmwhb/Repositories/Lab_Repositories/PoseBench/data/dockgen_set/dockgen_holo_aligned_predicted_structures/5hhz_1_ZME_0_holo_aligned_predicted_protein.pdb,CC1CCN(C2NCNC3NCNC32)C1, 
+5hmr_1_FDZ_0,/bml/acmwhb/Repositories/Lab_Repositories/PoseBench/data/dockgen_set/dockgen_holo_aligned_predicted_structures/5hmr_1_FDZ_0_holo_aligned_predicted_protein.pdb,OC(NC1CNNS1)N[C@H]1CCC[C@H](OC(F)(F)F)C1, +5hqx_1_EDZ_0,/bml/acmwhb/Repositories/Lab_Repositories/PoseBench/data/dockgen_set/dockgen_holo_aligned_predicted_structures/5hqx_1_EDZ_0_holo_aligned_predicted_protein.pdb,OCC[C@H]1CCCC[C@@H]1NC(O)N[C@H]1CNNS1, +5hw0_1_GLU_2,/bml/acmwhb/Repositories/Lab_Repositories/PoseBench/data/dockgen_set/dockgen_holo_aligned_predicted_structures/5hw0_1_GLU_2_holo_aligned_predicted_protein.pdb,N[C@@H](CCC(O)O)C(O)O, +5ida_1_BMA_0,/bml/acmwhb/Repositories/Lab_Repositories/PoseBench/data/dockgen_set/dockgen_holo_aligned_predicted_structures/5ida_1_BMA_0_holo_aligned_predicted_protein.pdb,OC[C@H]1O[C@@H](O)[C@@H](O)[C@@H](O)[C@@H]1O, +5k3o_2_ASP_0,/bml/acmwhb/Repositories/Lab_Repositories/PoseBench/data/dockgen_set/dockgen_holo_aligned_predicted_structures/5k3o_2_ASP_0_holo_aligned_predicted_protein.pdb,N[C@@H](CC(O)O)C(O)O, +5k45_2_GLU_1,/bml/acmwhb/Repositories/Lab_Repositories/PoseBench/data/dockgen_set/dockgen_holo_aligned_predicted_structures/5k45_2_GLU_1_holo_aligned_predicted_protein.pdb,N[C@@H](CCC(O)O)C(O)O, +5k4h_2_GLU_3,/bml/acmwhb/Repositories/Lab_Repositories/PoseBench/data/dockgen_set/dockgen_holo_aligned_predicted_structures/5k4h_2_GLU_3_holo_aligned_predicted_protein.pdb,N[C@@H](CCC(O)O)C(O)O, +5k62_1_ASN-VAL_0,/bml/acmwhb/Repositories/Lab_Repositories/PoseBench/data/dockgen_set/dockgen_holo_aligned_predicted_structures/5k62_1_ASN-VAL_0_holo_aligned_predicted_protein.pdb,CC(C)[C@@H](CO)NC(O)[C@@H](N)CC(N)O, +5k63_1_ASN-GLY_0,/bml/acmwhb/Repositories/Lab_Repositories/PoseBench/data/dockgen_set/dockgen_holo_aligned_predicted_structures/5k63_1_ASN-GLY_0_holo_aligned_predicted_protein.pdb,NC(O)C[C@H](N)C(O)NCCO, 
+5k66_1_ASN-GLU_0,/bml/acmwhb/Repositories/Lab_Repositories/PoseBench/data/dockgen_set/dockgen_holo_aligned_predicted_structures/5k66_1_ASN-GLU_0_holo_aligned_predicted_protein.pdb,NC(O)C[C@H](N)C(O)N[C@H](CO)CCC(O)O, +5mh1_1_BMA_0,/bml/acmwhb/Repositories/Lab_Repositories/PoseBench/data/dockgen_set/dockgen_holo_aligned_predicted_structures/5mh1_1_BMA_0_holo_aligned_predicted_protein.pdb,OC[C@H]1O[C@@H](O)[C@@H](O)[C@@H](O)[C@@H]1O, +5u82_2_ZN0_0,/bml/acmwhb/Repositories/Lab_Repositories/PoseBench/data/dockgen_set/dockgen_holo_aligned_predicted_structures/5u82_2_ZN0_0_holo_aligned_predicted_protein.pdb,CC[SnH](CC)CC, +6a71_1_9UX_0,/bml/acmwhb/Repositories/Lab_Repositories/PoseBench/data/dockgen_set/dockgen_holo_aligned_predicted_structures/6a71_1_9UX_0_holo_aligned_predicted_protein.pdb,O[Mo@]12S[Mo@@]1(O)S2, +6a72_1_9UX_0,/bml/acmwhb/Repositories/Lab_Repositories/PoseBench/data/dockgen_set/dockgen_holo_aligned_predicted_structures/6a72_1_9UX_0_holo_aligned_predicted_protein.pdb,O[Mo@]12S[Mo@@]1(O)S2, +6b1b_1_TMO_15,/bml/acmwhb/Repositories/Lab_Repositories/PoseBench/data/dockgen_set/dockgen_holo_aligned_predicted_structures/6b1b_1_TMO_15_holo_aligned_predicted_protein.pdb,C[N+](C)(C)O, +6ea9_1_9BG_0,/bml/acmwhb/Repositories/Lab_Repositories/PoseBench/data/dockgen_set/dockgen_holo_aligned_predicted_structures/6ea9_1_9BG_0_holo_aligned_predicted_protein.pdb,NC1NC(O)C2NCN([C@@H]3O[C@H](CO)[C@@H](O)[C@H]3O[PH]([O-])(O)OC[C@H]3O[C@@H](N4CNC5C(N)NCNC54)[C@H](O)[C@@H]3O[PH]([O-])([O-])O)C2N1, +6ep5_1_ADP_1,/bml/acmwhb/Repositories/Lab_Repositories/PoseBench/data/dockgen_set/dockgen_holo_aligned_predicted_structures/6ep5_1_ADP_1_holo_aligned_predicted_protein.pdb,NC1NCNC2C1NCN2[C@@H]1O[C@H](CO[PH](O)(O)O[PH](O)(O)O)[C@@H](O)[C@H]1O, +6etf_1_AMP_0,/bml/acmwhb/Repositories/Lab_Repositories/PoseBench/data/dockgen_set/dockgen_holo_aligned_predicted_structures/6etf_1_AMP_0_holo_aligned_predicted_protein.pdb,NC1NCNC2[C@H]1NCN2[C@@H]1O[C@H](CO[PH](O)(O)O)[C@@H](O)[C@H]1O, 
+6fgc_1_ADP_1,/bml/acmwhb/Repositories/Lab_Repositories/PoseBench/data/dockgen_set/dockgen_holo_aligned_predicted_structures/6fgc_1_ADP_1_holo_aligned_predicted_protein.pdb,NC1NCNC2C1NCN2[C@@H]1O[C@H](CO[PH](O)(O)O[PH](O)(O)O)[C@@H](O)[C@H]1O, +6fgc_1_D95_1,/bml/acmwhb/Repositories/Lab_Repositories/PoseBench/data/dockgen_set/dockgen_holo_aligned_predicted_structures/6fgc_1_D95_1_holo_aligned_predicted_protein.pdb,C[C@H]1[C@H](OC(O)CCC(O)O)O[C@@H]2O[C@@]3(C)CC[C@H]4[C@H](C)CC[C@@H]1[C@@]24OO3, +6gbf_1_AMP_0,/bml/acmwhb/Repositories/Lab_Repositories/PoseBench/data/dockgen_set/dockgen_holo_aligned_predicted_structures/6gbf_1_AMP_0_holo_aligned_predicted_protein.pdb,NC1NCNC2C1NCN2[C@@H]1O[C@H](CO[PH](O)(O)O)[C@@H](O)[C@H]1O, +6jls_1_FMN_6,/bml/acmwhb/Repositories/Lab_Repositories/PoseBench/data/dockgen_set/dockgen_holo_aligned_predicted_structures/6jls_1_FMN_6_holo_aligned_predicted_protein.pdb,CC1CC2NC3C(O)NC(O)NC3N(C[C@H](O)[C@H](O)[C@H](O)CO[PH](O)(O)O)C2CC1C, +6n19_2_K8V_0,/bml/acmwhb/Repositories/Lab_Repositories/PoseBench/data/dockgen_set/dockgen_holo_aligned_predicted_structures/6n19_2_K8V_0_holo_aligned_predicted_protein.pdb,CCC(O)NC1CCC(C([O-])O)C(C([O-])O)C1, +6nco_1_KQP_0,/bml/acmwhb/Repositories/Lab_Repositories/PoseBench/data/dockgen_set/dockgen_holo_aligned_predicted_structures/6nco_1_KQP_0_holo_aligned_predicted_protein.pdb,CC(C)(O)C1CCC([C@H]2CC(Cl)C[C@@H](C34(C(N)N)CC3C4)C2)CC1, +6npp_1_KWG_0,/bml/acmwhb/Repositories/Lab_Repositories/PoseBench/data/dockgen_set/dockgen_holo_aligned_predicted_structures/6npp_1_KWG_0_holo_aligned_predicted_protein.pdb,[O-]C(O)C1CCCC(CCC2CCCCC2)C1N1CCCC1, +6o6y_1_ACK_0,/bml/acmwhb/Repositories/Lab_Repositories/PoseBench/data/dockgen_set/dockgen_holo_aligned_predicted_structures/6o6y_1_ACK_0_holo_aligned_predicted_protein.pdb,NC1NCNC2C1NCN2[C@@H]1O[C@H](CO)[C@H]2O[PH]([O-])(O)O[C@H]21, 
+6o70_1_ACK_1,/bml/acmwhb/Repositories/Lab_Repositories/PoseBench/data/dockgen_set/dockgen_holo_aligned_predicted_structures/6o70_1_ACK_1_holo_aligned_predicted_protein.pdb,NC1NCNC2C1NCN2[C@@H]1O[C@H](CO)[C@H]2O[PH]([O-])(O)O[C@H]21, +6pa2_1_ASP_2,/bml/acmwhb/Repositories/Lab_Repositories/PoseBench/data/dockgen_set/dockgen_holo_aligned_predicted_structures/6pa2_1_ASP_2_holo_aligned_predicted_protein.pdb,N[C@@H](CC(O)O)C(O)O, +6pa6_2_ASN_0,/bml/acmwhb/Repositories/Lab_Repositories/PoseBench/data/dockgen_set/dockgen_holo_aligned_predicted_structures/6pa6_2_ASN_0_holo_aligned_predicted_protein.pdb,NC(O)C[C@H](N)C(O)O, +6paa_1_ASP_2,/bml/acmwhb/Repositories/Lab_Repositories/PoseBench/data/dockgen_set/dockgen_holo_aligned_predicted_structures/6paa_1_ASP_2_holo_aligned_predicted_protein.pdb,N[C@@H](CC(O)O)C(O)O, +6qkr_1_FAD_0,/bml/acmwhb/Repositories/Lab_Repositories/PoseBench/data/dockgen_set/dockgen_holo_aligned_predicted_structures/6qkr_1_FAD_0_holo_aligned_predicted_protein.pdb,CC1CC2NC3C(NC(O)N[C@H]3O)N(C[C@H](O)[C@H](O)[C@H](O)CO[PH](O)(O)O[PH](O)(O)OC[C@H]3O[C@@H](N4CNC5C4NCN[C@H]5N)[C@H](O)[C@@H]3O)C2CC1C, +6rms_1_AMP_1,/bml/acmwhb/Repositories/Lab_Repositories/PoseBench/data/dockgen_set/dockgen_holo_aligned_predicted_structures/6rms_1_AMP_1_holo_aligned_predicted_protein.pdb,NC1NCNC2C1NCN2[C@@H]1O[C@H](CO[PH](O)(O)O)[C@@H](O)[C@H]1O, +6ryz_1_SAM_2,/bml/acmwhb/Repositories/Lab_Repositories/PoseBench/data/dockgen_set/dockgen_holo_aligned_predicted_structures/6ryz_1_SAM_2_holo_aligned_predicted_protein.pdb,C[S@@H](CC[C@H](N)C(O)O)C[C@H]1O[C@@H](N2CNC3C2NCN[C@@H]3N)[C@H](O)[C@@H]1O, +6rz2_1_5CD_2,/bml/acmwhb/Repositories/Lab_Repositories/PoseBench/data/dockgen_set/dockgen_holo_aligned_predicted_structures/6rz2_1_5CD_2_holo_aligned_predicted_protein.pdb,NC1NCNC2[C@H]1NCN2[C@@H]1O[C@H](CCl)[C@@H](O)[C@H]1O, 
+6tvg_1_AP2_1,/bml/acmwhb/Repositories/Lab_Repositories/PoseBench/data/dockgen_set/dockgen_holo_aligned_predicted_structures/6tvg_1_AP2_1_holo_aligned_predicted_protein.pdb,NC1NCNC2C1NCN2[C@@H]1O[C@H](CO[PH](O)(O)C[PH](O)(O)O)[C@@H](O)[C@H]1O, +6uqy_2_AT3_0,/bml/acmwhb/Repositories/Lab_Repositories/PoseBench/data/dockgen_set/dockgen_holo_aligned_predicted_structures/6uqy_2_AT3_0_holo_aligned_predicted_protein.pdb,C[C@@H](O)SCC[N+](C)(C)C, +6ur1_2_AT3_0,/bml/acmwhb/Repositories/Lab_Repositories/PoseBench/data/dockgen_set/dockgen_holo_aligned_predicted_structures/6ur1_2_AT3_0_holo_aligned_predicted_protein.pdb,C[C@@H](O)SCC[N+](C)(C)C, +6v2a_1_ASN_3,/bml/acmwhb/Repositories/Lab_Repositories/PoseBench/data/dockgen_set/dockgen_holo_aligned_predicted_structures/6v2a_1_ASN_3_holo_aligned_predicted_protein.pdb,NC(O)C[C@H](N)C(O)O, +6wyz_1_DGL_1,/bml/acmwhb/Repositories/Lab_Repositories/PoseBench/data/dockgen_set/dockgen_holo_aligned_predicted_structures/6wyz_1_DGL_1_holo_aligned_predicted_protein.pdb,N[C@H](CCC(O)O)C(O)O, +6xb3_3_9BG_1,/bml/acmwhb/Repositories/Lab_Repositories/PoseBench/data/dockgen_set/dockgen_holo_aligned_predicted_structures/6xb3_3_9BG_1_holo_aligned_predicted_protein.pdb,NC1NC(O)C2NCN([C@@H]3O[C@H](CO)[C@@H](O)[C@H]3O[PH]([O-])(O)OC[C@H]3O[C@@H](N4CNC5C(N)NCNC54)[C@H](O)[C@@H]3O[PH]([O-])(O)O)C2N1, +6xug_1_O1Q_0,/bml/acmwhb/Repositories/Lab_Repositories/PoseBench/data/dockgen_set/dockgen_holo_aligned_predicted_structures/6xug_1_O1Q_0_holo_aligned_predicted_protein.pdb,CC1CCC[C@H](N2NCC[C@H]2C2CC(Cl)C3NNN(C4CC[C@H]5CNNC5C4)C3C2)C1, +6yao_1_OJ2_0,/bml/acmwhb/Repositories/Lab_Repositories/PoseBench/data/dockgen_set/dockgen_holo_aligned_predicted_structures/6yao_1_OJ2_0_holo_aligned_predicted_protein.pdb,OCC[C@H]1CCCC[C@@H]1NC(O)N[C@H]1CCC[C@@H](OC(F)(F)F)C1, 
+6yap_1_OHZ_0,/bml/acmwhb/Repositories/Lab_Repositories/PoseBench/data/dockgen_set/dockgen_holo_aligned_predicted_structures/6yap_1_OHZ_0_holo_aligned_predicted_protein.pdb,OCC[C@H]1CCCC[C@@H]1NC(O)N[C@H]1CC(Cl)C[C@H](OC(F)(F)F)C1, +6yaq_1_OHZ_0,/bml/acmwhb/Repositories/Lab_Repositories/PoseBench/data/dockgen_set/dockgen_holo_aligned_predicted_structures/6yaq_1_OHZ_0_holo_aligned_predicted_protein.pdb,OCCC1CCCC[C@H]1N[C@H](O)N[C@H]1C[C@@H](Cl)C[C@@H](OC(F)(F)F)C1, diff --git a/forks/chai-lab/.devcontainer/devcontainer.json b/forks/chai-lab/.devcontainer/devcontainer.json new file mode 100755 index 00000000..3291ea02 --- /dev/null +++ b/forks/chai-lab/.devcontainer/devcontainer.json @@ -0,0 +1,42 @@ +{ + "name": "ChaiLab", + "build": { + "context": "..", + "dockerfile": "../Dockerfile.chailab", + "target": "chailab-baseimage" + }, + "runArgs": [ + // by default use all GPUs, can be overriden by envvar + "--gpus=${localEnv:DEVBOX_GPU_SPEC:all}", + "--ipc=host", + "-v=/data/instance:/data/instance", + // default container name is chai-lab-container + "--name=chai-lab-${localEnv:DEVBOX_USER:container}", + // set restrictions on CPU and RAM memory usage + "--cpus=60.0", + "--memory=1000g" + ], + "shutdownAction": "none", + "postCreateCommand": "uv pip install -e . 
&& pre-commit install -f", + "customizations": { + "vscode": { + "settings": { + "python.defaultInterpreterPath": "/opt/venv/bin/python" + }, + "extensions": [ + "ms-azuretools.vscode-docker", + "ms-python.python", + "ms-python.vscode-pylance", + "ms-python.mypy-type-checker", + "charliermarsh.ruff", + "ms-toolsai.jupyter", + "arianjamasb.protein-viewer", + "redhat.vscode-yaml", + // very optional git-specific stuff + "arturock.gitstash", + "mhutchie.git-graph", + "GitHub.vscode-pull-request-github" + ] + } + } +} \ No newline at end of file diff --git a/forks/chai-lab/.github/dependabot.yml b/forks/chai-lab/.github/dependabot.yml new file mode 100644 index 00000000..9d8f2d04 --- /dev/null +++ b/forks/chai-lab/.github/dependabot.yml @@ -0,0 +1,12 @@ +# To get started with Dependabot version updates, you'll need to specify which +# package ecosystems to update and where the package manifests are located. +# Please see the documentation for more information: +# https://docs.github.com/github/administering-a-repository/configuration-options-for-dependency-updates +# https://containers.dev/guide/dependabot + +version: 2 +updates: + - package-ecosystem: "devcontainers" + directory: "/" + schedule: + interval: weekly diff --git a/forks/chai-lab/.github/pull_request_template.md b/forks/chai-lab/.github/pull_request_template.md new file mode 100644 index 00000000..59a823ba --- /dev/null +++ b/forks/chai-lab/.github/pull_request_template.md @@ -0,0 +1,9 @@ +## Description + + +## Motivation + + + +## Test plan + diff --git a/forks/chai-lab/.github/workflows/mypy.yml b/forks/chai-lab/.github/workflows/mypy.yml new file mode 100644 index 00000000..1c0af86f --- /dev/null +++ b/forks/chai-lab/.github/workflows/mypy.yml @@ -0,0 +1,27 @@ +name: Mypy +on: + # Triggered whenever a commit is added to the main branch + push: + branches: + - main + # Triggered whenever a PR is opened or updated + pull_request: +jobs: + mypy: + runs-on: ubuntu-latest + steps: + - uses: 
actions/checkout@v4 + - name: Set up Python + uses: actions/setup-python@v5 + with: + python-version: "3.11" + cache: "pip" + - name: Install dependencies + run: | + # install uv and cpu-only torch + pip install --no-deps uv -r <( cat requirements.in | grep torch) --extra-index-url https://download.pytorch.org/whl/cpu + # install requirements, except torch and potentially nvidia-related stuff + uv pip install --system -r <( cat requirements.in | grep -v nvidia | grep -v torch ) + uv pip install --system --no-deps -e . + - name: Run mypy + run: mypy . diff --git a/forks/chai-lab/.github/workflows/prettier_yaml.yml b/forks/chai-lab/.github/workflows/prettier_yaml.yml new file mode 100644 index 00000000..82dba850 --- /dev/null +++ b/forks/chai-lab/.github/workflows/prettier_yaml.yml @@ -0,0 +1,27 @@ +name: Prettier + +on: + # Triggered whenever a commit is added to the main branch + push: + branches: + - main + # Triggered whenever a PR is opened or updated + pull_request: +jobs: + yaml: + runs-on: ubuntu-latest + + steps: + - name: Checkout repository + uses: actions/checkout@v4 + + - name: Set up Node.js + uses: actions/setup-node@v4 + with: + node-version: "20" + + - name: Install dependencies + run: npm install --save-dev --save-exact prettier + + - name: Run Prettier to check YAML format + run: npx prettier --check "**/*.yml" "**/*.yaml" diff --git a/forks/chai-lab/.github/workflows/publish-to-pypi.yml b/forks/chai-lab/.github/workflows/publish-to-pypi.yml new file mode 100644 index 00000000..431edf29 --- /dev/null +++ b/forks/chai-lab/.github/workflows/publish-to-pypi.yml @@ -0,0 +1,27 @@ +name: Deploy to pypi + +on: + release: + types: [created] + +jobs: + publish: + runs-on: ubuntu-latest + steps: + - name: Checkout sources + uses: actions/checkout@v4 + + - name: Set up Python + uses: actions/setup-python@v5 + with: + python-version: "3.11" + + - name: Install dependencies + run: | + pip install . 
&& pip install hatch + - name: Publish to PyPi + env: + HATCH_INDEX_USER: "__token__" + HATCH_INDEX_AUTH: ${{ secrets.PYPI_TOKEN }} + run: | + hatch build --clean && hatch publish diff --git a/forks/chai-lab/.github/workflows/pytest.yml b/forks/chai-lab/.github/workflows/pytest.yml new file mode 100644 index 00000000..2b3166b3 --- /dev/null +++ b/forks/chai-lab/.github/workflows/pytest.yml @@ -0,0 +1,27 @@ +name: Pytest +on: + # Triggered whenever a commit is added to the main branch + push: + branches: + - main + # Triggered whenever a PR is opened or updated + pull_request: +jobs: + pytest: + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v4 + - name: Set up Python + uses: actions/setup-python@v5 + with: + python-version: "3.10" # ensure we support 3.10 + cache: "pip" + - name: Install dependencies + run: | + # install uv and cpu-only torch + pip install --no-deps uv -r <( cat requirements.in | grep torch) --extra-index-url https://download.pytorch.org/whl/cpu + # install requirements, except torch and potentially nvidia-related stuff + uv pip install --system -r <( cat requirements.in | grep -v nvidia | grep -v torch ) + uv pip install --system --no-deps -e . 
+ - name: Run pytest + run: pytest ./tests/ diff --git a/forks/chai-lab/.github/workflows/ruff.yml b/forks/chai-lab/.github/workflows/ruff.yml new file mode 100644 index 00000000..dde0f6cf --- /dev/null +++ b/forks/chai-lab/.github/workflows/ruff.yml @@ -0,0 +1,27 @@ +name: Ruff +on: + # Triggered whenever a commit is added to the main branch + push: + branches: + - main + # Triggered whenever a PR is opened or updated + pull_request: +jobs: + ruff: + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v4 + # no job matrix is defined here, so pin the interpreter explicitly + - name: Set up Python + uses: actions/setup-python@v5 + with: + python-version: "3.11" + cache: "pip" + - name: Install + run: pip install pre-commit + - name: Run pre-commit checks on all files + # run specific ruff pre-commit hooks on all files + run: > + pre-commit install -f + && pre-commit run ruff --all-files + && pre-commit run ruff-format --all-files diff --git a/forks/chai-lab/.gitignore b/forks/chai-lab/.gitignore new file mode 100644 index 00000000..72c088d4 --- /dev/null +++ b/forks/chai-lab/.gitignore @@ -0,0 +1,162 @@ +# Byte-compiled / optimized / DLL files +__pycache__/ +*.py[cod] +*$py.class + +# C extensions +*.so + +# Distribution / packaging +.Python +build/ +develop-eggs/ +dist/ +downloads/ +eggs/ +.eggs/ +lib/ +lib64/ +parts/ +sdist/ +var/ +wheels/ +share/python-wheels/ +*.egg-info/ +.installed.cfg +*.egg +MANIFEST + +# PyInstaller +# Usually these files are written by a python script from a template +# before PyInstaller builds the exe, so as to inject date/other infos into it.
+*.manifest +*.spec + +# Installer logs +pip-log.txt +pip-delete-this-directory.txt + +# Unit test / coverage reports +htmlcov/ +.tox/ +.nox/ +.coverage +.coverage.* +.cache +nosetests.xml +coverage.xml +*.cover +*.py,cover +.hypothesis/ +.pytest_cache/ +cover/ + +# Translations +*.mo +*.pot + +# Django stuff: +*.log +local_settings.py +db.sqlite3 +db.sqlite3-journal + +# Flask stuff: +instance/ +.webassets-cache + +# Scrapy stuff: +.scrapy + +# Sphinx documentation +docs/_build/ + +# PyBuilder +.pybuilder/ +target/ + +# Jupyter Notebook +.ipynb_checkpoints + +# IPython +profile_default/ +ipython_config.py + +# pyenv +# For a library or package, you might want to ignore these files since the code is +# intended to run in multiple environments; otherwise, check them in: +# .python-version + +# pipenv +# According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. +# However, in case of collaboration, if having platform-specific dependencies or dependencies +# having no cross-platform support, pipenv may install dependencies that don't work, or not +# install all needed dependencies. +#Pipfile.lock + +# poetry +# Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control. +# This is especially recommended for binary packages to ensure reproducibility, and is more +# commonly ignored for libraries. +# https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control +#poetry.lock + +# pdm +# Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control. +#pdm.lock +# pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it +# in version control. +# https://pdm.fming.dev/latest/usage/project/#working-with-version-control +.pdm.toml +.pdm-python +.pdm-build/ + +# PEP 582; used by e.g. 
github.com/David-OConnor/pyflow and github.com/pdm-project/pdm +__pypackages__/ + +# Celery stuff +celerybeat-schedule +celerybeat.pid + +# SageMath parsed files +*.sage.py + +# Environments +.env +.venv +env/ +venv/ +ENV/ +env.bak/ +venv.bak/ + +# Spyder project settings +.spyderproject +.spyproject + +# Rope project settings +.ropeproject + +# mkdocs documentation +/site + +# mypy +.mypy_cache/ +.dmypy.json +dmypy.json + +# Pyre type checker +.pyre/ + +# pytype static type analyzer +.pytype/ + +# Cython debug symbols +cython_debug/ + +# PyCharm/JetBrains +.idea/ + +# outputs from the model +outputs/ + diff --git a/forks/chai-lab/.pre-commit-config.yaml b/forks/chai-lab/.pre-commit-config.yaml new file mode 100644 index 00000000..a07ede44 --- /dev/null +++ b/forks/chai-lab/.pre-commit-config.yaml @@ -0,0 +1,10 @@ +--- +repos: + - repo: https://github.com/astral-sh/ruff-pre-commit + # Ruff version. + rev: v0.6.3 + hooks: + # Run the linter. + - id: ruff + # Run the formatter. + - id: ruff-format diff --git a/forks/chai-lab/Dockerfile.chailab b/forks/chai-lab/Dockerfile.chailab new file mode 100755 index 00000000..0f616dd3 --- /dev/null +++ b/forks/chai-lab/Dockerfile.chailab @@ -0,0 +1,79 @@ +FROM ubuntu:22.04 AS chailab-baseimage + +ENV \ + LANG=C.UTF-8 \ + LC_ALL=C.UTF-8 \ + # config for apt + DEBIAN_FRONTEND=noninteractive \ + # default editor for git cli + EDITOR=vim \ + # keep (large) mypy cache outside of working tree + MYPY_CACHE_DIR='/tmp/.chai_lab_mypy_cache' \ + # always flush output from python + PYTHONUNBUFFERED=TRUE \ + # enable fault handler (print tracebacks even after segfault or NCCL errors). 
+ PYTHONFAULTHANDLER=1 \ + # keep __pycache__ out of working tree + PYTHONPYCACHEPREFIX='/tmp/.chai_lab_pycache' + + +RUN --mount=type=cache,target=/var/cache/apt \ + apt-get -qq update \ + && apt-get -qq install -y \ + # common things + gnupg ca-certificates wget git curl aria2 lsb-release tzdata \ + rsync sudo tree htop tmux unzip \ + clang \ + # for direct ssh into container + openssh-server socat \ + # provides `fuser` command + psmisc \ + # RDMA/InfiniBand + libibverbs1 librdmacm1 \ + # text editors, needed by git cli + nano vim \ + build-essential libstdc++6 \ + # (run continues) + # stop git from complaining about dubious ownership. + && git config --global --add safe.directory "*" \ + # + # cuda softlinking is needed in podman, but not docker + && ln -s /lib/x86_64-linux-gnu/libcuda.so.1 /lib/x86_64-linux-gnu/libcuda.so \ + && ldconfig /lib/x86_64-linux-gnu/ \ + # setup timezone, to $TZ, ubuntu-specific + # && ln -fs /usr/share/zoneinfo/$TZ /etc/localtime \ + && dpkg-reconfigure --frontend noninteractive tzdata \ + # change default shell to bash (has no effect during building) + && chsh -s /bin/bash + + +ENV \ + # expose CUDA libraries. Now that we don't build anything this is likely redundant + LD_LIBRARY_PATH="/usr/local/cuda/lib64/stubs/:$LD_LIBRARY_PATH" \ + # Set uv timeout to larger value to account for slow download time of nvidia-cudnn-cu12 + UV_HTTP_TIMEOUT=1000 \ + # where virtual env will be installed + VIRTUAL_ENV=/opt/venv + +# Install dependencies in virtualenv +COPY ./requirements.in /tmp/requirements.in +# from https://pythonspeed.com/articles/activate-virtualenv-dockerfile/ +# a trick to have virtualenv "always activated" +ENV PATH="$VIRTUAL_ENV/bin:$PATH" +RUN --mount=type=cache,target=/root/.cache/uv \ + # Install uv + curl -LsSf https://astral.sh/uv/install.sh | sh \ + && $HOME/.cargo/bin/uv venv --python 3.11 $VIRTUAL_ENV \ + # this is sh, not bash, so . not source + && . 
$VIRTUAL_ENV/bin/activate \ + && $HOME/.cargo/bin/uv pip install uv pip -r /tmp/requirements.in + + +# making sure envvars are set in all shells +RUN echo "PATH=\"$PATH\"" >> /etc/environment \ + && echo "LANG=\"$LANG\"" >> /etc/environment \ + && echo "LC_ALL=\"$LC_ALL\"" >> /etc/environment \ + && echo "LD_LIBRARY_PATH=\"$LD_LIBRARY_PATH\"" >> /etc/environment \ + && echo "EDITOR=\"$EDITOR\"" >> /etc/environment + +# no startup command. \ No newline at end of file diff --git a/forks/chai-lab/LICENSE.md b/forks/chai-lab/LICENSE.md new file mode 100644 index 00000000..f9f2a5fd --- /dev/null +++ b/forks/chai-lab/LICENSE.md @@ -0,0 +1,387 @@ +Please read this Chai Discovery Community License Agreement (the +“**License**”) carefully before using the Chai-1 Model software code and +model weights (the “**AI Model**”) and any “**Outputs**” (as defined +below) which is offered by Chai Discovery, Inc. (“**Chai**”) and made +available at the following link +https://github.com/chaidiscovery/chai-lab, as they may be updated and +amended from time to time. + +THIS LICENSE GRANTS RIGHTS ONLY TO USE THE AI MODEL, OUTPUTS, AND ANY +DERIVATIVE WORKS (AS DEFINED BELOW) SOLELY FOR NON-COMMERCIAL PURPOSES +(AS DEFINED BELOW). YOU MAY NOT USE THE AI MODEL, OUTPUT, OR ANY +DERIVATIVE WORKS UNDER THE TERMS OF THIS LICENSE FOR ANY COMMERCIAL +PURPOSES OR AS PART OF A SERVICE OFFERING. PLEASE REVIEW SECTION “USE +RESTRICTIONS AND AUP” BELOW CAREFULLY BEFORE USING THE AI MODEL OR ANY +OUTPUT. + +By downloading the AI Model, or otherwise using the AI Model or +exercising any of the rights granted hereunder in any manner, You agree +that You have read and agree to be bound by the terms of this License +and that You will use the AI Model only for Non-Commercial Purposes (as +defined below).
If You are accessing the AI Model on behalf of an +organization or entity, You represent and warrant that You are +authorized to enter into this License on that organization’s or entity’s +behalf and bind them to the terms of this License (in which case, the +references to “You” and “Your” in this License, except for in this +sentence, refer to that organization or entity) and that such entity is +not a Commercial Entity (as defined below). No rights are granted under +this License to a Commercial Entity. Use of the AI Model is expressly +conditioned upon Your assent to all terms of this License. + + +## **1. Definitions.** + +In addition to other terms defined elsewhere in this License, the terms +below have the following meanings. + +1. “**Commercial Entity**” means any entity engaged, in whole or in + part, in any activity intended for or directed toward commercial + advantage or monetary compensation, including but not limited to the + development of any product or service intended to be sold or made + available for a fee or other economic consideration. For the purpose + of this License, references to a Commercial Entity expressly exclude + any universities, non-profit organizations, non-profit research + institutes, and non-profit educational and government bodies. + +2. “**Contribution**” means any work of authorship, including the + original version of the AI Model and any modifications or additions + to that AI Model or Derivative Works thereof, that is intentionally + submitted to Chai for inclusion in the AI Model by the copyright + owner or by an individual or legal entity authorized to submit on + behalf of the copyright owner. 
For the purposes of this definition, + "submitted" means any form of electronic, verbal, or written + communication sent to Chai or its representatives, including but not + limited to communication on electronic mailing lists, source code + control systems, and issue tracking systems that are managed by, or + on behalf of, Chai for the purpose of discussing and improving the + AI Model, but excluding Outputs and all communications that are + conspicuously marked or otherwise designated in writing by the + copyright owner as "Not a Contribution." + +3. “**Contributor**” means Chai and any individual or legal entity on + behalf of whom a Contribution has been received by Chai and + subsequently incorporated within the AI Model. + +4. “**Derivative Work**” means any work, whether in Source or Object + form, that is based on (or derived from) the AI Model and for which + the revisions, annotations, elaborations, or other modifications + represent, as a whole, an original work of authorship. For the + purposes of this License, Derivative Works shall not include works + that remain separable from, or merely link (or bind by name) to the + interfaces of, the AI Model and Derivative Works thereof. + +5. “**Non-Commercial Purposes**” means uses not intended for or + directed toward commercial advantage or monetary compensation, or + the facilitation of development of any product or service to be sold + or made available for a fee or other economic consideration. For the + avoidance of doubt, the provision of Outputs or Output Derivatives + as a service, or the provision of any other service that utilizes + the AI Model, Derivative Works thereof, Outputs or Derivative + Outputs (even if the service does not provide Outputs or Output + Derivatives), is not a Non-Commercial Purpose, whether or not for a + fee or other economic consideration. + +6. 
“**Object**” means any form resulting from mechanical transformation + or translation of a Source form, including but not limited to + compiled object code, generated documentation, and conversions to + other media types. + +7. “**Output**” means any output that is made available to You by the + functionality of the AI Model, including but not limited to any + protein sequence, structure prediction, functional annotation, + molecule, descriptions of a molecule, structure predictions, + confidence rankings, intermediate model states, model, model + embeddings, sequence, text, and/or images. + +8. “**Output Derivatives**” means any enhancements, modifications and + derivative works of Outputs (including, but not limited to, any + derivative sequences, structures, or molecules). + +9. “**Source**” means the preferred form for making modifications, + including but not limited to AI Model source code, documentation + source, and configuration files. + +10. “**You**” or “**Your**” means the individual entering into this + License or the organization or entity on whose behalf such + individual is entering into this License. + + +## **2. Grant of License.** + +1. **Copyright License**. Subject to the terms and conditions of this + License, each Contributor hereby grants to You a limited, + non-exclusive, worldwide, royalty-free, non-transferable, + non-sublicensable copyright license to reproduce, prepare Derivative + Works of, publicly display, publicly perform, and distribute the AI + Model and such Derivative Works in Source or Object form solely for + Your Non-Commercial Purposes and subject to the restrictions set + forth in Sections 3 (“Use Restrictions and AUP”) and 4 (“Sharing and + Distribution”). + +2. **Patent License**. 
Subject to the terms and conditions of this + License, each Contributor hereby grants to You a limited, + non-exclusive, worldwide, royalty-free, non-transferable, + non-sublicensable patent license to make, have made, use, import, + and otherwise transfer the AI Model solely for Your Non-Commercial + Purposes and subject to the restrictions set forth in Sections 3 + (“Use Restrictions and AUP”) and 4 (“Sharing and Distribution”), + where such license applies only to those patent claims licensable by + such Contributor that are necessarily infringed by its + Contribution(s) alone or by combination of its Contribution(s) with + the AI Model to which such Contribution(s) was submitted. + + 1. If You institute patent litigation against any entity (including + a cross-claim or counterclaim in a lawsuit) alleging that the AI + Model or a Contribution incorporated within the AI Model + constitutes direct or contributory patent infringement, then any + patent licenses granted to You under this License for that AI + Model shall terminate as of the date such litigation is filed + and may be reinstituted only by a separate grant of a patent + license in writing from the Contributor. + + +## **3. Use Restrictions and AUP.** + +1. **No Commercial Use**. You may use the AI Model, Contributions, + Derivative Works, Outputs and Output Derivatives only for + Non-Commercial Purposes. Any commercial use of any of the foregoing, + including but not limited to any use by, on behalf of or for any + Commercial Entity or to facilitate the development of any product or + service to be sold or made available for a fee or other economic + consideration, is strictly prohibited under this License. + +2. 
**Drug Discovery.** You may not use the AI Model, Contributions, + Derivative Works, Outputs and Output Derivatives in connection with + drug development or discovery, including but not limited to: (i) the + development (at any stage) or discovery of any drug, medication, + therapeutic, or pharmaceutical of any kind; (ii) any molecular or + biological target, hit or lead identification; (iii) drug candidate + selection; or (iv) lead optimization. + +3. **No Service Offerings**. You may not use the AI Model or any + Contributions, Derivative Works, Outputs or Output Derivatives in or + in connection with the provision of any service offering to third + parties (such as in connection with a hosted service offering that + provides Outputs or Output Derivatives to third parties), regardless + of whether or not such service requires monetary compensation or + other consideration. + +4. **Acceptable Use Policy**. Your use of the AI Model, Derivative + Works, Outputs and Output Derivatives is further subject to the Chai + Discovery Acceptable Use Policy available at + and any additional + use restrictions that may be communicated to You through the AI Model, + as may be updated and amended from time to time (the “**AUP**”), the + terms of which are incorporated herein by reference. In the event of + any conflict between the terms of this License and the terms of the + AUP, the terms that are more restrictive of Your use of the AI + Model, Derivative Works, Outputs and Output Derivatives, as + applicable, shall govern and control. 
For the purpose of clarity, + the AUP includes, among other things, restrictions that the AI + Model, Derivative Works, Outputs and Output Derivatives may not be + used to train, optimize, improve or otherwise influence the + functionality or performance of any: (i) neural network, tool, + platform and/or artificial intelligence or machine learning models + with more than 10,000 trainable parameters; or (ii) technology for + protein structure prediction or protein, drug, or enzyme design. + + +## **4. Sharing and Distribution.** + +Subject to Section “Use Restrictions and AUP”, You may reproduce and +distribute copies of the AI Model or Derivative Works thereof, with or +without modifications, and in Source or Object form solely for Your +Non-Commercial Purposes, provided that You meet the following +conditions: + +1. You must not distribute copies of the AI Model, Contributions, + Derivative Works, Output, and Output Derivatives, or allow the use + of any reproductions or copies thereof by, on behalf of or for, any + Commercial Entity; + +2. You must restrict the usage of any copies of the AI Model, + Contributions, Derivative Works, Output, and Output Derivatives to + usage for Non-Commercial Purposes; + +3. You must give any other recipients of the AI Model, Contributions, + Derivative Works, Output, and Output Derivatives a copy of this + License; + +4. You must cause any modified files of the AI Model, Contributions, + Derivative Works, Output, and Output Derivatives to carry prominent + notices stating that You changed the files; + +5. 
You must retain, in the AI Model, Contributions, Derivative Works, + Output, and Output Derivatives that You distribute, all copyright, + patent, trademark, and attribution notices which are included in the + version of the AI Model, Contributions, Derivative Works, Output, + and Output Derivatives provided to You (collectively, “**Attribution + Notices**”), excluding those portions of the Attribution Notices + that do not pertain to any part of the Derivative Works or Output + Derivatives that you distribute, You must include the pertinent + portions of the Attribution Notices in at least one of the following + places: within a NOTICE text file distributed as part of the + Derivative Works or Output Derivatives; within the Source form or + documentation, if provided along with the Derivative Works or Output + Derivatives; or, within a display generated by the Derivative Works, + if and wherever such third-party notices normally appear. The + contents of such Attribution Notices are for informational purposes + only and do not modify this License. You may add Your own + attribution notices within Derivative Works or Output Derivatives + that You distribute, alongside or as an addendum to the pertinent + Attribution Notices, provided that such additional attribution + notices cannot be construed as modifying this License. + +You may add Your own copyright statement to Your modifications and may +provide additional or different license terms and conditions for use, +reproduction, or distribution of Your modifications, or for any such +Derivative Works as a whole, or for Your Services, provided Your use, +reproduction, and distribution of the AI Model, Derivative Works, and +Your Services otherwise complies with the conditions stated in this +License. + + +## **5. 
Submission of Contributions.** + +Unless You explicitly state otherwise, any Contribution intentionally +submitted for inclusion in the AI Model by You to Chai shall be under +the terms and conditions of this License, without any additional terms +or conditions. Notwithstanding the above, nothing herein shall supersede +or modify the terms of any separate license agreement you may have +executed with Chai regarding such Contributions. + + +## **6. Trademarks.** + +This License does not grant permission to use the trade names, +trademarks, service marks, or product names of Chai, except for +reasonable and customary use in describing the origin of the AI Model +and reproducing the content of the NOTICE file. + + +## **7. Term and Termination.** + +This License applies for so long as the rights licensed in Section 2 +hereunder remain protected by copyright and/or patent law, as +applicable. However, if You fail to comply with this License, then Your +rights under this License terminate automatically. + +1. For the avoidance of doubt, this Section (“Term and Termination”) + does not affect any right that Chai may have to seek remedies for + Your violations of this License. + +2. For the avoidance of doubt, Chai may also offer the AI Model under + separate terms or conditions or stop distributing the AI Model at + any time; however, doing so will not terminate this License. + +3. This sentence of Section “Term and Termination” and Sections + “Submission of Contributions,” “Trademarks,” “Disclaimer of + Warranty,” “Limitation of Liability,” and “General” survive + termination of this License. + + +## **8.
Disclaimer of Warranty.** + +CHAI PROVIDES THE AI MODEL AND ITS OUTPUTS (AND EACH CONTRIBUTOR +PROVIDES ITS CONTRIBUTIONS) ON AN "AS IS" BASIS, WITHOUT WARRANTY OF ANY +KIND, EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO ANY +IMPLIED WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE, +QUIET ENJOYMENT AND NON-INFRINGEMENT, AND ANY WARRANTIES ARISING OUT OF +COURSE OF DEALING OR USAGE OF TRADE, ALL OF WHICH ARE HEREBY DISCLAIMED. +CHAI AND ITS CONTRIBUTORS MAKE NO WARRANTY (1) THAT THE AI MODEL, +DERIVATIVE WORKS, OUTPUTS, AND/OR OUTPUT DERIVATIVES WILL MEET YOUR +REQUIREMENTS OR BE AVAILABLE ON AN UNINTERRUPTED, SECURE, OR ERROR-FREE +BASIS, OR (2) REGARDING THE QUALITY, ACCURACY, TIMELINESS, TRUTHFULNESS, +COMPLETENESS OR RELIABILITY OF ANY OUTPUTS. YOU ARE SOLELY RESPONSIBLE +FOR DETERMINING THE APPROPRIATENESS OF USING THE AI MODEL, DERIVATIVE +WORKS, OUTPUTS, AND/OR OUTPUT DERIVATIVES AND ASSUME ANY RISKS +ASSOCIATED WITH YOUR EXERCISE OF PERMISSIONS UNDER THIS AGREEMENT. + + +## **9. Limitation of Liability.** + +TO THE MAXIMUM EXTENT PERMITTED BY LAW, NEITHER CHAI NOR ANY +CONTRIBUTORS WILL BE LIABLE FOR ANY DIRECT, INCIDENTAL, SPECIAL, +EXEMPLARY OR CONSEQUENTIAL DAMAGES, OR DAMAGES FOR LOST PROFITS, LOST +REVENUES, LOST SAVINGS, LOST BUSINESS OPPORTUNITY, LOSS OF DATA OR +GOODWILL, SERVICE INTERRUPTION, COMPUTER DAMAGE OR SYSTEM FAILURE OR THE +COST OF SUBSTITUTE SERVICES OF ANY KIND ARISING OUT OF OR IN CONNECTION +WITH THESE TERMS OR FROM THE USE OF OR INABILITY TO USE THE SERVICES OR +OUTPUT, WHETHER BASED ON WARRANTY, CONTRACT, TORT (INCLUDING +NEGLIGENCE), PRODUCT LIABILITY OR ANY OTHER LEGAL THEORY, AND WHETHER OR +NOT CHAI OR THE CONTRIBUTORS HAVE BEEN INFORMED OF THE POSSIBILITY OF +SUCH DAMAGE, EVEN IF A LIMITED REMEDY SET FORTH HEREIN IS FOUND TO HAVE +FAILED OF ITS ESSENTIAL PURPOSE.
THE EXCLUSIONS AND LIMITATIONS OF +DAMAGES SET FORTH ABOVE ARE FUNDAMENTAL ELEMENTS OF THE BASIS OF THE +BARGAIN BETWEEN Chai, THE CONTRIBUTORS, AND YOU. + + +## **10. General.** + +1. Entire Agreement. This License constitutes the entire + agreement between You and Chai relating to the subject matter hereof + and supersedes all proposals, understandings, or discussions, + whether written or oral, relating to the subject matter of this + License and all past dealing or industry custom. The failure of + either party to enforce its rights under this License at any time + for any period shall not be construed as a waiver of such rights. + Chai may amend or modify this License from time to time and will use + reasonable efforts to provide You with notice of any material + changes that may negatively impact Your use of the AI Model via the + github page for the AI Model at + <https://github.com/chaidiscovery/chai-lab>, or through another + means made available to You. No other changes, modifications or + waivers to this License will be effective unless in writing and + signed by both parties. + +2. Relationship of Parties. Nothing in this License will be + construed to create a partnership, joint venture or agency + relationship between the parties. Neither party will have the power + to bind the other or to incur obligations on the other’s behalf + without such other party’s prior written consent. Unless otherwise + expressly provided, no provisions of this License are intended or + will be construed to confer upon or give to any person or entity, + other than the parties, any rights, remedies or other benefits under + or by reason of this License. + +3. Export Control. 
You will comply fully with all applicable + export laws and regulations of the United States + (“**Export Laws**”) to ensure that neither the AI Model, + Contributions, Derivative Works, Outputs, or Output Derivatives, nor + any technical data related thereto is: (i) exported or re-exported + directly or indirectly in violation of Export Laws; or (ii) used for + any purposes prohibited by the Export Laws, including, but not + limited to, nuclear, chemical, or biological weapons proliferation. + +4. Assignment. This License and the rights and obligations + herein may not be assigned or transferred, in whole or in part, by + You without the prior written consent of Chai. Any assignment in + violation of this provision is void. Chai may freely assign or + transfer this License, in whole or in part. This License shall be + binding upon, and inure to the benefit of, the successors and + permitted assigns of the parties. + +5. Governing Law. This License shall be governed by and + construed under the laws of the State of California and the United + States without regard to conflicts of laws provisions thereof, and + without regard to the Uniform Computer Information Transactions Act. + Any legal action or proceeding arising under this License will be + brought exclusively in the federal or state courts located in the + Northern District of California and the parties irrevocably consent + to the personal jurisdiction and venue therein. + +6. Severability. If any provision of this License is held to be + invalid, illegal or unenforceable in any respect, that provision + shall be limited or eliminated to the minimum extent necessary so + that this License otherwise remains in full force and effect and + enforceable. + + +## **11. 
Additional License Rights.** + +If You are interested in using the AI Model or Outputs for purposes +beyond the rights granted under this License (for example, if you would +like to use the AI Model or Outputs for commercial purposes), you may +contact Chai at <partnerships@chaidiscovery.com>. Any such use in excess +of the rights granted herein to You must be subject to a written +agreement between Chai and You. diff --git a/forks/chai-lab/README.md b/forks/chai-lab/README.md new file mode 100644 index 00000000..fafa6ef4 --- /dev/null +++ b/forks/chai-lab/README.md @@ -0,0 +1,69 @@ +# Chai-1 + +Chai-1 is a multi-modal foundation model for molecular structure prediction that performs at the state-of-the-art across a variety of benchmarks. Chai-1 enables unified prediction of proteins, small molecules, DNA, RNA, glycosylations, and more. + +

+ +

+ +For more information on the model's performance and capabilities, see our [technical report](https://chaiassets.com/chai-1/paper/technical_report_v1.pdf). + +## Installation + +```shell +# current version (updates daily): +pip install git+https://github.com/chaidiscovery/chai-lab.git +# version on pypi: +pip install chai_lab==0.0.1 +``` + +This Python package requires Linux, and a GPU with CUDA and bfloat16 support + +(we recommend A100/H100, but A10, A30 should work for smaller complexes. Users reported success with consumer-grade RTX 4090). + + +## Running the model + +The model accepts inputs in the FASTA file format, and allows you to specify the number of trunk recycles and diffusion timesteps via the `chai_lab.chai1.run_inference` function. By default, the model generates five sample predictions, and uses embeddings without MSAs or templates. + +The following script demonstrates how to provide inputs to the model, and obtain a list of PDB files for downstream analysis: + +```shell +python examples/predict_structure.py +``` + +For more advanced use cases, we also expose the `chai_lab.chai1.run_folding_on_context`, which allows users to construct an `AllAtomFeatureContext` manually. This allows users to specify their own templates, MSAs, embeddings, and constraints. We currently provide an example of how to construct an embeddings context, and will be releasing helper methods to build MSA and templates contexts soon. + +## ⚡ Try it online + +We provide a [web server](https://lab.chaidiscovery.com) so you can test the Chai-1 model right from your browser, without any setup. + +

+ +

+ +## 💬 Feedback + +Found a 🐞? Please report it in GitHub [issues](https://github.com/chaidiscovery/chai-lab/issues). + +We welcome community testing and feedback. To share observations about the model's performance, please reach out via [GitHub discussions](https://github.com/chaidiscovery/chai-lab/discussions), or [via email](mailto:feedback@chaidiscovery.com). + +## 🛠️ Development + +We use [devcontainers](https://code.visualstudio.com/docs/devcontainers/containers) in development, which helps us ensure we work in identical environments. We recommend working inside a devcontainer if you want to make a contribution to this repository. + +Devcontainers work on a local Linux setup, and on remote machines over an SSH connection. + +## Status + +Since this is an initial release, we expect to make some breaking changes to the API and are not guaranteeing backwards compatibility. We recommend pinning the current version in your requirements, i.e.: + +``` +chai_lab==0.0.1 +``` + +## Licence + +See [LICENSE.md](LICENSE.md). + +To discuss commercial use of our models, reach us [via email](mailto:partnerships@chaidiscovery.com). 
diff --git a/forks/chai-lab/assets/chailab_online_screenshot.png b/forks/chai-lab/assets/chailab_online_screenshot.png new file mode 100644 index 00000000..1d3a4e3f Binary files /dev/null and b/forks/chai-lab/assets/chailab_online_screenshot.png differ diff --git a/forks/chai-lab/assets/performance_barplot.png b/forks/chai-lab/assets/performance_barplot.png new file mode 100644 index 00000000..b996807a Binary files /dev/null and b/forks/chai-lab/assets/performance_barplot.png differ diff --git a/forks/chai-lab/chai_lab/__init__.py b/forks/chai-lab/chai_lab/__init__.py new file mode 100644 index 00000000..f102a9ca --- /dev/null +++ b/forks/chai-lab/chai_lab/__init__.py @@ -0,0 +1 @@ +__version__ = "0.0.1" diff --git a/forks/chai-lab/chai_lab/chai1.py b/forks/chai-lab/chai_lab/chai1.py new file mode 100644 index 00000000..69f0a351 --- /dev/null +++ b/forks/chai-lab/chai_lab/chai1.py @@ -0,0 +1,688 @@ +# %% +import math +from dataclasses import dataclass +from pathlib import Path + +import numpy as np +import torch +import torch.export +from einops import einsum, rearrange, repeat +from torch import Tensor +from tqdm import tqdm + +from chai_lab.data.collate.collate import Collate +from chai_lab.data.collate.utils import AVAILABLE_MODEL_SIZES +from chai_lab.data.dataset.all_atom_feature_context import ( + MAX_MSA_DEPTH, + MAX_NUM_TEMPLATES, + AllAtomFeatureContext, +) +from chai_lab.data.dataset.constraints.constraint_context import ConstraintContext +from chai_lab.data.dataset.embeddings.embedding_context import EmbeddingContext +from chai_lab.data.dataset.embeddings.esm import get_esm_embedding_context +from chai_lab.data.dataset.inference_dataset import load_chains_from_raw, read_inputs +from chai_lab.data.dataset.msas.msa_context import MSAContext +from chai_lab.data.dataset.structure.all_atom_structure_context import ( + AllAtomStructureContext, +) +from chai_lab.data.dataset.templates.context import TemplateContext +from chai_lab.data.features.feature_factory 
import FeatureFactory +from chai_lab.data.features.feature_type import FeatureType +from chai_lab.data.features.generators.atom_element import AtomElementOneHot +from chai_lab.data.features.generators.atom_name import AtomNameOneHot +from chai_lab.data.features.generators.base import EncodingType +from chai_lab.data.features.generators.blocked_atom_pair_distances import ( + BlockedAtomPairDistances, + BlockedAtomPairDistogram, +) +from chai_lab.data.features.generators.docking import DockingConstraintGenerator +from chai_lab.data.features.generators.esm_generator import ESMEmbeddings +from chai_lab.data.features.generators.identity import Identity +from chai_lab.data.features.generators.is_cropped_chain import ChainIsCropped +from chai_lab.data.features.generators.missing_chain_contact import MissingChainContact +from chai_lab.data.features.generators.msa import ( + IsPairedMSAGenerator, + MSADataSourceGenerator, + MSADeletionMeanGenerator, + MSADeletionValueGenerator, + MSAFeatureGenerator, + MSAHasDeletionGenerator, + MSAProfileGenerator, +) +from chai_lab.data.features.generators.ref_pos import RefPos +from chai_lab.data.features.generators.relative_chain import RelativeChain +from chai_lab.data.features.generators.relative_entity import RelativeEntity +from chai_lab.data.features.generators.relative_sep import RelativeSequenceSeparation +from chai_lab.data.features.generators.relative_token import RelativeTokenSeparation +from chai_lab.data.features.generators.residue_type import ResidueType +from chai_lab.data.features.generators.structure_metadata import ( + IsDistillation, + TokenBFactor, + TokenPLDDT, +) +from chai_lab.data.features.generators.templates import ( + TemplateDistogramGenerator, + TemplateMaskGenerator, + TemplateResTypeGenerator, + TemplateUnitVectorGenerator, +) +from chai_lab.data.features.generators.token_dist_restraint import ( + TokenDistanceRestraint, +) +from chai_lab.data.features.generators.token_pair_pocket_restraint import ( + 
TokenPairPocketRestraint, +) +from chai_lab.data.io.pdb_utils import write_pdbs_from_outputs +from chai_lab.model.diffusion_schedules import InferenceNoiseSchedule +from chai_lab.model.utils import center_random_augmentation +from chai_lab.ranking.frames import get_frames_and_mask +from chai_lab.ranking.rank import SampleRanking, get_scores, rank +from chai_lab.utils.paths import chai1_component +from chai_lab.utils.plot import plot_msa +from chai_lab.utils.tensor_utils import move_data_to_device, set_seed, und_self +from chai_lab.utils.typing import Float, typecheck + + +class UnsupportedInputError(RuntimeError): + pass + + +def load_exported(comp_key: str, device: torch.device) -> torch.nn.Module: + local_path = chai1_component(comp_key) + exported_program = torch.export.load(local_path) + return exported_program.module().to(device) + + +# %% +# Create feature factory + +feature_generators = dict( + RelativeSequenceSeparation=RelativeSequenceSeparation(sep_bins=None), + RelativeTokenSeparation=RelativeTokenSeparation(r_max=32), + RelativeEntity=RelativeEntity(), + RelativeChain=RelativeChain(), + ResidueType=ResidueType( + min_corrupt_prob=0.0, + max_corrupt_prob=0.0, + num_res_ty=32, + key="token_residue_type", + ), + ESMEmbeddings=ESMEmbeddings(), # TODO: this can probably be the identity + BlockedAtomPairDistogram=BlockedAtomPairDistogram(), + InverseSquaredBlockedAtomPairDistances=BlockedAtomPairDistances( + transform="inverse_squared", + encoding_ty=EncodingType.IDENTITY, + ), + AtomRefPos=RefPos(), + AtomRefCharge=Identity( + key="inputs/atom_ref_charge", + ty=FeatureType.ATOM, + dim=1, + can_mask=False, + ), + AtomRefMask=Identity( + key="inputs/atom_ref_mask", + ty=FeatureType.ATOM, + dim=1, + can_mask=False, + ), + AtomRefElement=AtomElementOneHot(max_atomic_num=128), + AtomNameOneHot=AtomNameOneHot(), + TemplateMask=TemplateMaskGenerator(), + TemplateUnitVector=TemplateUnitVectorGenerator(), + TemplateResType=TemplateResTypeGenerator(), + 
TemplateDistogram=TemplateDistogramGenerator(), + TokenDistanceRestraint=TokenDistanceRestraint( + include_probability=0.0, + size=0.33, + min_dist=6.0, + max_dist=30.0, + num_rbf_radii=6, + ), + DockingConstraintGenerator=DockingConstraintGenerator( + include_probability=0.0, + structure_dropout_prob=0.75, + chain_dropout_prob=0.75, + ), + TokenPairPocketRestraint=TokenPairPocketRestraint( + size=0.33, + include_probability=0.0, + min_dist=6.0, + max_dist=20.0, + coord_noise=0.0, + num_rbf_radii=6, + ), + MSAProfile=MSAProfileGenerator(), + MSADeletionMean=MSADeletionMeanGenerator(), + IsDistillation=IsDistillation(), + TokenBFactor=TokenBFactor(include_prob=0.0), + TokenPLDDT=TokenPLDDT(include_prob=0.0), + ChainIsCropped=ChainIsCropped(), + MissingChainContact=MissingChainContact(contact_threshold=6.0), + MSAOneHot=MSAFeatureGenerator(), + MSAHasDeletion=MSAHasDeletionGenerator(), + MSADeletionValue=MSADeletionValueGenerator(), + IsPairedMSA=IsPairedMSAGenerator(), + MSADataSource=MSADataSourceGenerator(), +) +feature_factory = FeatureFactory(feature_generators) + +# %% +# Config + + +class DiffusionConfig: + S_churn: float = 80 + S_tmin: float = 4e-4 + S_tmax: float = 80.0 + S_noise: float = 1.003 + sigma_data: float = 16.0 + second_order: bool = True + + +# %% +# Input validation + + +def raise_if_too_many_tokens(n_actual_tokens: int): + if n_actual_tokens > max(AVAILABLE_MODEL_SIZES): + raise UnsupportedInputError( + f"Too many tokens in input: {n_actual_tokens} > {max(AVAILABLE_MODEL_SIZES)}. " + "Please limit the length of the input sequence." + ) + + +def raise_if_too_many_templates(n_actual_templates: int): + if n_actual_templates > MAX_NUM_TEMPLATES: + raise UnsupportedInputError( + f"Too many templates in input: {n_actual_templates} > {MAX_NUM_TEMPLATES}. " + "Please limit the number of templates." 
+ ) + + +def raise_if_msa_too_deep(msa_depth: int): + if msa_depth > MAX_MSA_DEPTH: + raise UnsupportedInputError( + f"MSA to deep: {msa_depth} > {MAX_MSA_DEPTH}. " + "Please limit the MSA depth." + ) + + +# %% +# Inference logic + + +@torch.no_grad() +def run_inference( + fasta_file: Path, + output_dir: Path, + use_esm_embeddings: bool = True, + # expose some params for easy tweaking + num_trunk_recycles: int = 3, + num_diffn_timesteps: int = 2, + seed: int | None = None, + device: torch.device | None = None, +) -> list[Path]: + # Prepare inputs + assert fasta_file.exists(), fasta_file + fasta_inputs = read_inputs(fasta_file, length_limit=None) + assert len(fasta_inputs) > 0, "No inputs found in fasta file" + + # Load structure context + chains = load_chains_from_raw(fasta_inputs) + contexts = [c.structure_context for c in chains] + merged_context = AllAtomStructureContext.merge(contexts) + n_actual_tokens = merged_context.num_tokens + raise_if_too_many_tokens(n_actual_tokens) + + # Load MSAs + msa_context = MSAContext.create_empty( + n_tokens=n_actual_tokens, + depth=MAX_MSA_DEPTH, + ) + main_msa_context = MSAContext.create_empty( + n_tokens=n_actual_tokens, + depth=MAX_MSA_DEPTH, + ) + + # Load templates + template_context = TemplateContext.empty( + n_tokens=n_actual_tokens, + n_templates=MAX_NUM_TEMPLATES, + ) + + # Load ESM embeddings + if use_esm_embeddings: + embedding_context = get_esm_embedding_context(chains, device=device) + else: + embedding_context = EmbeddingContext.empty(n_tokens=n_actual_tokens) + + # Constraints + constraint_context = ConstraintContext.empty() + + # Build final feature context + feature_context = AllAtomFeatureContext( + chains=chains, + structure_context=merged_context, + msa_context=msa_context, + main_msa_context=main_msa_context, + template_context=template_context, + embedding_context=embedding_context, + constraint_context=constraint_context, + ) + + output_pdb_paths, _, _, _ = run_folding_on_context( + feature_context, + 
output_dir=output_dir, + num_trunk_recycles=num_trunk_recycles, + num_diffn_timesteps=num_diffn_timesteps, + seed=seed, + device=device, + ) + + return output_pdb_paths + + +def _bin_centers(min_bin: float, max_bin: float, no_bins: int) -> Tensor: + return torch.linspace(min_bin, max_bin, 2 * no_bins + 1)[1::2] + + +@typecheck +@dataclass(frozen=True) +class ConfidenceScores: + # Predicted aligned error(PAE) + pae: Float[Tensor, "bs num_tokens num_tokens"] + + # Predicted distance error (PDE) + pde: Float[Tensor, "bs num_tokens num_tokens"] + + # Predicted local distance difference test (pLDDT) + plddt: Float[Tensor, "bs num_tokens"] + + +@torch.no_grad() +def run_folding_on_context( + feature_context: AllAtomFeatureContext, + output_dir: Path, + # expose some params for easy tweaking + num_trunk_recycles: int = 3, + num_diffn_timesteps: int = 200, + seed: int | None = None, + device: torch.device | None = None, +) -> tuple[list[Path], ConfidenceScores, list[SampleRanking], Path]: + """ + Function for in-depth explorations. + User completely controls folding inputs. 
+ + Returns: + - list of Path corresponding to folding outputs + - ConfidenceScores object + - SampleRanking data + - Path to plot of MSA coverage + """ + # Set seed + if seed is not None: + set_seed([seed]) + + if device is None: + device = torch.device("cuda:0") + + ## + ## Validate inputs + ## + + n_actual_tokens = feature_context.structure_context.num_tokens + raise_if_too_many_tokens(n_actual_tokens) + raise_if_too_many_templates(feature_context.template_context.num_templates) + raise_if_msa_too_deep(feature_context.msa_context.depth) + raise_if_msa_too_deep(feature_context.main_msa_context.depth) + + ## + ## Prepare batch + ## + + # Collate inputs into batch + collator = Collate( + feature_factory=feature_factory, + num_key_atoms=128, + num_query_atoms=32, + ) + + feature_contexts = [feature_context] + batch_size = len(feature_contexts) + batch = collator(feature_contexts) + batch = move_data_to_device(batch, device=device) + + # Get features and inputs from batch + features = {name: feature for name, feature in batch["features"].items()} + inputs = batch["inputs"] + block_indices_h = inputs["block_atom_pair_q_idces"] + block_indices_w = inputs["block_atom_pair_kv_idces"] + atom_single_mask = inputs["atom_exists_mask"] + atom_token_indices = inputs["atom_token_index"].long() + token_single_mask = inputs["token_exists_mask"] + token_pair_mask = und_self(token_single_mask, "b i, b j -> b i j") + token_reference_atom_index = inputs["token_ref_atom_index"] + atom_within_token_index = inputs["atom_within_token_index"] + msa_mask = inputs["msa_mask"] + template_input_masks = und_self( + inputs["template_mask"], "b t n1, b t n2 -> b t n1 n2" + ) + block_atom_pair_mask = inputs["block_atom_pair_mask"] + + ## + ## Load exported models + ## + + # Model is size-specific + model_size = min(x for x in AVAILABLE_MODEL_SIZES if n_actual_tokens <= x) + + feature_embedding = load_exported(f"{model_size}/feature_embedding.pt2", device) + token_input_embedder = load_exported( + 
f"{model_size}/token_input_embedder.pt2", device + ) + trunk = load_exported(f"{model_size}/trunk.pt2", device) + diffusion_module = load_exported(f"{model_size}/diffusion_module.pt2", device) + confidence_head = load_exported(f"{model_size}/confidence_head.pt2", device) + + ## + ## Run the features through the feature embedder + ## + + embedded_features = feature_embedding.forward(**features) + token_single_input_feats = embedded_features["TOKEN"] + token_pair_input_feats, token_pair_structure_input_feats = embedded_features[ + "TOKEN_PAIR" + ].chunk(2, dim=-1) + atom_single_input_feats, atom_single_structure_input_feats = embedded_features[ + "ATOM" + ].chunk(2, dim=-1) + block_atom_pair_input_feats, block_atom_pair_structure_input_feats = ( + embedded_features["ATOM_PAIR"].chunk(2, dim=-1) + ) + template_input_feats = embedded_features["TEMPLATES"] + msa_input_feats = embedded_features["MSA"] + + ## + ## Run the inputs through the token input embedder + ## + + token_input_embedder_outputs: tuple[Tensor, ...] 
= token_input_embedder.forward( + token_single_input_feats=token_single_input_feats, + token_pair_input_feats=token_pair_input_feats, + atom_single_input_feats=atom_single_input_feats, + block_atom_pair_feat=block_atom_pair_input_feats, + block_atom_pair_mask=block_atom_pair_mask, + block_indices_h=block_indices_h, + block_indices_w=block_indices_w, + atom_single_mask=atom_single_mask, + atom_token_indices=atom_token_indices, + ) + token_single_initial_repr, token_single_structure_input, token_pair_initial_repr = ( + token_input_embedder_outputs + ) + + ## + ## Run the input representations through the trunk + ## + + # Recycle the representations by feeding the output back into the trunk as input for + # the subsequent recycle + token_single_trunk_repr = token_single_initial_repr + token_pair_trunk_repr = token_pair_initial_repr + for _ in tqdm(range(num_trunk_recycles), desc="Trunk recycles"): + (token_single_trunk_repr, token_pair_trunk_repr) = trunk.forward( + token_single_trunk_initial_repr=token_single_initial_repr, + token_pair_trunk_initial_repr=token_pair_initial_repr, + token_single_trunk_repr=token_single_trunk_repr, # recycled + token_pair_trunk_repr=token_pair_trunk_repr, # recycled + msa_input_feats=msa_input_feats, + msa_mask=msa_mask, + template_input_feats=template_input_feats, + template_input_masks=template_input_masks, + token_single_mask=token_single_mask, + token_pair_mask=token_pair_mask, + ) + + ## + ## Denoise the trunk representation by passing it through the diffusion module + ## + + def _denoise(atom_pos: Tensor, sigma: Tensor, s: int) -> Tensor: + atom_noised_coords = rearrange( + atom_pos, "(b s) ... 
-> b s ...", s=s + ).contiguous() + noise_sigma = repeat(sigma, " -> b s", b=batch_size, s=s) + return diffusion_module.forward( + token_single_initial_repr=token_single_structure_input.float(), + token_pair_initial_repr=token_pair_structure_input_feats.float(), + token_single_trunk_repr=token_single_trunk_repr.float(), + token_pair_trunk_repr=token_pair_trunk_repr.float(), + atom_single_input_feats=atom_single_structure_input_feats.float(), + atom_block_pair_input_feats=block_atom_pair_structure_input_feats.float(), + atom_single_mask=atom_single_mask, + atom_block_pair_mask=block_atom_pair_mask, + token_single_mask=token_single_mask, + block_indices_h=block_indices_h, + block_indices_w=block_indices_w, + atom_noised_coords=atom_noised_coords.float(), + noise_sigma=noise_sigma.float(), + atom_token_indices=atom_token_indices, + ) + + num_diffn_samples = 5 # Fixed at export time + inference_noise_schedule = InferenceNoiseSchedule( + s_max=DiffusionConfig.S_tmax, + s_min=4e-4, + p=7.0, + sigma_data=DiffusionConfig.sigma_data, + ) + sigmas = inference_noise_schedule.get_schedule( + device=device, num_timesteps=num_diffn_timesteps + ) + gammas = torch.where( + (sigmas >= DiffusionConfig.S_tmin) & (sigmas <= DiffusionConfig.S_tmax), + min(DiffusionConfig.S_churn / num_diffn_timesteps, math.sqrt(2) - 1), + 0.0, + ) + + sigmas_and_gammas = list(zip(sigmas[:-1], sigmas[1:], gammas[:-1])) + + # Initial atom positions + _, num_atoms = atom_single_mask.shape + atom_pos = sigmas[0] * torch.randn( + batch_size * num_diffn_samples, num_atoms, 3, device=device + ) + + for sigma_curr, sigma_next, gamma_curr in tqdm( + sigmas_and_gammas, desc="Diffusion steps" + ): + # Center coords + atom_pos = center_random_augmentation( + atom_pos, + atom_single_mask=repeat( + atom_single_mask, + "b a -> (b s) a", + s=num_diffn_samples, + ), + ) + + # Alg 2. 
lines 4-6 + noise = DiffusionConfig.S_noise * torch.randn( + atom_pos.shape, device=atom_pos.device + ) + sigma_hat = sigma_curr + gamma_curr * sigma_curr + atom_pos_noise = (sigma_hat**2 - sigma_curr**2).clamp_min(1e-6).sqrt() + atom_pos_hat = atom_pos + noise * atom_pos_noise + + # Lines 7-8 + denoised_pos = _denoise( + atom_pos=atom_pos_hat, + sigma=sigma_hat, + s=num_diffn_samples, + ) + d_i = (atom_pos_hat - denoised_pos) / sigma_hat + atom_pos = atom_pos_hat + (sigma_next - sigma_hat) * d_i + + # Lines 9-11 + if sigma_next != 0 and DiffusionConfig.second_order: # second order update + denoised_pos = _denoise( + atom_pos, + sigma=sigma_next, + s=num_diffn_samples, + ) + d_i_prime = (atom_pos - denoised_pos) / sigma_next + atom_pos = atom_pos + (sigma_next - sigma_hat) * ((d_i_prime + d_i) / 2) + + ## + ## Run the confidence model + ## + + confidence_outputs: list[tuple[Tensor, ...]] = [ + confidence_head.forward( + token_single_input_repr=token_single_initial_repr, + token_single_trunk_repr=token_single_trunk_repr, + token_pair_trunk_repr=token_pair_trunk_repr, + token_single_mask=token_single_mask, + atom_single_mask=atom_single_mask, + atom_coords=atom_pos[s : s + 1], + token_reference_atom_index=token_reference_atom_index, + atom_token_index=atom_token_indices, + atom_within_token_index=atom_within_token_index, + ) + for s in range(num_diffn_samples) + ] + + pae_logits, pde_logits, plddt_logits = [ + torch.cat(single_sample, dim=0) + for single_sample in zip(*confidence_outputs, strict=True) + ] + + assert atom_pos.shape[0] == num_diffn_samples + assert pae_logits.shape[0] == num_diffn_samples + + def softmax_einsum_and_cpu( + logits: Tensor, bin_mean: Tensor, pattern: str + ) -> Tensor: + # utility to compute score from bin logits + res = einsum( + logits.float().softmax(dim=-1), bin_mean.to(logits.device), pattern + ) + return res.to(device="cpu") + + token_mask_1d = rearrange(token_single_mask, "1 b -> b") + + pae_scores = softmax_einsum_and_cpu( + 
pae_logits[:, token_mask_1d, :, :][:, :, token_mask_1d, :], + _bin_centers(0.0, 32.0, 64), + "b n1 n2 d, d -> b n1 n2", + ) + + pde_scores = softmax_einsum_and_cpu( + pde_logits[:, token_mask_1d, :, :][:, :, token_mask_1d, :], + _bin_centers(0.0, 32.0, 64), + "b n1 n2 d, d -> b n1 n2", + ) + + plddt_scores_atom = softmax_einsum_and_cpu( + plddt_logits, + _bin_centers(0, 1, plddt_logits.shape[-1]), + "b a d, d -> b a", + ) + + # converting per-atom plddt to per-token + [mask] = atom_single_mask.cpu() + [indices] = atom_token_indices.cpu() + + def avg_per_token_1d(x): + n = torch.bincount(indices[mask], weights=x[mask]) + d = torch.bincount(indices[mask]).clamp(min=1) + return n / d + + plddt_scores = torch.stack([avg_per_token_1d(x) for x in plddt_scores_atom]) + + confidence_scores = ConfidenceScores( + pae=pae_scores, + pde=pde_scores, + plddt=plddt_scores, + ) + + ## + ## Write the outputs + ## + + # Plot coverage of tokens by MSA, save plot + output_dir.mkdir(parents=True, exist_ok=True) + msa_plot_path = plot_msa( + input_tokens=feature_context.structure_context.token_residue_type, + msa_tokens=feature_context.msa_context.tokens, + out_fname=output_dir / "msa_depth.pdf", + ) + + output_paths: list[Path] = [] + ranking_data: list[SampleRanking] = [] + + for idx in range(num_diffn_samples): + ## + ## Compute ranking scores + ## + + _, valid_frames_mask = get_frames_and_mask( + atom_pos[idx : idx + 1], + inputs["token_asym_id"], + inputs["token_residue_index"], + inputs["token_backbone_frame_mask"], + inputs["token_centre_atom_index"], + inputs["token_exists_mask"], + inputs["atom_exists_mask"], + inputs["token_backbone_frame_index"], + inputs["atom_token_index"], + ) + + ranking_outputs = rank( + atom_pos[idx : idx + 1], + atom_mask=inputs["atom_exists_mask"], + atom_token_index=inputs["atom_token_index"], + token_exists_mask=inputs["token_exists_mask"], + token_asym_id=inputs["token_asym_id"], + token_entity_type=inputs["token_entity_type"], + 
token_valid_frames_mask=valid_frames_mask, + lddt_logits=plddt_logits[idx : idx + 1], + lddt_bin_centers=_bin_centers(0, 1, plddt_logits.shape[-1]).to( + plddt_logits.device + ), + pae_logits=pae_logits[idx : idx + 1], + pae_bin_centers=_bin_centers(0.0, 32.0, 64).to(pae_logits.device), + ) + + ranking_data.append(ranking_outputs) + + ## + ## Write output files + ## + + pdb_out_path = output_dir.joinpath(f"pred.model_idx_{idx}.pdb") + + print(f"Writing output to {pdb_out_path}") + + # use 0-100 scale for pLDDT in pdb outputs + scaled_plddt_scores_per_atom = 100 * plddt_scores_atom[idx : idx + 1] + + write_pdbs_from_outputs( + coords=atom_pos[idx : idx + 1], + bfactors=scaled_plddt_scores_per_atom, + output_batch=move_data_to_device(inputs, torch.device("cpu")), + write_path=pdb_out_path, + ) + output_paths.append(pdb_out_path) + + scores_basename = f"scores.model_idx_{idx}.npz" + scores_out_path = output_dir / scores_basename + + scores = get_scores(ranking_outputs) + np.savez( + scores_out_path, + **scores, + ) + + return output_paths, confidence_scores, ranking_data, msa_plot_path diff --git a/forks/chai-lab/chai_lab/data/__init__.py b/forks/chai-lab/chai_lab/data/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/forks/chai-lab/chai_lab/data/collate/__init__.py b/forks/chai-lab/chai_lab/data/collate/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/forks/chai-lab/chai_lab/data/collate/collate.py b/forks/chai-lab/chai_lab/data/collate/collate.py new file mode 100644 index 00000000..30e5f531 --- /dev/null +++ b/forks/chai-lab/chai_lab/data/collate/collate.py @@ -0,0 +1,93 @@ +import dataclasses +import logging +from typing import Any + +import torch + +from chai_lab.data.collate.utils import get_pad_sizes +from chai_lab.data.dataset.all_atom_feature_context import AllAtomFeatureContext +from chai_lab.data.features.feature_factory import FeatureFactory +from chai_lab.model.utils import ( + get_block_atom_pair_mask, + 
get_qkv_indices_for_blocks, +) +from chai_lab.utils.dict import list_dict_to_dict_list + +logger = logging.getLogger(__name__) + + +@dataclasses.dataclass(frozen=True) +class Collate: + feature_factory: FeatureFactory + num_query_atoms: int + num_key_atoms: int + + def __call__( + self, + feature_contexts: list[AllAtomFeatureContext], + ) -> dict[str, Any]: + raw_batch = self._collate(feature_contexts) + prepared_batch = self._post_collate(raw_batch) + return prepared_batch + + def _collate( + self, + feature_contexts: list[AllAtomFeatureContext], + ) -> dict[str, Any]: + # Get the pad sizes, finding the max number of tokens/atoms/bonds in the batch. + pad_sizes = get_pad_sizes([p.structure_context for p in feature_contexts]) + + # Pad each feature context to the max sizes + padded_feature_contexts = [ + feature_context.pad( + n_tokens=pad_sizes.n_tokens, + n_atoms=pad_sizes.n_atoms, + ) + for feature_context in feature_contexts + ] + + # Convert all the input data into dicts, for each feature context + inputs_per_context = [e.to_dict() for e in padded_feature_contexts] + + # Stack the dict inputs into a single batch dict, across all feature contexts + batched_inputs = { + k: (torch.stack(v, dim=0) if isinstance(v[0], torch.Tensor) else v) + for k, v in list_dict_to_dict_list(inputs_per_context).items() + } + + # Make a batch dict + batch = dict(inputs=batched_inputs) + return batch + + def _post_collate(self, raw_batch: dict[str, Any]) -> dict[str, Any]: + """ + takes a list of processed multi-chain systems, + returns a dictionary with batched tensors to feed in the model forward method + and any other necessary data for the task/losses + """ + raw_b_i = raw_batch["inputs"] + + # prepare atom pair block data: + atom_exists_mask = raw_b_i["atom_exists_mask"] + block_q_atom_idces, block_kv_atom_idces, kv_mask = get_qkv_indices_for_blocks( + atom_exists_mask.shape[1], + self.num_query_atoms, + self.num_key_atoms, + atom_exists_mask.device, + ) + block_atom_pair_mask 
def pad_size(max_in_batch: int, allowed_sizes: list[int]) -> int:
    """Return the smallest allowed size that can hold ``max_in_batch``.

    Args:
        max_in_batch: largest size that actually occurs in the batch.
        allowed_sizes: candidate sizes to pad to; need not be sorted.

    Returns:
        The minimum element of ``allowed_sizes`` that is ``>= max_in_batch``.

    Raises:
        ValueError: if ``allowed_sizes`` is empty, or if ``max_in_batch`` is
            larger than every allowed size.
    """
    if not allowed_sizes:
        raise ValueError("allowed_sizes must not be empty")

    # Use the true maximum rather than the last element so the result does not
    # depend on the caller passing the sizes in ascending order.
    max_allowed_size = max(allowed_sizes)
    if max_in_batch > max_allowed_size:
        raise ValueError(f"{max_in_batch=} > {max_allowed_size=}")
    return min(n for n in allowed_sizes if n >= max_in_batch)
@dataclass
class AllAtomFeatureContext:
    """
    Feature contexts are produced by datasets. Multiple feature contexts are passed to
    collator, which transforms them into a batch (by padding and stacking them).
    """

    # Metadata: these are not padded and batched
    chains: list[Chain]
    # Contexts: these are what get padded and batched
    structure_context: AllAtomStructureContext
    msa_context: MSAContext
    main_msa_context: MSAContext
    template_context: TemplateContext
    # Optional: when None it is skipped by both pad() and to_dict()
    embedding_context: EmbeddingContext | None
    constraint_context: ConstraintContext

    def __str__(self) -> str:
        chains_info = [str(chain) for chain in self.chains]
        return f"{self.__class__.__name__}(chains={chains_info})"

    def pad(
        self,
        n_tokens: int,
        n_atoms: int,
    ) -> "AllAtomFeatureContext":
        """Return a new feature context with every sub-context padded.

        Args:
            n_tokens: token count every token-indexed context is padded to.
            n_atoms: atom count the structure context is padded to.

        Returns:
            A new ``AllAtomFeatureContext``; ``chains`` is passed through as-is.
            MSAs are padded to ``MAX_MSA_DEPTH`` rows and templates to
            ``MAX_NUM_TEMPLATES`` entries.
        """
        return AllAtomFeatureContext(
            # Metadata
            chains=self.chains,
            # Contexts
            structure_context=self.structure_context.pad(
                n_tokens=n_tokens,
                n_atoms=n_atoms,
            ),
            msa_context=self.msa_context.pad(
                max_num_tokens=n_tokens,
                max_msa_depth=MAX_MSA_DEPTH,
            ),
            main_msa_context=self.main_msa_context.pad(
                max_num_tokens=n_tokens,
                max_msa_depth=MAX_MSA_DEPTH,
            ),
            template_context=self.template_context.pad(
                max_tokens=n_tokens,
                max_templates=MAX_NUM_TEMPLATES,
            ),
            embedding_context=(
                self.embedding_context.pad(max_tokens=n_tokens)
                if self.embedding_context is not None
                else None
            ),
            constraint_context=self.constraint_context.pad(max_tokens=n_tokens),
        )

    def to_dict(self) -> dict[str, Any]:
        """Flatten all sub-contexts into a single dict of model inputs.

        The dicts are merged in the order structure, MSA, template, embedding,
        constraint; on a key collision the later dict wins. ``paired_msa_depth``
        is taken from ``msa_context``, not from ``main_msa_context``.
        """
        msa_context_dict = dict(
            msa_tokens=self.msa_context.tokens,
            msa_mask=self.msa_context.mask,
            msa_deletion_matrix=self.msa_context.deletion_matrix,
            msa_species=self.msa_context.species,
            msa_sequence_source=self.msa_context.sequence_source,
            main_msa_tokens=self.main_msa_context.tokens,
            main_msa_mask=self.main_msa_context.mask,
            main_msa_deletion_matrix=self.main_msa_context.deletion_matrix,
            paired_msa_depth=self.msa_context.paired_msa_depth,
        )
        # Explicit None check, matching pad(): the field is Optional and the
        # decision must not depend on the truthiness of the context object.
        embedding_dict = (
            self.embedding_context.to_dict()
            if self.embedding_context is not None
            else {}
        )
        return {
            **self.structure_context.to_dict(),
            **msa_context_dict,
            **self.template_context.to_dict(),
            **embedding_dict,
            **self.constraint_context.to_dict(),
        }
@typecheck
@dataclass
class EmbeddingContext:
    """Per-token ESM embeddings, stored as one 2-D tensor."""

    esm_embeddings: Float[Tensor, "num_tokens d_emb"]

    def __str__(self) -> str:
        shape = self.esm_embeddings.shape
        return f"{self.__class__.__name__}(esm_embeddings of {shape})"

    @property
    def num_tokens(self) -> int:
        """Size of the first (token) dimension of the embedding tensor."""
        n_rows, _n_cols = self.esm_embeddings.shape
        return n_rows

    def pad(self, max_tokens: int) -> "EmbeddingContext":
        """Return a copy zero-padded along the token axis up to ``max_tokens``."""
        assert self.num_tokens <= max_tokens

        n_missing_tokens = max_tokens - self.num_tokens
        # F.pad lists pad widths starting from the LAST dimension:
        # (emb_left, emb_right, token_left, token_right).
        padding = (0, 0, 0, n_missing_tokens)
        return EmbeddingContext(
            esm_embeddings=torch.nn.functional.pad(
                self.esm_embeddings,
                padding,
                value=0,
            ),
        )

    def to_dict(self) -> dict[str, torch.Tensor]:
        """Return the fields as a dict (built with ``dataclasses.asdict``)."""
        return asdict(self)

    @classmethod
    def empty(cls, n_tokens: int, d_emb: int = 2560) -> "EmbeddingContext":
        """Build an all-zero embedding context of shape ``(n_tokens, d_emb)``."""
        zero_embeddings = torch.zeros(n_tokens, d_emb)
        return cls(esm_embeddings=zero_embeddings)
@contextmanager
def esm_model(model_name: str, device):
    """Context transiently keeps ESM model on specified device.

    The model is loaded lazily on first use and cached in the module-level
    ``_esm_model`` list; once cached, ``model_name`` is not consulted again.

    Args:
        model_name: identifier passed to ``EsmModel.from_pretrained`` on the
            first call.
        device: device the model is moved to for the duration of the context.

    Yields:
        The cached model, in eval mode, on ``device``.
    """
    from transformers import EsmModel

    if len(_esm_model) == 0:
        # lazy loading of the model
        _esm_model.append(EsmModel.from_pretrained(model_name))

    [model] = _esm_model
    model.to(device)
    model.eval()
    try:
        yield model
    finally:
        # Move the model back to CPU even if the body of the `with` raised,
        # so a failed inference does not leave it on the compute device.
        model.to("cpu")
@typecheck
def get_esm_embedding_context(chains: list[Chain], device) -> EmbeddingContext:
    """Build one merged embedding context covering all chains, in chain order.

    Protein chains get ESM embeddings (computed once per distinct sequence);
    every other chain gets an all-zero embedding. ``device`` is only where the
    ESM computation runs.
    """
    # device is used for computing, but result is still on CPU
    unique_protein_sequences = {
        c.entity_data.sequence
        for c in chains
        if c.entity_data.entity_type == EntityType.PROTEIN
    }
    context_by_sequence = _get_esm_contexts_for_sequences(
        prot_sequences=unique_protein_sequences,
        device=device,
    )

    # One embedding context per chain; non-proteins are embedded with zeros.
    per_chain_contexts = [
        context_by_sequence[c.entity_data.sequence]
        if c.entity_data.entity_type == EntityType.PROTEIN
        else EmbeddingContext.empty(n_tokens=c.structure_context.num_tokens)
        for c in chains
    ]

    # Select one embedding row per token via the chain's token_residue_index.
    per_token_embeddings = [
        context.esm_embeddings[c.structure_context.token_residue_index, :]
        for context, c in zip(per_chain_contexts, chains, strict=True)
    ]

    # Chains are not cropped during inference. If cropping were needed, each
    # per-chain tensor would be indexed with its crop indices at this point.

    # Merge the embeddings along the tokens dimension (i.e. merge the chains)
    return EmbeddingContext(esm_embeddings=torch.cat(per_token_embeddings, dim=0))
gemmi.find_tabulated_residue(residue_name), entity_type + ), + residue_index=i, + is_missing=False, + b_factor_or_plddt=1.0, + conformer_data=None, + ) + ) + return residues + + +def _synth_subchain_id(idx: int) -> str: + n = len(string.ascii_uppercase) + retval = "" + while idx >= 0: + retval = string.ascii_uppercase[idx % n] + retval + idx = idx // n - 1 + return retval + + +def raw_inputs_to_entitites_data( + inputs: list[Input], identifier: str = "test" +) -> list[AllAtomEntityData]: + entities = [] + + # track unique entities + entity_to_index: dict[tuple[EntityType, tuple[str, ...]], int] = {} + + for i, input in enumerate(inputs): + # Parse residues based on entity type + residues = None + match entity_type := EntityType(input.entity_type): + case EntityType.LIGAND: + residues = get_lig_residues(smiles=input.sequence) + + case EntityType.PROTEIN | EntityType.RNA | EntityType.DNA: + parsed_sequence: list | None = constituents_of_modified_fasta( + input.sequence + ) + assert ( + parsed_sequence is not None + ), f"incorrect FASTA: {parsed_sequence=} " + expanded_sequence = [ + get_residue_name(r, entity_type=entity_type) if len(r) == 1 else r + for r in parsed_sequence + ] + residues = get_polymer_residues(expanded_sequence, entity_type) + case _: + raise NotImplementedError + assert residues is not None + + # Determine the entity id (unique integer for each distinct sequence) + # NOTE very important for recognizing things like homo polymers + seq: tuple[str, ...] 
def load_chains_from_raw(
    inputs: list[Input],
    identifier: str = "test",
    tokenizer: AllAtomResidueTokenizer | None = None,
) -> list[Chain]:
    """
    loads and tokenizes each input chain

    Args:
        inputs: raw inputs, one per chain.
        identifier: forwarded to ``raw_inputs_to_entitites_data``.
        tokenizer: tokenizer to use; when None, one is built around a fresh
            ``RefConformerGenerator``.

    Returns:
        One ``Chain`` per input that tokenized successfully, in input order.
        Inputs whose tokenization raised (the exception is logged) or
        returned None are omitted.
    """

    if tokenizer is None:
        conformer_generator = RefConformerGenerator()
        tokenizer = AllAtomResidueTokenizer(conformer_generator)

    # Build the entity data from the raw inputs.
    entities: list[AllAtomEntityData] = raw_inputs_to_entitites_data(
        inputs,
        identifier=identifier,
    )

    # Tokenize each entity and pair the result with that same entity right
    # away. Collecting results in a separate list and zipping afterwards would
    # shift every later entity onto the wrong structure context as soon as one
    # tokenization failed.
    chains: list[Chain] = []
    sym_ids = _make_sym_ids([x.entity_id for x in entities])
    for idx, (entity_data, sym_id) in enumerate(zip(entities, sym_ids)):
        try:
            structure_context = tokenizer._tokenize_entity(
                entity_data,
                chain_id=idx + 1,
                sym_id=sym_id,
            )
        except Exception:
            logger.exception(f"Failed to tokenize input {inputs[idx]}")
            continue

        # Drop chains we failed to tokenize
        if structure_context is None:
            continue

        chains.append(
            Chain(entity_data=entity_data, structure_context=structure_context)
        )

    return chains
+ """ + sequences = read_fasta(fasta_file) + + retval: list[Input] = [] + total_length: int = 0 + for desc, sequence in sequences: + logger.info(f"[fasta] [{fasta_file}] {desc} {len(sequence)}") + # get the type of the sequence + entity_str = desc.split("|")[0].strip().lower() + match entity_str: + case "protein": + entity_type = EntityType.PROTEIN + case "ligand": + entity_type = EntityType.LIGAND + case "rna": + entity_type = EntityType.RNA + case "dna": + entity_type = EntityType.DNA + case _: + raise ValueError(f"{entity_str} is not a valid entity type") + + possible_types = identify_potential_entity_types(sequence) + if len(possible_types) == 0: + logger.error(f"Provided {sequence=} is invalid") + elif entity_type not in possible_types: + types_fmt = "/".join(str(et.name) for et in possible_types) + logger.warning( + f"Provided {sequence=} is likely {types_fmt}, not {entity_type.name}" + ) + + retval.append(Input(sequence, entity_type.value)) + total_length += len(sequence) + + if length_limit is not None and total_length > length_limit: + logger.warning( + f"[fasta] [{fasta_file}] too many chars ({total_length} > {length_limit}); skipping" + ) + return [] + + return retval diff --git a/forks/chai-lab/chai_lab/data/dataset/msas/__init__.py b/forks/chai-lab/chai_lab/data/dataset/msas/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/forks/chai-lab/chai_lab/data/dataset/msas/msa_context.py b/forks/chai-lab/chai_lab/data/dataset/msas/msa_context.py new file mode 100644 index 00000000..ca4dfd22 --- /dev/null +++ b/forks/chai-lab/chai_lab/data/dataset/msas/msa_context.py @@ -0,0 +1,209 @@ +from dataclasses import dataclass + +import torch +from einops import rearrange, repeat +from torch import Tensor + +from chai_lab.data.parsing.msas.data_source import ( + MSADataSource, + msa_dataset_source_to_int, +) +from chai_lab.data.parsing.msas.species import UNKNOWN_SPECIES +from chai_lab.data.residue_constants import 
@typecheck
@dataclass
class MSAContext:
    """A multiple sequence alignment stored as aligned (msa_depth, n_tokens) tensors."""

    # MSA-level
    dataset_source: MSADataSource

    # token level
    tokens: UInt8[Tensor, "msa_depth n_tokens"]
    species: Int32[Tensor, "msa_depth n_tokens"]
    deletion_matrix: UInt8[Tensor, "msa_depth n_tokens"]
    mask: Bool[Tensor, "msa_depth n_tokens"]
    sequence_source: UInt8[Tensor, "msa_depth n_tokens"]
    # row level: one flag per MSA row
    is_paired_mask: Bool[Tensor, "msa_depth"]

    @property
    def depth(self) -> int:
        depth, _ = self._dims
        return depth

    @property
    def num_tokens(self) -> int:
        _, num_tokens = self._dims
        return num_tokens

    @property
    def _dims(self) -> torch.Size:
        return self.tokens.shape

    @property
    def paired_msa_depth(self) -> Int32[Tensor, "b"]:
        # Number of rows that are flagged as paired AND have at least one
        # unmasked token.
        return (self.mask.any(dim=-1) & self.is_paired_mask).sum(dim=-1)

    def __getitem__(self, subscript: tuple) -> "MSAContext":
        """Index every tensor with ``subscript`` and return a new context.

        Raises:
            TypeError: if ``subscript`` is not a tuple that either has two
                elements or starts with an Ellipsis.
        """
        # enforce typing on item
        if not (
            isinstance(subscript, tuple)
            and ((len(subscript) == 2) or subscript[0] is Ellipsis)
        ):
            raise TypeError(
                "Subscript must be a tuple with 2 elements or have an ellipsis."
            )

        # Expand the per-row flag to 2-D so the same subscript can be applied
        # to it, then collapse back to one flag per row.
        is_paired_mask = repeat(
            self.is_paired_mask,
            "msa_depth -> msa_depth n_tokens",
            n_tokens=self.num_tokens,
        )
        return MSAContext(
            dataset_source=self.dataset_source,
            tokens=self.tokens[subscript],
            species=self.species[subscript],
            deletion_matrix=self.deletion_matrix[subscript],
            sequence_source=self.sequence_source[subscript],
            mask=self.mask[subscript],
            is_paired_mask=is_paired_mask[subscript].any(dim=-1),
        )

    def pad(
        self,
        max_num_tokens: int | None = None,
        max_msa_depth: int | None = None,
    ) -> "MSAContext":
        """Pad at the end of both axes up to the requested sizes.

        Args:
            max_num_tokens: target token count; None keeps the current one.
            max_msa_depth: target row count; None keeps the current one.

        Returns:
            A new context in which every added position is masked out
            (``mask`` False, ``is_paired_mask`` False).
        """
        max_num_tokens = default(max_num_tokens, self.num_tokens)
        assert self.num_tokens <= max_num_tokens

        max_msa_depth = default(max_msa_depth, self.depth)
        assert self.depth <= max_msa_depth

        # F.pad order: (token_left, token_right, depth_left, depth_right)
        pad_dims = (0, max_num_tokens - self.num_tokens, 0, max_msa_depth - self.depth)
        return MSAContext(
            dataset_source=self.dataset_source,
            tokens=torch.nn.functional.pad(
                self.tokens,
                pad_dims,
                value=residue_types_with_nucleotides_order[":"],
            ),
            species=torch.nn.functional.pad(
                self.species,
                pad_dims,
                value=UNKNOWN_SPECIES,
            ),
            deletion_matrix=torch.nn.functional.pad(
                self.deletion_matrix,
                pad_dims,
                value=0,  # No deletions
            ),
            mask=torch.nn.functional.pad(
                self.mask,
                pad_dims,
                value=False,
            ),
            sequence_source=torch.nn.functional.pad(
                self.sequence_source,
                pad_dims,
                value=msa_dataset_source_to_int[MSADataSource.NONE],
            ),
            is_paired_mask=torch.nn.functional.pad(
                self.is_paired_mask,
                (0, max_msa_depth - self.depth),
                value=False,
            ),
        )

    @typecheck
    def apply_mask(self, mask: Bool[Tensor, "msa_depth n_tokens"]) -> "MSAContext":
        """Reset every position where ``mask`` is False to its padding value.

        A row keeps its paired flag only if ``mask`` is True somewhere in it.
        """
        return MSAContext(
            dataset_source=self.dataset_source,
            tokens=self.tokens.masked_fill(
                ~mask, residue_types_with_nucleotides_order[":"]
            ),
            species=self.species.masked_fill(~mask, UNKNOWN_SPECIES),
            deletion_matrix=self.deletion_matrix.masked_fill(~mask, 0),
            mask=self.mask.masked_fill(~mask, False),
            sequence_source=self.sequence_source.masked_fill(
                ~mask, value=msa_dataset_source_to_int[MSADataSource.NONE]
            ),
            is_paired_mask=self.is_paired_mask.masked_fill(~mask.any(dim=-1), False),
        )

    @classmethod
    def cat(
        cls,
        msas: list["MSAContext"],
        dataset_source: MSADataSource | None = None,
        dim=-1,
    ) -> "MSAContext":
        """Concatenate several MSAs along ``dim``.

        Args:
            msas: non-empty list of contexts to concatenate.
            dataset_source: source for the result; when None it is taken from
                the inputs, which must then all share one source or be exactly
                MAIN and PAIRED.
            dim: axis to concatenate along; -1 or any non-negative axis.

        Returns:
            The concatenated context. ``is_paired_mask`` is concatenated only
            when ``dim == 0`` (the depth axis); otherwise the first input's
            mask is reused.
        """
        # Validate up front so an empty list fails with a message about the
        # actual problem rather than about data sources or inside torch.cat.
        assert len(msas) > 0, "cannot concatenate an empty list of MSAs"

        if dataset_source is None:
            dataset_sources = set([msa.dataset_source for msa in msas])
            assert len(dataset_sources) == 1 or dataset_sources == {
                MSADataSource.MAIN,
                MSADataSource.PAIRED,
            }, "all MSAs must have the same datasource or be MAIN and PAIRED"
            # NOTE(review): for the {MAIN, PAIRED} case set.pop() returns an
            # arbitrary one of the two - confirm nothing downstream depends on
            # which one is picked.
            dataset_source = dataset_sources.pop()

        assert dim == -1 or dim >= 0, "dim < 0 not implemented except for -1"
        if dim == 0:
            # Stacking rows: the per-row flags are stacked the same way.
            is_paired_mask = torch.cat([msa.is_paired_mask for msa in msas], dim=dim)
        else:
            # Rows are unchanged, so the first MSA's flags are reused.
            is_paired_mask = msas[0].is_paired_mask

        return MSAContext(
            dataset_source=dataset_source,
            tokens=torch.cat([msa.tokens for msa in msas], dim=dim),
            species=torch.cat([msa.species for msa in msas], dim=dim),
            deletion_matrix=torch.cat([msa.deletion_matrix for msa in msas], dim=dim),
            sequence_source=torch.cat([msa.sequence_source for msa in msas], dim=dim),
            mask=torch.cat([msa.mask for msa in msas], dim=dim),
            is_paired_mask=is_paired_mask,
        )

    @classmethod
    @typecheck
    def create(
        cls,
        dataset_source: MSADataSource,
        tokens: UInt8[Tensor, "n_tokens"],
    ) -> "MSAContext":
        """
        Creates an MSA comprised of a single sequence.

        The single row is fully unmasked, has no deletions, unknown species,
        and is not marked as paired.
        """
        tokens_for_msa = rearrange(tokens, "n_tokens -> 1 n_tokens")
        return MSAContext(
            dataset_source=dataset_source,
            tokens=tokens_for_msa,
            species=torch.full_like(tokens_for_msa, UNKNOWN_SPECIES, dtype=torch.int32),
            deletion_matrix=torch.zeros_like(tokens_for_msa, dtype=torch.uint8),
            mask=torch.ones_like(tokens_for_msa, dtype=torch.bool),
            sequence_source=torch.full_like(
                tokens_for_msa,
                fill_value=msa_dataset_source_to_int[dataset_source],
            ),
            is_paired_mask=torch.zeros((1,), dtype=torch.bool),
        )

    @classmethod
    def create_empty(cls, n_tokens: int, depth: int = 0) -> "MSAContext":
        """Create a fully masked-out MSA of shape ``(depth, n_tokens)``."""
        dims = (depth, n_tokens)
        return MSAContext(
            dataset_source=MSADataSource.NONE,
            tokens=torch.full(
                dims, residue_types_with_nucleotides_order[":"], dtype=torch.uint8
            ),
            species=torch.full(dims, UNKNOWN_SPECIES, dtype=torch.int32),
            deletion_matrix=torch.zeros(dims, dtype=torch.uint8),  # No deletions
            mask=torch.zeros(dims, dtype=torch.bool),
            sequence_source=torch.full(
                dims,
                fill_value=msa_dataset_source_to_int[MSADataSource.NONE],
                dtype=torch.uint8,
            ),
            is_paired_mask=torch.zeros((depth,), dtype=torch.bool),
        )
# jaxtyping on residue-level objects is extremely slow.
@dataclass(frozen=True)
class TokenSpan:
    """Tokenized atoms and tokens of one residue, or of several after concatenate()."""

    restype: Int[Tensor, "n_tokens"]
    residue_index: Int[Tensor, "n_tokens"]
    centre_atom_index: Int[Tensor, "n_tokens"]
    reference_atom_index: Int[Tensor, "n_tokens"]
    backbone_frame_mask: Bool[Tensor, "n_tokens"]
    backbone_frame_index: Int[Tensor, "n_tokens 3"]
    atom_gt_coords: Float[Tensor, "n_atoms 3"]
    atom_exists_mask: Bool[Tensor, "n_atoms"]
    atom_token_index: Int[Tensor, "n_atoms"]
    ref_pos: Float[Tensor, "n_atoms 3"]
    ref_mask: Bool[Tensor, "n_atoms"]
    ref_element: Int[Tensor, "n_atoms"]
    ref_charge: Int[Tensor, "n_atoms"]
    atom_names: list[str]
    # Consistent atom ordering within each token
    atom_within_token_indices: Int[Tensor, "n_atoms"]
    residue_names: list[str]
    symmetries: Int[Tensor, "n_atoms n_symm"]
    b_factor_or_plddt: Float[Tensor, "n_tokens"]

    @classmethod
    def concatenate(cls, spans: list["TokenSpan"]) -> "TokenSpan":
        """Join several spans into one, re-basing atom indices.

        Atom-valued index fields (centre, reference, backbone frame) are
        shifted by the number of atoms in all preceding spans. Symmetry
        columns are right-padded with -1 to a common width and stay local to
        their span.

        Args:
            spans: spans to join, in order; must not be empty.

        Returns:
            A single ``TokenSpan`` covering all inputs.
        """
        # Atom offset of each span = number of atoms in all spans before it.
        atoms_per_span = torch.tensor(
            [span.atom_exists_mask.shape[0] for span in spans]
        )
        atom_offsets = torch.cumsum(atoms_per_span, dim=0).roll(1, 0)
        atom_offsets[0] = 0

        centre_atom_index = torch.cat(
            [
                span.centre_atom_index + offset
                for span, offset in zip(spans, atom_offsets)
            ]
        )
        reference_atom_index = torch.cat(
            [
                span.reference_atom_index + offset
                for span, offset in zip(spans, atom_offsets)
            ]
        )

        # The per-span values are summed cumulatively across all spans; with a
        # 1 on the first atom of each token and 0 elsewhere, the running sum
        # minus one is the global token index of every atom.
        atom_token_index = (
            torch.cumsum(
                torch.cat([x.atom_token_index for x in spans]),
                dim=0,
                dtype=torch.int,
            )
            - 1
        )
        backbone_frame_index = torch.cat(
            [
                span.backbone_frame_index + offset
                for span, offset in zip(spans, atom_offsets)
            ]
        )

        # concatenate symmetric permutations at the atom level
        # make sure that trailing shape is the same
        # NOTE: we store the *local* permutation indices, not the global ones
        # i.e. the permutation indices are relative to the residue
        atom_symms = [span.symmetries for span in spans]
        max_symms = max(x.shape[-1] for x in atom_symms)
        atom_symms = [
            torch.nn.functional.pad(x, (0, max_symms - x.shape[-1]), value=-1)
            for x in atom_symms
        ]
        return cls(
            restype=torch.cat([x.restype for x in spans]),
            residue_index=torch.cat([x.residue_index for x in spans]),
            centre_atom_index=centre_atom_index,
            reference_atom_index=reference_atom_index,
            backbone_frame_mask=torch.cat([x.backbone_frame_mask for x in spans]),
            backbone_frame_index=backbone_frame_index,
            atom_gt_coords=torch.cat([x.atom_gt_coords for x in spans]),
            atom_exists_mask=torch.cat([x.atom_exists_mask for x in spans]),
            atom_token_index=atom_token_index,
            ref_pos=torch.cat([x.ref_pos for x in spans]),
            ref_mask=torch.cat([x.ref_mask for x in spans]),
            ref_element=torch.cat([x.ref_element for x in spans]),
            ref_charge=torch.cat([x.ref_charge for x in spans]),
            atom_names=list(chain.from_iterable([x.atom_names for x in spans])),
            atom_within_token_indices=torch.cat(
                [x.atom_within_token_indices for x in spans]
            ),
            residue_names=list(chain.from_iterable([x.residue_names for x in spans])),
            symmetries=torch.cat(atom_symms, dim=0),
            b_factor_or_plddt=torch.cat([x.b_factor_or_plddt for x in spans]),
        )
+class AllAtomResidueTokenizer: + ref_conformer_generator: RefConformerGenerator + + def __init__(self, ref_conformer_generator: RefConformerGenerator): + self.ref_conformer_generator = ref_conformer_generator + + def tokenize_residue( + self, + residue: Residue, + entity_type: EntityType, + ) -> TokenSpan | None: + ref_conformer_data = self._get_ref_conformer_data(residue) + if ref_conformer_data.num_atoms == 0: + # avoid dealing with empty tensors in downstream processing + # this should only happen when residue is sole hydrogen + # or when residue code is not in CCD dictionary and + # the residue has 0 coords in the PDB structure + logger.warning( + f"skipping residue {residue.name} {residue.label_seq} as reference conformer has 0 heavy atoms" + ) + return None + + # Keep only the atoms from the ground truth conformer that are present in + # reference conformer. + # + # If we don't have a reference conformer, we fall back to using the ground truth + # conformer names, i.e. we keep all atoms in the ground truth conformer. + # When a true conformer data is not provided, use reference conformer directly + gt_conformer_data = residue.conformer_data + + if gt_conformer_data is not None: + atom_gt_coords, atom_exists_mask = gt_conformer_data.gather_atom_positions( + ref_conformer_data.atom_names + ) + else: + atom_gt_coords = ref_conformer_data.position + atom_exists_mask = torch.ones( + atom_gt_coords.shape[0], dtype=torch.bool, device=atom_gt_coords.device + ) + + # Tokenization is by residue if it is a standard amino acid or standard + # nucleotide; all ligands and all modified residues are tokenized per atom. 
+ tokenize_fn = ( + self._tokenize_per_residue + if ( + residue.name in standard_residue_pdb_codes + and entity_type != EntityType.LIGAND + ) + else self._tokenize_per_atom + ) + + return tokenize_fn( + restype=torch.tensor([residue.restype], dtype=torch.int), + residue_index=torch.tensor([residue.residue_index], dtype=torch.int), + atom_gt_coords=atom_gt_coords, + atom_exists_mask=atom_exists_mask, + ref_pos=ref_conformer_data.position, + ref_mask=torch.ones_like(atom_exists_mask), + ref_element=ref_conformer_data.element, + ref_charge=ref_conformer_data.charge, + atom_names=ref_conformer_data.atom_names, + residue_name=residue.name, + bonds=ref_conformer_data.bonds, + symmetries=ref_conformer_data.symmetries, + b_factor_or_plddt=torch.tensor([residue.b_factor_or_plddt]), + ) + + @staticmethod + def filter_atom_symmetries( + symmetries: Int[Tensor, "n_atoms n_symm"], + atom_exists_mask: Bool[Tensor, "n_atoms"], + ) -> Int[Tensor, "n_atoms filtered_n_symm"]: + n_atoms, _ = symmetries.shape + + # Create a mask for non-trivial symmetries + atom_indices = torch.arange(n_atoms).unsqueeze(-1) + non_trivial_symmetries = (symmetries >= 0) & (symmetries != atom_indices) + + masked_atoms = ~atom_exists_mask.unsqueeze(-1) + + # Check if any of the masked-out atoms have non-trivial symmetries + violations = torch.any(masked_atoms & non_trivial_symmetries, dim=1) + + # If any invalid symmetries are found, replace with identity permutation + if torch.any(violations): + return atom_indices + + # Otherwise, return the original symmetries + return symmetries + + # jaxtyping on residue-level objects is very slow, + # use for debug only + # @typecheck + def _tokenize_per_residue( + self, + restype: Int[Tensor, "n_tokens"], + residue_index: Int[Tensor, "n_tokens"], + atom_gt_coords: Float[Tensor, "n_atoms 3"], + atom_exists_mask: Bool[Tensor, "n_atoms"], + ref_pos: Float[Tensor, "n_atoms 3"], + ref_mask: Bool[Tensor, "n_atoms"], + ref_element: Int[Tensor, "n_atoms"], + ref_charge: 
Int[Tensor, "n_atoms"], + atom_names: list[str], + residue_name: str, + bonds: list[tuple[int, int]], + symmetries: Int[Tensor, "n_atoms n_symm"], + b_factor_or_plddt: Float[Tensor, "n_tokens"], + ) -> TokenSpan: + centre_atom_index = get_centre_atom_index( + atom_names, + residue_name, + ) + reference_atom_index = get_reference_atom_index( + atom_names, + residue_name, + ) + backbone_frame_mask = backbone_atoms_all_present( + atom_names, + residue_name, + ) + backbone_indices = backbone_atoms_indices(atom_names, residue_name).unsqueeze(0) + + # to 1 token + atom_token_index = torch.zeros_like(atom_exists_mask, dtype=torch.int) + atom_token_index[0] = 1 + + residue_names = [residue_name] + + # Find atom ordering; these should always be available because per residue + # tokenization works only on standard residues. + atom_within_token_index = atom_names_to_atom37_indices( + atom_names=atom_names, + residue_name=residue_name, + ) + + return TokenSpan( + restype=restype, + residue_index=residue_index, + centre_atom_index=centre_atom_index, + reference_atom_index=reference_atom_index, + backbone_frame_mask=backbone_frame_mask, + backbone_frame_index=backbone_indices, + atom_gt_coords=atom_gt_coords, + atom_exists_mask=atom_exists_mask, + atom_token_index=atom_token_index, + ref_pos=ref_pos, + ref_mask=ref_mask, + ref_element=ref_element, + ref_charge=ref_charge, + atom_names=atom_names, + atom_within_token_indices=atom_within_token_index, + residue_names=residue_names, + symmetries=self.filter_atom_symmetries(symmetries, atom_exists_mask), + b_factor_or_plddt=b_factor_or_plddt, + ) + + # jaxtyping on residue-level objects is very slow, + # use for debug only + # @typecheck + def _tokenize_per_atom( + self, + restype: Int[Tensor, "n_tokens"], + residue_index: Int[Tensor, "n_tokens"], + atom_gt_coords: Float[Tensor, "n_atoms 3"], + atom_exists_mask: Bool[Tensor, "n_atoms"], + ref_pos: Float[Tensor, "n_atoms 3"], + ref_mask: Bool[Tensor, "n_atoms"], + ref_element: 
Int[Tensor, "n_atoms"], + ref_charge: Int[Tensor, "n_atoms"], + atom_names: list[str], + residue_name: str, + bonds: list[tuple[int, int]], + symmetries: Int[Tensor, "n_atoms n_symm"], + b_factor_or_plddt: Float[Tensor, "n_tokens"], + ) -> TokenSpan: + # to n_atoms tokens + n_atoms = atom_gt_coords.shape[0] + restype = repeat(restype, "1 -> a", a=n_atoms) + residue_index = repeat(residue_index, "1 -> a", a=n_atoms) + b_factor_or_plddt = repeat(b_factor_or_plddt, "1 -> a", a=n_atoms) + + # centre of the token is the first and only atom in each token + # when tokenizing per-atom + centre_atom_index = torch.arange(n_atoms, dtype=torch.int) + reference_atom_index = torch.arange(n_atoms, dtype=torch.int) + backbone_frame_mask = torch.zeros((n_atoms,), dtype=torch.bool) + backbone_indices = ( + torch.arange(n_atoms, dtype=torch.int).unsqueeze(1).expand(-1, 3) + ) + + atom_token_index = torch.ones_like(atom_exists_mask, dtype=torch.int) + + residue_names = [residue_name] * n_atoms + + # Each atom is alone in its own token + atom_within_token_index = torch.zeros(n_atoms, dtype=torch.int) + + return TokenSpan( + restype=restype, + residue_index=residue_index, + centre_atom_index=centre_atom_index, + reference_atom_index=reference_atom_index, + backbone_frame_mask=backbone_frame_mask, + backbone_frame_index=backbone_indices, + atom_gt_coords=atom_gt_coords, + atom_exists_mask=atom_exists_mask, + atom_token_index=atom_token_index, + ref_pos=ref_pos, + ref_mask=ref_mask, + ref_element=ref_element, + ref_charge=ref_charge, + atom_names=atom_names, + atom_within_token_indices=atom_within_token_index, + residue_names=residue_names, + symmetries=self.filter_atom_symmetries(symmetries, atom_exists_mask), + b_factor_or_plddt=b_factor_or_plddt, + ) + + def tokenize_entity( + self, entity_data: AllAtomEntityData + ) -> AllAtomStructureContext | None: + return self.tokenize_entities([entity_data])[0] + + def tokenize_entities( + self, + entities_data: list[AllAtomEntityData], + ) -> 
list[AllAtomStructureContext | None]: + sym_ids = _make_sym_ids([x.entity_id for x in entities_data]) + + return [ + self._tokenize_entity( + entity_data, + chain_id=idx + 1, + sym_id=sym_id, + ) + for idx, (entity_data, sym_id) in enumerate(zip(entities_data, sym_ids)) + ] + + def _tokenize_entity( + self, + entity_data: AllAtomEntityData, + chain_id: int = 1, + sym_id: int = 1, + ) -> AllAtomStructureContext | None: + tokenized_residues = [ + self.tokenize_residue(residue, entity_data.entity_type) + for residue in entity_data.residues + ] + + valid_residues = [x for x in tokenized_residues if x is not None] + if len(valid_residues) == 0: + return None + + tokens = TokenSpan.concatenate(valid_residues) + + num_tokens = tokens.restype.shape[0] + token_index = torch.arange(num_tokens, dtype=torch.int) + + # mask indicating if a token has >=1 atom with known coordinates + token_exists_mask = (tokens.atom_token_index == token_index[..., None]).sum( + dim=-1 + ) > 0 + + # checks on atom mask and positions: + # max 1 atom per-example has zero coordinates + if ( + torch.sum( + torch.all(tokens.atom_gt_coords[tokens.atom_exists_mask] == 0, dim=-1) + ) + > 1 + ): + raise ValueError( + f"Zero coordinates found in unmasked atoms for {entity_data.pdb_id}" + ) + + # construct asym_id, entity_id, sym_id + asym_id = chain_id + entity_id = entity_data.entity_id + + # Create unique ids to identify atoms which belong to same residue in same chain + # here assume we featurize a single chain + atom_residue_index = torch.gather( + tokens.residue_index, + dim=0, + index=tokens.atom_token_index.long(), + ) + + atom_ref_space_uid = atom_residue_index + + residue_names = tokens.residue_names + + match entity_data.entity_type: + case EntityType.PROTEIN: + if tokens.residue_index[0].item() != 0: + logger.error( + f"Protein residue index should start at zero, {entity_data}" + ) + + if not torch.all(torch.diff(tokens.residue_index) <= 1): + logger.error( + f"Protein residue index should be 
contiguous (no gaps), {entity_data}" + ) + + _, unique_indices = unique_indexes(tokens.residue_index) + res_seq = [residue_names[i.item()] for i in unique_indices] + if res_seq != entity_data.full_sequence: + logger.error( + f"Protein residue names should match entity data full sequence, {entity_data}" + ) + + return AllAtomStructureContext( + # token-level + token_residue_type=tokens.restype, + token_residue_index=tokens.residue_index, + token_centre_atom_index=tokens.centre_atom_index, + token_ref_atom_index=tokens.reference_atom_index, + token_index=token_index, + token_exists_mask=token_exists_mask, + token_backbone_frame_mask=tokens.backbone_frame_mask, + token_backbone_frame_index=tokens.backbone_frame_index, + token_asym_id=_id_to_token_tensor(asym_id, num_tokens), + token_entity_id=_id_to_token_tensor(entity_id, num_tokens), + token_sym_id=_id_to_token_tensor(sym_id, num_tokens), + token_entity_type=entity_type_to_tensor( + entity_data.entity_type, + num_tokens, + ), + # token res name is padded to 8 characters + token_residue_name=torch.stack( + [string_to_tensorcode(x, 8) for x in residue_names], + dim=0, + ), + token_b_factor_or_plddt=tokens.b_factor_or_plddt, + # atom-level + atom_token_index=tokens.atom_token_index, + atom_within_token_index=tokens.atom_within_token_indices, + atom_ref_pos=tokens.ref_pos, + atom_ref_mask=tokens.ref_mask, + atom_ref_element=tokens.ref_element, + atom_ref_charge=tokens.ref_charge, + atom_ref_name=tokens.atom_names, + atom_ref_name_chars=_atom_names_to_tensor(tokens.atom_names), + atom_ref_space_uid=atom_ref_space_uid, + atom_is_not_padding_mask=torch.ones_like( + tokens.atom_exists_mask, + dtype=torch.bool, + ), + # supervision only + atom_gt_coords=tokens.atom_gt_coords, + atom_exists_mask=tokens.atom_exists_mask, + # structure-only + pdb_id=repeat( + # PDB ids are only 4 characters long, but AFDB ids can be longer + string_to_tensorcode(entity_data.pdb_id, pad_to_length=32), + "length -> num_tokens length", + 
num_tokens=num_tokens, + ), + source_pdb_chain_id=repeat( + string_to_tensorcode(entity_data.source_pdb_chain_id, pad_to_length=4), + "length -> num_tokens length", + num_tokens=num_tokens, + ), + subchain_id=repeat( + string_to_tensorcode(entity_data.subchain_id, pad_to_length=4), + "length -> num_tokens length", + num_tokens=num_tokens, + ), + resolution=torch.tensor( + [entity_data.resolution], + dtype=torch.float32, + ), + is_distillation=torch.tensor( + [entity_data.is_distillation], + dtype=torch.bool, + ), + symmetries=tokens.symmetries, + ) + + def _get_ref_conformer_data(self, residue: Residue) -> ConformerData: + """ + Returns the reference conformer data for the residue. We determine the reference + conformer according to the following logic: + 1. conformer_generator is available and a reference + conformer exists for the residue name => we return the cached reference + conformer via the conformer generator + 2. conformer_generator is available and a smiles is given for the residue => + we generate a reference conformer using Rdkit via the conformer generator + 3. conformer_generator is available, the reference conformer can't be + found and no smiles is given => we convert the Residue to an RDKit molecule + and load full conformer data with the residue's atom positions as coordinates. + 4. 
conformer generator is not available => we set reference conformer to + the ground truth conformer + """ + # The reference conformer tells us: + # - which atoms we should expect in this ligand / residue, and how many of them + # - what are the ideal coordinates of these atoms if the ligand or residue was + # assembled alone in the void + ref_conformer = self.ref_conformer_generator.get(residue.name) + + if ref_conformer is not None: + if residue.name in standard_residue_pdb_codes: + return ref_conformer + else: + return ref_conformer.center_random_augment() + + # When we can't find a reference conformer, and a smiles is given, + # generate a reference conformer using rdkit + if residue.smiles is not None: + logger.info( + f"Generating ref conformer for {residue.name}, {residue.smiles}" + ) + return self.ref_conformer_generator.generate(residue.smiles) + + # When we can't find a reference conformer, attempt to use the ground + # truth conformer data as the reference conformer. + logger.warning( + f"No reference conformer found for residue {residue.name}, " + "using training example conformer" + ) + assert residue.conformer_data is not None + + try: + # Rather than just setting the reference conformer to the ground truth, we + # make a fake RDKit molecule from the ground truth data and then convert + # back into a conformer data so that we can extract inter-atom symmetries + # and bond info + rdkit_mol = conformer_data_to_rdkit_mol(residue.conformer_data) + gt_conformer = RefConformerGenerator._load_ref_conformer_from_rdkit( + rdkit_mol + ) + except Exception as e: + # Occasionally _load_ref_conformer_from_rdkit fails on unknown ligands e.g. + # rdkit.Chem.rdchem.AtomValenceException's can be raised or ValueError: + # can't infer bonds for Ligand. due to inexact connectivity. + logger.warning( + f"Caught error for {residue.name=} while loading reference conformer " + f"from RDKit, {(type(e).__name__)}. Using ground truth conformer instead." 
+ ) + gt_conformer = residue.conformer_data + + return gt_conformer.center_random_augment() + + +@typecheck +def _atom_names_to_tensor(atom_names: list[str]) -> Int[Tensor, "n_atoms 4"]: + ords = torch.tensor( + [[ord(c) - 32 for c in atom_name.ljust(4, " ")] for atom_name in atom_names], + dtype=torch.int, + ) + return ords[:, :4] + + +@typecheck +def _id_to_token_tensor(id: int, num_tokens: int) -> Int[Tensor, "n"]: + return id * torch.ones((num_tokens,), dtype=torch.int) + + +@typecheck +def entity_type_to_tensor(entity_type: EntityType, num_tokens: int) -> Int[Tensor, "n"]: + return torch.full((num_tokens,), fill_value=entity_type.value, dtype=torch.int) + + +def _make_sym_ids(entity_ids_per_chain: list[int]) -> list[int]: + entities_dict: dict[int, int] = dict() + sym_ids = [] + + for entity_id in entity_ids_per_chain: + sym_id = entities_dict.get(entity_id, 0) + sym_ids.append(sym_id) + entities_dict[entity_id] = sym_id + 1 + + return sym_ids + + +def atom_names_to_atom37_indices( + atom_names: list[str], residue_name: str +) -> Int[Tensor, "n_atoms"]: + """ + Returns a tensor of indices into the token-level atom names. 
+ """ + # Proteins use the atom37 ordering and indexing + # nucleotides use the 36 atom ordering and indexing + # - DNA is written as DA DG DC DT + # - RNA is given as A G C U + + precomputed_idces = utils.atom_37_atom_indices() + + if residue_name == "UNK": + retval = torch.arange(len(atom_names), dtype=torch.int) + + elif residue_name in standard_residue_pdb_codes: + idx = [precomputed_idces[(residue_name, atom_name)] for atom_name in atom_names] + retval = torch.tensor(idx, dtype=torch.int) + else: + raise ValueError( + f"Unknown residue name {residue_name} (atom names: {atom_names})" + ) + + assert retval.max() <= 36, f"Out of bounds ordering {atom_names} in {residue_name}" + return retval diff --git a/forks/chai-lab/chai_lab/data/dataset/structure/all_atom_structure_context.py b/forks/chai-lab/chai_lab/data/dataset/structure/all_atom_structure_context.py new file mode 100644 index 00000000..4459479d --- /dev/null +++ b/forks/chai-lab/chai_lab/data/dataset/structure/all_atom_structure_context.py @@ -0,0 +1,286 @@ +import logging +from dataclasses import asdict, dataclass +from functools import cached_property, partial + +import torch +from torch import Tensor + +from chai_lab.utils.tensor_utils import ( + batch_tensorcode_to_string, + tensorcode_to_string, +) +from chai_lab.utils.typing import Bool, Float, Int, UInt8, typecheck + +logger = logging.getLogger(__name__) + + +@typecheck +@dataclass +class AllAtomStructureContext: + # token-level + token_residue_type: Int[Tensor, "n_tokens"] + token_residue_index: Int[Tensor, "n_tokens"] + token_index: Int[Tensor, "n_tokens"] + token_centre_atom_index: Int[Tensor, "n_tokens"] + token_ref_atom_index: Int[Tensor, "n_tokens"] + token_exists_mask: Bool[Tensor, "n_tokens"] + token_backbone_frame_mask: Bool[Tensor, "n_tokens"] + token_backbone_frame_index: Int[Tensor, "n_tokens 3"] + token_asym_id: Int[Tensor, "n_tokens"] + token_entity_id: Int[Tensor, "n_tokens"] + token_sym_id: Int[Tensor, "n_tokens"] + 
token_entity_type: Int[Tensor, "n_tokens"] + token_residue_name: UInt8[Tensor, "n_tokens 8"] + token_b_factor_or_plddt: Float[Tensor, "n_tokens"] + # atom-level + atom_token_index: Int[Tensor, "n_atoms"] + atom_within_token_index: Int[Tensor, "n_atoms"] # consistent atom ordering + atom_ref_pos: Float[Tensor, "n_atoms 3"] + atom_ref_mask: Bool[Tensor, "n_atoms"] + atom_ref_element: Int[Tensor, "n_atoms"] + atom_ref_charge: Int[Tensor, "n_atoms"] + atom_ref_name: list[str] + atom_ref_name_chars: Int[Tensor, "n_atoms 4"] + atom_ref_space_uid: Int[Tensor, "n_atoms"] + atom_is_not_padding_mask: Bool[Tensor, "n_atoms"] + # supervision only + atom_gt_coords: Float[Tensor, "n_atoms 3"] + atom_exists_mask: Bool[Tensor, "n_atoms"] + # structure-level + pdb_id: UInt8[Tensor, "n_tokens 32"] + # source_pdb_chain_id corresponds to auth_asym_id in pdb + # can be the same for two different asym_id values + # (we split protein and ligand for example) + source_pdb_chain_id: UInt8[Tensor, "n_tokens 4"] + # subchain_id is label_asym_id in pdb + # it is assigned by the PDB and separates different + # chemical entities (protein, ligand) + # should be a 1-1 mapping to asym_id + subchain_id: UInt8[Tensor, "n_tokens 4"] + resolution: Float[Tensor, "1"] + is_distillation: Bool[Tensor, "1"] + # symmetric atom swap indices + symmetries: Int[Tensor, "n_atoms n_symmetries"] + + def __post_init__(self): + # Resolved residues filter should eliminate PDBs with missing residues, but that + # we can still have atom_exists mask set to False at every position if we have a + # bad crop so we log examples with no valid coordinates + if self.num_atoms > 0 and not torch.any(self.atom_exists_mask): + pdb_id = tensorcode_to_string(self.pdb_id[0]) + logger.error(f"No valid coordinates found in any atoms for {pdb_id}") + + # Check that atom and token masks are compatible. 
Anywhere that the atom mask is + # true, the token mask should also be true + if self.num_atoms > 0 and not torch.all( + self.token_exists_mask[self.atom_token_index][self.atom_exists_mask] + ): + pdb_id = tensorcode_to_string(self.pdb_id[0]) + logger.error(f"Incompatible masks for {pdb_id}") + + @cached_property + def residue_names(self) -> list[str]: + return batch_tensorcode_to_string(self.token_residue_name) + + def pad( + self, + n_tokens: int, + n_atoms: int, + ) -> "AllAtomStructureContext": + assert n_tokens >= self.num_tokens + pad_tokens_func = partial(_pad_func, pad_size=n_tokens - self.num_tokens) + + assert n_atoms >= self.num_atoms + pad_atoms_func = partial(_pad_func, pad_size=n_atoms - self.num_atoms) + + return AllAtomStructureContext( + # token-level + token_residue_type=pad_tokens_func(self.token_residue_type), + token_residue_index=pad_tokens_func(self.token_residue_index), + token_index=pad_tokens_func(self.token_index), + token_centre_atom_index=pad_tokens_func(self.token_centre_atom_index), + token_ref_atom_index=pad_tokens_func(self.token_ref_atom_index), + token_exists_mask=pad_tokens_func(self.token_exists_mask), + token_backbone_frame_mask=pad_tokens_func(self.token_backbone_frame_mask), + token_backbone_frame_index=torch.cat( + [ + pad_tokens_func(self.token_backbone_frame_index[..., i]).unsqueeze( + -1 + ) + for i in range(3) + ], + dim=-1, + ), + token_asym_id=pad_tokens_func(self.token_asym_id), + token_entity_id=pad_tokens_func(self.token_entity_id), + token_sym_id=pad_tokens_func(self.token_sym_id), + token_entity_type=pad_tokens_func(self.token_entity_type), + token_residue_name=pad_tokens_func(self.token_residue_name), + token_b_factor_or_plddt=pad_tokens_func(self.token_b_factor_or_plddt), + # atom-level + atom_token_index=pad_atoms_func(self.atom_token_index), + atom_within_token_index=pad_atoms_func(self.atom_within_token_index), + atom_ref_pos=pad_atoms_func(self.atom_ref_pos), + 
atom_ref_mask=pad_atoms_func(self.atom_ref_mask), + atom_ref_element=pad_atoms_func(self.atom_ref_element), + atom_ref_charge=pad_atoms_func(self.atom_ref_charge), + atom_ref_name=self.atom_ref_name, + atom_ref_name_chars=pad_atoms_func(self.atom_ref_name_chars), + atom_ref_space_uid=pad_atoms_func(self.atom_ref_space_uid, pad_value=-1), + atom_is_not_padding_mask=pad_atoms_func(self.atom_is_not_padding_mask), + # supervision-only + atom_gt_coords=pad_atoms_func(self.atom_gt_coords), + atom_exists_mask=pad_atoms_func(self.atom_exists_mask), + # structure-level + pdb_id=pad_tokens_func(self.pdb_id), + source_pdb_chain_id=pad_tokens_func(self.source_pdb_chain_id), + subchain_id=pad_tokens_func(self.subchain_id), + resolution=self.resolution, + is_distillation=self.is_distillation, + symmetries=pad_atoms_func(self.symmetries, pad_value=-1), + ) + + @typecheck + @classmethod + def merge( + cls, + contexts: list["AllAtomStructureContext"], + ) -> "AllAtomStructureContext": + # indexes: + token_offsets = _exclusive_cum_lengths([x.token_residue_type for x in contexts]) + atom_offsets = _exclusive_cum_lengths([x.atom_token_index for x in contexts]) + + atom_token_index = torch.cat( + [x.atom_token_index + count for x, count in zip(contexts, token_offsets)] + ) + + token_centre_atom_index = torch.cat( + [ + x.token_centre_atom_index + count + for x, count in zip(contexts, atom_offsets) + ] + ) + token_ref_atom_index = torch.cat( + [x.token_ref_atom_index + count for x, count in zip(contexts, atom_offsets)] + ) + token_backbone_frame_index = torch.cat( + [ + x.token_backbone_frame_index + count + for x, count in zip(contexts, atom_offsets) # frame entries are atom indices, so shift by atom counts + ] + ) + + n_tokens = sum(x.num_tokens for x in contexts) + token_index = torch.arange(n_tokens, dtype=torch.int) + + # re-index the reference space from 0..n_tokens-1. 
+ zero_indexed_ref_uids = [ + torch.unique_consecutive(x.atom_ref_space_uid, return_inverse=True)[1] + for x in contexts + ] + + ref_space_uids_offsets = _exclusive_cum_lengths( + [x.atom_ref_space_uid for x in contexts] + ) + atom_ref_space_uid = torch.cat( + [ + x + count + for x, count in zip(zero_indexed_ref_uids, ref_space_uids_offsets) + ], + ) + + # pad symmetric permutations to have same length + max_symms = max(x.symmetries.shape[-1] for x in contexts) + padded_symms = [ + torch.nn.functional.pad( + x.symmetries, (0, max_symms - x.symmetries.shape[-1]), value=-1 + ) + for x in contexts + ] + # NOTE(review): no per-chain atom offset is applied; symmetries are concatenated as stored, padding stays -1 + symm_mask = torch.cat([x >= 0 for x in padded_symms]) + symmetries = torch.cat(padded_symms) + symmetries = symmetries.masked_fill(~symm_mask, -1) + + return cls( + # token-level + token_residue_type=torch.cat([x.token_residue_type for x in contexts]), + token_residue_index=torch.cat([x.token_residue_index for x in contexts]), + token_index=token_index, + token_centre_atom_index=token_centre_atom_index, + token_ref_atom_index=token_ref_atom_index, + token_exists_mask=torch.cat([x.token_exists_mask for x in contexts]), + token_backbone_frame_mask=torch.cat( + [x.token_backbone_frame_mask for x in contexts] + ), + token_backbone_frame_index=token_backbone_frame_index, + token_asym_id=torch.cat([x.token_asym_id for x in contexts]), + token_entity_id=torch.cat([x.token_entity_id for x in contexts]), + token_sym_id=torch.cat([x.token_sym_id for x in contexts]), + token_entity_type=torch.cat([x.token_entity_type for x in contexts]), + token_residue_name=torch.cat([x.token_residue_name for x in contexts]), + token_b_factor_or_plddt=torch.cat( + [x.token_b_factor_or_plddt for x in contexts] + ), + # atom-level + atom_token_index=atom_token_index, + atom_within_token_index=torch.cat( + [x.atom_within_token_index for x in contexts] + ), + atom_ref_pos=torch.cat([x.atom_ref_pos for x in contexts]), + 
atom_ref_mask=torch.cat([x.atom_ref_mask for x in contexts]), + atom_ref_element=torch.cat([x.atom_ref_element for x in contexts]), + atom_ref_charge=torch.cat([x.atom_ref_charge for x in contexts]), + atom_ref_name=[x for context in contexts for x in context.atom_ref_name], + atom_ref_name_chars=torch.cat([x.atom_ref_name_chars for x in contexts]), + atom_ref_space_uid=atom_ref_space_uid, + atom_is_not_padding_mask=torch.cat( + [x.atom_is_not_padding_mask for x in contexts] + ), + # supervision only + atom_gt_coords=torch.cat([x.atom_gt_coords for x in contexts]), + atom_exists_mask=torch.cat([x.atom_exists_mask for x in contexts]), + # structure-level + pdb_id=torch.cat([x.pdb_id for x in contexts]), + source_pdb_chain_id=torch.cat([x.source_pdb_chain_id for x in contexts]), + subchain_id=torch.cat([x.subchain_id for x in contexts]), + resolution=torch.max( + torch.stack([x.resolution for x in contexts]), 0 + ).values, + is_distillation=torch.max( + torch.stack([x.is_distillation for x in contexts]), 0 + ).values, + symmetries=symmetries, + ) + + def to(self, device: torch.device | str) -> "AllAtomStructureContext": + dict_ = { + k: v.to(device) if torch.is_tensor(v) else v + for k, v in asdict(self).items() + } + return AllAtomStructureContext(**dict_) + + @property + def num_tokens(self) -> int: + (n_tokens,) = self.token_index.shape + return n_tokens + + @property + def num_atoms(self) -> int: + (n_atoms,) = self.atom_token_index.shape + return n_atoms + + def to_dict(self) -> dict[str, torch.Tensor]: + return asdict(self) + + +def _pad_func(x: Tensor, pad_size: int, pad_value: float | None = None) -> Tensor: + sizes = [0, 0] * (x.ndim - 1) + [0, pad_size] + return torch.nn.functional.pad(x, sizes, value=pad_value) + + +def _exclusive_cum_lengths(tensors: list[Int[Tensor, "n"]]): + lengths = torch.tensor([t.shape[0] for t in tensors]) + cum_lengths = torch.cumsum(lengths, dim=0).roll(1, 0) + cum_lengths[0] = 0 + return cum_lengths diff --git 
a/forks/chai-lab/chai_lab/data/dataset/structure/chain.py b/forks/chai-lab/chai_lab/data/dataset/structure/chain.py new file mode 100644 index 00000000..91908ef6 --- /dev/null +++ b/forks/chai-lab/chai_lab/data/dataset/structure/chain.py @@ -0,0 +1,22 @@ +from dataclasses import dataclass + +from chai_lab.data.dataset.structure.all_atom_structure_context import ( + AllAtomStructureContext, +) +from chai_lab.data.parsing.structure.all_atom_entity_data import AllAtomEntityData + + +@dataclass +class Chain: + # The untokenized entity data + entity_data: AllAtomEntityData + + # The tokenized chain, derived from the entity data + structure_context: AllAtomStructureContext + + def __str__(self) -> str: + return f"{self.__class__.__name__}(entity_data={self.entity_data})" + + @property + def num_tokens(self) -> int: + return self.structure_context.num_tokens diff --git a/forks/chai-lab/chai_lab/data/dataset/structure/utils.py b/forks/chai-lab/chai_lab/data/dataset/structure/utils.py new file mode 100644 index 00000000..a14b3bca --- /dev/null +++ b/forks/chai-lab/chai_lab/data/dataset/structure/utils.py @@ -0,0 +1,154 @@ +from functools import lru_cache + +import torch +from torch import Tensor + +import chai_lab.data.residue_constants as rc +from chai_lab.utils.typing import Bool, Int + + +def get_centre_atom_name(residue_name: str) -> str: + if residue_name not in rc.standard_residue_pdb_codes: + raise ValueError(f"Residue {residue_name} is not a standard residue") + + if residue_name in { + "A", + "G", + "C", + "U", + "DA", + "DG", + "DC", + "DT", + }: + return "C1'" + else: + assert len(residue_name) == 3, "residue expected" + return "CA" + + +def get_reference_atom_name(residue_name: str) -> str: + if residue_name not in rc.standard_residue_pdb_codes: + raise ValueError(f"Residue {residue_name} is not a standard residue") + + if residue_name == "GLY": + return "CA" + elif residue_name in {"A", "G", "DA", "DG"}: + return "C4" + elif residue_name in {"C", "U", "DC", 
"DT"}: + return "C2" + else: + return "CB" + + +def get_centre_atom_index(atom_names: list[str], residue_name: str) -> Int[Tensor, "1"]: + # centre of the token is Calpha or C1' + name = get_centre_atom_name(residue_name) + + if name in atom_names: + idx = atom_names.index(name) + else: + raise ValueError( + f"Residue {residue_name} marked as standard, " + f"but reference conformer misses centre atom {name}. " + "Either the residue is not standard or reference conformer is wrong." + ) + + return torch.tensor([idx], dtype=torch.int) + + +def get_reference_atom_index( + atom_names: list[str], residue_name: str +) -> Int[Tensor, "1"]: + name = get_reference_atom_name(residue_name) + if name in atom_names: + idx = atom_names.index(name) + else: + raise ValueError( + f"Residue {residue_name} marked as standard, " + f"but reference conformer misses reference atom {name}. " + "Either the residue is not standard or reference conformer is wrong." + ) + + return torch.tensor([idx], dtype=torch.int) + + +def get_backbone_frame_atom_names(residue_name: str) -> tuple[str, str, str]: + """Return names of the 3 atoms used in canonical token frame.""" + if residue_name in { + "A", + "G", + "C", + "U", + "DA", + "DG", + "DC", + "DT", + }: + return "C1'", "C3'", "C4'" + if residue_name in rc.residue_atoms: + return "N", "CA", "C" + return "", "", "" + + +def backbone_atoms_all_present( + atom_names: list[str], residue_name: str +) -> Bool[Tensor, "1"]: + """Check if all *protein* backbone atoms are present in the list of atom names.""" + backbone_frame_atoms = get_backbone_frame_atom_names(residue_name) + if all(a == "" for a in backbone_frame_atoms): + # Not a nucleic acid or a protein residue + all_present = False + else: + all_present = all(name in atom_names for name in backbone_frame_atoms) + return torch.tensor([all_present], dtype=torch.bool) + + +def backbone_atoms_indices( + atom_names: list[str], residue_name: str +) -> Int[Tensor, "3"]: + """Return indices of backbone 
atoms N, Ca, C in the list of atom names.""" + backbone_frame_atom_names = get_backbone_frame_atom_names(residue_name) + + if backbone_atoms_all_present(atom_names, residue_name): + indices = [atom_names.index(name) for name in backbone_frame_atom_names] + else: + indices = [0, 0, 0] + + return torch.tensor(indices, dtype=torch.int) + + +@lru_cache(maxsize=1) +def atom_37_atom_indices() -> dict[tuple[str, str | None], int]: + num_protein_atoms = 37 + protein_res_atom_to_index: dict[tuple[str, str | None], int] = { + (residue_name, atom_name): atom_index + for residue_name in rc.residue_atoms.keys() + for atom_name, atom_index in rc.atom_order.items() + } + assert max(protein_res_atom_to_index.values()) == num_protein_atoms - 1 + + num_rna_atoms = 36 + # note: convert RNA residues to R{} to match residue names from residue_constants.py + rna_res_atom_to_index = { + (residue_name, atom_name): atom_index + for residue_name in {"A", "C", "G", "U"} + for atom_index, atom_name in enumerate( + rc.nucleic_acid_atoms[f"R{residue_name}"] + ) + } + assert max(rna_res_atom_to_index.values()) == num_rna_atoms - 1 + + num_dna_atoms = 36 + dna_res_atom_to_index = { + (residue_name, atom_name): atom_index + for residue_name in {"DA", "DC", "DG", "DT"} + for atom_index, atom_name in enumerate(rc.nucleic_acid_atoms[residue_name]) + } + assert max(dna_res_atom_to_index.values()) == num_dna_atoms - 1 + + return { + **protein_res_atom_to_index, + **rna_res_atom_to_index, + **dna_res_atom_to_index, + } diff --git a/forks/chai-lab/chai_lab/data/dataset/templates/context.py b/forks/chai-lab/chai_lab/data/dataset/templates/context.py new file mode 100644 index 00000000..f0ccefc3 --- /dev/null +++ b/forks/chai-lab/chai_lab/data/dataset/templates/context.py @@ -0,0 +1,215 @@ +import logging +from dataclasses import asdict, dataclass + +import torch +from torch import Tensor +from torch.nn import functional as F + +from chai_lab.data import residue_constants as rc +from 
chai_lab.utils.defaults import default +from chai_lab.utils.typing import Bool, Float, Int, typecheck + +logger = logging.getLogger(__name__) + + +@typecheck +@dataclass(frozen=True) +class TemplateContext: + """Context for templates; always aligned by construction.""" + + template_restype: Int[Tensor, "n_templates n_tokens"] + template_pseudo_beta_mask: Bool[Tensor, "n_templates n_tokens"] + template_backbone_frame_mask: Bool[Tensor, "n_templates n_tokens"] + template_distances: Float[Tensor, "n_templates n_tokens n_tokens"] + template_unit_vector: Float[Tensor, "n_templates n_tokens n_tokens 3"] + + def __str__(self) -> str: + return ( + f"TemplateContext(num_templates={self.num_templates}, " + f"num_nonnull_templates={self.num_nonnull_templates}, " + f"num_tokens={self.num_tokens})" + ) + + @property + def num_tokens(self) -> int: + return self.template_restype.shape[1] + + @property + def num_templates(self) -> int: + return self.template_restype.shape[0] + + @property + def num_nonnull_templates(self) -> int: + """Number of templates that aren't all null padding templates.""" + template_exists = self.template_mask.any(dim=-1).int() + return int(template_exists.sum().item()) + + @property + def template_mask(self) -> Bool[Tensor, "n_templates n_tokens"]: + return self.template_restype != rc.residue_types_with_nucleotides_order["-"] + + def to_dict(self) -> dict[str, torch.Tensor]: + retval = asdict(self) + retval.update( + { + "num_templates": torch.tensor(self.num_nonnull_templates), + "template_mask": self.template_mask, + } + ) + return retval + + @classmethod + def empty(cls, n_templates: int, n_tokens: int) -> "TemplateContext": + return cls( + template_restype=torch.full( + (n_templates, n_tokens), + fill_value=rc.residue_types_with_nucleotides_order["-"], + dtype=torch.int32, + ), + template_pseudo_beta_mask=torch.zeros( + n_templates, n_tokens, dtype=torch.bool + ), + template_backbone_frame_mask=torch.zeros( + n_templates, n_tokens, dtype=torch.bool + 
), + template_distances=torch.zeros( + n_templates, n_tokens, n_tokens, dtype=torch.float32 + ), + template_unit_vector=torch.zeros( + n_templates, n_tokens, n_tokens, 3, dtype=torch.float32 + ), + ) + + def index_select(self, idxs: Int[Tensor, "n"]) -> "TemplateContext": + return TemplateContext( + template_restype=self.template_restype[:, idxs], + template_pseudo_beta_mask=self.template_pseudo_beta_mask[:, idxs], + template_backbone_frame_mask=self.template_backbone_frame_mask[:, idxs], + template_distances=self.template_distances[:, idxs][:, :, idxs], + template_unit_vector=self.template_unit_vector[:, idxs][:, :, idxs], + ) + + # @classmethod + # def merge( + # cls, + # templates: list["TemplateContext"], + # ) -> "TemplateContext": + # """Merge template contexts along the template dimensions.""" + # # n_token can be simply concatenated + # logger.debug(f"Merging {len(templates)} templates") + + # # Handle case where we get an empty list (no templates to merge) + # if len(templates) == 0: + # return cls.empty(n_templates=4, n_tokens=1) + + # # Pad each template_restype's template_dimension to match the largest + # # NOTE count num_templates here, NOT num_nonnull_templates + # n_templates_new: int = max(t.num_templates for t in templates) + # padded_templates = [t.pad(max_templates=n_templates_new) for t in templates] + # new_template_restype = torch.cat( + # [t.template_restype for t in padded_templates], + # dim=1, # Concat on sequence dim + # ) + # new_template_pseudo_beta_mask = torch.cat( + # [t.template_pseudo_beta_mask for t in padded_templates], + # dim=1, + # ) + # new_template_backbone_frame_mask = torch.cat( + # [t.template_backbone_frame_mask for t in padded_templates], + # dim=1, + # ) + + # # Number of tokens after concatenation along token dim + # n_token_new = new_template_restype.shape[1] + + # # n_token x n_token must be tiled into a square matrix + # # These indices like [0, 0, 0, 0, 1, 1, 2, 2, 3, 3, 3, ...] 
indicate the region + # # of the square matrix that corresponds to each template. + # template_indices = torch.repeat_interleave( + # input=torch.arange(len(templates), device=new_template_restype.device), + # repeats=torch.tensor([t.template_restype.shape[-1] for t in templates]), + # ) + # # Sample template and token dim + # assert template_indices.shape[0] == n_token_new + + # new_template_distances = torch.zeros( + # n_templates_new, n_token_new, n_token_new, dtype=torch.float32 + # ) + # new_template_unit_vector = torch.zeros( + # n_templates_new, n_token_new, n_token_new, 3, dtype=torch.float32 + # ) + + # # For each template, find the block that it corresponds to and copy in the data + # for i, t in enumerate(templates): + # m = template_indices == i + # mask = m[:, None] * m[None, :] + # idx = torch.arange(t.template_distances.shape[0]) + # new_template_distances[idx.unsqueeze(1), mask] = ( + # t.template_distances.flatten(1, 2) + # ) + # new_template_unit_vector[idx.unsqueeze(1), mask] = ( + # t.template_unit_vector.flatten(1, 2) + # ) + + # return cls( + # template_restype=new_template_restype, + # template_pseudo_beta_mask=new_template_pseudo_beta_mask, + # template_backbone_frame_mask=new_template_backbone_frame_mask, + # template_distances=new_template_distances, + # template_unit_vector=new_template_unit_vector, + # ) + + def pad( + self, + max_templates: int | None = None, + max_tokens: int | None = None, + ) -> "TemplateContext": + """Pad to the given number of templates and tokens.""" + max_templates = default(max_templates, self.num_templates) + assert ( + self.num_templates <= max_templates + ), f"Cannot pad templates containing {self.num_templates} templates to {max_templates} templates" + n_pad_templates = max_templates - self.num_templates + + max_tokens = default(max_tokens, self.num_tokens) + assert ( + self.num_tokens <= max_tokens + ), f"Cannot pad templates containing {self.num_tokens} tokens to {max_tokens} tokens" + n_pad_tokens = 
max_tokens - self.num_tokens + + if n_pad_templates == 0 and n_pad_tokens == 0: # Exact match yay + return self + + logger.debug(f"Padding templates by {n_pad_templates=} {n_pad_tokens=}") + + # Padding works from last dim forward in pairs of padding (left, right) + # - (0, n_pad_tokens) = pad nothing on left, pad by n_pad_tokens on right for + # last dim + # - (0, 0, 0, n_pad_tokens, 0, n_pad_tokens) = pad nothing on last dim, but pad + # next two dims + pad_dims_template = (0, n_pad_templates) + pad_dims_token = (0, n_pad_tokens) + return TemplateContext( + template_restype=F.pad( + self.template_restype, + pad=pad_dims_token + pad_dims_template, + value=rc.residue_types_with_nucleotides_order["-"], + ), + template_pseudo_beta_mask=F.pad( + self.template_pseudo_beta_mask, + pad=pad_dims_token + pad_dims_template, + ), + template_backbone_frame_mask=F.pad( + self.template_backbone_frame_mask, + pad=pad_dims_token + pad_dims_template, + ), + template_distances=F.pad( + self.template_distances, + pad=pad_dims_token + pad_dims_token + pad_dims_template, + ), + template_unit_vector=F.pad( + self.template_unit_vector, + # This field has a final dimension of size 3, which we shouldn't pad + pad=(0, 0) + pad_dims_token + pad_dims_token + pad_dims_template, + ), + ) diff --git a/forks/chai-lab/chai_lab/data/features/__init__.py b/forks/chai-lab/chai_lab/data/features/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/forks/chai-lab/chai_lab/data/features/feature_factory.py b/forks/chai-lab/chai_lab/data/features/feature_factory.py new file mode 100644 index 00000000..a7baa41e --- /dev/null +++ b/forks/chai-lab/chai_lab/data/features/feature_factory.py @@ -0,0 +1,22 @@ +"""Helper methods for generating model input features""" + +import logging + +from torch import Tensor + +from chai_lab.data.features.generators.base import FeatureGenerator + +logger = logging.getLogger(__name__) + + +class FeatureFactory: + generators: dict[str, FeatureGenerator] + + 
def __init__(self, generators: dict[str, FeatureGenerator]): + self.generators = generators + + def generate(self, batch) -> dict[str, Tensor]: + return {name: gen.generate(batch) for name, gen in self.generators.items()} + + def __repr__(self) -> str: + return f"Feature factory, {len(self.generators)=}" diff --git a/forks/chai-lab/chai_lab/data/features/feature_type.py b/forks/chai-lab/chai_lab/data/features/feature_type.py new file mode 100644 index 00000000..49eb42ef --- /dev/null +++ b/forks/chai-lab/chai_lab/data/features/feature_type.py @@ -0,0 +1,12 @@ +from enum import Enum + + +class FeatureType(Enum): + RESIDUE = "RESIDUE" + PAIR = "PAIR" + MSA = "MSA" + TEMPLATES = "TEMPLATES" + TOKEN = "TOKEN" + TOKEN_PAIR = "TOKEN_PAIR" + ATOM = "ATOM" + ATOM_PAIR = "ATOM_PAIR" diff --git a/forks/chai-lab/chai_lab/data/features/feature_utils.py b/forks/chai-lab/chai_lab/data/features/feature_utils.py new file mode 100644 index 00000000..ef255129 --- /dev/null +++ b/forks/chai-lab/chai_lab/data/features/feature_utils.py @@ -0,0 +1,27 @@ +"""Utility classes and functions for feature representations""" + +from chai_lab.utils.typing import typecheck + + +@typecheck +def get_entry_for_key(data: dict, key: str): + """finds entry 'key' in data dictionary + + Parameters: + data: the dict to search in + key: the key to search for + + Example 1: + data=dict(foo=dict(bar="bar")) + key = "foo" + returns: dict(bar="bar") + Example 2: + data=dict(foo=dict(bar="bar")) + key = "foo/bar" + returns: "bar" + + """ + sub_keys, sub_dict = key.split("/"), data + for sub_key in sub_keys: + sub_dict = sub_dict[sub_key] + return sub_dict diff --git a/forks/chai-lab/chai_lab/data/features/generators/__init__.py b/forks/chai-lab/chai_lab/data/features/generators/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/forks/chai-lab/chai_lab/data/features/generators/atom_element.py b/forks/chai-lab/chai_lab/data/features/generators/atom_element.py new file mode 100644 index 
00000000..37be5cdf --- /dev/null +++ b/forks/chai-lab/chai_lab/data/features/generators/atom_element.py @@ -0,0 +1,30 @@ +import torch +from torch import Tensor + +from chai_lab.data.features.feature_type import FeatureType +from chai_lab.data.features.generators.base import EncodingType, FeatureGenerator +from chai_lab.utils.typing import Int, typecheck + + +class AtomElementOneHot(FeatureGenerator): + def __init__( + self, + max_atomic_num: int = 128, + ): + super().__init__( + ty=FeatureType.ATOM, + encoding_ty=EncodingType.ONE_HOT, + can_mask=True, + num_classes=max_atomic_num + 1, + mult=1, + ) + + def get_input_kwargs_from_batch(self, batch) -> dict: + return dict(atomic_numbers=batch["inputs"]["atom_ref_element"]) + + @typecheck + def _generate(self, atomic_numbers: Int[Tensor, "b n"]) -> Tensor: + """see super class""" + return self.make_feature( + data=torch.clamp(atomic_numbers, max=self.num_classes).unsqueeze(-1), + ) diff --git a/forks/chai-lab/chai_lab/data/features/generators/atom_name.py b/forks/chai-lab/chai_lab/data/features/generators/atom_name.py new file mode 100644 index 00000000..2c0b742b --- /dev/null +++ b/forks/chai-lab/chai_lab/data/features/generators/atom_name.py @@ -0,0 +1,27 @@ +from torch import Tensor + +from chai_lab.data.features.feature_type import FeatureType +from chai_lab.data.features.generators.base import EncodingType, FeatureGenerator +from chai_lab.utils.typing import Int, typecheck + + +class AtomNameOneHot(FeatureGenerator): + def __init__( + self, + num_chars: int = 64, + ): + super().__init__( + ty=FeatureType.ATOM, + encoding_ty=EncodingType.ONE_HOT, + can_mask=True, + num_classes=num_chars, + mult=4, + ) + + def get_input_kwargs_from_batch(self, batch) -> dict: + return dict(atom_name_chars=batch["inputs"]["atom_ref_name_chars"]) + + @typecheck + def _generate(self, atom_name_chars: Int[Tensor, "b n 4"]) -> Tensor: + """see super class""" + return self.make_feature(data=atom_name_chars) diff --git 
a/forks/chai-lab/chai_lab/data/features/generators/base.py b/forks/chai-lab/chai_lab/data/features/generators/base.py new file mode 100644 index 00000000..f2689372 --- /dev/null +++ b/forks/chai-lab/chai_lab/data/features/generators/base.py @@ -0,0 +1,109 @@ +"""Feature Generator ABC and Default implementation""" + +from abc import ABC +from enum import Enum + +import torch +from beartype import beartype as typechecker +from torch import Tensor +from typing_extensions import assert_never + +from chai_lab.data.features.feature_type import FeatureType + + +class EncodingType(Enum): + ONE_HOT = "one-hot" + RBF = "rbf" + FOURIER = "fourier" + IDENTITY = "identity" + ESM = "esm" + OUTERSUM = "outersum" + + +def cast_feature( + feature: Tensor, + encoding_ty: EncodingType, +): + match encoding_ty: + case EncodingType.IDENTITY: + feature = feature.float() + # safety check + assert feature.abs().max() < 100, feature + return feature + case EncodingType.RBF | EncodingType.FOURIER: + assert feature.dtype in (torch.float16, torch.float32, torch.bfloat16) + return feature + case EncodingType.ONE_HOT | EncodingType.OUTERSUM: + if feature.dtype not in { + torch.long, + torch.int, + torch.int16, + torch.int8, + torch.uint8, + }: + raise ValueError( + f"dtype {feature.dtype} is not a valid type for {encoding_ty}" + ) + return feature + case EncodingType.ESM: + return feature + + assert_never(encoding_ty) # Enum exhaustiveness check + + +class FeatureGenerator(ABC): + @typechecker + def __init__( + self, + ty: FeatureType, + encoding_ty: EncodingType, + num_classes: int = -1, + mult: int = 1, + ignore_index: float = -100.0, + can_mask: bool = True, # marks existing, but unknown values (e.g. 
atom position) + ): + self.ty = ty + self.encoding_ty = encoding_ty + self.num_classes = num_classes + self.mult = mult + self.ignore_index = ignore_index + self.can_mask = can_mask + + @property + def mask_value(self) -> int | float | Tensor: + """Get value used to mask this feature""" + match self.encoding_ty: + case EncodingType.ONE_HOT | EncodingType.OUTERSUM: + return self.num_classes + case EncodingType.FOURIER | EncodingType.RBF: + return -100.0 + case EncodingType.IDENTITY: + assert self.can_mask + mask = torch.zeros(self.num_classes + int(self.can_mask)) + mask[-1] = 1 # last channel is 1 for masked-out items + return mask + case EncodingType.ESM: + return 0.0 + + assert_never(self.encoding_ty) # Enum exhaustiveness check + + def generate(self, batch) -> Tensor: + """Generate a feature""" + kwargs = self.get_input_kwargs_from_batch(batch) + feature = self._generate(**kwargs) + return feature + + def _generate(self, *args, **kwargs) -> Tensor: + """Generate a feature""" + raise NotImplementedError("implement me") + + def get_input_kwargs_from_batch(self, batch) -> dict: + """Get input keyword arguments to pass to _generate""" + raise NotImplementedError("implement me") + + def make_feature(self, data: Tensor) -> Tensor: + """Checks and converts dtype if necessary""" + return cast_feature(data, encoding_ty=self.encoding_ty) + + def __repr__(self): + return f"[FeatureGenerator] : type: {self.ty}" diff --git a/forks/chai-lab/chai_lab/data/features/generators/blocked_atom_pair_distances.py b/forks/chai-lab/chai_lab/data/features/generators/blocked_atom_pair_distances.py new file mode 100644 index 00000000..95a9f498 --- /dev/null +++ b/forks/chai-lab/chai_lab/data/features/generators/blocked_atom_pair_distances.py @@ -0,0 +1,171 @@ +from typing import Any, Literal + +import torch +from einops import rearrange +from torch import Tensor + +from chai_lab.data.features.feature_type import FeatureType +from chai_lab.data.features.generators.base import EncodingType, 
FeatureGenerator +from chai_lab.utils.tensor_utils import cdist +from chai_lab.utils.typing import Bool, Float, Int, typecheck + +_VALID_ENCODING_TYPES = [ + EncodingType.IDENTITY, +] +DEFAULT_ONE_HOT_DIST_BINS = [0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 8.0, 12.0, 16.0] +DEFAULT_RBF_DIST_BINS = [0.0, 2.0, 4.0, 6.0, 8.0, 10.0, 12.0, 14.0, 16.0] + + +class BlockedAtomPairDistances(FeatureGenerator): + transform: Literal["none", "inverse_squared"] + + def __init__( + self, + encoding_ty: EncodingType = EncodingType.IDENTITY, + transform: Literal["none", "inverse_squared"] = "inverse_squared", + ): + assert ( + encoding_ty in _VALID_ENCODING_TYPES + ), f"invalid encoding type: {encoding_ty}" + + # initialize superclass after augmenting input params =O. + super().__init__( + ty=FeatureType.ATOM_PAIR, + encoding_ty=encoding_ty, + # one of dist_bins of rbf_radii is not None. + num_classes=1, + mult=1, + can_mask=True, + ) + self.transform = transform + + def get_input_kwargs_from_batch(self, batch: dict[str, Any]) -> dict: + return dict( + atom_ref_pos=batch["inputs"]["atom_ref_pos"], + atom_ref_mask=batch["inputs"]["atom_ref_mask"], + atom_ref_space_uid=batch["inputs"]["atom_ref_space_uid"], + q_idces=batch["inputs"]["block_atom_pair_q_idces"], + kv_idces=batch["inputs"]["block_atom_pair_kv_idces"], + block_atom_pair_mask=batch["inputs"]["block_atom_pair_mask"], + ) + + @typecheck + def _generate( + self, + atom_ref_pos: Float[Tensor, "b n 3"], + atom_ref_mask: Bool[Tensor, "b n"], + atom_ref_space_uid: Int[Tensor, "b n"], + q_idces: Int[Tensor, "bl bl_q"], + kv_idces: Int[Tensor, "bl bl_kv"], + block_atom_pair_mask: Bool[Tensor, "b bl bl_q bl_kv"], + ) -> Tensor: + """see super class""" + + blocked_feat, blocked_mask = get_blocked_atom_pair_dists( + atom_ref_pos, + atom_ref_space_uid, + q_idces, + kv_idces, + block_atom_pair_mask, + ) + + if self.transform == "inverse_squared": + blocked_feat = 1 / (1 + blocked_feat**2) + + # return (B, n, n, 2) where ...,0 is the feature + 
# and ...,1 indicates if the value is masked + # because 0.0 has a meaning as a distance + + blocked_feat = blocked_feat.unsqueeze(-1) + blocked_mask = blocked_mask.unsqueeze(-1).float() + + return self.make_feature( + torch.cat( + [blocked_feat, blocked_mask], + dim=-1, + ) + ) + + +class BlockedAtomPairDistogram(FeatureGenerator): + dist_bins: Tensor + + def __init__( + self, + dist_bins: list[float] | None = None, + encoding_ty: EncodingType = EncodingType.ONE_HOT, + ): + if dist_bins is None and encoding_ty == EncodingType.ONE_HOT: + dist_bins = DEFAULT_ONE_HOT_DIST_BINS + elif dist_bins is None and encoding_ty == EncodingType.RBF: + dist_bins = DEFAULT_RBF_DIST_BINS + assert dist_bins is not None, "must provide dist_bins" + + # initialize superclass after augmenting input params =O. + super().__init__( + ty=FeatureType.ATOM_PAIR, + encoding_ty=encoding_ty, + # one of dist_bins of rbf_radii is not None. + num_classes=len(dist_bins) + 1, + mult=1, + can_mask=True, + ) + self.dist_bins = torch.tensor(dist_bins) + + def get_input_kwargs_from_batch(self, batch: dict[str, Any]) -> dict: + return dict( + atom_ref_pos=batch["inputs"]["atom_ref_pos"], + atom_ref_mask=batch["inputs"]["atom_ref_mask"], + atom_ref_space_uid=batch["inputs"]["atom_ref_space_uid"], + q_idces=batch["inputs"]["block_atom_pair_q_idces"], + kv_idces=batch["inputs"]["block_atom_pair_kv_idces"], + block_atom_pair_mask=batch["inputs"]["block_atom_pair_mask"], + ) + + @typecheck + def _generate( + self, + atom_ref_pos: Float[Tensor, "b n 3"], + atom_ref_mask: Bool[Tensor, "b n"], + atom_ref_space_uid: Int[Tensor, "b n"], + q_idces: Int[Tensor, "bl bl_q"], + kv_idces: Int[Tensor, "bl bl_kv"], + block_atom_pair_mask: Bool[Tensor, "b bl bl_q bl_kv"], + ) -> Tensor: + """see super class""" + feat, mask = get_blocked_atom_pair_dists( + atom_ref_pos, + atom_ref_space_uid, + q_idces, + kv_idces, + block_atom_pair_mask, + ) + if self.encoding_ty == EncodingType.ONE_HOT: + feat = 
torch.searchsorted(self.dist_bins.to(atom_ref_pos.device), feat) + feat.masked_fill_(~mask, self.mask_value) + + return self.make_feature(feat.unsqueeze(-1)) + + +@typecheck +def get_blocked_atom_pair_dists( + positions: Float[Tensor, "b a 3"], + atom_ref_space_uid: Int[Tensor, "b a"], + q_idx: Int[Tensor, "bl bl_q"], + kv_idx: Int[Tensor, "bl bl_kv"], + block_atom_pair_mask: Bool[Tensor, "b bl bl_q bl_kv"], +) -> tuple[Float[Tensor, "b bl bl_q bl_kv"], Bool[Tensor, "b bl bl_q bl_kv"]]: + q_pos = positions[:, q_idx] + kv_pos = positions[:, kv_idx] + + blocked_pair_dists = cdist(q_pos, kv_pos) # b bl bl_q bl_kv + + atom_ref_space_q = atom_ref_space_uid[:, q_idx] + atom_ref_space_kv = atom_ref_space_uid[:, kv_idx] + block_same_atom_ref_space = rearrange( + atom_ref_space_q, "b bl a_q -> b bl a_q 1" + ) == rearrange(atom_ref_space_kv, "b bl a_kv -> b bl 1 a_kv") + + block_atom_pair_mask &= block_same_atom_ref_space + + return blocked_pair_dists, block_atom_pair_mask diff --git a/forks/chai-lab/chai_lab/data/features/generators/docking.py b/forks/chai-lab/chai_lab/data/features/generators/docking.py new file mode 100644 index 00000000..6bcc7267 --- /dev/null +++ b/forks/chai-lab/chai_lab/data/features/generators/docking.py @@ -0,0 +1,367 @@ +import logging +import random +from dataclasses import dataclass +from typing import Any + +import torch +from einops import rearrange, repeat +from torch import Tensor + +from chai_lab.data.features.feature_type import FeatureType +from chai_lab.data.features.generators.base import EncodingType, FeatureGenerator +from chai_lab.data.features.generators.token_pair_distance import TokenCenterDistance +from chai_lab.data.parsing.structure.entity_type import EntityType +from chai_lab.model.utils import get_asym_id_from_subchain_id +from chai_lab.utils.defaults import default +from chai_lab.utils.tensor_utils import cdist, und, und_self +from chai_lab.utils.typing import Bool, Float, Int, UInt8, typecheck + +logger = 
logging.getLogger(__name__) + + +@typecheck +@dataclass +class ConstraintGroup: + """ + Container for a docking constraint group -- + collection of chains with inter/intra distance constraints + + This class can be used to specify a set of chains to be + grouped together for the docking feature + """ + + subchain_ids: list[str] + noise_sigma: float + dropout_prob: float + atom_center_mask: list[Bool[Tensor, "_"]] + atom_center_coords: list[Float[Tensor, "_ 3"]] + + def __post_init__(self) -> None: + """Ensure params are consistent""" + assert len(self.subchain_ids) == len( + self.atom_center_coords + ), f"{len(self.subchain_ids)=}, {len(self.atom_center_coords)=}" + assert len(self.subchain_ids) == len( + self.atom_center_mask + ), f"{len(self.subchain_ids)=}, {len(self.atom_center_mask)=}" + assert all( + [ + len(mask) == len(coord) + for coord, mask in zip(self.atom_center_coords, self.atom_center_mask) + ] + ), ( + f"{[len(x) for x in self.atom_center_coords]=}, " + f"{[len(x) for x in self.atom_center_mask]=}" + ) + + def get_asym_ids( + self, + token_subchain_id: UInt8[Tensor, "n 4"], + token_asym_id: Int[Tensor, "n"], + ) -> list[int]: + return [ + get_asym_id_from_subchain_id( + subchain_id=subchain_id, + source_pdb_chain_id=token_subchain_id, + token_asym_id=token_asym_id, + ) + for subchain_id in self.subchain_ids + ] + + def __str__(self): + return ( + f"ConstraintGroup(subchain_ids={self.subchain_ids}, " + f"atom_center_coords.shape={[x.shape for x in self.atom_center_coords]}, " + f"atom_center_mask.shape={[x.shape for x in self.atom_center_mask]})" + ) + + +class DockingConstraintGenerator(FeatureGenerator): + """Docking Feature Generator + + Works as follows: + separate input chains into two groups by randomly + partitioning asym_id's. + Provide all token-center distances for chains within the + same asm_id group. + Mask all token-center distances for chains within the + different asm_id groups. 
+ + """ + + def __init__( + self, + dist_bins: list[float] | None = None, + coord_noise: tuple[float, float] = (0.0, 3.0), + include_probability: float = 0.1, + structure_dropout_prob: float = 0.0, + chain_dropout_prob: float = 0.0, + entity_types: list[EntityType] | None = None, + ): + dist_bins = dist_bins if dist_bins is not None else [0.0, 4.0, 8.0, 16.0] + super().__init__( + ty=FeatureType.TOKEN_PAIR, + encoding_ty=EncodingType.ONE_HOT, + # one of dist_bins of rbf_radii is not None. + num_classes=len(dist_bins) + 1, + mult=1, + can_mask=True, + ) + self.token_dist_gen = TokenCenterDistance(dist_bins=dist_bins) + + # maintain consistent orders + self.coord_noise = coord_noise + self.include_probability = include_probability + self.structure_dropout_prob = structure_dropout_prob + self.chain_dropout_prob = chain_dropout_prob + self.entity_types = set( + [x.value for x in default(entity_types, [e for e in EntityType])] + ) + + def get_input_kwargs_from_batch(self, batch: dict[str, Any]) -> dict: + maybe_constraint_dicts = batch["inputs"].get("docking_constraints", [[None]])[0] + docking_constraints = batch["inputs"]["docking_constraints"] = ( + [ConstraintGroup(**d) for d in maybe_constraint_dicts] + if isinstance(maybe_constraint_dicts[0], dict) + else None + ) + + return dict( + all_atom_positions=batch["inputs"]["atom_gt_coords"], + all_atom_mask=batch["inputs"]["atom_exists_mask"], + token_single_mask=batch["inputs"]["token_exists_mask"], + token_center_atom_index=batch["inputs"]["token_centre_atom_index"].long(), + token_asym_id=batch["inputs"]["token_asym_id"].long(), + token_subchain_id=batch["inputs"]["subchain_id"], + token_entity_type=batch["inputs"]["token_entity_type"].long(), + constraints=docking_constraints, + ) + + def apply_structure_dropout( + self, feature: Tensor, prob: float | None = None + ) -> Tensor: + prob = default(prob, torch.rand(1).item()) + dropout_single_mask = torch.rand_like(feature.data[..., 0, 0].float()) < prob + 
dropout_pair_mask = und_self(dropout_single_mask, "b i, b j -> b i j") + feature = feature.masked_fill(dropout_pair_mask.unsqueeze(-1), self.mask_value) + return feature + + def apply_chain_dropout( + self, feature: Tensor, token_asym_id: Int[Tensor, "b n"] + ) -> Tensor: + structure_masks = [] + for i in range(token_asym_id.shape[0]): + data_i, asym_i = feature.data[i], token_asym_id[i] + unique_asyms = torch.unique(asym_i[asym_i != 0]).tolist() + random.shuffle(unique_asyms) # select chains to mask at random + selected_asyms = unique_asyms[: random.randint(0, len(unique_asyms))] + if len(selected_asyms) == 0: + structure_masks.append( + torch.zeros_like(data_i[..., 0], dtype=torch.bool) + ) + continue + asyms_to_mask = torch.tensor(selected_asyms, device=data_i.device) + asym_mask = torch.any(asym_i.unsqueeze(-1) == asyms_to_mask, dim=-1) + structure_mask = und_self(asym_mask, "i, j -> i j") + structure_masks.append(structure_mask) + feature_mask = torch.stack(structure_masks, dim=0) + feature = feature.masked_fill(feature_mask.unsqueeze(-1), self.mask_value) + return feature + + @typecheck + def _generate( + self, + all_atom_positions: Float[Tensor, "b a 3"], + all_atom_mask: Bool[Tensor, "b a"], + token_single_mask: Bool[Tensor, "b n"], + token_center_atom_index: Int[Tensor, "b n"], + token_asym_id: Int[Tensor, "b n"], + token_entity_type: Int[Tensor, "b n"], + token_subchain_id: UInt8[Tensor, "b n 4"], + constraints: list[ConstraintGroup] | None = None, + ) -> Tensor: + try: + if constraints is not None: + assert all_atom_positions.shape[0] == 1 + return self._generate_from_constraints( + token_asym_id=token_asym_id, + token_subchain_id=token_subchain_id, + constraints=constraints, + ) + except Exception as e: + logger.error(f"Error {e} generating docking constraints: {constraints}") + + return self._generate_from_batch( + all_atom_positions=all_atom_positions, + all_atom_mask=all_atom_mask, + token_single_mask=token_single_mask, + 
token_center_atom_index=token_center_atom_index, + token_asym_id=token_asym_id, + token_entity_type=token_entity_type, + ) + + def _asym_to_entity_type( + self, asym_id: Int[Tensor, "n"], entity_type: Int[Tensor, "n"] + ) -> dict[int, int]: + unique_asyms: Tensor = torch.unique(asym_id[asym_id != 0]) + mapping = dict() + for asym in unique_asyms.tolist(): + asym_mask = asym_id == asym + mapping[int(asym)] = int(entity_type[asym_mask][0].item()) + return mapping + + @typecheck + def _generate_from_batch( + self, + all_atom_positions=Float[Tensor, "b a 3"], + all_atom_mask=Bool[Tensor, "b a"], + token_single_mask=Bool[Tensor, "b n"], + token_center_atom_index=Int[Tensor, "b n"], + token_entity_type=Int[Tensor, "b n"], + token_asym_id=Int[Tensor, "b n"], + ) -> Tensor: + sampled_noise = random.uniform(self.coord_noise[0], self.coord_noise[1]) + token_center_dists = self.token_dist_gen._generate( + all_atom_positions=all_atom_positions + + torch.randn_like(all_atom_positions) * sampled_noise, + all_atom_mask=all_atom_mask, + token_single_mask=token_single_mask, + token_center_atom_index=token_center_atom_index, + ).data + for i in range(token_center_dists.shape[0]): + asym_to_entity = self._asym_to_entity_type( + token_asym_id[i], token_entity_type[i] + ) + asym_include_list = [ + asym for asym, ety in asym_to_entity.items() if ety in self.entity_types + ] + asym_exclude_list = [ + asym + for asym, ety in asym_to_entity.items() + if ety not in self.entity_types + ] + # exclude other entity types + asym_exclude_mask = torch.any( + (token_asym_id[i].unsqueeze(-1) == torch.tensor(asym_exclude_list)), + dim=-1, + ) + token_center_dists[i, asym_exclude_mask] = self.mask_value + token_center_dists[i, :, asym_exclude_mask] = self.mask_value + if ( + random.random() < self.include_probability + and len(asym_include_list) > 1 + ): + # include distances between select chains + random.shuffle(asym_include_list) + partition_idx = random.randint(1, len(asym_include_list) - 1) + 
_group_1, _group_2 = ( + asym_include_list[:partition_idx], + asym_include_list[partition_idx:], + ) + group_1, group_2 = torch.tensor(_group_1), torch.tensor(_group_2) + # find positions of elements in first and second group + group1_mask, group2_mask = [ + torch.any((token_asym_id[i].unsqueeze(-1) == x), dim=-1) + for x in (group_1, group_2) + ] + partition_mask = und(group1_mask, group2_mask, "i, j -> i j") + token_center_dists[i] = token_center_dists[i].masked_fill( + (partition_mask | partition_mask.T).unsqueeze(-1), self.mask_value + ) + else: + mask = torch.ones_like(token_center_dists[i], dtype=torch.bool) + token_center_dists[i] = token_center_dists[i].masked_fill( + mask, self.mask_value + ) + + feature = self.make_feature(token_center_dists) + if random.random() < self.structure_dropout_prob: + feature = self.apply_structure_dropout(feature) + elif random.random() < self.chain_dropout_prob: + feature = self.apply_chain_dropout(feature, token_asym_id) + return feature + + @typecheck + def _generate_from_constraints( + self, + # constraints only supported with batch size 1 + token_asym_id: Int[Tensor, "1 n"], + token_subchain_id: UInt8[Tensor, "1 n 4"], + constraints: list[ConstraintGroup], + ) -> Tensor: + logger.info(f"Generating docking feature from constraints: {constraints}") + n, device = token_asym_id.shape[1], token_asym_id.device + constraint_mat = torch.zeros(n, n, device=device, dtype=torch.float32) + constraint_mask = torch.zeros(n, n, device=device, dtype=torch.bool) + for constraint_group in constraints: + # add constraints between members of each group + coords = [ + x + torch.randn_like(x) * constraint_group.noise_sigma + for x in constraint_group.atom_center_coords + ] + n_chains = len(constraint_group.subchain_ids) + l_idx, r_idx = torch.triu_indices(n_chains, n_chains) + chain_asyms = constraint_group.get_asym_ids( + token_subchain_id=rearrange(token_subchain_id, "1 ... -> ..."), + token_asym_id=rearrange(token_asym_id, "1 ... 
-> ..."), + ) + for i, j in zip(l_idx.tolist(), r_idx.tolist()): + constraint_mat, constraint_mask = self.add_constraint( + constraint_mat=constraint_mat, + constraint_mask=constraint_mask, + token_asym_id=rearrange(token_asym_id, "1 ... -> ..."), + chain1_asym_id=chain_asyms[i], + chain2_asym_id=chain_asyms[j], + chain1_coords=coords[i], + chain1_mask=constraint_group.atom_center_mask[i], + chain2_coords=coords[j], + chain2_mask=constraint_group.atom_center_mask[j], + ) + # encode and apply mask + feat = torch.searchsorted( + self.token_dist_gen.dist_bins.to(constraint_mat.device), constraint_mat + ) + feat = feat.masked_fill(~constraint_mask, self.mask_value) + # add back batch dim + constraint_mat = repeat(feat, "i j -> 1 i j 1") + # apply structure dropout + dropout = constraints[0].dropout_prob if len(constraints) > 0 else 0.0 + feature = self.make_feature(constraint_mat) + feature = self.apply_structure_dropout(feature, prob=dropout) + return feature + + @typecheck + def add_constraint( + self, + constraint_mat: Float[Tensor, "n n"], + constraint_mask: Bool[Tensor, "n n"], + token_asym_id: Int[Tensor, "n"], + chain1_asym_id: int, + chain2_asym_id: int, + chain1_coords: Float[Tensor, "c1 3"], + chain2_coords: Float[Tensor, "c2 3"], + chain1_mask: Bool[Tensor, "c1"], + chain2_mask: Bool[Tensor, "c2"], + ) -> tuple[Float[Tensor, "n n"], Bool[Tensor, "n n"]]: + (c1_posns,) = torch.where(token_asym_id == chain1_asym_id) + (c2_posns,) = torch.where(token_asym_id == chain2_asym_id) + # make sure we have a coordinate for each position + assert len(c1_posns) == len( + chain1_coords + ), f"{c1_posns.shape=}, {chain1_coords.shape=}" + assert len(c2_posns) == len( + chain2_coords + ), f"{c2_posns.shape=}, {chain2_coords.shape=}" + + pairwise_dists = cdist(chain1_coords, chain2_coords) + pairwise_mask = und(chain1_mask, chain2_mask, "i, j -> i j") + pairwise_dists[~pairwise_mask] = -1.0 + # mask and fill the constraint matrix + row_idxs = repeat(c1_posns, "i -> i c", 
c=len(c2_posns)) + col_idxs = repeat(c2_posns, "j -> r j", r=len(c1_posns)) + # fill constraints and mask + constraint_mat[row_idxs, col_idxs] = pairwise_dists + constraint_mat[col_idxs, row_idxs] = pairwise_dists + constraint_mask[row_idxs, col_idxs] = pairwise_mask + constraint_mask[col_idxs, row_idxs] = pairwise_mask + return constraint_mat, constraint_mask diff --git a/forks/chai-lab/chai_lab/data/features/generators/esm_generator.py b/forks/chai-lab/chai_lab/data/features/generators/esm_generator.py new file mode 100644 index 00000000..671d4825 --- /dev/null +++ b/forks/chai-lab/chai_lab/data/features/generators/esm_generator.py @@ -0,0 +1,30 @@ +from torch import Tensor + +from chai_lab.data.features.feature_type import FeatureType +from chai_lab.data.features.generators.base import EncodingType, FeatureGenerator +from chai_lab.utils.typing import Float, typecheck + + +class ESMEmbeddings(FeatureGenerator): + def __init__( + self, + ty: FeatureType = FeatureType.TOKEN, + ): + super().__init__( + ty=ty, + encoding_ty=EncodingType.ESM, + can_mask=False, + mult=1, + ) + + def get_input_kwargs_from_batch(self, batch) -> dict: + return dict( + esm_embeddings=batch["inputs"]["esm_embeddings"], + ) + + @typecheck + def _generate( + self, + esm_embeddings: Float[Tensor, "batch num_tokens d_emb"], + ) -> Tensor: + return self.make_feature(data=esm_embeddings) diff --git a/forks/chai-lab/chai_lab/data/features/generators/identity.py b/forks/chai-lab/chai_lab/data/features/generators/identity.py new file mode 100644 index 00000000..f4d1107e --- /dev/null +++ b/forks/chai-lab/chai_lab/data/features/generators/identity.py @@ -0,0 +1,44 @@ +import torch +from einops import rearrange +from torch import Tensor + +import chai_lab.data.features.feature_utils as futils +from chai_lab.data.features.feature_type import FeatureType +from chai_lab.data.features.generators.base import EncodingType, FeatureGenerator + + +class Identity(FeatureGenerator): + def __init__( + self, + 
class ChainIsCropped(FeatureGenerator):
    """Chain-level feature that indicates if a chain has been cropped.

    As implemented here the generated feature is always zero for every token.
    """

    def __init__(self):
        super().__init__(
            ty=FeatureType.TOKEN,
            encoding_ty=EncodingType.IDENTITY,
            num_classes=1,
            mult=1,
            can_mask=False,
        )

    def get_input_kwargs_from_batch(self, batch) -> dict:
        """Pull the per-token asym ids (cast to int64) out of the batch."""
        inputs = batch["inputs"]
        return {"token_asym_id": inputs["token_asym_id"].long()}

    @typecheck
    def _generate(
        self,
        token_asym_id: Int[Tensor, "b n"],
    ) -> Tensor:
        """Return an all-zero (b, n, 1) feature.

        ``token_asym_id`` is only used as a template for shape, dtype and
        device of the zeros.
        """
        not_cropped = torch.zeros_like(token_asym_id)
        return self.make_feature(not_cropped[..., None])
class MissingChainContact(FeatureGenerator):
    """Token-level feature that indicates if a chain has no tokens in contact
    with tokens from another chain.
    """

    contact_threshold: float

    def __init__(
        self,
        # Use DockQ atom contact cutoff as default
        contact_threshold: float = 6.0,
    ):
        """
        Parameters:
            contact_threshold: two atoms from different chains count as being
                in contact when their distance is strictly below this value.
        """
        super().__init__(
            ty=FeatureType.TOKEN,
            can_mask=False,
            encoding_ty=EncodingType.IDENTITY,
            num_classes=1,
            mult=1,
        )
        self.contact_threshold = contact_threshold

    def get_input_kwargs_from_batch(self, batch) -> dict:
        """Collect ground-truth coordinates, existence masks and index maps."""
        return dict(
            atom_gt_coords=batch["inputs"]["atom_gt_coords"],
            atom_exists_mask=batch["inputs"]["atom_exists_mask"],
            token_exists_mask=batch["inputs"]["token_exists_mask"],
            token_asym_id=batch["inputs"]["token_asym_id"].long(),
            atom_token_index=batch["inputs"]["atom_token_index"].long(),
        )

    @typecheck
    def _generate(
        self,
        atom_gt_coords: Float[Tensor, "b a 3"],
        atom_exists_mask: Bool[Tensor, "b a"],
        token_exists_mask: Bool[Tensor, "b n"],
        token_asym_id: Int[Tensor, "b n"],
        atom_token_index: Int[Tensor, "b a"],
    ) -> Tensor:
        """Flag, per token, whether its chain has no inter-chain contact.

        Returns the result of ``make_feature`` on a float (b, n, 1) tensor
        that is 1.0 for tokens whose asym id belongs to a chain with no atom
        closer than ``contact_threshold`` to an atom of a different chain, and
        0.0 otherwise. Examples with a single chain are all zeros.
        """
        # per-atom asym id (gather needs an int64 index)
        atom_asym_id = torch.gather(token_asym_id, dim=1, index=atom_token_index.long())
        # NOTE(review): `cdist` / `und_self` are project helpers; presumably
        # all-pairs distances and the outer AND of the mask with itself.
        atom_pair_dist = cdist(atom_gt_coords)
        atom_pair_mask = und_self(atom_exists_mask, "b i, b j -> b i j")
        atom_pair_asym_mask = atom_asym_id.unsqueeze(-1) != atom_asym_id.unsqueeze(-2)
        aggregate_mask = (
            atom_pair_mask
            & atom_pair_asym_mask
            & (atom_pair_dist < self.contact_threshold)
        )
        # atoms that touch at least one existing atom from another chain
        atom_in_contact = aggregate_mask.any(dim=-1)

        chain_contact_features: list[torch.Tensor] = []
        for b in range(atom_gt_coords.shape[0]):
            unique_chain_asyms = torch.unique(token_asym_id[b][token_exists_mask[b]])
            if len(unique_chain_asyms) == 1:
                # monomers are set to have no missing contacts
                chain_contact_features.append(
                    torch.zeros_like(
                        token_asym_id[b].unsqueeze(-1), dtype=torch.float32
                    )
                )
                continue
            unique_asyms_with_contacts = torch.unique(
                atom_asym_id[b][atom_in_contact[b]]
            )
            # Set difference done with torch.isin so every tensor stays on the
            # inputs' device; building a fresh CPU tensor from a Python list
            # breaks the comparison below for non-CPU inputs.
            asyms_without_contacts = unique_chain_asyms[
                ~torch.isin(unique_chain_asyms, unique_asyms_with_contacts)
            ]
            # mark every token whose chain is in the "no contact" set
            feat = torch.isin(token_asym_id[b], asyms_without_contacts).unsqueeze(-1)
            chain_contact_features.append(feat.float())

        # make the feature
        return self.make_feature(torch.stack(chain_contact_features, dim=0))
class MSAFeatureGenerator(FeatureGenerator):
    """Generates feature for one-hot encoding of processed MSA, same classes as restype."""

    def __init__(self):
        # one class per entry of the residue/nucleotide vocabulary
        vocab_size = len(residue_types_with_nucleotides_order)
        super().__init__(
            ty=FeatureType.MSA,
            encoding_ty=EncodingType.ONE_HOT,
            num_classes=vocab_size,
            mult=1,
            can_mask=False,
        )

    def get_input_kwargs_from_batch(self, batch: dict[str, Any]) -> dict:
        """Fetch the MSA token tensor from the batch inputs."""
        return {"msa_tokens": batch["inputs"]["msa_tokens"]}

    @typecheck
    def _generate(
        self,
        msa_tokens: UInt8[Tensor, "batch depth tokens"],
    ) -> Tensor:
        """Add a trailing singleton axis to the MSA tokens and wrap them with
        ``make_feature``."""
        tokens_with_channel = msa_tokens[..., None]
        return self.make_feature(data=tokens_with_channel)
class MSAProfileGenerator(FeatureGenerator):
    """MSA profile - distribution across residue types BEFORE processing"""

    def __init__(self):
        self.num_res_ty = len(residue_types_with_nucleotides_order)
        super().__init__(
            ty=FeatureType.TOKEN,
            encoding_ty=EncodingType.IDENTITY,
            can_mask=False,
            num_classes=self.num_res_ty,
        )

    def get_input_kwargs_from_batch(self, batch: dict[str, Any]) -> dict:
        """Fetch the main-MSA tokens and their validity mask from the batch."""
        return dict(
            main_msa_tokens=batch["inputs"]["main_msa_tokens"],
            main_msa_mask=batch["inputs"]["main_msa_mask"],
        )

    @typecheck
    def _generate(
        self,
        main_msa_tokens: UInt8[Tensor, "batch depth tokens"],
        main_msa_mask: Bool[Tensor, "batch depth tokens"],
    ) -> Tensor:
        """Optimized implementation based on torch.scatter_add

        For every token position, counts how many unmasked MSA rows carry each
        residue type and normalizes the counts to sum to one. Positions with
        no unmasked row yield an all-zero profile (the denominator is clamped
        to at least 1).
        """
        batch, _, tokens = main_msa_tokens.shape

        # Accumulate in int64 on the inputs' device. A uint8 accumulator (the
        # dtype of the tokens) silently wraps modulo 256 as soon as more than
        # 255 sequences share a residue type at one position.
        counts = torch.zeros(
            (batch, tokens, self.num_res_ty),
            dtype=torch.long,
            device=main_msa_tokens.device,
        ).scatter_add(
            dim=2,
            index=rearrange(
                main_msa_tokens.long(), "batch depth tokens -> batch tokens depth"
            ),
            # each unmasked row contributes 1 to its residue-type bucket
            src=rearrange(
                main_msa_mask.long(),
                "batch depth tokens -> batch tokens depth",
            ),
        )
        denom = counts.sum(dim=-1, keepdim=True).clamp_min_(1)
        # integer / integer is true division and yields a floating tensor
        profile = counts / denom

        return self.make_feature(data=profile)
class IsPairedMSAGenerator(FeatureGenerator):
    """
    Relative species encoding within each MSA sequence
    """

    def __init__(self):
        super().__init__(
            ty=FeatureType.MSA,
            encoding_ty=EncodingType.IDENTITY,
            num_classes=1,
            mult=1,
            can_mask=False,
        )

    def get_input_kwargs_from_batch(self, batch: dict[str, Any]) -> dict:
        """Fetch the MSA mask and per-entry species ids from the batch."""
        inputs = batch["inputs"]
        return {
            "msa_mask": inputs["msa_mask"],
            "msa_species": inputs["msa_species"],
        }

    @typecheck
    def _generate(
        self,
        msa_mask: Bool[Tensor, "batch depth tokens"],
        msa_species: Int[Tensor, "batch depth tokens"],
    ) -> Tensor:
        """Mark entries whose species equals that of the first entry along the
        last axis.

        The uint8 result is 1 only where the species matches, the entry is
        unmasked, and the species is not ``UNKNOWN_SPECIES``; it gets a
        trailing singleton axis before being passed to ``make_feature``.
        """
        # species of the first position along the last axis, kept broadcastable
        reference_species = msa_species[..., :1]
        matches_reference = msa_species == reference_species
        usable = msa_mask & (msa_species != UNKNOWN_SPECIES)

        is_paired = (matches_reference & usable).to(torch.uint8)
        return self.make_feature(data=is_paired[..., None])
class RefPos(FeatureGenerator):
    """Provides reference position of atom"""

    def __init__(self):
        super().__init__(
            ty=FeatureType.ATOM,
            encoding_ty=EncodingType.IDENTITY,
            num_classes=N_COORDS,
            mult=1,
            can_mask=False,  # we expect to always have valid pos?
        )

    def generate(self, batch: dict) -> torch.Tensor:
        """Scale the atom reference positions by 1/10 and emit them as a feature.

        Raises:
            AssertionError: if any scaled position has norm >= 100, if the
                tensor is not 3-dimensional, or if its last axis does not have
                ``N_COORDS`` entries.
        """
        # divide by 10 -- better scale for embedding
        scaled_pos = batch["inputs"]["atom_ref_pos"] / 10.0

        largest_norm = torch.amax(scaled_pos.norm(dim=-1))
        assert largest_norm < 100.0, "wrong scale!"
        assert scaled_pos.ndim == 3
        assert scaled_pos.shape[-1] == N_COORDS

        return self.make_feature(data=scaled_pos)
class RelativeEntity(FeatureGenerator):
    """Relative Entity Encoding

    See algorithm 5 of AF-Multimer
    """

    def __init__(self):
        super().__init__(
            ty=FeatureType.TOKEN_PAIR,
            encoding_ty=EncodingType.ONE_HOT,
            can_mask=False,
            num_classes=3,
        )

    def get_input_kwargs_from_batch(self, batch) -> dict:
        """Fetch the per-token entity ids (cast to int64) from the batch."""
        return {"entity_id": batch["inputs"]["token_entity_id"].long()}

    @typecheck
    def _generate(
        self,
        entity_id: Int[Tensor, "b n"],
    ) -> Tensor:
        """Encode, for every token pair, the sign of their entity-index difference.

        The pairwise difference is shifted by one and clamped to {0, 1, 2}:
        0 when the row token's remapped entity index is lower than the column
        token's, 1 when equal, 2 when higher.
        """
        # remap the unique entity_id values to consecutive indices 0..k-1
        entity_id_from_zero = torch.unique(
            entity_id, sorted=True, return_inverse=True
        )[1]

        # (b, n, 1) - (b, 1, n) -> (b, n, n) pairwise difference
        pairwise_delta = (
            entity_id_from_zero[:, :, None] - entity_id_from_zero[:, None, :]
        )
        rel_entity = (pairwise_delta + 1).clamp(0, 2)
        assert rel_entity.dtype == torch.long
        return self.make_feature(rel_entity[..., None])
RelativeSequenceSeparation(FeatureGenerator): + def __init__( + self, + sep_bins: list[int] | list[float] | None = None, + num_bins: int | None = None, + ): + """Relative Sequence Separation Encoding""" + sep_bins = get_sep_bins(num_bins) if num_bins is not None else sep_bins + sep_bins = sep_bins if sep_bins is not None else SMALL_SEP_BINS + super().__init__( + ty=FeatureType.TOKEN_PAIR, + encoding_ty=EncodingType.ONE_HOT, + num_classes=len(sep_bins) + 2, + can_mask=False, + ) + self.sep_bins = torch.tensor(sep_bins) + + def get_input_kwargs_from_batch(self, batch) -> dict: + return dict( + residue_index=batch["inputs"]["token_residue_index"].long(), + asym_id=batch["inputs"]["token_asym_id"].long(), + ) + + @typecheck + def _generate( + self, + residue_index: Int[Tensor, "b n"], + asym_id: Int[Tensor, "b n"], + ) -> Tensor: + rel_sep, rel_chain = map( + lambda x: rearrange(x, "b n -> b n 1") - rearrange(x, "b n -> b 1 n"), + (residue_index, asym_id), + ) + encoded_feat = torch.searchsorted( + self.sep_bins.to(rel_sep.device), + rel_sep + 1e-4, # add small epsilon bc. 
class RelativeTokenSeparation(FeatureGenerator):
    """One-hot encoding of the token-index offset between tokens of the same
    residue and chain; every other pair is assigned a dedicated extra class.
    """

    def __init__(
        self,
        # using 16 for default here since values beyond this are very rare.
        r_max: int = 16,
    ):
        super().__init__(
            ty=FeatureType.TOKEN_PAIR,
            encoding_ty=EncodingType.ONE_HOT,
            can_mask=False,
            num_classes=2 * r_max + 3,
        )
        self.r_max = r_max

    def get_input_kwargs_from_batch(self, batch) -> dict:
        """Fetch token, residue and asym indices from the batch inputs."""
        inputs = batch["inputs"]
        return {
            "token_index": inputs["token_index"],
            "token_residue_index": inputs["token_residue_index"],
            "token_asym_id": inputs["token_asym_id"],
        }

    @typecheck
    def _generate(
        self,
        token_index: Int[Tensor, "b n"],
        token_residue_index: Int[Tensor, "b n"],
        token_asym_id: Int[Tensor, "b n"],
    ) -> Tensor:
        """Return the (b, n, n, 1) relative-separation classes.

        Pairs within the same residue and chain get the token-index difference
        shifted by ``r_max`` and clamped to ``[0, 2 * r_max + 1]``; all other
        pairs get the class ``2 * r_max + 2``.
        """

        def pairwise_difference(values: Tensor) -> Tensor:
            # (b, n) -> (b, n, n) with entry [i, j] = values[i] - values[j]
            return rearrange(values, "b n -> b n 1") - rearrange(
                values, "b n -> b 1 n"
            )

        rel_sep = pairwise_difference(token_index)
        rel_residue = pairwise_difference(token_residue_index)
        rel_chain = pairwise_difference(token_asym_id)

        # pairs that are NOT inside one residue of one chain
        outside_residue = (rel_residue != 0) | (rel_chain != 0)

        shifted = torch.clamp(rel_sep + self.r_max, 0, 2 * self.r_max + 1)
        # inter-residue and inter-chain pairs share the last class
        encoded = shifted.masked_fill(outside_residue, 2 * self.r_max + 2)

        return self.make_feature(encoded.unsqueeze(-1))
class ResidueType(FeatureGenerator):
    """One-hot residue-type token feature with optional random corruption."""

    def __init__(
        self,
        min_corrupt_prob: float = 0.0,
        max_corrupt_prob: float = 0.0,
        num_res_ty: int = 22,  # 20AA + gap + X
        key: str = "aatype",
    ):
        """
        Parameters:
            min_corrupt_prob: lower bound of the per-call corruption probability.
            max_corrupt_prob: upper bound of the per-call corruption probability.
            num_res_ty: number of residue-type classes.
            key: name of the entry under ``batch["inputs"]`` holding the
                residue types.
        """
        super().__init__(
            ty=FeatureType.TOKEN,
            encoding_ty=EncodingType.ONE_HOT,
            num_classes=num_res_ty,
            mult=1,
            can_mask=True,
        )
        self.key = key
        self.min_corrupt_prob = min_corrupt_prob
        self.max_corrupt_prob = max_corrupt_prob

    @typecheck
    def _corrupt_seq(
        self, sequence: Int[Tensor, "... n"]
    ) -> tuple[Int[Tensor, "... n"], Bool[Tensor, "... n"]]:
        """Corrupt the sequence with the given probability

        A corruption probability is drawn uniformly from
        ``[min_corrupt_prob, max_corrupt_prob]``; each position is then
        selected independently with that probability and overwritten with a
        random integer in ``[0, num_classes - 1)``.

        Returns:
            A corrupted copy of ``sequence`` (the input is not modified) and
            the boolean mask of positions that were overwritten.
        """
        prob = np.random.uniform(
            low=self.min_corrupt_prob, high=self.max_corrupt_prob
        )
        corrupt_mask = torch.rand_like(sequence.float()) < prob
        # one random replacement per selected position
        selected = corrupt_mask[corrupt_mask].long()
        replacements = torch.randint_like(selected, high=self.num_classes - 1)

        corrupted = sequence.clone()
        corrupted[corrupt_mask] = replacements
        return corrupted, corrupt_mask

    def get_input_kwargs_from_batch(self, batch) -> dict:
        """Fetch the residue types stored under ``self.key`` as int64."""
        return {"aatype": batch["inputs"][self.key].long()}

    @typecheck
    def _generate(self, aatype: Int[Tensor, "b n"]) -> Tensor:
        """see super class"""
        corrupted, _corrupt_mask = self._corrupt_seq(aatype.clone())
        return self.make_feature(data=corrupted.unsqueeze(-1))
class TokenPLDDT(FeatureGenerator):
    """Binned per-token pLDDT, one-hot encoded; defined ONLY for distillation
    data and masked everywhere else.
    """

    def __init__(
        self,
        include_prob: float = 1.0,
        bins: list[float] | None = None,
    ):
        """
        Parameters:
            include_prob: per-example probability of keeping the feature
                (otherwise the whole example is masked).
            bins: bin edges; ``DEFAULT_PLDDT_BINS`` is used when omitted.
        """
        # bins are needed before super().__init__ to derive num_classes
        self.bins = torch.tensor(default(bins, DEFAULT_PLDDT_BINS))

        super().__init__(
            ty=FeatureType.TOKEN,
            encoding_ty=EncodingType.ONE_HOT,
            num_classes=len(self.bins) + 1,
            mult=1,
            can_mask=True,
        )
        self.include_prob = include_prob

    def get_input_kwargs_from_batch(self, batch) -> dict:
        """Fetch pLDDT values, the distillation flag and the token mask."""
        inputs = batch["inputs"]
        return {
            "token_plddt": inputs["token_b_factor_or_plddt"],
            "is_distillation": inputs["is_distillation"],
            "token_exists_mask": inputs["token_exists_mask"],
        }

    @typecheck
    def _generate(
        self,
        token_plddt: Float[Tensor, "b n"],
        is_distillation: Bool[Tensor, "b 1"],
        token_exists_mask: Bool[Tensor, "b n"],
    ) -> Tensor:
        """Bin the pLDDT values and mask tokens where the feature is undefined.

        A token keeps its bin index only if its example is a distillation
        example, the token exists, and the example passed the random
        ``include_prob`` draw; all other tokens get ``self.mask_value``.
        """
        # one uniform draw per example decides whether it is included at all
        include_mask = (
            torch.rand_like(is_distillation, dtype=torch.float) <= self.include_prob
        )

        # (b, 1) per-example flags broadcast against the (b, n) token mask
        keep = (is_distillation & include_mask) & token_exists_mask

        bin_edges = self.bins.to(is_distillation.device)
        binned = torch.searchsorted(bin_edges, token_plddt)
        feat = binned.masked_fill(~keep, self.mask_value)

        return self.make_feature(data=feat.unsqueeze(-1))
This includes the following: +- Template mask (includes both the psuedo beta mask and backbone frame mask) +- Template unit vector generator +- Template residue type generator +- Template distogram generator +""" + +import logging +from typing import Any + +import torch +from einops import rearrange +from torch import Tensor + +from chai_lab.data.features.feature_type import FeatureType +from chai_lab.data.features.generators.base import EncodingType, FeatureGenerator +from chai_lab.data.residue_constants import residue_types_with_nucleotides_order +from chai_lab.utils.typing import Bool, Float, Int, UInt8, typecheck + +logger = logging.getLogger(__name__) + + +class TemplateMaskGenerator(FeatureGenerator): + def __init__(self): + super().__init__( + ty=FeatureType.TEMPLATES, + encoding_ty=EncodingType.IDENTITY, + can_mask=False, + num_classes=2, + ) + + def get_input_kwargs_from_batch(self, batch: dict[str, Any]) -> dict: + return dict( + template_backbone_frame_mask=batch["inputs"][ + "template_backbone_frame_mask" + ], + template_pseudo_beta_mask=batch["inputs"]["template_pseudo_beta_mask"], + asym_ids=batch["inputs"]["token_asym_id"].type(torch.int32), + ) + + def _generate( + self, + template_backbone_frame_mask: Bool[Tensor, "batch templ tokens"], + template_pseudo_beta_mask: Bool[Tensor, "batch templ tokens"], + asym_ids: Int[Tensor, "batch tokens"], + ) -> Tensor: + same_asym = rearrange(asym_ids, "b t -> b 1 t 1 1") == rearrange( + asym_ids, "b t -> b 1 1 t 1" + ) + # Line 1: backbone frame mask + # (b t n n) + bij_backbone = rearrange( + template_backbone_frame_mask, "b t n -> b t n 1 1" + ) * rearrange(template_backbone_frame_mask, "b t n -> b t 1 n 1") + + # Line 2: backbone pseudo beta mask + # (b t n n) + bij_pseudo_beta = rearrange( + template_pseudo_beta_mask, "b t n -> b t n 1 1" + ) * rearrange(template_pseudo_beta_mask, "b t n -> b t 1 n 1") + + mask_feat = torch.cat([bij_backbone, bij_pseudo_beta], dim=-1).float() + + return 
class TemplateDistogramGenerator(FeatureGenerator):
    """Generates feature for distogram of templates."""

    def __init__(
        self,
        min_dist_bin: float = 3.25,
        max_dist_bin: float = 50.75,
        n_dist_bin: int = 38,
    ):
        """
        Parameters:
            min_dist_bin: first point of the evenly spaced distance grid.
            max_dist_bin: last point of the evenly spaced distance grid.
            n_dist_bin: number of grid points, and also the number of one-hot
                classes.
        """
        super().__init__(
            ty=FeatureType.TEMPLATES,
            encoding_ty=EncodingType.ONE_HOT,
            can_mask=True,
            num_classes=n_dist_bin,
            mult=1,
        )
        # the first grid point is dropped, leaving n_dist_bin - 1 edges and
        # therefore n_dist_bin possible searchsorted outputs
        self.dist_bins = torch.linspace(min_dist_bin, max_dist_bin, n_dist_bin)[1:]

    def get_input_kwargs_from_batch(self, batch: dict[str, Any]) -> dict:
        """Fetch template distances and per-token asym ids (as int32)."""
        return dict(
            template_distances=batch["inputs"]["template_distances"],
            asym_ids=batch["inputs"]["token_asym_id"].to(torch.int32),
        )

    @typecheck
    def _generate(
        self,
        template_distances: Float[Tensor, "batch templ tokens tokens"],
        asym_ids: Int[Tensor, "batch tokens"],
    ) -> Tensor:
        """Discretize template distances and mask pairs from different chains.

        Each distance is replaced by its bin index; token pairs whose asym ids
        differ are set to ``self.mask_value``. A trailing singleton axis is
        added before calling ``make_feature``.
        """
        # The bin edges are created on the CPU in __init__; move them to the
        # input's device, otherwise searchsorted fails for non-CPU inputs.
        dist_bins = self.dist_bins.to(template_distances.device)
        discretized = torch.searchsorted(dist_bins, template_distances)
        # (b, 1, n, n) mask, broadcast over the template axis
        same_asym = rearrange(asym_ids, "b t -> b 1 t 1") == rearrange(
            asym_ids, "b t -> b 1 1 t"
        )
        discretized = torch.masked_fill(discretized, ~same_asym, self.mask_value)
        return self.make_feature(data=discretized.unsqueeze(-1))
right_residue_subchain_id: str + left_residue_index: int + right_residue_index: int + right_residue_name: str + left_residue_name: str + distance_threshold: float + + def get_left_and_right_asym_ids( + self, + token_subchain_id: UInt8[Tensor, "n 4"], + token_asym_id: Int[Tensor, "n"], + ): + left_asym_id = get_asym_id_from_subchain_id( + subchain_id=self.left_residue_subchain_id, + source_pdb_chain_id=token_subchain_id, + token_asym_id=token_asym_id, + ) + right_asym_id = get_asym_id_from_subchain_id( + subchain_id=self.right_residue_subchain_id, + source_pdb_chain_id=token_subchain_id, + token_asym_id=token_asym_id, + ) + return left_asym_id, right_asym_id + + def __str__(self): + return ( + f"ConstraintGroup(left_residue_subchain_id={self.left_residue_subchain_id}, " + f"right_residue_subchain_id={self.right_residue_subchain_id}, " + f"left_residue_index={self.left_residue_index}, " + f"right_residue_index={self.right_residue_index}, " + f"right_residue_name={self.right_residue_name}, " + f"left_residue_name={self.left_residue_name}, " + f"distance_threshold={self.distance_threshold})" + ) + + +class TokenDistanceRestraint(FeatureGenerator): + def __init__( + self, + include_probability: float = 1.0, + size: int | float = 0.33, + min_dist: int | float = 10.0, + max_dist: int | float = 30.0, + coord_noise: float = 0.0, + num_rbf_radii: int = 5, + query_entity_types: list[EntityType] | None = None, + key_entity_types: list[EntityType] | None = None, + ): + """Randomly sample inter-chain token distance restraints + + Parameters: + include_probability: Probability with which to include restraints + for a given example. i.e. if include probability is 0.75, then 25% + of the time, we will not sample any restraints for an example. + size: Number of restraints to sample. If 0 < size < 1, then the number + of restraints will be determined as geom(size), independently for each + example. 
+ min_dist: Minimum distance to encode restraints for + max_dist: Maximum distance to encode restraints for + coord_noise: gaussian noise with mean 0 and standard deviation coord_noise + added to coordinates before sampling restraints. + num_rbf_radii: Number of radii to use for the radial basis function + embedding of restraints + query_entity_types: Entity types to consider when sampling "query" tokens + for restraints. Defaults to all entity types. + key_entity_types: Entity types to consider when sampling "key" tokens + for restraints. Defaults to all entity types. + + NOTE: We only sample restraints between tokens if one of the tokens is in + the query entity types and the other is in the key entity types. + """ + super().__init__( + ty=FeatureType.TOKEN_PAIR, + can_mask=False, + encoding_ty=EncodingType.RBF, + num_classes=num_rbf_radii, + mult=1, + ignore_index=-1.0, + ) + self.ignore_idx = -1.0 + self.min_dist, self.max_dist = min_dist, max_dist + self.coord_noise = coord_noise + self.include_prob = include_probability + self.size = size + self.query_entity_types = torch.tensor( + ( + [e.value for e in query_entity_types] + if query_entity_types is not None + else [e.value for e in EntityType] + ) + ).long() + self.key_entity_types = torch.tensor( + ( + [e.value for e in key_entity_types] + if key_entity_types is not None + else [e.value for e in EntityType] + ) + ).long() + + def get_num_restraints(self, batch_size) -> list[int]: + if 0 < self.size < 1: + seles = np.random.geometric(self.size, size=batch_size) + include_mask = np.random.uniform(size=batch_size) < self.include_prob + seles[~include_mask] = 0 + return [int(x) for x in seles] + return [int(self.size)] * batch_size + + def get_input_kwargs_from_batch(self, batch) -> dict: + maybe_constraint_dicts = batch["inputs"].get("contact_constraints", [[None]])[0] + contact_constraints = ( + [ConstraintGroup(**d) for d in maybe_constraint_dicts] + if isinstance(maybe_constraint_dicts[0], dict) + else None + ) +
return dict( + atom_gt_coords=batch["inputs"]["atom_gt_coords"], + atom_exists_mask=batch["inputs"]["atom_exists_mask"], + token_asym_id=batch["inputs"]["token_asym_id"].long(), + token_ref_atom_index=batch["inputs"]["token_ref_atom_index"].long(), + token_exists_mask=batch["inputs"]["token_exists_mask"], + token_entity_type=batch["inputs"]["token_entity_type"].long(), + token_residue_index=batch["inputs"]["token_residue_index"].long(), + token_residue_names=batch["inputs"]["token_residue_name"], + token_subchain_id=batch["inputs"]["subchain_id"], + constraints=contact_constraints, + ) + + def _sample_restraints( + self, + dists: Float[Tensor, "n n"], + num_restraints: int, + ): + sampled_restraints = torch.full_like(dists, self.ignore_idx) + # sample upper bound independently in range (min_dist, max_dist) + # for each pair of tokens + # We choose a random delta to upper bound all sampled distances with. + # We do this because larger distance restraints are more likely to be + # valid than smaller ones, and we try to reduce that bias here. 
+ delta = torch.rand(1, device=dists.device) * (self.max_dist - self.min_dist) + all_restraint_bounds = torch.rand_like(dists) * delta + self.min_dist + all_valid_restraints = dists < all_restraint_bounds + num_valid_restraints = int(all_valid_restraints.sum().item()) + if num_valid_restraints == 0 or num_restraints == 0: # no restraints to add + return sampled_restraints + num_restraints = min(num_valid_restraints, num_restraints) + # select random restraints and respective sampled bounds + sampled_restraint_mask = all_restraint_bounds.new_zeros( + num_valid_restraints, dtype=torch.bool + ) + sampled_restraint_mask[:num_restraints] = True + # select random restraints by shuffling + sampled_restraint_mask = sampled_restraint_mask[ + torch.randperm(num_valid_restraints) + ] + + # add the bounds/pairs that we sampled to the sampled restraint matrix + flat_restraint_bounds = all_restraint_bounds[all_valid_restraints] + flat_restraint_bounds[~sampled_restraint_mask] = self.ignore_idx + sampled_restraints[all_valid_restraints] = flat_restraint_bounds + + return sampled_restraints + + @typecheck + def _generate( + self, + atom_gt_coords: Float[Tensor, "b a 3"], + atom_exists_mask: Bool[Tensor, "b a"], + token_asym_id: Int[Tensor, "b n"], + token_ref_atom_index: Int[Tensor, "b n"], + token_exists_mask: Bool[Tensor, "b n"], + token_entity_type: Int[Tensor, "b n"], + token_residue_index: Int[Tensor, "b n"], + token_residue_names: UInt8[Tensor, "b n 8"], + token_subchain_id: UInt8[Tensor, "b n 4"], + constraints: list[ConstraintGroup] | None = None, + ) -> Tensor: + try: + if constraints is not None: + assert atom_gt_coords.shape[0] == 1 + return self.generate_from_constraints( + token_asym_id=token_asym_id, + token_residue_index=token_residue_index, + token_residue_names=token_residue_names, + token_subchain_id=token_subchain_id, + constraints=constraints, + ) + except Exception as e: + logger.error(f"Error {e} generating distance constraints: {constraints}") + + return
self._generate_from_batch( + atom_gt_coords=atom_gt_coords, + atom_exists_mask=atom_exists_mask, + token_asym_id=token_asym_id, + token_ref_atom_index=token_ref_atom_index, + token_exists_mask=token_exists_mask, + token_entity_type=token_entity_type, + ) + + @typecheck + def _generate_from_batch( + self, + atom_gt_coords: Float[Tensor, "b a 3"], + atom_exists_mask: Bool[Tensor, "b a"], + token_asym_id: Int[Tensor, "b n"], + token_ref_atom_index: Int[Tensor, "b n"], + token_exists_mask: Bool[Tensor, "b n"], + token_entity_type: Int[Tensor, "b n"], + ) -> Tensor: + batch_size = atom_gt_coords.shape[0] + # create inter-chain contact mask + valid_token_pair_mask = und_self(token_exists_mask, "b i, b j -> b i j") + left_entity_type_mask = torch.any( + (token_entity_type.unsqueeze(-1) - self.query_entity_types) == 0, dim=-1 + ) + right_entity_type_mask = torch.any( + (token_entity_type.unsqueeze(-1) - self.key_entity_types) == 0, dim=-1 + ) + valid_entity_pair_mask = und( + left_entity_type_mask, right_entity_type_mask, "b i, b j -> b i j" + ) + diff_chain_mask = rearrange(token_asym_id, "b i -> b i 1") != rearrange( + token_asym_id, "b j -> b 1 j" + ) + ref_atom_mask = torch.gather( + atom_exists_mask, dim=1, index=token_ref_atom_index + ) + valid_token_ref_atom_mask = und_self(ref_atom_mask, "b i, b j -> b i j") + valid_contact_mask = ( + valid_token_pair_mask + & valid_entity_pair_mask + & valid_token_ref_atom_mask + & diff_chain_mask + ) + + # compute pairwise distances + token_ref_atom_coords = torch.gather( + atom_gt_coords, dim=1, index=repeat(token_ref_atom_index, "... -> ... 
3") + ) + # optionally add noise to coordinates before computing distances + token_ref_atom_coords = ( + token_ref_atom_coords + + torch.randn_like(token_ref_atom_coords) * self.coord_noise + ) + inter_token_dists = cdist(token_ref_atom_coords) + inter_token_dists[~valid_contact_mask] = self.max_dist + 1 + # compute contacts by (1) sampling an upper bound on the distance + # and (2) selecting pairwise distances below the threshold + num_to_include = self.get_num_restraints(batch_size) + restraint_mats = [ + self._sample_restraints(inter_token_dists[i], n) + for i, n in enumerate(num_to_include) + ] + encoded_feat = torch.stack(restraint_mats, dim=0) + return self.make_feature(encoded_feat.unsqueeze(-1)) + + @typecheck + def generate_from_constraints( + self, + token_asym_id: Int[Tensor, "1 n"], + token_residue_index: Int[Tensor, "1 n"], + token_residue_names: UInt8[Tensor, "1 n 8"], + token_subchain_id: UInt8[Tensor, "1 n 4"], + constraints: list[ConstraintGroup], + ) -> Tensor: + logger.info(f"Generating distance feature from constraints: {constraints}") + n, device = token_asym_id.shape[1], token_asym_id.device + constraint_mat = torch.full( + (n, n), fill_value=self.ignore_idx, device=device, dtype=torch.float32 + ) + for constraint_group in constraints: + left_residue_asym_id, right_residue_asym_id = ( + constraint_group.get_left_and_right_asym_ids( + token_subchain_id=rearrange(token_subchain_id, "1 ... -> ..."), + token_asym_id=rearrange(token_asym_id, "1 ... -> ..."), + ) + ) + constraint_mat = self.add_distance_constraint( + constraint_mat=constraint_mat, + token_asym_id=rearrange(token_asym_id, "1 ... -> ..."), + token_residue_index=rearrange(token_residue_index, "1 ... -> ..."), + token_residue_names=rearrange(token_residue_names, "1 ... 
-> ..."), + left_residue_asym_id=left_residue_asym_id, + right_residue_asym_id=right_residue_asym_id, + left_residue_index=constraint_group.left_residue_index, + right_residue_index=constraint_group.right_residue_index, + right_residue_name=constraint_group.right_residue_name, + left_residue_name=constraint_group.left_residue_name, + distance_threshold=constraint_group.distance_threshold, + ) + # encode and apply mask + constraint_mat = repeat(constraint_mat, "i j -> 1 i j 1") + return self.make_feature(constraint_mat) + + @typecheck + def add_distance_constraint( + self, + constraint_mat: Float[Tensor, "n n"], + token_asym_id: Int[Tensor, "n"], + token_residue_index: Int[Tensor, "n"], + token_residue_names: UInt8[Tensor, "n 8"], + # asym id of the chain that binds in the pocket + left_residue_asym_id: int, + right_residue_asym_id: int, + left_residue_index: int, + right_residue_index: int, + right_residue_name: str, + left_residue_name: str, + distance_threshold: float, + ): + left_asym_mask = token_asym_id == left_residue_asym_id + right_asym_mask = token_asym_id == right_residue_asym_id + left_index_mask = token_residue_index == left_residue_index + right_index_mask = token_residue_index == right_residue_index + left_residue_mask = left_asym_mask & left_index_mask + right_residue_mask = right_asym_mask & right_index_mask + # restraint should point to single residue pair + assert torch.sum(left_residue_mask) == 1, ( + f"Expected unique residue but found {torch.sum(left_residue_mask)}\n" + f"{left_residue_asym_id=}, {left_residue_index=}, {left_residue_name=}" + ) + assert torch.sum(right_residue_mask) == 1, ( + f"Expected unique residue but found {torch.sum(right_residue_mask)}\n" + f"{right_residue_asym_id=}, {right_residue_index=}, {right_residue_name=}" + ) + # make sure the residue names in the constraint match the + # ones we parsed + left_res_name = token_residue_names[left_residue_mask] + right_res_name = token_residue_names[right_residue_mask] + 
expected_res_name = tensorcode_to_string(rearrange(left_res_name, "1 l -> l")) + assert expected_res_name == left_residue_name, ( + f"Expected residue name {expected_res_name} but got " f"{left_residue_name}" + ) + expected_res_name = tensorcode_to_string(rearrange(right_res_name, "1 l -> l")) + assert expected_res_name == right_residue_name, ( + f"Expected residue name {expected_res_name} but got " + f"{right_residue_name}" + ) + # add constraint + # NOTE: feature is *not* symmetric + constraint_mat[left_residue_mask, right_residue_mask] = distance_threshold + return constraint_mat diff --git a/forks/chai-lab/chai_lab/data/features/generators/token_pair_distance.py b/forks/chai-lab/chai_lab/data/features/generators/token_pair_distance.py new file mode 100644 index 00000000..c016ec42 --- /dev/null +++ b/forks/chai-lab/chai_lab/data/features/generators/token_pair_distance.py @@ -0,0 +1,61 @@ +from typing import Any + +import torch +from torch import Tensor + +from chai_lab.data.features.feature_type import FeatureType +from chai_lab.data.features.generators.base import EncodingType, FeatureGenerator +from chai_lab.data.features.token_utils import get_centre_positions_and_mask +from chai_lab.utils.tensor_utils import cdist +from chai_lab.utils.typing import Bool, Float, Int, typecheck + + +class TokenCenterDistance(FeatureGenerator): + def __init__( + self, + dist_bins: list[float] | None = None, + ): + dist_bins = dist_bins if dist_bins is not None else [0.0, 4.0, 8.0, 12.0, 16.0] + super().__init__( + ty=FeatureType.TOKEN_PAIR, + encoding_ty=EncodingType.ONE_HOT, + # one of dist_bins of rbf_radii is not None. 
+ num_classes=len(dist_bins) + 1, + mult=1, + can_mask=True, + ) + + # maintain consistent orders + self.dist_bins = torch.tensor(dist_bins) + + def get_input_kwargs_from_batch(self, batch: dict[str, Any]) -> dict: + return dict( + all_atom_positions=batch["inputs"]["atom_gt_coords"], + all_atom_mask=batch["inputs"]["atom_exists_mask"], + token_single_mask=batch["inputs"]["token_exists_mask"], + token_center_atom_index=batch["inputs"]["token_centre_atom_index"].long(), + ) + + @typecheck + def _generate( + self, + all_atom_positions: Float[Tensor, "b a 3"], + all_atom_mask: Bool[Tensor, "b a"], + token_single_mask: Bool[Tensor, "b n"], + token_center_atom_index: Int[Tensor, "b n"], + ) -> Tensor: + """see super class""" + center_atom_coords, center_atom_mask = get_centre_positions_and_mask( + atom_gt_coords=all_atom_positions, + atom_exists_mask=all_atom_mask, + token_centre_atom_index=token_center_atom_index, + token_exists_mask=token_single_mask, + ) + feat = torch.searchsorted( + self.dist_bins.to(center_atom_coords.device), cdist(center_atom_coords) + ) + center_atom_pair_exists = torch.einsum( + "b i, b j -> b i j", center_atom_mask, center_atom_mask + ) + feat.masked_fill_(~center_atom_pair_exists, self.mask_value) + return self.make_feature(feat.unsqueeze(-1)) diff --git a/forks/chai-lab/chai_lab/data/features/generators/token_pair_pocket_restraint.py b/forks/chai-lab/chai_lab/data/features/generators/token_pair_pocket_restraint.py new file mode 100644 index 00000000..41bb0d6e --- /dev/null +++ b/forks/chai-lab/chai_lab/data/features/generators/token_pair_pocket_restraint.py @@ -0,0 +1,283 @@ +import logging +from dataclasses import dataclass + +import torch +from einops import rearrange, repeat +from torch import Tensor + +from chai_lab.data.features.feature_type import FeatureType +from chai_lab.data.features.generators.base import EncodingType, FeatureGenerator +from chai_lab.data.features.generators.token_dist_restraint import ( + TokenDistanceRestraint, +)
+from chai_lab.data.parsing.structure.entity_type import EntityType +from chai_lab.model.utils import get_asym_id_from_subchain_id +from chai_lab.utils.tensor_utils import tensorcode_to_string +from chai_lab.utils.typing import Bool, Float, Int, UInt8, typecheck + +logger = logging.getLogger(__name__) + + +@typecheck +@dataclass +class ConstraintGroup: + """ + Container for a token pocket pair restraint group + """ + + # subchain ID of the pocket chain + pocket_chain_subchain_id: str + # subchain ID of the pocket token + pocket_token_subchain_id: str + # residue index of the pocket token + pocket_token_residue_index: int + # residue name of the pocket token + pocket_token_residue_name: str + # pocket distance threshold + pocket_distance_threshold: float + # optional subchain IDs + + def get_chain_and_token_asym_ids( + self, + token_subchain_id: UInt8[Tensor, "n 4"], + token_asym_id: Int[Tensor, "n"], + ): + pocket_chain_asym_id = get_asym_id_from_subchain_id( + subchain_id=self.pocket_chain_subchain_id, + source_pdb_chain_id=token_subchain_id, + token_asym_id=token_asym_id, + ) + pocket_token_asym_id = get_asym_id_from_subchain_id( + subchain_id=self.pocket_token_subchain_id, + source_pdb_chain_id=token_subchain_id, + token_asym_id=token_asym_id, + ) + return pocket_chain_asym_id, pocket_token_asym_id + + def __str__(self): + return ( + f"ConstraintGroup(pocket_chain_subchain_id={self.pocket_chain_subchain_id}, " + f"pocket_token_subchain_id={self.pocket_token_subchain_id}, " + f"pocket_token_residue_index={self.pocket_token_residue_index}, " + f"pocket_token_residue_name={self.pocket_token_residue_name}, " + f"pocket_distance_threshold={self.pocket_distance_threshold})" + ) + + +class TokenPairPocketRestraint(FeatureGenerator): + def __init__( + self, + include_probability: float = 1.0, + size: int | float = 0.33, + min_dist: int | float = 10.0, + max_dist: int | float = 30.0, + coord_noise: float = 0.0, + num_rbf_radii: int = 5, + query_entity_types: 
list[EntityType] | None = None, + key_entity_types: list[EntityType] | None = None, + ): + """ + Derives pocket constraints by first generating pairwise distance restraints, + and then selecting the query tokens that were assigned to some non-zero + constraint. + + NOTE: Pocket restraints will only be sampled for tokens that are in the + query entity types. + """ + super().__init__( + ty=FeatureType.TOKEN_PAIR, + can_mask=False, + encoding_ty=EncodingType.RBF, + num_classes=num_rbf_radii, + mult=1, + ignore_index=-1.0, + ) + # use distance restraint generator to sample pocket tokens/chains + self.distance_restraint_gen = TokenDistanceRestraint( + include_probability=include_probability, + size=size, + min_dist=min_dist, + max_dist=max_dist, + coord_noise=coord_noise, + num_rbf_radii=num_rbf_radii, + query_entity_types=query_entity_types, + key_entity_types=key_entity_types, + ) + self.ignore_idx = -1.0 + self.min_dist, self.max_dist = min_dist, max_dist + self.coord_noise = coord_noise + self.include_prob = include_probability + self.size = size + # override feature type + self.ty = FeatureType.TOKEN_PAIR + + def get_input_kwargs_from_batch(self, batch) -> dict: + # cast pocket constraints from dict back to dataclass + maybe_constraint_dicts = batch["inputs"].get("pocket_constraints", [[None]])[0] + pocket_constraints = batch["inputs"]["pocket_constraints"] = ( + [ConstraintGroup(**d) for d in maybe_constraint_dicts] + if isinstance(maybe_constraint_dicts[0], dict) + else None + ) + + return dict( + atom_gt_coords=batch["inputs"]["atom_gt_coords"], + atom_exists_mask=batch["inputs"]["atom_exists_mask"], + token_asym_id=batch["inputs"]["token_asym_id"].long(), + token_ref_atom_index=batch["inputs"]["token_ref_atom_index"].long(), + token_exists_mask=batch["inputs"]["token_exists_mask"], + token_entity_type=batch["inputs"]["token_entity_type"].long(), + token_residue_index=batch["inputs"]["token_residue_index"].long(), + 
token_residue_names=batch["inputs"]["token_residue_name"], + token_subchain_id=batch["inputs"]["subchain_id"], + constraints=pocket_constraints, + ) + + @typecheck + def _generate( + self, + atom_gt_coords: Float[Tensor, "b a 3"], + atom_exists_mask: Bool[Tensor, "b a"], + token_asym_id: Int[Tensor, "b n"], + token_ref_atom_index: Int[Tensor, "b n"], + token_exists_mask: Bool[Tensor, "b n"], + token_entity_type: Int[Tensor, "b n"], + token_residue_index: Int[Tensor, "b n"], + token_residue_names: UInt8[Tensor, "b n 8"], + token_subchain_id: UInt8[Tensor, "b n 4"], + constraints: list[ConstraintGroup] | None = None, + ) -> Tensor: + try: + if constraints is not None: + assert atom_gt_coords.shape[0] == 1 + return self.generate_from_constraints( + token_asym_id=token_asym_id, + token_residue_index=token_residue_index, + token_residue_names=token_residue_names, + token_subchain_id=token_subchain_id, + constraints=constraints, + ) + except Exception as e: + logger.error(f"Error {e} generating pocket constraints: {constraints}") + + return self._generate_from_batch( + atom_gt_coords=atom_gt_coords, + atom_exists_mask=atom_exists_mask, + token_asym_id=token_asym_id, + token_ref_atom_index=token_ref_atom_index, + token_exists_mask=token_exists_mask, + token_entity_type=token_entity_type, + ) + + @typecheck + def _generate_from_batch( + self, + atom_gt_coords: Float[Tensor, "b a 3"], + atom_exists_mask: Bool[Tensor, "b a"], + token_asym_id: Int[Tensor, "b n"], + token_ref_atom_index: Int[Tensor, "b n"], + token_exists_mask: Bool[Tensor, "b n"], + token_entity_type: Int[Tensor, "b n"], + ) -> Tensor: + contact_feat = self.distance_restraint_gen._generate_from_batch( + atom_gt_coords=atom_gt_coords, + atom_exists_mask=atom_exists_mask, + token_asym_id=token_asym_id, + token_ref_atom_index=token_ref_atom_index, + token_exists_mask=token_exists_mask, + token_entity_type=token_entity_type, + ).data + # derive the pocket from the contact feature + contact_feat[contact_feat == 
self.ignore_idx] = self.max_dist + 1 + # determine contacting asym pairs and their respective distances + contact_mask = contact_feat < self.max_dist + # batch dim, row dim, col dim + bs, rs, cs = torch.where(contact_mask.squeeze(-1)) + # determine asym ids of tokens in contact. + for b, r, c in zip(bs, rs, cs): + col_asym_mask = token_asym_id[b] == token_asym_id[b, c] + pocket_constraint = contact_feat[b, r, c] + contact_feat[b, r, col_asym_mask] = pocket_constraint + + # re-mask + contact_feat[contact_feat > self.max_dist] = self.ignore_idx + return self.make_feature(contact_feat) + + @typecheck + def generate_from_constraints( + self, + # only batch size 1 is supported + token_asym_id: Int[Tensor, "1 n"], + token_subchain_id: UInt8[Tensor, "1 n 4"], + token_residue_index: Int[Tensor, "1 n"], + token_residue_names: UInt8[Tensor, "1 n 8"], + constraints: list[ConstraintGroup], + ) -> Tensor: + logger.info(f"Generating pocket feature from constraints: {constraints}") + n, device = token_asym_id.shape[1], token_asym_id.device + constraint_mat = torch.full( + (n, n), fill_value=self.ignore_idx, device=device, dtype=torch.float32 + ) + for constraint_group in constraints: + pocket_chain_asym_id, pocket_token_asym_id = ( + constraint_group.get_chain_and_token_asym_ids( + token_subchain_id=rearrange(token_subchain_id, "1 ... -> ..."), + token_asym_id=rearrange(token_asym_id, "1 ... -> ..."), + ) + ) + constraint_mat = self.add_pocket_constraint( + constraint_mat=constraint_mat, + token_asym_id=rearrange(token_asym_id, "1 ... -> ..."), + token_residue_index=rearrange(token_residue_index, "1 ... -> ..."), + token_residue_names=rearrange(token_residue_names, "1 ... 
-> ..."), + pocket_chain_asym_id=pocket_chain_asym_id, + pocket_token_asym_id=pocket_token_asym_id, + pocket_token_residue_index=constraint_group.pocket_token_residue_index, + pocket_token_residue_name=constraint_group.pocket_token_residue_name, + pocket_distance_threshold=constraint_group.pocket_distance_threshold, + ) + # encode and apply mask + constraint_mat = repeat(constraint_mat, "i j -> 1 i j 1") + return self.make_feature(constraint_mat) + + @typecheck + def add_pocket_constraint( + self, + constraint_mat: Float[Tensor, "n n"], + token_asym_id: Int[Tensor, "n"], + token_residue_index: Int[Tensor, "n"], + token_residue_names: UInt8[Tensor, "n 8"], + # asym id of the chain that binds in the pocket + pocket_chain_asym_id: int, + # asym id of the token defining the pocket + pocket_token_asym_id: int, + # residue index of the pocket token + pocket_token_residue_index: int, + # residue name of the pocket token + pocket_token_residue_name: str, + # distance from the pocket token to pocket chain + pocket_distance_threshold: float, + ): + pocket_chain_asym_mask = token_asym_id == pocket_chain_asym_id + pocket_token_asym_mask = token_asym_id == pocket_token_asym_id + pocket_token_residue_mask = token_residue_index == pocket_token_residue_index + pocket_token_residue_mask &= pocket_token_asym_mask + assert torch.sum(pocket_token_residue_mask) == 1, ( + f"Expected unique residue but found {torch.sum(pocket_token_residue_mask)}\n" + f"{pocket_token_asym_id=}, {pocket_token_residue_index=}, " + f"{pocket_token_residue_name=}" + ) + pocket_token_res_name = token_residue_names[pocket_token_residue_mask] + pocket_token_res_name = rearrange(pocket_token_res_name, "1 l -> l") + expected_res_name = tensorcode_to_string(pocket_token_res_name) + assert expected_res_name == pocket_token_residue_name, ( + f"Expected residue name {expected_res_name} but got " + f"{pocket_token_residue_name}" + ) + # add constraints between the pocket token and all other tokens in the pocket + # 
chain + # NOTE: feature is not symmetric + constraint_mat[pocket_token_residue_mask, pocket_chain_asym_mask] = ( + pocket_distance_threshold + ) + return constraint_mat diff --git a/forks/chai-lab/chai_lab/data/features/token_utils.py b/forks/chai-lab/chai_lab/data/features/token_utils.py new file mode 100644 index 00000000..1a833edb --- /dev/null +++ b/forks/chai-lab/chai_lab/data/features/token_utils.py @@ -0,0 +1,26 @@ +import torch +from einops import repeat +from torch import Tensor + +from chai_lab.utils.typing import Bool, Float, Int, typecheck + + +@typecheck +def get_centre_positions_and_mask( + atom_gt_coords: Float[Tensor, "... n_atoms 3"], + atom_exists_mask: Bool[Tensor, "... n_atoms"], + token_centre_atom_index: Int[Tensor, "... n_tokens"], + token_exists_mask: Bool[Tensor, "... n_tokens"], +) -> tuple[Float[Tensor, "... n_tokens 3"], Bool[Tensor, "... n_tokens"]]: + assert token_centre_atom_index.dtype in (torch.int32, torch.long) + center_index = token_centre_atom_index.long() + indices = repeat(center_index, "... n -> ... 
n c", c=3) + center_pos = torch.gather(atom_gt_coords, dim=-2, index=indices) + center_mask = torch.gather(atom_exists_mask, dim=-1, index=center_index) + + # because token_centre_atom_index is zero-padded, and because + # atom number 0 is probably a valid atom, we need to reapply + # the token mask + center_mask = center_mask & token_exists_mask + + return center_pos, center_mask diff --git a/forks/chai-lab/chai_lab/data/io/__init__.py b/forks/chai-lab/chai_lab/data/io/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/forks/chai-lab/chai_lab/data/io/pdb_utils.py b/forks/chai-lab/chai_lab/data/io/pdb_utils.py new file mode 100644 index 00000000..c7c84330 --- /dev/null +++ b/forks/chai-lab/chai_lab/data/io/pdb_utils.py @@ -0,0 +1,219 @@ +import logging +import string +from collections import defaultdict +from dataclasses import dataclass +from functools import cached_property +from pathlib import Path + +import gemmi +from torch import Tensor + +from chai_lab.data.parsing.structure.entity_type import EntityType +from chai_lab.utils.tensor_utils import tensorcode_to_string +from chai_lab.utils.typing import Bool, Float, Int, UInt8, typecheck + +logger = logging.getLogger(__name__) + + +@dataclass(frozen=True) +class PDBAtom: + record_type: str + atom_index: int + atom_name: str + alt_loc: str + res_name_3: str + chain_tag: str + residue_index: int + insertion_code: str + pos: list[float] + occupancy: float + b_factor: float + element: str + charge: str + + def __str__( + self, + ): + # currently this works only for single-char chain tags + atom_line = ( + f"{self.record_type:<6}{self.atom_index:>5} {self.atom_name:<4}{self.alt_loc:>1}" + f"{self.res_name_3:>3} {self.chain_tag:>1}" + f"{self.residue_index:>4}{self.insertion_code:>1} " + f"{self.pos[0]:>8.3f}{self.pos[1]:>8.3f}{self.pos[2]:>8.3f}" + f"{self.occupancy:>6.2f}{self.b_factor:>6.2f} " + f"{self.element:>2}{self.charge:>2}" + ) + return atom_line + + +def write_pdb(chain_atoms: 
list[list[PDBAtom]], out_path: str): + with open(out_path, "w") as f: + for chain in chain_atoms: + for atom in chain: + f.write(str(atom) + "\n") + f.write("TER\n") + f.write("END\n") + + +@typecheck +@dataclass +class PDBContext: + """Data needed to produce Posebuster input file types""" + + token_residue_index: Int[Tensor, "n_tokens"] + token_asym_id: Int[Tensor, "n_tokens"] + token_entity_type: Int[Tensor, "n_tokens"] + token_residue_names: UInt8[Tensor, "n_tokens 8"] + atom_token_index: Int[Tensor, "n_atoms"] + atom_ref_element: Int[Tensor, "n_atoms"] + atom_ref_mask: Bool[Tensor, "n_atoms"] + atom_coords: Float[Tensor, "n_atoms 3"] + atom_exists_mask: Bool[Tensor, "n_atoms"] + atom_ref_name_chars: Int[Tensor, "n_atoms 4"] + atom_bfactor_or_plddt: Float[Tensor, "n_atoms"] | None = None + + @cached_property + def token_res_names_to_string(self) -> list[str]: + return [tensorcode_to_string(x) for x in self.token_residue_names.cpu()] + + @property + def num_atoms(self) -> int: + return self.atom_coords.shape[0] + + @property + def is_protein(self) -> bool: + return self.is_entity(EntityType.PROTEIN) + + @property + def is_ligand(self) -> bool: + return self.is_entity(EntityType.LIGAND) + + @property + def first_residue_name(self) -> str: + return self.token_res_names_to_string[0].strip() + + def is_entity(self, ety: EntityType) -> bool: + return self.token_entity_type[0].item() == ety.value + + def get_pdb_atoms(self): + # warning: calling this on cuda tensors is extremely slow + atom_asym_id = self.token_asym_id[self.atom_token_index] + # atom level attributes + atom_residue_index = ( + self.token_residue_index[self.atom_token_index] + 1 + ) # residues are 1-indexed + atom_names = _tensor_to_atom_names(self.atom_ref_name_chars.unsqueeze(0)) + atom_res_names = self.token_residue_names[self.atom_token_index] + atom_res_names_strs = [ + tensorcode_to_string(x)[:3].ljust(3) for x in atom_res_names + ] + atom_element_names = [ + _atomic_num_to_element(int(x.item())) 
for x in self.atom_ref_element + ] + + pdb_atoms = [] + for atom_index in range(self.num_atoms): + if not self.atom_exists_mask[atom_index].item(): + # skip missing atoms + continue + + chain_tag_vocab = string.ascii_uppercase + string.ascii_lowercase + if int(atom_asym_id[atom_index].item()) >= len(chain_tag_vocab): + logger.warning( + f"Too many chains for PDB file: {atom_asym_id[atom_index].item()} -- wrapping around" + ) + atom = PDBAtom( + record_type="ATOM" if not self.is_ligand else "HETATM", + atom_index=atom_index, + atom_name=atom_names[atom_index], + alt_loc="", + res_name_3=atom_res_names_strs[atom_index], + chain_tag=chain_tag_vocab[ + int(atom_asym_id[atom_index].item()) % len(chain_tag_vocab) + ], + residue_index=int(atom_residue_index[atom_index].item()), + insertion_code="", + pos=self.atom_coords[atom_index].tolist(), + occupancy=1.00, + b_factor=( + 1.00 + if self.atom_bfactor_or_plddt is None + else self.atom_bfactor_or_plddt[atom_index].item() + ), + element=atom_element_names[atom_index], + charge="", + ) + pdb_atoms.append(atom) + return pdb_atoms + + # @classmethod + # def cat(cls, contexts: list["PDBContext"]) -> "PDBContext": + # """Concatenates multiple posebuster contexts into a single context""" + # cat_attrs: dict[str, Tensor] = dict() + # for attr in cls.__annotations__.keys(): + # cat_attrs[attr] = torch.cat([getattr(c, attr) for c in contexts], dim=0) + # return cls(**cat_attrs) + + +def _atomic_num_to_element(atomic_num: int) -> str: + return gemmi.Element(atomic_num).name + + +def entity_to_pdb_atoms(entity: PDBContext) -> list[list[PDBAtom]]: + """Writes a single tokenized entity to PDB file""" + pdb_atoms = entity.get_pdb_atoms() + chains = defaultdict(list) + for atom in pdb_atoms: + chains[atom.chain_tag].append(atom) + return list(chains.values()) + + +def entities_to_pdb_file(entities: list[PDBContext], path: str): + pdb_atoms: list[list[PDBAtom]] = [] + for entity in entities: + pdb_atoms = pdb_atoms + 
entity_to_pdb_atoms(entity) + write_pdb(pdb_atoms, path) + + +def pdb_context_from_batch( + d: dict, coords: Tensor, plddt: Tensor | None = None +) -> PDBContext: + return PDBContext( + token_residue_index=d["token_residue_index"][0], + token_asym_id=d["token_asym_id"][0], + token_entity_type=d["token_entity_type"][0], + token_residue_names=d["token_residue_name"][0], + atom_token_index=d["atom_token_index"][0], + atom_ref_element=d["atom_ref_element"][0], + atom_ref_mask=d["atom_ref_mask"][0], + atom_coords=coords[0], + atom_exists_mask=d["atom_exists_mask"][0], + atom_ref_name_chars=d["atom_ref_name_chars"][0], + atom_bfactor_or_plddt=plddt[0] if plddt is not None else None, + ) + + +def write_pdbs_from_outputs( + coords: Float[Tensor, "1 n_atoms 3"], + output_batch: dict, + write_path: Path, + bfactors: Float[Tensor, "1 n_atoms"] | None = None, +): + # save outputs + context = pdb_context_from_batch(output_batch, coords, plddt=bfactors) + write_path.parent.mkdir(parents=True, exist_ok=True) + entities_to_pdb_file( + [context], + str(write_path), + ) + logger.info(f"saved pdb file to {write_path}") + + +@typecheck +def _tensor_to_atom_names( + tensor: Int[Tensor, "*dims 4"] | UInt8[Tensor, "*dims 4"], +) -> list[str]: + return [ + "".join([chr(ord_val + 32) for ord_val in ords_atom]).rstrip() + for ords_atom in tensor.squeeze(0) + ] diff --git a/forks/chai-lab/chai_lab/data/parsing/__init__.py b/forks/chai-lab/chai_lab/data/parsing/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/forks/chai-lab/chai_lab/data/parsing/fasta.py b/forks/chai-lab/chai_lab/data/parsing/fasta.py new file mode 100644 index 00000000..b15631e3 --- /dev/null +++ b/forks/chai-lab/chai_lab/data/parsing/fasta.py @@ -0,0 +1,47 @@ +import logging +from pathlib import Path +from typing import Iterable + +from chai_lab.data.parsing.structure.entity_type import EntityType +from chai_lab.data.residue_constants import restype_1to3_with_x + +logger = logging.getLogger(__name__) + 
+Fasta = tuple[str, str] +Fastas = list[Fasta] + + +nucleic_acid_1_to_name: dict[tuple[str, EntityType], str] = { + ("A", EntityType.RNA): "A", + ("U", EntityType.RNA): "U", + ("G", EntityType.RNA): "G", + ("C", EntityType.RNA): "C", + ("A", EntityType.DNA): "DA", + ("T", EntityType.DNA): "DT", + ("G", EntityType.DNA): "DG", + ("C", EntityType.DNA): "DC", +} + + +def read_fasta(file_path: str | Path) -> Iterable[Fasta]: + from Bio import SeqIO + + fasta_sequences = SeqIO.parse(open(file_path), "fasta") + return [(fasta.id, str(fasta.seq)) for fasta in fasta_sequences] + + +def get_residue_name( + fasta_code: str, + entity_type: EntityType, +) -> str: + if len(fasta_code) != 1: + raise ValueError("Cannot handle non-single chars: {}".format(fasta_code)) + match entity_type: + case EntityType.PROTEIN: + return restype_1to3_with_x.get(fasta_code, "UNK") + case EntityType.RNA | EntityType.DNA: + # under nucleic_acid_1_to_name, DNA is mapped to D_ and RNA to _ + unk = "X" if entity_type == EntityType.RNA else "DX" + return nucleic_acid_1_to_name.get((fasta_code, entity_type), unk) + case _: + raise ValueError(f"Invalid polymer entity type {entity_type}") diff --git a/forks/chai-lab/chai_lab/data/parsing/input_validation.py b/forks/chai-lab/chai_lab/data/parsing/input_validation.py new file mode 100644 index 00000000..7198abe0 --- /dev/null +++ b/forks/chai-lab/chai_lab/data/parsing/input_validation.py @@ -0,0 +1,74 @@ +""" +Simple heuristics that can help with identification of EntityType +""" + +import string +from string import ascii_letters + +from chai_lab.data.parsing.structure.entity_type import EntityType + + +def constituents_of_modified_fasta(x: str) -> list[str] | None: + """ + Accepts RNA/DNA inputs: 'agtc', 'AGT(ASP)TG', etc. Does not accept SMILES strings. + Returns constituents, e.g, [A, G, T, ASP, T, G] or None if string is incorrect. + Everything in returned list is single character, except for blocks specified in brackets. 
+ """ + x = x.strip().upper() + # it is a bit strange that digits are here, but [NH2] was in one protein + allowed_chars = ascii_letters + "()" + string.digits + if not all(letter in allowed_chars for letter in x): + return None + + current_modified: str | None = None + + constituents = [] + for letter in x: + if letter == "(": + if current_modified is not None: + return None # double open bracket + current_modified = "" + elif letter == ")": + if current_modified is None: + return None # closed without opening + if len(current_modified) <= 1: + return None # empty modification: () or single (K) + constituents.append(current_modified) + current_modified = None + else: + if current_modified is not None: + current_modified += letter + else: + if letter not in ascii_letters: + return None # strange single-letter residue + constituents.append(letter) + if current_modified is not None: + return None # did not close bracket + return constituents + + +def identify_potential_entity_types(sequence: str) -> list[EntityType]: + """ + Provided FASTA sequence or smiles, lists which entities those could be. + Returns an empty list if sequence is invalid for all entity types. + """ + sequence = sequence.strip() + if len(sequence) == 0: + return [] + possible_entity_types = [] + + constituents = constituents_of_modified_fasta(sequence) + if constituents is not None: + # this can be RNA/DNA/protein. 
+ one_letter_constituents = set(x for x in constituents if len(x) == 1) + if set.issubset(one_letter_constituents, set("AGTC")): + possible_entity_types.append(EntityType.DNA) + if set.issubset(one_letter_constituents, set("AGUC")): + possible_entity_types.append(EntityType.RNA) + if "U" not in one_letter_constituents: + possible_entity_types.append(EntityType.PROTEIN) + + ascii_symbols = string.ascii_letters + string.digits + ".-+=#$%:/\\[]()<>@" + if set.issubset(set(sequence.upper()), set(ascii_symbols)): + possible_entity_types.append(EntityType.LIGAND) + return possible_entity_types diff --git a/forks/chai-lab/chai_lab/data/parsing/msas/__init__.py b/forks/chai-lab/chai_lab/data/parsing/msas/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/forks/chai-lab/chai_lab/data/parsing/msas/data_source.py b/forks/chai-lab/chai_lab/data/parsing/msas/data_source.py new file mode 100644 index 00000000..831a3851 --- /dev/null +++ b/forks/chai-lab/chai_lab/data/parsing/msas/data_source.py @@ -0,0 +1,53 @@ +import logging +from enum import Enum + +logger = logging.getLogger(__name__) + + +class MSADataSource(Enum): + UNIPROT = "uniprot" + UNIREF90 = "uniref90" + BFD = "BFD" + MGNIFY = "mgnify" + PAIRED = "paired" + MAIN = "main" + BFD_UNICLUST = "bfd_uniclust" + SINGLETON = "singleton" + NONE = "none" + + # templates + PDB70 = "pdb70" + + # ran with 3 jackhmmer iterations (-N=3), + # higher quality but sloow to generate + UNIPROT_N3 = "uniprot_n3" + UNIREF90_N3 = "uniref90_n3" + MGNIFY_N3 = "mgnify_n3" + + @classmethod + def get_default_sources(cls): + return [ + MSADataSource.BFD_UNICLUST, + MSADataSource.MGNIFY, + MSADataSource.UNIREF90, + MSADataSource.UNIPROT, + ] + + +def encode_source_to_int(source: MSADataSource) -> int: + return msa_dataset_source_to_int.get(source, 4) + + +# This becomes a feature so changing it might break checkpoint compatibility +msa_dataset_source_to_int = { + MSADataSource.BFD_UNICLUST: 0, + MSADataSource.MGNIFY: 1, + 
MSADataSource.UNIREF90: 2, + MSADataSource.UNIPROT: 3, + MSADataSource.NONE: 4, + MSADataSource.UNIPROT_N3: 3, + MSADataSource.UNIREF90_N3: 2, + MSADataSource.MGNIFY_N3: 1, +} + +database_ids: set[str] = set(x.value for x in MSADataSource) diff --git a/forks/chai-lab/chai_lab/data/parsing/msas/species.py b/forks/chai-lab/chai_lab/data/parsing/msas/species.py new file mode 100644 index 00000000..ac684f31 --- /dev/null +++ b/forks/chai-lab/chai_lab/data/parsing/msas/species.py @@ -0,0 +1,5 @@ +import logging + +logger = logging.getLogger(__name__) + +UNKNOWN_SPECIES = 0 diff --git a/forks/chai-lab/chai_lab/data/parsing/structure/__init__.py b/forks/chai-lab/chai_lab/data/parsing/structure/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/forks/chai-lab/chai_lab/data/parsing/structure/all_atom_entity_data.py b/forks/chai-lab/chai_lab/data/parsing/structure/all_atom_entity_data.py new file mode 100644 index 00000000..a28871ae --- /dev/null +++ b/forks/chai-lab/chai_lab/data/parsing/structure/all_atom_entity_data.py @@ -0,0 +1,87 @@ +import logging +from dataclasses import dataclass +from datetime import datetime +from functools import cached_property + +from chai_lab.data.parsing.structure import sequence +from chai_lab.data.parsing.structure.entity_type import EntityType +from chai_lab.data.parsing.structure.residue import Residue +from chai_lab.data.residue_constants import standard_residue_pdb_codes +from chai_lab.utils.typing import typecheck + +logger = logging.getLogger(__name__) + + +@typecheck +@dataclass +class AllAtomEntityData: + residues: list[Residue] + full_sequence: list[str] + resolution: float + release_datetime: datetime | None # None if no date found + pdb_id: str + source_pdb_chain_id: str + # Unique string identifying the entity. + entity_name: str + # Unique integer identifying the entity, starting at 0. There is a 1:1 mapping + # between entity_name and entity_index. 
+ entity_id: int + method: str + entity_type: EntityType + subchain_id: str + is_d_polypeptide: bool = False # NOTE (mostly) exists for eval set construction + + def __post_init__(self): + assert ( + len(self.residues) == len(self.full_sequence) + ), f"{self.__class__.__name__} residues and full_sequence must be the same length" + + @property + def missing_residues(self) -> list[Residue]: + """ + Returns a list of missing residues in the entity + """ + return [residue for residue in self.residues if residue.is_missing] + + @cached_property + def has_modifications(self) -> bool: + """ + Returns True if the entity has modifications; this only applies to polymers so + is always False for ligands, waters, and unknowns. + """ + if self.entity_type not in ( + EntityType.PROTEIN, + EntityType.RNA, + EntityType.DNA, + EntityType.POLYMER_HYBRID, + ): + return False + + return any(res.name not in standard_residue_pdb_codes for res in self.residues) + + @property + def is_distillation(self) -> bool: + return self.pdb_id.startswith("AF-") + + @property + def sequence(self) -> str: + """Sequence with modified residues encoded as X.""" + return sequence.protein_one_letter_sequence(self.full_sequence) + + @property + def sequence_with_mods(self) -> str: + """Sequence with modifications encoded as [FOO] where FOO is modified residue.""" + return sequence.protein_one_letter_sequence_with_mods(self.full_sequence) + + def __str__(self) -> str: + fields = ", ".join( + [ + f"pdb_id={self.pdb_id}", + f"source_pdb_chain_id={self.source_pdb_chain_id}", + f"entity_name={self.entity_name}", + f"entity_id={self.entity_id}", + f"entity_type={self.entity_type}", + f"subchain_id={self.subchain_id}", + ] + ) + return f"AllAtomEntityData({fields})" diff --git a/forks/chai-lab/chai_lab/data/parsing/structure/entity_type.py b/forks/chai-lab/chai_lab/data/parsing/structure/entity_type.py new file mode 100644 index 00000000..ec5249b1 --- /dev/null +++ 
b/forks/chai-lab/chai_lab/data/parsing/structure/entity_type.py @@ -0,0 +1,14 @@ +import logging +from enum import Enum + +logger = logging.getLogger(__name__) + + +class EntityType(Enum): + PROTEIN = 0 + RNA = 1 + DNA = 2 + LIGAND = 3 + POLYMER_HYBRID = 4 + WATER = 5 + UNKNOWN = 6 diff --git a/forks/chai-lab/chai_lab/data/parsing/structure/residue.py b/forks/chai-lab/chai_lab/data/parsing/structure/residue.py new file mode 100644 index 00000000..6174c75b --- /dev/null +++ b/forks/chai-lab/chai_lab/data/parsing/structure/residue.py @@ -0,0 +1,105 @@ +from dataclasses import dataclass + +import gemmi +import torch +from torch import Tensor + +from chai_lab.data.parsing.structure.entity_type import EntityType +from chai_lab.data.residue_constants import residue_types_with_nucleotides_order +from chai_lab.model.utils import center_random_augmentation +from chai_lab.utils.typing import Bool, Float, Int + + +@dataclass +class ConformerData: + position: Float[Tensor, "n 3"] + element: Int[Tensor, "n"] + charge: Int[Tensor, "n"] + atom_names: list[str] + bonds: list[tuple[int, int]] + symmetries: Int[Tensor, "n n_symm"] + + @property + def num_atoms(self) -> int: + num_atoms, _ = self.position.shape + assert num_atoms == len(self.atom_names) + return num_atoms + + def gather_atom_positions( + self, query_atom_names: list[str] + ) -> tuple[Float[Tensor, "n 3"], Bool[Tensor, "n"]]: + if self.num_atoms == 0: + gathered_positions = torch.zeros(len(query_atom_names), 3) + mask = torch.zeros(len(query_atom_names), dtype=torch.bool) + return gathered_positions, mask + + atom_indices = {name: i for i, name in enumerate(self.atom_names)} + indices = torch.tensor( + [atom_indices.get(name, -1) for name in query_atom_names], + dtype=torch.int, + ) + mask = indices != -1 + gathered_positions = self.position[indices] * mask.unsqueeze(-1) + + return gathered_positions, mask + + def center_random_augment( + self, + ) -> "ConformerData": + if self.num_atoms == 0: + return self + + 
atom_mask = torch.ones_like(self.element, dtype=torch.bool) + centered_coords = center_random_augmentation( + self.position.unsqueeze(0), atom_mask.unsqueeze(0) + )[0] + return ConformerData( + centered_coords, + self.element, + self.charge, + self.atom_names, + self.bonds, + self.symmetries, + ) + + +@dataclass +class Residue: + name: str + label_seq: int | None + restype: int + residue_index: int + is_missing: bool + b_factor_or_plddt: float + conformer_data: ConformerData | None + smiles: str | None = None + + +def get_restype( + residue_info: gemmi.ResidueInfo, + entity_type: EntityType, +) -> int: + """ + Encodes residues into alphabet of size 32: + 20 standards AAs + X + 4 RNA bases + RX + 4 DNA bases + DX + GAP + note: ligand residues as encoded as X + """ + + if residue_info.is_amino_acid(): + restype = residue_info.fasta_code() # encodes non-standard as X + unknown_value = residue_types_with_nucleotides_order["X"] + elif residue_info.is_nucleic_acid() and entity_type == EntityType.RNA: + restype = "R{}".format(residue_info.one_letter_code) + unknown_value = residue_types_with_nucleotides_order["RX"] + elif residue_info.is_nucleic_acid() and entity_type == EntityType.DNA: + restype = "D{}".format(residue_info.one_letter_code) + unknown_value = residue_types_with_nucleotides_order["DX"] + else: + restype = "X" + unknown_value = residue_types_with_nucleotides_order["X"] + + tokenized_restype = residue_types_with_nucleotides_order.get(restype, unknown_value) + return tokenized_restype diff --git a/forks/chai-lab/chai_lab/data/parsing/structure/sequence.py b/forks/chai-lab/chai_lab/data/parsing/structure/sequence.py new file mode 100644 index 00000000..dbea3fd5 --- /dev/null +++ b/forks/chai-lab/chai_lab/data/parsing/structure/sequence.py @@ -0,0 +1,135 @@ +import logging + +import gemmi + +from chai_lab.data import residue_constants +from chai_lab.data.parsing.structure.entity_type import EntityType + +logger = logging.getLogger(__name__) + + +def 
fasta_one_letter_sequence(residue_codes: list[str]) -> str: + """ + Converts a list of residue names into a one-letter-code sequence + """ + return "".join( + [gemmi.find_tabulated_residue(res).fasta_code() for res in residue_codes] + ) + + +def protein_one_letter_sequence(residue_codes: list[str]) -> str: + """ + Converts a list of protein residue names into a one-letter-code sequence. + Probably equivalent to gemmi fasta_code() method but kept for consistency + with old parsing + to be explicit about how non-standard res are handled (with X) + """ + return "".join([_get_protein_only_residue_token(res) for res in residue_codes]) + + +def protein_one_letter_sequence_with_mods(residue_codes: list[str]) -> str: + """ + Convert a list of protein residue names into a one-letter code sequence, + insert non-standard residues as [FOO] where FOO corresponds to the residue code of + that non-standard residue. + + For example, 1PFH is ...APNGL[HIP]TRP... where HIP is the modified residue. + """ + return "".join( + [ + _get_protein_only_residue_token(res, mods_in_brackets=True) + for res in residue_codes + ] + ) + + +def _get_protein_only_residue_token( + three_letter_code: str, + mods_in_brackets: bool = False, +) -> str: + """Encodes everything that is not a standard amino acid as X if nonstandard_as_X is + True, otherwise return nonstandard FOO as [FOO]""" + residue_info = gemmi.find_tabulated_residue(three_letter_code) + # Standard amino acids are always given as single letters + if residue_info.is_amino_acid() and residue_info.is_standard(): + single_letter = residue_info.one_letter_code + single_letter = single_letter.upper() + # non-standard residues derived from a parent std residue are lowercase + single_letter = ( + single_letter if single_letter in residue_constants.restypes else "X" + ) + return single_letter + else: + if mods_in_brackets: + return f"[{three_letter_code}]" + else: + # non-standard residues derived from a parent std residue may have a + # lowercase 
one-letter code; make this upper case. + single_letter = residue_info.one_letter_code.upper() + return single_letter if single_letter in residue_constants.restypes else "X" + + +def _get_residue_token( + three_letter_code: str, + entity_type: EntityType, +) -> str: + """ + Encodes amino-acids and nucleic acids into corresponding tokens + 20 standard AAs + X + 4 RNA bases + RX + 4 DNA bases + DX + """ + residue_info = gemmi.find_tabulated_residue(three_letter_code) + if residue_info.is_amino_acid(): + single_letter = residue_info.one_letter_code + single_letter = single_letter.upper() + # non-standard residues derived from a parent std residue are lowercase + single_letter = ( + single_letter if single_letter in residue_constants.restypes else "X" + ) + return single_letter + + elif residue_info.is_nucleic_acid() and entity_type == EntityType.RNA: + return "R{}".format(residue_info.one_letter_code) + + elif residue_info.is_nucleic_acid() and entity_type == EntityType.DNA: + return "D{}".format(residue_info.one_letter_code) + + else: + # more properties at https://gemmi.readthedocs.io/en/latest/mol.html#built-in-data + return "X" + + +def get_residue_codes(subchain: gemmi.ResidueSpan, entity: gemmi.Entity) -> list[str]: + """ + Get list of residue codes (3-letter for protein residues, + 1 to 3 letters/digits for ligands, 1 or 2 letters for RNA/DNA) + for a gemmi subchain + """ + # entity.full_sequence comes from SEQRES, so it might be missing in PDB files + if entity.full_sequence is not None and len(entity.full_sequence) > 0: + return [ + gemmi.Entity.first_mon(item) # Ignore point mutations + for item in entity.full_sequence + ] + # this infers the sequence from the set of residues in the structure + return [res.name for res in subchain.first_conformer()] + + +def tokenize_sequence( + subchain: gemmi.ResidueSpan, entity: gemmi.Entity, entity_type: EntityType +) -> list[str]: + three_letter_sequence = get_residue_codes(subchain, entity) + + match entity_type: + case 
EntityType.PROTEIN: + return [ + _get_protein_only_residue_token(three_letter_code) + for three_letter_code in three_letter_sequence + ] + case EntityType.RNA | EntityType.DNA: + return [ + _get_residue_token(three_letter_code, entity_type) + for three_letter_code in three_letter_sequence + ] + case _: + raise NotImplementedError diff --git a/forks/chai-lab/chai_lab/data/residue_constants.py b/forks/chai-lab/chai_lab/data/residue_constants.py new file mode 100644 index 00000000..fdd6e11e --- /dev/null +++ b/forks/chai-lab/chai_lab/data/residue_constants.py @@ -0,0 +1,597 @@ +# Copyright 2021 AlQuraishi Laboratory +# Copyright 2021 DeepMind Technologies Limited +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + + +from enum import Enum + +# A list of atoms (excluding hydrogen) for each AA type. PDB naming convention. 
+residue_atoms = { + "ALA": ["C", "CA", "CB", "N", "O"], + "ARG": ["C", "CA", "CB", "CG", "CD", "CZ", "N", "NE", "O", "NH1", "NH2"], + "ASP": ["C", "CA", "CB", "CG", "N", "O", "OD1", "OD2"], + "ASN": ["C", "CA", "CB", "CG", "N", "ND2", "O", "OD1"], + "CYS": ["C", "CA", "CB", "N", "O", "SG"], + "GLU": ["C", "CA", "CB", "CG", "CD", "N", "O", "OE1", "OE2"], + "GLN": ["C", "CA", "CB", "CG", "CD", "N", "NE2", "O", "OE1"], + "GLY": ["C", "CA", "N", "O"], + "HIS": ["C", "CA", "CB", "CG", "CD2", "CE1", "N", "ND1", "NE2", "O"], + "ILE": ["C", "CA", "CB", "CG1", "CG2", "CD1", "N", "O"], + "LEU": ["C", "CA", "CB", "CG", "CD1", "CD2", "N", "O"], + "LYS": ["C", "CA", "CB", "CG", "CD", "CE", "N", "NZ", "O"], + "MET": ["C", "CA", "CB", "CG", "CE", "N", "O", "SD"], + "PHE": ["C", "CA", "CB", "CG", "CD1", "CD2", "CE1", "CE2", "CZ", "N", "O"], + "PRO": ["C", "CA", "CB", "CG", "CD", "N", "O"], + "SER": ["C", "CA", "CB", "N", "O", "OG"], + "THR": ["C", "CA", "CB", "CG2", "N", "O", "OG1"], + "TRP": [ + "C", + "CA", + "CB", + "CG", + "CD1", + "CD2", + "CE2", + "CE3", + "CZ2", + "CZ3", + "CH2", + "N", + "NE1", + "O", + ], + "TYR": ["C", "CA", "CB", "CG", "CD1", "CD2", "CE1", "CE2", "CZ", "N", "O", "OH"], + "VAL": ["C", "CA", "CB", "CG1", "CG2", "N", "O"], +} + +# nucleic acid atoms from rosettafold-all-atoms +# we prefix nucleic acid tokens with "R" for RNA and "D" for DNA +# we add an unknown token RX for RNA-unknown and DX for DNA-unknown +nucleic_acid_atoms: dict[str, tuple[str | None, ...]] = { + "DA": ( + "O4'", + "C1'", + "C2'", + "OP1", + "P", + "OP2", + "O5'", + "C5'", + "C4'", + "C3'", + "O3'", + "N9", + "C4", + "N3", + "C2", + "N1", + "C6", + "C5", + "N7", + "C8", + "N6", + None, + None, + "H5''", + "H5'", + "H4'", + "H3'", + "H2''", + "H2'", + "H1'", + "H2", + "H61", + "H62", + "H8", + None, + None, + ), + "DC": ( + "O4'", + "C1'", + "C2'", + "OP1", + "P", + "OP2", + "O5'", + "C5'", + "C4'", + "C3'", + "O3'", + "N1", + "C2", + "O2", + "N3", + "C4", + "N4", + "C5", + "C6", + 
None, + None, + None, + None, + "H5''", + "H5'", + "H4'", + "H3'", + "H2''", + "H2'", + "H1'", + "H42", + "H41", + "H5", + "H6", + None, + None, + ), + "DG": ( + "O4'", + "C1'", + "C2'", + "OP1", + "P", + "OP2", + "O5'", + "C5'", + "C4'", + "C3'", + "O3'", + "N9", + "C4", + "N3", + "C2", + "N1", + "C6", + "C5", + "N7", + "C8", + "N2", + "O6", + None, + "H5''", + "H5'", + "H4'", + "H3'", + "H2''", + "H2'", + "H1'", + "H1", + "H22", + "H21", + "H8", + None, + None, + ), + "DT": ( + "O4'", + "C1'", + "C2'", + "OP1", + "P", + "OP2", + "O5'", + "C5'", + "C4'", + "C3'", + "O3'", + "N1", + "C2", + "O2", + "N3", + "C4", + "O4", + "C5", + "C7", + "C6", + None, + None, + None, + "H5''", + "H5'", + "H4'", + "H3'", + "H2''", + "H2'", + "H1'", + "H3", + "H71", + "H72", + "H73", + "H6", + None, + ), + "DX": ( + "O4'", + "C1'", + "C2'", + "OP1", + "P", + "OP2", + "O5'", + "C5'", + "C4'", + "C3'", + "O3'", + "O2'", + "N1", + "C2", + "N3", + "C4", + "C5", + "C6", + "N6", + "N7", + "C8", + "N9", + None, + "H5'", + "H5''", + "H4'", + "H3'", + "H2'", + "HO2'", + "H1'", + "H2", + "H61", + "H62", + "H8", + None, + None, + ), + "RA": ( + "O4'", + "C1'", + "C2'", + "OP1", + "P", + "OP2", + "O5'", + "C5'", + "C4'", + "C3'", + "O3'", + "O2'", + "N1", + "C2", + "N3", + "C4", + "C5", + "C6", + "N6", + "N7", + "C8", + "N9", + None, + "H5'", + "H5''", + "H4'", + "H3'", + "H2'", + "HO2'", + "H1'", + "H2", + "H61", + "H62", + "H8", + None, + None, + ), + "RC": ( + "O4'", + "C1'", + "C2'", + "OP1", + "P", + "OP2", + "O5'", + "C5'", + "C4'", + "C3'", + "O3'", + "O2'", + "N1", + "C2", + "O2", + "N3", + "C4", + "N4", + "C5", + "C6", + None, + None, + None, + "H5'", + "H5''", + "H4'", + "H3'", + "H2'", + "HO2'", + "H1'", + "H42", + "H41", + "H5", + "H6", + None, + None, + ), + "RG": ( + "O4'", + "C1'", + "C2'", + "OP1", + "P", + "OP2", + "O5'", + "C5'", + "C4'", + "C3'", + "O3'", + "O2'", + "N1", + "C2", + "N2", + "N3", + "C4", + "C5", + "C6", + "O6", + "N7", + "C8", + "N9", + "H5'", + "H5''", + 
"H4'", + "H3'", + "H2'", + "HO2'", + "H1'", + "H1", + "H22", + "H21", + "H8", + None, + None, + ), + "RU": ( + "O4'", + "C1'", + "C2'", + "OP1", + "P", + "OP2", + "O5'", + "C5'", + "C4'", + "C3'", + "O3'", + "O2'", + "N1", + "C2", + "O2", + "N3", + "C4", + "O4", + "C5", + "C6", + None, + None, + None, + "H5'", + "H5''", + "H4'", + "H3'", + "H2'", + "HO2'", + "H1'", + "H3", + "H5", + "H6", + None, + None, + None, + ), + "RX": ( + "O4'", + "C1'", + "C2'", + "OP1", + "P", + "OP2", + "O5'", + "C5'", + "C4'", + "C3'", + "O3'", + "O2'", + None, + None, + None, + None, + None, + None, + None, + None, + None, + None, + None, + None, + "H5'", + "H5''", + "H4'", + "H3'", + "H2'", + "HO2'", + "H1'", + None, + None, + None, + None, + None, + None, + ), +} + + +# This mapping is used when we need to store atom data in a format that requires +# fixed atom data size for every residue (e.g. a numpy array). +atom_indices = Enum( + "atom_indices", + [ + "N", + "CA", + "C", + "CB", + "O", + "CG", + "CG1", + "CG2", + "OG", + "OG1", + "SG", + "CD", + "CD1", + "CD2", + "ND1", + "ND2", + "OD1", + "OD2", + "SD", + "CE", + "CE1", + "CE2", + "CE3", + "NE", + "NE1", + "NE2", + "OE1", + "OE2", + "CH2", + "NH1", + "NH2", + "OH", + "CZ", + "CZ2", + "CZ3", + "NZ", + "OXT", + ], + start=0, +) +atom_types = [atom_index.name for atom_index in atom_indices] +atom_order = {atom_type: i for i, atom_type in enumerate(atom_types)} + +# This is the standard residue order when coding AA type as a number. +# Reproduce it by taking 3-letter AA codes and sorting them alphabetically. +restypes = [ + "A", + "R", + "N", + "D", + "C", + "Q", + "E", + "G", + "H", + "I", + "L", + "K", + "M", + "F", + "P", + "S", + "T", + "W", + "Y", + "V", +] + +residue_types_with_nucleotides = ( + restypes + + ["X"] + + ["RA", "RC", "RG", "RU", "RX"] + + ["DA", "DC", "DG", "DT", "DX"] + + ["-"] # gap + + [":"] # non-existent (i.e. 
should get masked) +) + +residue_types_with_nucleotides_order = { + restype: i for i, restype in enumerate(residue_types_with_nucleotides) +} + + +# Residue names as found in mmcif/ gemmi parsed data +# that indicate a residue will be tokenized by residue +# and not by atom. +standard_residue_pdb_codes = { + "ALA", + "ARG", + "ASN", + "ASP", + "CYS", + "GLN", + "GLU", + "GLY", + "HIS", + "ILE", + "LEU", + "LYS", + "MET", + "PHE", + "PRO", + "SER", + "THR", + "TRP", + "TYR", + "VAL", + "UNK", + "A", + "G", + "C", + "U", + "DA", + "DG", + "DC", + "DT", +} + +# we reserve this residue name for ligands +# it is not assigned to any chemical in the PDB +# it should never have a cached ref conformer +new_ligand_residue_name = "LIG" + + +restype_1to3 = { + "A": "ALA", + "R": "ARG", + "N": "ASN", + "D": "ASP", + "C": "CYS", + "Q": "GLN", + "E": "GLU", + "G": "GLY", + "H": "HIS", + "I": "ILE", + "L": "LEU", + "K": "LYS", + "M": "MET", + "F": "PHE", + "P": "PRO", + "S": "SER", + "T": "THR", + "W": "TRP", + "Y": "TYR", + "V": "VAL", +} + +restype_1to3_with_x = {**restype_1to3, "X": "UNK"} diff --git a/forks/chai-lab/chai_lab/data/sources/rdkit.py b/forks/chai-lab/chai_lab/data/sources/rdkit.py new file mode 100644 index 00000000..be6df3a4 --- /dev/null +++ b/forks/chai-lab/chai_lab/data/sources/rdkit.py @@ -0,0 +1,253 @@ +import logging +from pathlib import Path + +import antipickle +import torch +from rdkit import Chem +from rdkit.Chem import AllChem + +# for some reason calling Chem.rdDetermineBonds doesnt work +from rdkit.Chem.rdDetermineBonds import DetermineBonds +from rdkit.Geometry import Point3D +from rdkit.rdBase import BlockLogs +from tqdm import tqdm + +from chai_lab.data.parsing.structure.residue import ConformerData +from chai_lab.data.residue_constants import ( + new_ligand_residue_name, + standard_residue_pdb_codes, +) +from chai_lab.utils import paths +from chai_lab.utils.pickle import TorchAntipickleAdapter +from chai_lab.utils.timeout import timeout + +# 
class RefConformerGenerator:
    """Serves cached reference conformers by residue name and builds new ones
    from SMILES strings via RDKit."""

    def __init__(
        self,
        leaving_atoms_cache_file: str | None = None,
    ):
        """
        N.B. in almost all cases, you want to use RefConformerGenerator.make() rather
        than initializing the object directly, since constructing the conformer
        generator is expensive, and we want to cache the result.

        Caches idealized 3D coordinates and list of atoms for residues that exist in the PDB
        This is needed to create empty atom coordinates and mask for missing residues
        and ensure the number of tokens and atoms is the same for chains with the same entity_id
        """
        # Mapping of molecule names to (atom_names, leaving_atoms); leaving atoms
        # correspond to True. See the following file for how this was constructed:
        # src/scripts/small_molecule_preprocess/leaving_atoms.py
        self.leaving_atoms: dict[str, tuple[list[str], list[bool]]] = dict()
        if leaving_atoms_cache_file is not None:
            self.leaving_atoms = antipickle.load(leaving_atoms_cache_file)

        # download conformers' cache if needed
        conformers_cache_file = paths.cached_conformers.get_path().as_posix()
        # load cached conformers after leaving atoms cache is generated in
        # case we need to re-generate the cache
        self.cached_conformers = self._load_apkl_conformers(conformers_cache_file)

        # The placeholder name used for new ligands must never resolve to a
        # cached conformer.
        if new_ligand_residue_name in self.cached_conformers:
            self.cached_conformers.pop(new_ligand_residue_name)

        assert len(self.cached_conformers) > 0

    def _load_apkl_conformers(self, path: str) -> dict[str, ConformerData]:
        """Load the conformer cache from an existing ``.apkl`` (antipickle) file."""
        assert path.endswith(".apkl")
        assert Path(path).exists()
        return antipickle.load(path, adapters=_get_adapters())

    def _load_cached_conformers(self, path: str) -> dict[str, ConformerData]:
        """Build the residue-name -> ConformerData mapping from an SDF file.

        Asserts that every entry of ``standard_residue_pdb_codes`` was loaded.
        """
        # NOTE(review): BlockLogs presumably silences RDKit logging while the
        # object is alive -- confirm against its definition.
        block = BlockLogs()
        with Chem.SDMolSupplier(path) as suppl:
            mols = [m for m in suppl if m is not None]
        del block
        logger.info(f"Loaded {len(mols)} cached conformers")

        residues_dict = {
            m.GetProp("_Name"): self._load_ref_conformer_from_rdkit(m)
            for m in tqdm(mols)
        }

        # check at least standard residues were loaded
        # otherwise missing protein residues cannot be handled
        for res_name in standard_residue_pdb_codes:
            assert (
                res_name in residues_dict
            ), f"Standard residue {res_name} should have a reference conformer loaded"

        return residues_dict

    @classmethod
    def _load_ref_conformer_from_rdkit(cls, mol: Chem.Mol) -> ConformerData:
        """Convert an RDKit molecule (with a conformer and per-atom "name"
        properties) into ConformerData. All hydrogens are removed first."""
        mol = Chem.RemoveAllHs(mol)

        ref_pos = torch.tensor(mol.GetConformer().GetPositions(), dtype=torch.float)

        ref_atom_names = [atom.GetProp("name") for atom in mol.GetAtoms()]

        ref_atom_charge = torch.tensor(
            [atom.GetFormalCharge() for atom in mol.GetAtoms()], dtype=torch.int
        )
        ref_atom_element = torch.tensor(
            [atom.GetAtomicNum() for atom in mol.GetAtoms()], dtype=torch.int
        )

        bonds = [
            (bond.GetBeginAtomIdx(), bond.GetEndAtomIdx()) for bond in mol.GetBonds()
        ]

        symms = get_intra_res_atom_symmetries(mol)

        # One column per symmetry; fall back to the identity permutation when
        # no symmetries were returned.
        symmetries = (
            torch.stack([torch.tensor(x) for x in symms], dim=-1)
            if len(symms) > 0
            else torch.arange(len(ref_atom_names)).unsqueeze(-1)
        )

        return ConformerData(
            position=ref_pos,
            element=ref_atom_element,
            charge=ref_atom_charge,
            atom_names=ref_atom_names,
            bonds=bonds,
            symmetries=symmetries,
        )

    def get(self, residue_name: str) -> ConformerData | None:
        """
        Returns an rdkit reference conformer if residue is in CCD and conformer
        generation succeeded. Otherwise, returns None.

        N.B. we should _not_ add more post-processing logic to this method, since we
        call this for every residue and want cache lookups to be fast for large
        chains. If you need to modify the conformer data, do that when building the
        cache instead.
        """
        return self.cached_conformers.get(residue_name)

    def generate(self, smiles: str) -> ConformerData:
        """Generates a conformer for a ligand from its SMILES string.

        Raises:
            AssertionError: if RDKit cannot parse ``smiles``.
            ValueError: if no conformer could be embedded.
        """
        mol = Chem.MolFromSmiles(smiles)
        assert mol is not None, f"Invalid smiles {smiles}"

        mol_with_hs = Chem.AddHs(mol)

        params = AllChem.ETKDGv3()
        params.useSmallRingTorsions = True
        params.randomSeed = 123
        params.useChirality = True
        # below params were added after facing 'Value Error: Bad Conformer id'
        # https://github.com/rdkit/rdkit/issues/1433#issuecomment-305097888
        params.maxAttempts = 10_000
        params.useRandomCoords = True

        AllChem.EmbedMultipleConfs(mol_with_hs, numConfs=1, params=params)
        if mol_with_hs.GetNumConformers() == 0:
            # Fail with a message naming the input instead of RDKit's opaque
            # "Bad Conformer Id" raised later by GetConformer().
            raise ValueError(f"Failed to embed a conformer for smiles {smiles}")

        # Hydrogens are stripped inside _load_ref_conformer_from_rdkit
        # (Chem.RemoveAllHs). The previous bare AllChem.RemoveHs(...) call here
        # discarded its return value and therefore had no effect.
        for atom in mol_with_hs.GetAtoms():
            atom.SetProp("name", atom.GetSymbol())
        retval = self._load_ref_conformer_from_rdkit(mol_with_hs)
        retval.atom_names = [a.upper() for a in retval.atom_names]
        return retval
def maybe_add_bonds(mol: Chem.Mol, timeout_after: float = 1.0) -> Chem.Mol:
    """Best-effort bond inference for a molecule.

    Runs RDKit's DetermineBonds under a timeout of ``timeout_after``. If bond
    determination raises ValueError or times out, a warning is logged and the
    molecule is returned anyway.
    """

    @timeout(timeout_after)
    def _determine_bonds_in_place(target):
        # hard-to-find function for inferring bond information
        # https://rdkit.org/docs/source/rdkit.Chem.rdDetermineBonds.html
        # Wrapped in a timeout because it is known to hang for some molecules:
        # https://github.com/rdkit/rdkit/discussions/7289#discussioncomment-8930333
        DetermineBonds(target)
        return target

    try:
        mol = _determine_bonds_in_place(mol)
    except (ValueError, TimeoutError) as e:
        # ValueError is raised by RDKit, e.g.
        #   - "could not find valid bond ordering"
        #   - "determineBondOrdering() does not work with element Os"
        # TimeoutError is caused by the RDKit hang described above.
        logger.warning(f"Failed to determine bonds for {Chem.MolToSmiles(mol)}, {e}")

    return mol
@dataclass(frozen=True)
class InferenceNoiseSchedule:
    """Noise schedule used at inference time.

    Noise levels are ``sigma_data`` times a power-law interpolation between
    ``s_max`` (at t=0) and ``s_min`` (at t=1) with exponent ``p``.
    """

    s_max: float = 160.0
    s_min: float = 4e-4
    p: float = 7.0
    sigma_data: float = 16.0

    @typecheck
    def get_schedule(
        self,
        device,
        num_timesteps: int = 200,
    ) -> Float[Tensor, "{num_timesteps}"]:
        """Return ``num_timesteps`` noise levels on ``device``.

        Times are the odd-indexed points of a (2 * num_timesteps + 1)-point
        grid on [0, 1], i.e. the midpoints of num_timesteps equal intervals.
        """
        grid = torch.linspace(0, 1, 2 * num_timesteps + 1, device=device)
        midpoints = grid[1::2]
        return self.get_noise_for_times(midpoints)

    @typecheck
    def get_noise_for_times(
        self, times: Float[Tensor, "n_samples"]
    ) -> Float[Tensor, "n_samples"]:
        """Return the noise level for each entry of ``times``.

        Raises:
            ValueError: if any time lies outside [0, 1].
        """
        if times.min() < 0 or times.max() > 1:
            raise ValueError("times must be in [0, 1]")

        interpolated = _power_interpolation(
            times, val_0=self.s_max, val_1=self.s_min, p=self.p
        )
        return self.sigma_data * interpolated


@typecheck
def _power_interpolation(
    t: Float[Tensor, "n_samples"], val_0: float, val_1: float, p: float
) -> Float[Tensor, "n_samples"]:
    """Interpolate linearly in (1/p)-th-root space, then raise back to power p.

    Yields val_0 at t=0 and val_1 at t=1.
    """
    assert t.min() >= 0 and t.max() <= 1, f"0 <= t <= 1, but {t=}"
    root_at_one = val_1 ** (1 / p)
    root_at_zero = val_0 ** (1 / p)
    return (t * root_at_one + (1 - t) * root_at_zero) ** p
def get_qkv_indices_for_blocks(
    sequence_length: int,
    stride: int,
    kv_block_size: int,
    device: Any,
) -> tuple[
    Int[torch.Tensor, "bl bl_q"],
    Int[torch.Tensor, "bl bl_kv"],
    Bool[torch.Tensor, "bl bl_kv"],
]:
    """Gets q, kv indices for local attention blocks.

    The sequence is cut into ``sequence_length // stride`` query blocks of
    ``stride`` consecutive positions. Each query block gets a key/value window
    of ``kv_block_size`` consecutive positions starting at
    ``block_start + (stride - kv_block_size) // 2``.

    Args:
        sequence_length: number of positions; must be divisible by ``stride``.
        stride: number of query positions per block.
        kv_block_size: number of key/value positions per block.
        device: device on which the index tensors are created.

    Returns:
        ``(q_indices, kv_indices, kv_mask)``. ``kv_indices`` are wrapped modulo
        ``sequence_length``; ``kv_mask`` is False where the un-wrapped index
        fell outside ``[0, sequence_length)``.

    Raises:
        AssertionError: if ``sequence_length`` is not divisible by ``stride``.
    """
    # from now on pretend q and kv are different axes
    num_blocks = sequence_length // stride
    assert (
        sequence_length == num_blocks * stride
    ), f"only seqlens divisible by {stride=} are supported, not {sequence_length=}"
    q_indices = torch.arange(sequence_length, device=device)
    q_indices = rearrange(
        q_indices, "(bl bl_q) -> bl bl_q", bl=num_blocks, bl_q=stride
    )  # bl bl_q -> q
    # first kv position of each block, relative to the block's first q position
    kv_indices = q_indices[:, :1] + (stride - kv_block_size) // 2
    kv_indices = kv_indices + torch.arange(
        kv_block_size, device=kv_indices.device
    )  # bl bl_kv -> kv
    # mask out positions where kv_indices gets wrapped
    # Rationale: the local attention block should always process
    # local blocks (i.e. same rel-positional encodings for each block.)
    kv_mask = (kv_indices < sequence_length) & (kv_indices >= 0)
    # Use of % not .clamp is important for short sequences
    kv_indices = kv_indices % sequence_length
    # q_idx is returned for reference, downstream code uses reshapes instead
    return q_indices, kv_indices, kv_mask
def quaternion_to_matrix(quaternions: torch.Tensor) -> torch.Tensor:
    """
    Transform from: https://pytorch3d.readthedocs.io/en/latest/_modules/pytorch3d/transforms
    Turn real-part-first quaternions into rotation matrices. The quaternions
    need not be unit length: every term is scaled by 2 / |q|^2.

    Args:
        quaternions: tensor of shape (..., 4), ordered (real, i, j, k).

    Returns:
        Tensor of shape (..., 3, 3) holding the rotation matrices.
    """
    w, x, y, z = torch.unbind(quaternions, -1)
    scale = 2.0 / (quaternions * quaternions).sum(-1)

    # Row-major entries of the 3x3 matrix.
    row0 = (
        1 - scale * (y * y + z * z),
        scale * (x * y - z * w),
        scale * (x * z + y * w),
    )
    row1 = (
        scale * (x * y + z * w),
        1 - scale * (x * x + z * z),
        scale * (y * z - x * w),
    )
    row2 = (
        scale * (x * z - y * w),
        scale * (y * z + x * w),
        1 - scale * (x * x + y * y),
    )
    flat = torch.stack(row0 + row1 + row2, -1)
    batch_shape = quaternions.shape[:-1]
    return flat.reshape(batch_shape + (3, 3))
@torch.no_grad()
@typecheck
def center_random_augmentation(
    atom_coords: Float[Tensor, "b a 3"],
    atom_single_mask: Bool[Tensor, "#b a"],
    s_trans: float = 1.0,
    rotations: Float[Tensor, "b 3 3"] | None = None,
) -> Float[Tensor, "b a 3"]:
    """Center coordinates, rotate them, and add a random translation.

    Args:
        atom_coords: coordinates to augment.
        atom_single_mask: atoms that contribute to the centroid.
        s_trans: scale applied to the standard-normal random translation.
        rotations: per-sample rotation matrices; sampled randomly when None.

    Returns:
        The augmented coordinates, same shape as ``atom_coords``.
    """
    centroid = calc_centroid(atom_coords, mask=atom_single_mask)
    centroid = rearrange(centroid, "b c -> b 1 c")
    atom_coords = atom_coords - centroid
    # randomly rotate
    if rotations is None:
        # Sample in the coordinates' dtype so the einsum below does not hit a
        # dtype mismatch when atom_coords is not in the default float dtype.
        rotations = random_rotations(
            atom_coords.shape[0],
            dtype=atom_coords.dtype,
            device=atom_coords.device,
        )
    rotated_coords = torch.einsum("b i j, b a j -> b a i", rotations, atom_coords)
    random_translation = torch.randn_like(centroid)  # b 1 c=3
    return rotated_coords + s_trans * random_translation
repeat(chain_id_tensorcode, "c -> 1 c") + chain_id_mask = torch.all(chain_id_tensorcode == source_pdb_chain_id, dim=-1) + # check uniqueness + chain_id_asyms = torch.unique(token_asym_id[chain_id_mask]) + + assert len(chain_id_asyms) == 1, ( + f"Expected only one token asym, but got {len(chain_id_asyms)} " + f"asyms: {chain_id_asyms}" + ) + return chain_id_asyms[0].item() diff --git a/forks/chai-lab/chai_lab/py.typed b/forks/chai-lab/chai_lab/py.typed new file mode 100644 index 00000000..cd62ab2e --- /dev/null +++ b/forks/chai-lab/chai_lab/py.typed @@ -0,0 +1 @@ +# marker that this package is compatible with python typing \ No newline at end of file diff --git a/forks/chai-lab/chai_lab/ranking/__init__.py b/forks/chai-lab/chai_lab/ranking/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/forks/chai-lab/chai_lab/ranking/clashes.py b/forks/chai-lab/chai_lab/ranking/clashes.py new file mode 100644 index 00000000..aacf2223 --- /dev/null +++ b/forks/chai-lab/chai_lab/ranking/clashes.py @@ -0,0 +1,155 @@ +from dataclasses import dataclass + +import torch +from einops import rearrange, reduce, repeat +from torch import Tensor + +import chai_lab.ranking.utils as rutils +from chai_lab.utils.tensor_utils import cdist, und_self +from chai_lab.utils.typing import Bool, Float, Int, typecheck + + +@typecheck +@dataclass +class ClashScores: + """ + total_clashes: total number of clashes in the complex + total_inter_chain_clashes: total number of inter-chain clashes in the complex, + i.e. inter-chain clashes summed over all chain pairs + per_chain_intra_clashes: number of intra-chain clashes for each chain in the complex + per_chain_pair_clashes: number of inter-chain clashes for each chain pair in the complex + """ + + total_clashes: Int[Tensor, "..."] + total_inter_chain_clashes: Int[Tensor, "..."] + chain_intra_clashes: Int[Tensor, "... n_chains"] + chain_chain_inter_clashes: Int[Tensor, "... 
@typecheck
def has_inter_chain_clashes(
    atom_mask: Bool[Tensor, "... a"],
    atom_asym_id: Int[Tensor, "... a"],
    atom_entity_type: Int[Tensor, "... a"],
    per_chain_pair_clashes: Int[Tensor, "... n_chains n_chains"],
    max_clashes: int = 100,
    max_clash_ratio: float = 0.5,
) -> Bool[Tensor, "..."]:
    """
    Determine if the complex has inter-chain clashes.
    Criteria:
        (1) If a chain pair has at least `max_clashes` clashes, then consider it a clash
        (2) If a chain pair has fewer than `max_clashes` clashes, but the number of
            clashes is at least `max_clash_ratio` of the atom count of either chain
            in the pair (and hence of the smaller chain), then also consider it a clash
        (3) Both chains of the pair must be polymers

    Returns:
        One boolean per leading batch element: True if any polymer-polymer
        chain pair satisfies (1) or (2).
    """
    has_clashes = per_chain_pair_clashes >= max_clashes

    atoms_per_chain = rutils.num_atoms_per_chain(
        atom_mask=atom_mask,
        asym_id=atom_asym_id,
    )

    # if a chain pair has less than max_clashes clashes, but more than
    # max_clash_ratio of the smaller chain's total atoms, then also
    # consider it a clash. The ratio is taken against the row chain and then
    # the column chain; clamp(min=1) avoids division by zero for empty chains.
    has_clashes |= (
        per_chain_pair_clashes
        / rearrange(atoms_per_chain, "... c -> ... c 1").clamp(min=1)
    ).ge(max_clash_ratio)

    # "..." (not a fixed "b") so that any number of leading batch dimensions
    # is accepted, matching the signature and the check above.
    has_clashes |= (
        per_chain_pair_clashes
        / rearrange(atoms_per_chain, "... c -> ... 1 c").clamp(min=1)
    ).ge(max_clash_ratio)

    # only consider clashes between pairs of polymer chains
    polymer_chains = rutils.chain_is_polymer(
        asym_id=atom_asym_id,
        mask=atom_mask,
        entity_type=atom_entity_type,
    )
    is_polymer_pair = und_self(polymer_chains, "... c1, ... c2 -> ... c1 c2")

    # reduce over all chain pairs
    return reduce(has_clashes & is_polymer_pair, "... c1 c2 -> ...", torch.any)
@typecheck
def abc_is_colinear(
    atoms_a: Float[Tensor, "b n_triplets 3"],
    atoms_b: Float[Tensor, "b n_triplets 3"],
    atoms_c: Float[Tensor, "b n_triplets 3"],
) -> Bool[Tensor, "b n_triplets"]:
    """Flag triplets (a, b, c) whose angle at b is nearly 0 or nearly 180 degrees.

    A triplet is reported as co-linear when the angle between (a - b) and
    (c - b) is below 25 degrees, above 155 degrees, or NaN (e.g. when one of
    the two vectors has zero length).
    """
    to_a = atoms_a - atoms_b
    to_a = to_a / torch.linalg.norm(to_a, dim=-1, keepdim=True)
    to_c = atoms_c - atoms_b
    to_c = to_c / torch.linalg.norm(to_c, dim=-1, keepdim=True)

    # cosine of the angle at b, clamped so acos stays in its domain
    cosine = torch.clamp(torch.sum(to_a * to_c, dim=-1), -1.0, 1.0)
    angle = torch.acos(cosine)  # radians

    # Colinearity should cover cases that are very small acute angles and cases
    # of large obtuse angles that are close to 180 degrees.
    too_acute = angle < 25 / 180 * torch.pi
    too_obtuse = angle > 155 / 180 * torch.pi
    return torch.isnan(angle) | too_acute | too_obtuse
@typecheck
def get_frames_and_mask(
    atom_coords: Float[Tensor, "b n_atoms 3"],
    token_asym_id: Int[Tensor, "b n_tokens"],
    token_residue_index: Int[Tensor, "b n_tokens"],
    token_backbone_frame_mask: Bool[Tensor, "b n_tokens"],
    token_centre_atom_index: Int[Tensor, "b n_tokens"],
    token_exists_mask: Bool[Tensor, "b n_tokens"],
    atom_exists_mask: Bool[Tensor, "b n_atoms"],
    backbone_frame_idces: Int[Tensor, "b n_tokens 3"],
    atom_token_index: Int[Tensor, "b n_atoms"],
) -> tuple[Int[Tensor, "b n_tokens 3"], Bool[Tensor, "b n_tokens"]]:
    """Merge backbone frames with single-atom frames.

    Returns the frame atom indices (backbone indices, overwritten wherever a
    single-atom frame is valid) and the union of the two validity masks.
    """
    single_idces, single_mask = get_single_atom_frames(
        atom_coords=atom_coords,
        token_asym_id=token_asym_id,
        token_residue_index=token_residue_index,
        token_backbone_frame_mask=token_backbone_frame_mask,
        token_centre_atom_index=token_centre_atom_index,
        token_exists_mask=token_exists_mask,
        atom_exists_mask=atom_exists_mask,
        atom_token_index=atom_token_index,
    )

    combined_mask = single_mask | token_backbone_frame_mask

    # Copy first so the caller's backbone_frame_idces is not modified.
    merged_idces = backbone_frame_idces.clone()
    overwrite = repeat(single_mask, "b n -> b n 3")
    merged_idces[overwrite] = single_idces[overwrite]

    return merged_idces, combined_mask
@typecheck
def per_chain_plddt(
    logits: Float[Tensor, "... a bins"],
    atom_mask: Bool[Tensor, "... a"],
    asym_id: Int[Tensor, "... a"],
    bin_centers: Float[Tensor, "bins"],
) -> Float[Tensor, "... c"]:
    """Compute one pLDDT value per chain.

    The logits are repeated along a new chain axis and averaged under each
    chain's atom mask by ``plddt``.
    """
    chain_masks, _unused_asyms = rutils.get_chain_masks_and_asyms(asym_id, atom_mask)
    n_chains = chain_masks.shape[-2]
    logits_per_chain = repeat(logits, "... a b -> ... c a b", c=n_chains)
    return plddt(logits_per_chain, chain_masks, bin_centers, per_residue=False)
@typecheck
def _compute_ptm(
    logits: Float[Tensor, "... n n bins"],
    query_res_mask: Bool[Tensor, "... n"],
    query_has_frame_mask: Bool[Tensor, "... n"],
    key_res_mask: Bool[Tensor, "... n"],
    bin_centers: Float[Tensor, "bins"],
) -> Float[Tensor, "..."]:
    """
    Compute predicted TM score, normalized by the number of "key" tokens.

    For every query token that exists and has a frame, the expected pairwise
    TM contribution is summed over the key tokens and divided by the number of
    key tokens; the best query row is returned.
    """
    n_keys = reduce(key_res_mask, "... n -> ...", "sum").to(logits.dtype)
    # d0 depends on the number of key tokens
    d0 = rearrange(tm_d0(n_keys), "... -> ... 1")
    # TM-score weight of each distance bin: shape (..., bins), broadcast
    # against logits of shape (..., n, n, bins)
    tm_per_bin = 1 / (1 + (bin_centers / d0) ** 2)
    tm_per_bin = rearrange(tm_per_bin, "... bins -> ... 1 1 bins")
    # determine key-query pairs with valid logits
    usable_queries = query_has_frame_mask & query_res_mask
    valid_pairs = und(usable_queries, key_res_mask, "... i, ... j -> ... i j")
    # compute per-pair expected TM scores
    expected_pair_tm = expectation(logits, tm_per_bin)
    # normalize by the number of key tokens (at least 1 to avoid dividing by 0)
    key_norm = torch.clamp_min(rearrange(n_keys, "... -> ... 1 1"), 1)
    pair_weights = valid_pairs.float() / key_norm
    # (b i j) -> (b i)
    per_query_tm = torch.sum(pair_weights * expected_pair_tm, dim=-1)
    # select the row with the most optimistic logits
    return per_query_tm.max(dim=-1).values
@typecheck
def interface_ptm(
    pae_logits: Float[Tensor, "... n n n_bins"],
    token_exists_mask: Bool[Tensor, "... n"],
    valid_frames_mask: Bool[Tensor, "... n"],
    bin_centers: Float[Tensor, "n_bins"],
    token_asym_id: Int[Tensor, "... n"],
) -> Float[Tensor, "..."]:
    """Compute Interface pTM score

    ipTM is the max TM score over chains c in C, restricting
    to interactions between c and C - {c}.

    For each chain, the queries are that chain's tokens and the keys are all
    existing tokens outside that chain; the best per-chain score is returned.
    """
    # NOTE: the docstring deliberately avoids the LaTeX-style "\in": in a
    # non-raw string it is an invalid escape sequence (SyntaxWarning on 3.12+).
    query_res_mask, _ = get_chain_masks_and_asyms(
        asym_id=token_asym_id, mask=token_exists_mask
    )

    per_chain_ptm = _compute_ptm(
        logits=rearrange(pae_logits, "... i j n_bins -> ... 1 i j n_bins"),
        query_res_mask=query_res_mask,
        query_has_frame_mask=rearrange(valid_frames_mask, "... n -> ... 1 n"),
        # keys: every existing token that is NOT in the query chain
        key_res_mask=~query_res_mask & rearrange(token_exists_mask, "... n -> ... 1 n"),
        bin_centers=bin_centers,
    )

    return torch.max(per_chain_ptm, dim=-1)[0]
@typecheck
def per_chain_ptm(
    pae_logits: Float[Tensor, "... n n n_bins"],
    token_exists_mask: Bool[Tensor, "... n"],
    valid_frames_mask: Bool[Tensor, "... n"],
    bin_centers: Float[Tensor, "n_bins"],
    token_asym_id: Int[Tensor, "... n"],
) -> tuple[Float[Tensor, "... n_chains"], Int[Tensor, "n_chains"]]:
    """Computes pTM for each chain in the input.

    Each chain's tokens serve as both queries and keys. Returns the per-chain
    scores together with the asym ids reported by get_chain_masks_and_asyms.
    """
    masks_by_chain, asym_ids = get_chain_masks_and_asyms(
        asym_id=token_asym_id, mask=token_exists_mask
    )
    # add a chain axis so logits and frame mask broadcast against the chain masks
    logits_per_chain = rearrange(pae_logits, "... i j n_bins -> ... 1 i j n_bins")
    frames_per_chain = rearrange(valid_frames_mask, "... n -> ... 1 n")
    scores = _compute_ptm(
        logits=logits_per_chain,
        query_res_mask=masks_by_chain,
        query_has_frame_mask=frames_per_chain,
        key_res_mask=masks_by_chain,
        bin_centers=bin_centers,
    )
    return scores, asym_ids
n"], +) -> PTMScores: + return PTMScores( + complex_ptm=complex_ptm( + pae_logits=pae_logits, + token_exists_mask=token_exists_mask, + valid_frames_mask=valid_frames_mask, + bin_centers=bin_centers, + ), + interface_ptm=interface_ptm( + pae_logits=pae_logits, + token_exists_mask=token_exists_mask, + valid_frames_mask=valid_frames_mask, + bin_centers=bin_centers, + token_asym_id=token_asym_id, + ), + per_chain_pair_iptm=per_chain_pair_iptm( + pae_logits=pae_logits, + token_exists_mask=token_exists_mask, + valid_frames_mask=valid_frames_mask, + bin_centers=bin_centers, + token_asym_id=token_asym_id, + )[0], + per_chain_ptm=per_chain_ptm( + pae_logits=pae_logits, + token_exists_mask=token_exists_mask, + valid_frames_mask=valid_frames_mask, + bin_centers=bin_centers, + token_asym_id=token_asym_id, + )[0], + ) diff --git a/forks/chai-lab/chai_lab/ranking/rank.py b/forks/chai-lab/chai_lab/ranking/rank.py new file mode 100644 index 00000000..6c7853e5 --- /dev/null +++ b/forks/chai-lab/chai_lab/ranking/rank.py @@ -0,0 +1,130 @@ +from dataclasses import dataclass + +import numpy as np +import torch +from torch import Tensor + +import chai_lab.ranking.clashes as clashes +import chai_lab.ranking.plddt as plddt +import chai_lab.ranking.ptm as ptm +import chai_lab.ranking.utils as rutils +from chai_lab.utils.typing import Bool, Float, Int, typecheck + + +@typecheck +@dataclass +class SampleRanking: + """Sample Ranking Data + asym ids: a tensor of shape (c,) containing the unique asym ids for + each chain in the sample. The asym ids are sorted numerically. + aggregate_score: a tensor of shape (...) 
containing the aggregate ranking + score for the sample + ptm_scores: see ptm.get_scores for a description of the ptm scores + clash_scores: a dictionary of clash scores + plddt_scores: see plddt.PLDDTScores for a description of the plddt scores + """ + + asym_ids: Int[Tensor, "c"] + aggregate_score: Float[Tensor, "..."] + ptm_scores: ptm.PTMScores + clash_scores: clashes.ClashScores + plddt_scores: plddt.PLDDTScores + + +@typecheck +def rank( + atom_coords: Float[Tensor, "... a 3"], + atom_mask: Bool[Tensor, "... a"], + atom_token_index: Int[Tensor, "... a"], + token_exists_mask: Bool[Tensor, "... n"], + token_asym_id: Int[Tensor, "... n"], + token_entity_type: Int[Tensor, "... n"], + token_valid_frames_mask: Bool[Tensor, "... n"], + # lddt + lddt_logits: Float[Tensor, "... a lddt_bins"], + lddt_bin_centers: Float[Tensor, "lddt_bins"], + # pae + pae_logits: Float[Tensor, "... n n pae_bins"], + pae_bin_centers: Float[Tensor, "pae_bins"], + # clash + clash_threshold: float = 1.1, + max_clashes: int = 100, + max_clash_ratio: float = 0.5, +) -> SampleRanking: + """ + Compute ranking scores for a sample. + In addition to the pTM/ipTM aggregate score, we also return chain + and inter-chain level statistics for pTM and clashes. + see documentation for SampleRanking for a complete description. 
+ """ + + ptm_scores = ptm.get_scores( + pae_logits=pae_logits, + token_exists_mask=token_exists_mask, + valid_frames_mask=token_valid_frames_mask, + bin_centers=pae_bin_centers, + token_asym_id=token_asym_id, + ) + clash_scores = clashes.get_scores( + atom_coords=atom_coords, + atom_mask=atom_mask, + atom_asym_id=torch.gather( + token_asym_id, + dim=-1, + index=atom_token_index.long(), + ), + atom_entity_type=torch.gather( + token_entity_type, + dim=-1, + index=atom_token_index.long(), + ), + max_clashes=max_clashes, + max_clash_ratio=max_clash_ratio, + clash_threshold=clash_threshold, + ) + + plddt_scores = plddt.get_scores( + lddt_logits=lddt_logits, + atom_mask=atom_mask, + bin_centers=lddt_bin_centers, + atom_asym_id=torch.gather( + token_asym_id, + dim=-1, + index=atom_token_index.long(), + ), + ) + + # aggregate score + aggregate_score = ( + 0.2 * ptm_scores.complex_ptm + + 0.8 * ptm_scores.interface_ptm + - 100 * clash_scores.has_inter_chain_clashes.float() + ) + + _, asyms = rutils.get_chain_masks_and_asyms( + asym_id=token_asym_id, + mask=token_exists_mask, + ) + + return SampleRanking( + asym_ids=asyms, + aggregate_score=aggregate_score, + ptm_scores=ptm_scores, + clash_scores=clash_scores, + plddt_scores=plddt_scores, + ) + + +def get_scores(ranking_data: SampleRanking) -> dict[str, np.ndarray]: + scores = { + "aggregate_score": ranking_data.aggregate_score, + "ptm": ranking_data.ptm_scores.complex_ptm, + "iptm": ranking_data.ptm_scores.interface_ptm, + "per_chain_ptm": ranking_data.ptm_scores.per_chain_ptm, + "per_chain_pair_iptm": ranking_data.ptm_scores.per_chain_pair_iptm, + "has_inter_chain_clashes": ranking_data.clash_scores.has_inter_chain_clashes, + # TODO replace with just one tensor that contains both + "chain_intra_clashes": ranking_data.clash_scores.chain_intra_clashes, + "chain_chain_inter_clashes": ranking_data.clash_scores.chain_chain_inter_clashes, + } + return {k: v.cpu().numpy() for k, v in scores.items()} diff --git 
a/forks/chai-lab/chai_lab/ranking/utils.py b/forks/chai-lab/chai_lab/ranking/utils.py new file mode 100644 index 00000000..987ab220 --- /dev/null +++ b/forks/chai-lab/chai_lab/ranking/utils.py @@ -0,0 +1,82 @@ +import torch +from einops import rearrange +from torch import Tensor + +from chai_lab.data.parsing.structure.entity_type import EntityType +from chai_lab.utils.tensor_utils import cdist +from chai_lab.utils.typing import Bool, Float, Int, typecheck + + +@typecheck +def get_chain_masks_and_asyms( + asym_id: Int[Tensor, "... n"], + mask: Bool[Tensor, "... n"], +) -> tuple[Bool[Tensor, "... c n"], Int[Tensor, "c"]]: + """ + Returns a mask for each chain and the unique asym ids + """ + sorted_unique_asyms = torch.unique(asym_id[mask]) + # shape: (..., max_num_chains, n) + chain_masks = rearrange(asym_id, "... n -> ... 1 n") == rearrange( + sorted_unique_asyms, "nc -> nc 1" + ) # shape: (..., n, max_num_chains) + return chain_masks & rearrange(mask, "... n -> ... 1 n"), sorted_unique_asyms + + +@typecheck +def get_interface_mask( + coords: Float[Tensor, "... n 3"], + asym_id: Int[Tensor, "... n"], + mask: Bool[Tensor, "... n"], + interface_threshold: float, +) -> Bool[Tensor, "... n n"]: + valid_mask = rearrange(asym_id, "... n -> ... n 1") != rearrange( + asym_id, "... n -> ... 1 n" + ) + valid_mask &= rearrange(mask, "... n -> ... n 1") & rearrange( + mask, "... n -> ... 1 n" + ) + dists = torch.masked_fill(cdist(coords), ~valid_mask, torch.inf) + min_dists, _ = torch.min(dists, dim=-1) + return min_dists < interface_threshold + + +@typecheck +def expectation( + logits: Float[Tensor, "... bins"], + weights: Float[Tensor, "... bins"], +) -> Float[Tensor, "..."]: # last dim will be dropped + logits = torch.softmax(logits, dim=-1) + return (logits * weights).sum(dim=-1) + + +@typecheck +def num_atoms_per_chain( + atom_mask: Bool[Tensor, "... a"], + asym_id: Int[Tensor, "... a"], +) -> Int[Tensor, "... 
c"]: + masks, _ = get_chain_masks_and_asyms(asym_id, atom_mask) + return masks.sum(dim=-1) + + +@typecheck +def chain_is_polymer( + asym_id: Int[Tensor, "... n"], + mask: Bool[Tensor, "... n"], + entity_type: Int[Tensor, "... n"], +) -> Bool[Tensor, "... c"]: + chain_masks, _ = get_chain_masks_and_asyms(asym_id, mask) + polymer_types = torch.tensor( + [ + EntityType.PROTEIN.value, + EntityType.RNA.value, + EntityType.DNA.value, + EntityType.POLYMER_HYBRID.value, + ], + device=entity_type.device, + ) + is_polymer = torch.any(entity_type.unsqueeze(-1) == polymer_types, dim=-1) + chain_is_polymer = [] + for polymer_mask in chain_masks.unbind(dim=-2): + chain_is_polymer.append(torch.any(is_polymer & polymer_mask, dim=-1)) + return torch.stack(chain_is_polymer, dim=-1) diff --git a/forks/chai-lab/chai_lab/utils/__init__.py b/forks/chai-lab/chai_lab/utils/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/forks/chai-lab/chai_lab/utils/defaults.py b/forks/chai-lab/chai_lab/utils/defaults.py new file mode 100644 index 00000000..77310461 --- /dev/null +++ b/forks/chai-lab/chai_lab/utils/defaults.py @@ -0,0 +1,7 @@ +from typing import TypeVar + +T = TypeVar("T") + + +def default(x: T | None, y: T) -> T: + return x if x is not None else y diff --git a/forks/chai-lab/chai_lab/utils/dict.py b/forks/chai-lab/chai_lab/utils/dict.py new file mode 100644 index 00000000..2ae3d893 --- /dev/null +++ b/forks/chai-lab/chai_lab/utils/dict.py @@ -0,0 +1,19 @@ +from typing import TypeVar + +K = TypeVar("K") +V = TypeVar("V") + + +def list_dict_to_dict_list(list_dict: list[dict[K, V]]) -> dict[K, list[V]]: + """ + Converts a list of dicts that contain the same keys to a dict of lists, where each + list contains an ordered list of values of the corresponding dict. 
+ """ + if len(list_dict) == 0: + return {} + + keys = list_dict[0].keys() + if any(d.keys() != keys for d in list_dict): + raise ValueError("All dicts must have the same keys") + + return {k: [d[k] for d in list_dict] for k in keys} diff --git a/forks/chai-lab/chai_lab/utils/paths.py b/forks/chai-lab/chai_lab/utils/paths.py new file mode 100644 index 00000000..d816cd64 --- /dev/null +++ b/forks/chai-lab/chai_lab/utils/paths.py @@ -0,0 +1,62 @@ +import dataclasses +from pathlib import Path +from typing import Final + +import requests + +# use this path object to specify location +# of anything within repository +repo_root: Final[Path] = Path(__file__).parents[2].absolute() + +# minimal sanity check in case we start moving things around +assert repo_root.exists() + + +def download(http_url: str, path: Path): + print(f"downloading {http_url}") + tmp_path = path.with_suffix(".download_tmp") + + with requests.get(http_url, stream=True) as response: + response.raise_for_status() # Check if the request was successful + # Open a local file with the specified name + path.parent.mkdir(exist_ok=True, parents=True) + with tmp_path.open("wb") as file: + # Download the file in chunks + for chunk in response.iter_content(chunk_size=8192): + if chunk: # Filter out keep-alive new chunks + file.write(chunk) + tmp_path.rename(path) + assert path.exists() + + +@dataclasses.dataclass +class Downloadable: + url: str + path: Path + + def get_path(self) -> Path: + # downloads artifact if necessary + if not self.path.exists(): + download(self.url, path=self.path) + + return self.path + + +cached_conformers = Downloadable( + url="https://chaiassets.com/chai1-inference-depencencies/conformers.apkl", + path=repo_root.joinpath("downloads", "conformers.apkl"), +) + + +def chai1_component(comp_key: str) -> Path: + """ + Downloads exported model, stores in locally in the repo/downloads + comp_key: e.g. 
'384/trunk.pt2' + """ + assert comp_key.endswith(".pt2") + url = f"https://chaiassets.com/chai1-inference-depencencies/models/{comp_key}" + result = repo_root.joinpath("downloads", "models", comp_key) + if not result.exists(): + download(url, result) + + return result diff --git a/forks/chai-lab/chai_lab/utils/pickle.py b/forks/chai-lab/chai_lab/utils/pickle.py new file mode 100644 index 00000000..9cda4b30 --- /dev/null +++ b/forks/chai-lab/chai_lab/utils/pickle.py @@ -0,0 +1,19 @@ +import antipickle +import torch + + +class TorchAntipickleAdapter(antipickle.AbstractAdapter): + typestring = "torch" + + def __init__(self): + self.cpu_device = torch.device("cpu") + + def check_type(self, obj): + return type(obj) is torch.Tensor # ignore inherited classes + + def to_dict(self, obj): + assert obj.device == self.cpu_device, "serializing only cpu tensors" + return {"data": antipickle.wrap(obj.numpy())} # use numpy serialization + + def from_dict(self, d): + return torch.from_numpy(d["data"]) diff --git a/forks/chai-lab/chai_lab/utils/plot.py b/forks/chai-lab/chai_lab/utils/plot.py new file mode 100644 index 00000000..290e68a7 --- /dev/null +++ b/forks/chai-lab/chai_lab/utils/plot.py @@ -0,0 +1,68 @@ +"""""" + +import logging +from pathlib import Path + +import torch +from einops import reduce +from matplotlib import pyplot as plt +from torch import Tensor + +from chai_lab.data import residue_constants as rc +from chai_lab.utils.typing import Int, UInt8, typecheck + + +@typecheck +def plot_msa( + input_tokens: Int[Tensor, "n_tokens"], + msa_tokens: UInt8[Tensor, "msa_depth n_tokens"], + out_fname: Path, + gap: str = "-", + mask: str = ":", + sort_by_identity: bool = True, +) -> Path: + gap_idx = rc.residue_types_with_nucleotides.index(gap) + mask_idx = rc.residue_types_with_nucleotides.index(mask) + + # Trim padding tokens (= pad in all alignments) + token_is_pad = torch.all(msa_tokens == mask_idx, dim=0) + msa_tokens = msa_tokens[:, ~token_is_pad] + input_tokens = 
input_tokens[~token_is_pad] + + # Calculate sequence identity for each MSA sequence + msa_seq_ident = (msa_tokens == input_tokens).float().mean(dim=-1) + sort_idx = ( + torch.argsort(msa_seq_ident, descending=True) + if sort_by_identity + else torch.arange(msa_tokens.shape[0]) + ) + + # Valid tokens are not padding and not a gap; we plot the valid tokens + msa_tokens_is_valid = (msa_tokens != gap_idx) & (msa_tokens != mask_idx) + msa_coverage = reduce(msa_tokens_is_valid.float(), "m t -> t", "mean") + + # Scale each of the MSA entries by its sequence identity for plotting + msa_by_identity = msa_tokens_is_valid.float() * msa_seq_ident.unsqueeze(-1) + msa_by_identity[~msa_tokens_is_valid] = torch.nan + + # Plotting + fig, ax = plt.subplots(dpi=150) + patch = ax.imshow( + msa_by_identity[sort_idx], + cmap="rainbow_r", + vmin=0, + vmax=1, + interpolation="nearest", + ) + ax.set_aspect("auto") + ax.set(ylabel="Sequences", xlabel="Positions") + + ax2 = ax.twinx() + ax2.plot(msa_coverage, color="black") + ax2.set(ylim=[0, 1], yticks=[]) + + fig.colorbar(patch) + fig.savefig(out_fname, bbox_inches="tight") + logging.info(f"Saved MSA plot to {out_fname}") + plt.close(fig) + return out_fname diff --git a/forks/chai-lab/chai_lab/utils/tensor_utils.py b/forks/chai-lab/chai_lab/utils/tensor_utils.py new file mode 100644 index 00000000..9fbc0ba4 --- /dev/null +++ b/forks/chai-lab/chai_lab/utils/tensor_utils.py @@ -0,0 +1,288 @@ +import typing +from functools import lru_cache +from typing import TypeVar + +import torch +import torch.nn.functional as F +from einops import rearrange +from torch import Tensor + +from chai_lab.utils.defaults import default +from chai_lab.utils.typing import Bool, Float, UInt8, typecheck + + +@typecheck +def cdist( + x: Float[Tensor, "... p m"], + y: Float[Tensor, "... r m"] | None = None, + p: float = 2.0, +) -> Float[Tensor, "... 
p r"]: + y = default(y, x) + assert x.ndim == y.ndim + + _threshold = 2147400000 + n, m = x.shape[-2], y.shape[-2] + + flat_size = torch.prod(torch.tensor(x.shape[:-2])) * n * m + + if x.is_cuda and flat_size > _threshold: + # Torch cdist without mm fails when the total number of + # points is > _threshold (in dimension 3) + # or 8192 points for batch size 32. + # To preserve accuracy, we fallback to naive distances + return _naive_pairwise_distances(x, y) + + return torch.cdist(x1=x, x2=y, compute_mode="donot_use_mm_for_euclid_dist", p=p) + + +@typecheck +def _naive_pairwise_distances( + x: Float[Tensor, "... p m"], + y: Float[Tensor, "... r m"] | None = None, + eps: float = 1e-10, +) -> Float[Tensor, "... p r"]: + y = default(y, x) + diff = x.unsqueeze(-2) - y.unsqueeze(-3) + + return diff.pow_(2).sum(dim=-1).add_(eps).sqrt_() + + +@typecheck +def masked_mean( + mask: Bool[Tensor, "..."], + value: Tensor, + dim: int | tuple[int, ...], + keepdim=False, +) -> Tensor: + mask = mask.expand(*value.shape) + num = torch.sum(mask * value, dim=dim, keepdim=keepdim) + denom = torch.sum(mask, dim=dim, keepdim=keepdim).clamp(min=1) + return num / denom + + +@typecheck +def one_hot(x: Tensor, v_bins: Tensor) -> Tensor: + """One hot encoding; v_bins should N-1 bins where N is desired bins.""" + bins = torch.searchsorted(v_bins, x) + return F.one_hot(bins, v_bins.shape[-1] + 1).float() + + +@lru_cache() +def _get_individual_und_patterns(multipattern: str) -> list[str]: + assert isinstance(multipattern, str), "pattern goes as last argument" + left_parts, right_part = multipattern.split("->") + assert "(" not in right_part, "parenthesis not supported for now" + result = [] + + all_left_ids = set() + all_left_parts_have_ellipsis = True + + for left_part in left_parts.split(","): + left_ids = set(left_part.split()) + if "..." 
not in left_ids: + all_left_parts_have_ellipsis = False + all_left_ids.update(left_ids) + right_parts = [] + for token in right_part.split(): + if token == "1" or token in left_ids: # '...' should be in left ids + right_parts.append(token) + elif token.isidentifier(): + right_parts.append("1") + elif token == "...": + raise RuntimeError( + f"Ellipis not in one of left sides of {multipattern=}" + ) + else: + raise RuntimeError(f"Unknown {token=} in {multipattern=}") + result.append(f"{left_part} -> " + " ".join(right_parts)) + + if "..." in right_part.split(): + msg = "for now ALL or NONE left parts should have ellipsis (...) " + assert all_left_parts_have_ellipsis, msg + + unk_ids = [ + x + for x in right_part.split() + if x not in all_left_ids and x != "1" and x != "..." + ] + assert len(unk_ids) == 0, f"{unk_ids=} not found on left side of {multipattern}" + + return result + + +@typing.overload +def und(t1: Tensor, pattern: str) -> Tensor: ... + + +@typing.overload +def und(t1: Tensor, t2: Tensor, pattern: str) -> Tensor: ... + + +@typing.overload +def und(t1: Tensor, t2: Tensor, t3: Tensor, pattern: str) -> Tensor: ... + + +@typing.overload +def und(t1: Tensor, t2: Tensor, t3: Tensor, t4: Tensor, pattern: str) -> Tensor: ... + + +def und(*args): + """ + Micro-extension to einops. + + Performs & (logical_and) for several masks. + Similar to einsum over masks, but additionally can add/remove 1-dims. 
+ + > und(mask1, mask2, "b i, b j -> b 1 i j") + """ + *tensors, multipattern = args + patterns = _get_individual_und_patterns(multipattern) + + result = None + for arg_val, arg_pattern in zip(tensors, patterns, strict=True): + assert arg_val.dtype == torch.bool + if result is None: + result = rearrange(arg_val, arg_pattern) + else: + result = result & rearrange(arg_val, arg_pattern) + return result + + +def und_self(mask: Tensor, pattern: str) -> Tensor: + """ + Performs & (logical_and) for two replicas of the same tensor + + > und_self(mask, "b i, b j -> b 1 i j") + is a better version of + > und(mask, mask, "b i, b j -> b 1 i j") + """ + return und(mask, mask, pattern) + + +# 255 is not an ASCII char +TENSORCODE_PAD_TOKEN = torch.iinfo(torch.uint8).max + + +@typecheck +def string_to_tensorcode( + input: str, + pad_to_length: int | None = None, + device: torch.device | None = None, +) -> UInt8[Tensor, "l"]: + """ + Converts an ASCII string to a tensor of integers. + + If pad_to_length is specified, the output tensor will have this length, and we add a + special padding character if the tensor has less than the specified length. + + The minimum value of the output tensor is 0, and the maximum is 127 (excluding the + padding token, which can be 255). 
+ """ + assert input.isascii(), "Expected input to be ASCII" + ords = [ord(c) for c in input] + + tensorcode = torch.tensor(ords, dtype=torch.uint8, device=device) + if pad_to_length is None: + return tensorcode + + input_length = len(input) + assert ( + pad_to_length >= input_length + ), f"Expected {input_length=} to be shorter than {pad_to_length=} for {input=}" + + return F.pad( + tensorcode, + (0, pad_to_length - input_length), + value=TENSORCODE_PAD_TOKEN, + ) + + +@typecheck +def tensorcode_to_string(tensor: UInt8[Tensor, "l"]) -> str: + """ + Applies the inverse of the string_to_tensorcode function + """ + assert tensor.device == torch.device("cpu") + chars = [chr(i) for i in tensor if i != TENSORCODE_PAD_TOKEN] + return "".join(chars) + + +@typecheck +def batch_tensorcode_to_string( + tensor: UInt8[Tensor, "*dims l"], +) -> list[str]: + tensor = rearrange(tensor, "... l -> (...) l") + tensor = tensor[tensor.amax(dim=1) > 0, :] + return [ + "".join(chr(i) for i in row if i != TENSORCODE_PAD_TOKEN) + for row in tensor.tolist() + ] + + +def unique_indexes(x: torch.Tensor, dim=-1, sorted: bool = True): + """Implements return_index=True behavior for torch.unique. 
+ + See https://numpy.org/doc/stable/reference/generated/numpy.unique.html for info and + https://github.com/pytorch/pytorch/issues/36748 for context.""" + assert x.size(dim) > 0 + + unique, inverse = torch.unique(x, return_inverse=True, sorted=True, dim=dim) + perm = torch.arange(inverse.size(0), dtype=inverse.dtype, device=inverse.device) + inverse, perm = inverse.flip([0]), perm.flip([0]) + inverse = inverse.new_empty(unique.size(dim)).scatter_(0, inverse, perm) + if sorted: + inverse = inverse.sort().values + + return unique, inverse + + +T = TypeVar("T") + + +# mypy is too angry when this function is directly annotated +def _move_data_to_device(x, device: torch.device): + if x is None: + return None + if isinstance(x, (str, int, float, bool)): + return x + if isinstance(x, torch.Tensor): + return x.to(device=device) + elif isinstance(x, dict): + return {k: move_data_to_device(v, device) for k, v in x.items()} + elif isinstance(x, list): + return [move_data_to_device(el, device) for el in x] + elif isinstance(x, tuple): + return tuple(move_data_to_device(el, device) for el in x) + else: + raise NotImplementedError(type(x)) + + +def move_data_to_device(x: T, device: torch.device) -> T: + return _move_data_to_device(x, device=device) + + +def set_seed(seed_sequence: list[int]) -> None: + """ + Seeds numpy, torch, and Python. + + This function is heavily inspired by Lightning's pl_worker_init_function. 
+ """ + import random + + import numpy as np + + # Spawn distinct SeedSequences for the PyTorch PRNG and the stdlib random module + np_ss = np.random.SeedSequence(seed_sequence) + torch_ss, stdlib_ss = np_ss.spawn(2) + + # Seed numpy, use 128 bits (4 x 32-bit words) + np.random.seed(np_ss.generate_state(4)) + + # Seed torch + torch.manual_seed(torch_ss.generate_state(1, dtype=np.uint64)[0]) + + # Seed python, use 128 bits expressed as an integer + stdlib_seed = ( + stdlib_ss.generate_state(2, dtype=np.uint64).astype(object) * [1 << 64, 1] + ).sum() + random.seed(stdlib_seed) diff --git a/forks/chai-lab/chai_lab/utils/timeout.py b/forks/chai-lab/chai_lab/utils/timeout.py new file mode 100644 index 00000000..db069ee7 --- /dev/null +++ b/forks/chai-lab/chai_lab/utils/timeout.py @@ -0,0 +1,100 @@ +""" +Timeout utility for a function, creates a new process + +Implementation modified from: +https://www.reddit.com/r/Python/comments/8t9bk4/the_absolutely_easiest_way_to_time_out_a_function/ +""" + +import multiprocessing +import queue as _queue +from enum import Enum +from functools import wraps +from multiprocessing import Process, Queue +from typing import Any + +from typing_extensions import assert_never + + +# TODO: This is dangerous: revert once the underlying problem in rdkit is fixed +# RDKit Issue(https://github.com/rdkit/rdkit/discussions/7289) +class Undaemonize(object): + """Context Manager to resolve AssertionError: daemonic processes are not allowed to have children + See https://stackoverflow.com/questions/6974695/python-process-pool-non-daemonic""" + + def __init__(self): + self.conf: dict = multiprocessing.process.current_process()._config # type: ignore + if "daemon" in self.conf: + self.daemon_status_set = True + else: + self.daemon_status_set = False + self.daemon_status_value = self.conf.get("daemon") + + def __enter__(self): + if self.daemon_status_set: + del self.conf["daemon"] + + def __exit__(self, type, value, traceback): + if 
self.daemon_status_set: + self.conf["daemon"] = self.daemon_status_value + + +class HandlerStatus(Enum): + SUCCESS = 0 + EXCEPTION = 1 + + +class ChildProcessException(Exception): + pass + + +def timeout(timeout: float | int) -> Any: + """Force function to timeout after 'seconds'. + + Returns: + The return value of the wrapped function. + Raises: + TimeoutError if the function does not return before the timeout. + """ + + def handler(queue, func, args, kwargs) -> None: + try: + queue.put((HandlerStatus.SUCCESS, func(*args, **kwargs))) + except Exception as e: + queue.put((HandlerStatus.EXCEPTION, e)) + + def decorator(func): + @wraps(func) + def new_fn(*args, **kwargs): + queue: Queue = Queue() + proc = Process( + target=handler, args=(queue, func, args, kwargs), daemon=True + ) + with Undaemonize(): + proc.start() + proc.join(timeout=float(timeout)) + if proc.is_alive(): + proc.terminate() + proc.join() + raise TimeoutError(f"Function {func} timed out after {timeout} seconds") + else: + # When child process dies unexpectedly Queue.get waits indefinitely. 
+ # See Issue(https://bugs.python.org/issue43805) + # prevent queue from hanging with another very short timeout + try: + status, value = queue.get(timeout=0.1) + except _queue.Empty: + # in this case, child process has died unexpectedly + raise ChildProcessException("Child process died unexpectedly") + + match status: + case HandlerStatus.SUCCESS: + return value + case HandlerStatus.EXCEPTION: + # Re-raise the exception we caught in the child process + raise value + + assert_never(status) + + return new_fn + + return decorator diff --git a/forks/chai-lab/chai_lab/utils/typing.py b/forks/chai-lab/chai_lab/utils/typing.py new file mode 100644 index 00000000..91c4d164 --- /dev/null +++ b/forks/chai-lab/chai_lab/utils/typing.py @@ -0,0 +1,44 @@ +import typing + +from beartype import beartype +from jaxtyping import ( + Bool, + Float, + Float32, + Int, + Int32, + Num, + Shaped, + TypeCheckError, + UInt8, + jaxtyped, +) + +# Modules are only loaded and executed the first time they are imported, so the value of +# should_typecheck will constant over the lifetime of the program. +should_typecheck = True + + +Func = typing.TypeVar("Func") + + +def typecheck(cls_or_func: Func) -> Func: + if should_typecheck: + return jaxtyped(typechecker=beartype)(cls_or_func) + else: + return cls_or_func + + +__all__ = [ + "typecheck", + "TypeCheckError", + # re-export jaxtyping types + "Bool", + "Float", + "Int", + "Int32", + "Float32", + "Num", + "Shaped", + "UInt8", +] diff --git a/forks/chai-lab/examples/predict_structure.py b/forks/chai-lab/examples/predict_structure.py new file mode 100644 index 00000000..37fa4870 --- /dev/null +++ b/forks/chai-lab/examples/predict_structure.py @@ -0,0 +1,39 @@ +from pathlib import Path + +import numpy as np +import torch + +from chai_lab.chai1 import run_inference + +# We use fasta-like format for inputs. 
+# Every record may encode protein, ligand, RNA or DNA +# see example below + +example_fasta = """ +>protein|example-of-long-protein +AGSHSMRYFSTSVSRPGRGEPRFIAVGYVDDTQFVRFDSDAASPRGEPRAPWVEQEGPEYWDRETQKYKRQAQTDRVSLRNLRGYYNQSEAGSHTLQWMFGCDLGPDGRLLRGYDQSAYDGKDYIALNEDLRSWTAADTAAQITQRKWEAAREAEQRRAYLEGTCVEWLRRYLENGKETLQRAEHPKTHVTHHPVSDHEATLRCWALGFYPAEITLTWQWDGEDQTQDTELVETRPAGDGTFQKWAAVVVPSGEEQRYTCHVQHEGLPEPLTLRWEP +>protein|example-of-short-protein +AIQRTPKIQVYSRHPAENGKSNFLNCYVSGFHPSDIEVDLLKNGERIEKVEHSDLSFSKDWSFYLLYYTEFTPTEKDEYACRVNHVTLSQPKIVKWDRDM +>protein|example-of-peptide +GAAL +>ligand|and-example-for-ligand-encoded-as-smiles +CCCCCCCCCCCCCC(=O)O +""".strip() + +fasta_path = Path("/tmp/example.fasta") +fasta_path.write_text(example_fasta) + +output_dir = Path("/tmp/outputs") +output_pdb_paths = run_inference( + fasta_file=fasta_path, + output_dir=output_dir, + # 'default' setup + num_trunk_recycles=3, + num_diffn_timesteps=200, + seed=42, + device=torch.device("cuda:0"), + use_esm_embeddings=True, +) + +# Load pTM, ipTM, pLDDTs and clash scores for sample 2 +scores = np.load(output_dir.joinpath("scores.model_idx_2.npz")) diff --git a/forks/chai-lab/pyproject.toml b/forks/chai-lab/pyproject.toml new file mode 100644 index 00000000..1996a24d --- /dev/null +++ b/forks/chai-lab/pyproject.toml @@ -0,0 +1,69 @@ +# important: install in editable mode +[build-system] +requires = [ + "hatchling>=1.20", # build backend + "hatch-requirements-txt", # plugin, to parse requirements.txt +] +build-backend = "hatchling.build" + + +[project] +name = "chai_lab" +description = "Chai Discovery tools for AI + protein research." 
+requires-python = ">=3.10" +authors = [{ name = "Chai Discovery" }] +# see both defined below +dynamic = ["version", "dependencies"] + +[tool.hatch.version] +path = "chai_lab/__init__.py" +[tool.hatch.metadata.hooks.requirements_txt] +files = ["requirements.in"] +[tool.hatch.metadata] +allow-direct-references = true + +[tool.mypy] +check_untyped_defs = true + +# Ignore missing imports for packages with missing type stubs +[[tool.mypy.overrides]] +module = [ + "anarci.*", + "fsspec.*", + "google.*", + "joblib.*", + "needletail.*", + "numba.*", + "pyximport.*", + "rdkit.*", + "scipy.*", + "seaborn.*", + "sh.*", + "tmtools.*", + "botocore.*", + "s3fs.*", + "biotite.*", + "DockQ.*", + "boto3.*", + "transformers.*", +] +ignore_missing_imports = true + +[tool.pytest.ini_options] +cache_dir = "/tmp/.common_pytest_cache" + + +[tool.hatch.build.targets.sdist] +exclude = [ + "/.devcontainer", + "/.github", + "/.idea", + "/.vscode", + "/.pytest_cache", + "/assets", + "/downloads", + "/outputs", +] + +[tool.hatch.build.targets.wheel] +# should use packages from sdist section \ No newline at end of file diff --git a/forks/chai-lab/requirements.in b/forks/chai-lab/requirements.in new file mode 100644 index 00000000..580cbc90 --- /dev/null +++ b/forks/chai-lab/requirements.in @@ -0,0 +1,48 @@ +# dev-deps, still placed in the same requirements file +ruff==0.6.3 # in sync with pre-commit-hook +mypy +pytest +pre-commit + +# types/stubs are required by mypy +pandas-stubs +types-pyyaml +types-tqdm +typing-extensions +types-requests + +# CLI, administrator tools +typer~=0.12 # CLI generator +# pydantic~=2.5 # serialization/deserialization of configs + +# notebooks, plotting +ipykernel~=6.27 # needed by vs code to run notebooks in devcontainer +# seaborn +matplotlib + +# misc +tqdm~=4.66 + +# data import/export, application-specific +gemmi~=0.6.3 # pdb/mmcif parsing +rdkit==2023.9.5 # parsing of ligands. 
2023.9.6 has broken type stubs +biopython==1.83 # parsing, data access +antipickle==0.2.0 # save/load heterogeneous python structures +tmtools>=0.0.3 # Python bindings for the TM-align algorithm + +# commented out following optional dependencies for release on pypi +# dockq metric for comparing predicted pdbs and ground truth pdbs +# dockq @ git+https://github.com/bjornwallner/DockQ.git@v2.1.1 +# pip-compatible minimized version of anarci +# anarci @ git+https://github.com/arogozhnikov/microANARCI@d81823395d0c3532d6e033d80b036b4aa4a4565e + +# computing, dl +numpy~=1.21 +pandas[parquet,gcp,aws]~=2.1 +# polars +einops~=0.8 +jaxtyping>=0.2.25 # versions <0.2.25 do not easily support runtime typechecking +beartype>=0.18 # compatible typechecker to use with jaxtyping +# do not use 2.2 because https://github.com/pytorch/pytorch/issues/122385 +torch~=2.3.1 +transformers~=4.44 # for esm inference \ No newline at end of file diff --git a/forks/chai-lab/ruff.toml b/forks/chai-lab/ruff.toml new file mode 100644 index 00000000..1b6d2031 --- /dev/null +++ b/forks/chai-lab/ruff.toml @@ -0,0 +1,12 @@ +# move ruff cache outside of worktree +cache-dir = "/tmp/.ruff_chai_cache" + + +[lint] +extend-select = ["I"] +# jaxtyping requires disabling two following errors +# https://docs.kidger.site/jaxtyping/faq/#flake8-or-ruff-are-throwing-an-error +ignore = ["F821", "F722"] + +[lint.isort] +known-first-party = ["chai", "chai_lab"] diff --git a/forks/chai-lab/tests/__init__.py b/forks/chai-lab/tests/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/forks/chai-lab/tests/example_inputs.py b/forks/chai-lab/tests/example_inputs.py new file mode 100644 index 00000000..19b44519 --- /dev/null +++ b/forks/chai-lab/tests/example_inputs.py @@ -0,0 +1,32 @@ +example_ligands = [ + "C", + "O", + "C(C1C(C(C(C(O1)O)O)O)O)O", + "[O-]S(=O)(=O)[O-]", + "CC1=C(C(CCC1)(C)C)/C=C/C(=C/C=C/C(=C/C=O)/C)/C", + 
"CCC1=C(c2cc3c(c(c4n3[Mg]56[n+]2c1cc7n5c8c(c9[n+]6c(c4)C(C9CCC(=O)OC/C=C(\C)/CCC[C@H](C)CCC[C@H](C)CCCC(C)C)C)[C@H](C(=O)c8c7C)C(=O)OC)C)C=C)C=O", + r"C=CC1=C(C)/C2=C/c3c(C)c(CCC(=O)O)c4n3[Fe@TB16]35<-N2=C1/C=c1/c(C)c(C=C)/c(n13)=C/C1=N->5/C(=C\4)C(CCC(=O)O)=C1C", + # different ions + "[Mg+2]", + "[Na+]", + "[Cl-]", +] + +example_proteins = [ + "AGSHSMRYFSTSVSRPGRGEPRFIAVGYVDDTQFVR", + "(KCJ)(SEP)(PPN)(B3S)(BAL)(PPN)K(NH2)", + "XDHPX", +] + + +example_rna = [ + "AGUGGCUA", + "AAAAAA", + "AGUC", +] + +example_dna = [ + "AGTGGCTA", + "AAAAAA", + "AGTC", +] diff --git a/forks/chai-lab/tests/test_parsing.py b/forks/chai-lab/tests/test_parsing.py new file mode 100644 index 00000000..4e141311 --- /dev/null +++ b/forks/chai-lab/tests/test_parsing.py @@ -0,0 +1,48 @@ +from chai_lab.data.parsing.input_validation import ( + constituents_of_modified_fasta, + identify_potential_entity_types, +) +from chai_lab.data.parsing.structure.entity_type import EntityType + +from .example_inputs import example_dna, example_ligands, example_proteins, example_rna + + +def test_simple_protein_fasta(): + parts = constituents_of_modified_fasta("RKDES") + assert parts is not None + assert all(x == y for x, y in zip(parts, ["R", "K", "D", "E", "S"])) + + +def test_modified_protein_fasta(): + parts = constituents_of_modified_fasta("(KCJ)(SEP)(PPN)(B3S)(BAL)(PPN)KX(NH2)") + assert parts is not None + expected = ["KCJ", "SEP", "PPN", "B3S", "BAL", "PPN", "K", "X", "NH2"] + assert all(x == y for x, y in zip(parts, expected)) + + +def test_rna_fasta(): + seq = "ACUGACG" + parts = constituents_of_modified_fasta(seq) + assert parts is not None + assert all(x == y for x, y in zip(parts, seq)) + + +def test_dna_fasta(): + seq = "ACGACTAGCAT" + parts = constituents_of_modified_fasta(seq) + assert parts is not None + assert all(x == y for x, y in zip(parts, seq)) + + +def test_parsing(): + for ligand in example_ligands: + assert EntityType.LIGAND in identify_potential_entity_types(ligand) + + for protein 
in example_proteins: + assert EntityType.PROTEIN in identify_potential_entity_types(protein) + + for dna in example_dna: + assert EntityType.DNA in identify_potential_entity_types(dna) + + for rna in example_rna: + assert EntityType.RNA in identify_potential_entity_types(rna) diff --git a/img/PoseBench.png b/img/PoseBench.png index 8e45d221..c26ea239 100644 Binary files a/img/PoseBench.png and b/img/PoseBench.png differ diff --git a/notebooks/adding_new_dataset_tutorial.ipynb b/notebooks/adding_new_dataset_tutorial.ipynb index 701a801a..6b196317 100644 --- a/notebooks/adding_new_dataset_tutorial.ipynb +++ b/notebooks/adding_new_dataset_tutorial.ipynb @@ -16,12 +16,13 @@ "\n", "`PoseBench` encompasses several datasets for benchmarking protein-ligand structure generation methods. However, you may want to add your own dataset to `PoseBench` to fulfill a specific use case or to test a specific idea. This tutorial will show you how to do so for a new collection of *single*-ligand protein complexes. For help integrating a new *multi*-ligand protein complex dataset, please create a new GitHub issue or send us an email. We would be happy to help you add this new collection to the benchmark!\n", "\n", - "To add your custom *single*-ligand dataset to `PoseBench`, you just have to follow the following 4-step procedure:\n", + "To add your custom *single*-ligand dataset to `PoseBench`, you just have to follow the following 5-step procedure:\n", "\n", "1. Create a new directory under `data/` with the required suffix `_set` (e.g., `data/newest_set/`) and group your (ground-truth) data files by unique IDs within this new directory (e.g., `data/newest_set/1G9V_RQ4/1G9V_RQ4_{protein.pdb,ligand.sdf}`)\n", "2. Update the config files throughout `configs/analysis/`, `configs/data/`, and `configs/model/` to list your new dataset as a CLI argument (e.g., `dataset: newest`)\n", "3. 
Predict `apo` protein structures for your new dataset using a structure predictor of your choice (e.g., ESMFold) by integrating parsing for your dataset into the prediction-related source code within `src/data/components/protein_fasta_preparation.py` and `src/data/components/protein_apo_to_holo_alignment.py`\n", - "4. Using `notebooks/posebusters_astex_inference_results_plotting.ipynb` as a template, add a new Jupyter notebook to `notebooks/` for plotting each method's results on your new dataset (after preparing each method's dataset inputs and running inference with each desired method)" + "4. Using `notebooks/posebusters_astex_inference_results_plotting.ipynb` as a template, add a new Jupyter notebook to `notebooks/` for plotting each method's results on your new dataset (after preparing each method's dataset inputs and running inference with each desired method)\n", + "5. To enable convenient inference sweeps with your new dataset, add support for your new dataset to `scripts/build_inference_script.py`" ] }, { diff --git a/notebooks/adding_new_method_tutorial.ipynb b/notebooks/adding_new_method_tutorial.ipynb index 7df98410..188d55cd 100644 --- a/notebooks/adding_new_method_tutorial.ipynb +++ b/notebooks/adding_new_method_tutorial.ipynb @@ -16,13 +16,14 @@ "\n", "`PoseBench` implements several recent methods for protein-ligand docking and structure generation. However, you may want to add your own (new) method to `PoseBench` to rigorously benchmark the algorithm for both *single* and *multi*-ligand prediction targets. This tutorial will show you how to do that.\n", "\n", - "To add your custom method to `PoseBench`, you just have to follow the following 5-step procedure:\n", + "To add your custom method to `PoseBench`, you just have to follow the following 6-step procedure:\n", "\n", "1. 
Update the config files within `configs/analysis/` and at `configs/model/ensemble_generation.yaml`, `configs/model/inference_relaxation.yaml`, and `configs/model/vina_inference.yaml` to list your new method as a CLI argument (e.g., `method: newest_model`)\n", "2. Using `configs/data/neuralplexer_input_preparation.yaml` as a template, add a new input (and, if necessary, output - e.g., `configs/data/rfaa_output_extraction.yaml`) construction script for your method to `configs/data/` (e.g., `configs/data/newest_model_input_preparation.yaml`)\n", "3. Clone (or copy) your new method's source code into the `forks/` directory, and then remove any traces of Git version control files (e.g., `rm -rf forks/new_method_repo/.git/`)\n", "4. Referencing `configs/model/neuralplexer_inference.yaml` and `src/models/neuralplexer_inference.py` as config file and source code file templates, respectively, add new inference config and source code files for your new method to `configs/model/` and `src/model/`, correspondingly\n", - "5. After using your new inference script to test your method on an available dataset (e.g., `casp15`) (and after performing any necessary complex structure alignments via `src/analysis/complex_alignment.py`), score the method's results using the corresponding test dataset's analysis script within `src/analysis/` (e.g., `src/analysis/inference_analysis_casp.py`), and then plot the final results by adding the results' CSV file path(s) to the corresponding dataset's plotting notebook within the `notebooks/` directory (e.g., `notebooks/casp15_inference_results_plotting.ipynb`)" + "5. 
After using your new inference script to test your method on an available dataset (e.g., `casp15`) (and after performing any necessary complex structure alignments via `src/analysis/complex_alignment.py`), score the method's results using the corresponding test dataset's analysis script within `src/analysis/` (e.g., `src/analysis/inference_analysis_casp.py`), and then plot the final results by adding the results' CSV file path(s) to the corresponding dataset's plotting notebook within the `notebooks/` directory (e.g., `notebooks/casp15_inference_results_plotting.ipynb`)\n", + "6. To enable convenient inference sweeps with your new method, add support for your new method to `scripts/build_inference_script.py`" ] }, { diff --git a/notebooks/casp15_all_multi_ligand_relaxed_lddt_pli_violin_plot.png b/notebooks/casp15_all_multi_ligand_relaxed_lddt_pli_violin_plot.png index 3b843811..5e13c51a 100644 Binary files a/notebooks/casp15_all_multi_ligand_relaxed_lddt_pli_violin_plot.png and b/notebooks/casp15_all_multi_ligand_relaxed_lddt_pli_violin_plot.png differ diff --git a/notebooks/casp15_all_multi_ligand_relaxed_pb_valid_bar_chart.png b/notebooks/casp15_all_multi_ligand_relaxed_pb_valid_bar_chart.png index e278a867..438bce36 100644 Binary files a/notebooks/casp15_all_multi_ligand_relaxed_pb_valid_bar_chart.png and b/notebooks/casp15_all_multi_ligand_relaxed_pb_valid_bar_chart.png differ diff --git a/notebooks/casp15_all_multi_ligand_relaxed_rmsd_lt2_bar_chart.png b/notebooks/casp15_all_multi_ligand_relaxed_rmsd_lt2_bar_chart.png index 44f2c6e6..a10a5036 100644 Binary files a/notebooks/casp15_all_multi_ligand_relaxed_rmsd_lt2_bar_chart.png and b/notebooks/casp15_all_multi_ligand_relaxed_rmsd_lt2_bar_chart.png differ diff --git a/notebooks/casp15_all_multi_ligand_relaxed_rmsd_violin_plot.png b/notebooks/casp15_all_multi_ligand_relaxed_rmsd_violin_plot.png index 7dbce00a..894f8d91 100644 Binary files a/notebooks/casp15_all_multi_ligand_relaxed_rmsd_violin_plot.png and 
b/notebooks/casp15_all_multi_ligand_relaxed_rmsd_violin_plot.png differ diff --git a/notebooks/casp15_all_single_ligand_relaxed_lddt_pli_violin_plot.png b/notebooks/casp15_all_single_ligand_relaxed_lddt_pli_violin_plot.png index 2aad945d..ff812432 100644 Binary files a/notebooks/casp15_all_single_ligand_relaxed_lddt_pli_violin_plot.png and b/notebooks/casp15_all_single_ligand_relaxed_lddt_pli_violin_plot.png differ diff --git a/notebooks/casp15_all_single_ligand_relaxed_pb_valid_bar_chart.png b/notebooks/casp15_all_single_ligand_relaxed_pb_valid_bar_chart.png index 103dd84f..4e7fda51 100644 Binary files a/notebooks/casp15_all_single_ligand_relaxed_pb_valid_bar_chart.png and b/notebooks/casp15_all_single_ligand_relaxed_pb_valid_bar_chart.png differ diff --git a/notebooks/casp15_all_single_ligand_relaxed_rmsd_lt2_bar_chart.png b/notebooks/casp15_all_single_ligand_relaxed_rmsd_lt2_bar_chart.png index 8522317f..c8796dd8 100644 Binary files a/notebooks/casp15_all_single_ligand_relaxed_rmsd_lt2_bar_chart.png and b/notebooks/casp15_all_single_ligand_relaxed_rmsd_lt2_bar_chart.png differ diff --git a/notebooks/casp15_all_single_ligand_relaxed_rmsd_violin_plot.png b/notebooks/casp15_all_single_ligand_relaxed_rmsd_violin_plot.png index 6a96a41e..4d00cf2d 100644 Binary files a/notebooks/casp15_all_single_ligand_relaxed_rmsd_violin_plot.png and b/notebooks/casp15_all_single_ligand_relaxed_rmsd_violin_plot.png differ diff --git a/notebooks/casp15_inference_results_plotting.ipynb b/notebooks/casp15_inference_results_plotting.ipynb index ea653582..87daea40 100644 --- a/notebooks/casp15_inference_results_plotting.ipynb +++ b/notebooks/casp15_inference_results_plotting.ipynb @@ -68,18 +68,17 @@ "outputs": [], "source": [ "# General variables\n", - "SCALING_STUDY_RUN_COUNTS = [40, 1000]\n", - "SCALING_STUDY_REPEAT_INDICES = [1, 2, 3]\n", - "SCALING_STUDY_METHODS = [\"neuralplexer\"]\n", - "SCALING_STUDY_RANKING_METHODS = [\"l-plddt\", \"p-plddt\", \"pl-plddt\", \"consensus\"]\n", - 
"\n", - "new_methods = [\n", + "baseline_methods = [\n", " \"diffdock\",\n", + " \"diffdockv1\",\n", " \"dynamicbind\",\n", " \"neuralplexer\",\n", + " \"neuralplexer_no_ilcl\",\n", " \"rfaa\",\n", + " \"chai-lab\",\n", " \"tulip\",\n", " \"vina_diffdock\",\n", + " \"vina_p2rank\",\n", " \"consensus_ensemble\",\n", "]\n", "max_num_repeats_per_method = 3\n", @@ -87,33 +86,29 @@ "# Mappings\n", "method_mapping = {\n", " \"diffdock\": \"DiffDock-L\",\n", + " \"diffdockv1\": \"DiffDock w/o SCT\",\n", " \"dynamicbind\": \"DynamicBind\",\n", " \"neuralplexer\": \"NeuralPLexer\",\n", - " \"rfaa\": \"RoseTTAFold-All-Atom\",\n", + " \"neuralplexer_no_ilcl\": \"NeuralPLexer w/o ILCL\",\n", + " \"rfaa\": \"RoseTTAFold-AA\",\n", + " \"chai-lab\": \"Chai-1\",\n", " \"tulip\": \"TULIP\",\n", " \"vina_diffdock\": \"DiffDock-L-Vina\",\n", + " \"vina_p2rank\": \"P2Rank-Vina\",\n", " \"consensus_ensemble\": \"Ensemble (Con)\",\n", "}\n", "\n", - "scaling_method_mapping = {\n", - " \"neuralplexer_40\": \"NeuralPLexer (40)\",\n", - " \"neuralplexer_1000\": \"NeuralPLexer (1000)\",\n", - "}\n", - "\n", - "scaling_ranking_method_mapping = {\n", - " \"l-plddt\": \"Ligand plDDT\",\n", - " \"p-plddt\": \"Protein plDDT\",\n", - " \"pl-plddt\": \"Protein-Ligand plDDT\",\n", - " \"consensus\": \"Consensus\",\n", - "}\n", - "\n", "method_category_mapping = {\n", " \"diffdock\": \"DL-based blind\",\n", + " \"diffdockv1\": \"DL-based blind\",\n", " \"dynamicbind\": \"DL-based blind\",\n", " \"neuralplexer\": \"DL-based blind\",\n", + " \"neuralplexer_no_ilcl\": \"DL-based blind\",\n", " \"rfaa\": \"DL-based blind\",\n", + " \"chai-lab\": \"DL-based blind\",\n", " \"tulip\": \"Conventional blind\",\n", " \"vina_diffdock\": \"Conventional blind\",\n", + " \"vina_p2rank\": \"Conventional blind\",\n", " \"consensus_ensemble\": \"Hybrid blind\",\n", "}" ] @@ -122,7 +117,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "#### Report test results for new methods" + "#### Report test results 
for each baseline method" ] }, { @@ -131,9 +126,9 @@ "metadata": {}, "outputs": [], "source": [ - "# load and report test results for each new method\n", + "# load and report test results for each baseline method\n", "for config in [\"\", \"_relaxed\"]:\n", - " for method in new_methods:\n", + " for method in baseline_methods:\n", " for repeat_index in range(1, max_num_repeats_per_method + 1):\n", " method_title = method_mapping[method]\n", "\n", @@ -144,11 +139,11 @@ " \"casp15\",\n", " f\"top_{method}{'' if 'ensemble' in method else '_ensemble'}_predictions_{repeat_index}\",\n", " )\n", - " globals()[\n", - " f\"{method}{config}_scoring_results_csv_filepath_{repeat_index}\"\n", - " ] = os.path.join(\n", - " globals()[f\"{method}_output_dir_{repeat_index}\"] + config,\n", - " \"scoring_results.csv\",\n", + " globals()[f\"{method}{config}_scoring_results_csv_filepath_{repeat_index}\"] = (\n", + " os.path.join(\n", + " globals()[f\"{method}_output_dir_{repeat_index}\"] + config,\n", + " \"scoring_results.csv\",\n", + " )\n", " )\n", " globals()[f\"{method}{config}_bust_results_csv_filepath_{repeat_index}\"] = os.path.join(\n", " globals()[f\"{method}_output_dir_{repeat_index}\"] + config,\n", @@ -171,12 +166,6 @@ " .groupby([\"target\", \"mdl\"])[\"pose\"]\n", " .transform(\"count\")\n", " )\n", - " grouped_num_target_ligands = (\n", - " globals()[f\"{method}{config}_scoring_results_table_{repeat_index}\"]\n", - " .groupby([\"target\", \"mdl\"])[\"num_target_ligands\"]\n", - " .first()\n", - " )\n", - " num_ligands_per_complex = grouped_num_target_ligands.loc[(slice(None), 1)].tolist()\n", " globals()[f\"{method}{config}_bust_results_table_{repeat_index}\"] = (\n", " pd.read_csv(\n", " globals()[f\"{method}{config}_bust_results_csv_filepath_{repeat_index}\"]\n", @@ -215,6 +204,13 @@ " <= 2\n", " )\n", "\n", + " grouped_num_target_ligands = (\n", + " globals()[f\"{method}{config}_scoring_results_table_{repeat_index}\"]\n", + " .groupby([\"target\", 
\"mdl\"])[\"num_target_ligands\"]\n", + " .first()\n", + " )\n", + " num_ligands_per_complex = grouped_num_target_ligands.loc[(slice(None), 1)].tolist()\n", + "\n", " print(\n", " f\"{method_title}{config}_{repeat_index} CASP15 set average `lddt_pli`: {globals()[f'{method}{config}_scoring_results_table_{repeat_index}']['lddt_pli'].mean()}\"\n", " )\n", @@ -240,6 +236,16 @@ " globals()[f\"{method}{config}_bust_results_table_{repeat_index}\"].loc[\n", " :, \"dataset\"\n", " ] = \"casp15\"\n", + " # filter bust results to only those for targets that were scoreable using the CASP scoring pipeline\n", + " globals()[f\"{method}{config}_bust_results_table_{repeat_index}\"] = globals()[\n", + " f\"{method}{config}_bust_results_table_{repeat_index}\"\n", + " ][\n", + " globals()[f\"{method}{config}_bust_results_table_{repeat_index}\"].target.isin(\n", + " globals()[\n", + " f\"{method}{config}_scoring_results_table_{repeat_index}\"\n", + " ].target.unique()\n", + " )\n", + " ]\n", " globals()[f\"{method}{config}_bust_results_table_{repeat_index}\"].loc[\n", " :, \"num_target_ligands\"\n", " ] = num_ligands_per_complex\n", @@ -281,26 +287,6 @@ " return list(method_mapping.keys()).index(method)\n", "\n", "\n", - "def assign_scaling_method_index(method: str) -> str:\n", - " \"\"\"\n", - " Assign method index for plotting scaling experiment results.\n", - "\n", - " :param method: Method name.\n", - " :return: Method index.\n", - " \"\"\"\n", - " return list(scaling_method_mapping.keys()).index(method)\n", - "\n", - "\n", - "def assign_scaling_ranking_method_index(method: str) -> str:\n", - " \"\"\"\n", - " Assign ranking method index for plotting scaling experiment results.\n", - "\n", - " :param method: Method name.\n", - " :return: Method index.\n", - " \"\"\"\n", - " return list(scaling_ranking_method_mapping.keys()).index(method)\n", - "\n", - "\n", "def categorize_method(method: str) -> str:\n", " \"\"\"\n", " Categorize method for plotting.\n", @@ -329,7 +315,7 @@ " 
globals()[f\"scoring_results_table_{repeat_index}\"] = pd.concat(\n", " [\n", " globals()[f\"{method}{config}_scoring_results_table_{repeat_index}\"]\n", - " for method in new_methods\n", + " for method in baseline_methods\n", " for config in [\"\", \"_relaxed\"]\n", " if f\"{method}{config}_scoring_results_table_{repeat_index}\" in globals()\n", " ]\n", @@ -388,7 +374,7 @@ " )\n", " ]\n", " combined_data_list.append(pd.concat([casp15_results_table, casp15_relaxed_results_table]))\n", - "combined_data = pd.concat(combined_data_list)\n", + "combined_data = pd.concat(combined_data_list).sort_values(\"method_assignment_index\")\n", "\n", "for complex_type in [\"single\", \"multi\"]:\n", " for complex_license in [\"all\", \"public\"]:\n", @@ -402,19 +388,21 @@ " hue=\"post-processing\",\n", " data=combined_data[\n", " # filter the data based on the complex type and license\n", - " combined_data[\"target\"].isin(\n", - " (\n", - " PUBLIC_CASP15_SINGLE_LIGAND_TARGETS\n", - " if complex_license == \"public\"\n", - " else All_CASP15_SINGLE_LIGAND_TARGETS\n", + " (\n", + " combined_data[\"target\"].isin(\n", + " (\n", + " PUBLIC_CASP15_SINGLE_LIGAND_TARGETS\n", + " if complex_license == \"public\"\n", + " else All_CASP15_SINGLE_LIGAND_TARGETS\n", + " )\n", " )\n", - " )\n", - " if complex_type == \"single\"\n", - " else combined_data[\"target\"].isin(\n", - " (\n", - " PUBLIC_CASP15_MULTI_LIGAND_TARGETS\n", - " if complex_license == \"public\"\n", - " else All_CASP15_MULTI_LIGAND_TARGETS\n", + " if complex_type == \"single\"\n", + " else combined_data[\"target\"].isin(\n", + " (\n", + " PUBLIC_CASP15_MULTI_LIGAND_TARGETS\n", + " if complex_license == \"public\"\n", + " else All_CASP15_MULTI_LIGAND_TARGETS\n", + " )\n", " )\n", " )\n", " ],\n", @@ -468,7 +456,7 @@ " )\n", " ]\n", " combined_data_list.append(pd.concat([casp15_results_table, casp15_relaxed_results_table]))\n", - "combined_data = pd.concat(combined_data_list)\n", + "combined_data = 
pd.concat(combined_data_list).sort_values(\"method_assignment_index\")\n", "\n", "for complex_type in [\"single\", \"multi\"]:\n", " for complex_license in [\"all\", \"public\"]:\n", @@ -482,7 +470,7 @@ " hue=\"post-processing\",\n", " data=combined_data[\n", " # ignore outliers\n", - " (combined_data[\"rmsd\"] < 50)\n", + " (combined_data[\"rmsd\"] < 150)\n", " & (\n", " # filter the data based on the complex type and license\n", " combined_data[\"target\"].isin(\n", @@ -538,7 +526,7 @@ "colors = [\"#FB8072\", \"#BEBADA\"]\n", "\n", "bar_width = 0.75\n", - "r1 = [item - 0.25 for item in range(2, 16, 2)]\n", + "r1 = [item - 0.25 for item in range(2, 24, 2)]\n", "r2 = [x + bar_width for x in r1]\n", "\n", "for complex_type in [\"single\", \"multi\"]:\n", @@ -694,7 +682,7 @@ " casp15_relaxed_rmsd_lt_2_data_std.fillna(0, inplace=True)\n", "\n", " # create the figure and a list of axes\n", - " fig, axis = plt.subplots(figsize=(12, 6))\n", + " fig, axis = plt.subplots(figsize=(18, 8))\n", " axis.spines[\"top\"].set_visible(False)\n", " axis.spines[\"right\"].set_visible(False)\n", " axis.spines[\"bottom\"].set_visible(False)\n", @@ -731,7 +719,7 @@ " # add labels, titles, ticks, etc.\n", " axis.set_xlabel(f\"{complex_type.title()}-ligand blind docking ({complex_license})\")\n", " axis.set_ylabel(\"Percentage of predictions\")\n", - " axis.set_xlim(1, 15 + 0.1)\n", + " axis.set_xlim(1, 23 + 0.1)\n", " axis.set_ylim(0, 100)\n", "\n", " axis.bar_label(casp15_rmsd_lt2_bar, fmt=\"{:,.1f}%\", label_type=\"center\")\n", @@ -744,18 +732,22 @@ " axis.grid(axis=\"y\", color=\"#EAEFF8\")\n", " axis.set_axisbelow(True)\n", "\n", - " axis.set_xticks([2, 4, 6, 6 + 1e-3, 8, 10, 11, 12, 14, 14 + 1e-3])\n", + " axis.set_xticks([2, 4, 6, 8, 8 + 1e-3, 10, 12, 14, 16, 18, 18 + 1e-3, 20, 22, 22 + 1e-3])\n", " axis.set_xticks([1 + 0.1], minor=True)\n", " axis.set_xticklabels(\n", " [\n", " \"DiffDock-L\",\n", + " \"DiffDock w/o SCT\",\n", " \"DynamicBind\",\n", " \"NeuralPLexer\",\n", " 
\"DL-based blind\",\n", - " \"RoseTTAFold-All-Atom\",\n", + " \"NeuralPLexer w/o ILCL\",\n", + " \"RoseTTAFold-AA\",\n", + " \"Chai-1\",\n", " \"TULIP\",\n", - " \"Conventional blind\",\n", " \"DiffDock-L-Vina\",\n", + " \"Conventional blind\",\n", + " \"P2Rank-Vina\",\n", " \"Ensemble (Con)\",\n", " \"Hybrid blind\",\n", " ]\n", @@ -769,7 +761,7 @@ " axis.tick_params(axis=\"y\", which=\"major\", left=\"off\", right=\"on\", color=\"#EAEFF8\")\n", "\n", " # vertical alignment of xtick labels\n", - " vert_alignments = [0.0, 0.0, 0.0, -0.1, 0.0, 0.0, -0.1, 0.0, 0.0, -0.1]\n", + " vert_alignments = [0.0, 0.0, 0.0, 0.0, -0.1, 0.0, 0.0, 0.0, 0.0, 0.0, -0.1, 0.0, 0.0, -0.1]\n", " for tick, y in zip(axis.get_xticklabels(), vert_alignments):\n", " tick.set_y(y)\n", "\n", @@ -818,7 +810,7 @@ " globals()[f\"bust_results_table_{repeat_index}\"] = pd.concat(\n", " [\n", " globals()[f\"{method}{config}_bust_results_table_{repeat_index}\"]\n", - " for method in new_methods\n", + " for method in baseline_methods\n", " for config in [\"\", \"_relaxed\"]\n", " if f\"{method}{config}_bust_results_table_{repeat_index}\" in globals()\n", " ]\n", @@ -864,7 +856,7 @@ "colors = [\"#FB8072\", \"#BEBADA\"]\n", "\n", "bar_width = 0.75\n", - "r1 = [item - 0.25 for item in range(2, 16, 2)]\n", + "r1 = [item - 0.25 for item in range(2, 24, 2)]\n", "r2 = [x + bar_width for x in r1]\n", "\n", "for complex_type in [\"single\", \"multi\"]:\n", @@ -1010,7 +1002,7 @@ " casp15_relaxed_pb_valid_data_std.fillna(0, inplace=True)\n", "\n", " # create the figure and a list of axes\n", - " fig, axis = plt.subplots(figsize=(12, 6))\n", + " fig, axis = plt.subplots(figsize=(18, 8))\n", " axis.spines[\"top\"].set_visible(False)\n", " axis.spines[\"right\"].set_visible(False)\n", " axis.spines[\"bottom\"].set_visible(False)\n", @@ -1047,7 +1039,7 @@ " # add labels, titles, ticks, etc.\n", " axis.set_xlabel(f\"{complex_type.title()}-ligand blind docking ({complex_license})\")\n", " axis.set_ylabel(\"Percentage 
of complex predictions\")\n", - " axis.set_xlim(1, 15 + 0.1)\n", + " axis.set_xlim(1, 23 + 0.1)\n", " axis.set_ylim(0, 100)\n", "\n", " axis.bar_label(casp15_pb_valid_bar, fmt=\"{:,.1f}%\", label_type=\"center\")\n", @@ -1060,18 +1052,22 @@ " axis.grid(axis=\"y\", color=\"#EAEFF8\")\n", " axis.set_axisbelow(True)\n", "\n", - " axis.set_xticks([2, 4, 6, 6 + 1e-3, 8, 10, 11, 12, 14, 14 + 1e-3])\n", + " axis.set_xticks([2, 4, 6, 8, 8 + 1e-3, 10, 12, 14, 16, 18, 18 + 1e-3, 20, 22, 22 + 1e-3])\n", " axis.set_xticks([1 + 0.1], minor=True)\n", " axis.set_xticklabels(\n", " [\n", " \"DiffDock-L\",\n", + " \"DiffDock w/o SCT\",\n", " \"DynamicBind\",\n", " \"NeuralPLexer\",\n", " \"DL-based blind\",\n", - " \"RoseTTAFold-All-Atom\",\n", + " \"NeuralPLexer w/o ILCL\",\n", + " \"RoseTTAFold-AA\",\n", + " \"Chai-1\",\n", " \"TULIP\",\n", - " \"Conventional blind\",\n", " \"DiffDock-L-Vina\",\n", + " \"Conventional blind\",\n", + " \"P2Rank-Vina\",\n", " \"Ensemble (Con)\",\n", " \"Hybrid blind\",\n", " ]\n", @@ -1085,7 +1081,7 @@ " axis.tick_params(axis=\"y\", which=\"major\", left=\"off\", right=\"on\", color=\"#EAEFF8\")\n", "\n", " # vertical alignment of xtick labels\n", - " vert_alignments = [0.0, 0.0, 0.0, -0.1, 0.0, 0.0, -0.1, 0.0, 0.0, -0.1]\n", + " vert_alignments = [0.0, 0.0, 0.0, 0.0, -0.1, 0.0, 0.0, 0.0, 0.0, 0.0, -0.1, 0.0, 0.0, -0.1]\n", " for tick, y in zip(axis.get_xticklabels(), vert_alignments):\n", " tick.set_y(y)\n", "\n", @@ -1133,7 +1129,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.9.17" + "version": "3.10.14" } }, "nbformat": 4, diff --git a/notebooks/casp15_method_interaction_analysis.png b/notebooks/casp15_method_interaction_analysis.png index f1da7500..3766f51c 100644 Binary files a/notebooks/casp15_method_interaction_analysis.png and b/notebooks/casp15_method_interaction_analysis.png differ diff --git a/notebooks/casp15_method_interaction_analysis_plotting.ipynb 
b/notebooks/casp15_method_interaction_analysis_plotting.ipynb index a5a24638..e87f9505 100644 --- a/notebooks/casp15_method_interaction_analysis_plotting.ipynb +++ b/notebooks/casp15_method_interaction_analysis_plotting.ipynb @@ -69,13 +69,17 @@ "outputs": [], "source": [ "# General variables\n", - "new_methods = [\n", + "baseline_methods = [\n", " \"diffdock\",\n", + " \"diffdockv1\",\n", " \"dynamicbind\",\n", " \"neuralplexer\",\n", + " \"neuralplexer_no_ilcl\",\n", " \"rfaa\",\n", + " \"chai-lab\",\n", " \"tulip\",\n", " \"vina_diffdock\",\n", + " \"vina_p2rank\",\n", " \"consensus_ensemble\",\n", "]\n", "max_num_repeats_per_method = (\n", @@ -90,16 +94,20 @@ ")\n", "assert os.path.exists(\n", " casp15_set_dir\n", - "), \"Please download the (public) CASP15 set from `https://zenodo.org/records/11477766` before proceeding.\"\n", + "), \"Please download the (public) CASP15 set from `https://zenodo.org/records/13858866` before proceeding.\"\n", "\n", "# Mappings\n", "method_mapping = {\n", " \"diffdock\": \"DiffDock-L\",\n", + " \"diffdockv1\": \"DiffDock w/o SCT\",\n", " \"dynamicbind\": \"DynamicBind\",\n", " \"neuralplexer\": \"NeuralPLexer\",\n", - " \"rfaa\": \"RoseTTAFold-All-Atom\",\n", + " \"neuralplexer_no_ilcl\": \"NeuralPLexer w/o ILCL\",\n", + " \"rfaa\": \"RoseTTAFold-AA\",\n", + " \"chai-lab\": \"Chai-1\",\n", " \"tulip\": \"TULIP\",\n", " \"vina_diffdock\": \"DiffDock-L-Vina\",\n", + " \"vina_p2rank\": \"P2Rank-Vina\",\n", " \"consensus_ensemble\": \"Ensemble (Con)\",\n", "}\n", "\n", @@ -267,8 +275,8 @@ "metadata": {}, "outputs": [], "source": [ - "# calculate and cache CASP15 interaction statistics for each new method\n", - "for method in new_methods:\n", + "# calculate and cache CASP15 interaction statistics for each baseline method\n", + "for method in baseline_methods:\n", " for repeat_index in range(1, max_num_repeats_per_method + 1):\n", " method_title = method_mapping[method]\n", "\n", @@ -402,22 +410,22 @@ "\n", "\n", "# load data from 
files\n", - "if os.path.exists(\"casp15_interaction_dataframes.h5\"):\n", - " dfs.append(process_method(\"casp15_interaction_dataframes.h5\", \"CASP15\"))\n", - "\n", - "for method in new_methods:\n", + "for method in baseline_methods:\n", " for repeat_index in range(1, max_num_repeats_per_method + 1):\n", " method_title = method_mapping[method]\n", " file_path = f\"{method}_casp15_interaction_dataframes_{repeat_index}.h5\"\n", " if os.path.exists(file_path):\n", " dfs.append(process_method(file_path, method_title))\n", "\n", + "if os.path.exists(\"casp15_interaction_dataframes.h5\"):\n", + " dfs.append(process_method(\"casp15_interaction_dataframes.h5\", \"CASP15\"))\n", + "\n", "# combine statistics\n", "assert len(dfs) > 0, \"No interaction dataframes found.\"\n", "df = pd.concat(dfs)\n", "\n", "# plot statistics\n", - "fig, axes = plt.subplots(2, 2, figsize=(22, 14), sharey=False)\n", + "fig, axes = plt.subplots(2, 2, figsize=(34, 14), sharey=False)\n", "\n", "interaction_types = [\n", " \"Hydrogen Bond Acceptors\",\n", @@ -480,7 +488,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.9.17" + "version": "3.10.14" } }, "nbformat": 4, diff --git a/notebooks/casp15_public_multi_ligand_relaxed_lddt_pli_violin_plot.png b/notebooks/casp15_public_multi_ligand_relaxed_lddt_pli_violin_plot.png index 5c802899..53f79da7 100644 Binary files a/notebooks/casp15_public_multi_ligand_relaxed_lddt_pli_violin_plot.png and b/notebooks/casp15_public_multi_ligand_relaxed_lddt_pli_violin_plot.png differ diff --git a/notebooks/casp15_public_multi_ligand_relaxed_pb_valid_bar_chart.png b/notebooks/casp15_public_multi_ligand_relaxed_pb_valid_bar_chart.png index d6a6fac2..b8dd4b61 100644 Binary files a/notebooks/casp15_public_multi_ligand_relaxed_pb_valid_bar_chart.png and b/notebooks/casp15_public_multi_ligand_relaxed_pb_valid_bar_chart.png differ diff --git a/notebooks/casp15_public_multi_ligand_relaxed_rmsd_lt2_bar_chart.png 
b/notebooks/casp15_public_multi_ligand_relaxed_rmsd_lt2_bar_chart.png index fce782b4..5d7fb635 100644 Binary files a/notebooks/casp15_public_multi_ligand_relaxed_rmsd_lt2_bar_chart.png and b/notebooks/casp15_public_multi_ligand_relaxed_rmsd_lt2_bar_chart.png differ diff --git a/notebooks/casp15_public_multi_ligand_relaxed_rmsd_violin_plot.png b/notebooks/casp15_public_multi_ligand_relaxed_rmsd_violin_plot.png index b236d662..8a7142ce 100644 Binary files a/notebooks/casp15_public_multi_ligand_relaxed_rmsd_violin_plot.png and b/notebooks/casp15_public_multi_ligand_relaxed_rmsd_violin_plot.png differ diff --git a/notebooks/casp15_public_single_ligand_relaxed_lddt_pli_violin_plot.png b/notebooks/casp15_public_single_ligand_relaxed_lddt_pli_violin_plot.png index cb494afe..34257d56 100644 Binary files a/notebooks/casp15_public_single_ligand_relaxed_lddt_pli_violin_plot.png and b/notebooks/casp15_public_single_ligand_relaxed_lddt_pli_violin_plot.png differ diff --git a/notebooks/casp15_public_single_ligand_relaxed_pb_valid_bar_chart.png b/notebooks/casp15_public_single_ligand_relaxed_pb_valid_bar_chart.png index 92ffaad7..5e8624a5 100644 Binary files a/notebooks/casp15_public_single_ligand_relaxed_pb_valid_bar_chart.png and b/notebooks/casp15_public_single_ligand_relaxed_pb_valid_bar_chart.png differ diff --git a/notebooks/casp15_public_single_ligand_relaxed_rmsd_lt2_bar_chart.png b/notebooks/casp15_public_single_ligand_relaxed_rmsd_lt2_bar_chart.png index 8aa1e438..3dd006e9 100644 Binary files a/notebooks/casp15_public_single_ligand_relaxed_rmsd_lt2_bar_chart.png and b/notebooks/casp15_public_single_ligand_relaxed_rmsd_lt2_bar_chart.png differ diff --git a/notebooks/casp15_public_single_ligand_relaxed_rmsd_violin_plot.png b/notebooks/casp15_public_single_ligand_relaxed_rmsd_violin_plot.png index bfd6db0c..2be87c75 100644 Binary files a/notebooks/casp15_public_single_ligand_relaxed_rmsd_violin_plot.png and 
b/notebooks/casp15_public_single_ligand_relaxed_rmsd_violin_plot.png differ diff --git a/notebooks/chai-lab_casp15_interaction_dataframes_1.h5 b/notebooks/chai-lab_casp15_interaction_dataframes_1.h5 new file mode 100644 index 00000000..eb4d7e8b Binary files /dev/null and b/notebooks/chai-lab_casp15_interaction_dataframes_1.h5 differ diff --git a/notebooks/consensus_ensemble_casp15_interaction_dataframes_1.h5 b/notebooks/consensus_ensemble_casp15_interaction_dataframes_1.h5 new file mode 100644 index 00000000..f53b31d8 Binary files /dev/null and b/notebooks/consensus_ensemble_casp15_interaction_dataframes_1.h5 differ diff --git a/notebooks/dataset_interaction_analysis_plotting.ipynb b/notebooks/dataset_interaction_analysis_plotting.ipynb index 26bd5152..006fa8a6 100644 --- a/notebooks/dataset_interaction_analysis_plotting.ipynb +++ b/notebooks/dataset_interaction_analysis_plotting.ipynb @@ -78,16 +78,16 @@ ")\n", "assert os.path.exists(\n", " ad_set_dir\n", - "), \"Please download the Astex Diverse set from `https://zenodo.org/records/11477766` before proceeding.\"\n", + "), \"Please download the Astex Diverse set from `https://zenodo.org/records/13858866` before proceeding.\"\n", "assert os.path.exists(\n", " pb_set_dir\n", - "), \"Please download the PoseBusters Benchmark set from `https://zenodo.org/records/11477766` before proceeding.\"\n", + "), \"Please download the PoseBusters Benchmark set from `https://zenodo.org/records/13858866` before proceeding.\"\n", "assert os.path.exists(\n", " dg_set_dir\n", - "), \"Please download the DockGen set from `https://zenodo.org/records/11477766` before proceeding.\"\n", + "), \"Please download the DockGen set from `https://zenodo.org/records/13858866` before proceeding.\"\n", "assert os.path.exists(\n", " casp15_set_dir\n", - "), \"Please download the (public) CASP15 set from `https://zenodo.org/records/11477766` before proceeding.\"\n", + "), \"Please download the (public) CASP15 set from 
`https://zenodo.org/records/13858866` before proceeding.\"\n", "\n", "CASP15_ANALYSIS_TARGETS_TO_SKIP = [\n", " \"T1170\"\n", diff --git a/notebooks/diffdock_casp15_interaction_dataframes_1.h5 b/notebooks/diffdock_casp15_interaction_dataframes_1.h5 new file mode 100644 index 00000000..ed34ee55 Binary files /dev/null and b/notebooks/diffdock_casp15_interaction_dataframes_1.h5 differ diff --git a/notebooks/diffdockv1_casp15_interaction_dataframes_1.h5 b/notebooks/diffdockv1_casp15_interaction_dataframes_1.h5 new file mode 100644 index 00000000..8b2e8c07 Binary files /dev/null and b/notebooks/diffdockv1_casp15_interaction_dataframes_1.h5 differ diff --git a/notebooks/dockgen_expanded_inference_results_plotting.ipynb b/notebooks/dockgen_expanded_inference_results_plotting.ipynb deleted file mode 100644 index 36e03aae..00000000 --- a/notebooks/dockgen_expanded_inference_results_plotting.ipynb +++ /dev/null @@ -1,799 +0,0 @@ -{ - "cells": [ - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## DockGen (Expanded) Inference Results Plotting" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "#### Import packages" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "import os\n", - "\n", - "import matplotlib.pyplot as plt\n", - "import matplotlib.ticker as mtick\n", - "import pandas as pd\n", - "import seaborn as sns\n", - "\n", - "from posebench.analysis.inference_analysis import BUST_TEST_COLUMNS" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "#### Configure packages" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "pd.options.mode.copy_on_write = True" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "#### Define constants" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "# General 
variables\n", - "new_methods = [\n", - " \"diffdock\",\n", - " \"diffdock_relaxed_protein\",\n", - " \"fabind\",\n", - " \"dynamicbind\",\n", - " \"neuralplexer\",\n", - " \"rfaa\",\n", - " \"vina_diffdock\",\n", - " \"vina_p2rank\",\n", - " \"consensus_ensemble\",\n", - "]\n", - "max_num_repeats_per_method = 3\n", - "\n", - "# Filepaths for each new method\n", - "globals()[\"diffdock_output_dir\"] = os.path.join(\"..\", \"forks\", \"DiffDock\", \"inference\")\n", - "globals()[\"fabind_output_dir\"] = os.path.join(\"..\", \"forks\", \"FABind\", \"inference\")\n", - "globals()[\"dynamicbind_output_dir\"] = os.path.join(\n", - " \"..\", \"forks\", \"DynamicBind\", \"inference\", \"outputs\", \"results\"\n", - ")\n", - "globals()[\"neuralplexer_output_dir\"] = os.path.join(\"..\", \"forks\", \"NeuralPLexer\", \"inference\")\n", - "globals()[\"rfaa_output_dir\"] = os.path.join(\"..\", \"forks\", \"RoseTTAFold-All-Atom\", \"inference\")\n", - "globals()[\"vina_output_dir\"] = os.path.join(\"..\", \"forks\", \"Vina\", \"inference\")\n", - "for repeat_index in range(1, max_num_repeats_per_method + 1):\n", - " # DiffDock results\n", - " globals()[f\"diffdock_dockgen_bust_results_csv_filepath_{repeat_index}\"] = os.path.join(\n", - " globals()[\"diffdock_output_dir\"],\n", - " f\"diffdock_dockgen_output_{repeat_index}\",\n", - " \"bust_results.csv\",\n", - " )\n", - " globals()[f\"diffdock_dockgen_relaxed_bust_results_csv_filepath_{repeat_index}\"] = os.path.join(\n", - " globals()[\"diffdock_output_dir\"],\n", - " f\"diffdock_dockgen_output_{repeat_index}_relaxed\",\n", - " \"bust_results.csv\",\n", - " )\n", - "\n", - " # DiffDock (Relaxed Protein) results\n", - " globals()[\n", - " f\"diffdock_relaxed_protein_dockgen_bust_results_csv_filepath_{repeat_index}\"\n", - " ] = os.path.join(\n", - " globals()[\"diffdock_output_dir\"],\n", - " f\"diffdock_dockgen_output_{repeat_index}\",\n", - " \"bust_results.csv\",\n", - " )\n", - " globals()[\n", - " 
f\"diffdock_relaxed_protein_dockgen_relaxed_bust_results_csv_filepath_{repeat_index}\"\n", - " ] = os.path.join(\n", - " globals()[\"diffdock_output_dir\"],\n", - " f\"diffdock_dockgen_output_{repeat_index}_relaxed_protein\",\n", - " \"bust_results.csv\",\n", - " )\n", - "\n", - " # FABind results\n", - " globals()[f\"fabind_dockgen_bust_results_csv_filepath_{repeat_index}\"] = os.path.join(\n", - " globals()[\"fabind_output_dir\"],\n", - " f\"fabind_dockgen_output_{repeat_index}\",\n", - " \"bust_results.csv\",\n", - " )\n", - " globals()[f\"fabind_dockgen_relaxed_bust_results_csv_filepath_{repeat_index}\"] = os.path.join(\n", - " globals()[\"fabind_output_dir\"],\n", - " f\"fabind_dockgen_output_{repeat_index}_relaxed\",\n", - " \"bust_results.csv\",\n", - " )\n", - "\n", - " # DynamicBind results\n", - " globals()[f\"dynamicbind_dockgen_bust_results_csv_filepath_{repeat_index}\"] = os.path.join(\n", - " globals()[\"dynamicbind_output_dir\"],\n", - " f\"dockgen_{repeat_index}\",\n", - " \"bust_results.csv\",\n", - " )\n", - " globals()[\n", - " f\"dynamicbind_dockgen_relaxed_bust_results_csv_filepath_{repeat_index}\"\n", - " ] = os.path.join(\n", - " globals()[\"dynamicbind_output_dir\"],\n", - " f\"dockgen_{repeat_index}_relaxed\",\n", - " \"bust_results.csv\",\n", - " )\n", - "\n", - " # NeuralPLexer results\n", - " globals()[f\"neuralplexer_dockgen_bust_results_csv_filepath_{repeat_index}\"] = os.path.join(\n", - " globals()[\"neuralplexer_output_dir\"],\n", - " f\"neuralplexer_dockgen_outputs_{repeat_index}\",\n", - " \"bust_results.csv\",\n", - " )\n", - " globals()[\n", - " f\"neuralplexer_dockgen_relaxed_bust_results_csv_filepath_{repeat_index}\"\n", - " ] = os.path.join(\n", - " globals()[\"neuralplexer_output_dir\"],\n", - " f\"neuralplexer_dockgen_outputs_{repeat_index}_relaxed\",\n", - " \"bust_results.csv\",\n", - " )\n", - "\n", - " # RoseTTAFold-All-Atom results\n", - " globals()[f\"rfaa_dockgen_bust_results_csv_filepath_{repeat_index}\"] = 
os.path.join(\n", - " globals()[\"rfaa_output_dir\"],\n", - " f\"rfaa_dockgen_outputs_{repeat_index}\",\n", - " \"bust_results.csv\",\n", - " )\n", - " globals()[f\"rfaa_dockgen_relaxed_bust_results_csv_filepath_{repeat_index}\"] = os.path.join(\n", - " globals()[\"rfaa_output_dir\"],\n", - " f\"rfaa_dockgen_outputs_{repeat_index}_relaxed\",\n", - " \"bust_results.csv\",\n", - " )\n", - "\n", - " # Vina-DiffDock-L results\n", - " globals()[f\"vina_diffdock_dockgen_bust_results_csv_filepath_{repeat_index}\"] = os.path.join(\n", - " globals()[\"vina_output_dir\"],\n", - " f\"vina_diffdock_dockgen_outputs_{repeat_index}\",\n", - " \"bust_results.csv\",\n", - " )\n", - " globals()[\n", - " f\"vina_diffdock_dockgen_relaxed_bust_results_csv_filepath_{repeat_index}\"\n", - " ] = os.path.join(\n", - " globals()[\"vina_output_dir\"],\n", - " f\"vina_diffdock_dockgen_outputs_{repeat_index}_relaxed\",\n", - " \"bust_results.csv\",\n", - " )\n", - "\n", - " # Vina-P2Rank results\n", - " globals()[f\"vina_p2rank_dockgen_bust_results_csv_filepath_{repeat_index}\"] = os.path.join(\n", - " globals()[\"vina_output_dir\"],\n", - " f\"vina_p2rank_dockgen_outputs_{repeat_index}\",\n", - " \"bust_results.csv\",\n", - " )\n", - " globals()[\n", - " f\"vina_p2rank_dockgen_relaxed_bust_results_csv_filepath_{repeat_index}\"\n", - " ] = os.path.join(\n", - " globals()[\"vina_output_dir\"],\n", - " f\"vina_p2rank_dockgen_outputs_{repeat_index}_relaxed\",\n", - " \"bust_results.csv\",\n", - " )\n", - "\n", - " # Consensus ensemble results\n", - " globals()[\n", - " f\"consensus_ensemble_dockgen_bust_results_csv_filepath_{repeat_index}\"\n", - " ] = os.path.join(\n", - " os.path.join(\"..\", \"data\", \"test_cases\", \"dockgen\"),\n", - " f\"top_consensus_ensemble_predictions_{repeat_index}\",\n", - " \"bust_results.csv\",\n", - " )\n", - " globals()[\n", - " f\"consensus_ensemble_dockgen_relaxed_bust_results_csv_filepath_{repeat_index}\"\n", - " ] = os.path.join(\n", - " os.path.join(\"..\", 
\"data\", \"test_cases\", \"dockgen\"),\n", - " f\"top_consensus_ensemble_predictions_{repeat_index}_relaxed\",\n", - " \"bust_results.csv\",\n", - " )\n", - "\n", - "# Mappings\n", - "method_mapping = {\n", - " \"diffdock\": \"DiffDock-L\",\n", - " \"diffdock_relaxed_protein\": \"DiffDock-L (Relaxed Protein)\",\n", - " \"fabind\": \"FABind\",\n", - " \"dynamicbind\": \"DynamicBind\",\n", - " \"neuralplexer\": \"NeuralPLexer\",\n", - " \"rfaa\": \"RoseTTAFold-All-Atom\",\n", - " \"vina_diffdock\": \"DiffDock-L-Vina\",\n", - " \"vina_p2rank\": \"P2Rank-Vina\",\n", - " \"consensus_ensemble\": \"Ensemble (Con)\",\n", - "}\n", - "\n", - "method_category_mapping = {\n", - " \"diffdock\": \"DL-based blind\",\n", - " \"diffdock_relaxed_protein\": \"DL-based blind\",\n", - " \"fabind\": \"DL-based blind\",\n", - " \"dynamicbind\": \"DL-based blind\",\n", - " \"neuralplexer\": \"DL-based blind\",\n", - " \"rfaa\": \"DL-based blind\",\n", - " \"vina_diffdock\": \"Conventional blind\",\n", - " \"vina_p2rank\": \"Conventional blind\",\n", - " \"consensus_ensemble\": \"Hybrid blind\",\n", - "}" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "#### Report test results for new methods" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "# load and report test results for each new method\n", - "for config in [\"\", \"_relaxed\"]:\n", - " for method in new_methods:\n", - " for repeat_index in range(1, max_num_repeats_per_method + 1):\n", - " method_title = method_mapping[method]\n", - "\n", - " if not os.path.exists(\n", - " globals()[f\"{method}_dockgen{config}_bust_results_csv_filepath_{repeat_index}\"]\n", - " ):\n", - " continue\n", - "\n", - " globals()[f\"{method}_dockgen{config}_bust_results_{repeat_index}\"] = pd.read_csv(\n", - " globals()[f\"{method}_dockgen{config}_bust_results_csv_filepath_{repeat_index}\"]\n", - " )\n", - " 
globals()[f\"{method}_dockgen{config}_bust_results_table_{repeat_index}\"] = globals()[\n", - " f\"{method}_dockgen{config}_bust_results_{repeat_index}\"\n", - " ][BUST_TEST_COLUMNS + [\"rmsd\"]]\n", - " globals()[f\"{method}_dockgen{config}_bust_results_table_{repeat_index}\"].loc[\n", - " :, \"pb_valid\"\n", - " ] = (\n", - " globals()[f\"{method}_dockgen{config}_bust_results_table_{repeat_index}\"]\n", - " .iloc[:, 1:-1]\n", - " .all(axis=1)\n", - " )\n", - "\n", - " globals()[f\"{method}_dockgen{config}_bust_results_table_{repeat_index}\"].loc[\n", - " :, \"method\"\n", - " ] = method\n", - " globals()[f\"{method}_dockgen{config}_bust_results_table_{repeat_index}\"].loc[\n", - " :, \"post-processing\"\n", - " ] = (\"energy minimization\" if config == \"_relaxed\" else \"none\")\n", - " globals()[f\"{method}_dockgen{config}_bust_results_table_{repeat_index}\"].loc[\n", - " :, \"dataset\"\n", - " ] = \"dockgen\"\n", - " globals()[f\"{method}_dockgen{config}_bust_results_table_{repeat_index}\"].loc[\n", - " :, \"docked_ligand_successfully_loaded\"\n", - " ] = globals()[f\"{method}_dockgen{config}_bust_results_table_{repeat_index}\"][\n", - " [\"mol_pred_loaded\", \"mol_true_loaded\", \"mol_cond_loaded\"]\n", - " ].all(\n", - " axis=1\n", - " )\n", - "\n", - " globals()[f\"{method}{config}_bust_results_table_{repeat_index}\"] = globals()[\n", - " f\"{method}_dockgen{config}_bust_results_table_{repeat_index}\"\n", - " ]\n", - "\n", - " print(\n", - " f\"\\n{method_title}{config}_{repeat_index} DockGen set `rmsd_≤_2å`: {globals()[f'{method}_dockgen{config}_bust_results_table_{repeat_index}']['rmsd_≤_2å'].mean()}\"\n", - " )\n", - " print(\n", - " f\"{method_title}{config}_{repeat_index} DockGen set `rmsd_≤_2å and pb_valid`: {globals()[f'{method}_dockgen{config}_bust_results_table_{repeat_index}'][globals()[f'{method}_dockgen{config}_bust_results_table_{repeat_index}']['pb_valid']]['rmsd_≤_2å'].sum() / 
len(globals()[f'{method}_dockgen{config}_bust_results_table_{repeat_index}'])}\\n\"\n", - " )" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "#### Define helper functions" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "def assign_method_index(method: str) -> str:\n", - " \"\"\"\n", - " Assign method index for plotting.\n", - "\n", - " :param method: Method name.\n", - " :return: Method index.\n", - " \"\"\"\n", - " return list(method_mapping.keys()).index(method)\n", - "\n", - "\n", - "def categorize_method(method: str) -> str:\n", - " \"\"\"\n", - " Categorize method for plotting.\n", - "\n", - " :param method: Method name.\n", - " :return: Method category.\n", - " \"\"\"\n", - " return method_category_mapping.get(method, \"Misc\")" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "#### Standardize metrics" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "# load and organize the DockGen results CSVs\n", - "for repeat_index in range(1, max_num_repeats_per_method + 1):\n", - " globals()[f\"results_table_{repeat_index}\"] = pd.concat(\n", - " [\n", - " globals()[f\"{method}{config}_bust_results_table_{repeat_index}\"]\n", - " for method in new_methods\n", - " for config in [\"\", \"_relaxed\"]\n", - " if f\"{method}{config}_bust_results_table_{repeat_index}\" in globals()\n", - " ]\n", - " )\n", - " globals()[f\"results_table_{repeat_index}\"].loc[:, \"method_category\"] = globals()[\n", - " f\"results_table_{repeat_index}\"\n", - " ][\"method\"].apply(categorize_method)\n", - " globals()[f\"results_table_{repeat_index}\"].loc[:, \"method_assignment_index\"] = globals()[\n", - " f\"results_table_{repeat_index}\"\n", - " ][\"method\"].apply(assign_method_index)\n", - " globals()[f\"results_table_{repeat_index}\"].loc[:, \"rmsd_within_threshold\"] = (\n", - " 
globals()[f\"results_table_{repeat_index}\"].loc[:, \"rmsd_≤_2å\"].fillna(False)\n", - " )\n", - " globals()[f\"results_table_{repeat_index}\"].loc[:, \"rmsd_within_threshold_and_pb_valid\"] = (\n", - " globals()[f\"results_table_{repeat_index}\"].loc[:, \"rmsd_within_threshold\"]\n", - " ) & (globals()[f\"results_table_{repeat_index}\"].loc[:, \"pb_valid\"].fillna(False))\n", - " globals()[f\"results_table_{repeat_index}\"].loc[:, \"RMSD ≤ 2 Å & PB-Valid\"] = (\n", - " globals()[f\"results_table_{repeat_index}\"]\n", - " .loc[:, \"rmsd_within_threshold_and_pb_valid\"]\n", - " .astype(int)\n", - " )\n", - " globals()[f\"results_table_{repeat_index}\"].loc[:, \"RMSD ≤ 2 Å\"] = (\n", - " globals()[f\"results_table_{repeat_index}\"]\n", - " .loc[:, \"rmsd_within_threshold\"]\n", - " .fillna(False)\n", - " .astype(int)\n", - " )\n", - " globals()[f\"results_table_{repeat_index}\"].loc[:, \"dataset\"] = (\n", - " globals()[f\"results_table_{repeat_index}\"]\n", - " .loc[:, \"dataset\"]\n", - " .map({\"dockgen\": \"DockGen set\"})\n", - " )\n", - " globals()[f\"results_table_{repeat_index}\"].loc[:, \"method\"] = (\n", - " globals()[f\"results_table_{repeat_index}\"].loc[:, \"method\"].map(method_mapping)\n", - " )" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "#### Make plots" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "# RMSD Violin Plot of DockGen Set Results (Relaxed vs. 
Unrelaxed) #\n", - "\n", - "# prepare data for the violin plots to plot\n", - "colors = [\"#FB8072\", \"#BEBADA\"]\n", - "\n", - "# combine results across all three repeats\n", - "combined_data_list = []\n", - "for repeat_index in range(1, max_num_repeats_per_method + 1):\n", - " pb_relaxed_results_table = globals()[f\"results_table_{repeat_index}\"][\n", - " (globals()[f\"results_table_{repeat_index}\"][\"dataset\"] == \"DockGen set\")\n", - " & (globals()[f\"results_table_{repeat_index}\"][\"post-processing\"] == \"energy minimization\")\n", - " ]\n", - " pb_unrelaxed_results_table = globals()[f\"results_table_{repeat_index}\"][\n", - " (globals()[f\"results_table_{repeat_index}\"][\"dataset\"] == \"DockGen set\")\n", - " & (globals()[f\"results_table_{repeat_index}\"][\"post-processing\"] == \"none\")\n", - " ]\n", - " combined_data_list.append(pd.concat([pb_relaxed_results_table, pb_unrelaxed_results_table]))\n", - "combined_relaxed_data = pd.concat(combined_data_list).sort_values(\"method_assignment_index\")\n", - "\n", - "# set the size of the figure\n", - "plt.figure(figsize=(12, 6))\n", - "\n", - "# create a violin plot\n", - "sns.violinplot(\n", - " x=\"method\",\n", - " y=\"rmsd\",\n", - " hue=\"post-processing\",\n", - " data=combined_relaxed_data[combined_relaxed_data[\"rmsd\"] < 50],\n", - " split=True,\n", - " inner=\"quartile\",\n", - " palette=colors,\n", - " cut=0,\n", - ")\n", - "\n", - "# set labels and title\n", - "plt.xlabel(\"Single-ligand docking\")\n", - "plt.ylabel(\"RMSD\")\n", - "\n", - "# rotate x-axis labels for better readability\n", - "plt.xticks(rotation=45, ha=\"right\")\n", - "\n", - "# display legend outside the plot\n", - "plt.legend(title=\"Post-processing\", bbox_to_anchor=(1.05, 1), loc=\"upper left\")\n", - "\n", - "# display the plots\n", - "plt.tight_layout()\n", - "plt.savefig(\"dockgen_expanded_single_ligand_relaxed_rmsd_violin_plot.png\", dpi=300)\n", - "plt.show()" - ] - }, - { - "cell_type": "code", - 
"execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "# RMSD ≤ 2 Å Bar Chart of DockGen Set Results (Relaxed vs. Unrelaxed) #\n", - "\n", - "# prepare data for the bar charts to plot\n", - "colors = [\"#FB8072\", \"#BEBADA\"]\n", - "\n", - "bar_width = 0.5\n", - "r1 = [item - 0.25 for item in range(2, 20, 2)]\n", - "r2 = [x + bar_width for x in r1]\n", - "\n", - "(\n", - " dockgen_rmsd_lt_2_data_list,\n", - " dockgen_relaxed_rmsd_lt_2_data_list,\n", - " dockgen_rmsd_lt_2_and_pb_valid_data_list,\n", - " dockgen_relaxed_rmsd_lt_2_and_pb_valid_data_list,\n", - ") = ([], [], [], [])\n", - "for repeat_index in range(1, max_num_repeats_per_method + 1):\n", - " dockgen_results_table = globals()[f\"results_table_{repeat_index}\"][\n", - " (globals()[f\"results_table_{repeat_index}\"][\"dataset\"] == \"DockGen set\")\n", - " & (globals()[f\"results_table_{repeat_index}\"][\"post-processing\"] == \"none\")\n", - " ].sort_values(by=\"method_assignment_index\")\n", - " dockgen_relaxed_results_table = globals()[f\"results_table_{repeat_index}\"][\n", - " (globals()[f\"results_table_{repeat_index}\"][\"dataset\"] == \"DockGen set\")\n", - " & (globals()[f\"results_table_{repeat_index}\"][\"post-processing\"] == \"energy minimization\")\n", - " ].sort_values(by=\"method_assignment_index\")\n", - "\n", - " dockgen_labels = dockgen_results_table[\"method\"].unique()\n", - " num_methods = len(dockgen_labels)\n", - "\n", - " num_dockgen_data_points = max(\n", - " len(dockgen_results_table[(dockgen_results_table[\"method\"] == method)])\n", - " for method in dockgen_labels\n", - " )\n", - " num_dockgen_relaxed_data_points = max(\n", - " len(dockgen_relaxed_results_table[(dockgen_relaxed_results_table[\"method\"] == method)])\n", - " for method in dockgen_labels\n", - " )\n", - "\n", - " # DockGen (unrelaxed) results\n", - " dockgen_rmsd_lt_2_data = (\n", - " dockgen_results_table.groupby(\"method\")\n", - " .agg({\"RMSD ≤ 2 Å\": \"sum\", 
\"method_assignment_index\": \"first\"})\n", - " .reset_index()\n", - " )\n", - " dockgen_rmsd_lt_2_data[\"RMSD ≤ 2 Å\"] = (\n", - " dockgen_rmsd_lt_2_data[\"RMSD ≤ 2 Å\"] / num_dockgen_data_points * 100\n", - " )\n", - " dockgen_rmsd_lt_2_data_list.append(\n", - " dockgen_rmsd_lt_2_data.sort_values(\"method_assignment_index\")\n", - " )\n", - "\n", - " # DockGen (relaxed) results\n", - " dockgen_relaxed_rmsd_lt_2_data = (\n", - " dockgen_relaxed_results_table.groupby(\"method\")\n", - " .agg({\"RMSD ≤ 2 Å\": \"sum\", \"method_assignment_index\": \"first\"})\n", - " .reset_index()\n", - " )\n", - " dockgen_relaxed_rmsd_lt_2_data[\"RMSD ≤ 2 Å\"] = (\n", - " dockgen_relaxed_rmsd_lt_2_data[\"RMSD ≤ 2 Å\"] / num_dockgen_relaxed_data_points * 100\n", - " )\n", - " dockgen_relaxed_rmsd_lt_2_data_list.append(\n", - " dockgen_relaxed_rmsd_lt_2_data.sort_values(\"method_assignment_index\")\n", - " )\n", - "\n", - " # DockGen (unrelaxed and PB-Valid) results\n", - " dockgen_rmsd_lt_2_and_pb_valid_data = (\n", - " dockgen_results_table.groupby(\"method\")\n", - " .agg({\"RMSD ≤ 2 Å & PB-Valid\": \"sum\", \"method_assignment_index\": \"first\"})\n", - " .reset_index()\n", - " )\n", - " dockgen_rmsd_lt_2_and_pb_valid_data[\"RMSD ≤ 2 Å & PB-Valid\"] = (\n", - " dockgen_rmsd_lt_2_and_pb_valid_data[\"RMSD ≤ 2 Å & PB-Valid\"]\n", - " / num_dockgen_data_points\n", - " * 100\n", - " )\n", - " dockgen_rmsd_lt_2_and_pb_valid_data_list.append(\n", - " dockgen_rmsd_lt_2_and_pb_valid_data.sort_values(\"method_assignment_index\")\n", - " )\n", - "\n", - " # DockGen (relaxed and PB-Valid) results\n", - " dockgen_relaxed_rmsd_lt_2_and_pb_valid_data = (\n", - " dockgen_relaxed_results_table.groupby(\"method\")\n", - " .agg({\"RMSD ≤ 2 Å & PB-Valid\": \"sum\", \"method_assignment_index\": \"first\"})\n", - " .reset_index()\n", - " )\n", - " dockgen_relaxed_rmsd_lt_2_and_pb_valid_data[\"RMSD ≤ 2 Å & PB-Valid\"] = (\n", - " dockgen_relaxed_rmsd_lt_2_and_pb_valid_data[\"RMSD ≤ 2 Å & 
PB-Valid\"]\n", - " / num_dockgen_relaxed_data_points\n", - " * 100\n", - " )\n", - " dockgen_relaxed_rmsd_lt_2_and_pb_valid_data_list.append(\n", - " dockgen_relaxed_rmsd_lt_2_and_pb_valid_data.sort_values(\"method_assignment_index\")\n", - " )\n", - "\n", - "# calculate means and standard deviations\n", - "dockgen_rmsd_lt_2_data_mean = (\n", - " pd.concat([df for df in dockgen_rmsd_lt_2_data_list])\n", - " .groupby(\n", - " [\n", - " \"method\",\n", - " \"method_assignment_index\",\n", - " ]\n", - " )\n", - " .mean()\n", - " .sort_values([\"method_assignment_index\"])[\"RMSD ≤ 2 Å\"]\n", - ")\n", - "dockgen_rmsd_lt_2_data_std = (\n", - " pd.concat([df for df in dockgen_rmsd_lt_2_data_list])\n", - " .groupby(\n", - " [\n", - " \"method\",\n", - " \"method_assignment_index\",\n", - " ]\n", - " )\n", - " .std()\n", - " .sort_values([\"method_assignment_index\"])[\"RMSD ≤ 2 Å\"]\n", - ")\n", - "\n", - "dockgen_relaxed_rmsd_lt_2_data_mean = (\n", - " pd.concat([df for df in dockgen_relaxed_rmsd_lt_2_data_list])\n", - " .groupby(\n", - " [\n", - " \"method\",\n", - " \"method_assignment_index\",\n", - " ]\n", - " )\n", - " .mean()\n", - " .sort_values([\"method_assignment_index\"])[\"RMSD ≤ 2 Å\"]\n", - ")\n", - "dockgen_relaxed_rmsd_lt_2_data_std = (\n", - " pd.concat([df for df in dockgen_relaxed_rmsd_lt_2_data_list])\n", - " .groupby(\n", - " [\n", - " \"method\",\n", - " \"method_assignment_index\",\n", - " ]\n", - " )\n", - " .std()\n", - " .sort_values([\"method_assignment_index\"])[\"RMSD ≤ 2 Å\"]\n", - ")\n", - "\n", - "dockgen_rmsd_lt_2_and_pb_valid_data_mean = (\n", - " pd.concat([df for df in dockgen_rmsd_lt_2_and_pb_valid_data_list])\n", - " .groupby(\n", - " [\n", - " \"method\",\n", - " \"method_assignment_index\",\n", - " ]\n", - " )\n", - " .mean()\n", - " .sort_values([\"method_assignment_index\"])[\"RMSD ≤ 2 Å & PB-Valid\"]\n", - ")\n", - "dockgen_rmsd_lt_2_and_pb_valid_data_std = (\n", - " pd.concat([df for df in 
dockgen_rmsd_lt_2_and_pb_valid_data_list])\n", - " .groupby(\n", - " [\n", - " \"method\",\n", - " \"method_assignment_index\",\n", - " ]\n", - " )\n", - " .std()\n", - " .sort_values([\"method_assignment_index\"])[\"RMSD ≤ 2 Å & PB-Valid\"]\n", - ")\n", - "\n", - "dockgen_relaxed_rmsd_lt_2_and_pb_valid_data_mean = (\n", - " pd.concat([df for df in dockgen_relaxed_rmsd_lt_2_and_pb_valid_data_list])\n", - " .groupby(\n", - " [\n", - " \"method\",\n", - " \"method_assignment_index\",\n", - " ]\n", - " )\n", - " .mean()\n", - " .sort_values([\"method_assignment_index\"])[\"RMSD ≤ 2 Å & PB-Valid\"]\n", - ")\n", - "dockgen_relaxed_rmsd_lt_2_and_pb_valid_data_std = (\n", - " pd.concat([df for df in dockgen_relaxed_rmsd_lt_2_and_pb_valid_data_list])\n", - " .groupby(\n", - " [\n", - " \"method\",\n", - " \"method_assignment_index\",\n", - " ]\n", - " )\n", - " .std()\n", - " .sort_values([\"method_assignment_index\"])[\"RMSD ≤ 2 Å & PB-Valid\"]\n", - ")\n", - "\n", - "dockgen_rmsd_lt_2_data_std.fillna(0, inplace=True)\n", - "dockgen_relaxed_rmsd_lt_2_data_std.fillna(0, inplace=True)\n", - "dockgen_rmsd_lt_2_and_pb_valid_data_std.fillna(0, inplace=True)\n", - "dockgen_relaxed_rmsd_lt_2_and_pb_valid_data_std.fillna(0, inplace=True)\n", - "\n", - "# create the figure and a list of axes\n", - "fig, axis = plt.subplots(figsize=(12, 6))\n", - "axis.spines[\"top\"].set_visible(False)\n", - "axis.spines[\"right\"].set_visible(False)\n", - "axis.spines[\"bottom\"].set_visible(False)\n", - "axis.spines[\"left\"].set_visible(False)\n", - "\n", - "# define font properties\n", - "plt.rcParams[\"font.size\"] = 11\n", - "plt.rcParams[\"axes.labelsize\"] = 13\n", - "\n", - "# plot (unrelaxed) data for the DockGen set\n", - "dockgen_rmsd_lt_2_and_pb_valid_bar = axis.bar(\n", - " r1,\n", - " dockgen_rmsd_lt_2_and_pb_valid_data_mean,\n", - " yerr=dockgen_rmsd_lt_2_and_pb_valid_data_std,\n", - " label=\"RMSD ≤ 2Å & PB-Valid\",\n", - " color=colors[0],\n", - " width=bar_width,\n", - ")\n", - 
"dockgen_rmsd_lt_2_bar = axis.bar(\n", - " r1,\n", - " dockgen_rmsd_lt_2_data_mean,\n", - " yerr=dockgen_rmsd_lt_2_data_std,\n", - " label=\"RMSD ≤ 2Å\",\n", - " color=\"none\",\n", - " edgecolor=colors[0],\n", - " hatch=\"\\\\\\\\\\\\\",\n", - " width=bar_width,\n", - ")\n", - "\n", - "# plot (relaxed) data for the DockGen set\n", - "dockgen_relaxed_rmsd_lt_2_and_pb_valid_bar = axis.bar(\n", - " r2,\n", - " dockgen_relaxed_rmsd_lt_2_and_pb_valid_data_mean,\n", - " yerr=dockgen_relaxed_rmsd_lt_2_and_pb_valid_data_std,\n", - " label=\"RMSD ≤ 2Å & PB-Valid\",\n", - " color=colors[1],\n", - " width=bar_width,\n", - ")\n", - "dockgen_relaxed_rmsd_lt_2_bar = axis.bar(\n", - " r2,\n", - " dockgen_relaxed_rmsd_lt_2_data_mean,\n", - " yerr=dockgen_relaxed_rmsd_lt_2_data_std,\n", - " label=\"RMSD ≤ 2Å\",\n", - " color=\"none\",\n", - " edgecolor=colors[1],\n", - " hatch=\"\\\\\\\\\\\\\",\n", - " width=bar_width,\n", - ")\n", - "\n", - "# add labels, titles, ticks, etc.\n", - "axis.set_ylabel(\"Percentage of predictions\")\n", - "axis.set_xlim(1, 19 + 0.1)\n", - "axis.set_ylim(0, 100)\n", - "\n", - "axis.bar_label(dockgen_rmsd_lt_2_bar, fmt=\"{:,.1f}%\", label_type=\"edge\")\n", - "axis.bar_label(dockgen_rmsd_lt_2_and_pb_valid_bar, fmt=\"{:,.1f}%\", label_type=\"center\", padding=5)\n", - "axis.bar_label(dockgen_relaxed_rmsd_lt_2_bar, fmt=\"{:,.1f}%\", label_type=\"edge\")\n", - "axis.bar_label(\n", - " dockgen_relaxed_rmsd_lt_2_and_pb_valid_bar, fmt=\"{:,.1f}%\", label_type=\"center\", padding=5\n", - ")\n", - "\n", - "axis.yaxis.set_major_formatter(mtick.PercentFormatter())\n", - "\n", - "axis.set_yticks([0, 20, 40, 60, 80, 100])\n", - "axis.axhline(y=0, color=\"#EAEFF8\")\n", - "axis.grid(axis=\"y\", color=\"#EAEFF8\")\n", - "axis.set_axisbelow(True)\n", - "\n", - "axis.set_xticks([2, 4, 6, 7, 8, 10, 12, 14, 15, 16, 18, 18 + 1e-3])\n", - "axis.set_xticks([1 + 0.1], minor=True)\n", - "axis.set_xticklabels(\n", - " [\n", - " \"DiffDock-L\",\n", - " \"DiffDock-L 
(Relax-P)\",\n", - " \"FABind\",\n", - " \"DL-based blind\",\n", - " \"DynamicBind\",\n", - " \"NeuralPLexer\",\n", - " \"RoseTTAFold-All-Atom\",\n", - " \"DiffDock-L-Vina\",\n", - " \"Conventional blind\",\n", - " \"P2Rank-Vina\",\n", - " \"Ensemble (Con)\",\n", - " \"Hybrid blind\",\n", - " ]\n", - ")\n", - "\n", - "axis.grid(\"off\", axis=\"x\", color=\"#EAEFF8\")\n", - "axis.grid(\"off\", axis=\"x\", which=\"minor\", color=\"#EAEFF8\")\n", - "\n", - "axis.tick_params(axis=\"x\", which=\"minor\", direction=\"out\", length=30, color=\"#EAEFF8\")\n", - "axis.tick_params(axis=\"x\", which=\"major\", bottom=\"off\", top=\"off\", color=\"#EAEFF8\")\n", - "axis.tick_params(axis=\"y\", which=\"major\", left=\"off\", right=\"on\", color=\"#EAEFF8\")\n", - "\n", - "# vertical alignment of xtick labels\n", - "vert_alignments = [0.0, 0.0, 0.0, -0.1, 0.0, 0.0, 0.0, 0.0, -0.1, 0.0, 0.0, -0.1]\n", - "for tick, y in zip(axis.get_xticklabels(), vert_alignments):\n", - " tick.set_y(y)\n", - "\n", - "# add legends\n", - "legend_0 = fig.legend(\n", - " [dockgen_rmsd_lt_2_bar, dockgen_rmsd_lt_2_and_pb_valid_bar],\n", - " [\"RMSD ≤ 2Å\", \"RMSD ≤ 2Å & PB-Valid\"],\n", - " loc=\"upper right\",\n", - " title=\"No post-processing\",\n", - " bbox_to_anchor=(1, 1, -0.20, -0.05),\n", - ")\n", - "legend_1 = fig.legend(\n", - " [dockgen_relaxed_rmsd_lt_2_bar, dockgen_relaxed_rmsd_lt_2_and_pb_valid_bar],\n", - " [\"RMSD ≤ 2Å\", \"RMSD ≤ 2Å & PB-Valid\"],\n", - " loc=\"upper right\",\n", - " title=\"With relaxation\",\n", - " bbox_to_anchor=(1, 1, -0.01, -0.05),\n", - ")\n", - "legend_0.get_frame().set_alpha(0)\n", - "legend_1.get_frame().set_alpha(0)\n", - "\n", - "# display the plots\n", - "plt.tight_layout()\n", - "plt.savefig(\"dockgen_expanded_single_ligand_relaxed_bar_chart.png\", dpi=300)\n", - "plt.show()" - ] - } - ], - "metadata": { - "kernelspec": { - "display_name": "PoseBench", - "language": "python", - "name": "python3" - }, - "language_info": { - "codemirror_mode": { - "name": 
"ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.9.17" - } - }, - "nbformat": 4, - "nbformat_minor": 2 -} diff --git a/notebooks/dockgen_expanded_single_ligand_relaxed_bar_chart.png b/notebooks/dockgen_expanded_single_ligand_relaxed_bar_chart.png deleted file mode 100644 index c7430568..00000000 Binary files a/notebooks/dockgen_expanded_single_ligand_relaxed_bar_chart.png and /dev/null differ diff --git a/notebooks/dockgen_expanded_single_ligand_relaxed_rmsd_violin_plot.png b/notebooks/dockgen_expanded_single_ligand_relaxed_rmsd_violin_plot.png deleted file mode 100644 index ce3397aa..00000000 Binary files a/notebooks/dockgen_expanded_single_ligand_relaxed_rmsd_violin_plot.png and /dev/null differ diff --git a/notebooks/dockgen_inference_results_plotting.ipynb b/notebooks/dockgen_inference_results_plotting.ipynb index d92925c8..93f0e725 100644 --- a/notebooks/dockgen_inference_results_plotting.ipynb +++ b/notebooks/dockgen_inference_results_plotting.ipynb @@ -60,25 +60,31 @@ "outputs": [], "source": [ "# General variables\n", - "new_methods = [\n", + "baseline_methods = [\n", " \"diffdock\",\n", + " \"diffdock_relaxed_protein\",\n", + " \"diffdockv1\",\n", " \"fabind\",\n", " \"dynamicbind\",\n", " \"neuralplexer\",\n", " \"rfaa\",\n", + " \"chai-lab\",\n", " \"vina_diffdock\",\n", + " \"vina_p2rank\",\n", " \"consensus_ensemble\",\n", "]\n", "max_num_repeats_per_method = 3\n", "\n", - "# Filepaths for each new method\n", + "# Filepaths for each baseline method\n", "globals()[\"diffdock_output_dir\"] = os.path.join(\"..\", \"forks\", \"DiffDock\", \"inference\")\n", + "globals()[\"diffdockv1_output_dir\"] = os.path.join(\"..\", \"forks\", \"DiffDockv1\", \"inference\")\n", "globals()[\"fabind_output_dir\"] = os.path.join(\"..\", \"forks\", \"FABind\", \"inference\")\n", "globals()[\"dynamicbind_output_dir\"] = 
os.path.join(\n", " \"..\", \"forks\", \"DynamicBind\", \"inference\", \"outputs\", \"results\"\n", ")\n", "globals()[\"neuralplexer_output_dir\"] = os.path.join(\"..\", \"forks\", \"NeuralPLexer\", \"inference\")\n", "globals()[\"rfaa_output_dir\"] = os.path.join(\"..\", \"forks\", \"RoseTTAFold-All-Atom\", \"inference\")\n", + "globals()[\"chai-lab_output_dir\"] = os.path.join(\"..\", \"forks\", \"chai-lab\", \"inference\")\n", "globals()[\"vina_output_dir\"] = os.path.join(\"..\", \"forks\", \"Vina\", \"inference\")\n", "for repeat_index in range(1, max_num_repeats_per_method + 1):\n", " # DiffDock results\n", @@ -93,6 +99,36 @@ " \"bust_results.csv\",\n", " )\n", "\n", + " # DiffDock (relaxed-protein) results\n", + " globals()[f\"diffdock_relaxed_protein_dockgen_bust_results_csv_filepath_{repeat_index}\"] = (\n", + " os.path.join(\n", + " globals()[\"diffdock_output_dir\"],\n", + " f\"diffdock_dockgen_output_{repeat_index}\",\n", + " \"bust_results.csv\",\n", + " )\n", + " )\n", + " globals()[\n", + " f\"diffdock_relaxed_protein_dockgen_relaxed_bust_results_csv_filepath_{repeat_index}\"\n", + " ] = os.path.join(\n", + " globals()[\"diffdock_output_dir\"],\n", + " f\"diffdock_dockgen_output_{repeat_index}_relaxed\",\n", + " \"bust_results_protein_relaxed.csv\",\n", + " )\n", + "\n", + " # DiffDock w/o structural cluster training (SCT) results\n", + " globals()[f\"diffdockv1_dockgen_bust_results_csv_filepath_{repeat_index}\"] = os.path.join(\n", + " globals()[\"diffdockv1_output_dir\"],\n", + " f\"diffdock_dockgen_output_{repeat_index}\",\n", + " \"bust_results.csv\",\n", + " )\n", + " globals()[f\"diffdockv1_dockgen_relaxed_bust_results_csv_filepath_{repeat_index}\"] = (\n", + " os.path.join(\n", + " globals()[\"diffdockv1_output_dir\"],\n", + " f\"diffdock_dockgen_output_{repeat_index}_relaxed\",\n", + " \"bust_results.csv\",\n", + " )\n", + " )\n", + "\n", " # FABind results\n", " globals()[f\"fabind_dockgen_bust_results_csv_filepath_{repeat_index}\"] = 
os.path.join(\n", " globals()[\"fabind_output_dir\"],\n", @@ -111,12 +147,12 @@ " f\"dockgen_{repeat_index}\",\n", " \"bust_results.csv\",\n", " )\n", - " globals()[\n", - " f\"dynamicbind_dockgen_relaxed_bust_results_csv_filepath_{repeat_index}\"\n", - " ] = os.path.join(\n", - " globals()[\"dynamicbind_output_dir\"],\n", - " f\"dockgen_{repeat_index}_relaxed\",\n", - " \"bust_results.csv\",\n", + " globals()[f\"dynamicbind_dockgen_relaxed_bust_results_csv_filepath_{repeat_index}\"] = (\n", + " os.path.join(\n", + " globals()[\"dynamicbind_output_dir\"],\n", + " f\"dockgen_{repeat_index}_relaxed\",\n", + " \"bust_results.csv\",\n", + " )\n", " )\n", "\n", " # NeuralPLexer results\n", @@ -125,12 +161,12 @@ " f\"neuralplexer_dockgen_outputs_{repeat_index}\",\n", " \"bust_results.csv\",\n", " )\n", - " globals()[\n", - " f\"neuralplexer_dockgen_relaxed_bust_results_csv_filepath_{repeat_index}\"\n", - " ] = os.path.join(\n", - " globals()[\"neuralplexer_output_dir\"],\n", - " f\"neuralplexer_dockgen_outputs_{repeat_index}_relaxed\",\n", - " \"bust_results.csv\",\n", + " globals()[f\"neuralplexer_dockgen_relaxed_bust_results_csv_filepath_{repeat_index}\"] = (\n", + " os.path.join(\n", + " globals()[\"neuralplexer_output_dir\"],\n", + " f\"neuralplexer_dockgen_outputs_{repeat_index}_relaxed\",\n", + " \"bust_results.csv\",\n", + " )\n", " )\n", "\n", " # RoseTTAFold-All-Atom results\n", @@ -145,54 +181,88 @@ " \"bust_results.csv\",\n", " )\n", "\n", - " # Vina-DiffDock-L results\n", + " # Chai-1 results\n", + " globals()[f\"chai-lab_dockgen_bust_results_csv_filepath_{repeat_index}\"] = os.path.join(\n", + " globals()[\"chai-lab_output_dir\"],\n", + " f\"chai-lab_dockgen_outputs_{repeat_index}\",\n", + " \"bust_results.csv\",\n", + " )\n", + " globals()[f\"chai-lab_dockgen_relaxed_bust_results_csv_filepath_{repeat_index}\"] = os.path.join(\n", + " globals()[\"chai-lab_output_dir\"],\n", + " f\"chai-lab_dockgen_outputs_{repeat_index}_relaxed\",\n", + " 
\"bust_results.csv\",\n", + " )\n", + "\n", + " # DiffDock-L-Vina results\n", " globals()[f\"vina_diffdock_dockgen_bust_results_csv_filepath_{repeat_index}\"] = os.path.join(\n", " globals()[\"vina_output_dir\"],\n", " f\"vina_diffdock_dockgen_outputs_{repeat_index}\",\n", " \"bust_results.csv\",\n", " )\n", - " globals()[\n", - " f\"vina_diffdock_dockgen_relaxed_bust_results_csv_filepath_{repeat_index}\"\n", - " ] = os.path.join(\n", + " globals()[f\"vina_diffdock_dockgen_relaxed_bust_results_csv_filepath_{repeat_index}\"] = (\n", + " os.path.join(\n", + " globals()[\"vina_output_dir\"],\n", + " f\"vina_diffdock_dockgen_outputs_{repeat_index}_relaxed\",\n", + " \"bust_results.csv\",\n", + " )\n", + " )\n", + "\n", + " # P2Rank-Vina results\n", + " globals()[f\"vina_p2rank_dockgen_bust_results_csv_filepath_{repeat_index}\"] = os.path.join(\n", " globals()[\"vina_output_dir\"],\n", - " f\"vina_diffdock_dockgen_outputs_{repeat_index}_relaxed\",\n", + " f\"vina_p2rank_dockgen_outputs_{repeat_index}\",\n", " \"bust_results.csv\",\n", " )\n", + " globals()[f\"vina_p2rank_dockgen_relaxed_bust_results_csv_filepath_{repeat_index}\"] = (\n", + " os.path.join(\n", + " globals()[\"vina_output_dir\"],\n", + " f\"vina_p2rank_dockgen_outputs_{repeat_index}_relaxed\",\n", + " \"bust_results.csv\",\n", + " )\n", + " )\n", "\n", " # Consensus ensemble results\n", - " globals()[\n", - " f\"consensus_ensemble_dockgen_bust_results_csv_filepath_{repeat_index}\"\n", - " ] = os.path.join(\n", - " os.path.join(\"..\", \"data\", \"test_cases\", \"dockgen\"),\n", - " f\"top_consensus_ensemble_predictions_{repeat_index}\",\n", - " \"bust_results.csv\",\n", + " globals()[f\"consensus_ensemble_dockgen_bust_results_csv_filepath_{repeat_index}\"] = (\n", + " os.path.join(\n", + " os.path.join(\"..\", \"data\", \"test_cases\", \"dockgen\"),\n", + " f\"top_consensus_ensemble_predictions_{repeat_index}\",\n", + " \"bust_results.csv\",\n", + " )\n", " )\n", - " globals()[\n", - " 
f\"consensus_ensemble_dockgen_relaxed_bust_results_csv_filepath_{repeat_index}\"\n", - " ] = os.path.join(\n", - " os.path.join(\"..\", \"data\", \"test_cases\", \"dockgen\"),\n", - " f\"top_consensus_ensemble_predictions_{repeat_index}_relaxed\",\n", - " \"bust_results.csv\",\n", + " globals()[f\"consensus_ensemble_dockgen_relaxed_bust_results_csv_filepath_{repeat_index}\"] = (\n", + " os.path.join(\n", + " os.path.join(\"..\", \"data\", \"test_cases\", \"dockgen\"),\n", + " f\"top_consensus_ensemble_predictions_{repeat_index}_relaxed\",\n", + " \"bust_results.csv\",\n", + " )\n", " )\n", "\n", "# Mappings\n", "method_mapping = {\n", " \"diffdock\": \"DiffDock-L\",\n", + " \"diffdock_relaxed_protein\": \"DiffDock-L-Relax-Prot\",\n", + " \"diffdockv1\": \"DiffDock w/o SCT\",\n", " \"fabind\": \"FABind\",\n", " \"dynamicbind\": \"DynamicBind\",\n", " \"neuralplexer\": \"NeuralPLexer\",\n", - " \"rfaa\": \"RoseTTAFold-All-Atom\",\n", + " \"rfaa\": \"RoseTTAFold-AA\",\n", + " \"chai-lab\": \"Chai-1\",\n", " \"vina_diffdock\": \"DiffDock-L-Vina\",\n", + " \"vina_p2rank\": \"P2Rank-Vina\",\n", " \"consensus_ensemble\": \"Ensemble (Con)\",\n", "}\n", "\n", "method_category_mapping = {\n", " \"diffdock\": \"DL-based blind\",\n", + " \"diffdock_relaxed_protein\": \"DL-based blind\",\n", + " \"diffdockv1\": \"DL-based blind\",\n", " \"fabind\": \"DL-based blind\",\n", " \"dynamicbind\": \"DL-based blind\",\n", " \"neuralplexer\": \"DL-based blind\",\n", " \"rfaa\": \"DL-based blind\",\n", + " \"chai-lab\": \"DL-based blind\",\n", " \"vina_diffdock\": \"Conventional blind\",\n", + " \"vina_p2rank\": \"Conventional blind\",\n", " \"consensus_ensemble\": \"Hybrid blind\",\n", "}" ] @@ -201,7 +271,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "#### Report test results for new methods" + "#### Report test results for each baseline method" ] }, { @@ -210,9 +280,9 @@ "metadata": {}, "outputs": [], "source": [ - "# load and report test results for each new method\n", 
+ "# load and report test results for each baseline method\n", "for config in [\"\", \"_relaxed\"]:\n", - " for method in new_methods:\n", + " for method in baseline_methods:\n", " for repeat_index in range(1, max_num_repeats_per_method + 1):\n", " method_title = method_mapping[method]\n", "\n", @@ -315,7 +385,7 @@ " globals()[f\"results_table_{repeat_index}\"] = pd.concat(\n", " [\n", " globals()[f\"{method}{config}_bust_results_table_{repeat_index}\"]\n", - " for method in new_methods\n", + " for method in baseline_methods\n", " for config in [\"\", \"_relaxed\"]\n", " if f\"{method}{config}_bust_results_table_{repeat_index}\" in globals()\n", " ]\n", @@ -393,7 +463,7 @@ " x=\"method\",\n", " y=\"rmsd\",\n", " hue=\"post-processing\",\n", - " data=combined_relaxed_data[combined_relaxed_data[\"rmsd\"] < 50],\n", + " data=combined_relaxed_data[combined_relaxed_data[\"rmsd\"] < 150], # ignore outliers\n", " split=True,\n", " inner=\"quartile\",\n", " palette=colors,\n", @@ -428,7 +498,7 @@ "colors = [\"#FB8072\", \"#BEBADA\"]\n", "\n", "bar_width = 0.5\n", - "r1 = [item - 0.25 for item in range(2, 16, 2)]\n", + "r1 = [item - 0.25 for item in range(2, 24, 2)]\n", "r2 = [x + bar_width for x in r1]\n", "\n", "(\n", @@ -614,7 +684,7 @@ "dockgen_relaxed_rmsd_lt_2_and_pb_valid_data_std.fillna(0, inplace=True)\n", "\n", "# create the figure and a list of axes\n", - "fig, axis = plt.subplots(figsize=(12, 6))\n", + "fig, axis = plt.subplots(figsize=(18, 8))\n", "axis.spines[\"top\"].set_visible(False)\n", "axis.spines[\"right\"].set_visible(False)\n", "axis.spines[\"bottom\"].set_visible(False)\n", @@ -666,7 +736,7 @@ "\n", "# add labels, titles, ticks, etc.\n", "axis.set_ylabel(\"Percentage of predictions\")\n", - "axis.set_xlim(1, 15 + 0.1)\n", + "axis.set_xlim(1, 23 + 0.1)\n", "axis.set_ylim(0, 100)\n", "\n", "axis.bar_label(dockgen_rmsd_lt_2_bar, fmt=\"{:,.1f}%\", label_type=\"edge\")\n", @@ -683,18 +753,22 @@ "axis.grid(axis=\"y\", color=\"#EAEFF8\")\n", 
"axis.set_axisbelow(True)\n", "\n", - "axis.set_xticks([2, 4, 6, 6 + 1e-3, 8, 10, 12, 12 + 1e-3, 14, 14 + 1e-3])\n", + "axis.set_xticks([2, 4, 6, 8, 9, 10, 12, 14, 16, 18, 18 + 1e-3, 20, 21, 22])\n", "axis.set_xticks([1 + 0.1], minor=True)\n", "axis.set_xticklabels(\n", " [\n", " \"DiffDock-L\",\n", + " \"DiffDock-L-Relax-Prot\",\n", + " \"DiffDock w/o SCT\",\n", " \"FABind\",\n", - " \"DynamicBind\",\n", " \"DL-based blind\",\n", + " \"DynamicBind\",\n", " \"NeuralPLexer\",\n", - " \"RoseTTAFold-All-Atom\",\n", + " \"RoseTTAFold-AA\",\n", + " \"Chai-1\",\n", " \"DiffDock-L-Vina\",\n", " \"Conventional blind\",\n", + " \"P2Rank-Vina\",\n", " \"Ensemble (Con)\",\n", " \"Hybrid blind\",\n", " ]\n", @@ -708,7 +782,7 @@ "axis.tick_params(axis=\"y\", which=\"major\", left=\"off\", right=\"on\", color=\"#EAEFF8\")\n", "\n", "# vertical alignment of xtick labels\n", - "vert_alignments = [0.0, 0.0, 0.0, -0.1, 0.0, 0.0, 0.0, -0.1, 0.0, -0.1]\n", + "vert_alignments = [0.0, 0.0, 0.0, 0.0, -0.1, 0.0, 0.0, 0.0, 0.0, 0.0, -0.1, 0.0, 0.0, -0.1]\n", "for tick, y in zip(axis.get_xticklabels(), vert_alignments):\n", " tick.set_y(y)\n", "\n", diff --git a/notebooks/dockgen_single_ligand_relaxed_bar_chart.png b/notebooks/dockgen_single_ligand_relaxed_bar_chart.png index 93dee608..2c6220d0 100644 Binary files a/notebooks/dockgen_single_ligand_relaxed_bar_chart.png and b/notebooks/dockgen_single_ligand_relaxed_bar_chart.png differ diff --git a/notebooks/dockgen_single_ligand_relaxed_rmsd_violin_plot.png b/notebooks/dockgen_single_ligand_relaxed_rmsd_violin_plot.png index 731fa73f..d7eb191a 100644 Binary files a/notebooks/dockgen_single_ligand_relaxed_rmsd_violin_plot.png and b/notebooks/dockgen_single_ligand_relaxed_rmsd_violin_plot.png differ diff --git a/notebooks/dynamicbind_casp15_interaction_dataframes_1.h5 b/notebooks/dynamicbind_casp15_interaction_dataframes_1.h5 new file mode 100644 index 00000000..14e1abc7 Binary files /dev/null and 
b/notebooks/dynamicbind_casp15_interaction_dataframes_1.h5 differ diff --git a/notebooks/neuralplexer_casp15_interaction_dataframes_1.h5 b/notebooks/neuralplexer_casp15_interaction_dataframes_1.h5 new file mode 100644 index 00000000..2fd05fb4 Binary files /dev/null and b/notebooks/neuralplexer_casp15_interaction_dataframes_1.h5 differ diff --git a/notebooks/neuralplexer_no_ilcl_casp15_interaction_dataframes_1.h5 b/notebooks/neuralplexer_no_ilcl_casp15_interaction_dataframes_1.h5 new file mode 100644 index 00000000..f9f33d48 Binary files /dev/null and b/notebooks/neuralplexer_no_ilcl_casp15_interaction_dataframes_1.h5 differ diff --git a/notebooks/pdbbind_training_subset_1000_interaction_dataframes.h5 b/notebooks/pdbbind_training_subset_1000_interaction_dataframes.h5 new file mode 100644 index 00000000..58e5626e Binary files /dev/null and b/notebooks/pdbbind_training_subset_1000_interaction_dataframes.h5 differ diff --git a/notebooks/posebusters_astex_inference_results_plotting.ipynb b/notebooks/posebusters_astex_inference_results_plotting.ipynb index dcff9182..bd254a27 100644 --- a/notebooks/posebusters_astex_inference_results_plotting.ipynb +++ b/notebooks/posebusters_astex_inference_results_plotting.ipynb @@ -60,26 +60,31 @@ "outputs": [], "source": [ "# General variables\n", - "new_methods = [\n", + "baseline_methods = [\n", " \"diffdock\",\n", + " \"diffdockv1\",\n", " \"fabind\",\n", " \"dynamicbind\",\n", " \"neuralplexer\",\n", " \"rfaa\",\n", + " \"chai-lab\",\n", " \"tulip\",\n", " \"vina_diffdock\",\n", + " \"vina_p2rank\",\n", " \"consensus_ensemble\",\n", "]\n", "max_num_repeats_per_method = 3\n", "\n", - "# Filepaths for each new method\n", + "# Filepaths for each baseline method\n", "globals()[\"diffdock_output_dir\"] = os.path.join(\"..\", \"forks\", \"DiffDock\", \"inference\")\n", + "globals()[\"diffdockv1_output_dir\"] = os.path.join(\"..\", \"forks\", \"DiffDockv1\", \"inference\")\n", "globals()[\"fabind_output_dir\"] = os.path.join(\"..\", 
\"forks\", \"FABind\", \"inference\")\n", "globals()[\"dynamicbind_output_dir\"] = os.path.join(\n", " \"..\", \"forks\", \"DynamicBind\", \"inference\", \"outputs\", \"results\"\n", ")\n", "globals()[\"neuralplexer_output_dir\"] = os.path.join(\"..\", \"forks\", \"NeuralPLexer\", \"inference\")\n", "globals()[\"rfaa_output_dir\"] = os.path.join(\"..\", \"forks\", \"RoseTTAFold-All-Atom\", \"inference\")\n", + "globals()[\"chai-lab_output_dir\"] = os.path.join(\"..\", \"forks\", \"chai-lab\", \"inference\")\n", "globals()[\"tulip_output_dir\"] = os.path.join(\"..\", \"forks\", \"TULIP\", \"inference\")\n", "globals()[\"vina_output_dir\"] = os.path.join(\"..\", \"forks\", \"Vina\", \"inference\")\n", "for repeat_index in range(1, max_num_repeats_per_method + 1):\n", @@ -99,13 +104,37 @@ " f\"diffdock_posebusters_benchmark_output_{repeat_index}\",\n", " \"bust_results.csv\",\n", " )\n", - " globals()[\n", - " f\"diffdock_posebusters_relaxed_bust_results_csv_filepath_{repeat_index}\"\n", - " ] = os.path.join(\n", - " globals()[\"diffdock_output_dir\"],\n", - " f\"diffdock_posebusters_benchmark_output_{repeat_index}_relaxed\",\n", + " globals()[f\"diffdock_posebusters_relaxed_bust_results_csv_filepath_{repeat_index}\"] = (\n", + " os.path.join(\n", + " globals()[\"diffdock_output_dir\"],\n", + " f\"diffdock_posebusters_benchmark_output_{repeat_index}_relaxed\",\n", + " \"bust_results.csv\",\n", + " )\n", + " )\n", + "\n", + " # DiffDock w/o structural cluster training (SCT) results\n", + " globals()[f\"diffdockv1_astex_bust_results_csv_filepath_{repeat_index}\"] = os.path.join(\n", + " globals()[\"diffdockv1_output_dir\"],\n", + " f\"diffdock_astex_diverse_output_{repeat_index}\",\n", + " \"bust_results.csv\",\n", + " )\n", + " globals()[f\"diffdockv1_astex_relaxed_bust_results_csv_filepath_{repeat_index}\"] = os.path.join(\n", + " globals()[\"diffdockv1_output_dir\"],\n", + " f\"diffdock_astex_diverse_output_{repeat_index}_relaxed\",\n", + " \"bust_results.csv\",\n", 
+ " )\n", + " globals()[f\"diffdockv1_posebusters_bust_results_csv_filepath_{repeat_index}\"] = os.path.join(\n", + " globals()[\"diffdockv1_output_dir\"],\n", + " f\"diffdock_posebusters_benchmark_output_{repeat_index}\",\n", " \"bust_results.csv\",\n", " )\n", + " globals()[f\"diffdockv1_posebusters_relaxed_bust_results_csv_filepath_{repeat_index}\"] = (\n", + " os.path.join(\n", + " globals()[\"diffdockv1_output_dir\"],\n", + " f\"diffdock_posebusters_benchmark_output_{repeat_index}_relaxed\",\n", + " \"bust_results.csv\",\n", + " )\n", + " )\n", "\n", " # FABind results\n", " globals()[f\"fabind_astex_bust_results_csv_filepath_{repeat_index}\"] = os.path.join(\n", @@ -123,36 +152,36 @@ " f\"fabind_posebusters_benchmark_output_{repeat_index}\",\n", " \"bust_results.csv\",\n", " )\n", - " globals()[\n", - " f\"fabind_posebusters_relaxed_bust_results_csv_filepath_{repeat_index}\"\n", - " ] = os.path.join(\n", - " globals()[\"fabind_output_dir\"],\n", - " f\"fabind_posebusters_benchmark_output_{repeat_index}_relaxed\",\n", - " \"bust_results.csv\",\n", + " globals()[f\"fabind_posebusters_relaxed_bust_results_csv_filepath_{repeat_index}\"] = (\n", + " os.path.join(\n", + " globals()[\"fabind_output_dir\"],\n", + " f\"fabind_posebusters_benchmark_output_{repeat_index}_relaxed\",\n", + " \"bust_results.csv\",\n", + " )\n", " )\n", "\n", " # DynamicBind results\n", " globals()[f\"dynamicbind_astex_bust_results_csv_filepath_{repeat_index}\"] = os.path.join(\n", " globals()[\"dynamicbind_output_dir\"], f\"astex_diverse_{repeat_index}\", \"bust_results.csv\"\n", " )\n", - " globals()[\n", - " f\"dynamicbind_astex_relaxed_bust_results_csv_filepath_{repeat_index}\"\n", - " ] = os.path.join(\n", - " globals()[\"dynamicbind_output_dir\"],\n", - " f\"astex_diverse_{repeat_index}_relaxed\",\n", - " \"bust_results.csv\",\n", + " globals()[f\"dynamicbind_astex_relaxed_bust_results_csv_filepath_{repeat_index}\"] = (\n", + " os.path.join(\n", + " 
globals()[\"dynamicbind_output_dir\"],\n", + " f\"astex_diverse_{repeat_index}_relaxed\",\n", + " \"bust_results.csv\",\n", + " )\n", " )\n", " globals()[f\"dynamicbind_posebusters_bust_results_csv_filepath_{repeat_index}\"] = os.path.join(\n", " globals()[\"dynamicbind_output_dir\"],\n", " f\"posebusters_benchmark_{repeat_index}\",\n", " \"bust_results.csv\",\n", " )\n", - " globals()[\n", - " f\"dynamicbind_posebusters_relaxed_bust_results_csv_filepath_{repeat_index}\"\n", - " ] = os.path.join(\n", - " globals()[\"dynamicbind_output_dir\"],\n", - " f\"posebusters_benchmark_{repeat_index}_relaxed\",\n", - " \"bust_results.csv\",\n", + " globals()[f\"dynamicbind_posebusters_relaxed_bust_results_csv_filepath_{repeat_index}\"] = (\n", + " os.path.join(\n", + " globals()[\"dynamicbind_output_dir\"],\n", + " f\"posebusters_benchmark_{repeat_index}_relaxed\",\n", + " \"bust_results.csv\",\n", + " )\n", " )\n", "\n", " # NeuralPLexer results\n", @@ -161,24 +190,24 @@ " f\"neuralplexer_astex_diverse_outputs_{repeat_index}\",\n", " \"bust_results.csv\",\n", " )\n", - " globals()[\n", - " f\"neuralplexer_astex_relaxed_bust_results_csv_filepath_{repeat_index}\"\n", - " ] = os.path.join(\n", - " globals()[\"neuralplexer_output_dir\"],\n", - " f\"neuralplexer_astex_diverse_outputs_{repeat_index}_relaxed\",\n", - " \"bust_results.csv\",\n", + " globals()[f\"neuralplexer_astex_relaxed_bust_results_csv_filepath_{repeat_index}\"] = (\n", + " os.path.join(\n", + " globals()[\"neuralplexer_output_dir\"],\n", + " f\"neuralplexer_astex_diverse_outputs_{repeat_index}_relaxed\",\n", + " \"bust_results.csv\",\n", + " )\n", " )\n", " globals()[f\"neuralplexer_posebusters_bust_results_csv_filepath_{repeat_index}\"] = os.path.join(\n", " globals()[\"neuralplexer_output_dir\"],\n", " f\"neuralplexer_posebusters_benchmark_outputs_{repeat_index}\",\n", " \"bust_results.csv\",\n", " )\n", - " globals()[\n", - " f\"neuralplexer_posebusters_relaxed_bust_results_csv_filepath_{repeat_index}\"\n", - 
" ] = os.path.join(\n", - " globals()[\"neuralplexer_output_dir\"],\n", - " f\"neuralplexer_posebusters_benchmark_outputs_{repeat_index}_relaxed\",\n", - " \"bust_results.csv\",\n", + " globals()[f\"neuralplexer_posebusters_relaxed_bust_results_csv_filepath_{repeat_index}\"] = (\n", + " os.path.join(\n", + " globals()[\"neuralplexer_output_dir\"],\n", + " f\"neuralplexer_posebusters_benchmark_outputs_{repeat_index}_relaxed\",\n", + " \"bust_results.csv\",\n", + " )\n", " )\n", "\n", " # RoseTTAFold-All-Atom results\n", @@ -203,6 +232,30 @@ " \"bust_results.csv\",\n", " )\n", "\n", + " # Chai-1 results\n", + " globals()[f\"chai-lab_astex_bust_results_csv_filepath_{repeat_index}\"] = os.path.join(\n", + " globals()[\"chai-lab_output_dir\"],\n", + " f\"chai-lab_astex_diverse_outputs_{repeat_index}\",\n", + " \"bust_results.csv\",\n", + " )\n", + " globals()[f\"chai-lab_astex_relaxed_bust_results_csv_filepath_{repeat_index}\"] = os.path.join(\n", + " globals()[\"chai-lab_output_dir\"],\n", + " f\"chai-lab_astex_diverse_outputs_{repeat_index}_relaxed\",\n", + " \"bust_results.csv\",\n", + " )\n", + " globals()[f\"chai-lab_posebusters_bust_results_csv_filepath_{repeat_index}\"] = os.path.join(\n", + " globals()[\"chai-lab_output_dir\"],\n", + " f\"chai-lab_posebusters_benchmark_outputs_{repeat_index}\",\n", + " \"bust_results.csv\",\n", + " )\n", + " globals()[f\"chai-lab_posebusters_relaxed_bust_results_csv_filepath_{repeat_index}\"] = (\n", + " os.path.join(\n", + " globals()[\"chai-lab_output_dir\"],\n", + " f\"chai-lab_posebusters_benchmark_outputs_{repeat_index}_relaxed\",\n", + " \"bust_results.csv\",\n", + " )\n", + " )\n", + "\n", " # TULIP results\n", " globals()[f\"tulip_astex_bust_results_csv_filepath_{repeat_index}\"] = os.path.join(\n", " globals()[\"tulip_output_dir\"],\n", @@ -219,61 +272,87 @@ " f\"tulip_posebusters_benchmark_outputs_{repeat_index}\",\n", " \"bust_results.csv\",\n", " )\n", - " globals()[\n", - " 
f\"tulip_posebusters_relaxed_bust_results_csv_filepath_{repeat_index}\"\n", - " ] = os.path.join(\n", - " globals()[\"tulip_output_dir\"],\n", - " f\"tulip_posebusters_benchmark_outputs_{repeat_index}_relaxed\",\n", - " \"bust_results.csv\",\n", + " globals()[f\"tulip_posebusters_relaxed_bust_results_csv_filepath_{repeat_index}\"] = (\n", + " os.path.join(\n", + " globals()[\"tulip_output_dir\"],\n", + " f\"tulip_posebusters_benchmark_outputs_{repeat_index}_relaxed\",\n", + " \"bust_results.csv\",\n", + " )\n", " )\n", "\n", - " # Vina-DiffDock-L results\n", + " # DiffDock-L-Vina results\n", " globals()[f\"vina_diffdock_astex_bust_results_csv_filepath_{repeat_index}\"] = os.path.join(\n", " globals()[\"vina_output_dir\"],\n", " f\"vina_diffdock_astex_diverse_outputs_{repeat_index}\",\n", " \"bust_results.csv\",\n", " )\n", - " globals()[\n", - " f\"vina_diffdock_astex_relaxed_bust_results_csv_filepath_{repeat_index}\"\n", - " ] = os.path.join(\n", - " globals()[\"vina_output_dir\"],\n", - " f\"vina_diffdock_astex_diverse_outputs_{repeat_index}_relaxed\",\n", - " \"bust_results.csv\",\n", + " globals()[f\"vina_diffdock_astex_relaxed_bust_results_csv_filepath_{repeat_index}\"] = (\n", + " os.path.join(\n", + " globals()[\"vina_output_dir\"],\n", + " f\"vina_diffdock_astex_diverse_outputs_{repeat_index}_relaxed\",\n", + " \"bust_results.csv\",\n", + " )\n", " )\n", - " globals()[\n", - " f\"vina_diffdock_posebusters_bust_results_csv_filepath_{repeat_index}\"\n", - " ] = os.path.join(\n", + " globals()[f\"vina_diffdock_posebusters_bust_results_csv_filepath_{repeat_index}\"] = (\n", + " os.path.join(\n", + " globals()[\"vina_output_dir\"],\n", + " f\"vina_diffdock_posebusters_benchmark_outputs_{repeat_index}\",\n", + " \"bust_results.csv\",\n", + " )\n", + " )\n", + " globals()[f\"vina_diffdock_posebusters_relaxed_bust_results_csv_filepath_{repeat_index}\"] = (\n", + " os.path.join(\n", + " globals()[\"vina_output_dir\"],\n", + " 
f\"vina_diffdock_posebusters_benchmark_outputs_{repeat_index}_relaxed\",\n", + " \"bust_results.csv\",\n", + " )\n", + " )\n", + "\n", + " # P2Rank-Vina results\n", + " globals()[f\"vina_p2rank_astex_bust_results_csv_filepath_{repeat_index}\"] = os.path.join(\n", " globals()[\"vina_output_dir\"],\n", - " f\"vina_diffdock_posebusters_benchmark_outputs_{repeat_index}\",\n", + " f\"vina_p2rank_astex_diverse_outputs_{repeat_index}\",\n", " \"bust_results.csv\",\n", " )\n", - " globals()[\n", - " f\"vina_diffdock_posebusters_relaxed_bust_results_csv_filepath_{repeat_index}\"\n", - " ] = os.path.join(\n", + " globals()[f\"vina_p2rank_astex_relaxed_bust_results_csv_filepath_{repeat_index}\"] = (\n", + " os.path.join(\n", + " globals()[\"vina_output_dir\"],\n", + " f\"vina_p2rank_astex_diverse_outputs_{repeat_index}_relaxed\",\n", + " \"bust_results.csv\",\n", + " )\n", + " )\n", + " globals()[f\"vina_p2rank_posebusters_bust_results_csv_filepath_{repeat_index}\"] = os.path.join(\n", " globals()[\"vina_output_dir\"],\n", - " f\"vina_diffdock_posebusters_benchmark_outputs_{repeat_index}_relaxed\",\n", + " f\"vina_p2rank_posebusters_benchmark_outputs_{repeat_index}\",\n", " \"bust_results.csv\",\n", " )\n", + " globals()[f\"vina_p2rank_posebusters_relaxed_bust_results_csv_filepath_{repeat_index}\"] = (\n", + " os.path.join(\n", + " globals()[\"vina_output_dir\"],\n", + " f\"vina_p2rank_posebusters_benchmark_outputs_{repeat_index}_relaxed\",\n", + " \"bust_results.csv\",\n", + " )\n", + " )\n", "\n", - " # Consensus ensemble results\n", + " # Ensemble (consensus) results\n", " globals()[f\"consensus_ensemble_astex_bust_results_csv_filepath_{repeat_index}\"] = os.path.join(\n", " os.path.join(\"..\", \"data\", \"test_cases\", \"astex_diverse\"),\n", " f\"top_consensus_ensemble_predictions_{repeat_index}\",\n", " \"bust_results.csv\",\n", " )\n", - " globals()[\n", - " f\"consensus_ensemble_astex_relaxed_bust_results_csv_filepath_{repeat_index}\"\n", - " ] = os.path.join(\n", - 
" os.path.join(\"..\", \"data\", \"test_cases\", \"astex_diverse\"),\n", - " f\"top_consensus_ensemble_predictions_{repeat_index}_relaxed\",\n", - " \"bust_results.csv\",\n", + " globals()[f\"consensus_ensemble_astex_relaxed_bust_results_csv_filepath_{repeat_index}\"] = (\n", + " os.path.join(\n", + " os.path.join(\"..\", \"data\", \"test_cases\", \"astex_diverse\"),\n", + " f\"top_consensus_ensemble_predictions_{repeat_index}_relaxed\",\n", + " \"bust_results.csv\",\n", + " )\n", " )\n", - " globals()[\n", - " f\"consensus_ensemble_posebusters_bust_results_csv_filepath_{repeat_index}\"\n", - " ] = os.path.join(\n", - " os.path.join(\"..\", \"data\", \"test_cases\", \"posebusters_benchmark\"),\n", - " f\"top_consensus_ensemble_predictions_{repeat_index}\",\n", - " \"bust_results.csv\",\n", + " globals()[f\"consensus_ensemble_posebusters_bust_results_csv_filepath_{repeat_index}\"] = (\n", + " os.path.join(\n", + " os.path.join(\"..\", \"data\", \"test_cases\", \"posebusters_benchmark\"),\n", + " f\"top_consensus_ensemble_predictions_{repeat_index}\",\n", + " \"bust_results.csv\",\n", + " )\n", " )\n", " globals()[\n", " f\"consensus_ensemble_posebusters_relaxed_bust_results_csv_filepath_{repeat_index}\"\n", @@ -286,23 +365,29 @@ "# Mappings\n", "method_mapping = {\n", " \"diffdock\": \"DiffDock-L\",\n", + " \"diffdockv1\": \"DiffDock w/o SCT\",\n", " \"fabind\": \"FABind\",\n", " \"dynamicbind\": \"DynamicBind\",\n", " \"neuralplexer\": \"NeuralPLexer\",\n", - " \"rfaa\": \"RoseTTAFold-All-Atom\",\n", + " \"rfaa\": \"RoseTTAFold-AA\",\n", + " \"chai-lab\": \"Chai-1\",\n", " \"tulip\": \"TULIP\",\n", " \"vina_diffdock\": \"DiffDock-L-Vina\",\n", + " \"vina_p2rank\": \"P2Rank-Vina\",\n", " \"consensus_ensemble\": \"Ensemble (Con)\",\n", "}\n", "\n", "method_category_mapping = {\n", " \"diffdock\": \"DL-based blind\",\n", + " \"diffdockv1\": \"DL-based blind\",\n", " \"fabind\": \"DL-based blind\",\n", " \"dynamicbind\": \"DL-based blind\",\n", " \"neuralplexer\": 
\"DL-based blind\",\n", " \"rfaa\": \"DL-based blind\",\n", + " \"chai-lab\": \"DL-based blind\",\n", " \"tulip\": \"Conventional blind\",\n", " \"vina_diffdock\": \"Conventional blind\",\n", + " \"vina_p2rank\": \"Conventional blind\",\n", " \"consensus_ensemble\": \"Hybrid blind\",\n", "}" ] @@ -311,7 +396,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "#### Report test results for new methods" + "#### Report test results for each baseline method" ] }, { @@ -320,9 +405,9 @@ "metadata": {}, "outputs": [], "source": [ - "# load and report test results for each new method\n", + "# load and report test results for each baseline method\n", "for config in [\"\", \"_relaxed\"]:\n", - " for method in new_methods:\n", + " for method in baseline_methods:\n", " for repeat_index in range(1, max_num_repeats_per_method + 1):\n", " method_title = method_mapping[method]\n", "\n", @@ -469,7 +554,7 @@ " globals()[f\"results_table_{repeat_index}\"] = pd.concat(\n", " [\n", " globals()[f\"{method}{config}_bust_results_table_{repeat_index}\"]\n", - " for method in new_methods\n", + " for method in baseline_methods\n", " for config in [\"\", \"_relaxed\"]\n", " if f\"{method}{config}_bust_results_table_{repeat_index}\" in globals()\n", " ]\n", @@ -537,7 +622,7 @@ " & (globals()[f\"results_table_{repeat_index}\"][\"post-processing\"] == \"none\")\n", " ]\n", " combined_data_list.append(pd.concat([pb_results_table, ad_results_table]))\n", - "combined_data = pd.concat(combined_data_list)\n", + "combined_data = pd.concat(combined_data_list).sort_values(\"method_assignment_index\")\n", "\n", "# set the size of the figure\n", "plt.figure(figsize=(12, 6))\n", @@ -547,7 +632,7 @@ " x=\"method\",\n", " y=\"rmsd\",\n", " hue=\"dataset\",\n", - " data=combined_data[combined_data[\"rmsd\"] < 100], # ignore outliers\n", + " data=combined_data[combined_data[\"rmsd\"] < 150], # ignore outliers\n", " split=True,\n", " inner=\"quartile\",\n", " palette=colors,\n", @@ -593,7 +678,7 @@ " & 
(globals()[f\"results_table_{repeat_index}\"][\"post-processing\"] == \"none\")\n", " ]\n", " combined_data_list.append(pd.concat([pb_relaxed_results_table, pb_unrelaxed_results_table]))\n", - "combined_relaxed_data = pd.concat(combined_data_list)\n", + "combined_relaxed_data = pd.concat(combined_data_list).sort_values(\"method_assignment_index\")\n", "\n", "# set the size of the figure\n", "plt.figure(figsize=(12, 6))\n", @@ -603,7 +688,7 @@ " x=\"method\",\n", " y=\"rmsd\",\n", " hue=\"post-processing\",\n", - " data=combined_relaxed_data,\n", + " data=combined_relaxed_data[combined_relaxed_data[\"rmsd\"] < 150], # ignore outliers\n", " split=True,\n", " inner=\"quartile\",\n", " palette=colors,\n", @@ -638,7 +723,7 @@ "colors = [\"#8DD3C7\", \"#FB8072\"]\n", "\n", "bar_width = 0.5\n", - "r1 = [item - 0.25 for item in range(2, 18, 2)]\n", + "r1 = [item - 0.25 for item in range(2, 24, 2)]\n", "r2 = [x + bar_width for x in r1]\n", "\n", "(\n", @@ -821,7 +906,7 @@ "posebusters_rmsd_lt_2_and_pb_valid_data_std.fillna(0, inplace=True)\n", "\n", "# create the figure and a list of axes\n", - "fig, axis = plt.subplots(figsize=(12, 6))\n", + "fig, axis = plt.subplots(figsize=(18, 8))\n", "axis.spines[\"top\"].set_visible(False)\n", "axis.spines[\"right\"].set_visible(False)\n", "axis.spines[\"bottom\"].set_visible(False)\n", @@ -873,7 +958,7 @@ "\n", "# add labels, titles, ticks, etc.\n", "axis.set_ylabel(\"Percentage of predictions\")\n", - "axis.set_xlim(1, 17 + 0.1)\n", + "axis.set_xlim(1, 23 + 0.1)\n", "axis.set_ylim(0, 100)\n", "\n", "axis.bar_label(astex_rmsd_lt_2_bar, fmt=\"{:,.1f}%\", label_type=\"edge\")\n", @@ -890,19 +975,22 @@ "axis.grid(axis=\"y\", color=\"#EAEFF8\")\n", "axis.set_axisbelow(True)\n", "\n", - "axis.set_xticks([2, 4, 6, 6 + 1e-3, 8, 10, 12, 13, 14, 16, 16 + 1e-3])\n", + "axis.set_xticks([2, 4, 6, 8, 8 + 1e-3, 10, 12, 14, 16, 18, 18 + 1e-3, 20, 22, 22 + 1e-3])\n", "axis.set_xticks([1 + 0.1], minor=True)\n", "axis.set_xticklabels(\n", " [\n", " 
\"DiffDock-L\",\n", + " \"DiffDock w/o SCT\",\n", " \"FABind\",\n", " \"DynamicBind\",\n", " \"DL-based blind\",\n", " \"NeuralPLexer\",\n", - " \"RoseTTAFold-All-Atom\",\n", + " \"RoseTTAFold-AA\",\n", + " \"Chai-1\",\n", " \"TULIP\",\n", - " \"Conventional blind\",\n", " \"DiffDock-L-Vina\",\n", + " \"Conventional blind\",\n", + " \"P2Rank-Vina\",\n", " \"Ensemble (Con)\",\n", " \"Hybrid blind\",\n", " ]\n", @@ -916,7 +1004,7 @@ "axis.tick_params(axis=\"y\", which=\"major\", left=\"off\", right=\"on\", color=\"#EAEFF8\")\n", "\n", "# vertical alignment of xtick labels\n", - "vert_alignments = [0.0, 0.0, 0.0, -0.1, 0.0, 0.0, 0.0, -0.1, 0.0, 0.0, -0.1]\n", + "vert_alignments = [0.0, 0.0, 0.0, 0.0, -0.1, 0.0, 0.0, 0.0, 0.0, 0.0, -0.1, 0.0, 0.0, -0.1]\n", "for tick, y in zip(axis.get_xticklabels(), vert_alignments):\n", " tick.set_y(y)\n", "\n", @@ -956,7 +1044,7 @@ "colors = [\"#FB8072\", \"#BEBADA\"]\n", "\n", "bar_width = 0.5\n", - "r1 = [item - 0.25 for item in range(2, 18, 2)]\n", + "r1 = [item - 0.25 for item in range(2, 24, 2)]\n", "r2 = [x + bar_width for x in r1]\n", "\n", "(\n", @@ -1148,7 +1236,7 @@ "posebusters_relaxed_rmsd_lt_2_and_pb_valid_data_std.fillna(0, inplace=True)\n", "\n", "# create the figure and a list of axes\n", - "fig, axis = plt.subplots(figsize=(12, 6))\n", + "fig, axis = plt.subplots(figsize=(18, 8))\n", "axis.spines[\"top\"].set_visible(False)\n", "axis.spines[\"right\"].set_visible(False)\n", "axis.spines[\"bottom\"].set_visible(False)\n", @@ -1200,7 +1288,7 @@ "\n", "# add labels, titles, ticks, etc.\n", "axis.set_ylabel(\"Percentage of predictions\")\n", - "axis.set_xlim(1, 17 + 0.1)\n", + "axis.set_xlim(1, 23 + 0.1)\n", "axis.set_ylim(0, 100)\n", "\n", "axis.bar_label(posebusters_rmsd_lt_2_bar, fmt=\"{:,.1f}%\", label_type=\"edge\")\n", @@ -1219,19 +1307,22 @@ "axis.grid(axis=\"y\", color=\"#EAEFF8\")\n", "axis.set_axisbelow(True)\n", "\n", - "axis.set_xticks([2, 4, 6, 6 + 1e-3, 8, 10, 12, 13, 14, 16, 16 + 1e-3])\n", + 
"axis.set_xticks([2, 4, 6, 8, 8 + 1e-3, 10, 12, 14, 16, 18, 18 + 1e-3, 20, 22, 22 + 1e-3])\n", "axis.set_xticks([1 + 0.1], minor=True)\n", "axis.set_xticklabels(\n", " [\n", " \"DiffDock-L\",\n", + " \"DiffDock w/o SCT\",\n", " \"FABind\",\n", " \"DynamicBind\",\n", " \"DL-based blind\",\n", " \"NeuralPLexer\",\n", - " \"RoseTTAFold-All-Atom\",\n", + " \"RoseTTAFold-AA\",\n", + " \"Chai-1\",\n", " \"TULIP\",\n", - " \"Conventional blind\",\n", " \"DiffDock-L-Vina\",\n", + " \"Conventional blind\",\n", + " \"P2Rank-Vina\",\n", " \"Ensemble (Con)\",\n", " \"Hybrid blind\",\n", " ]\n", @@ -1245,7 +1336,7 @@ "axis.tick_params(axis=\"y\", which=\"major\", left=\"off\", right=\"on\", color=\"#EAEFF8\")\n", "\n", "# vertical alignment of xtick labels\n", - "vert_alignments = [0.0, 0.0, 0.0, -0.1, 0.0, 0.0, 0.0, -0.1, 0.0, 0.0, -0.1]\n", + "vert_alignments = [0.0, 0.0, 0.0, 0.0, -0.1, 0.0, 0.0, 0.0, 0.0, 0.0, -0.1, 0.0, 0.0, -0.1]\n", "for tick, y in zip(axis.get_xticklabels(), vert_alignments):\n", " tick.set_y(y)\n", "\n", @@ -1290,7 +1381,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.9.17" + "version": "3.10.14" } }, "nbformat": 4, diff --git a/notebooks/posebusters_astex_single_ligand_bar_chart.png b/notebooks/posebusters_astex_single_ligand_bar_chart.png index ef31d888..ec6c02e0 100644 Binary files a/notebooks/posebusters_astex_single_ligand_bar_chart.png and b/notebooks/posebusters_astex_single_ligand_bar_chart.png differ diff --git a/notebooks/posebusters_astex_single_ligand_rmsd_violin_plot.png b/notebooks/posebusters_astex_single_ligand_rmsd_violin_plot.png index b016d93b..5f5ced65 100644 Binary files a/notebooks/posebusters_astex_single_ligand_rmsd_violin_plot.png and b/notebooks/posebusters_astex_single_ligand_rmsd_violin_plot.png differ diff --git a/notebooks/posebusters_pocket_only_inference_results_plotting.ipynb b/notebooks/posebusters_pocket_only_inference_results_plotting.ipynb index d535862b..ec898c48 
100644 --- a/notebooks/posebusters_pocket_only_inference_results_plotting.ipynb +++ b/notebooks/posebusters_pocket_only_inference_results_plotting.ipynb @@ -4,7 +4,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "## PoseBusters-Pocket Only Inference Results Plotting" + "## PoseBusters (Pocket-Only) Inference Results Plotting" ] }, { @@ -60,146 +60,199 @@ "outputs": [], "source": [ "# General variables\n", - "new_methods = [\n", + "baseline_methods = [\n", " \"diffdock\",\n", + " \"diffdockv1\",\n", " \"fabind\",\n", " \"dynamicbind\",\n", " \"neuralplexer\",\n", " \"rfaa\",\n", + " \"chai-lab\",\n", " \"vina_diffdock\",\n", + " \"vina_p2rank\",\n", " \"consensus_ensemble\",\n", "]\n", "max_num_repeats_per_method = 3\n", "\n", - "# Filepaths for each new method\n", - "pocket_postfix = \"pocket_only_\"\n", + "# Filepaths for each baseline method\n", + "pocket_suffix = \"_pocket_only\"\n", "globals()[\"diffdock_output_dir\"] = os.path.join(\"..\", \"forks\", \"DiffDock\", \"inference\")\n", + "globals()[\"diffdockv1_output_dir\"] = os.path.join(\"..\", \"forks\", \"DiffDockv1\", \"inference\")\n", "globals()[\"fabind_output_dir\"] = os.path.join(\"..\", \"forks\", \"FABind\", \"inference\")\n", "globals()[\"dynamicbind_output_dir\"] = os.path.join(\n", - " \"..\", \"forks\", \"DynamicBind\", \"inference\", \"outputs\", \"results_pocket_only\"\n", + " \"..\", \"forks\", \"DynamicBind\", \"inference\", \"outputs\", \"results\"\n", ")\n", "globals()[\"neuralplexer_output_dir\"] = os.path.join(\"..\", \"forks\", \"NeuralPLexer\", \"inference\")\n", "globals()[\"rfaa_output_dir\"] = os.path.join(\"..\", \"forks\", \"RoseTTAFold-All-Atom\", \"inference\")\n", + "globals()[\"chai-lab_output_dir\"] = os.path.join(\"..\", \"forks\", \"chai-lab\", \"inference\")\n", "globals()[\"vina_output_dir\"] = os.path.join(\"..\", \"forks\", \"Vina\", \"inference\")\n", "for repeat_index in range(1, max_num_repeats_per_method + 1):\n", " # DiffDock results\n", " 
globals()[f\"diffdock_posebusters_bust_results_csv_filepath_{repeat_index}\"] = os.path.join(\n", " globals()[\"diffdock_output_dir\"],\n", - " f\"diffdock_posebusters_benchmark_output_{pocket_postfix}{repeat_index}\",\n", + " f\"diffdock{pocket_suffix}_posebusters_benchmark_output_{repeat_index}\",\n", " \"bust_results.csv\",\n", " )\n", - " globals()[\n", - " f\"diffdock_posebusters_relaxed_bust_results_csv_filepath_{repeat_index}\"\n", - " ] = os.path.join(\n", - " globals()[\"diffdock_output_dir\"],\n", - " f\"diffdock_posebusters_benchmark_output_{pocket_postfix}{repeat_index}_relaxed\",\n", + " globals()[f\"diffdock_posebusters_relaxed_bust_results_csv_filepath_{repeat_index}\"] = (\n", + " os.path.join(\n", + " globals()[\"diffdock_output_dir\"],\n", + " f\"diffdock{pocket_suffix}_posebusters_benchmark_output_{repeat_index}_relaxed\",\n", + " \"bust_results.csv\",\n", + " )\n", + " )\n", + "\n", + " # DiffDock w/o structural cluster training (SCT) results\n", + " globals()[f\"diffdockv1_posebusters_bust_results_csv_filepath_{repeat_index}\"] = os.path.join(\n", + " globals()[\"diffdockv1_output_dir\"],\n", + " f\"diffdock{pocket_suffix}_posebusters_benchmark_output_{repeat_index}\",\n", " \"bust_results.csv\",\n", " )\n", + " globals()[f\"diffdockv1_posebusters_relaxed_bust_results_csv_filepath_{repeat_index}\"] = (\n", + " os.path.join(\n", + " globals()[\"diffdockv1_output_dir\"],\n", + " f\"diffdock{pocket_suffix}_posebusters_benchmark_output_{repeat_index}_relaxed\",\n", + " \"bust_results.csv\",\n", + " )\n", + " )\n", "\n", " # FABind results\n", " globals()[f\"fabind_posebusters_bust_results_csv_filepath_{repeat_index}\"] = os.path.join(\n", " globals()[\"fabind_output_dir\"],\n", - " f\"fabind_posebusters_benchmark_output_{pocket_postfix}{repeat_index}\",\n", + " f\"fabind{pocket_suffix}_posebusters_benchmark_output_{repeat_index}\",\n", " \"bust_results.csv\",\n", " )\n", - " globals()[\n", - " 
f\"fabind_posebusters_relaxed_bust_results_csv_filepath_{repeat_index}\"\n", - " ] = os.path.join(\n", - " globals()[\"fabind_output_dir\"],\n", - " f\"fabind_posebusters_benchmark_output_{pocket_postfix}{repeat_index}_relaxed\",\n", - " \"bust_results.csv\",\n", + " globals()[f\"fabind_posebusters_relaxed_bust_results_csv_filepath_{repeat_index}\"] = (\n", + " os.path.join(\n", + " globals()[\"fabind_output_dir\"],\n", + " f\"fabind{pocket_suffix}_posebusters_benchmark_output_{repeat_index}_relaxed\",\n", + " \"bust_results.csv\",\n", + " )\n", " )\n", "\n", " # DynamicBind results\n", " globals()[f\"dynamicbind_posebusters_bust_results_csv_filepath_{repeat_index}\"] = os.path.join(\n", " globals()[\"dynamicbind_output_dir\"],\n", - " f\"posebusters_benchmark_{repeat_index}\",\n", + " f\"posebusters_benchmark{pocket_suffix}_{repeat_index}\",\n", " \"bust_results.csv\",\n", " )\n", - " globals()[\n", - " f\"dynamicbind_posebusters_relaxed_bust_results_csv_filepath_{repeat_index}\"\n", - " ] = os.path.join(\n", - " globals()[\"dynamicbind_output_dir\"],\n", - " f\"posebusters_benchmark_{repeat_index}_relaxed\",\n", - " \"bust_results.csv\",\n", + " globals()[f\"dynamicbind_posebusters_relaxed_bust_results_csv_filepath_{repeat_index}\"] = (\n", + " os.path.join(\n", + " globals()[\"dynamicbind_output_dir\"],\n", + " f\"posebusters_benchmark{pocket_suffix}_{repeat_index}_relaxed\",\n", + " \"bust_results.csv\",\n", + " )\n", " )\n", "\n", " # NeuralPLexer results\n", " globals()[f\"neuralplexer_posebusters_bust_results_csv_filepath_{repeat_index}\"] = os.path.join(\n", " globals()[\"neuralplexer_output_dir\"],\n", - " f\"neuralplexer_posebusters_benchmark_outputs_{pocket_postfix}{repeat_index}\",\n", + " f\"neuralplexer{pocket_suffix}_posebusters_benchmark_outputs_{repeat_index}\",\n", " \"bust_results.csv\",\n", " )\n", - " globals()[\n", - " f\"neuralplexer_posebusters_relaxed_bust_results_csv_filepath_{repeat_index}\"\n", - " ] = os.path.join(\n", - " 
globals()[\"neuralplexer_output_dir\"],\n", - " f\"neuralplexer_posebusters_benchmark_outputs_{pocket_postfix}{repeat_index}_relaxed\",\n", - " \"bust_results.csv\",\n", + " globals()[f\"neuralplexer_posebusters_relaxed_bust_results_csv_filepath_{repeat_index}\"] = (\n", + " os.path.join(\n", + " globals()[\"neuralplexer_output_dir\"],\n", + " f\"neuralplexer{pocket_suffix}_posebusters_benchmark_outputs_{repeat_index}_relaxed\",\n", + " \"bust_results.csv\",\n", + " )\n", " )\n", "\n", " # RoseTTAFold-All-Atom results\n", " globals()[f\"rfaa_posebusters_bust_results_csv_filepath_{repeat_index}\"] = os.path.join(\n", " globals()[\"rfaa_output_dir\"],\n", - " f\"rfaa_posebusters_benchmark_outputs_{pocket_postfix}{repeat_index}\",\n", + " f\"rfaa{pocket_suffix}_posebusters_benchmark_outputs_{repeat_index}\",\n", " \"bust_results.csv\",\n", " )\n", " globals()[f\"rfaa_posebusters_relaxed_bust_results_csv_filepath_{repeat_index}\"] = os.path.join(\n", " globals()[\"rfaa_output_dir\"],\n", - " f\"rfaa_posebusters_benchmark_outputs_{pocket_postfix}{repeat_index}_relaxed\",\n", + " f\"rfaa{pocket_suffix}_posebusters_benchmark_outputs_{repeat_index}_relaxed\",\n", " \"bust_results.csv\",\n", " )\n", "\n", - " # Vina-DiffDock-L results\n", - " globals()[\n", - " f\"vina_diffdock_posebusters_bust_results_csv_filepath_{repeat_index}\"\n", - " ] = os.path.join(\n", - " globals()[\"vina_output_dir\"],\n", - " f\"vina_diffdock_posebusters_benchmark_outputs_{pocket_postfix}{repeat_index}\",\n", + " # Chai-1 results\n", + " globals()[f\"chai-lab_posebusters_bust_results_csv_filepath_{repeat_index}\"] = os.path.join(\n", + " globals()[\"chai-lab_output_dir\"],\n", + " f\"chai-lab{pocket_suffix}_posebusters_benchmark_outputs_{repeat_index}\",\n", " \"bust_results.csv\",\n", " )\n", - " globals()[\n", - " f\"vina_diffdock_posebusters_relaxed_bust_results_csv_filepath_{repeat_index}\"\n", - " ] = os.path.join(\n", + " 
globals()[f\"chai-lab_posebusters_relaxed_bust_results_csv_filepath_{repeat_index}\"] = (\n", + " os.path.join(\n", + " globals()[\"chai-lab_output_dir\"],\n", + " f\"chai-lab{pocket_suffix}_posebusters_benchmark_outputs_{repeat_index}_relaxed\",\n", + " \"bust_results.csv\",\n", + " )\n", + " )\n", + "\n", + " # DiffDock-L-Vina results\n", + " globals()[f\"vina_diffdock_posebusters_bust_results_csv_filepath_{repeat_index}\"] = (\n", + " os.path.join(\n", + " globals()[\"vina_output_dir\"],\n", + " f\"vina{pocket_suffix}_diffdock_posebusters_benchmark_outputs_{repeat_index}\",\n", + " \"bust_results.csv\",\n", + " )\n", + " )\n", + " globals()[f\"vina_diffdock_posebusters_relaxed_bust_results_csv_filepath_{repeat_index}\"] = (\n", + " os.path.join(\n", + " globals()[\"vina_output_dir\"],\n", + " f\"vina{pocket_suffix}_diffdock_posebusters_benchmark_outputs_{repeat_index}_relaxed\",\n", + " \"bust_results.csv\",\n", + " )\n", + " )\n", + "\n", + " # P2Rank-Vina results\n", + " globals()[f\"vina_p2rank_posebusters_bust_results_csv_filepath_{repeat_index}\"] = os.path.join(\n", " globals()[\"vina_output_dir\"],\n", - " f\"vina_diffdock_posebusters_benchmark_outputs_{pocket_postfix}{repeat_index}_relaxed\",\n", + " f\"vina{pocket_suffix}_p2rank_posebusters_benchmark_outputs_{repeat_index}\",\n", " \"bust_results.csv\",\n", " )\n", + " globals()[f\"vina_p2rank_posebusters_relaxed_bust_results_csv_filepath_{repeat_index}\"] = (\n", + " os.path.join(\n", + " globals()[\"vina_output_dir\"],\n", + " f\"vina{pocket_suffix}_p2rank_posebusters_benchmark_outputs_{repeat_index}_relaxed\",\n", + " \"bust_results.csv\",\n", + " )\n", + " )\n", "\n", " # Consensus ensemble results\n", - " globals()[\n", - " f\"consensus_ensemble_posebusters_bust_results_csv_filepath_{repeat_index}\"\n", - " ] = os.path.join(\n", - " os.path.join(\"..\", \"data\", \"test_cases\", \"posebusters_benchmark\"),\n", - " f\"top_consensus_ensemble_predictions_{pocket_postfix}{repeat_index}\",\n", - " 
\"bust_results.csv\",\n", + " globals()[f\"consensus_ensemble_posebusters_bust_results_csv_filepath_{repeat_index}\"] = (\n", + " os.path.join(\n", + " os.path.join(\"..\", \"data\", \"test_cases\", \"posebusters_benchmark\"),\n", + " f\"top_consensus{pocket_suffix}_ensemble_predictions_{repeat_index}\",\n", + " \"bust_results.csv\",\n", + " )\n", " )\n", " globals()[\n", " f\"consensus_ensemble_posebusters_relaxed_bust_results_csv_filepath_{repeat_index}\"\n", " ] = os.path.join(\n", " os.path.join(\"..\", \"data\", \"test_cases\", \"posebusters_benchmark\"),\n", - " f\"top_consensus_ensemble_predictions_{pocket_postfix}{repeat_index}_relaxed\",\n", + " f\"top_consensus{pocket_suffix}_ensemble_predictions_{repeat_index}_relaxed\",\n", " \"bust_results.csv\",\n", " )\n", "\n", "# Mappings\n", "method_mapping = {\n", " \"diffdock\": \"DiffDock-L\",\n", + " \"diffdockv1\": \"DiffDock w/o SCT\",\n", " \"fabind\": \"FABind\",\n", " \"dynamicbind\": \"DynamicBind\",\n", " \"neuralplexer\": \"NeuralPLexer\",\n", - " \"rfaa\": \"RoseTTAFold-All-Atom\",\n", + " \"rfaa\": \"RoseTTAFold-AA\",\n", + " \"chai-lab\": \"Chai-1\",\n", " \"vina_diffdock\": \"DiffDock-L-Vina\",\n", + " \"vina_p2rank\": \"P2Rank-Vina\",\n", " \"consensus_ensemble\": \"Ensemble (Con)\",\n", "}\n", "\n", "method_category_mapping = {\n", " \"diffdock\": \"DL-based blind\",\n", + " \"diffdockv1\": \"DL-based blind\",\n", " \"fabind\": \"DL-based blind\",\n", " \"dynamicbind\": \"DL-based blind\",\n", " \"neuralplexer\": \"DL-based blind\",\n", " \"rfaa\": \"DL-based blind\",\n", + " \"chai-lab\": \"DL-based blind\",\n", " \"vina_diffdock\": \"Conventional blind\",\n", + " \"vina_p2rank\": \"Conventional blind\",\n", " \"consensus_ensemble\": \"Hybrid blind\",\n", "}" ] @@ -208,7 +261,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "#### Report test results for new methods" + "#### Report test results for each baseline method" ] }, { @@ -217,9 +270,9 @@ "metadata": {}, "outputs": [], 
"source": [ - "# load and report test results for each new method\n", + "# load and report test results for each baseline method\n", "for config in [\"\", \"_relaxed\"]:\n", - " for method in new_methods:\n", + " for method in baseline_methods:\n", " for repeat_index in range(1, max_num_repeats_per_method + 1):\n", " method_title = method_mapping[method]\n", "\n", @@ -324,7 +377,7 @@ " globals()[f\"results_table_{repeat_index}\"] = pd.concat(\n", " [\n", " globals()[f\"{method}{config}_bust_results_table_{repeat_index}\"]\n", - " for method in new_methods\n", + " for method in baseline_methods\n", " for config in [\"\", \"_relaxed\"]\n", " if f\"{method}{config}_bust_results_table_{repeat_index}\" in globals()\n", " ]\n", @@ -402,7 +455,7 @@ " x=\"method\",\n", " y=\"rmsd\",\n", " hue=\"post-processing\",\n", - " data=combined_relaxed_data[combined_relaxed_data[\"rmsd\"] < 50],\n", + " data=combined_relaxed_data[combined_relaxed_data[\"rmsd\"] < 150], # ignore outliers\n", " split=True,\n", " inner=\"quartile\",\n", " palette=colors,\n", @@ -412,6 +465,7 @@ "# set labels and title\n", "plt.xlabel(\"Single-ligand pocket-only docking\")\n", "plt.ylabel(\"RMSD\")\n", + "plt.ylim(0, 150)\n", "\n", "# rotate x-axis labels for better readability\n", "plt.xticks(rotation=45, ha=\"right\")\n", @@ -437,7 +491,7 @@ "colors = [\"#FB8072\", \"#BEBADA\"]\n", "\n", "bar_width = 0.5\n", - "r1 = [item - 0.25 for item in range(2, 16, 2)]\n", + "r1 = [item - 0.25 for item in range(2, 22, 2)]\n", "r2 = [x + bar_width for x in r1]\n", "\n", "(\n", @@ -629,7 +683,7 @@ "posebusters_relaxed_rmsd_lt_2_and_pb_valid_data_std.fillna(0, inplace=True)\n", "\n", "# create the figure and a list of axes\n", - "fig, axis = plt.subplots(figsize=(12, 6))\n", + "fig, axis = plt.subplots(figsize=(18, 8))\n", "axis.spines[\"top\"].set_visible(False)\n", "axis.spines[\"right\"].set_visible(False)\n", "axis.spines[\"bottom\"].set_visible(False)\n", @@ -681,7 +735,7 @@ "\n", "# add labels, titles, ticks, 
etc.\n", "axis.set_ylabel(\"Percentage of predictions\")\n", - "axis.set_xlim(1, 15 + 0.1)\n", + "axis.set_xlim(1, 21 + 0.1)\n", "axis.set_ylim(0, 100)\n", "\n", "axis.bar_label(posebusters_rmsd_lt_2_bar, fmt=\"{:,.1f}%\", label_type=\"edge\")\n", @@ -700,18 +754,21 @@ "axis.grid(axis=\"y\", color=\"#EAEFF8\")\n", "axis.set_axisbelow(True)\n", "\n", - "axis.set_xticks([2, 4, 6, 6 + 1e-3, 8, 10, 12, 12 + 1e-3, 14, 14 + 1e-3])\n", + "axis.set_xticks([2, 4, 6, 8, 8 + 1e-3, 10, 12, 14, 16, 17, 18, 20, 20 + 1e-3])\n", "axis.set_xticks([1 + 0.1], minor=True)\n", "axis.set_xticklabels(\n", " [\n", " \"DiffDock-L\",\n", + " \"DiffDock w/o SCT\",\n", " \"FABind\",\n", " \"DynamicBind\",\n", " \"DL-based blind\",\n", " \"NeuralPLexer\",\n", - " \"RoseTTAFold-All-Atom\",\n", + " \"RoseTTAFold-AA\",\n", + " \"Chai-1\",\n", " \"DiffDock-L-Vina\",\n", " \"Conventional blind\",\n", + " \"P2Rank-Vina\",\n", " \"Ensemble (Con)\",\n", " \"Hybrid blind\",\n", " ]\n", @@ -725,7 +782,7 @@ "axis.tick_params(axis=\"y\", which=\"major\", left=\"off\", right=\"on\", color=\"#EAEFF8\")\n", "\n", "# vertical alignment of xtick labels\n", - "vert_alignments = [0.0, 0.0, 0.0, -0.1, 0.0, 0.0, 0.0, -0.1, 0.0, -0.1]\n", + "vert_alignments = [0.0, 0.0, 0.0, 0.0, -0.1, 0.0, 0.0, 0.0, 0.0, -0.1, 0.0, 0.0, -0.1]\n", "for tick, y in zip(axis.get_xticklabels(), vert_alignments):\n", " tick.set_y(y)\n", "\n", diff --git a/notebooks/posebusters_single_ligand_pocket_only_relaxed_bar_chart.png b/notebooks/posebusters_single_ligand_pocket_only_relaxed_bar_chart.png index 8f909282..2208a389 100644 Binary files a/notebooks/posebusters_single_ligand_pocket_only_relaxed_bar_chart.png and b/notebooks/posebusters_single_ligand_pocket_only_relaxed_bar_chart.png differ diff --git a/notebooks/posebusters_single_ligand_pocket_only_relaxed_rmsd_violin_plot.png b/notebooks/posebusters_single_ligand_pocket_only_relaxed_rmsd_violin_plot.png index b52ab663..f5d09fe6 100644 Binary files 
a/notebooks/posebusters_single_ligand_pocket_only_relaxed_rmsd_violin_plot.png and b/notebooks/posebusters_single_ligand_pocket_only_relaxed_rmsd_violin_plot.png differ diff --git a/notebooks/posebusters_single_ligand_relaxed_bar_chart.png b/notebooks/posebusters_single_ligand_relaxed_bar_chart.png index 3938eead..e84d662a 100644 Binary files a/notebooks/posebusters_single_ligand_relaxed_bar_chart.png and b/notebooks/posebusters_single_ligand_relaxed_bar_chart.png differ diff --git a/notebooks/posebusters_single_ligand_relaxed_rmsd_violin_plot.png b/notebooks/posebusters_single_ligand_relaxed_rmsd_violin_plot.png index 2386f370..b40442b0 100644 Binary files a/notebooks/posebusters_single_ligand_relaxed_rmsd_violin_plot.png and b/notebooks/posebusters_single_ligand_relaxed_rmsd_violin_plot.png differ diff --git a/notebooks/rfaa_casp15_interaction_dataframes_1.h5 b/notebooks/rfaa_casp15_interaction_dataframes_1.h5 new file mode 100644 index 00000000..fe34f08b Binary files /dev/null and b/notebooks/rfaa_casp15_interaction_dataframes_1.h5 differ diff --git a/notebooks/tulip_casp15_interaction_dataframes_1.h5 b/notebooks/tulip_casp15_interaction_dataframes_1.h5 new file mode 100644 index 00000000..48ffcd8d Binary files /dev/null and b/notebooks/tulip_casp15_interaction_dataframes_1.h5 differ diff --git a/notebooks/vina_diffdock_casp15_interaction_dataframes_1.h5 b/notebooks/vina_diffdock_casp15_interaction_dataframes_1.h5 new file mode 100644 index 00000000..de37b01e Binary files /dev/null and b/notebooks/vina_diffdock_casp15_interaction_dataframes_1.h5 differ diff --git a/notebooks/vina_p2rank_casp15_interaction_dataframes_1.h5 b/notebooks/vina_p2rank_casp15_interaction_dataframes_1.h5 new file mode 100644 index 00000000..14c1fc86 Binary files /dev/null and b/notebooks/vina_p2rank_casp15_interaction_dataframes_1.h5 differ diff --git a/posebench/__init__.py b/posebench/__init__.py index aa5ceaca..2aa86577 100644 --- a/posebench/__init__.py +++ b/posebench/__init__.py @@ 
-14,6 +14,7 @@ "dynamicbind": "DynamicBind", "neuralplexer": "NeuralPLexer", "rfaa": "RoseTTAFold-All-Atom", + "chai-lab": "chai-lab", "vina": "Vina", "tulip": "TULIP", "p2rank": "P2Rank", @@ -64,25 +65,28 @@ def resolve_method_protein_dir( :param pocket_only_baseline: Whether to return protein files for a pocket-only baseline. :return: The protein directory for the given method. """ - pocket_postfix = "_bs_cropped" if pocket_only_baseline else "" + pocket_suffix = "_bs_cropped" if pocket_only_baseline else "" + pocket_only_suffix = "_pocket_only" if pocket_only_baseline else "" if method in STANDARDIZED_DIR_METHODS or method in ["vina", "tulip"]: return ( os.path.join( "data", f"{dataset}_set", - f"{dataset}_holo_aligned_esmfold_structures{pocket_postfix}", + f"{dataset}_holo_aligned_predicted_structures{pocket_suffix}", ) if os.path.exists( os.path.join( "data", f"{dataset}_set", - f"{dataset}_holo_aligned_esmfold_structures{pocket_postfix}", + f"{dataset}_holo_aligned_predicted_structures{pocket_suffix}", ) ) else os.path.join( "data", f"{dataset}_set", - "predicted_structures" if dataset == "casp15" else f"{dataset}_esmfold_structures", + "predicted_structures" + if dataset == "casp15" + else f"{dataset}_predicted_structures", ) ) elif method == "dynamicbind": @@ -92,21 +96,26 @@ def resolve_method_protein_dir( "inference", "outputs", "results", - dataset, + f"{dataset}{pocket_only_suffix}", ) - elif method in ["neuralplexer", "rfaa"]: + elif method in ["neuralplexer", "rfaa", "chai-lab"]: return os.path.join( "forks", METHOD_TITLE_MAPPING.get(method, method), "inference", - f"{method}_{dataset}_outputs_{repeat_index}", + f"{method}{pocket_only_suffix}_{dataset}_outputs_{repeat_index}", ) else: raise ValueError(f"Invalid method: {method}") def resolve_method_ligand_dir( - method: str, dataset: str, vina_binding_site_method: str, repeat_index: int + method: str, + dataset: str, + vina_binding_site_method: str, + repeat_index: int, + pocket_only_baseline: bool, + 
v1_baseline: bool, ) -> str: """Resolve the ligand directory for a given method. @@ -114,14 +123,24 @@ def resolve_method_ligand_dir( :param dataset: The dataset name. :param vina_binding_site_method: The binding site method used for Vina. :param repeat_index: The repeat index for the method. + :param pocket_only_baseline: Whether to return ligand files for a pocket-only baseline. + :param v1_baseline: Whether to return ligand files for a V1 baseline. :return: The ligand directory for the given method. """ - if method in STANDARDIZED_DIR_METHODS or method in ["neuralplexer", "rfaa", "tulip"]: + pocket_only_suffix = "_pocket_only" if pocket_only_baseline else "" + v1_baseline_suffix = "v1" if v1_baseline else "" + if method in STANDARDIZED_DIR_METHODS or method in [ + "neuralplexer", + "rfaa", + "chai-lab", + "tulip", + ]: + output_suffix = "s" if method in ["neuralplexer", "rfaa", "chai-lab", "tulip"] else "" return os.path.join( "forks", - METHOD_TITLE_MAPPING.get(method, method), + METHOD_TITLE_MAPPING.get(method, method) + v1_baseline_suffix, "inference", - f"{method}_{dataset}_output{'s' if method in ['neuralplexer', 'rfaa', 'tulip'] else ''}_{repeat_index}", + f"{method}{pocket_only_suffix}_{dataset}_output{output_suffix}_{repeat_index}", ) elif method == "dynamicbind": return os.path.join( @@ -130,14 +149,14 @@ def resolve_method_ligand_dir( "inference", "outputs", "results", - dataset, + f"{dataset}{pocket_only_suffix}", ) elif method == "vina": return os.path.join( "forks", METHOD_TITLE_MAPPING.get(method, method), "inference", - f"vina_{vina_binding_site_method}_{dataset}_outputs_{repeat_index}", + f"vina{pocket_only_suffix}_{vina_binding_site_method}_{dataset}_outputs_{repeat_index}", ) else: raise ValueError(f"Invalid method: {method}") @@ -149,6 +168,8 @@ def resolve_method_output_dir( vina_binding_site_method: str, ensemble_ranking_method: str, repeat_index: int, + pocket_only_baseline: bool, + v1_baseline: bool, ) -> str: """Resolve the output 
directory for a given method. @@ -157,14 +178,24 @@ def resolve_method_output_dir( :param vina_binding_site_method: The binding site method used for Vina. :param ensemble_ranking_method: The ranking method used for the ensemble method. :param repeat_index: The repeat index for the method. + :param pocket_only_baseline: Whether to output files for a pocket-only baseline. + :param v1_baseline: Whether to output files for a V1 baseline. :return: The output directory for the given method. """ - if method in STANDARDIZED_DIR_METHODS or method in ["neuralplexer", "rfaa", "tulip"]: + pocket_only_suffix = "_pocket_only" if pocket_only_baseline else "" + v1_baseline_suffix = "v1" if v1_baseline else "" + if method in STANDARDIZED_DIR_METHODS or method in [ + "neuralplexer", + "rfaa", + "chai-lab", + "tulip", + ]: + output_suffix = "s" if method in ["neuralplexer", "rfaa", "chai-lab", "tulip"] else "" return os.path.join( "forks", - METHOD_TITLE_MAPPING.get(method, method), + METHOD_TITLE_MAPPING.get(method, method) + v1_baseline_suffix, "inference", - f"{method}_{dataset}_output{'s' if method in ['neuralplexer', 'rfaa', 'tulip'] else ''}_{repeat_index}", + f"{method}{pocket_only_suffix}_{dataset}_output{output_suffix}_{repeat_index}", ) elif method == "dynamicbind": return os.path.join( @@ -173,46 +204,54 @@ def resolve_method_output_dir( "inference", "outputs", "results", - f"{dataset}_{repeat_index}", + f"{dataset}{pocket_only_suffix}_{repeat_index}", ) elif method in ["vina", "p2rank"]: return os.path.join( "forks", METHOD_TITLE_MAPPING.get(method, method), "inference", - f"vina_{vina_binding_site_method}_{dataset}_outputs_{repeat_index}", + f"vina{pocket_only_suffix}_{vina_binding_site_method}_{dataset}_outputs_{repeat_index}", ) elif method == "ensemble": return os.path.join( "data", "test_cases", dataset, - f"top_{ensemble_ranking_method}_ensemble_predictions_{repeat_index}", + f"top_{ensemble_ranking_method}{pocket_only_suffix}_ensemble_predictions_{repeat_index}", ) 
else: raise ValueError(f"Invalid method: {method}") -def resolve_method_input_csv_path(method: str, dataset: str) -> str: +def resolve_method_input_csv_path(method: str, dataset: str, pocket_only_baseline: bool) -> str: """Resolve the input CSV path for a given method. :param method: The method name. :param dataset: The dataset name. + :param pocket_only_baseline: Whether to return the input CSV path for a pocket-only baseline. :return: The input CSV path for the given method. """ - if method in STANDARDIZED_DIR_METHODS or method in ["neuralplexer", "rfaa", "vina", "tulip"]: + pocket_only_suffix = "_pocket_only" if pocket_only_baseline else "" + if method in STANDARDIZED_DIR_METHODS or method in [ + "neuralplexer", + "rfaa", + "chai-lab", + "vina", + "tulip", + ]: return os.path.join( "forks", METHOD_TITLE_MAPPING.get(method, method), "inference", - f"{method}_{dataset}_inputs.csv", + f"{method}{pocket_only_suffix}_{dataset}_inputs.csv", ) elif method == "dynamicbind": return os.path.join( "forks", METHOD_TITLE_MAPPING.get(method, method), "inference", - f"{method}_{dataset}_inputs", + f"{method}{pocket_only_suffix}_{dataset}_inputs", ) elif method == "ensemble": return os.path.join( @@ -242,24 +281,30 @@ def register_custom_omegaconf_resolvers(): ) OmegaConf.register_new_resolver( "resolve_method_ligand_dir", - lambda method, dataset, vina_binding_site_method, repeat_index: resolve_method_ligand_dir( + lambda method, dataset, vina_binding_site_method, repeat_index, pocket_only_baseline, v1_baseline: resolve_method_ligand_dir( method, dataset, vina_binding_site_method, repeat_index, + pocket_only_baseline, + v1_baseline, ), ) OmegaConf.register_new_resolver( "resolve_method_output_dir", - lambda method, dataset, vina_binding_site_method, ensemble_ranking_method, repeat_index: resolve_method_output_dir( + lambda method, dataset, vina_binding_site_method, ensemble_ranking_method, repeat_index, pocket_only_baseline, v1_baseline: resolve_method_output_dir( method, 
dataset, vina_binding_site_method, ensemble_ranking_method, repeat_index, + pocket_only_baseline, + v1_baseline=v1_baseline, ), ) OmegaConf.register_new_resolver( "resolve_method_input_csv_path", - lambda method, dataset: resolve_method_input_csv_path(method, dataset), + lambda method, dataset, pocket_only_baseline: resolve_method_input_csv_path( + method, dataset, pocket_only_baseline + ), ) diff --git a/posebench/analysis/complex_alignment.py b/posebench/analysis/complex_alignment.py index 3d7c06d2..0e3bcba9 100644 --- a/posebench/analysis/complex_alignment.py +++ b/posebench/analysis/complex_alignment.py @@ -38,7 +38,7 @@ def save_aligned_complex( reference_ligand_sdf: str, save_protein: bool = True, save_ligand: bool = True, - aligned_filename_postfix: str = "_aligned", + aligned_filename_suffix: str = "_aligned", atom_df_name: str = "ATOM", ): """Align the predicted protein-ligand structures to the reference protein-ligand structures and @@ -50,7 +50,7 @@ def save_aligned_complex( :param reference_ligand_sdf: Path to the reference ligand structure in SDF format :param save_protein: Whether to save the aligned protein structure :param save_ligand: Whether to save the aligned ligand structure - :param aligned_filename_postfix: Postfix to append to the aligned files + :param aligned_filename_suffix: suffix to append to the aligned files :param atom_df_name: Name of the atom dataframe in the PDB file """ # Load protein and ligand structures @@ -151,7 +151,7 @@ def save_aligned_complex( ] = predicted_protein_aligned if save_protein: predicted_protein.to_pdb( - path=predicted_protein_pdb.replace(".pdb", f"{aligned_filename_postfix}.pdb"), + path=predicted_protein_pdb.replace(".pdb", f"{aligned_filename_suffix}.pdb"), records=[atom_df_name], gz=False, ) @@ -167,7 +167,7 @@ def save_aligned_complex( predicted_ligand_conf.SetAtomPosition(i, Point3D(x, y, z)) if save_ligand: with Chem.SDWriter( - predicted_ligand_sdf.replace(".sdf", f"{aligned_filename_postfix}.sdf") + 
predicted_ligand_sdf.replace(".sdf", f"{aligned_filename_suffix}.sdf") ) as f: f.write(predicted_ligand) @@ -178,7 +178,7 @@ def align_complex_to_protein_only( reference_protein_pdb: str, save_protein: bool = True, save_ligand: bool = True, - aligned_filename_postfix: str = "_aligned", + aligned_filename_suffix: str = "_aligned", atom_df_name: str = "ATOM", ): """Align a predicted protein-ligand structure to a reference protein structure. @@ -188,7 +188,7 @@ def align_complex_to_protein_only( :param reference_protein_pdb: Path to the reference protein structure in PDB format :param save_protein: Whether to save the aligned protein structure :param save_ligand: Whether to save the aligned ligand structure - :param aligned_filename_postfix: Postfix to append to the aligned files + :param aligned_filename_suffix: suffix to append to the aligned files :param atom_df_name: Name of the atom dataframe in the PDB file """ # Load protein and ligand structures @@ -269,7 +269,7 @@ def align_complex_to_protein_only( ["x_coord", "y_coord", "z_coord"] ] = predicted_protein_aligned predicted_protein.to_pdb( - path=predicted_protein_pdb.replace(".pdb", f"{aligned_filename_postfix}.pdb"), + path=predicted_protein_pdb.replace(".pdb", f"{aligned_filename_suffix}.pdb"), records=[atom_df_name], gz=False, ) @@ -284,7 +284,7 @@ def align_complex_to_protein_only( x, y, z = predicted_ligand_aligned[i] predicted_ligand_conf.SetAtomPosition(i, Point3D(x, y, z)) with Chem.SDWriter( - predicted_ligand_sdf.replace(".sdf", f"{aligned_filename_postfix}.sdf") + predicted_ligand_sdf.replace(".sdf", f"{aligned_filename_suffix}.sdf") ) as f: f.write(predicted_ligand) @@ -302,7 +302,7 @@ def main(cfg: DictConfig): input_data_dir = Path(cfg.input_data_dir) for config in ["", "_relaxed"]: output_dir = Path(cfg.output_dir + config) - if not output_dir.exists() or cfg.method in ["neuralplexer", "rfaa"]: + if not output_dir.exists() or cfg.method in ["neuralplexer", "rfaa", "chai-lab"]: output_dir = 
Path(str(output_dir).replace("_relaxed", "")) # parse ligand files @@ -333,6 +333,20 @@ def main(cfg: DictConfig): ) elif cfg.method == "rfaa": output_ligand_files = sorted(list(output_dir.rglob(f"*ligand{config}.sdf"))) + elif cfg.method == "chai-lab": + output_ligand_files = list( + output_dir.rglob(f"pred.model_idx_{cfg.rank_to_align - 1}_ligand*{config}.sdf") + ) + output_ligand_files = sorted( + [ + file + for file in output_ligand_files + if config == "_relaxed" + or (config == "" and "_relaxed" not in file.stem) + and "_aligned" not in file.stem + and "_LIG_" not in file.stem + ] + ) else: raise ValueError(f"Invalid method: {cfg.method}") @@ -366,6 +380,18 @@ def main(cfg: DictConfig): ) elif cfg.method == "rfaa": output_protein_files = sorted(list(output_dir.rglob("*protein.pdb"))) + elif cfg.method == "chai-lab": + output_protein_files = list( + output_dir.rglob(f"pred.model_idx_{cfg.rank_to_align - 1}_protein*.pdb") + ) + output_protein_files = sorted( + [ + file + for file in output_protein_files + if (config == "_relaxed" or (config == "" and "_relaxed" not in file.stem)) + and "_aligned" not in file.stem + ] + ) else: raise ValueError(f"Invalid method: {cfg.method}") @@ -381,12 +407,15 @@ def main(cfg: DictConfig): ) ] ) - elif cfg.method == "rfaa": + elif cfg.method in ["rfaa", "chai-lab"]: output_protein_files = sorted( [ item - for item in output_dir.rglob("*protein.pdb") - if any( + for item in output_dir.rglob( + f"pred.model_idx_{cfg.rank_to_align - 1}_protein*.pdb" + ) + if "_aligned" not in item.stem + and any( [item.parent.stem in file.parent.stem for file in output_ligand_files] ) ] @@ -400,7 +429,9 @@ def main(cfg: DictConfig): ), f"Numbers of protein ({len(output_protein_files)}) and ligand ({len(output_ligand_files)}) files do not match." 
# align protein-ligand complexes - for protein_file, ligand_file in tqdm(zip(output_protein_files, output_ligand_files)): + for protein_file, ligand_file in tqdm( + zip(output_protein_files, output_ligand_files), desc="Aligning complexes" + ): protein_id, ligand_id = protein_file.stem, ligand_file.stem if protein_id != ligand_id and cfg.method == "dynamicbind": protein_id, ligand_id = ( @@ -417,13 +448,13 @@ def main(cfg: DictConfig): protein_id, ligand_id = protein_file.parent.stem, ligand_file.parent.stem if protein_id != ligand_id: raise ValueError(f"Protein and ligand IDs do not match: {protein_id}, {ligand_id}") - pocket_postfix = "_bs_cropped" if cfg.pocket_only_baseline else "" + pocket_suffix = "_bs_cropped" if cfg.pocket_only_baseline else "" reference_protein_pdbs = [ item for item in input_data_dir.rglob( - f"*{protein_id.split(f'{cfg.dataset}_')[-1]}{'_lig.pdb' if cfg.dataset == 'casp15' else f'*_protein{pocket_postfix}.pdb'}" + f"*{protein_id.split(f'{cfg.dataset}_')[-1]}{'_lig.pdb' if cfg.dataset == 'casp15' else f'*_protein{pocket_suffix}.pdb'}" ) - if "esmfold_structures" not in str(item) + if "predicted_structures" not in str(item) ] if cfg.dataset == "dockgen": reference_protein_pdbs = [ @@ -458,10 +489,10 @@ def main(cfg: DictConfig): if ( cfg.force_process or not os.path.exists( - str(protein_file).replace(".pdb", f"{cfg.aligned_filename_postfix}.pdb") + str(protein_file).replace(".pdb", f"{cfg.aligned_filename_suffix}.pdb") ) or not os.path.exists( - str(ligand_file).replace(".sdf", f"{cfg.aligned_filename_postfix}.sdf") + str(ligand_file).replace(".sdf", f"{cfg.aligned_filename_suffix}.sdf") ) ): if cfg.dataset == "casp15": @@ -472,7 +503,7 @@ def main(cfg: DictConfig): str(ligand_file), str(reference_protein_pdb), save_protein=cfg.method != "diffdock", - aligned_filename_postfix=cfg.aligned_filename_postfix, + aligned_filename_suffix=cfg.aligned_filename_suffix, ) else: save_aligned_complex( @@ -481,7 +512,7 @@ def main(cfg: DictConfig): 
str(reference_protein_pdb), str(reference_ligand_sdf), save_protein=cfg.method != "diffdock", - aligned_filename_postfix=cfg.aligned_filename_postfix, + aligned_filename_suffix=cfg.aligned_filename_suffix, ) diff --git a/posebench/analysis/inference_analysis.py b/posebench/analysis/inference_analysis.py index b12ef49d..3819b1cf 100644 --- a/posebench/analysis/inference_analysis.py +++ b/posebench/analysis/inference_analysis.py @@ -120,15 +120,15 @@ def create_mol_table( pdb_ids = None relaxed_protein = relaxed and cfg.relax_protein if cfg.dataset == "dockgen" and cfg.dockgen_test_ids_filepath is not None: - # NOTE: for DockGen, we have each method predict for all 189 complexes - # but evaluate them here on an ESMFold RMSD-filtered subset of 91 complexes + # NOTE: for DockGen, we may have each method predict for all 189 complexes + # but evaluate them here on a predicted RMSD-filtered subset of 91 complexes assert os.path.exists( cfg.dockgen_test_ids_filepath ), f"Invalid test IDs file path for DockGen: {os.path.exists(cfg.dockgen_test_ids_filepath)}." 
with open(cfg.dockgen_test_ids_filepath) as f: pdb_ids = {line.replace(" ", "-") for line in f.read().splitlines()} - if cfg.method in ["dynamicbind", "rfaa"]: + if cfg.method in ["dynamicbind", "rfaa", "chai-lab"]: # NOTE: for methods such as DynamicBind and RoseTTAFold-All-Atom, # the input CSV file needs to be created manually from the input data directory input_smiles_and_pdb_ids = parse_inference_inputs_from_dir(input_data_dir, pdb_ids=pdb_ids) @@ -194,6 +194,22 @@ def create_mol_table( ) else None ) + elif cfg.method == "chai-lab": + mol_table["mol_cond"] = input_table["pdb_id"].apply( + lambda x: list( + (Path(str(inference_dir).replace("_relaxed", "")) / x).rglob( + f"pred.model_idx_0_protein{'_relaxed' if relaxed_protein else ''}_aligned.pdb" + ) + )[0] + if len( + list( + (Path(str(inference_dir).replace("_relaxed", "")) / x).rglob( + f"pred.model_idx_0_protein{'_relaxed' if relaxed_protein else ''}_aligned.pdb" + ) + ) + ) + else None + ) elif cfg.method == "ensemble": mol_table["mol_cond"] = input_table["pdb_id"].apply( lambda x: list( @@ -211,25 +227,25 @@ def create_mol_table( else None ) else: - pocket_postfix = "_bs_cropped" if cfg.pocket_only_baseline else "" + pocket_suffix = "_bs_cropped" if cfg.pocket_only_baseline else "" protein_structure_input_dir = ( os.path.join( - input_data_dir, f"{cfg.dataset}_holo_aligned_esmfold_structures{pocket_postfix}" + input_data_dir, f"{cfg.dataset}_holo_aligned_predicted_structures{pocket_suffix}" ) if os.path.exists( os.path.join( input_data_dir, - f"{cfg.dataset}_holo_aligned_esmfold_structures{pocket_postfix}", + f"{cfg.dataset}_holo_aligned_predicted_structures{pocket_suffix}", ) ) - else os.path.join(input_data_dir, f"{cfg.dataset}_esmfold_structures") + else os.path.join(input_data_dir, f"{cfg.dataset}_predicted_structures") ) - protein_structure_file_postfix = ( - "_holo_aligned_esmfold_protein" + protein_structure_file_suffix = ( + "_holo_aligned_predicted_protein" if os.path.exists( os.path.join( 
input_data_dir, - f"{cfg.dataset}_holo_aligned_esmfold_structures{pocket_postfix}", + f"{cfg.dataset}_holo_aligned_predicted_structures{pocket_suffix}", ) ) and cfg.dataset != "casp15" @@ -237,18 +253,18 @@ def create_mol_table( ) if relaxed_protein: protein_structure_input_dir = str(inference_dir).replace("_relaxed", "") - protein_structure_file_postfix = "_relaxed" + protein_structure_file_suffix = "_relaxed" mol_table["mol_cond"] = input_table["pdb_id"].apply( lambda x: os.path.join( protein_structure_input_dir, "_".join(x.split("_")[:3]), - f"{'_'.join(x.split('_')[:2])}{protein_structure_file_postfix}.pdb", + f"{'_'.join(x.split('_')[:2])}{protein_structure_file_suffix}.pdb", ) if os.path.exists( os.path.join( protein_structure_input_dir, "_".join(x.split("_")[:3]), - f"{'_'.join(x.split('_')[:2])}{protein_structure_file_postfix}.pdb", + f"{'_'.join(x.split('_')[:2])}{protein_structure_file_suffix}.pdb", ) ) else None @@ -256,11 +272,11 @@ def create_mol_table( else: mol_table["mol_cond"] = input_table["pdb_id"].apply( lambda x: os.path.join( - protein_structure_input_dir, f"{x}{protein_structure_file_postfix}.pdb" + protein_structure_input_dir, f"{x}{protein_structure_file_suffix}.pdb" ) if os.path.exists( os.path.join( - protein_structure_input_dir, f"{x}{protein_structure_file_postfix}.pdb" + protein_structure_input_dir, f"{x}{protein_structure_file_suffix}.pdb" ) ) else None @@ -305,6 +321,22 @@ def create_mol_table( ) else None ) + elif cfg.method == "chai-lab": + mol_table["mol_pred"] = input_table["pdb_id"].apply( + lambda x: list( + (Path(str(inference_dir).replace("_relaxed", "")) / x).rglob( + f"pred.model_idx_0_ligand{'_relaxed' if relaxed else ''}_aligned.sdf" + ) + )[0] + if len( + list( + (Path(str(inference_dir).replace("_relaxed", "")) / x).rglob( + f"pred.model_idx_0_ligand{'_relaxed' if relaxed else ''}_aligned.sdf" + ) + ) + ) + else None + ) elif cfg.method == "vina": mol_table["mol_pred"] = ( input_table["pdb_id"] @@ -522,6 +554,28 @@ def 
create_mol_table( ) else None ) + elif cfg.method == "chai-lab": + mol_table.loc[missing_pred_indices, "mol_pred"] = input_table.loc[ + missing_pred_indices, "pdb_id" + ].apply( + lambda x: glob.glob( + os.path.join( + Path(str(inference_dir).replace("_relaxed", "")), + x, + "pred.model_idx_0_ligand_aligned.sdf", + ) + )[0] + if len( + glob.glob( + os.path.join( + Path(str(inference_dir).replace("_relaxed", "")), + x, + "pred.model_idx_0_ligand_aligned.sdf", + ) + ) + ) + else None + ) elif cfg.method == "vina": mol_table.loc[missing_pred_indices, "mol_pred"] = input_table.loc[ missing_pred_indices, "pdb_id" @@ -593,6 +647,13 @@ def main(cfg: DictConfig): ): output_dir = Path(str(output_dir).replace("_relaxed", "")) bust_results_filepath = Path(str(output_dir) + config) / "bust_results.csv" + + # differentiate relaxed and unrelaxed protein pose results + if "relaxed" in config and cfg.relax_protein: + bust_results_filepath = Path( + str(bust_results_filepath).replace(".csv", "_protein_relaxed.csv") + ) + os.makedirs(bust_results_filepath.parent, exist_ok=True) # collect test results diff --git a/posebench/analysis/inference_analysis_casp.py b/posebench/analysis/inference_analysis_casp.py index b8cffd39..b1487030 100644 --- a/posebench/analysis/inference_analysis_casp.py +++ b/posebench/analysis/inference_analysis_casp.py @@ -9,6 +9,7 @@ import subprocess # nosec import tempfile from pathlib import Path +from typing import Tuple import hydra import pandas as pd @@ -94,16 +95,19 @@ NUM_SCOREABLE_CASP15_TARGETS = len(All_CASP15_SINGLE_LIGAND_TARGETS) + len( All_CASP15_MULTI_LIGAND_TARGETS ) +TOLERANT_METHODS = ["diffdock", "dynamicbind", "vina", "tulip"] -def create_casp_input_dirs(cfg: DictConfig, config: str) -> str: +def create_casp_input_dirs(cfg: DictConfig, config: str) -> Tuple[str, List[str]]: """Create the input directories for the CASP ligand scoring pipeline and return the resulting (temporary) parent directory as a `Path`. 
:param cfg: Configuration dictionary from the hydra YAML file. :param config: The configuration suffix to append to the output directory. - :return: The path to the temporary parent directory as a `Path`. + :return: The path to the temporary parent directory as a `Path` as well as + a list of available prediction targets for "tolerant methods". """ + target_ids = [] temp_dir_path = Path( tempfile.mkdtemp( suffix=f"_{cfg.method}_{cfg.vina_binding_site_method}_{cfg.dataset}{config}" @@ -127,15 +131,17 @@ def create_casp_input_dirs(cfg: DictConfig, config: str) -> str: renumber_pdb_df_residues( target_file, str(temp_dir_path / "targets" / Path(target_file).name) ) + target_ids.append(Path(target_file).stem.split("_")[0]) else: shutil.copy(target_file, temp_dir_path / "targets") - return temp_dir_path + return temp_dir_path, target_ids def create_casp_mol_table( input_data_dir: Path, targets_to_select: List[str], relaxed: bool = False, + relax_protein: bool = False, rank_to_select: int = 1, ) -> pd.DataFrame: """Create a table of CASP molecules and their corresponding ligand files. @@ -154,12 +160,14 @@ def create_casp_mol_table( continue sdf_data_files = glob.glob(str(data_dir / f"*_rank{rank_to_select}_*.sdf")) pdb_data_files = glob.glob(str(data_dir / f"*_rank{rank_to_select}_*.pdb")) + if relax_protein: + pdb_data_files = glob.glob(str(data_dir / f"*_rank{rank_to_select}_*_relaxed.pdb")) assert ( len(sdf_data_files) == 1 - ), f"Expected 1 SDF file, but found {len(sdf_data_files)}." + ), f"Expected 1 SDF file, but found {len(sdf_data_files)}: {sdf_data_files}." assert ( len(pdb_data_files) == 1 - ), f"Expected 1 PDB file, but found {len(pdb_data_files)}." + ), f"Expected 1 PDB file, but found {len(pdb_data_files)}: {pdb_data_files}." sdf_data_file = sdf_data_files[0] pdb_data_file = pdb_data_files[0] mol_table_rows.append( @@ -183,10 +191,15 @@ def main(cfg: DictConfig): :param cfg: Configuration dictionary from the hydra YAML file. 
""" - if cfg.no_pretraining: + if cfg.v1_baseline: with open_dict(cfg): cfg.predictions_dir = cfg.predictions_dir.replace( - "_ensemble_predictions", "_npt_ensemble_predictions" + f"top_{cfg.method}", f"top_{cfg.method}v1" + ) + if cfg.no_ilcl: + with open_dict(cfg): + cfg.predictions_dir = cfg.predictions_dir.replace( + "_ensemble_predictions", "_no_ilcl_ensemble_predictions" ) if cfg.method == "vina": with open_dict(cfg): @@ -208,6 +221,11 @@ def main(cfg: DictConfig): output_dir = cfg.predictions_dir + config scoring_results_filepath = Path(output_dir) / "scoring_results.csv" bust_results_filepath = Path(output_dir) / "bust_results.csv" + + # differentiate relaxed and unrelaxed protein pose results + if "relaxed" in config and cfg.relax_protein: + bust_results_filepath = bust_results_filepath.replace(".csv", "_protein_relaxed.csv") + os.makedirs(scoring_results_filepath.parent, exist_ok=True) # collect analysis results @@ -216,7 +234,7 @@ def main(cfg: DictConfig): f"{resolve_method_title(cfg.method)}{config} analysis results for inference directory `{output_dir}` already exist at `{scoring_results_filepath}`. Directly analyzing..." ) else: - temp_dir_path = create_casp_input_dirs(cfg, config) + temp_dir_path, available_targets = create_casp_input_dirs(cfg, config) # run CASP ligand scoring pipeline scoring_args = [ @@ -231,8 +249,17 @@ def main(cfg: DictConfig): "-v", "DEBUG", ] - if cfg.targets is not None: - scoring_args.extend(["--targets", *[str(t) for t in cfg.targets]]) + targets_to_score = cfg.targets + if cfg.method in TOLERANT_METHODS: + # NOTE: Since e.g., DiffDock-L is notably unstable for the CASP15 multi-ligand + # targets, we only score the targets for which such a method was able to generate + # predictions after five retries of its respective inference script. + targets_to_score = available_targets + assert ( + len(targets_to_score) > 0 + ), f"No available targets to score for {cfg.method}." 
+ if targets_to_score is not None: + scoring_args.extend(["--targets", *[str(t) for t in targets_to_score]]) if cfg.fault_tolerant: scoring_args.append("--fault-tolerant") result = subprocess.run(scoring_args) # nosec @@ -263,6 +290,7 @@ def main(cfg: DictConfig): Path(cfg.predictions_dir), targets_to_select, relaxed="relaxed" in config, + relax_protein=cfg.relax_protein, ) assert len(mol_table) == len( targets_to_select diff --git a/posebench/data/chai_input_preparation.py b/posebench/data/chai_input_preparation.py new file mode 100644 index 00000000..bcfe18ec --- /dev/null +++ b/posebench/data/chai_input_preparation.py @@ -0,0 +1,184 @@ +# ------------------------------------------------------------------------------------------------------------------------------------- +# Following code curated for PoseBench: (https://github.com/BioinfoMachineLearning/PoseBench) +# ------------------------------------------------------------------------------------------------------------------------------------- + +import logging +import os + +import hydra +import rootutils +from beartype import beartype +from beartype.typing import Any, List, Optional, Tuple +from omegaconf import DictConfig + +rootutils.setup_root(__file__, indicator=".project-root", pythonpath=True) + +from posebench import register_custom_omegaconf_resolvers +from posebench.utils.data_utils import ( + extract_sequences_from_protein_structure_file, + parse_inference_inputs_from_dir, +) + +logging.basicConfig(format="[%(asctime)s] {%(filename)s:%(lineno)d} %(levelname)s - %(message)s") +logger = logging.getLogger(__name__) + + +@beartype +def write_scripts( + smiles_and_pdb_id_list: List[Tuple[Any, str]], + input_data_dir: str, + output_scripts_path: str, + dataset: str, + pocket_only_baseline: bool = False, + protein_filepath: Optional[str] = None, + ligand_smiles: Optional[Any] = None, + input_id: Optional[str] = None, +): + """Write one Chai-1 inference FASTA file per input complex.
+ + :param smiles_and_pdb_id_list: A list of tuples each containing a SMILES string and a PDB ID. + :param input_data_dir: Path to directory of input protein-ligand complex subdirectories. + :param output_scripts_path: Path to directory of output FASTA sequence files. + :param dataset: Dataset name. + :param pocket_only_baseline: Whether to provide only the protein pocket as a baseline + experiment. + :param protein_filepath: Optional path to the protein structure file. + :param ligand_smiles: Optional SMILES string of the ligand. + :param input_id: Optional input ID. + """ + if pocket_only_baseline: + output_scripts_path = output_scripts_path.replace(dataset, f"{dataset}_pocket_only") + + os.makedirs(output_scripts_path, exist_ok=True) + if protein_filepath is not None and ligand_smiles is not None: + input_id = ( + "_".join(os.path.splitext(os.path.basename(protein_filepath))[0].split("_")[:2]) + if input_id is None + else input_id + ) + # only parse protein chains (e.g., not nucleic acids) + protein_sequence_list = [ + seq + for seq in extract_sequences_from_protein_structure_file(protein_filepath) + if len(seq) > 0 + ] + output_dir = os.path.join(output_scripts_path, input_id) + os.makedirs(output_dir, exist_ok=True) + fasta_filepath = os.path.join(output_dir, f"{input_id}.fasta") + if os.path.exists(fasta_filepath): + logger.warning( + f"FASTA file already exists for input ID {input_id}. Skipping writing to file..."
+ ) + return + for chain_index, sequence in enumerate(protein_sequence_list, start=1): + with open(fasta_filepath, "a") as f: + f.write(f">protein|{input_id}-chain-{chain_index}\n{sequence}\n") + for chain_index, smiles in enumerate(ligand_smiles.split(":"), start=1): + with open(fasta_filepath, "a") as f: + f.write(f">ligand|{input_id}-chain-{chain_index}\n{smiles}\n") + else: + for smiles_string, pdb_id in smiles_and_pdb_id_list: + output_dir = os.path.join(output_scripts_path, pdb_id) + os.makedirs(output_dir, exist_ok=True) + casp_dataset_requested = os.path.basename(input_data_dir) == "targets" + if casp_dataset_requested: + protein_filepath = os.path.join(input_data_dir, f"{pdb_id}_lig.pdb") + else: + if pocket_only_baseline: + protein_filepath = os.path.join( + input_data_dir, + f"{dataset}_holo_aligned_predicted_structures_bs_cropped", + f"{pdb_id}_holo_aligned_predicted_protein.pdb", + ) + if not os.path.exists(protein_filepath): + logger.warning( + f"Protein structure file not found for PDB ID {pdb_id}. Skipping..." + ) + continue + else: + dockgen_suffix = "_processed" if dataset == "dockgen" else "" + protein_filepath = os.path.join( + input_data_dir, pdb_id, f"{pdb_id}_protein{dockgen_suffix}.pdb" + ) + protein_sequence_list = [ + seq + for seq in extract_sequences_from_protein_structure_file(protein_filepath) + if len(seq) > 0 + ] + ligand_smiles_list = smiles_string.split("|") + fasta_filepath = os.path.join(output_dir, f"{pdb_id}.fasta") + if os.path.exists(fasta_filepath): + logger.warning( + f"FASTA file already exists for PDB ID {pdb_id}. Skipping writing to file..." 
+ ) + continue + for chain_index, sequence in enumerate(protein_sequence_list, start=1): + with open(fasta_filepath, "a") as f: + f.write(f">protein|{pdb_id}-chain-{chain_index}\n{sequence}\n") + for chain_index, sequence in enumerate(ligand_smiles_list, start=1): + with open(fasta_filepath, "a") as f: + f.write(f">ligand|{pdb_id}-chain-{chain_index}\n{sequence}\n") + + +@hydra.main( + version_base="1.3", + config_path="../../configs/data", + config_name="chai_input_preparation.yaml", +) +def main(cfg: DictConfig): + """Parse a data directory containing subdirectories of protein-ligand complexes and prepare + corresponding inference FASTA files for the Chai-1 model. + + :param cfg: Configuration dictionary from the hydra YAML file. + """ + pdb_ids = None + if cfg.dataset == "posebusters_benchmark" and cfg.posebusters_ccd_ids_filepath is not None: + assert os.path.exists( + cfg.posebusters_ccd_ids_filepath + ), f"Invalid CCD IDs file path for PoseBusters Benchmark: {cfg.posebusters_ccd_ids_filepath}." + with open(cfg.posebusters_ccd_ids_filepath) as f: + pdb_ids = set(f.read().splitlines()) + elif cfg.dataset == "dockgen" and cfg.dockgen_test_ids_filepath is not None: + assert os.path.exists( + cfg.dockgen_test_ids_filepath + ), f"Invalid test IDs file path for DockGen: {cfg.dockgen_test_ids_filepath}."
+ with open(cfg.dockgen_test_ids_filepath) as f: + pdb_ids = {line.replace(" ", "-") for line in f.read().splitlines()} + elif cfg.dataset not in ["posebusters_benchmark", "astex_diverse", "dockgen", "casp15"]: + raise ValueError(f"Dataset `{cfg.dataset}` not supported.") + + if cfg.protein_filepath is not None and cfg.ligand_smiles is not None: + write_scripts( + [], + cfg.input_data_dir, + cfg.output_scripts_path, + dataset=cfg.dataset, + pocket_only_baseline=cfg.pocket_only_baseline, + protein_filepath=cfg.protein_filepath, + ligand_smiles=cfg.ligand_smiles, + input_id=cfg.input_id, + ) + else: + smiles_and_pdb_id_list = parse_inference_inputs_from_dir( + cfg.input_data_dir, + pdb_ids=pdb_ids, + ) + write_scripts( + smiles_and_pdb_id_list, + cfg.input_data_dir, + cfg.output_scripts_path, + dataset=cfg.dataset, + pocket_only_baseline=cfg.pocket_only_baseline, + protein_filepath=cfg.protein_filepath, + ligand_smiles=cfg.ligand_smiles, + input_id=cfg.input_id, + ) + + logger.info( + f"Chai-1 input files for dataset `{cfg.dataset}` are fully prepared for inference."
+ ) + + +if __name__ == "__main__": + register_custom_omegaconf_resolvers() + main() diff --git a/posebench/data/chai_output_extraction.py b/posebench/data/chai_output_extraction.py new file mode 100644 index 00000000..519c5ebf --- /dev/null +++ b/posebench/data/chai_output_extraction.py @@ -0,0 +1,150 @@ +# ------------------------------------------------------------------------------------------------------------------------------------- +# Following code curated for PoseBench: (https://github.com/BioinfoMachineLearning/PoseBench) +# ------------------------------------------------------------------------------------------------------------------------------------- + +import logging +import os + +import hydra +import rootutils +from biopandas.pdb import PandasPdb +from omegaconf import DictConfig, open_dict + +rootutils.setup_root(__file__, indicator=".project-root", pythonpath=True) + +from posebench.utils.data_utils import ( + extract_protein_and_ligands_with_prody, + parse_inference_inputs_from_dir, +) + +logging.basicConfig(format="[%(asctime)s] {%(filename)s:%(lineno)d} %(levelname)s - %(message)s") +logger = logging.getLogger(__name__) + + +def distinguish_ligand_atoms(input_pdb_file: str, output_pdb_file: str): + """Mark ligand atoms as heteroatoms. + + :param input_pdb_file: Path to the input PDB file. + :param output_pdb_file: Path to the output PDB file. 
+ """ + pdb = PandasPdb().read_pdb(input_pdb_file) + ligand_atoms = pdb.df["ATOM"][pdb.df["ATOM"]["residue_name"] == "LIG"] + + ligand_indices = ligand_atoms.index + pdb.df["ATOM"] = pdb.df["ATOM"].drop(ligand_indices) + + ligand_atoms.record_name = "HETATM" + pdb.df["HETATM"] = ligand_atoms + + pdb.to_pdb(output_pdb_file) + + +@hydra.main( + version_base="1.3", + config_path="../../configs/data", + config_name="chai_output_extraction.yaml", +) +def main(cfg: DictConfig): + """Extract proteins and ligands separately from the prediction outputs.""" + pdb_ids = None + if cfg.dataset == "posebusters_benchmark" and cfg.posebusters_ccd_ids_filepath is not None: + assert os.path.exists( + cfg.posebusters_ccd_ids_filepath + ), f"Invalid CCD IDs file path for PoseBusters Benchmark: {os.path.exists(cfg.posebusters_ccd_ids_filepath)}." + with open(cfg.posebusters_ccd_ids_filepath) as f: + pdb_ids = set(f.read().splitlines()) + elif cfg.dataset == "dockgen" and cfg.dockgen_test_ids_filepath is not None: + assert os.path.exists( + cfg.dockgen_test_ids_filepath + ), f"Invalid test IDs file path for DockGen: {os.path.exists(cfg.dockgen_test_ids_filepath)}." 
+ with open(cfg.dockgen_test_ids_filepath) as f: + pdb_ids = {line.replace(" ", "-") for line in f.read().splitlines()} + elif cfg.dataset not in ["posebusters_benchmark", "astex_diverse", "dockgen", "casp15"]: + raise ValueError(f"Dataset `{cfg.dataset}` not supported.") + + if cfg.pocket_only_baseline: + with open_dict(cfg): + cfg.prediction_inputs_dir = cfg.prediction_inputs_dir.replace( + cfg.dataset, f"{cfg.dataset}_pocket_only" + ) + cfg.prediction_outputs_dir = cfg.prediction_outputs_dir.replace( + cfg.dataset, f"{cfg.dataset}_pocket_only" + ) + cfg.inference_outputs_dir = cfg.inference_outputs_dir.replace( + f"chai-lab_{cfg.dataset}", f"chai-lab_pocket_only_{cfg.dataset}" + ) + + if cfg.complex_filepath is not None: + # process single-complex inputs + assert os.path.exists( + cfg.complex_filepath + ), f"Complex PDB file not found: {cfg.complex_filepath}" + assert ( + cfg.complex_id is not None + ), "Complex ID must be provided when extracting single complex outputs." + assert ( + cfg.ligand_smiles is not None + ), "Ligand SMILES must be provided when extracting single complex outputs." + assert ( + cfg.output_dir is not None + ), "Output directory must be provided when extracting single complex outputs." 
+ intermediate_output_filepath = cfg.complex_filepath + final_output_filepath = os.path.join( + cfg.output_dir, cfg.complex_id, os.path.basename(cfg.complex_filepath) + ) + os.makedirs(os.path.dirname(final_output_filepath), exist_ok=True) + distinguish_ligand_atoms( + intermediate_output_filepath, + final_output_filepath.replace(".pdb", "_fixed.pdb"), + ) + extract_protein_and_ligands_with_prody( + final_output_filepath.replace(".pdb", "_fixed.pdb"), + final_output_filepath.replace(".pdb", "_protein.pdb"), + final_output_filepath.replace(".pdb", "_ligand.sdf"), + sanitize=False, + add_element_types=True, + ligand_smiles=cfg.ligand_smiles, + ) + else: + # process all complexes in a dataset + smiles_and_pdb_id_list = parse_inference_inputs_from_dir( + cfg.input_data_dir, + pdb_ids=pdb_ids, + ) + pdb_id_to_smiles = {pdb_id: smiles for smiles, pdb_id in smiles_and_pdb_id_list} + for item in os.listdir(cfg.prediction_inputs_dir): + input_item_path = os.path.join(cfg.prediction_inputs_dir, item) + output_item_path = os.path.join(cfg.prediction_outputs_dir, item) + if os.path.isdir(input_item_path) and os.path.isdir(output_item_path): + for file in os.listdir(output_item_path): + if not file.endswith(".pdb"): + continue + intermediate_output_filepath = os.path.join(output_item_path, file) + final_output_filepath = os.path.join(cfg.inference_outputs_dir, item, file) + os.makedirs(os.path.dirname(final_output_filepath), exist_ok=True) + if cfg.dataset in ["posebusters_benchmark", "astex_diverse", "dockgen"]: + ligand_smiles = pdb_id_to_smiles[item].replace("|", ".") + else: + # NOTE: for the `casp15` dataset, standalone ligand SMILES are not available + ligand_smiles = None + distinguish_ligand_atoms( + intermediate_output_filepath, + final_output_filepath.replace(".pdb", "_fixed.pdb"), + ) + extract_protein_and_ligands_with_prody( + final_output_filepath.replace(".pdb", "_fixed.pdb"), + final_output_filepath.replace(".pdb", "_protein.pdb"), + 
final_output_filepath.replace(".pdb", "_ligand.sdf"), + sanitize=False, + add_element_types=True, + ligand_smiles=ligand_smiles, + ) + os.remove(final_output_filepath.replace(".pdb", "_fixed.pdb")) + + logger.info( + f"Finished extracting {cfg.dataset} protein and ligands from all prediction outputs." + ) + + +if __name__ == "__main__": + main() diff --git a/posebench/data/components/create_casp15_ensemble_input_csv.py b/posebench/data/components/create_casp15_ensemble_input_csv.py index fdc3bb95..0108341a 100644 --- a/posebench/data/components/create_casp15_ensemble_input_csv.py +++ b/posebench/data/components/create_casp15_ensemble_input_csv.py @@ -80,7 +80,7 @@ def create_casp15_ensemble_input_csv(args: argparse.Namespace): "-p" "--predicted-structures-dir", "--predicted_structures_dir", type=str, - default="data/casp15_set/predicted_structures", + default="data/casp15_set/casp15_holo_aligned_predicted_structures", help="The directory containing the CASP15 (ground truth binding site-aligned) predicted protein structures.", ) parser.add_argument( diff --git a/posebench/data/components/plot_dataset_rmsd.py b/posebench/data/components/plot_dataset_rmsd.py index b4625207..0017ed4f 100644 --- a/posebench/data/components/plot_dataset_rmsd.py +++ b/posebench/data/components/plot_dataset_rmsd.py @@ -237,7 +237,7 @@ def main(cfg: DictConfig): os.path.join( cfg.data_dir, "astex_diverse_set", - "astex_diverse_holo_aligned_esmfold_structures", + "astex_diverse_holo_aligned_predicted_structures", ), os.path.join(cfg.data_dir, "astex_diverse_set"), os.path.join( @@ -252,7 +252,7 @@ def main(cfg: DictConfig): os.path.join( cfg.data_dir, "posebusters_benchmark_set", - "posebusters_benchmark_holo_aligned_esmfold_structures", + "posebusters_benchmark_holo_aligned_predicted_structures", ), os.path.join(cfg.data_dir, "posebusters_benchmark_set"), os.path.join( @@ -265,7 +265,7 @@ def main(cfg: DictConfig): plot_dataset_rmsd( "DockGen Set", - os.path.join(cfg.data_dir, 
"dockgen_set", "dockgen_holo_aligned_esmfold_structures"), + os.path.join(cfg.data_dir, "dockgen_set", "dockgen_holo_aligned_predicted_structures"), os.path.join(cfg.data_dir, "dockgen_set"), os.path.join( cfg.data_dir, @@ -279,7 +279,7 @@ def main(cfg: DictConfig): plot_dataset_rmsd( "CASP15 Set", - os.path.join(cfg.data_dir, "casp15_set", "predicted_structures"), + os.path.join(cfg.data_dir, "casp15_set", "casp15_holo_aligned_predicted_structures"), os.path.join(cfg.data_dir, "casp15_set", "targets"), os.path.join( cfg.data_dir, @@ -299,7 +299,7 @@ def main(cfg: DictConfig): # plot_dataset_rmsd( # "CASP15 Set", - # os.path.join(cfg.data_dir, "casp15_set", "predicted_structures"), + # os.path.join(cfg.data_dir, "casp15_set", "casp15_holo_aligned_predicted_structures"), # os.path.join(cfg.data_dir, "casp15_set", "targets"), # os.path.join( # cfg.data_dir, diff --git a/posebench/data/diffdock_input_preparation.py b/posebench/data/diffdock_input_preparation.py index f32b5499..ebe62e70 100644 --- a/posebench/data/diffdock_input_preparation.py +++ b/posebench/data/diffdock_input_preparation.py @@ -9,7 +9,7 @@ import rootutils from beartype import beartype from beartype.typing import Any, List, Optional, Tuple -from omegaconf import DictConfig +from omegaconf import DictConfig, open_dict rootutils.setup_root(__file__, indicator=".project-root", pythonpath=True) @@ -61,12 +61,12 @@ def write_input_csv( protein_filepath = ( os.path.join( input_protein_structure_dir, - f"{pdb_id}_holo_aligned_esmfold_protein.pdb", + f"{pdb_id}_holo_aligned_predicted_protein.pdb", ) if os.path.exists( os.path.join( input_protein_structure_dir, - f"{pdb_id}_holo_aligned_esmfold_protein.pdb", + f"{pdb_id}_holo_aligned_predicted_protein.pdb", ) ) else os.path.join(input_protein_structure_dir, f"{pdb_id}.pdb") @@ -106,6 +106,13 @@ def main(cfg: DictConfig): pdb_ids = {line.replace(" ", "-") for line in f.read().splitlines()} elif cfg.dataset not in ["posebusters_benchmark", "astex_diverse", 
"dockgen", "casp15"]: raise ValueError(f"Dataset `{cfg.dataset}` not supported.") + + if cfg.pocket_only_baseline: + with open_dict(cfg): + cfg.output_csv_path = cfg.output_csv_path.replace( + f"diffdock_{cfg.dataset}", f"diffdock_pocket_only_{cfg.dataset}" + ) + input_protein_structure_dir = ( cfg.input_protein_structure_dir + "_bs_cropped" if cfg.pocket_only_baseline diff --git a/posebench/data/dynamicbind_input_preparation.py b/posebench/data/dynamicbind_input_preparation.py index 6db5508d..c9f0a752 100644 --- a/posebench/data/dynamicbind_input_preparation.py +++ b/posebench/data/dynamicbind_input_preparation.py @@ -11,7 +11,7 @@ import rootutils from beartype import beartype from beartype.typing import Any, List, Optional, Tuple -from omegaconf import DictConfig +from omegaconf import DictConfig, open_dict rootutils.setup_root(__file__, indicator=".project-root", pythonpath=True) @@ -84,6 +84,12 @@ def main(cfg: DictConfig): elif cfg.dataset not in ["posebusters_benchmark", "astex_diverse", "dockgen", "casp15"]: raise ValueError(f"Dataset `{cfg.dataset}` not supported.") + if cfg.pocket_only_baseline: + with open_dict(cfg): + cfg.output_csv_dir = cfg.output_csv_dir.replace( + f"dynamicbind_{cfg.dataset}", f"dynamicbind_pocket_only_{cfg.dataset}" + ) + if cfg.input_protein_data_dir and not os.path.exists(cfg.input_protein_data_dir): os.makedirs(cfg.input_protein_data_dir, exist_ok=True) if not os.path.exists(cfg.output_csv_dir): diff --git a/posebench/data/fabind_input_preparation.py b/posebench/data/fabind_input_preparation.py index 8b9e5f05..f93a3d2f 100644 --- a/posebench/data/fabind_input_preparation.py +++ b/posebench/data/fabind_input_preparation.py @@ -9,7 +9,7 @@ import rootutils from beartype import beartype from beartype.typing import List, Tuple -from omegaconf import DictConfig +from omegaconf import DictConfig, open_dict rootutils.setup_root(__file__, indicator=".project-root", pythonpath=True) @@ -60,6 +60,12 @@ def main(cfg: DictConfig): elif 
cfg.dataset not in ["posebusters_benchmark", "astex_diverse", "dockgen", "casp15"]: raise ValueError(f"Dataset `{cfg.dataset}` not supported.") + if cfg.pocket_only_baseline: + with open_dict(cfg): + cfg.output_csv_path = cfg.output_csv_path.replace( + f"fabind_{cfg.dataset}", f"fabind_pocket_only_{cfg.dataset}" + ) + smiles_and_pdb_id_list = parse_inference_inputs_from_dir( cfg.input_data_dir, pdb_ids=pdb_ids, diff --git a/posebench/data/neuralplexer_input_preparation.py b/posebench/data/neuralplexer_input_preparation.py index 8b855c87..5055f0a8 100644 --- a/posebench/data/neuralplexer_input_preparation.py +++ b/posebench/data/neuralplexer_input_preparation.py @@ -9,7 +9,7 @@ import rootutils from beartype import beartype from beartype.typing import Any, List, Optional, Tuple -from omegaconf import DictConfig +from omegaconf import DictConfig, open_dict rootutils.setup_root(__file__, indicator=".project-root", pythonpath=True) @@ -73,12 +73,12 @@ def write_input_csv( input_receptor = ( os.path.join( input_receptor_structure_dir, - f"{pdb_id}_holo_aligned_esmfold_protein.pdb", + f"{pdb_id}_holo_aligned_predicted_protein.pdb", ) if os.path.exists( os.path.join( input_receptor_structure_dir, - f"{pdb_id}_holo_aligned_esmfold_protein.pdb", + f"{pdb_id}_holo_aligned_predicted_protein.pdb", ) ) else os.path.join(input_receptor_structure_dir, f"{pdb_id}.pdb") @@ -116,6 +116,12 @@ def main(cfg: DictConfig): elif cfg.dataset not in ["posebusters_benchmark", "astex_diverse", "dockgen", "casp15"]: raise ValueError(f"Dataset `{cfg.dataset}` not supported.") + if cfg.pocket_only_baseline: + with open_dict(cfg): + cfg.output_csv_path = cfg.output_csv_path.replace( + f"neuralplexer_{cfg.dataset}", f"neuralplexer_pocket_only_{cfg.dataset}" + ) + input_receptor_structure_dir = ( cfg.input_receptor_structure_dir + "_bs_cropped" if cfg.pocket_only_baseline diff --git a/posebench/data/rfaa_input_preparation.py b/posebench/data/rfaa_input_preparation.py index 8db839d6..c799b6e4 100644 
--- a/posebench/data/rfaa_input_preparation.py +++ b/posebench/data/rfaa_input_preparation.py @@ -49,6 +49,9 @@ def write_scripts( :param ligand_smiles: Optional SMILES string of the ligand. :param input_id: Optional input ID. """ + if pocket_only_baseline: + output_scripts_path = output_scripts_path.replace(dataset, f"{dataset}_pocket_only") + os.makedirs(output_scripts_path, exist_ok=True) if protein_filepath is not None and ligand_smiles is not None: input_id = ( @@ -94,8 +97,8 @@ def write_scripts( if pocket_only_baseline: protein_filepath = os.path.join( input_data_dir, - f"{dataset}_holo_aligned_esmfold_structures_bs_cropped", - f"{pdb_id}_holo_aligned_esmfold_protein.pdb", + f"{dataset}_holo_aligned_predicted_structures_bs_cropped", + f"{pdb_id}_holo_aligned_predicted_protein.pdb", ) if not os.path.exists(protein_filepath): logger.warning( @@ -103,9 +106,9 @@ def write_scripts( ) continue else: - dockgen_postfix = "_processed" if dataset == "dockgen" else "" + dockgen_suffix = "_processed" if dataset == "dockgen" else "" protein_filepath = os.path.join( - input_data_dir, pdb_id, f"{pdb_id}_protein{dockgen_postfix}.pdb" + input_data_dir, pdb_id, f"{pdb_id}_protein{dockgen_suffix}.pdb" ) if dataset == "dockgen": ligand_filepaths = [ @@ -147,7 +150,7 @@ def write_scripts( ) def main(cfg: DictConfig): """Parse a data directory containing subdirectories of protein-ligand complexes and prepare - corresponding inference CSV file for the DiffDock model. + corresponding inference CSV file for the RoseTTAFold-All-Atom model. :param cfg: Configuration dictionary from the hydra YAML file. 
""" diff --git a/posebench/data/rfaa_output_extraction.py b/posebench/data/rfaa_output_extraction.py index 952c4039..69365731 100644 --- a/posebench/data/rfaa_output_extraction.py +++ b/posebench/data/rfaa_output_extraction.py @@ -10,7 +10,7 @@ import numpy as np import rootutils from biopandas.pdb import PandasPdb -from omegaconf import DictConfig +from omegaconf import DictConfig, open_dict from rdkit import Chem rootutils.setup_root(__file__, indicator=".project-root", pythonpath=True) @@ -49,6 +49,19 @@ def main(cfg: DictConfig): "dockgen", "casp15", ], "Dataset must be one of 'posebusters_benchmark', 'astex_diverse', 'dockgen', 'casp15'." + + if cfg.pocket_only_baseline: + with open_dict(cfg): + cfg.prediction_inputs_dir = cfg.prediction_inputs_dir.replace( + cfg.dataset, f"{cfg.dataset}_pocket_only" + ) + cfg.prediction_outputs_dir = cfg.prediction_outputs_dir.replace( + cfg.dataset, f"{cfg.dataset}_pocket_only" + ) + cfg.inference_outputs_dir = cfg.inference_outputs_dir.replace( + f"rfaa_{cfg.dataset}", f"rfaa_pocket_only_{cfg.dataset}" + ) + if cfg.complex_filepath is not None: # process single-complex inputs assert os.path.exists( diff --git a/posebench/data/tulip_output_extraction.py b/posebench/data/tulip_output_extraction.py index 6a58d988..3e7dc724 100644 --- a/posebench/data/tulip_output_extraction.py +++ b/posebench/data/tulip_output_extraction.py @@ -28,18 +28,27 @@ def main(cfg: DictConfig): """Extract proteins and ligands separately from the prediction outputs.""" os.makedirs(cfg.inference_outputs_dir, exist_ok=True) - if cfg.dataset == "posebusters_benchmark": + + pdb_ids = None + if cfg.dataset == "posebusters_benchmark" and cfg.posebusters_ccd_ids_filepath is not None: assert os.path.exists( cfg.posebusters_ccd_ids_filepath - ), "CCD IDs file path must be provided for PoseBusters Benchmark dataset." + ), f"Invalid CCD IDs file path for PoseBusters Benchmark: {os.path.exists(cfg.posebusters_ccd_ids_filepath)}." 
with open(cfg.posebusters_ccd_ids_filepath) as f: - ccd_ids = set(f.read().splitlines()) - else: - ccd_ids = None + pdb_ids = set(f.read().splitlines()) + elif cfg.dataset == "dockgen" and cfg.dockgen_test_ids_filepath is not None: + assert os.path.exists( + cfg.dockgen_test_ids_filepath + ), f"Invalid test IDs file path for DockGen: {os.path.exists(cfg.dockgen_test_ids_filepath)}." + with open(cfg.dockgen_test_ids_filepath) as f: + pdb_ids = {line.replace(" ", "-") for line in f.read().splitlines()} + elif cfg.dataset not in ["posebusters_benchmark", "astex_diverse", "dockgen", "casp15"]: + raise ValueError(f"Dataset `{cfg.dataset}` not supported.") + for target_name in os.listdir(cfg.prediction_outputs_dir): - if ccd_ids is not None and target_name not in ccd_ids: + if pdb_ids is not None and target_name not in pdb_ids: logger.info( - f"Skipping target {target_name} as it is not in the PoseBusters Benchmark CCD IDs set." + f"Skipping target {target_name} as it is not in the specified PoseBusters Benchmark or DockGen PDB IDs set." 
) continue target_dir_path = os.path.join(cfg.prediction_outputs_dir, target_name) diff --git a/posebench/models/chai_inference.py b/posebench/models/chai_inference.py new file mode 100644 index 00000000..8058d462 --- /dev/null +++ b/posebench/models/chai_inference.py @@ -0,0 +1,109 @@ +# ------------------------------------------------------------------------------------------------------------------------------------- +# Following code curated for PoseBench: (https://github.com/BioinfoMachineLearning/PoseBench) +# ------------------------------------------------------------------------------------------------------------------------------------- + +import glob +import logging +import os +import traceback +from pathlib import Path + +import hydra +import rootutils +import torch +from chai_lab.chai1 import run_inference +from omegaconf import DictConfig, open_dict + +rootutils.setup_root(__file__, indicator=".project-root", pythonpath=True) + +from posebench import register_custom_omegaconf_resolvers + +logging.basicConfig(format="[%(asctime)s] {%(filename)s:%(lineno)d} %(levelname)s - %(message)s") +logger = logging.getLogger(__name__) + + +def run_chai_inference(fasta_file: str, cfg: DictConfig): + """Run inference using a trained Chai-1 model checkpoint. + + :param fasta_filepath: Path to the input FASTA file. + :param cfg: Configuration dictionary from the hydra YAML file. + """ + assert os.path.exists(fasta_file), f"FASTA file `{fasta_file}` not found." + assert ( + cfg.inference_dir_name is not None + ), "`inference_dir_name` must be provided in the config file." 
+ output_dir = os.path.join(cfg.output_dir, cfg.inference_dir_name) + os.makedirs(output_dir, exist_ok=True) + try: + run_inference( + fasta_file=Path(fasta_file), + output_dir=Path(output_dir), + # 'default' setup + num_trunk_recycles=3, + num_diffn_timesteps=200, + seed=42, + device=torch.device(f"cuda:{cfg.cuda_device_index}"), + use_esm_embeddings=True, + ) + except Exception as e: + raise e + logger.info(f"Chai-1 inference for FASTA file `{fasta_file}` complete.") + + +@hydra.main( + version_base="1.3", + config_path="../../configs/model", + config_name="chai_inference.yaml", +) +def main(cfg: DictConfig): + """Create SLURM job submission scripts for inference with Chai-1. + + :param cfg: Configuration dictionary from the hydra YAML file. + """ + if cfg.pocket_only_baseline: + with open_dict(cfg): + cfg.input_dir = cfg.input_dir.replace(cfg.dataset, f"{cfg.dataset}_pocket_only") + cfg.output_dir = cfg.output_dir.replace(cfg.dataset, f"{cfg.dataset}_pocket_only") + + num_dir_items_found = 0 + for item in os.listdir(cfg.input_dir): + item_path = os.path.join(cfg.input_dir, item) + if os.path.isdir(item_path): + num_dir_items_found += 1 + if cfg.max_num_inputs and num_dir_items_found > cfg.max_num_inputs: + logger.info( + f"Maximum number of input directories reached ({cfg.max_num_inputs}). Exiting inference loop." + ) + break + if ( + cfg.skip_existing + and os.path.exists(os.path.join(cfg.output_dir, item, "pred.model_idx_0.pdb")) + and not os.path.exists(os.path.join(cfg.output_dir, item, "error_log.txt")) + ): + logger.info(f"Skipping inference for `{item}` as output directory already exists.") + continue + fasta_filepaths = list(glob.glob(os.path.join(item_path, "*.fasta"))) + if not fasta_filepaths: + logger.error(f"Failed to find all required files for item `{item}`. 
Skipping...") + continue + fasta_filepath = fasta_filepaths[0] + with open_dict(cfg): + cfg.inference_dir_name = item + try: + run_chai_inference( + fasta_file=fasta_filepath, + cfg=cfg, + ) + if os.path.isfile(os.path.join(cfg.output_dir, item, "error_log.txt")): + os.remove(os.path.join(cfg.output_dir, item, "error_log.txt")) + except Exception as e: + logger.error( + f"Failed to run Chai-1 inference for item `{item}` due to: {e}. Skipping..." + ) + with open(os.path.join(cfg.output_dir, item, "error_log.txt"), "w") as f: + traceback.print_exception(type(e), e, e.__traceback__, file=f) + + +if __name__ == "__main__": + register_custom_omegaconf_resolvers() + main() diff --git a/posebench/models/diffdock_inference.py b/posebench/models/diffdock_inference.py index 3ae8531f..df66b7c9 100644 --- a/posebench/models/diffdock_inference.py +++ b/posebench/models/diffdock_inference.py @@ -8,7 +8,7 @@ import hydra import rootutils -from omegaconf import DictConfig +from omegaconf import DictConfig, open_dict rootutils.setup_root(__file__, indicator=".project-root", pythonpath=True) @@ -33,13 +33,35 @@ def main(cfg: DictConfig): if cfg.max_num_inputs else cfg.input_csv_path ) + + if cfg.v1_baseline: + cfg.diffdock_exec_dir = cfg.diffdock_exec_dir.replace("DiffDock", "DiffDockv1") + cfg.input_csv_path = cfg.input_csv_path.replace("DiffDock", "DiffDockv1") + cfg.model_dir = cfg.model_dir.replace( + "forks/DiffDock/workdir/v1.1/score_model", "forks/DiffDockv1/workdir/paper_score_model" + ) + cfg.confidence_model_dir = cfg.confidence_model_dir.replace( + "forks/DiffDock/workdir/v1.1/confidence_model", + "forks/DiffDockv1/workdir/paper_confidence_model", + ) + cfg.output_dir = cfg.output_dir.replace("DiffDock", "DiffDockv1") + cfg.actual_steps = 18 + cfg.no_final_step_noise = True + + if cfg.pocket_only_baseline: + with open_dict(cfg): + input_csv_path = input_csv_path.replace( + f"diffdock_{cfg.dataset}", f"diffdock_pocket_only_{cfg.dataset}" + ) + cfg.output_dir = 
cfg.output_dir.replace( + f"diffdock_{cfg.dataset}", f"diffdock_pocket_only_{cfg.dataset}" + ) + assert os.path.exists(input_csv_path), f"Input CSV file `{input_csv_path}` not found." try: cmd = [ cfg.python_exec_path, os.path.join(cfg.diffdock_exec_dir, "inference.py"), - "--config", - cfg.inference_config_path, "--protein_ligand_csv", input_csv_path, "--out_dir", @@ -62,6 +84,8 @@ def main(cfg: DictConfig): ] if cfg.skip_existing: cmd.append("--skip_existing") + if not cfg.v1_baseline: + cmd.extend(["--config", cfg.inference_config_path]) subprocess.run(cmd, check=True) # nosec except Exception as e: raise e diff --git a/posebench/models/dynamicbind_inference.py b/posebench/models/dynamicbind_inference.py index 661eb5f7..75371ced 100644 --- a/posebench/models/dynamicbind_inference.py +++ b/posebench/models/dynamicbind_inference.py @@ -6,6 +6,7 @@ import logging import os import subprocess # nosec +import uuid from pathlib import Path import hydra @@ -31,9 +32,11 @@ def main(cfg: DictConfig): :param cfg: Configuration dictionary from the hydra YAML file. 
""" - pocket_postfix = "_bs_cropped" if cfg.pocket_only_baseline else "" + pocket_suffix = "_bs_cropped" if cfg.pocket_only_baseline else "" + pocket_only_suffix = "_pocket_only" if cfg.pocket_only_baseline else "" + os.environ["MKL_THREADING_LAYER"] = "GNU" # address MKL threading issue - protein_filepaths = find_protein_files(Path(cfg.input_data_dir + pocket_postfix)) + protein_filepaths = find_protein_files(Path(cfg.input_data_dir + pocket_suffix)) ligand_filepaths = [ ligand_filepath for ligand_filepath in find_ligand_files(Path(cfg.input_ligand_csv_dir), extension="csv") @@ -103,7 +106,7 @@ def main(cfg: DictConfig): "inference", "outputs", "results", - f"{cfg.dataset}_{ligand_filepath.stem}_{cfg.repeat_index}", + f"{cfg.dataset}{pocket_only_suffix}_{ligand_filepath.stem}_{cfg.repeat_index}", "index0_idx_0", "rank1_ligand*.sdf", ) @@ -114,6 +117,11 @@ def main(cfg: DictConfig): f"Skipping inference for completed protein `{protein_filepath}` and ligand `{ligand_filepath}`." ) continue + unique_cache_id = uuid.uuid4() + unique_cache_path = ( + str(cfg.cache_path) + + f"_{cfg.dataset}{pocket_only_suffix}_{ligand_filepath.stem}_{cfg.repeat_index}_{unique_cache_id}" + ) try: subprocess.run( [ @@ -129,8 +137,12 @@ def main(cfg: DictConfig): str(cfg.inference_steps), "--batch_size", str(cfg.batch_size), + "--cache_path", + unique_cache_path, "--header", - str(cfg.header) + f"_{ligand_filepath.stem}" + f"_{cfg.repeat_index}", + str(cfg.header) + + f"{pocket_only_suffix}_{ligand_filepath.stem}" + + f"_{cfg.repeat_index}", "--device", str(cfg.cuda_device_index), "--python", diff --git a/posebench/models/ensemble_generation.py b/posebench/models/ensemble_generation.py index 0d57f190..5babd671 100644 --- a/posebench/models/ensemble_generation.py +++ b/posebench/models/ensemble_generation.py @@ -473,6 +473,56 @@ def create_rfaa_bash_script( logger.info(f"Bash script '{output_filepath}' created successfully.") +def create_chai_bash_script( + protein_filepath: str, + 
ligand_smiles: str, + input_id: str, + cfg: DictConfig, + output_filepath: Optional[str] = None, + generate_hpc_scripts: bool = True, +): + """Create a bash script to run Chai-1 protein-ligand complex prediction. + + :param protein_filepath: Path to the input protein structure PDB file. + :param ligand_smiles: SMILES string of the input ligand. + :param input_id: Input ID. + :param cfg: Configuration dictionary for runtime arguments. + :param output_filepath: Optional path to the output bash script file. + :param generate_hpc_scripts: Whether to generate HPC scripts for RoseTTAFold-All-Atom. + """ + + if output_filepath is None: + output_filepath = os.path.join(cfg.output_dir, input_id, f"{input_id}_rfaa_inference.sh") + os.makedirs(os.path.dirname(output_filepath), exist_ok=True) + + bash_script_content = f"""#!/bin/bash -l +{insert_hpc_headers(method='chai-lab', time_limit='0-12:00:00') if generate_hpc_scripts else 'source /home/$USER/mambaforge/etc/profile.d/conda.sh'} +conda activate {"$project_dir/forks/chai-lab/chai-lab/" if generate_hpc_scripts else 'forks/chai-lab/chai-lab/'} +echo "Beginning Chai-1 inference for input '{input_id}'!" + +# command to run chai_input_preparation.py +python posebench/data/chai_input_preparation.py \\ + dataset=ensemble \\ + protein_filepath='{protein_filepath}' \\ + ligand_smiles='"{ligand_smiles}"' \\ + input_id='{input_id}' + +# command to run chai_inference.py +echo "Calling chai_inference.py!" +python posebench/models/chai_inference.py \\ + dataset=ensemble \\ + cuda_device_index={cfg.cuda_device_index} \\ + skip_existing={cfg.chai_skip_existing} + +echo "Finished calling chai_inference.py!" 
+ """ + + with open(output_filepath, "w") as file: + file.write(bash_script_content) + + logger.info(f"Bash script '{output_filepath}' created successfully.") + + def create_vina_bash_script( binding_site_method: Literal["diffdock", "fabind", "dynamicbind", "neuralplexer", "rfaa"], protein_filepath: str, @@ -520,6 +570,9 @@ def create_vina_bash_script( ligand_filepath="{ligand_filepath}" \\ apo_protein_filepath="{apo_protein_filepath}" \\ input_id="{input_id}" \\ + p2rank_exec_utility={cfg.vina_p2rank_exec_utility} \\ + p2rank_config={cfg.vina_p2rank_config} \\ + p2rank_enable_pymol_visualizations={cfg.vina_p2rank_enable_pymol_visualizations} \\ echo "Finished calling vina_inference.py!" """ @@ -553,7 +606,7 @@ def generate_method_prediction_script( protein and ligand filepaths. """ - def extract_chains_to_fasta_files(protein_filepath: str) -> List[str]: + def extract_protein_chains_to_fasta_files(protein_filepath: str) -> List[str]: """Extract individual chains from a protein file and save them as separate FASTA files. :param protein_filepath: Path to the protein file. @@ -575,7 +628,7 @@ def extract_chains_to_fasta_files(protein_filepath: str) -> List[str]: model_chains = [chain for chain in model] assert len(model_chains) == len( sequences - ), "For RFAA, numbers of BioPython chains and parsed sequences do not match." + ), "For RFAA, numbers of Biopython chains and parsed sequences do not match." 
for chain_index, chain in enumerate(model_chains): fasta_filename = f"{Path(protein_filepath).stem}_{chain.id}.fasta" fasta_filepath = os.path.join(temp_dir, fasta_filename) @@ -619,8 +672,8 @@ def extract_chains_to_fasta_files(protein_filepath: str) -> List[str]: generate_hpc_scripts=generate_hpc_scripts, ) elif method == "rfaa": - fasta_filepaths = extract_chains_to_fasta_files(protein_filepath) - smiles_strings = ligand_smiles.split(":") if ":" in ligand_smiles else [ligand_smiles] + fasta_filepaths = extract_protein_chains_to_fasta_files(protein_filepath) + smiles_strings = ligand_smiles.split(":") create_rfaa_bash_script( fasta_filepaths=fasta_filepaths, sdf_filepaths=None, @@ -630,6 +683,15 @@ def extract_chains_to_fasta_files(protein_filepath: str) -> List[str]: smiles_strings=smiles_strings, generate_hpc_scripts=generate_hpc_scripts, ) + elif method == "chai-lab": + create_chai_bash_script( + protein_filepath=protein_filepath, + ligand_smiles=ligand_smiles, + input_id=input_id, + cfg=cfg, + output_filepath=output_filepath, + generate_hpc_scripts=generate_hpc_scripts, + ) elif method == "vina": assert ( cfg.generate_vina_scripts and cfg.resume @@ -688,10 +750,13 @@ def get_method_predictions( :return: List of method predictions, each as a tuple of the output protein filepath and the output ligand filepath. """ + pocket_only_suffix = "_pocket_only" if cfg.pocket_only_baseline else "" + no_ilcl_suffix = "_no_ilcl" if cfg.neuralplexer_no_ilcl else "" + if method == "diffdock": ensemble_benchmarking_output_dir = ( Path(cfg.diffdock_output_dir).parent - / f"diffdock_{cfg.ensemble_benchmarking_dataset}_output_{cfg.ensemble_benchmarking_repeat_index}" + / f"diffdock{pocket_only_suffix}_{cfg.ensemble_benchmarking_dataset}_output_{cfg.ensemble_benchmarking_repeat_index}" if cfg.ensemble_benchmarking else cfg.diffdock_output_dir ) @@ -748,9 +813,9 @@ def get_method_predictions( ), "Ligand files must be for the designated target." 
elif method == "dynamicbind": target_dir_name = ( - f"{cfg.ensemble_benchmarking_dataset}_{target}_{cfg.ensemble_benchmarking_repeat_index}" + f"{cfg.ensemble_benchmarking_dataset}{pocket_only_suffix}_{target}_{cfg.ensemble_benchmarking_repeat_index}" if cfg.ensemble_benchmarking - else f"{cfg.dynamicbind_header}_{target}" + else f"{cfg.dynamicbind_header}{pocket_only_suffix}_{target}" ) protein_output_files = list( map( @@ -815,7 +880,7 @@ def get_method_predictions( elif method == "neuralplexer": ensemble_benchmarking_output_dir = ( Path(cfg.input_dir if cfg.input_dir else cfg.neuralplexer_out_path).parent - / f"neuralplexer{'_npt' if cfg.neuralplexer_no_pretraining else ''}_{cfg.ensemble_benchmarking_dataset}_outputs_{cfg.ensemble_benchmarking_repeat_index}" + / f"neuralplexer{pocket_only_suffix}{no_ilcl_suffix}_{cfg.ensemble_benchmarking_dataset}_outputs_{cfg.ensemble_benchmarking_repeat_index}" if cfg.ensemble_benchmarking else (cfg.input_dir if cfg.input_dir else cfg.neuralplexer_out_path) ) @@ -848,7 +913,7 @@ def get_method_predictions( elif method == "rfaa": ensemble_benchmarking_output_dir = ( Path(cfg.rfaa_output_dir).parent - / f"rfaa_{cfg.ensemble_benchmarking_dataset}_outputs_{cfg.ensemble_benchmarking_repeat_index}" + / f"rfaa{pocket_only_suffix}_{cfg.ensemble_benchmarking_dataset}_outputs_{cfg.ensemble_benchmarking_repeat_index}" if cfg.ensemble_benchmarking else cfg.rfaa_output_dir ) @@ -876,11 +941,45 @@ def get_method_predictions( ], key=rank_key, )[: cfg.method_top_n_to_select] + elif method == "chai-lab": + ensemble_benchmarking_output_dir = ( + Path(cfg.input_dir if cfg.input_dir else cfg.chai_out_path).parent + / f"chai-lab{pocket_only_suffix}{no_ilcl_suffix}_{cfg.ensemble_benchmarking_dataset}_outputs_{cfg.ensemble_benchmarking_repeat_index}" + if cfg.ensemble_benchmarking + else (cfg.input_dir if cfg.input_dir else cfg.chai_out_path) + ) + protein_output_files = sorted( + [ + file + for file in map( + str, + 
Path(os.path.join(ensemble_benchmarking_output_dir, target)).rglob("*.pdb"), + ) + if "model_idx" in os.path.basename(file) + and "relaxed" not in os.path.basename(file) + and "aligned" not in os.path.basename(file) + ], + key=rank_key, + )[: cfg.method_top_n_to_select] + ligand_output_files = sorted( + [ + file + for file in map( + str, + Path(os.path.join(ensemble_benchmarking_output_dir, target)).rglob("*.sdf"), + ) + if "model_idx" in os.path.basename(file) + and "relaxed" not in os.path.basename(file) + and "aligned" not in os.path.basename(file) + and "_LIG_" not in os.path.basename(file) + ], + key=rank_key, + )[: cfg.method_top_n_to_select] elif method == "vina": assert binding_site_method, "Binding site method must be provided for Vina predictions." ensemble_benchmarking_output_dir = ( Path(cfg.vina_output_dir).parent - / f"vina_{binding_site_method}_{cfg.ensemble_benchmarking_dataset}_outputs_{cfg.ensemble_benchmarking_repeat_index}" + / f"vina{pocket_only_suffix}_{binding_site_method}_{cfg.ensemble_benchmarking_dataset}_outputs_{cfg.ensemble_benchmarking_repeat_index}" if cfg.ensemble_benchmarking else cfg.vina_output_dir.replace("vina_", f"vina_{binding_site_method}_") ) @@ -1245,7 +1344,7 @@ def rank_ensemble_predictions( ranking and valued as its method name, output protein filepath, output ligand filepath, and average pairwise RMSD or Vina energy score. """ - # cache filepath to predicted apo protein structure e.g., from ESMFold + # cache filepath to predicted apo protein structure from a structure predictor e.g., ESMFold if cfg.ensemble_benchmarking: apo_reference_protein_filepaths = list( Path(cfg.ensemble_benchmarking_apo_protein_dir).rglob(f"*{name}*.pdb") @@ -1542,9 +1641,11 @@ def export_ligands_in_casp15_format( "Number of RFAA ligand numbers, names, and molecule fragments do not match. 
Note that this means it did not predict for all input ligands and that manual adjustments to the resulting CASP15 submission file may need to be made (e.g., to make sure ligand names are correctly aligned with listed molecular fragments)." ) else: - assert ( - len(ligand_numbers_list) == len(ligand_names_list) == len(mol_frags) - ), "Number of ligand numbers, names, and molecule fragments must match." + if not (len(ligand_numbers_list) == len(ligand_names_list) == len(mol_frags)): + logger.warning( + f"Number of ligand numbers, names, and molecule fragments must match. Skipping model {i}..." + ) + continue sdf_content.write(f"MODEL {model_index if model_index is not None else i}\n") @@ -1588,11 +1689,11 @@ def save_ranked_predictions( assert ligand_tasks == "P", "Only protein-ligand docking tasks are supported." ranking_metric = "ff" if cfg.ensemble_ranking_method == "ff" else "rmsd" relax_complex = cfg.relax_method_ligands_pre_ranking or cfg.relax_method_ligands_post_ranking - ligand_relaxed_postfix = "_relaxed" if relax_complex else "" - protein_relaxed_postfix = ligand_relaxed_postfix if cfg.relax_protein else "" + ligand_relaxed_suffix = "_relaxed" if relax_complex else "" + protein_relaxed_suffix = ligand_relaxed_suffix if cfg.relax_protein else "" os.makedirs(cfg.output_dir, exist_ok=True) - os.makedirs(os.path.join(cfg.output_dir, name + ligand_relaxed_postfix), exist_ok=True) + os.makedirs(os.path.join(cfg.output_dir, name + ligand_relaxed_suffix), exist_ok=True) relaxation_success_list = [] if cfg.relax_method_ligands_post_ranking: @@ -1725,14 +1826,14 @@ def save_ranked_predictions( ligand_affinity_value = ( float(ligand_affinity_match.group(1)) if ligand_affinity_match else None ) - ligand_plddt_postfix = f"_plddt{ligand_plddt_value:.7f}" if ligand_plddt_value else "" - ligand_affinity_postfix = ( + ligand_plddt_suffix = f"_plddt{ligand_plddt_value:.7f}" if ligand_plddt_value else "" + ligand_affinity_suffix = ( f"_affinity{ligand_affinity_value:.7f}" if 
ligand_affinity_value else "" ) ligand_output_filepath = os.path.join( cfg.output_dir, - name + ligand_relaxed_postfix, - f"{method}_rank{rank}_{ranking_metric}{ranking_metric_value:.2e}{ligand_plddt_postfix}{ligand_affinity_postfix}{ligand_relaxed_postfix if 0 < len(relaxation_success_list) <= len(ranked_predictions) and relaxation_success_list[index] else ''}.sdf", + name + ligand_relaxed_suffix, + f"{method}_rank{rank}_{ranking_metric}{ranking_metric_value:.2e}{ligand_plddt_suffix}{ligand_affinity_suffix}{ligand_relaxed_suffix if 0 < len(relaxation_success_list) <= len(ranked_predictions) and relaxation_success_list[index] else ''}.sdf", ) protein_output_filepath = protein_filepath if ( @@ -1749,16 +1850,16 @@ def save_ranked_predictions( protein_affinity_value = ( float(protein_affinity_match.group(1)) if protein_affinity_match else None ) - protein_plddt_postfix = ( + protein_plddt_suffix = ( f"_plddt{protein_plddt_value:.7f}" if protein_plddt_value else "" ) - protein_affinity_postfix = ( + protein_affinity_suffix = ( f"_affinity{protein_affinity_value:.7f}" if protein_affinity_value else "" ) protein_output_filepath = os.path.join( cfg.output_dir, - name + ligand_relaxed_postfix, - f"{method}_rank{rank}_{ranking_metric}{ranking_metric_value:.2e}{protein_plddt_postfix}{protein_affinity_postfix}{protein_relaxed_postfix if 0 < len(relaxation_success_list) <= len(ranked_predictions) and relaxation_success_list[index] else ''}.pdb", + name + ligand_relaxed_suffix, + f"{method}_rank{rank}_{ranking_metric}{ranking_metric_value:.2e}{protein_plddt_suffix}{protein_affinity_suffix}{protein_relaxed_suffix if 0 < len(relaxation_success_list) <= len(ranked_predictions) and relaxation_success_list[index] else ''}.pdb", ) if not os.path.exists(ligand_output_filepath.replace(".sdf", "_bust_results.csv")): @@ -1892,7 +1993,7 @@ def save_ranked_predictions( output_ligand_filepaths.append(pb_validated_ligand_output_filepaths[0]) 
output_protein_filepaths.append(protein_output_filepath) - if cfg.export_file_format is not None: + if cfg.export_file_format is not None and "casp" in cfg.export_file_format: # NOTE: relaxed ligand (and potentially protein) files are used for CASP submission when `relax_complex=True` pdb_header = ( f"PFRMAT TS\nTARGET {name}\nAUTHOR {cfg.casp_author}\nMETHOD {cfg.casp_method}\n" @@ -1969,6 +2070,46 @@ def save_ranked_predictions( def main(cfg: DictConfig): """Generate predictions for a protein-ligand target pair using an ensemble of methods.""" os.makedirs(cfg.temp_protein_dir, exist_ok=True) + + if list(cfg.ensemble_methods) == ["neuralplexer"] and cfg.neuralplexer_no_ilcl: + with open_dict(cfg): + cfg.output_dir = cfg.output_dir.replace( + "top_neuralplexer", + "top_neuralplexer_no_ilcl", + ) + + if cfg.diffdock_v1_baseline: + with open_dict(cfg): + cfg.output_dir = cfg.output_dir.replace( + "top_diffdock", + "top_diffdockv1", + ) + cfg.diffdock_exec_dir = cfg.diffdock_exec_dir.replace("DiffDock", "DiffDockv1") + cfg.diffdock_input_csv_path = cfg.diffdock_input_csv_path.replace( + "DiffDock", "DiffDockv1" + ) + cfg.diffdock_model_dir = cfg.diffdock_model_dir.replace( + "forks/DiffDock/workdir/v1.1/score_model", + "forks/DiffDockv1/workdir/paper_score_model", + ) + cfg.diffdock_confidence_model_dir = cfg.diffdock_confidence_model_dir.replace( + "forks/DiffDock/workdir/v1.1/confidence_model", + "forks/DiffDockv1/workdir/paper_confidence_model", + ) + cfg.diffdock_output_dir = cfg.diffdock_output_dir.replace("DiffDock", "DiffDockv1") + cfg.diffdock_actual_steps = 18 + cfg.diffdock_no_final_step_noise = True + + if cfg.pocket_only_baseline: + with open_dict(cfg): + cfg.input_csv_filepath = cfg.input_csv_filepath.replace( + "ensemble_inputs.csv", "ensemble_pocket_only_inputs.csv" + ) + cfg.output_dir = cfg.output_dir.replace( + f"top_{cfg.ensemble_ranking_method}", + f"top_{cfg.ensemble_ranking_method}_pocket_only", + ) + input_csv_df = 
pd.read_csv(cfg.input_csv_filepath) assert len(input_csv_df.name.unique()) == len( input_csv_df @@ -2005,8 +2146,9 @@ def main(cfg: DictConfig): assert os.path.exists( cfg.ensemble_benchmarking_apo_protein_dir ), "Ensemble benchmarking for protein pocket-based experiments requires `ensemble_benchmarking_apo_protein_dir` to be set to a valid directory." + if not os.path.exists(cfg.ensemble_benchmarking_apo_protein_dir): - # NOTE: this is necessary to support e.g., CASP15 ensemble benchmarking + # NOTE: this may be necessary to support e.g., CASP15 ensemble benchmarking with open_dict(cfg): cfg.ensemble_benchmarking_apo_protein_dir = os.path.join( Path(cfg.ensemble_benchmarking_apo_protein_dir).parent, @@ -2050,9 +2192,10 @@ def main(cfg: DictConfig): temp_protein_filepath = row.protein_input else: if cfg.ensemble_benchmarking: - raise FileNotFoundError( - "An input (e.g., predicted) protein structure must be available for ensemble benchmarking." + logging.warning( + f"The input (e.g., predicted) protein structure ({row.protein_input}) must be locally available for ensemble benchmarking. Skipping target {row.name + config}." ) + continue # NOTE: a placeholder protein sequence is used when making ligand-only predictions row_protein_input = ( row.protein_input @@ -2108,11 +2251,9 @@ def main(cfg: DictConfig): continue # skip to the next target if no predictions from any method were found - predictions_found = False - for method in ensemble_predictions_dict: - if len(ensemble_predictions_dict[method]): - predictions_found = True - break + predictions_found = any( + len(ensemble_predictions_dict[method]) for method in ensemble_predictions_dict + ) if not predictions_found: logger.warning( f"No predictions from any method found for target {row.name}. Skipping..." 
diff --git a/posebench/models/fabind_inference.py b/posebench/models/fabind_inference.py index 87a3b5b4..4b3ebe80 100644 --- a/posebench/models/fabind_inference.py +++ b/posebench/models/fabind_inference.py @@ -8,7 +8,7 @@ import hydra import rootutils -from omegaconf import DictConfig +from omegaconf import DictConfig, open_dict rootutils.setup_root(__file__, indicator=".project-root", pythonpath=True) @@ -33,6 +33,19 @@ def main(cfg: DictConfig): if cfg.max_num_inputs else cfg.input_csv_path ) + + if cfg.pocket_only_baseline: + with open_dict(cfg): + cfg.save_mols_dir = cfg.save_mols_dir.replace( + f"fabind_{cfg.dataset}", f"fabind_pocket_only_{cfg.dataset}" + ) + cfg.save_pt_dir = cfg.save_pt_dir.replace( + f"fabind_{cfg.dataset}", f"fabind_pocket_only_{cfg.dataset}" + ) + cfg.output_dir = cfg.output_dir.replace( + f"fabind_{cfg.dataset}", f"fabind_pocket_only_{cfg.dataset}" + ) + assert os.path.exists(input_csv_path), f"Input CSV file `{input_csv_path}` not found." try: subprocess.run( diff --git a/posebench/models/inference_relaxation.py b/posebench/models/inference_relaxation.py index 0bd44525..d474af2f 100644 --- a/posebench/models/inference_relaxation.py +++ b/posebench/models/inference_relaxation.py @@ -12,7 +12,7 @@ import hydra import rootutils -from omegaconf import DictConfig +from omegaconf import DictConfig, open_dict rootutils.setup_root(__file__, indicator=".project-root", pythonpath=True) @@ -68,6 +68,13 @@ def relax_inference_results( for filepath in protein_filepaths if "_protein.pdb" in filepath.name and "relaxed" not in filepath.parent.stem ] + elif cfg.method == "chai-lab": + protein_filepaths = [ + filepath + for filepath in protein_filepaths + if "model_idx_0_protein.pdb" in filepath.name + and "relaxed" not in filepath.parent.stem + ] if not ligand_file_dir.exists() or cfg.method == "dynamicbind": ligand_filepaths = [ file @@ -99,6 +106,13 @@ def relax_inference_results( for filepath in ligand_filepaths if "_ligand.sdf" in filepath.name 
and "relaxed" not in filepath.parent.stem ] + elif cfg.method == "chai-lab": + ligand_filepaths = [ + filepath + for filepath in ligand_filepaths + if "model_idx_0_ligand.sdf" in filepath.name + and "relaxed" not in filepath.parent.stem + ] elif cfg.method == "vina": ligand_filepaths = [ filepath for filepath in ligand_filepaths if "relaxed" not in filepath.stem @@ -130,6 +144,8 @@ def relax_inference_results( for ligand_filepath in ligand_filepaths ) ] + elif cfg.method == "chai-lab": + raise NotImplementedError("Cannot subset `chai-lab` protein predictions at this time.") else: protein_filepaths = [ protein_filepath @@ -289,6 +305,17 @@ def relax_single_filepair( protein_filepath.stem.replace("_protein", ""), f"{protein_filepath.stem}_relaxed.pdb", ) + elif cfg.method == "chai-lab": + output_filepath = Path( + output_file_dir, + ligand_filepath.parent.stem, + f"{ligand_filepath.stem}_relaxed.sdf", + ) + protein_output_filepath = Path( + output_file_dir, + protein_filepath.parent.stem, + f"{protein_filepath.stem}_relaxed.pdb", + ) elif cfg.method == "vina": output_filepath = Path( output_file_dir, @@ -374,6 +401,10 @@ def main(cfg: DictConfig): """Run the relaxation inference process using the specified configuration.""" logger.setLevel(cfg.log_level) + if cfg.v1_baseline: + with open_dict(cfg): + cfg.temp_dir = cfg.temp_dir.replace(cfg.method, f"{cfg.method}v1") + protein_file_dir = Path(cfg.protein_dir) ligand_file_dir = Path(cfg.ligand_dir) output_file_dir = Path(cfg.output_dir) diff --git a/posebench/models/neuralplexer_inference.py b/posebench/models/neuralplexer_inference.py index bf6f9bd3..b215358c 100644 --- a/posebench/models/neuralplexer_inference.py +++ b/posebench/models/neuralplexer_inference.py @@ -30,7 +30,13 @@ def main(cfg: DictConfig): :param cfg: Configuration dictionary from the hydra YAML file. 
""" - if cfg.no_pretraining: + input_csv_path = ( + cfg.input_csv_path.replace(".csv", f"_first_{cfg.max_num_inputs}.csv") + if cfg.max_num_inputs + else cfg.input_csv_path + ) + + if cfg.no_ilcl: with open_dict(cfg): cfg.frozen_prot = True cfg.model_checkpoint = os.path.join( @@ -40,17 +46,21 @@ def main(cfg: DictConfig): ) cfg.out_path = os.path.join( os.path.dirname(cfg.out_path), - os.path.basename(cfg.out_path).replace("neuralplexer", "neuralplexer_npt"), + os.path.basename(cfg.out_path).replace("neuralplexer", "neuralplexer_no_ilcl"), ) assert os.path.exists( cfg.model_checkpoint - ), f"Non-pretrained (NPT) model checkpoint `{cfg.model_checkpoint}` not found." + ), f"Model checkpoint trained without an inter-ligand clash loss (ILCL) `{cfg.model_checkpoint}` not found." + + if cfg.pocket_only_baseline: + with open_dict(cfg): + cfg.out_path = os.path.join( + os.path.dirname(cfg.out_path), + os.path.basename(cfg.out_path).replace("neuralplexer", "neuralplexer_pocket_only"), + ) + input_csv_path = cfg.input_csv_path.replace("neuralplexer", "neuralplexer_pocket_only") + os.makedirs(cfg.out_path, exist_ok=True) - input_csv_path = ( - cfg.input_csv_path.replace(".csv", f"_first_{cfg.max_num_inputs}.csv") - if cfg.max_num_inputs - else cfg.input_csv_path - ) assert os.path.exists(input_csv_path), f"Input CSV file `{input_csv_path}` not found." for _, row in pd.read_csv(input_csv_path).iterrows(): out_dir = os.path.join(cfg.out_path, row.id) diff --git a/posebench/models/rfaa_inference.py b/posebench/models/rfaa_inference.py index 74b322e4..2f0d9698 100644 --- a/posebench/models/rfaa_inference.py +++ b/posebench/models/rfaa_inference.py @@ -86,6 +86,11 @@ def main(cfg: DictConfig): :param cfg: Configuration dictionary from the hydra YAML file. 
""" + if cfg.pocket_only_baseline: + with open_dict(cfg): + cfg.input_dir = cfg.input_dir.replace(cfg.dataset, f"{cfg.dataset}_pocket_only") + cfg.output_dir = cfg.output_dir.replace(cfg.dataset, f"{cfg.dataset}_pocket_only") + if cfg.run_inference_directly: num_dir_items_found = 0 for item in os.listdir(cfg.input_dir): diff --git a/posebench/models/vina_inference.py b/posebench/models/vina_inference.py index 3cad1474..fd3ce06d 100644 --- a/posebench/models/vina_inference.py +++ b/posebench/models/vina_inference.py @@ -426,6 +426,12 @@ def main(cfg: DictConfig): :param cfg: Configuration dictionary from the hydra YAML file. """ + if cfg.pocket_only_baseline: + with open_dict(cfg): + cfg.output_dir = cfg.output_dir.replace( + f"vina_{cfg.method}", f"vina_pocket_only_{cfg.method}" + ) + if cfg.protein_filepath and cfg.ligand_filepaths and cfg.apo_protein_filepath: # support ensemble inference logger.info("Beginning AutoDock Vina inference...") @@ -473,26 +479,23 @@ def main(cfg: DictConfig): "AutoDock Vina inference requires protein, ligand, and apo protein files as inputs." 
) - if cfg.dataset == "casp15": - with open_dict(cfg): - cfg.input_protein_structure_dir = str( - Path(cfg.input_protein_structure_dir).parent / "predicted_structures" - ) - elif cfg.pocket_only_baseline: + if cfg.pocket_only_baseline: with open_dict(cfg): cfg.input_protein_structure_dir += "_bs_cropped" + assert os.path.exists( cfg.input_protein_structure_dir ), f"Input protein structure directory not found: {cfg.input_protein_structure_dir}" if cfg.method == "p2rank": # support P2Rank input parsing + pocket_only_suffix = "_pocket_only" if cfg.pocket_only_baseline else "" with open_dict(cfg): cfg.input_dir = os.path.join( "forks", "DiffDock", "inference", - f"diffdock_{cfg.dataset}_output_{cfg.repeat_index}", + f"diffdock{pocket_only_suffix}_{cfg.dataset}_output_{cfg.repeat_index}", ) assert os.path.exists( cfg.input_dir @@ -525,7 +528,7 @@ def main(cfg: DictConfig): apo_protein_filepaths = glob.glob( os.path.join( cfg.input_protein_structure_dir, - f"{item.replace('casp15_', '')}{'' if cfg.dataset == 'casp15' else '*_holo_aligned_esmfold_protein'}.pdb", + f"{item.replace('casp15_', '')}{'' if cfg.dataset == 'casp15' else '*_holo_aligned_predicted_protein'}.pdb", ) ) if not apo_protein_filepaths: @@ -538,7 +541,7 @@ def main(cfg: DictConfig): protein_filepaths = glob.glob( os.path.join( cfg.input_protein_structure_dir, - f"{item}{'' if cfg.dataset == 'casp15' else '*_holo_aligned_esmfold_protein'}.pdb", + f"{item}{'' if cfg.dataset == 'casp15' else '*_holo_aligned_predicted_protein'}.pdb", ) ) ligand_filepath = os.path.join(item_path, "rank1.sdf") @@ -600,7 +603,7 @@ def main(cfg: DictConfig): protein_filepaths = glob.glob( os.path.join( cfg.input_protein_structure_dir, - f"{item}{'' if cfg.dataset == 'casp15' else '*_holo_aligned_esmfold_protein'}.pdb", + f"{item}{'' if cfg.dataset == 'casp15' else '*_holo_aligned_predicted_protein'}.pdb", ) ) ligand_filepath = os.path.join(item_path, "rank1.sdf") diff --git a/posebench/utils/data_utils.py 
b/posebench/utils/data_utils.py index f0574fd0..f2fd69a4 100644 --- a/posebench/utils/data_utils.py +++ b/posebench/utils/data_utils.py @@ -94,7 +94,8 @@ def parse_inference_inputs_from_dir( sanitize=False, ) if mol is None: - raise ValueError(f"No ligand file found for PDB ID {pdb_id}") + logger.info(f"No ligand file found for PDB ID {pdb_id}") + continue mol_smiles = Chem.MolToSmiles(mol) if mol_smiles is not None: smiles_and_pdb_id_list.append((mol_smiles, pdb_id)) diff --git a/pyproject.toml b/pyproject.toml index af2b9107..9437e6d1 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,6 +1,6 @@ [project] name = "posebench" -version = "0.4.0" +version = "0.5.0" description = "Comprehensive benchmarking of protein-ligand structure generation methods" authors = [ { name = "Alex Morehead", email = "acmwhb@umsystem.edu" } diff --git a/scripts/benchmark_baseline_compute_resources.py b/scripts/benchmark_baseline_compute_resources.py index 813da7b5..764009fa 100644 --- a/scripts/benchmark_baseline_compute_resources.py +++ b/scripts/benchmark_baseline_compute_resources.py @@ -26,14 +26,23 @@ def assemble_baseline_command(cfg: DictConfig) -> List[str]: :param cfg: The configuration object. :return: The baseline command as a list of strings (i.e., command segments). 
""" - if cfg.method in ["diffdock", "fabind", "dynamicbind", "neuralplexer", "rfaa", "vina"]: - # NOTE: When running RoseTTAFold-All-Atom, the `RFAA` Conda environment must be activated instead of the `PoseBenchmark` environment - vina_postfix = f" method={cfg.vina_binding_site_method}" if cfg.method == "vina" else "" - cuda_device_postfix = ( + if cfg.method in [ + "diffdock", + "fabind", + "dynamicbind", + "neuralplexer", + "rfaa", + "chai-lab", + "vina", + ]: + # NOTE: When running RoseTTAFold-All-Atom (or Chai-1), the `RFAA` (`chai-lab`) Conda environment must be activated instead of the `PoseBench` environment + vina_suffix = f" method={cfg.vina_binding_site_method}" if cfg.method == "vina" else "" + cuda_device_suffix = ( "" if cfg.method == "vina" else f" cuda_device_index={cfg.cuda_device_index}" ) - rfaa_postfix = " run_inference_directly=true" if cfg.method == "rfaa" else "" - return f"python3 posebench/models/{cfg.method}_inference.py dataset={cfg.dataset} repeat_index={cfg.repeat_index} max_num_inputs={cfg.max_num_inputs}{vina_postfix}{cuda_device_postfix}{rfaa_postfix}".split() + rfaa_suffix = " run_inference_directly=true" if cfg.method == "rfaa" else "" + method = cfg.method.split("-")[0] if cfg.method == "chai-lab" else cfg.method + return f"python3 posebench/models/{method}_inference.py dataset={cfg.dataset} repeat_index={cfg.repeat_index} max_num_inputs={cfg.max_num_inputs}{vina_suffix}{cuda_device_suffix}{rfaa_suffix}".split() else: raise ValueError(f"Invalid method: {cfg.method}") diff --git a/scripts/build_inference_script.py b/scripts/build_inference_script.py index c7522eae..4ee44654 100644 --- a/scripts/build_inference_script.py +++ b/scripts/build_inference_script.py @@ -4,10 +4,11 @@ import logging import os -from typing import Literal +from typing import List, Literal, Optional import hydra import rootutils +from beartype import beartype from omegaconf import DictConfig rootutils.setup_root(__file__, indicator=".project-root", 
pythonpath=True) @@ -18,121 +19,144 @@ logger = logging.getLogger(__name__) -# Commands dictionary +# Constants COMMANDS = { "diffdock": { "prepare_input": [ - "python3 posebench/data/diffdock_input_preparation.py dataset={dataset}", + "python3 posebench/data/diffdock_input_preparation.py dataset={dataset} pocket_only_baseline={pocket_only_baseline}", ], "run_inference": [ - "python3 posebench/models/diffdock_inference.py dataset={dataset} repeat_index={repeat_index}", + "python3 posebench/models/diffdock_inference.py dataset={dataset} cuda_device_index={cuda_device_index} pocket_only_baseline={pocket_only_baseline} v1_baseline={v1_baseline} repeat_index={repeat_index}", ], "relax": [ - "python3 posebench/models/inference_relaxation.py method=diffdock dataset={dataset} remove_initial_protein_hydrogens=true assign_partial_charges_manually=true num_processes=1 repeat_index={repeat_index}", + "python3 posebench/models/inference_relaxation.py method=diffdock dataset={dataset} cuda_device_index={cuda_device_index} pocket_only_baseline={pocket_only_baseline} v1_baseline={v1_baseline} relax_protein={relax_protein} remove_initial_protein_hydrogens=true assign_partial_charges_manually=true repeat_index={repeat_index}", ], "analyze_results": [ - "python3 posebench/analysis/inference_analysis.py method=diffdock dataset={dataset} repeat_index={repeat_index}", + "python3 posebench/analysis/inference_analysis.py method=diffdock dataset={dataset} pocket_only_baseline={pocket_only_baseline} v1_baseline={v1_baseline} relax_protein={relax_protein} repeat_index={repeat_index}", ], "assemble_casp15": [ - "python3 posebench/models/ensemble_generation.py ensemble_methods=[diffdock] input_csv_filepath=data/test_cases/casp15/ensemble_inputs.csv output_dir=data/test_cases/casp15/top_diffdock_ensemble_predictions_{repeat_index} skip_existing=true relax_method_ligands_post_ranking=false export_file_format=casp15 export_top_n=5 combine_casp_output_files=true max_method_predictions=40 
method_top_n_to_select=40 resume=true ensemble_benchmarking=true ensemble_benchmarking_dataset=casp15 cuda_device_index={cuda_device_index} ensemble_benchmarking_repeat_index={repeat_index}", - "python3 posebench/models/ensemble_generation.py ensemble_methods=[diffdock] input_csv_filepath=data/test_cases/casp15/ensemble_inputs.csv output_dir=data/test_cases/casp15/top_diffdock_ensemble_predictions_{repeat_index} skip_existing=true relax_method_ligands_post_ranking=true export_file_format=casp15 export_top_n=5 combine_casp_output_files=true max_method_predictions=40 method_top_n_to_select=40 resume=true ensemble_benchmarking=true ensemble_benchmarking_dataset=casp15 cuda_device_index={cuda_device_index} ensemble_benchmarking_repeat_index={repeat_index}", + "python3 posebench/models/ensemble_generation.py ensemble_methods=[diffdock] ensemble_ranking_method={ensemble_ranking_method} diffdock_v1_baseline={v1_baseline} input_csv_filepath=data/test_cases/casp15/ensemble_inputs.csv output_dir=data/test_cases/casp15/top_diffdock_ensemble_predictions_{repeat_index} skip_existing=true relax_method_ligands_post_ranking=false relax_protein={relax_protein} export_file_format=casp15 export_top_n=5 combine_casp_output_files=true max_method_predictions=40 method_top_n_to_select=5 resume=true ensemble_benchmarking=true ensemble_benchmarking_dataset=casp15 cuda_device_index={cuda_device_index} ensemble_benchmarking_repeat_index={repeat_index}", + "python3 posebench/models/ensemble_generation.py ensemble_methods=[diffdock] ensemble_ranking_method={ensemble_ranking_method} diffdock_v1_baseline={v1_baseline} input_csv_filepath=data/test_cases/casp15/ensemble_inputs.csv output_dir=data/test_cases/casp15/top_diffdock_ensemble_predictions_{repeat_index} skip_existing=true relax_method_ligands_post_ranking=true relax_protein={relax_protein} export_file_format=casp15 export_top_n=5 combine_casp_output_files=true max_method_predictions=40 method_top_n_to_select=5 resume=true 
ensemble_benchmarking=true ensemble_benchmarking_dataset=casp15 cuda_device_index={cuda_device_index} ensemble_benchmarking_repeat_index={repeat_index}", ], "analyze_casp15": [ - "python3 posebench/analysis/inference_analysis_casp.py method=diffdock dataset=casp15 repeat_index={repeat_index}", + "python3 posebench/analysis/inference_analysis_casp.py method=diffdock dataset=casp15 relax_protein={relax_protein} v1_baseline={v1_baseline} repeat_index={repeat_index}", ], }, "fabind": { "prepare_input": [ - "python3 posebench/data/fabind_input_preparation.py dataset={dataset}", + "python3 posebench/data/fabind_input_preparation.py dataset={dataset} pocket_only_baseline={pocket_only_baseline}", ], "run_inference": [ - "python3 posebench/models/fabind_inference.py dataset={dataset} repeat_index={repeat_index}", + "python3 posebench/models/fabind_inference.py dataset={dataset} cuda_device_index={cuda_device_index} pocket_only_baseline={pocket_only_baseline} repeat_index={repeat_index}", ], "relax": [ - "python3 posebench/models/inference_relaxation.py method=fabind dataset={dataset} remove_initial_protein_hydrogens=true assign_partial_charges_manually=true num_processes=1 repeat_index={repeat_index}", + "python3 posebench/models/inference_relaxation.py method=fabind dataset={dataset} cuda_device_index={cuda_device_index} pocket_only_baseline={pocket_only_baseline} relax_protein={relax_protein} remove_initial_protein_hydrogens=true assign_partial_charges_manually=true repeat_index={repeat_index}", ], "analyze_results": [ - "python3 posebench/analysis/inference_analysis.py method=fabind dataset={dataset} repeat_index={repeat_index}", + "python3 posebench/analysis/inference_analysis.py method=fabind dataset={dataset} pocket_only_baseline={pocket_only_baseline} relax_protein={relax_protein} repeat_index={repeat_index}", ], }, "dynamicbind": { "prepare_input": [ - "python3 posebench/data/dynamicbind_input_preparation.py dataset={dataset}", - 'python3 
posebench/data/dynamicbind_input_preparation.py dataset=casp15 input_data_dir="$PWD"/data/casp15_set/targets', + "python3 posebench/data/dynamicbind_input_preparation.py dataset={dataset} pocket_only_baseline={pocket_only_baseline}", ], "run_inference": [ - "python3 posebench/models/dynamicbind_inference.py dataset={dataset} repeat_index={repeat_index}", - 'python3 posebench/models/dynamicbind_inference.py dataset=casp15 batch_size=1 input_data_dir="$PWD"/data/casp15_set/predicted_structures repeat_index={repeat_index}', + "python3 posebench/models/dynamicbind_inference.py dataset={dataset} cuda_device_index={cuda_device_index} pocket_only_baseline={pocket_only_baseline} repeat_index={repeat_index}", ], "relax": [ - "python3 posebench/models/inference_relaxation.py method=dynamicbind dataset={dataset} remove_initial_protein_hydrogens=true assign_partial_charges_manually=true num_processes=1 repeat_index={repeat_index}", + "python3 posebench/models/inference_relaxation.py method=dynamicbind dataset={dataset} cuda_device_index={cuda_device_index} pocket_only_baseline={pocket_only_baseline} relax_protein={relax_protein} remove_initial_protein_hydrogens=true assign_partial_charges_manually=true repeat_index={repeat_index}", ], "analyze_results": [ - "python3 posebench/analysis/inference_analysis.py method=dynamicbind dataset={dataset} repeat_index={repeat_index}", + "python3 posebench/analysis/inference_analysis.py method=dynamicbind dataset={dataset} pocket_only_baseline={pocket_only_baseline} relax_protein={relax_protein} repeat_index={repeat_index}", ], "assemble_casp15": [ - "python3 posebench/models/ensemble_generation.py ensemble_methods=[dynamicbind] input_csv_filepath=data/test_cases/casp15/ensemble_inputs.csv output_dir=data/test_cases/casp15/top_dynamicbind_ensemble_predictions_{repeat_index} skip_existing=true relax_method_ligands_post_ranking=false export_file_format=casp15 export_top_n=5 combine_casp_output_files=true max_method_predictions=40 
method_top_n_to_select=40 resume=true ensemble_benchmarking=true ensemble_benchmarking_dataset=casp15 cuda_device_index={cuda_device_index} ensemble_benchmarking_repeat_index={repeat_index}", - "python3 posebench/models/ensemble_generation.py ensemble_methods=[dynamicbind] input_csv_filepath=data/test_cases/casp15/ensemble_inputs.csv output_dir=data/test_cases/casp15/top_dynamicbind_ensemble_predictions_{repeat_index} skip_existing=true relax_method_ligands_post_ranking=true export_file_format=casp15 export_top_n=5 combine_casp_output_files=true max_method_predictions=40 method_top_n_to_select=40 resume=true ensemble_benchmarking=true ensemble_benchmarking_dataset=casp15 cuda_device_index={cuda_device_index} ensemble_benchmarking_repeat_index={repeat_index}", + "python3 posebench/models/ensemble_generation.py ensemble_methods=[dynamicbind] ensemble_ranking_method={ensemble_ranking_method} input_csv_filepath=data/test_cases/casp15/ensemble_inputs.csv output_dir=data/test_cases/casp15/top_dynamicbind_ensemble_predictions_{repeat_index} skip_existing=true relax_method_ligands_post_ranking=false relax_protein={relax_protein} export_file_format=casp15 export_top_n=5 combine_casp_output_files=true max_method_predictions=40 method_top_n_to_select=5 resume=true ensemble_benchmarking=true ensemble_benchmarking_dataset=casp15 cuda_device_index={cuda_device_index} ensemble_benchmarking_repeat_index={repeat_index}", + "python3 posebench/models/ensemble_generation.py ensemble_methods=[dynamicbind] ensemble_ranking_method={ensemble_ranking_method} input_csv_filepath=data/test_cases/casp15/ensemble_inputs.csv output_dir=data/test_cases/casp15/top_dynamicbind_ensemble_predictions_{repeat_index} skip_existing=true relax_method_ligands_post_ranking=true relax_protein={relax_protein} export_file_format=casp15 export_top_n=5 combine_casp_output_files=true max_method_predictions=40 method_top_n_to_select=5 resume=true ensemble_benchmarking=true ensemble_benchmarking_dataset=casp15 
cuda_device_index={cuda_device_index} ensemble_benchmarking_repeat_index={repeat_index}", ], "analyze_casp15": [ - "python3 posebench/analysis/inference_analysis_casp.py method=dynamicbind dataset=casp15 repeat_index={repeat_index}", + "python3 posebench/analysis/inference_analysis_casp.py method=dynamicbind dataset=casp15 relax_protein={relax_protein} repeat_index={repeat_index}", ], }, "neuralplexer": { "prepare_input": [ - "python3 posebench/data/neuralplexer_input_preparation.py dataset={dataset}", - 'python3 posebench/data/neuralplexer_input_preparation.py dataset=casp15 input_data_dir="$PWD"/data/casp15_set/targets input_receptor_structure_dir="$PWD"/data/casp15_set/predicted_structures', + "python3 posebench/data/neuralplexer_input_preparation.py dataset={dataset} pocket_only_baseline={pocket_only_baseline}", ], "run_inference": [ - "python3 posebench/models/neuralplexer_inference.py dataset={dataset} repeat_index={repeat_index}", - "python3 posebench/models/neuralplexer_inference.py dataset=casp15 repeat_index={repeat_index}", + "python3 posebench/models/neuralplexer_inference.py dataset={dataset} cuda_device_index={cuda_device_index} pocket_only_baseline={pocket_only_baseline} no_ilcl={no_ilcl} repeat_index={repeat_index}", ], "relax": [ - "python3 posebench/models/inference_relaxation.py method=neuralplexer dataset={dataset} num_processes=1 remove_initial_protein_hydrogens=true assign_partial_charges_manually=true cache_files=false repeat_index={repeat_index}", + "python3 posebench/models/inference_relaxation.py method=neuralplexer dataset={dataset} cuda_device_index={cuda_device_index} pocket_only_baseline={pocket_only_baseline} relax_protein={relax_protein} remove_initial_protein_hydrogens=true assign_partial_charges_manually=true repeat_index={repeat_index}", ], "align_complexes": [ - "python3 posebench/analysis/complex_alignment.py method=neuralplexer dataset={dataset} repeat_index={repeat_index}", + "python3 posebench/analysis/complex_alignment.py 
method=neuralplexer dataset={dataset} pocket_only_baseline={pocket_only_baseline} repeat_index={repeat_index}", ], "analyze_results": [ - "python3 posebench/analysis/inference_analysis.py method=neuralplexer dataset={dataset} repeat_index={repeat_index}", + "python3 posebench/analysis/inference_analysis.py method=neuralplexer dataset={dataset} pocket_only_baseline={pocket_only_baseline} relax_protein={relax_protein} repeat_index={repeat_index}", ], "assemble_casp15": [ - "python3 posebench/models/ensemble_generation.py ensemble_methods=[neuralplexer] input_csv_filepath=data/test_cases/casp15/ensemble_inputs.csv output_dir=data/test_cases/casp15/top_neuralplexer_ensemble_predictions_{repeat_index} skip_existing=true relax_method_ligands_post_ranking=false export_file_format=casp15 export_top_n=5 combine_casp_output_files=true max_method_predictions=40 method_top_n_to_select=40 resume=true ensemble_benchmarking=true ensemble_benchmarking_dataset=casp15 cuda_device_index={cuda_device_index} ensemble_benchmarking_repeat_index={repeat_index}", - "python3 posebench/models/ensemble_generation.py ensemble_methods=[neuralplexer] input_csv_filepath=data/test_cases/casp15/ensemble_inputs.csv output_dir=data/test_cases/casp15/top_neuralplexer_ensemble_predictions_{repeat_index} skip_existing=true relax_method_ligands_post_ranking=true export_file_format=casp15 export_top_n=5 combine_casp_output_files=true max_method_predictions=40 method_top_n_to_select=40 resume=true ensemble_benchmarking=true ensemble_benchmarking_dataset=casp15 cuda_device_index={cuda_device_index} ensemble_benchmarking_repeat_index={repeat_index}", + "python3 posebench/models/ensemble_generation.py ensemble_methods=[neuralplexer] ensemble_ranking_method={ensemble_ranking_method} neuralplexer_no_ilcl={no_ilcl} input_csv_filepath=data/test_cases/casp15/ensemble_inputs.csv output_dir=data/test_cases/casp15/top_neuralplexer_ensemble_predictions_{repeat_index} skip_existing=true 
relax_method_ligands_post_ranking=false relax_protein={relax_protein} export_file_format=casp15 export_top_n=5 combine_casp_output_files=true max_method_predictions=40 method_top_n_to_select=5 resume=true ensemble_benchmarking=true ensemble_benchmarking_dataset=casp15 cuda_device_index={cuda_device_index} ensemble_benchmarking_repeat_index={repeat_index}", + "python3 posebench/models/ensemble_generation.py ensemble_methods=[neuralplexer] ensemble_ranking_method={ensemble_ranking_method} neuralplexer_no_ilcl={no_ilcl} input_csv_filepath=data/test_cases/casp15/ensemble_inputs.csv output_dir=data/test_cases/casp15/top_neuralplexer_ensemble_predictions_{repeat_index} skip_existing=true relax_method_ligands_post_ranking=true relax_protein={relax_protein} export_file_format=casp15 export_top_n=5 combine_casp_output_files=true max_method_predictions=40 method_top_n_to_select=5 resume=true ensemble_benchmarking=true ensemble_benchmarking_dataset=casp15 cuda_device_index={cuda_device_index} ensemble_benchmarking_repeat_index={repeat_index}", ], "analyze_casp15": [ - "python3 posebench/analysis/inference_analysis_casp.py method=neuralplexer dataset=casp15 repeat_index={repeat_index}", + "python3 posebench/analysis/inference_analysis_casp.py method=neuralplexer dataset=casp15 no_ilcl={no_ilcl} relax_protein={relax_protein} repeat_index={repeat_index}", ], }, "rfaa": { "prepare_input": [ - "python3 posebench/data/rfaa_input_preparation.py dataset={dataset}", - 'python3 posebench/data/rfaa_input_preparation.py dataset=casp15 input_data_dir="$PWD"/data/casp15_set/targets', + "python3 posebench/data/rfaa_input_preparation.py dataset={dataset} pocket_only_baseline={pocket_only_baseline}", ], "run_inference": [ "conda activate forks/RoseTTAFold-All-Atom/RFAA/", - "python3 posebench/models/rfaa_inference.py dataset={dataset} run_inference_directly=true", - "python3 posebench/models/rfaa_inference.py dataset=casp15 run_inference_directly=true", + "python3 
posebench/models/rfaa_inference.py dataset={dataset} cuda_device_index={cuda_device_index} pocket_only_baseline={pocket_only_baseline} run_inference_directly=true", "conda deactivate", ], "extract_outputs": [ - "python3 posebench/data/rfaa_output_extraction.py dataset={dataset}", + "python3 posebench/data/rfaa_output_extraction.py dataset={dataset} pocket_only_baseline={pocket_only_baseline} repeat_index={repeat_index}", ], "relax": [ - "python3 posebench/models/inference_relaxation.py method=rfaa dataset={dataset} num_processes=1 remove_initial_protein_hydrogens=true", + "python3 posebench/models/inference_relaxation.py method=rfaa dataset={dataset} cuda_device_index={cuda_device_index} pocket_only_baseline={pocket_only_baseline} relax_protein={relax_protein} remove_initial_protein_hydrogens=true", ], "align_complexes": [ - "python3 posebench/analysis/complex_alignment.py method=rfaa dataset={dataset}", + "python3 posebench/analysis/complex_alignment.py method=rfaa dataset={dataset} pocket_only_baseline={pocket_only_baseline}", ], "analyze_results": [ - "python3 posebench/analysis/inference_analysis.py method=rfaa dataset={dataset}", + "python3 posebench/analysis/inference_analysis.py method=rfaa dataset={dataset} pocket_only_baseline={pocket_only_baseline} relax_protein={relax_protein}", ], "assemble_casp15": [ - "python3 posebench/models/ensemble_generation.py ensemble_methods=[rfaa] input_csv_filepath=data/test_cases/casp15/ensemble_inputs.csv output_dir=data/test_cases/casp15/top_rfaa_ensemble_predictions_{repeat_index} skip_existing=true relax_method_ligands_post_ranking=false export_file_format=casp15 export_top_n=5 combine_casp_output_files=true max_method_predictions=40 method_top_n_to_select=40 resume=true ensemble_benchmarking=true ensemble_benchmarking_dataset=casp15 cuda_device_index={cuda_device_index} ensemble_benchmarking_repeat_index={repeat_index}", - "python3 posebench/models/ensemble_generation.py ensemble_methods=[rfaa] 
input_csv_filepath=data/test_cases/casp15/ensemble_inputs.csv output_dir=data/test_cases/casp15/top_rfaa_ensemble_predictions_{repeat_index} skip_existing=true relax_method_ligands_post_ranking=true export_file_format=casp15 export_top_n=5 combine_casp_output_files=true max_method_predictions=40 method_top_n_to_select=40 resume=true ensemble_benchmarking=true ensemble_benchmarking_dataset=casp15 cuda_device_index={cuda_device_index} ensemble_benchmarking_repeat_index={repeat_index}", + "python3 posebench/models/ensemble_generation.py ensemble_methods=[rfaa] ensemble_ranking_method={ensemble_ranking_method} input_csv_filepath=data/test_cases/casp15/ensemble_inputs.csv output_dir=data/test_cases/casp15/top_rfaa_ensemble_predictions_{repeat_index} skip_existing=true relax_method_ligands_post_ranking=false relax_protein={relax_protein} export_file_format=casp15 export_top_n=5 combine_casp_output_files=true max_method_predictions=40 method_top_n_to_select=5 resume=true ensemble_benchmarking=true ensemble_benchmarking_dataset=casp15 cuda_device_index={cuda_device_index} ensemble_benchmarking_repeat_index={repeat_index}", + "python3 posebench/models/ensemble_generation.py ensemble_methods=[rfaa] ensemble_ranking_method={ensemble_ranking_method} input_csv_filepath=data/test_cases/casp15/ensemble_inputs.csv output_dir=data/test_cases/casp15/top_rfaa_ensemble_predictions_{repeat_index} skip_existing=true relax_method_ligands_post_ranking=true relax_protein={relax_protein} export_file_format=casp15 export_top_n=5 combine_casp_output_files=true max_method_predictions=40 method_top_n_to_select=5 resume=true ensemble_benchmarking=true ensemble_benchmarking_dataset=casp15 cuda_device_index={cuda_device_index} ensemble_benchmarking_repeat_index={repeat_index}", ], "analyze_casp15": [ - "python3 posebench/analysis/inference_analysis_casp.py method=rfaa dataset=casp15 repeat_index={repeat_index}", + "python3 posebench/analysis/inference_analysis_casp.py method=rfaa dataset=casp15 
repeat_index={repeat_index} relax_protein={relax_protein} targets='[T1124, T1127v2, T1146, T1152, T1158v1, T1158v2, T1158v3, T1158v4, T1186, T1187, T1188]'", + ], + }, + "chai-lab": { + "prepare_input": [ + "python3 posebench/data/chai_input_preparation.py dataset={dataset} pocket_only_baseline={pocket_only_baseline}", + ], + "run_inference": [ + "conda activate forks/chai-lab/chai-lab/", + "python3 posebench/models/chai_inference.py dataset={dataset} cuda_device_index={cuda_device_index} pocket_only_baseline={pocket_only_baseline} repeat_index={repeat_index}", + "conda deactivate", + ], + "extract_outputs": [ + "python3 posebench/data/chai_output_extraction.py dataset={dataset} pocket_only_baseline={pocket_only_baseline} repeat_index={repeat_index}", + ], + "relax": [ + "python3 posebench/models/inference_relaxation.py method=chai-lab dataset={dataset} cuda_device_index={cuda_device_index} pocket_only_baseline={pocket_only_baseline} relax_protein={relax_protein} remove_initial_protein_hydrogens=true repeat_index={repeat_index}", + ], + "align_complexes": [ + "python3 posebench/analysis/complex_alignment.py method=chai-lab dataset={dataset} pocket_only_baseline={pocket_only_baseline} repeat_index={repeat_index}", + ], + "analyze_results": [ + "python3 posebench/analysis/inference_analysis.py method=chai-lab dataset={dataset} pocket_only_baseline={pocket_only_baseline} relax_protein={relax_protein} repeat_index={repeat_index}", + ], + "assemble_casp15": [ + "python3 posebench/models/ensemble_generation.py ensemble_methods=[chai-lab] ensemble_ranking_method={ensemble_ranking_method} input_csv_filepath=data/test_cases/casp15/ensemble_inputs.csv output_dir=data/test_cases/casp15/top_chai-lab_ensemble_predictions_{repeat_index} skip_existing=true relax_method_ligands_post_ranking=false relax_protein={relax_protein} export_file_format=casp15 export_top_n=5 combine_casp_output_files=true max_method_predictions=40 method_top_n_to_select=5 resume=true 
ensemble_benchmarking=true ensemble_benchmarking_dataset=casp15 cuda_device_index={cuda_device_index} ensemble_benchmarking_repeat_index={repeat_index}", + "python3 posebench/models/ensemble_generation.py ensemble_methods=[chai-lab] ensemble_ranking_method={ensemble_ranking_method} input_csv_filepath=data/test_cases/casp15/ensemble_inputs.csv output_dir=data/test_cases/casp15/top_chai-lab_ensemble_predictions_{repeat_index} skip_existing=true relax_method_ligands_post_ranking=true relax_protein={relax_protein} export_file_format=casp15 export_top_n=5 combine_casp_output_files=true max_method_predictions=40 method_top_n_to_select=5 resume=true ensemble_benchmarking=true ensemble_benchmarking_dataset=casp15 cuda_device_index={cuda_device_index} ensemble_benchmarking_repeat_index={repeat_index}", + ], + "analyze_casp15": [ + "python3 posebench/analysis/inference_analysis_casp.py method=chai-lab dataset=casp15 repeat_index={repeat_index} relax_protein={relax_protein} targets='[H1135, T1127v2, T1146, T1152, T1158v1, T1158v2, T1158v3, T1158v4, T1186, T1187, T1188]'", ], }, "vina": { @@ -140,21 +164,23 @@ "cp forks/DiffDock/inference/diffdock_{dataset}_inputs.csv forks/Vina/inference/vina_{dataset}_inputs.csv", ], "run_inference": [ - "python3 posebench/models/vina_inference.py dataset={dataset} method={vina_binding_site_method} repeat_index={repeat_index}", + "python3 posebench/models/vina_inference.py dataset={dataset} method={vina_binding_site_method} pocket_only_baseline={pocket_only_baseline} repeat_index={repeat_index}", ], "copy_predictions": [ "mkdir -p forks/Vina/inference/vina_{vina_binding_site_method}_{dataset}_outputs_{repeat_index} && cp -r data/test_cases/{dataset}/vina_{vina_binding_site_method}_{dataset}_outputs_{repeat_index}/* forks/Vina/inference/vina_{vina_binding_site_method}_{dataset}_outputs_{repeat_index}", ], "relax": [ - "python3 posebench/models/inference_relaxation.py method=vina vina_binding_site_method={vina_binding_site_method} 
dataset={dataset} remove_initial_protein_hydrogens=true assign_partial_charges_manually=true num_processes=1 repeat_index={repeat_index}", + "python3 posebench/models/inference_relaxation.py method=vina vina_binding_site_method={vina_binding_site_method} dataset={dataset} cuda_device_index={cuda_device_index} pocket_only_baseline={pocket_only_baseline} relax_protein={relax_protein} remove_initial_protein_hydrogens=true assign_partial_charges_manually=true repeat_index={repeat_index}", ], "analyze_results": [ - "python3 posebench/analysis/inference_analysis.py method=vina vina_binding_site_method={vina_binding_site_method} dataset={dataset} repeat_index={repeat_index}", + "python3 posebench/analysis/inference_analysis.py method=vina vina_binding_site_method={vina_binding_site_method} dataset={dataset} pocket_only_baseline={pocket_only_baseline} relax_protein={relax_protein} repeat_index={repeat_index}", + ], + "assemble_casp15": [ + "python3 posebench/models/ensemble_generation.py ensemble_methods=[vina] ensemble_ranking_method={ensemble_ranking_method} vina_binding_site_methods=[{vina_binding_site_method}] input_csv_filepath=data/test_cases/casp15/ensemble_inputs.csv output_dir=data/test_cases/casp15/top_vina_{vina_binding_site_method}_ensemble_predictions_{repeat_index} skip_existing=true relax_method_ligands_post_ranking=false relax_protein={relax_protein} export_file_format=casp15 export_top_n=5 combine_casp_output_files=true max_method_predictions=40 method_top_n_to_select=5 resume=true ensemble_benchmarking=true ensemble_benchmarking_dataset={dataset} cuda_device_index={cuda_device_index} ensemble_benchmarking_repeat_index={repeat_index}", + "python3 posebench/models/ensemble_generation.py ensemble_methods=[vina] ensemble_ranking_method={ensemble_ranking_method} vina_binding_site_methods=[{vina_binding_site_method}] input_csv_filepath=data/test_cases/casp15/ensemble_inputs.csv 
output_dir=data/test_cases/casp15/top_vina_{vina_binding_site_method}_ensemble_predictions_{repeat_index} skip_existing=true relax_method_ligands_post_ranking=true relax_protein={relax_protein} export_file_format=casp15 export_top_n=5 combine_casp_output_files=true max_method_predictions=40 method_top_n_to_select=5 resume=true ensemble_benchmarking=true ensemble_benchmarking_dataset={dataset} cuda_device_index={cuda_device_index} ensemble_benchmarking_repeat_index={repeat_index}", ], "analyze_casp15": [ - "python3 posebench/models/ensemble_generation.py ensemble_methods=[vina] vina_binding_site_methods=[{vina_binding_site_method}] input_csv_filepath=data/test_cases/casp15/ensemble_inputs.csv output_dir=data/test_cases/casp15/top_vina_{vina_binding_site_method}_ensemble_predictions_{repeat_index} skip_existing=true relax_method_ligands_post_ranking=false export_file_format=casp15 export_top_n=5 combine_casp_output_files=true max_method_predictions=40 method_top_n_to_select=40 resume=true ensemble_benchmarking=true ensemble_benchmarking_dataset={dataset} cuda_device_index={cuda_device_index} ensemble_benchmarking_repeat_index={repeat_index}", - "python3 posebench/models/ensemble_generation.py ensemble_methods=[vina] vina_binding_site_methods=[{vina_binding_site_method}] input_csv_filepath=data/test_cases/casp15/ensemble_inputs.csv output_dir=data/test_cases/casp15/top_vina_{vina_binding_site_method}_ensemble_predictions_{repeat_index} skip_existing=true relax_method_ligands_post_ranking=true export_file_format=casp15 export_top_n=5 combine_casp_output_files=true max_method_predictions=40 method_top_n_to_select=40 resume=true ensemble_benchmarking=true ensemble_benchmarking_dataset={dataset} cuda_device_index={cuda_device_index} ensemble_benchmarking_repeat_index={repeat_index}", - "python3 posebench/analysis/inference_analysis_casp.py method=vina vina_binding_site_method={vina_binding_site_method} dataset={dataset} repeat_index={repeat_index}", + "python3 
posebench/analysis/inference_analysis_casp.py method=vina vina_binding_site_method={vina_binding_site_method} dataset=casp15 relax_protein={relax_protein} repeat_index={repeat_index}", ], }, "tulip": { @@ -162,34 +188,79 @@ "python3 posebench/data/tulip_output_extraction.py dataset={dataset}", ], "relax": [ - "python3 posebench/models/inference_relaxation.py method=tulip dataset={dataset} remove_initial_protein_hydrogens=true assign_partial_charges_manually=true num_processes=1 repeat_index={repeat_index}", + "python3 posebench/models/inference_relaxation.py method=tulip dataset={dataset} cuda_device_index={cuda_device_index} relax_protein={relax_protein} remove_initial_protein_hydrogens=true assign_partial_charges_manually=true repeat_index={repeat_index}", + ], + "analyze_results": [ + "python3 posebench/analysis/inference_analysis.py method=tulip dataset={dataset} relax_protein={relax_protein} repeat_index={repeat_index}", + ], + "assemble_casp15": [ + "python3 posebench/models/ensemble_generation.py ensemble_methods=[tulip] ensemble_ranking_method={ensemble_ranking_method} input_csv_filepath=data/test_cases/casp15/ensemble_inputs.csv output_dir=data/test_cases/casp15/top_tulip_ensemble_predictions_{repeat_index} skip_existing=true relax_method_ligands_post_ranking=false relax_protein={relax_protein} export_file_format=casp15 export_top_n=5 combine_casp_output_files=true max_method_predictions=40 method_top_n_to_select=5 resume=true ensemble_benchmarking=true ensemble_benchmarking_dataset={dataset} cuda_device_index={cuda_device_index} ensemble_benchmarking_repeat_index={repeat_index}", + "python3 posebench/models/ensemble_generation.py ensemble_methods=[tulip] ensemble_ranking_method={ensemble_ranking_method} input_csv_filepath=data/test_cases/casp15/ensemble_inputs.csv output_dir=data/test_cases/casp15/top_tulip_ensemble_predictions_{repeat_index} skip_existing=true relax_method_ligands_post_ranking=true relax_protein={relax_protein} export_file_format=casp15 
export_top_n=5 combine_casp_output_files=true max_method_predictions=40 method_top_n_to_select=5 resume=true ensemble_benchmarking=true ensemble_benchmarking_dataset={dataset} cuda_device_index={cuda_device_index} ensemble_benchmarking_repeat_index={repeat_index}", + ], + "analyze_casp15": [ + "python3 posebench/analysis/inference_analysis_casp.py method=tulip dataset=casp15 repeat_index={repeat_index} relax_protein={relax_protein} targets='[H1171v1, H1171v2, H1172v1, H1172v2, H1172v3, H1172v4, T1124, T1127v2, T1152, T1158v1, T1158v2, T1158v3, T1181, T1186, T1187, T1188]'", + ], + }, + "ensemble": { + "run_inference": [ + "python3 posebench/models/ensemble_generation.py pocket_only_baseline={pocket_only_baseline} ensemble_ranking_method={ensemble_ranking_method} input_csv_filepath=data/test_cases/{dataset}/ensemble_inputs.csv output_dir=data/test_cases/{dataset}/top_consensus_ensemble_predictions_{repeat_index} max_method_predictions=40 method_top_n_to_select=3 export_top_n={export_top_n} export_file_format={dataset} skip_existing=true relax_method_ligands_post_ranking=false relax_protein={relax_protein} resume=true cuda_device_index={cuda_device_index} ensemble_methods='[diffdock, dynamicbind, neuralplexer, rfaa]' vina_binding_site_methods=[{vina_binding_site_method}] ensemble_benchmarking=true ensemble_benchmarking_dataset={dataset} ensemble_ranking_method=consensus ensemble_benchmarking_repeat_index={repeat_index}", + "python3 posebench/models/ensemble_generation.py pocket_only_baseline={pocket_only_baseline} ensemble_ranking_method={ensemble_ranking_method} input_csv_filepath=data/test_cases/{dataset}/ensemble_inputs.csv output_dir=data/test_cases/{dataset}/top_consensus_ensemble_predictions_{repeat_index} max_method_predictions=40 method_top_n_to_select=3 export_top_n={export_top_n} export_file_format={dataset} skip_existing=true relax_method_ligands_post_ranking=true relax_protein={relax_protein} resume=true cuda_device_index={cuda_device_index} 
ensemble_methods='[diffdock, dynamicbind, neuralplexer, rfaa]' vina_binding_site_methods=[{vina_binding_site_method}] ensemble_benchmarking=true ensemble_benchmarking_dataset={dataset} ensemble_ranking_method=consensus ensemble_benchmarking_repeat_index={repeat_index}", ], "analyze_results": [ - "python3 posebench/analysis/inference_analysis.py method=tulip dataset={dataset} repeat_index={repeat_index}", + "python3 posebench/analysis/inference_analysis.py method=ensemble dataset={dataset} pocket_only_baseline={pocket_only_baseline} relax_protein={relax_protein} repeat_index={repeat_index}", ], "analyze_casp15": [ - "python3 posebench/models/ensemble_generation.py ensemble_methods=[tulip] input_csv_filepath=data/test_cases/casp15/ensemble_inputs.csv output_dir=data/test_cases/casp15/top_tulip_ensemble_predictions_{repeat_index} skip_existing=true relax_method_ligands_post_ranking=false export_file_format=casp15 export_top_n=5 combine_casp_output_files=true max_method_predictions=40 method_top_n_to_select=40 resume=true ensemble_benchmarking=true ensemble_benchmarking_dataset={dataset} cuda_device_index={cuda_device_index} ensemble_benchmarking_repeat_index={repeat_index}", - "python3 posebench/models/ensemble_generation.py ensemble_methods=[tulip] input_csv_filepath=data/test_cases/casp15/ensemble_inputs.csv output_dir=data/test_cases/casp15/top_tulip_ensemble_predictions_{repeat_index} skip_existing=true relax_method_ligands_post_ranking=true export_file_format=casp15 export_top_n=5 combine_casp_output_files=true max_method_predictions=40 method_top_n_to_select=40 resume=true ensemble_benchmarking=true ensemble_benchmarking_dataset={dataset} cuda_device_index={cuda_device_index} ensemble_benchmarking_repeat_index={repeat_index}", - "python3 posebench/analysis/inference_analysis_casp.py method=tulip dataset={dataset} repeat_index={repeat_index} targets='[H1135, H1171v1, H1171v2, H1172v1, H1172v2, H1172v3, H1172v4, T1124, T1127v2, T1152, T1158v1, T1158v2, T1158v3, 
T1158v4, T1186, T1187]'", + "python3 posebench/analysis/inference_analysis_casp.py method=ensemble dataset=casp15 relax_protein={relax_protein} repeat_index={repeat_index}", ], }, } -VINA_BINDING_SITE_METHODS = ["diffdock", "p2rank"] -SINGLE_RUN_METHODS = ["fabind", "rfaa", "tulip"] -GPU_ENABLED_METHODS = ["diffdock", "fabind", "dynamicbind", "neuralplexer"] -DATASETS = ["posebusters_benchmark", "astex_diverse", "dockgen", "casp15"] + +INFERENCE_METHODS = Literal[ + "diffdock", + "fabind", + "dynamicbind", + "neuralplexer", + "rfaa", + "chai-lab", + "vina", + "tulip", + "ensemble", +] +VINA_BINDING_SITE_INFERENCE_METHODS = Literal["diffdock", "p2rank"] +INFERENCE_ENSEMBLE_RANKING_METHODS = Literal["consensus", "ff"] +INFERENCE_DATASETS = Literal["posebusters_benchmark", "astex_diverse", "dockgen", "casp15"] + +NON_GENERATIVE_INFERENCE_METHODS = {"fabind", "rfaa", "tulip"} +POCKET_ONLY_COMPATIBLE_METHODS = { + "diffdock", + "fabind", + "dynamicbind", + "neuralplexer", + "rfaa", + "chai-lab", + "vina", + "ensemble", +} +INVALID_METHOD_DATASET_COMBINATIONS = { + ("fabind", "casp15"), +} +@beartype def build_inference_script( - method: Literal["diffdock", "fabind", "dynamicbind", "neuralplexer", "rfaa", "vina", "tulip"], - vina_binding_site_method: Literal[ - "diffdock", "fabind", "dynamicbind", "neuralplexer", "rfaa", "p2rank" - ], - dataset: Literal["posebusters_benchmark", "astex_diverse", "dockgen", "casp15"], + method: INFERENCE_METHODS, + vina_binding_site_method: VINA_BINDING_SITE_INFERENCE_METHODS, + ensemble_ranking_method: INFERENCE_ENSEMBLE_RANKING_METHODS, + dataset: INFERENCE_DATASETS, repeat_index: int, cuda_device_index: int, output_script_dir: str, + pocket_only_baseline: bool = False, + v1_baseline: bool = False, + no_ilcl: bool = False, + relax_protein: bool = False, export_hpc_headers: bool = False, + verbose: bool = False, gpu_partition: str = "chengji-lab-gpu", gpu_account: str = "chengji-lab", gpu_type: Literal["A100", "H100"] = "A100", @@ -200,11 
+271,19 @@ def build_inference_script( :param method: Inference method to use. :param vina_binding_site_method: Vina binding site method to use. + :param ensemble_ranking_method: Ensemble ranking method to use. :param dataset: Dataset to use. :param repeat_index: Index of the repeat. :param cuda_device_index: Index of the CUDA device to use. :param output_script_dir: Output script directory. + :param pocket_only_baseline: Whether to perform a pocket-only baseline for the PoseBusters + Benchmark set. + :param v1_baseline: Whether to perform a V1 baseline for DiffDock. + :param no_ilcl: Whether to use model weights trained with an inter-ligand clash loss (ILCL) for + the CASP15 set. + :param relax_protein: Whether to relax the protein structure before scoring. :param export_hpc_headers: Whether to export HPC headers. + :param verbose: Whether to print verbose (i.e., invalid configuration) output. :param gpu_partition: GPU partition to use. :param gpu_account: GPU account to use. :param gpu_type: GPU type to use. @@ -212,22 +291,62 @@ def build_inference_script( :param time_limit: Time limit. """ commands = COMMANDS.get(method) + + # Inform user of invalid function calls if not commands: raise ValueError(f"Unsupported method: {method}") - if method in SINGLE_RUN_METHODS and repeat_index > 1: - logging.info( - f"Method {method} does not support multiple repeats. Skipping repeat_index {repeat_index}." - ) + if (method, dataset) in INVALID_METHOD_DATASET_COMBINATIONS: + if verbose: + logging.info(f"Method {method} does not support dataset {dataset}. Skipping.") + return + + if method in NON_GENERATIVE_INFERENCE_METHODS and repeat_index > 1: + if verbose: + logging.info( + f"Method {method} does not support multiple repeats. Skipping repeat_index {repeat_index}." 
+ ) + return + + if pocket_only_baseline and not ( + method in POCKET_ONLY_COMPATIBLE_METHODS and dataset == "posebusters_benchmark" + ): + if verbose: + logging.info( + f"Method-dataset combination {method}-{dataset} does not support argument `pocket_only_baseline`. Skipping." + ) + return + + if v1_baseline and not (method == "diffdock"): + if verbose: + logging.info( + f"Method-dataset combination {method}-{dataset} does not support argument `v1_baseline`. Skipping." + ) + return + + if no_ilcl and not (method == "neuralplexer" and dataset == "casp15"): + if verbose: + logging.info( + f"Method-dataset combination {method}-{dataset} does not support argument `no_ilcl`. Skipping." + ) return os.makedirs(output_script_dir, exist_ok=True) + vina_binding_site_method_suffix = f"_{vina_binding_site_method}" if method == "vina" else "" + ensemble_ranking_method_suffix = f"_{ensemble_ranking_method}" if method == "ensemble" else "" + v1_baseline_suffix = "v1" if v1_baseline else "" + pocket_only_suffix = "_pocket_only" if pocket_only_baseline else "" + no_ilcl_suffix = "_no_ilcl" if no_ilcl else "" + relax_protein_suffix = "_relax_protein" if relax_protein else "" + hpc_suffix = "_hpc" if export_hpc_headers else "" output_script = os.path.join( output_script_dir, - f"{method}_{dataset}{'_hpc' if export_hpc_headers else ''}_inference_{repeat_index}.sh", + f"{method}{vina_binding_site_method_suffix}{ensemble_ranking_method_suffix}{v1_baseline_suffix}{pocket_only_suffix}{no_ilcl_suffix}{relax_protein_suffix}_{dataset}{hpc_suffix}_inference_{repeat_index}.sh", ) + # Build script in sections with open(output_script, "w") as f: + f.write("#!/bin/bash -l\n\n") if export_hpc_headers: f.write( insert_hpc_headers( @@ -239,56 +358,236 @@ def build_inference_script( time_limit=time_limit, ) ) - f.write("\nconda activate PoseBench\n\n") + # NOTE: The following HPC environment activation command assumes the + # `PoseBench` Conda environment was created using `--prefix PoseBench/` + # 
to reduce storage usage in one's HPC home directory + f.write("\nconda activate PoseBench/\n\n") + + # NOTE: Model weights may take up too much space in one's HPC + # home directory, so we recommend using a command like the following + # to store the model weights in a larger storage location (e.g., `/scratch`): + f.write( + "# Store model weights in a larger storage location\n" + + 'export TORCH_HOME="/cluster/pixstor/chengji-lab/$USER/torch_cache"\n' + + 'export HF_HOME="/cluster/pixstor/chengji-lab/$USER/hf_cache"\n\n' + + 'mkdir -p "$TORCH_HOME"\n' + + 'mkdir -p "$HF_HOME"\n\n' + ) + else: + f.write( + "# shellcheck source=/dev/null\n" + + "source /home/$USER/mambaforge/etc/profile.d/conda.sh\n\n" + + "# Activate PoseBench environment\n" + + "conda activate PoseBench\n\n" + ) # Prepare input files - f.write("# Prepare input files\n") - for cmd in commands.get("prepare_input", []): - f.write(cmd.format(dataset=dataset) + "\n") - f.write("\n") + if "prepare_input" in commands: + diffdock_casp15_input_suffix = ( + " input_data_dir=data/casp15_set/targets input_protein_structure_dir=data/casp15_set/casp15_holo_aligned_predicted_structures" + if method == "diffdock" and dataset == "casp15" + else "" + ) + dynamicbind_casp15_input_suffix = ( + " input_data_dir=data/casp15_set/targets" + if method == "dynamicbind" and dataset == "casp15" + else "" + ) + neuralplexer_casp15_input_suffix = ( + " input_data_dir=data/casp15_set/targets input_receptor_structure_dir=data/casp15_set/casp15_holo_aligned_predicted_structures" + if method == "neuralplexer" and dataset == "casp15" + else "" + ) + rfaa_casp15_input_suffix = ( + " input_data_dir=data/casp15_set/targets" + if method == "rfaa" and dataset == "casp15" + else "" + ) + chai_casp15_input_suffix = ( + " input_data_dir=data/casp15_set/targets" + if method == "chai-lab" and dataset == "casp15" + else "" + ) + f.write("# Prepare input files\n") + for cmd in commands.get("prepare_input", []): + prepare_input_string = ( + 
cmd.format(dataset=dataset, pocket_only_baseline=pocket_only_baseline) + + diffdock_casp15_input_suffix + + dynamicbind_casp15_input_suffix + + neuralplexer_casp15_input_suffix + + rfaa_casp15_input_suffix + + chai_casp15_input_suffix + + "\n" + ) + if method == "vina" and pocket_only_baseline: + prepare_input_string = prepare_input_string.replace( + f"diffdock_{dataset}", f"diffdock_pocket_only_{dataset}" + ) + prepare_input_string = prepare_input_string.replace( + f"vina_{dataset}", f"vina_pocket_only_{dataset}" + ) + f.write(prepare_input_string) + f.write("\n") # Run inference - f.write("# Run inference\n") - run_inference_cmds = commands.get("run_inference", []) - for cmd in run_inference_cmds: - f.write( - cmd.format( - dataset=dataset, - repeat_index=repeat_index, - cuda_device_index=cuda_device_index, - vina_binding_site_method=vina_binding_site_method, - ) - + "\n" + if "run_inference" in commands: + export_top_n = 5 if dataset == "casp15" else 1 + diffdock_casp15_inference_suffix = ( + " batch_size=1" if method == "diffdock" and dataset == "casp15" else "" ) - f.write("\n") + dynamicbind_casp15_inference_suffix = ( + " batch_size=1 input_data_dir=data/casp15_set/casp15_holo_aligned_predicted_structures" + if method == "dynamicbind" and dataset == "casp15" + else "" + ) + neuralplexer_casp15_inference_suffix = ( + " chunk_size=4" if method == "neuralplexer" and dataset == "casp15" else "" + ) + ensemble_casp15_inference_suffix = ( + " combine_casp_output_files=true" + if method == "ensemble" and dataset == "casp15" + else "" + ) + f.write("# Run inference\n") + for cmd in commands.get("run_inference", []): + f.write( + cmd.format( + dataset=dataset, + repeat_index=repeat_index, + cuda_device_index=cuda_device_index, + vina_binding_site_method=vina_binding_site_method, + ensemble_ranking_method=ensemble_ranking_method, + export_top_n=export_top_n, + pocket_only_baseline=pocket_only_baseline, + v1_baseline=v1_baseline, + no_ilcl=no_ilcl, + 
relax_protein=relax_protein, + ) + + diffdock_casp15_inference_suffix + + dynamicbind_casp15_inference_suffix + + neuralplexer_casp15_inference_suffix + + ensemble_casp15_inference_suffix + + "\n" + ) + if diffdock_casp15_inference_suffix: + f.write( + "# NOTE: Due to DiffDock-L's occasional numerical instabilities " + + "on the CASP15 dataset, you may have to re-run this inference script " + + "several times (with the default 'skip_existing=true') to have it " + + "successfully predict for all CASP targets.\n" + ) + f.write( + "# Consider running the following commands to clean up DiffDock-L's " + + "inference run directory (e.g., `_1`) before re-running this script:\n" + ) + f.write( + "# rm -r forks/DiffDock/inference/diffdock_casp15_output_1/*_*/\n" + + "# find forks/DiffDock/inference/diffdock_casp15_output_1/* -type d ! -exec test -e {}/rank1.sdf \\; -exec sh -c 'rm -rf {}/' \\;\n" + ) + if dynamicbind_casp15_inference_suffix: + f.write( + "# NOTE: Due to DynamicBind's occasional numerical instabilities " + + "on the CASP15 dataset, you may have to re-run this inference script " + + "several times (with the default 'skip_existing=true') to have it " + + "successfully predict for all CASP targets.\n" + ) + f.write( + "# Consider running the following commands to clean up DiffDock-L's " + + "inference run directory (e.g., `_1`) before re-running this script:\n" + ) + f.write( + "# find forks/DynamicBind/inference/outputs/results/casp15__1/index0_idx_0 -type d ! 
-exec test -e {}/cleaned_input_proteinFile.pdb \\; -exec sh -c 'rm -rf $(dirname {})/' \\;\n" + + "# find forks/DynamicBind/inference/outputs/results/casp15_*_1/ -type d -empty -delete\n" + ) + f.write("\n") - # Relax generated ligand structures - f.write("# Relax generated ligand structures\n") - for cmd in commands.get("relax", []): - f.write( - cmd.format( - dataset=dataset, - repeat_index=repeat_index, - vina_binding_site_method=vina_binding_site_method, + # Extract outputs (if applicable) + if "extract_outputs" in commands: + f.write("# Extract outputs\n") + for cmd in commands.get("extract_outputs", []): + f.write( + cmd.format( + dataset=dataset, + pocket_only_baseline=pocket_only_baseline, + repeat_index=repeat_index, + ) + + "\n" ) - + "\n" - ) - f.write("\n") + f.write("\n") - # Analyze inference results - f.write("# Analyze inference results\n") - for cmd in commands.get("analyze_results", []): - f.write( - cmd.format( - dataset=dataset, - repeat_index=repeat_index, - vina_binding_site_method=vina_binding_site_method, + # Copy predictions (if applicable) + if "copy_predictions" in commands: + f.write("# Copy predictions\n") + for cmd in commands.get("copy_predictions", []): + copy_predictions_string = ( + cmd.format( + dataset=dataset, + repeat_index=repeat_index, + vina_binding_site_method=vina_binding_site_method, + ) + + "\n" ) - + "\n" - ) - f.write("\n") + if method == "vina" and pocket_only_baseline: + copy_predictions_string = copy_predictions_string.replace( + f"vina_{vina_binding_site_method}", + f"vina_pocket_only_{vina_binding_site_method}", + ) + f.write(copy_predictions_string) + f.write("\n") - # Assemble CASP15 (if applicable) + # Relax generated ligand structures (if applicable) + if dataset != "casp15" and "relax" in commands: + # NOTE: CASP15 predictions are instead relaxed using the `ensemble_generation.py` script + f.write("# Relax generated ligand structures\n") + for cmd in commands.get("relax", []): + f.write( + cmd.format( + 
dataset=dataset, + repeat_index=repeat_index, + cuda_device_index=cuda_device_index, + vina_binding_site_method=vina_binding_site_method, + pocket_only_baseline=pocket_only_baseline, + v1_baseline=v1_baseline, + relax_protein=relax_protein, + ) + + "\n" + ) + f.write("\n") + + # Align complexes (if applicable) + if dataset != "casp15" and "align_complexes" in commands: + # NOTE: CASP15 predictions are instead aligned using the `ensemble_generation.py` script + f.write("# Align complexes\n") + for cmd in commands.get("align_complexes", []): + f.write( + cmd.format( + dataset=dataset, + repeat_index=repeat_index, + pocket_only_baseline=pocket_only_baseline, + ) + + "\n" + ) + f.write("\n") + + # Analyze inference results (if applicable) + if dataset != "casp15" and "analyze_results" in commands: + # NOTE: CASP15 predictions are instead analyzed using the `inference_analysis_casp.py` script + f.write("# Analyze inference results\n") + for cmd in commands.get("analyze_results", []): + f.write( + cmd.format( + dataset=dataset, + repeat_index=repeat_index, + vina_binding_site_method=vina_binding_site_method, + pocket_only_baseline=pocket_only_baseline, + v1_baseline=v1_baseline, + relax_protein=relax_protein, + ) + + "\n" + ) + f.write("\n") + + # Assemble CASP15 predictions (if applicable) if dataset == "casp15" and "assemble_casp15" in commands: f.write("# Assemble CASP15 results\n") for cmd in commands.get("assemble_casp15", []): @@ -297,6 +596,11 @@ def build_inference_script( dataset=dataset, repeat_index=repeat_index, cuda_device_index=cuda_device_index, + vina_binding_site_method=vina_binding_site_method, + ensemble_ranking_method=ensemble_ranking_method, + v1_baseline=v1_baseline, + no_ilcl=no_ilcl, + relax_protein=relax_protein, ) + "\n" ) @@ -311,41 +615,88 @@ def build_inference_script( dataset=dataset, repeat_index=repeat_index, vina_binding_site_method=vina_binding_site_method, - cuda_device_index=cuda_device_index, + v1_baseline=v1_baseline, + 
no_ilcl=no_ilcl, + relax_protein=relax_protein, ) + "\n" ) f.write("\n") + # Inform user of run completion + f.write("# Inform user of run completion\n" + f"echo 'Run {repeat_index} completed.'\n") + logging.info(f"Script {output_script} created successfully.") +@beartype def build_inference_scripts( - num_repeats: int, + methods_to_sweep: List[INFERENCE_METHODS], + vina_binding_site_methods_to_sweep: List[VINA_BINDING_SITE_INFERENCE_METHODS], + ensemble_ranking_methods_to_sweep: List[INFERENCE_ENSEMBLE_RANKING_METHODS], + datasets_to_sweep: List[INFERENCE_DATASETS], + num_sweep_repeats: int, cuda_device_index: int, output_script_dir: str, + pocket_only_baseline: Optional[bool] = None, + v1_baseline: Optional[bool] = None, + no_ilcl: Optional[bool] = None, + relax_protein: Optional[bool] = None, export_hpc_headers: bool = False, + verbose: bool = False, ): - """Build inference scripts according to user arguments. + """Build inference scripts according to user sweep arguments. - :param num_repeats: Number of repeats total. + :param methods_to_sweep: Inference methods to sweep. + :param vina_binding_site_methods_to_sweep: Vina binding site methods to sweep. + :param ensemble_ranking_methods_to_sweep: Ensemble ranking methods to sweep. + :param datasets_to_sweep: Datasets to sweep. + :param num_sweep_repeats: Number of repeats in the sweep. :param cuda_device_index: Index of the CUDA device to use. :param output_script: Output script file. + :param pocket_only_baseline: Whether to perform a pocket-only baseline for the PoseBusters + Benchmark set. + :param v1_baseline: Whether to perform a V1 baseline for DiffDock. + :param no_ilcl: Whether to use model weights trained with an inter-ligand clash loss (ILCL) for + the CASP15 set. + :param relax_protein: Whether to relax the protein structure before scoring. :param export_hpc_headers: Whether to export HPC headers. + :param verbose: Whether to print verbose (i.e., invalid configuration) output. 
""" - for method in COMMANDS: - for vina_binding_site_method in VINA_BINDING_SITE_METHODS: - for dataset in DATASETS: - for repeat_index in range(1, num_repeats + 1): - build_inference_script( - method=method, - vina_binding_site_method=vina_binding_site_method, - dataset=dataset, - repeat_index=repeat_index, - cuda_device_index=cuda_device_index, - output_script_dir=output_script_dir, - export_hpc_headers=export_hpc_headers, - ) + for method in methods_to_sweep: + for vina_binding_site_method in vina_binding_site_methods_to_sweep: + for ensemble_ranking_method in ensemble_ranking_methods_to_sweep: + for dataset in datasets_to_sweep: + for pocket_only in [True, False]: + pocket_only_mode = ( + pocket_only_baseline + if pocket_only_baseline is not None + else pocket_only + ) + for v1 in [True, False]: + v1_mode = v1_baseline if v1_baseline is not None else v1 + for n_ilcl in [True, False]: + no_ilcl_mode = no_ilcl if no_ilcl is not None else n_ilcl + for relax_prot in [True, False]: + relax_protein_mode = ( + relax_protein if relax_protein is not None else relax_prot + ) + for repeat_index in range(1, num_sweep_repeats + 1): + build_inference_script( + method=method, + vina_binding_site_method=vina_binding_site_method, + ensemble_ranking_method=ensemble_ranking_method, + dataset=dataset, + repeat_index=repeat_index, + cuda_device_index=cuda_device_index, + output_script_dir=output_script_dir, + pocket_only_baseline=pocket_only_mode, + v1_baseline=v1_mode, + no_ilcl=no_ilcl_mode, + relax_protein=relax_protein_mode, + export_hpc_headers=export_hpc_headers, + verbose=verbose, + ) @hydra.main( @@ -354,22 +705,38 @@ def build_inference_scripts( config_name="build_inference_script.yaml", ) def main(cfg: DictConfig): - """Build an inference script according to user arguments.""" - if cfg.build_all_scripts: + """Build an inference script or sweep according to user arguments.""" + if cfg.sweep: build_inference_scripts( - num_repeats=cfg.num_repeats, + 
methods_to_sweep=list(cfg.methods_to_sweep), + vina_binding_site_methods_to_sweep=list(cfg.vina_binding_site_methods_to_sweep), + ensemble_ranking_methods_to_sweep=list(cfg.ensemble_ranking_methods_to_sweep), + datasets_to_sweep=list(cfg.datasets_to_sweep), + num_sweep_repeats=cfg.num_sweep_repeats, cuda_device_index=cfg.cuda_device_index, output_script_dir=cfg.output_script_dir, + pocket_only_baseline=cfg.pocket_only_baseline, + v1_baseline=cfg.v1_baseline, + no_ilcl=cfg.no_ilcl, + relax_protein=cfg.relax_protein, export_hpc_headers=cfg.export_hpc_headers, + verbose=cfg.verbose, ) else: build_inference_script( method=cfg.method, + vina_binding_site_method=cfg.vina_binding_site_method, + ensemble_ranking_method=cfg.ensemble_ranking_method, dataset=cfg.dataset, repeat_index=cfg.repeat_index, cuda_device_index=cfg.cuda_device_index, output_script_dir=cfg.output_script_dir, + pocket_only_baseline=cfg.pocket_only_baseline, + v1_baseline=cfg.v1_baseline, + no_ilcl=cfg.no_ilcl, + relax_protein=cfg.relax_protein, export_hpc_headers=cfg.export_hpc_headers, + verbose=cfg.verbose, ) diff --git a/scripts/find_most_common_ensemble_method.sh b/scripts/find_most_common_ensemble_method.sh new file mode 100644 index 00000000..176ad28f --- /dev/null +++ b/scripts/find_most_common_ensemble_method.sh @@ -0,0 +1,39 @@ +#!/bin/bash + +# ------------------------------------------------------------------------------------------------------------------------------------- +# Following code curated for PoseBench: (https://github.com/BioinfoMachineLearning/PoseBench) +# ------------------------------------------------------------------------------------------------------------------------------------- + +# Finding for each dataset the most frequently selected baseline method by the (structural) consensus ensembling baseline # + +for dataset in astex_diverse posebusters_benchmark dockgen casp15; do + if [ "$dataset" = "posebusters_benchmark" ]; then + echo "Top-3 baseline methods most 
frequently selected by the (structural) consensus ensembling baseline for $dataset (pocket-only):" + + # Step 1: Find all files in the ensemble baseline method's subdirectories for a given dataset + find data/test_cases/"$dataset"/top_consensus_pocket_only_ensemble_predictions_*/ -type f | + + # Step 2: Extract the method names using grep with a regex + grep -oP '(?<=/)[^/]+(?=_rank)' | + + # Step 3: Count the occurrences of each method using awk + awk '{count[$1]++} END {for (method in count) print count[method], method}' | + + # Step 4: Sort the results and find the most frequent methods at the top of the command's output + sort -nr | head -n 3 + fi + + echo "Top-3 baseline methods most frequently selected by the (structural) consensus ensembling baseline for $dataset:" + + # Step 1: Find all files in the ensemble baseline method's subdirectories for a given dataset + find data/test_cases/"$dataset"/top_consensus_ensemble_predictions_*/ -type f | + + # Step 2: Extract the method names using grep with a regex + grep -oP '(?<=/)[^/]+(?=_rank)' | + + # Step 3: Count the occurrences of each method using awk + awk '{count[$1]++} END {for (method in count) print count[method], method}' | + + # Step 4: Sort the results and find the most frequent methods at the top of the command's output + sort -nr | head -n 3 +done