From faf6373be7291b0cccfbbc4ac638f626950716ae Mon Sep 17 00:00:00 2001 From: mmagnus Date: Wed, 25 Sep 2024 22:10:41 -0400 Subject: [PATCH] cif2pdb fancy way --- rna_tools/rna_pdb_tools.py | 63 +++++++++++++++++++++++--------------- 1 file changed, 39 insertions(+), 24 deletions(-) diff --git a/rna_tools/rna_pdb_tools.py b/rna_tools/rna_pdb_tools.py index 444160799..a7447ac53 100755 --- a/rna_tools/rna_pdb_tools.py +++ b/rna_tools/rna_pdb_tools.py @@ -1219,12 +1219,24 @@ def get_parser(): structure = parser.get_structure("structure_id", cif_file) pdb_file = cif_file.replace('.cif', '_fCIF.pdb') - remarks = [] + try: # Save to PDB format io = PDBIO() io.set_structure(structure) io.save(pdb_file) + + print(f'saved: {pdb_file}') + # open a file add remarks + new_file = '' + with open(pdb_file, 'r') as f: + if not args.no_hr: + new_file += add_header(version) + '\n' + new_file += f.read() + + with open(pdb_file, 'w') as f: + f.write(new_file) + except TypeError as e: print('Warning: some of the chains in this mmCIF file has chain names with more char than 1, e.g. AB, and the PDB format needs single-letter code, e.g. A.') @@ -1262,14 +1274,14 @@ def has_high_rna_content(chain, threshold=0.8): import string letters = list(string.ascii_uppercase) - # New structure - new_structure = Structure.Structure("new_structure") - new_model = Model.Model(0) # Create a new model - new_structure.add(new_model) # Add the new model to the new structure - for model in structure: for chain in model: if has_high_rna_content(chain): + # New structure + new_structure = Structure.Structure("new_structure") + new_model = Model.Model(0) # Create a new model + new_structure.add(new_model) # Add the new model to the new structure + chain_id_new = letters.pop(0) chain_id = chain.get_id() @@ -1277,28 +1289,31 @@ def has_high_rna_content(chain, threshold=0.8): for residue in chain: for atom in residue: atom_count += 1 - print(f'rna chain {chain.id} -> {chain_id_new} # of atoms: {atom_count}') + + remarks = [] remarks.append(f'REMARK rna chain {chain.id} -> {chain_id_new}') + chain.id = chain_id_new new_model.add(chain) - io = PDBIO() - io.set_structure(new_structure) - io.save(pdb_file) - - print(f'saved: {pdb_file}') - - # open a file add remarks - new_file = '' - with open(pdb_file, 'r') as f: - if not args.no_hr: - new_file += add_header(version) + '\n' - if remarks: - new_file += '\n'.join(remarks) + '\n' - new_file += f.read() - - with open(pdb_file, 'w') as f: - f.write(new_file) + pdb_file = cif_file.replace('.cif', f'_{chain_id}_n{chain_id_new}_fCIF.pdb') + + io = PDBIO() + io.set_structure(new_structure) + + io.save(pdb_file) + print(f'rna chain {chain.id} -> {chain_id_new} # of atoms: {atom_count} {pdb_file}') + # open a file add remarks + new_file = '' + with open(pdb_file, 'r') as f: + if not args.no_hr: + new_file += add_header(version) + '\n' + if remarks: + new_file += '\n'.join(remarks) + '\n' + new_file += f.read() + + with open(pdb_file, 'w') as f: + f.write(new_file) if args.pdb2cif: try: