Skip to content

Commit

Permalink
rna_mmcif2pdb.py: add
Browse files Browse the repository at this point in the history
  • Loading branch information
mmagnus committed Sep 26, 2024
1 parent 565ae06 commit 5be42a4
Show file tree
Hide file tree
Showing 2 changed files with 141 additions and 1 deletion.
139 changes: 139 additions & 0 deletions rna_tools/rna_mmcif2pdb.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,139 @@
#!/usr/bin/env python
# -*- coding: utf-8 -*-
"""
"""
from __future__ import print_function
import argparse
from icecream import ic
import sys
ic.configureOutput(outputFunction=lambda *a: print(*a, file=sys.stderr))
ic.configureOutput(prefix='> ')

from rna_tools.rna_tools_lib import edit_pdb, add_header, get_version
import os

def get_parser():
    """Build the command-line parser for this script.

    The version string is taken from ``get_version(__file__)`` (whitespace
    stripped) and is used for the ``--version`` action.

    Returns:
        tuple: ``(parser, version)`` where ``parser`` is the configured
        ``argparse.ArgumentParser`` and ``version`` is the stripped version
        string, so that callers can reuse it (e.g. for file headers).
    """
    version = get_version(__file__).strip()

    # A single parser is enough; RawTextHelpFormatter keeps the module
    # docstring formatting intact when it is shown as the description.
    parser = argparse.ArgumentParser(
        description=__doc__, formatter_class=argparse.RawTextHelpFormatter)

    parser.add_argument('--version', help='', action='version', version=version)
    parser.add_argument("-v", "--verbose",
                        action="store_true", help="be verbose")
    parser.add_argument('--no-hr', help='do not insert the header into files',
                        action='store_true')
    parser.add_argument("file", help="", default="")  # nargs='+')
    return parser, version


def has_high_rna_content(chain, threshold=0.8):
    """Tell whether a chain consists mostly of RNA residues.

    A residue counts as RNA when its (whitespace-stripped) residue name is
    one of ``A``, ``C``, ``G``, ``U`` or ``X``.

    Args:
        chain: iterable of residue-like objects exposing ``get_resname()``.
        threshold: minimal fraction (0-1) of RNA residues required.

    Returns:
        bool: True if the fraction of RNA residues is >= ``threshold``;
        False otherwise, and always False for a chain with no residues.
    """
    rna_nucleotides = {'A', 'C', 'G', 'U', 'X'}
    total_residues = 0
    rna_residues = 0
    for residue in chain:
        total_residues += 1
        if residue.get_resname().strip() in rna_nucleotides:
            rna_residues += 1

    if total_residues == 0:
        return False  # avoid division by zero for an empty chain
    return rna_residues / total_residues >= threshold


def _output_path(cif_file, suffix):
    """Return ``cif_file`` with its extension replaced by ``suffix``.

    Only the final extension is swapped, so the result can never be equal to
    the input path (which a plain ``str.replace('.cif', ...)`` would return
    for names without a lowercase ``.cif``, causing the input to be
    overwritten).
    """
    root, _ext = os.path.splitext(cif_file)
    return root + suffix


def _prepend_lines(path, lines):
    """Insert ``lines`` (joined with newlines) at the top of the file ``path``."""
    if not lines:
        return
    with open(path, 'r') as f:
        body = f.read()
    with open(path, 'w') as f:
        f.write('\n'.join(lines) + '\n' + body)


def _header_lines(version, no_hr):
    """Return the header to prepend as a list of lines (empty if ``no_hr``)."""
    return [] if no_hr else [add_header(version)]


def _save_rna_chains(cif_file, version, no_hr):
    """Write every RNA-rich chain of ``cif_file`` to its own PDB file.

    Used as a fallback when the whole structure cannot be written in PDB
    format. Each selected chain is renamed to a fresh single-character id and
    saved as ``<root>_<old id>_n<new id>_fCIF.pdb`` with a REMARK line that
    documents the renaming.
    """
    import string
    from Bio.PDB import MMCIFParser, PDBIO
    from Bio.PDB.Model import Model
    from Bio.PDB.Structure import Structure

    # Parse again so that the chains are taken from a pristine structure.
    structure = MMCIFParser().get_structure("structure", cif_file)

    # Upper-case letters come first (as before); lower-case letters and digits
    # extend the pool so that files with more than 26 RNA chains do not crash.
    chain_ids = iter(string.ascii_uppercase + string.ascii_lowercase
                     + string.digits)

    for model in structure:
        for chain in model:
            if not has_high_rna_content(chain):
                continue

            chain_id_new = next(chain_ids, None)
            if chain_id_new is None:
                print('Warning: ran out of single-character chain ids, '
                      'the remaining RNA chains were not saved.',
                      file=sys.stderr)
                return

            chain_id = chain.get_id()
            atom_count = sum(len(residue) for residue in chain)
            remarks = [f'REMARK rna chain {chain.id} -> {chain_id_new}']

            pdb_file = _output_path(
                cif_file, f'_{chain_id}_n{chain_id_new}_fCIF.pdb')
            print(f'rna chain {chain.id} -> {chain_id_new} {pdb_file} # of atoms: {atom_count}')

            # Detach first: renaming a chain that is still attached to its
            # model fails if a sibling chain already uses the new id.
            chain.detach_parent()
            chain.id = chain_id_new

            new_structure = Structure("new_structure")
            new_model = Model(0)
            new_structure.add(new_model)
            new_model.add(chain)

            io = PDBIO()
            io.set_structure(new_structure)
            io.save(pdb_file)

            _prepend_lines(pdb_file, _header_lines(version, no_hr) + remarks)


def main():
    """Convert the mmCIF file(s) given on the command line to PDB format.

    The whole structure is saved as ``<root>_fCIF.pdb``. If that fails
    (typically because of multi-character chain ids), every RNA-rich chain is
    saved to a separate PDB file instead.
    """
    arg_parser, version = get_parser()
    args = arg_parser.parse_args()

    # Imported after argument parsing so that --help/--version stay fast.
    from Bio.PDB import MMCIFParser, PDBIO

    cif_files = args.file if isinstance(args.file, list) else [args.file]

    for cif_file in cif_files:
        structure = MMCIFParser().get_structure("structure_id", cif_file)
        pdb_file = _output_path(cif_file, '_fCIF.pdb')

        try:
            # Save to PDB format
            io = PDBIO()
            io.set_structure(structure)
            io.save(pdb_file)
        except Exception as err:  # not a bare except: keep Ctrl-C working
            print('Warning: some of the chains in this mmCIF file has chain names with more char than 1, e.g. AB, and the PDB format needs single-letter code, e.g. A.')
            # Show the real cause, the warning above is only the usual one.
            print(f'{type(err).__name__}: {err}', file=sys.stderr)
            _save_rna_chains(cif_file, version, args.no_hr)
        else:
            print(f'saved: {pdb_file}')
            _prepend_lines(pdb_file, _header_lines(version, args.no_hr))


if __name__ == '__main__':
    main()
3 changes: 2 additions & 1 deletion setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,8 @@
'rna_tools/rna_pdb_replace.py',
'rna_tools/rna_standardize.py',
'rna_tools/rna_pdb_inspect.py',

'rna_tools/rna_mmcif2pdb.py',

'rna_tools/tools/misc/rna_tools_which.py',
'rna_tools/tools/misc/rna_tools_demo.py',

Expand Down

0 comments on commit 5be42a4

Please sign in to comment.