diff --git a/gff3tool/bin/gff3_to_fasta.py b/gff3tool/bin/gff3_to_fasta.py index cfbadc6..a059439 100755 --- a/gff3tool/bin/gff3_to_fasta.py +++ b/gff3tool/bin/gff3_to_fasta.py @@ -191,7 +191,11 @@ def splicer(gff, ftype, dline, stype, embedded_fasta=False): cname = child['attributes']['Name'] defline='>{0:s}'.format(cid) if stype == "pep": - cid = re.sub(r'(.+-)(R)([a-zA-Z]+)', r'\1P\3', cid) + for grandchild in child['children']: #first try to get the CDS protein_id + if 'protein_id' in grandchild['attributes']: + cid = grandchild['attributes']['protein_id'] + + cid = re.sub(r'(.+-)(R)([a-zA-Z]+)', r'\1P\3', cid)#otherwise, if it has the -R[A-Z] format then modify that to -P[A-Z] defline = '>{0:s}'.format(cid) elif ftype[0] == 'CDS': defline='>{0:s}-CDS'.format(cid)