From 9a4ec6a707368325032109a5ab8d1023abfccf61 Mon Sep 17 00:00:00 2001 From: ahha14215 Date: Tue, 19 May 2020 18:03:36 -0500 Subject: [PATCH 001/452] Add ID to list of feature names Addresses bug found with disruptin prox to lysis tool in Galaxy. List of feature names was not enough to recognize the features when comparing between the TMHMM (gff3), InterPro (gff3), disruptin (fasta), and genome (gff3) input files. This commit also addresses another bug found with some of the genome gff3 files from Genbank, where the feature "Name" was too general/short leading to too many features in the output --- tools/phage/disruptin_proximity_2_lysis_genes.py | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) diff --git a/tools/phage/disruptin_proximity_2_lysis_genes.py b/tools/phage/disruptin_proximity_2_lysis_genes.py index 92b0dc83..dac2baac 100644 --- a/tools/phage/disruptin_proximity_2_lysis_genes.py +++ b/tools/phage/disruptin_proximity_2_lysis_genes.py @@ -179,7 +179,7 @@ def adjacent_lgc(lgc, tmhmm, ipro, genome, enzyme, window): adjacent_tm = {} adjacent_lgc_to_tm = {} - # print(tmhmm_protein_names, endo_names) + # print(len(tmhmm_protein_names), len(endo_names)) # print(rec_genome_ini) # print(len(rec_genome_ini)) @@ -197,16 +197,17 @@ def adjacent_lgc(lgc, tmhmm, ipro, genome, enzyme, window): # print(rec_genome) for feat in rec_genome.features: - # rint(feat) + # print(feat) # searches for synonyms and if feat.type == "CDS": feat_names = [] + feat_names.append(str(feat.id)) if "locus_tag" in feat.qualifiers: feat_names.append(str(feat.qualifiers["locus_tag"][0])) - if "Name" in feat.qualifiers: - feat_names.append(str(feat.qualifiers["Name"][0])) if "protein_id" in feat.qualifiers: feat_names.append(str(feat.qualifiers["protein_id"][0])) + if len(str(feat.qualifiers["Name"][0])) > 5: + feat_names.append(str(feat.qualifiers["Name"][0])) # print(str(feat_names)) # print(str(feat.qualifiers)) for i in range(len(feat_names)): @@ -223,7 +224,7 @@ def adjacent_lgc(lgc, tmhmm, ipro, genome, enzyme, window): tm_seqrec += [feat] # check if protein contains a TMD for i in range(len(feat_names)): - if str(feat_names[i]) in tmhmm_protein_names: + if str(feat_names[i]) in str(tmhmm_protein_names): # print(feat_names[i]) tm_seqrec += [feat] @@ -238,7 +239,7 @@ def adjacent_lgc(lgc, tmhmm, ipro, genome, enzyme, window): endolysin_seqrec += [feat] # check if protein contains an endolysin-associated domain for i in range(len(feat_names)): - if str(feat_names[i]) in endo_names: + if str(feat_names[i]) in str(endo_names): endolysin_seqrec += [feat] # print(endolysin_seqrec, tm_seqrec, lgc_seqrec) From 48dab36e1ce0c1c8e8d32852e74409291be2c866 Mon Sep 17 00:00:00 2001 From: Anthony C Date: Fri, 22 May 2020 16:36:06 -0500 Subject: [PATCH 002/452] FlaGs upload 4 --- tool_conf.xml | 1 + tools/external/FlaGs/FlaGs.py | 1779 +++++++++++++++++ tools/external/FlaGs/FlaGs.xml | 88 + tools/external/FlaGs/FlaGs_Manual_070520.pdf | Bin 0 -> 4189133 bytes tools/external/FlaGs/FlaGs_linux.py | 1783 ++++++++++++++++++ 5 files changed, 3651 insertions(+) create mode 100644 tools/external/FlaGs/FlaGs.py create mode 100644 tools/external/FlaGs/FlaGs.xml create mode 100644 tools/external/FlaGs/FlaGs_Manual_070520.pdf create mode 100755 tools/external/FlaGs/FlaGs_linux.py diff --git a/tool_conf.xml b/tool_conf.xml index 32b54f66..e7e3b3b3 100644 --- a/tool_conf.xml +++ b/tool_conf.xml @@ -306,6 +306,7 @@ +