Skip to content

Commit

Permalink
Merge pull request #19 from andrewjpage/direction
Browse files Browse the repository at this point in the history
Validate databases
  • Loading branch information
andrewjpage authored Sep 3, 2019
2 parents 2113356 + 96fca1f commit b28fd62
Show file tree
Hide file tree
Showing 40 changed files with 232 additions and 69 deletions.
2 changes: 1 addition & 1 deletion VERSION
Original file line number Diff line number Diff line change
@@ -1 +1 @@
2.2.0
2.2.1
10 changes: 10 additions & 0 deletions socru/GATProfile.py
Original file line number Diff line number Diff line change
Expand Up @@ -128,5 +128,15 @@ def orientation_binary(self):
total += 1 << bits_to_shift
return total

def orientation_array(self):
    """Return one boolean per entry of ``self.fragments``.

    An entry that starts with digits immediately followed by a prime
    (for example ``"3'"``) maps to ``False``; every other entry maps to
    ``True``.
    """
    # re.match only anchors at the start of the string, so the prime has to
    # directly follow the leading digits for an entry to count as primed.
    return [re.match(r"\d+'", f) is None for f in self.fragments]



118 changes: 112 additions & 6 deletions socru/SocruLookup.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,22 +3,128 @@
from socru.GATProfile import GATProfile
from socru.Profiles import Profiles
from socru.TypeGenerator import TypeGenerator
from socru.ValidateFragments import ValidateFragments
from socru.Fragment import Fragment

class SocruLookup:
def __init__(self, options):
    """Store the lookup options and build the type generator.

    ``options`` must provide the attributes ``fragments``, ``db_dir`` and
    ``verbose``.
    """
    self.fragments = options.fragments
    self.db_dir = options.db_dir
    self.verbose = options.verbose
    # calc_type() and calc_quality() both read self.tg, so it has to exist
    # as soon as the object has been constructed.
    self.tg = self.type_generator()

# Build one Fragment per entry of input_profile, flag the dnaA and dif
# fragments, then copy the operon direction from the entry in
# standard_fragments that has the same number. Returns the list of Fragments.
# NOTE(review): leading indentation is missing in this view, so block nesting
# is not visible. In particular, confirm against the repository whether the
# final reversed_frag flip sits inside the number-match branch or directly
# under the outer loop.
def create_fragments(self, input_profile, standard_fragments):
frags = []
# get the basic details

# NOTE(review): presumably input_profile is a GATProfile, whose
# orientation_array() appends True for entries WITHOUT a trailing prime; if so
# this list holds "forward" flags despite its name, which is why it is negated
# when passed as reversed_frag below.
reversed_frags = input_profile.orientation_array()
for i, frag_number in enumerate(input_profile.orientationless_fragments()):
f = Fragment([], number = frag_number, operon_forward_start = False, dna_A = False, dif = False, reversed_frag = not reversed_frags[i])
# Numbers are compared as strings so that int and str values match.
if str(frag_number) == str(input_profile.dnaA_fragment_number):
f.dna_A = True
if str(frag_number) == str(input_profile.dif_fragment_number):
f.dif = True
frags.append(f)

# loop over the standard and set the direction (if inverted, then invert)
for f in frags:
for s in standard_fragments:
if f.number == s.number:
f.operon_forward_start = s.operon_forward_start

# Disabled overrides that would force the direction of the dnaA/dif fragments.
#if f.dna_A:
# f.operon_forward_start = True
#if f.dif:
# f.operon_forward_start = False

# A reversed fragment gets the opposite direction.
if f.reversed_frag:
f.operon_forward_start = not f.operon_forward_start

return frags

def find_dnaa(self, frags):
    """Return the index of the first fragment whose ``dna_A`` flag is set.

    Prints a message and returns -1 when no fragment carries the flag.
    """
    for index, frag in enumerate(frags):
        if frag.dna_A:
            return index

    print("DnaA couldnt be found")
    return -1

def find_dif(self, frags):
    """Return the index of the first fragment whose ``dif`` flag is set.

    Prints a message and returns -1 when no fragment carries the flag.
    """
    for index, frag in enumerate(frags):
        if frag.dif:
            return index

    print("Dif couldnt be found")
    return -1

def reorientate_ori(self, frags):
    """Rotate ``frags`` so that the dnaA fragment comes first.

    Returns a new list starting at the index reported by ``find_dnaa``.
    When ``find_dnaa`` returns -1 the input list itself is returned
    unchanged.
    """
    dnaa_index = self.find_dnaa(frags)
    if dnaa_index == -1:
        return frags

    # Everything from dnaA to the end, followed by what preceded it.
    return frags[dnaa_index:] + frags[:dnaa_index]

# Take reference case 1 2 3 4 5 6 7
# assign terminus and origin
# assign direction to each number
# dnaa points outwards
# dif points inwards
# This is the standard
def create_standard_fragments(self, profile_db):
    """Build the standard fragment list from the first profile in ``profile_db``.

    One Fragment is created per entry of ``profile_db.gats[0]``, with the
    dnaA and dif fragments flagged. The list is rotated to start at dnaA,
    and every fragment from dnaA up to (but not including) dif gets
    ``operon_forward_start = True``; the rest keep ``False``.

    Returns the rotated list, or ``[]`` when either the dnaA or the dif
    fragment cannot be found.
    """
    reference_profile = profile_db.gats[0]

    # Numbers are compared as strings so that int and str values match.
    dnaa_number = str(profile_db.dnaA_fragment_number)
    dif_number = str(profile_db.dif_fragment_number)

    ref_frags = []
    for frag_number in reference_profile.orientationless_fragments():
        f = Fragment([], number=frag_number, operon_forward_start=False, dna_A=False, dif=False)
        if str(frag_number) == dnaa_number:
            f.dna_A = True
        if str(frag_number) == dif_number:
            f.dif = True
        ref_frags.append(f)

    # reorientate to dnaA
    dna_start_frags = self.reorientate_ori(ref_frags)
    dnaa_index = self.find_dnaa(dna_start_frags)
    dif_index = self.find_dif(dna_start_frags)
    if dnaa_index == -1 or dif_index == -1:
        return []

    # Forward walk from dnaA up to dif; the remaining fragments keep the
    # False they were created with.
    for frag in dna_start_frags[dnaa_index:dif_index]:
        frag.operon_forward_start = True

    return dna_start_frags

def validate_profile(self, profile_db, input_profile):
    """Validate ``input_profile`` against the standard built from ``profile_db``.

    Returns whatever ``ValidateFragments(...).validate()`` returns for the
    fragments produced by ``create_fragments``.
    """
    standard_fragments = self.create_standard_fragments(profile_db)
    frags = self.create_fragments(input_profile, standard_fragments)
    return ValidateFragments(frags).validate()

def type_generator(self):
    """Build and return the TypeGenerator for ``self.fragments``.

    Loads ``profile.txt`` from ``self.db_dir``, parses the dash-separated
    fragment string into a GATProfile carrying the database's dnaA and dif
    fragment numbers, orientates it for dnaA, validates it, and passes the
    validation result on to the TypeGenerator.
    """
    profile_db = Profiles(os.path.join(self.db_dir, 'profile.txt'), self.verbose)

    input_profile = GATProfile(
        self.verbose,
        fragments=self.fragments.split('-'),
        dnaA_fragment_number=profile_db.dnaA_fragment_number,
        dif_fragment_number=profile_db.dif_fragment_number,
    )
    input_profile.orientate_for_dnaA()

    is_profile_valid = self.validate_profile(profile_db, input_profile)
    # Return the generator itself, not the computed type: calc_type() and
    # calc_quality() both need the object.
    return TypeGenerator(profile_db, input_profile, self.verbose, is_profile_valid)

def calc_type(self):
    """Return the result of ``calculate_type()`` on the stored type generator."""
    return self.tg.calculate_type()

def calc_quality(self):
    """Return the ``quality`` attribute of the stored type generator."""
    return self.tg.quality

def run(self):
    """Print one tab-separated line: database directory, quality, type."""
    # Compute the type before reading the quality: TypeGenerator assigns its
    # `quality` attribute while matching profiles, so reading it first could
    # report a stale value.
    # NOTE(review): confirm that calculate_type() is what triggers that update.
    genome_type = self.tg.calculate_type()
    quality = self.calc_quality()
    print(str(self.db_dir) + "\t" + str(quality) + "\t" + str(genome_type))

3 changes: 1 addition & 2 deletions socru/TypeGenerator.py
Original file line number Diff line number Diff line change
Expand Up @@ -67,8 +67,7 @@ def previously_seen(self, gat_number, orientation_binary):

if self.is_frag_valid:
self.quality = 'GREEN'
else:
self.quality = 'AMBER'

return True
else:
return False
Expand Down
4 changes: 0 additions & 4 deletions socru/data/Acinetobacter_baumannii/profile.txt
Original file line number Diff line number Diff line change
@@ -1,7 +1,3 @@
GS Frag_1 Frag_2 Frag_3 Frag_4 Frag_5 Frag_6
1.0 1 2 3 4 5 6
1.1 1' 2 3 4 5 6
1.8 1 2 3 4' 5 6
1.9 1' 2 3 4' 5 6
2.24 1 2 3 5' 4' 6
2.25 1' 2 3 5' 4' 6
1 change: 1 addition & 0 deletions socru/data/Acinetobacter_sp./profile.txt.yml
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
dif_fragment: 1
dnaa_forward_orientation: true
dnaa_fragment: 4
reference_genome: Acinetobacter/sp./ADP1/GCF_000046845.1_ASM4684v1_genomic.fna.gz
1 change: 1 addition & 0 deletions socru/data/Bacillus_cellulasensis/profile.txt.yml
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
dif_fragment: 1
dnaa_forward_orientation: false
dnaa_fragment: 7
reference_genome: Bacillus/cellulasensis/NJ-M2/GCF_001431145.1_ASM143114v1_genomic.fna.gz
4 changes: 2 additions & 2 deletions socru/data/Bordetella_pertussis/profile.txt
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
GS Frag_1 Frag_2 Frag_3
1.0 1 2 3
1.1 1' 2 3
1.3 1' 3 2'
1.6 1' 2 3
1.4 1' 3 2'
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
dif_fragment: 1
dnaa_forward_orientation: true
dnaa_fragment: 2
reference_genome: Candidatus/Kinetoplastibacterium/crithidii__ex_Angomonas_deanei_ATCC_30255_/GCF_000319225.1_ASM31922v1_genomic.fna.gz
1 change: 0 additions & 1 deletion socru/data/Enterobacter_asburiae/profile.txt
Original file line number Diff line number Diff line change
@@ -1,4 +1,3 @@
GS Frag_1 Frag_2 Frag_3 Frag_4 Frag_5 Frag_6 Frag_7 Frag_8
1.0 1 2 3 4 5 6 7 8
1.1 1' 2 3 4 5 6 7 8

1 change: 0 additions & 1 deletion socru/data/Enterobacter_cloacae/profile.txt
Original file line number Diff line number Diff line change
@@ -1,4 +1,3 @@
GS Frag_1 Frag_2 Frag_3 Frag_4 Frag_5 Frag_6 Frag_7 Frag_8
1.0 1 2 3 4 5 6 7 8
1.1 1' 2 3 4 5 6 7 8
2.57 1' 2 3 6' 5' 4' 7 8
1 change: 1 addition & 0 deletions socru/data/Enterobacter_hormaechei/profile.txt.yml
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
dif_fragment: 1
dnaa_forward_orientation: true
dnaa_fragment: 6
reference_genome: Enterobacter/hormaechei/ECNIH3/GCF_000750225.1_ASM75022v1_genomic.fna.gz
1 change: 1 addition & 0 deletions socru/data/Enterobacter_roggenkampii/profile.txt.yml
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
dif_fragment: 1
dnaa_forward_orientation: true
dnaa_fragment: 6
reference_genome: Enterobacter/roggenkampii/35734/GCF_000807415.2_ASM80741v4_genomic.fna.gz
1 change: 1 addition & 0 deletions socru/data/Enterobacter_sp./profile.txt.yml
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
dif_fragment: 1
dnaa_forward_orientation: true
dnaa_fragment: 6
reference_genome: Enterobacter/sp./638/GCF_000016325.1_ASM1632v1_genomic.fna.gz
2 changes: 0 additions & 2 deletions socru/data/Enterococcus_faecium/profile.txt
Original file line number Diff line number Diff line change
@@ -1,9 +1,7 @@
GS Frag_1 Frag_2 Frag_3 Frag_4 Frag_5 Frag_6
1.0 1 2 3 4 5 6
1.1 1' 2 3 4 5 6
1.8 1 2 3 4' 5 6
2.0 6 1 2 3 4 5
3.24 1 5' 4' 2 3 6
4.32 1 6' 2 3 4 5
4.33 1' 6' 2 3 4 5
5.35 1' 6' 3 4 5 2'
8 changes: 0 additions & 8 deletions socru/data/Escherichia_coli/profile.txt
Original file line number Diff line number Diff line change
@@ -1,24 +1,16 @@
GS Frag_1 Frag_2 Frag_3 Frag_4 Frag_5 Frag_6 Frag_7
1.0 1 2 3 4 5 6 7
1.1 1' 2 3 4 5 6 7
1.2 1 2' 3 4 5 6 7
1.24 1 2 3 4' 5' 6 7
1.25 1' 2 3 4' 5' 6 7
1.64 1 2 3 4 5 6 7'
1.9 1' 2 3 4' 5 6 7
1.123 1' 7' 6' 5' 4' 3 2'
2.67 1' 7' 3 4 5 6 2'
2.90 1 7' 3 4' 5' 6 2'
3.25 1' 4' 5' 2 3 6 7
4.96 1 7' 6' 2 3 4 5
4.97 1' 7' 6' 2 3 4 5
5.99 1' 7' 6' 3 4 5 2'
6.32 1 2 6' 3 4 5 7
7.120 1 2 3 7' 6' 5' 4'
8.3 1' 3 4 5 6 7 2'
9.16 1 2 5' 3 6 4 7
10.65 1' 7' 2 3 4 5 6
11.59 1' 4' 6' 5' 3 7 2'
12.56 1 2 3 6' 5' 4' 7
13.113 1' 7' 6' 5' 2 3 4
14.64 1 2 7' 3 4 5 6
1 change: 1 addition & 0 deletions socru/data/Escherichia_coli/profile.txt.yml
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
dif_fragment: 1
dnaa_forward_orientation: false
dnaa_fragment: 3
reference_genome: Escherichia/coli/K-12_substr._MG1655/GCF_000005845.2_ASM584v2_genomic.fna.gz
1 change: 1 addition & 0 deletions socru/data/Flavobacterium_sp./profile.txt.yml
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
dif_fragment: 1
dnaa_forward_orientation: true
dnaa_fragment: 3
reference_genome: Flavobacterium/sp./MEBiC07310/GCF_003148385.1_ASM314838v1_genomic.fna.gz
7 changes: 1 addition & 6 deletions socru/data/Haemophilus_influenzae/profile.txt
Original file line number Diff line number Diff line change
@@ -1,8 +1,3 @@
GS Frag_1 Frag_2 Frag_3 Frag_4 Frag_5 Frag_6
1.0 1 2 3 4 5 6
2.32 1 2 3 6' 4 5
2.40 1 2 3 6' 4' 5
2.48 1 2 3 6' 4 5'
3.3 1' 3 4 6 5 2'
4.33 1' 6' 2 3 4 5
5.48 1 2 6' 3 4 5'

2 changes: 0 additions & 2 deletions socru/data/Klebsiella_pneumoniae/profile.txt
Original file line number Diff line number Diff line change
@@ -1,11 +1,9 @@
GS Frag_1 Frag_2 Frag_3 Frag_4 Frag_5 Frag_6 Frag_7 Frag_8
1.0 1 2 3 4 5 6 7 8
1.1 1' 2 3 4 5 6 7 8
1.128 1 2 3 4 5 6 7 8'
2.1 1' 2 3 7 4 5 6 8
3.24 1 2 5' 4' 3 6 7 8
4.131 1' 8' 3 4 5 6 7 2'
5.0 1 2 3 5 4 6 7 8
6.3 1' 3 4 5 6 7 8 2'
7.32 1 2 3 4 5 7 8 6'
8.35 1' 6' 3 4 5 6 7 2'
1 change: 1 addition & 0 deletions socru/data/Klebsiella_pneumoniae/profile.txt.yml
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
dif_fragment: 1
dnaa_forward_orientation: false
dnaa_fragment: 3
reference_genome: Klebsiella/pneumoniae/NTUH-K2044/GCF_000009885.1_ASM988v1_genomic.fna.gz
2 changes: 0 additions & 2 deletions socru/data/Lactobacillus_plantarum/profile.txt
Original file line number Diff line number Diff line change
@@ -1,5 +1,3 @@
GS Frag_1 Frag_2 Frag_3 Frag_4 Frag_5
1.0 1 2 3 4 5
1.1 1' 2 3 4 5
1.8 1 2 3 4' 5
2.8 4' 5 1 2 3
1 change: 1 addition & 0 deletions socru/data/Legionella_sainthelensi/profile.txt.yml
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
dif_fragment: 1
dnaa_forward_orientation: true
dnaa_fragment: 2
reference_genome: Legionella/sainthelensi/LA01-117/GCF_002848365.1_ASM284836v1_genomic.fna.gz
1 change: 1 addition & 0 deletions socru/data/Leuconostoc_gelidum/profile.txt.yml
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
dif_fragment: 1
dnaa_forward_orientation: true
dnaa_fragment: 1
reference_genome: Leuconostoc/gelidum/type_strain:_LMG_18811/GCF_000196855.1_ASM19685v1_genomic.fna.gz
3 changes: 0 additions & 3 deletions socru/data/Listeria_monocytogenes/profile.txt
Original file line number Diff line number Diff line change
@@ -1,7 +1,4 @@
GS Frag_1 Frag_2 Frag_3 Frag_4 Frag_5 Frag_6
1.0 1 2 3 4 5 6
1.16 1 2 3 4 5' 6
2.56 1 2 3 6' 5' 4'
3.33 1' 6' 2 3 4 5
4.48 1 2 3 4 6' 5'
5.35 1' 6' 3 4 5 2'
2 changes: 0 additions & 2 deletions socru/data/Mannheimia_haemolytica/profile.txt
Original file line number Diff line number Diff line change
@@ -1,5 +1,3 @@
GS Frag_1 Frag_2 Frag_3 Frag_4 Frag_5 Frag_6
1.0 1 2 3 4 5 6
2.0 1 2 5 3 4 6
3.33 1' 6' 2 3 5 4
4.17 1' 5' 2 3 4 6
3 changes: 0 additions & 3 deletions socru/data/Neisseria_meningitidis/profile.txt
Original file line number Diff line number Diff line change
@@ -1,6 +1,3 @@
GS Frag_1 Frag_2 Frag_3 Frag_4
1.0 1 2 3 4
1.1 1' 2 3 4
2.3 1' 3 4 2'
2.9 1' 2 4' 3
3.1 4 1' 2 3
3 changes: 0 additions & 3 deletions socru/data/Pasteurella_multocida/profile.txt
Original file line number Diff line number Diff line change
@@ -1,5 +1,2 @@
GS Frag_1 Frag_2 Frag_3 Frag_4 Frag_5 Frag_6
1.0 1 2 3 4 5 6
2.48 1 2 3 4 6' 5'
3.9 1' 5 6 4' 2 3
4.24 1 2 3 6 4' 5'
2 changes: 0 additions & 2 deletions socru/data/Porphyromonas_gingivalis/profile.txt
Original file line number Diff line number Diff line change
@@ -1,4 +1,2 @@
GS Frag_1 Frag_2 Frag_3 Frag_4
1.0 1 2 3 4
1.1 1' 4 3 2
1.9 3 2 1' 4'
2 changes: 0 additions & 2 deletions socru/data/Pseudomonas_aeruginosa/profile.txt
Original file line number Diff line number Diff line change
@@ -1,7 +1,5 @@
GS Frag_1 Frag_2 Frag_3 Frag_4
1.0 1 2 3 4
1.1 1' 2 3 4
1.9 1' 2 3 4'
2.2 1 3 4 2'
2.3 1' 3 4 2'
2.11 1' 3 4' 2'
1 change: 1 addition & 0 deletions socru/data/Salmonella_enterica/profile.txt.yml
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
dif_fragment: 1
dnaa_forward_orientation: false
dnaa_fragment: 3
reference_genome: Salmonella/enterica/subsp._enterica_serovar_Typhimurium_str._LT2/GCF_000006945.2_ASM694v2_genomic.fna.gz
1 change: 0 additions & 1 deletion socru/data/Streptococcus_pyogenes/profile.txt
Original file line number Diff line number Diff line change
Expand Up @@ -3,4 +3,3 @@ GS Frag_1 Frag_2 Frag_3 Frag_4 Frag_5 Frag_6
1.1 1' 2 3 4 5 6
2.32 1 2 6' 3 4 5
3.3 1' 3 5 4 6 2'
4.56 1 2 3 6' 5' 4'
4 changes: 2 additions & 2 deletions socru/data/Streptomyces_sp./profile.txt
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
GS Frag_1 Frag_2 Frag_3 Frag_4 Frag_5 Frag_6
1.0 1 2 3 4 5 6
1.16 1 2 3 4 5' 6
1.17 1' 2 3 4 5' 6
1.47 1 2 3 4 5' 6
1.46 1' 2 3 4 5' 6
4 changes: 2 additions & 2 deletions socru/data/Vibrio_cholerae/profile.txt
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
GS Frag_1 Frag_2 Frag_3 Frag_4 Frag_5 Frag_6 Frag_7 Frag_8
1.0 1 2 3 4 5 6 7 8
2.251 1' 8' 3 7' 6' 5' 4' 2'
3.251 1' 8' 7' 6' 3 5' 4' 2'
2.4 1' 8' 3 7' 6' 5' 4' 2'
3.4 1' 8' 7' 6' 3 5' 4' 2'
1 change: 1 addition & 0 deletions socru/data/Vibrio_harveyi/profile.txt.yml
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
dif_fragment: 1
dnaa_forward_orientation: true
dnaa_fragment: 9
reference_genome: Vibrio/harveyi/ATCC_33843__392_[MAV]_/GCF_000770115.1_ASM77011v2_genomic.fna.gz
5 changes: 1 addition & 4 deletions socru/data/Yersinia_pseudotuberculosis/profile.txt
Original file line number Diff line number Diff line change
@@ -1,7 +1,4 @@
GS Frag_1 Frag_2 Frag_3 Frag_4 Frag_5 Frag_6 Frag_7
1.0 1 2 3 4 5 6 7
1.1 1' 2 3 4 5 6 7
2.56 1 2 3 6' 5' 4' 7
3.123 1' 7' 3 6' 5' 4' 2'
4.91 1' 7' 3 5' 4' 6 2'
5.83 1' 7' 5' 3 4 6 2'
2.83 1' 7' 5' 3 4 6 2'
Loading

0 comments on commit b28fd62

Please sign in to comment.