From 88dbe819eddd316bd80d726ec9d2450cc43cac14 Mon Sep 17 00:00:00 2001 From: Ali Pirani Date: Thu, 3 Oct 2019 11:59:52 -0400 Subject: [PATCH] Turn off unncessary core snp steps --- modules/core_prep_sanity_checks.py | 2 + modules/core_prep_sanity_checks.pyc | Bin 4050 -> 4114 bytes .../variant_diagnostics/PBS_generate_jobs.py | 255 + .../variant_diagnostics/PBS_generate_jobs.pyc | Bin 0 -> 8942 bytes modules/variant_diagnostics/core_pipeline.py | 93 +- .../core_pipeline_core_prep.pyc | Bin 0 -> 8766 bytes .../core_pipeline_core_prep_label.py | 293 ++ .../core_pipeline_core_prep_label.pyc | Bin 0 -> 8934 bytes .../core_pipeline_core_prep_main.py | 2439 +++++++++ .../core_pipeline_core_prep_main.pyc | Bin 0 -> 80696 bytes .../core_pipeline_modular.py | 4658 +++++++++++++++++ 11 files changed, 7696 insertions(+), 44 deletions(-) create mode 100644 modules/variant_diagnostics/PBS_generate_jobs.py create mode 100644 modules/variant_diagnostics/PBS_generate_jobs.pyc create mode 100644 modules/variant_diagnostics/core_pipeline_core_prep.pyc create mode 100644 modules/variant_diagnostics/core_pipeline_core_prep_label.py create mode 100644 modules/variant_diagnostics/core_pipeline_core_prep_label.pyc create mode 100644 modules/variant_diagnostics/core_pipeline_core_prep_main.py create mode 100644 modules/variant_diagnostics/core_pipeline_core_prep_main.pyc create mode 100755 modules/variant_diagnostics/core_pipeline_modular.py diff --git a/modules/core_prep_sanity_checks.py b/modules/core_prep_sanity_checks.py index d8685b0..20a59be 100755 --- a/modules/core_prep_sanity_checks.py +++ b/modules/core_prep_sanity_checks.py @@ -79,6 +79,8 @@ def make_sure_files_exists(vcf_file_array, Config, logger): for i in not_found_files: keep_logging('Error finding variant calling output files for: %s' % os.path.basename(i.replace('_filter2_final.vcf_no_proximate_snp.vcf', '')), 'Error finding variant calling output files for: %s' % 
os.path.basename(i.replace('_filter2_final.vcf_no_proximate_snp.vcf', '')), logger, 'exception') + keep_logging('File not found: %s' % i, + 'File not found: %s' % i, logger, 'debug') exit() def make_sure_label_files_exists(vcf_file_array, uniq_snp_positions, uniq_indel_positions, Config, logger): diff --git a/modules/core_prep_sanity_checks.pyc b/modules/core_prep_sanity_checks.pyc index f3887976bda0c6035c8ebac0126f7ebb50751414..15fefa654053c4d5154d0c24b4ba5bd4647f5604 100755 GIT binary patch delta 305 zcmca4KS_au`78D4dd-RGPl|E6aOEP9GqbfsKukck)LziOH<&nyi|PT#TZd_1WJr zGp0?J;SS=I1*!;=0}_+{*vcmJa`Q2!PyWMwTa*nbRKg4-ic51#Sb%hXL2BOQk37me zL3}`-28amapDfL5qACDl3W5lbt4cszu#4g|^OEy(3vyCRQiFsim+&SqN>9GZyPPp> aaxGsBBm3lAe12>)AZ?nH_4pSv+5!Lpbw@P- delta 218 zcmbQFa7mtn`7#UJ zmp7ka_F&v>$oig<(POeAyCZV3yJ&M!#K3t|H))ZtZT*Wd%Pg7_v!^O~sggO~y!LJ&xlfLLky zrFkjwnR&_ixdl0?C8Nhz6 diff --git a/modules/variant_diagnostics/PBS_generate_jobs.py b/modules/variant_diagnostics/PBS_generate_jobs.py new file mode 100644 index 0000000..34ae193 --- /dev/null +++ b/modules/variant_diagnostics/PBS_generate_jobs.py @@ -0,0 +1,255 @@ +# System wide imports +from __future__ import division +import sys +import argparse +import re +import os +import csv +import subprocess +sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) +""" Hacky way to append. 
Instead Add this path to PYTHONPATH Variable """ +from collections import OrderedDict +from collections import defaultdict +from joblib import Parallel, delayed +import multiprocessing +import thread +import glob +import readline +import errno +from datetime import datetime +import threading +import json +import ConfigParser +from config_settings import ConfigSectionMap +from logging_subprocess import * +from log_modules import * + + +def create_job(filter2_only_snp_vcf_dir, jobrun, vcf_filenames, unique_position_file, tmp_dir, Config): + + """ + This method takes the unique_position_file and list of final *_no_proximate_snp.vcf files and generates individual jobs/script. + Each of these jobs/scripts will generate a *label file. These label file for each sample contains a field description for each position in unique_position_file. + This field description denotes if the variant position made to the final variant list in a sample and if not then a reason/filter that caused it to filtered out from final list. + :param jobrun: + :param vcf_filenames: + :return: + """ + if jobrun == "parallel-cluster": + """ + Supports only PBS clusters for now. 
+ """ + for i in vcf_filenames: + job_name = os.path.basename(i) + job_print_string = "#PBS -N %s\n#PBS -M %s\n#PBS -m %s\n#PBS -V\n#PBS -l %s\n#PBS -q %s\n#PBS -A %s\n#PBS -l qos=flux\n\n/nfs/esnitkin/bin_group/anaconda2/bin/python /nfs/esnitkin/bin_group/pipeline/Github/variant_calling_pipeline_dev/modules/variant_diagnostics/reason_job_debug.py -filter2_only_snp_vcf_dir %s -filter2_only_snp_vcf_file %s -unique_position_file %s -tmp_dir %s\n" % (job_name, ConfigSectionMap("scheduler", Config)['email'], ConfigSectionMap("scheduler", Config)['notification'], ConfigSectionMap("scheduler", Config)['resources'], ConfigSectionMap("scheduler", Config)['queue'], ConfigSectionMap("scheduler", Config)['flux_account'], args.filter2_only_snp_vcf_dir, i, unique_position_file, tmp_dir) + job_file_name = "%s.pbs" % (i) + f1=open(job_file_name, 'w+') + f1.write(job_print_string) + f1.close() + #os.system("mv %s/*.pbs %s/temp" % (args.filter2_only_snp_vcf_dir, args.filter2_only_snp_vcf_dir)) + pbs_dir = args.filter2_only_snp_vcf_dir + "/*vcf.pbs" + pbs_scripts = glob.glob(pbs_dir) + for i in pbs_scripts: + keep_logging('Running: qsub %s' % i, 'Running: qsub %s' % i, logger, 'info') + call("qsub %s" % i, logger) + + elif jobrun == "parallel-local": + """ + Generate a Command list of each job and run it in parallel on different cores available on local system + """ + command_array = [] + command_file = "%s/commands_list.sh" % args.filter2_only_snp_vcf_dir + f3 = open(command_file, 'w+') + for i in vcf_filenames: + job_name = os.path.basename(i) + job_print_string = "#PBS -N %s\n#PBS -M %s\n#PBS -m %s\n#PBS -V\n#PBS -l %s\n#PBS -q %s\n#PBS -A %s\n#PBS -l qos=flux\n\n/nfs/esnitkin/bin_group/anaconda2/bin/python /nfs/esnitkin/bin_group/pipeline/Github/variant_calling_pipeline_dev/modules/variant_diagnostics/reason_job_debug.py -filter2_only_snp_vcf_dir %s -filter2_only_snp_vcf_file %s -unique_position_file %s -tmp_dir %s\n" % (job_name, ConfigSectionMap("scheduler", Config)['email'], 
ConfigSectionMap("scheduler", Config)['notification'], ConfigSectionMap("scheduler", Config)['resources'], ConfigSectionMap("scheduler", Config)['queue'], ConfigSectionMap("scheduler", Config)['flux_account'], args.filter2_only_snp_vcf_dir, i, unique_position_file, tmp_dir) + job_file_name = "%s.pbs" % (i) + f1=open(job_file_name, 'w+') + f1.write(job_print_string) + f1.close() + #os.system("mv %s/*.pbs %s/temp" % (args.filter2_only_snp_vcf_dir, args.filter2_only_snp_vcf_dir)) + pbs_dir = filter2_only_snp_vcf_dir + "/*vcf.pbs" + pbs_scripts = glob.glob(pbs_dir) + for i in pbs_scripts: + f3.write("bash %s\n" % i) + f3.close() + with open(command_file, 'r') as fpp: + for lines in fpp: + lines = lines.strip() + command_array.append(lines) + fpp.close() + + num_cores = multiprocessing.cpu_count() + results = Parallel(n_jobs=num_cores)(delayed(run_command)(command) for command in command_array) + + elif jobrun == "cluster": + #command_file = "%s/commands_list.sh" % args.filter2_only_snp_vcf_dir + #os.system("bash %s" % command_file) + command_array = [] + command_file = "%s/commands_list.sh" % filter2_only_snp_vcf_dir + f3 = open(command_file, 'w+') + + + for i in vcf_filenames: + job_name = os.path.basename(i) + job_print_string = "#PBS -N %s\n#PBS -M %s\n#PBS -m %s\n#PBS -V\n#PBS -l %s\n#PBS -q %s\n#PBS -A %s\n#PBS -l qos=flux\n\n/nfs/esnitkin/bin_group/anaconda2/bin/python /nfs/esnitkin/bin_group/pipeline/Github/variant_calling_pipeline_dev/modules/variant_diagnostics/reason_job_debug.py -filter2_only_snp_vcf_dir %s -filter2_only_snp_vcf_file %s -unique_position_file %s -tmp_dir %s\n" % (job_name, ConfigSectionMap("scheduler", Config)['email'], ConfigSectionMap("scheduler", Config)['notification'], ConfigSectionMap("scheduler", Config)['resources'], ConfigSectionMap("scheduler", Config)['queue'], ConfigSectionMap("scheduler", Config)['flux_account'], filter2_only_snp_vcf_dir, i, unique_position_file, tmp_dir) + job_file_name = "%s.pbs" % (i) + f1=open(job_file_name, 
'w+') + f1.write(job_print_string) + f1.close() + #os.system("mv %s/*.pbs %s/temp" % (args.filter2_only_snp_vcf_dir, args.filter2_only_snp_vcf_dir)) + pbs_dir = filter2_only_snp_vcf_dir + "/*vcf.pbs" + pbs_scripts = glob.glob(pbs_dir) + for i in pbs_scripts: + f3.write("bash %s\n" % i) + f3.close() + with open(command_file, 'r') as fpp: + for lines in fpp: + lines = lines.strip() + command_array.append(lines) + fpp.close() + + num_cores = multiprocessing.cpu_count() + results = Parallel(n_jobs=num_cores)(delayed(run_command)(command) for command in command_array) + + elif jobrun == "local": + """ + Generate a Command list of each job and run it on local system one at a time + """ + + command_array = [] + command_file = "%s/commands_list.sh" % filter2_only_snp_vcf_dir + f3 = open(command_file, 'w+') + + + for i in vcf_filenames: + job_name = os.path.basename(i) + job_print_string = "#PBS -N %s\n#PBS -M %s\n#PBS -m %s\n#PBS -V\n#PBS -l %s\n#PBS -q %s\n#PBS -A %s\n#PBS -l qos=flux\n\n/nfs/esnitkin/bin_group/anaconda2/bin/python /nfs/esnitkin/bin_group/pipeline/Github/variant_calling_pipeline_dev/modules/variant_diagnostics/reason_job_debug.py -filter2_only_snp_vcf_dir %s -filter2_only_snp_vcf_file %s -unique_position_file %s -tmp_dir %s\n" % (job_name, filter2_only_snp_vcf_dir, i, unique_position_file, tmp_dir) + job_file_name = "%s.pbs" % (i) + f1=open(job_file_name, 'w+') + f1.write(job_print_string) + f1.close() + #os.system("mv %s/*.pbs %s/temp" % (args.filter2_only_snp_vcf_dir, args.filter2_only_snp_vcf_dir)) + pbs_dir = filter2_only_snp_vcf_dir + "/*vcf.pbs" + pbs_scripts = glob.glob(pbs_dir) + + + for i in pbs_scripts: + f3.write("bash %s\n" % i) + f3.close() + with open(command_file, 'r') as fpp: + for lines in fpp: + lines = lines.strip() + command_array.append(lines) + fpp.close() + call("bash %s" % command_file, logger) + +def create_indel_job(filter2_only_snp_vcf_dir, jobrun, vcf_filenames, unique_position_file, tmp_dir, Config): + + """ + This method 
takes the unique_indel_position_file and list of final *_indel_final.vcf files and generates individual jobs/script. + Each of these jobs/scripts will generate a *label file. These label file for each sample contains a field description of each position in unique_indel_position_file. + This field description denotes if the variant position made to the final variant list in a sample and if not then a reason/filter that caused it to filtered out from final list. + :param jobrun: + :param vcf_filenames: + :return: + """ + if jobrun == "parallel-cluster": + """ + Supports only PBS clusters for now. + """ + for i in vcf_filenames: + job_name = os.path.basename(i) + job_print_string = "#PBS -N %s\n#PBS -M %s\n#PBS -m %s\n#PBS -V\n#PBS -l %s\n#PBS -q %s\n#PBS -A %s\n#PBS -l qos=flux\n\n/nfs/esnitkin/bin_group/anaconda2/bin/python /nfs/esnitkin/bin_group/pipeline/Github/variant_calling_pipeline_dev/modules/variant_diagnostics/reason_job_indel_debug.py -filter2_only_snp_vcf_dir %s -filter2_only_snp_vcf_file %s -unique_position_file %s -tmp_dir %s\n" % (job_name, ConfigSectionMap("scheduler", Config)['email'], ConfigSectionMap("scheduler", Config)['notification'], ConfigSectionMap("scheduler", Config)['resources'], ConfigSectionMap("scheduler", Config)['queue'], ConfigSectionMap("scheduler", Config)['flux_account'], filter2_only_snp_vcf_dir, i, unique_position_file, tmp_dir) + job_file_name = "%s_indel.pbs" % (i) + f1=open(job_file_name, 'w+') + f1.write(job_print_string) + f1.close() + #os.system("mv %s/*.pbs %s/temp" % (args.filter2_only_snp_vcf_dir, args.filter2_only_snp_vcf_dir)) + pbs_dir = filter2_only_snp_vcf_dir + "/*vcf_indel.pbs" + pbs_scripts = glob.glob(pbs_dir) + for i in pbs_scripts: + keep_logging('Running: qsub %s' % i, 'Running: qsub %s' % i, logger, 'info') + # os.system("qsub %s" % i) + call("qsub %s" % i, logger) + + elif jobrun == "parallel-local" or jobrun == "cluster": + """ + Generate a Command list of each job and run it in parallel on different 
cores available on local system + """ + command_array = [] + command_file = "%s/commands_indel_list.sh" % filter2_only_snp_vcf_dir + f3 = open(command_file, 'w+') + + + for i in vcf_filenames: + job_name = os.path.basename(i) + job_print_string = "#PBS -N %s\n#PBS -M %s\n#PBS -m %s\n#PBS -V\n#PBS -l %s\n#PBS -q %s\n#PBS -A %s\n#PBS -l qos=flux\n\n/nfs/esnitkin/bin_group/anaconda2/bin/python /nfs/esnitkin/bin_group/pipeline/Github/variant_calling_pipeline_dev/modules/variant_diagnostics/reason_job_indel_debug_gatk.py -filter2_only_snp_vcf_dir %s -filter2_only_snp_vcf_file %s -unique_position_file %s -tmp_dir %s\n" % (job_name, ConfigSectionMap("scheduler", Config)['email'], ConfigSectionMap("scheduler", Config)['notification'], ConfigSectionMap("scheduler", Config)['resources'], ConfigSectionMap("scheduler", Config)['queue'], ConfigSectionMap("scheduler", Config)['flux_account'], filter2_only_snp_vcf_dir, i, unique_position_file, tmp_dir) + job_file_name = "%s_indel.pbs" % (i) + f1=open(job_file_name, 'w+') + f1.write(job_print_string) + f1.close() + #os.system("mv %s/*.pbs %s/temp" % (args.filter2_only_snp_vcf_dir, args.filter2_only_snp_vcf_dir)) + pbs_dir = filter2_only_snp_vcf_dir + "/*vcf_indel.pbs" + pbs_scripts = glob.glob(pbs_dir) + for i in pbs_scripts: + f3.write("bash %s\n" % i) + f3.close() + with open(command_file, 'r') as fpp: + for lines in fpp: + lines = lines.strip() + command_array.append(lines) + fpp.close() + + num_cores = multiprocessing.cpu_count() + results = Parallel(n_jobs=num_cores)(delayed(run_command)(command) for command in command_array) + + # elif jobrun == "cluster": + # command_file = "%s/commands_list.sh" % args.filter2_only_snp_vcf_dir + # os.system("bash %s" % command_file) + elif jobrun == "local": + """ + Generate a Command list of each job and run it on local system one at a time + """ + + command_array = [] + command_file = "%s/commands_list.sh" % args.filter2_only_snp_vcf_dir + f3 = open(command_file, 'w+') + + + for i in 
vcf_filenames: + job_name = os.path.basename(i) + job_print_string = "#PBS -N %s\n#PBS -M %s\n#PBS -m %s\n#PBS -V\n#PBS -l %s\n#PBS -q %s\n#PBS -A %s\n#PBS -l qos=flux\n\n/nfs/esnitkin/bin_group/anaconda2/bin/python /nfs/esnitkin/bin_group/pipeline/Github/variant_calling_pipeline_dev/modules/variant_diagnostics/reason_job_indel_debug.py -filter2_only_snp_vcf_dir %s -filter2_only_snp_vcf_file %s -unique_position_file %s -tmp_dir %s\n" % (job_name, args.filter2_only_snp_vcf_dir, i, unique_position_file, tmp_dir) + job_file_name = "%s_indel.pbs" % (i) + f1=open(job_file_name, 'w+') + f1.write(job_print_string) + f1.close() + #os.system("mv %s/*.pbs %s/temp" % (args.filter2_only_snp_vcf_dir, args.filter2_only_snp_vcf_dir)) + pbs_dir = args.filter2_only_snp_vcf_dir + "/*vcf_indel.pbs" + pbs_scripts = glob.glob(pbs_dir) + + + for i in pbs_scripts: + f3.write("bash %s\n" % i) + f3.close() + with open(command_file, 'r') as fpp: + for lines in fpp: + lines = lines.strip() + command_array.append(lines) + fpp.close() + call("bash %s" % command_file, logger) + +def run_command(i): + """Function to run each command and is run as a part of python Parallel mutiprocessing method. + + :param: + i: command variable to run + + :return: + done: string variable with completion status of command. 
+ """ + + #call("%s" % i, logger) + os.system("%s" % i) + done = "Completed: %s" % i + return done \ No newline at end of file diff --git a/modules/variant_diagnostics/PBS_generate_jobs.pyc b/modules/variant_diagnostics/PBS_generate_jobs.pyc new file mode 100644 index 0000000000000000000000000000000000000000..dec5113c51c777391d5182c7f4ecd00ebf96b6e0 GIT binary patch literal 8942 zcmeHMOK%*<5w6(>AImo>zVsZ)R>a5=*OHZo3B^bvIXPKTMur(I;tYm6z009yXO`15 z5(z9%%Go*iALN=-faG8x{~*Z0$Ss!uIRrW7kQ{PMzN((tT|V@HWeYNFK5BcqtE+3O zs;m2}E&pe#_K){J`mjm)PZgg(u!(eL5&8JDC?x7w)Ue3n=Mse_YLsLQ?J_mWMY}?c zO3|)Tqgu2_s4-HsN2xJNszf_uv@=esDvjE)#ssNJdTb5;WbHDAQ`DKJ#x zvR0|FTJW8w#_6Jch8kz2JxbwO>YSs-IWuRB!t?Y#+YYS@XpPgah#EHeHECQVe?l6U z*t+-ECgFnMfB0F03n(1}@q>qf4&vyp>6$>-PZM7y%D)pd(?O@NT3!~WK6jRi$@jd( z3quvAqbx~<-oEmOM0=@9gN`yivtYR$N3EcJUo}&#_@37_Bdp8S<_PF1{Iia?ZG_1u z2$-J5Ve+6v$rWqwkJKyCoezIc2P2dc?O3!^qTUF5@?ex-us2F6LZ(c;Dm_LVRcQ6l zbM?sc$dTvKBhO=auFy`E7wJK2g<3UQIv69^8C^>U4<;xb=NRoxi0+fL_oqs460decUady{q{z=d(h2gX=y8RtbdvmO z8A2acw08eRo6$!VHq(rl=`W==(ZLk?vt$-rI+zwErwb)#=*w)#Us#(xgxbkgyg*{b zWwv5(hCSiWiM7}pyEjXl+-W?*euGKo*^0luP6xBJcZGVh@WBN-z@$0afyrS?w5MnX zMxPU(?k1(pO@D!Y_H!^7vLJO^MjF@WaZ=Q5{Jg&% zXuG4*?bx?d?}5^Gx~=Ri3U)K)c4Hmzf#9}+P}yGO+hL$nJ8s#nAo4={svE^_H;MOx z4o(zTN8R;@O&$y(huPaIQVE8%9YlPd`59*I#9O+qn@P}3*F}eSz2-Ix#mZU@4{Q5T z5Qcpzwr5`ry)6~W3hU6F`3F6AD^6_1!nD`vLgQu}rCtz07tBzhZ~Mxui+36@7jFq2 zPkeEsq!{YB6kkQLyq~CVKlGBoi&DEUv*Y>7PGhmEFFo4H6OYK$?chJLrVbMnGjV>+%9kywbMH^!6ybF#J zw?3k$nX_n+{ny&>so39bqnqj7~l>Q~I1h3uu(S7^cUHh`GndW4PAuZv~OtPU5Uv_aYC@^t~J0SMTm4UL*TS z%-x`?!XQ%hp9JZ4wpGt9?l$o*LDY7Oao1N5>z&xo5UzbDKk(X7tka;W>&E^rtOwSu zti9gdx33wa-*Dq7+~<&Y*^+*c;Dw$r$ooM?Pux~KE$wu35j9*uus@sIiq%R|zMHF# z2S@Yu3$YQjf~LotQWz7Z<1A?^EsWT+nG!~}rt3AEaTcYTFL0OjdUs1}z92q&L-QK- zt594z+0#xIMX=3=y{oe=D2qD^ZlqBw)_jA^JM}c${g?=27*sEy>$0vl<4y+wtX+vc zy{-8QyXEO^$f~7$kxcI5XvZtCIdzkJSeY{ZwT=IaXdoAVh&zJCmq^UP;f!n#`hm48 zfaAVgOYh1x*I@;b+8{WK+w7alm+A#RcT(|GbzS5lMQ|RHZN+VKT56e;Zzb1D5^rD3RP?YY@lxw8pgIWacEv)RgSD6- 
z$~rIsl9jB2Pnb!+3IUncpl=g-9@iIG6QEkHT_P z?-q{1a^n=_KDNiHr@(T!s1M8Gm0?)En8Wg1mf&_~1}vXp(4E8b69C`4Lx69kfaR-% z+_p1kkogfug$DEaPs4m>_Q1BsVBhoEcC5T>M*JHy%Sb)@@eBR{}36+Q?4Gkk(NzC_RuSW{qPAEgbD-w7}NHU1mmKO8P! zP5fu#;-#TLXAFDQ;oGCbHx!3)j>8Dh;rpBB)@ALAQp328ub_hKhK5c;cMb!&XCOO= zZ=iZrRvN%NV}}Cd1z(3l+iiR4gO^bmI0UI3cJ*C{o%@x-jDO~!WW+K;&LX#;RDU=M zBb_C1JIhQMB?@qK_(XL0((jyOa-PWrCN>i;NjR67yvF2pCYPC%nefgK95+C8&NsO4 z3KIr5&NrD{W%34-Z!x*XWSvQ!$(u~BGr7U!Ehaaae4EL4n0%MX_ds3=z$6i80F%iD zfK4AmIh}$Ou<0n!Ia69GtpGMXHxQ~$0tf+Q32Zvs2STRj2oORJ>X(vk+t7qP2yO5+ zB#tY-&!dE7@wbqCwU?UG{WW%>{u{`Tvq--5JV=VsUqlo)OS0klY>=i z9lY|Z5Oh+``Zu(aQYKBzAt2UxPYQf@-ByOWwQ^ktUHnBe81XuMgrX`0pP|0PDI-ym z1caffAIfA@JM(gl$qgcva|g;GQbgglx25})C#=Sm6IW^^%^1~j2_Nln7&^R6QP&BB zEt4s@Y$Vx3M$&EU9$#{esK`%TDUp(iN|GpcxGW`g6T{188Oj z9|Wk4X44MNXWc3XT*fZcrS#a^##NH>z^TiGuI@duMhft&9gz?pM_M9nN9v9^e6C2VBK3;aAzzhxwaAZ2eXPij zOMSe^Pe^^D$WKarQlg3+UXsHpiN-ZkyO5cdXht46gOAFtN^4fyb5ftPx{T@bywvCI z^SHDYq$)rwmABu9!`!ATSj4ISiB9=9V~vVbNpr%5y3-;EkM zBz`yO*c$5cjLkBlcBAgMUTaJtH-kTp{IZJ}dkW2Z4r6RBfbNd;^to4&-h@0FlhY~5 zkv~^hqv5?VSv{SWyvhN3liFxXdehRoB#$P<$;XB&YQq%>xd}O(l&~tf&0msUm65iJ zKl6^{Qxc*##zB;i%X&B=4=R$O0+P`W!Vu1=9)pe@YHbtZoxJ6xw$GDcyG@ zfuS>kS?!Gv^KE>;TK07tt-u)9j^M_| zHBs*7sf(7|?%gcv1X-NyyN~H$YM2g$Y0^yd%g8k2B=kE6!G7dt(LUnFP^Cu4yNNT zFx|@;$^8Ls7Z!3&q()Wm1o=Tef#=vzj9naYmIUp{V=<;wVXZLwv}>3i5{s?Hq5PKtH!XZy`ED6VAFbf!|4Iy>hKmb#6EfhUsgOg zp$z6^3C^yL$VZAv}u5wf-ZWvLVO!tW#d2Qy3l}$PpWb@PQ+KRvC6gTDg}4 zX@sVzb0%7=}_*PDtG2#&^?3`V9DvGD!N(3F=p%rd)~Ug zsm{Z6jBr@@`Rb5@r7T1@589tssRJ+?bQq|+s^N>%v$rX`AF{vxG^|ed*VX?!e}!Z3 z;jh1py|2Hpymz-&u&0@S0dmz`{JegEQygc-K^nUIINbmY+}%5O-D2;@VV|XKHwqdD zIQEmEPz2;wU=-hUqDI_|8-uY4NDZ;UophUCEo>UZTb#H> z|C@k;=DGK3TnONto#{nQcGD~;9JModQPg(LBMLLmAgN&j+~lae7iELVzS!j^6e384 zk>}!}4?JH$KUB;0miK8P9*+!*HfSB{-@;M)A;g{?J=p+GV1 zM2Sa0?=3M|W;rq7USJ}|mEtx)_$Ud%fB;?1LoEU%2G7%3HF=Qb=1m|hkPFNzmyYHd ztrXy$K<7@HMA$k8&{FXxLs4b0o5{1dqaxxY*KhPyQ3gJaLLDc__KkNHt!;xm({GI8 zl?Fw}alS=hCnp3N?)lB((8TpyYgtOYO->9(fJW4ngT2|Ja 
zxylk-IZx}jvz4n(t+MDWAsw6eKi<#tg3t;dX7BDCbxeA#GsK7X=1^CM)G8M8ITxv|Q#zB0dl!OH%)&#^D%!IrK zAMz6i#Dy$o8juhyM!8D}vO=OVOK=Dfjd9FdBsG93>!28Gr=SltBIn+m3hBNj_}pXJ zbf3k{lEpxneJ)tctg@K5s)ZS|ebs?==jHY{2^$ymo@rhH$gRqx801)>GMANBph7tc zX$^c-5C~e^eGZtu)Pln;4pSLmVo;eSWeZ((4Q?H+oMojl%T~?`dB~xagZyG4N6CrG z`4!}V?ewkdI^8a)4B-1B{c*ZV$^urx{F+jkE6P8iDlmC}GS-#HEI=Z7%m%?e=mtsF zbOrPU&c*eRWdL+JrIM{!gOwPlBGZ-bG}r_>SY5y&$bXdFk;h!t$179?kMzjZVIH%j z-u^&dxLODwc;VNOJn+IJ@EEwDBr0%$QWSC!umzA7D7LAZoOvLLSW?bIMnTeYUA4Mi zge~R_k;4PTXQXY0ggpxzja@~@a579Y) z9#*IO8<3Wd(m5U&%441L`?2?R4ruu9)++biw{f-%7)o(NS#p$b@kVcmU|eJ=_L535 zv=Ws6IX5}4fS%-M*~qyj_yvMWC`wh z4)ewL?@|c6z+|Y~Xm(p zzJ_Fr2xug^V1N{XrWfmH{bJo>p}0ryDAcuWsD$?-2mS_=mzccF^=cMG0I#)%W**kzRo={i|kbb z7o+wxM|-%-Yk*?v^@3fh<;o8BjK1=^gyxWeb>RE@N~@9L<*>d`G#*Ky$HA4a;*C-( z-t(#~1OXv%#y<|={c_xwLlVtbPx%2kw^G{u7GASa21}IRk|{@7mZTnYdJ|>avp0u5 zG-;x5+w5zryRwNS91QqWHp6v&H8Vb^kO6r_Tpo`x_Bv0 zJ!+M6dnw-{`6j@ttP{V%I~=1gGVi{$>lZhMJ{({?Qf}pt9;1r$G?aYQk*==(se;F8e*ki^asL1S literal 0 HcmV?d00001 diff --git a/modules/variant_diagnostics/core_pipeline_core_prep_label.py b/modules/variant_diagnostics/core_pipeline_core_prep_label.py new file mode 100644 index 0000000..470eed5 --- /dev/null +++ b/modules/variant_diagnostics/core_pipeline_core_prep_label.py @@ -0,0 +1,293 @@ +# System wide imports +from __future__ import division +import sys +import argparse +import re +import os +import csv +import subprocess +sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) +""" Hacky way to append. 
Instead Add this path to PYTHONPATH Variable """ +from collections import OrderedDict +from collections import defaultdict +from joblib import Parallel, delayed +import multiprocessing +import thread +import glob +import readline +import errno +from datetime import datetime +import threading +import json +import ConfigParser +from config_settings import ConfigSectionMap +from logging_subprocess import * +from log_modules import * +from tabix import * +from Bio import SeqIO +from core_prep_sanity_checks import * +from PBS_generate_jobs import * +from core_pipeline_core_prep_main import * + +def core_prep_label(vcf_filenames, filter2_only_snp_vcf_dir, outgroup, reference, log_unique_time, log_file_handle, logger, jobrun, Config): + # Create temporary Directory core_temp_dir/temp for storing temporary intermediate files. Check if core_temp_dir contains all the required files to run these pipeline. + global temp_dir + temp_dir = filter2_only_snp_vcf_dir + "/temp" + + # # Extract All the unique SNO and Indel position list from final filtered *_no_proximate_snp.vcf files. + unique_position_file = create_positions_filestep(vcf_filenames, filter2_only_snp_vcf_dir, outgroup, logger) + unique_indel_position_file = create_indel_positions_filestep(vcf_filenames, filter2_only_snp_vcf_dir, outgroup, logger) + + # bgzip and tabix all the vcf files in core_temp_dir. + files_for_tabix = glob.glob("%s/*.vcf" % filter2_only_snp_vcf_dir) + tabix(files_for_tabix, "vcf", logger, Config) + + # Get the cluster option; create and run jobs based on given parameter. The jobs will parse all the intermediate vcf file to extract information such as if any unique variant position was unmapped in a sample, if it was filtered out dur to DP,MQ, FQ, proximity to indel, proximity to other SNPs and other variant filter parameters set in config file. 
+ tmp_dir = "/tmp/temp_%s/" % log_unique_time + + create_job(filter2_only_snp_vcf_dir, jobrun, vcf_filenames, unique_position_file, tmp_dir, Config) + + create_indel_job(filter2_only_snp_vcf_dir, jobrun, vcf_filenames, unique_indel_position_file, tmp_dir, Config) + + # If Phaster Summary file doesn't exist in reference genome folder + if not os.path.isfile("%s/summary.txt" % os.path.dirname(reference)): + if ConfigSectionMap("functional_filters", Config)['apply_functional_filters'] == "yes": + keep_logging('Functional class filter is set to yes. Preparing Functional class filters\n', + 'Functional class filter is set to yes. Preparing Functional class filters\n', logger, + 'info') + if ConfigSectionMap("functional_filters", Config)['find_phage_region'] == "yes": + # Submit Phaster jobs to find ProPhage region in reference genome. + run_phaster(reference, filter2_only_snp_vcf_dir, logger, Config) + + call( + "cp %s %s/Logs/core_prep/" % (log_file_handle, os.path.dirname(os.path.dirname(filter2_only_snp_vcf_dir))), + logger) + + +"""core_prep methods + + This block contains methods that are respnsible for running the first part of core_All step of the pipeline. + This methods generates all the necessary intermediate files required for the second part of core_All step. + Example of intermediate files: various diagnostics files/matrices where it decides why a variant was filtered out. + +""" + +def create_positions_filestep(vcf_filenames, filter2_only_snp_vcf_dir, outgroup, logger): + + """ + This method gathers SNP positions from each final *_no_proximate_snp.vcf file (these are the positions that passed variant filter parameters + from variant calling pipeline) and write to *_no_proximate_snp.vcf_position files. Use these *_no_proximate_snp.vcf_position files to generate a list of unique_position_file + :param: list of final vcf filenames i.e *.vcf_no_proximate_snp.vcf . These files are the final output of variant calling step for each sample. 
+ :return: unique_position_file + """ + + filter2_only_snp_position_files_array = [] + for file in vcf_filenames: + with open(file, 'rU') as csv_file: + file_name = temp_dir + "/" + os.path.basename(file) + "_positions" + addpositionfilenametoarray = file_name + filter2_only_snp_position_files_array.append(addpositionfilenametoarray) + f1 = open(file_name, 'w+') + csv_reader = csv.reader(csv_file, delimiter='\t') + for row in csv_reader: + position = row[0] + if not position.startswith('#'): + p_string = row[1] + "\n" + f1.write(p_string) + f1.close() + csv_file.close() + + """ Get Positions Specific to Outgroup Sample name """ + if outgroup is not None: + outgroup_position_file_name = temp_dir + "/" + outgroup_vcf_filename + "_positions" + outgroup_position_array = [] + f1 = open(outgroup_position_file_name, 'r+') + for lines in f1: + lines = lines.strip() + outgroup_position_array.append(int(lines)) + f1.close() + + + position_array_excluding_outgroup = [] + for filess in filter2_only_snp_position_files_array: + if outgroup not in filess: + f = open(filess, 'r+') + for line in f: + line = line.strip() + position_array_excluding_outgroup.append(int(line)) + f.close() + position_array_unique_excluding_outgroup = set(position_array_excluding_outgroup) + position_array_sort_excluding_outgroup = sorted(position_array_unique_excluding_outgroup) + #print len(position_array_sort_excluding_outgroup) + outgroup_specific_positions = [] + f_outgroup = open("%s/outgroup_specific_positions.txt" % args.filter2_only_snp_vcf_dir, 'w+') + for i in outgroup_position_array: + if i not in position_array_sort_excluding_outgroup: + f_outgroup.write(str(i) + '\n') + outgroup_specific_positions.append(int(i)) + # outgroup_indel_specific_positions.append(int(i)) + f_outgroup.close() + print "No. of variant positions in outgroup: %s" % len(outgroup_position_array) + print "No. 
of variant positions specific to outgroup: %s" % len(outgroup_specific_positions) + + position_array = [] + for filess in filter2_only_snp_position_files_array: + f = open(filess, 'r+') + for line in f: + line = line.strip() + # Changed variable to suit sorting: 25-07-2018 + position_array.append(int(line)) + f.close() + # Check why python sorting is not working + keep_logging('Sorting unique variant positions.\n', 'Sorting unique variant positions.\n', logger, 'info') + position_array_unique = set(position_array) + position_array_sort = sorted(position_array_unique) + keep_logging('\nThe number of unique variant positions:%s' % len(position_array_sort), '\nThe number of unique variant positions:%s' % len(position_array_sort), logger, 'info') + unique_position_file = "%s/unique_positions_file" % filter2_only_snp_vcf_dir + f=open(unique_position_file, 'w+') + for i in position_array_sort: + # Changed variable to suit sorting: 25-07-2018 + f.write(str(i) + "\n") + f.close() + + if len(position_array_sort) == 0: + keep_logging('ERROR: No unique positions found. Check if vcf files are empty?', 'ERROR: No unique positions found. 
Check if vcf files are empty?', logger, 'info') + exit() + + return unique_position_file + + else: + + """ Create position array containing unique positiones from positions file """ + + position_array = [] + for filess in filter2_only_snp_position_files_array: + f = open(filess, 'r+') + for line in f: + line = line.strip() + # Changed variable to suit sorting: 25-07-2018 + position_array.append(int(line)) + f.close() + # Check why python sorting is not working + keep_logging('Sorting unique variant positions.\n', 'Sorting unique variant positions.\n', logger, 'info') + position_array_unique = set(position_array) + position_array_sort = sorted(position_array_unique) + keep_logging('\nThe number of unique variant positions:%s' % len(position_array_sort), '\nThe number of unique variant positions:%s' % len(position_array_sort), logger, 'info') + unique_position_file = "%s/unique_positions_file" % filter2_only_snp_vcf_dir + f=open(unique_position_file, 'w+') + for i in position_array_sort: + # Changed variable to suit sorting: 25-07-2018 + f.write(str(i) + "\n") + f.close() + + if len(position_array_sort) == 0: + keep_logging('ERROR: No unique positions found. Check if vcf files are empty?', 'ERROR: No unique positions found. Check if vcf files are empty?', logger, 'info') + exit() + return unique_position_file + +def create_indel_positions_filestep(vcf_filenames, filter2_only_snp_vcf_dir, outgroup, logger): + + """ + This function gathers Indel positions from each final *_indel_final.vcf (these are the positions that passed variant filter parameters + from variant calling pipeline) and write to *_indel_final.vcf files. Use these *_indel_final.vcf_position files to generate a list of unique_position_file + :param: list of final vcf filenames i.e *_indel_final.vcf . These files are the final output of variant calling step for each sample. 
+ :return: unique_indel_position_file + """ + + filter2_only_indel_position_files_array = [] + for file in vcf_filenames: + indel_file = file.replace('_filter2_final.vcf_no_proximate_snp.vcf', '_filter2_indel_final.vcf') + with open(indel_file, 'rU') as csv_file: + file_name = temp_dir + "/" + os.path.basename(indel_file) + "_positions" + addpositionfilenametoarray = file_name + filter2_only_indel_position_files_array.append(addpositionfilenametoarray) + f1 = open(file_name, 'w+') + csv_reader = csv.reader(csv_file, delimiter='\t') + for row in csv_reader: + position = row[0] + if not position.startswith('#'): + p_string = row[1] + "\n" + f1.write(p_string) + f1.close() + csv_file.close() + + """ Get Positions Specific to Outgroup Sample name """ + if outgroup is not None: + outgroup_position_indel_file_name = temp_dir + "/" + outgroup_indel_vcf_filename + "_positions" + print outgroup_position_indel_file_name + outgroup_position_indel_array = [] + f1 = open(outgroup_position_indel_file_name, 'r+') + for lines in f1: + lines = lines.strip() + outgroup_position_indel_array.append(int(lines)) + f1.close() + #print len(outgroup_position_indel_array) + + position_array_indel_excluding_outgroup = [] + for filess in filter2_only_indel_position_files_array: + if outgroup not in filess: + f = open(filess, 'r+') + for line in f: + line = line.strip() + position_array_indel_excluding_outgroup.append(int(line)) + f.close() + position_array_indel_unique_excluding_outgroup = set(position_array_indel_excluding_outgroup) + position_array_sort_indel_excluding_outgroup = sorted(position_array_indel_unique_excluding_outgroup) + outgroup_indel_specific_positions = [] + f_outgroup = open("%s/outgroup_indel_specific_positions.txt" % filter2_only_snp_vcf_dir, 'w+') + for i in outgroup_position_indel_array: + if i not in position_array_sort_indel_excluding_outgroup: + f_outgroup.write(str(i) + '\n') + outgroup_indel_specific_positions.append(int(i)) + f_outgroup.close() + print "No. 
of indel variant positions in outgroup: %s" % len(outgroup_position_indel_array) + print "No. of indel variant positions specific to outgroup: %s" % len(outgroup_indel_specific_positions) + + position_array = [] + for filess in filter2_only_indel_position_files_array: + f = open(filess, 'r+') + for line in f: + line = line.strip() + # Changed variable to suit sorting: 25-07-2018 + position_array.append(int(line)) + f.close() + position_array_unique = set(position_array) + position_array_sort = sorted(position_array_unique) + keep_logging('\nThe number of unique indel positions:%s' % len(position_array_sort), '\nThe number of unique indel positions:%s' % len(position_array_sort), logger, 'info') + unique_indel_position_file = "%s/unique_indel_positions_file" % filter2_only_snp_vcf_dir + f=open(unique_indel_position_file, 'w+') + for i in position_array_sort: + # Changed variable to suit sorting: 25-07-2018 + f.write(str(i) + "\n") + f.close() + if len(position_array_sort) == 0: + keep_logging('ERROR: No unique positions found. Check if vcf files are empty?', 'ERROR: No unique positions found. 
Check if vcf files are empty?', logger, 'info') + exit() + + return unique_indel_position_file + + + else: + + """ Create position array containing unique positiones from positions file """ + position_array = [] + for filess in filter2_only_indel_position_files_array: + f = open(filess, 'r+') + for line in f: + line = line.strip() + # Changed variable to suit sorting: 25-07-2018 + position_array.append(int(line)) + f.close() + position_array_unique = set(position_array) + position_array_sort = sorted(position_array_unique) + keep_logging('\nThe number of unique indel positions:%s' % len(position_array_sort), '\nThe number of unique indel positions:%s' % len(position_array_sort), logger, 'info') + unique_indel_position_file = "%s/unique_indel_positions_file" % filter2_only_snp_vcf_dir + f=open(unique_indel_position_file, 'w+') + for i in position_array_sort: + # Changed variable to suit sorting: 25-07-2018 + f.write(str(i) + "\n") + f.close() + if len(position_array_sort) == 0: + keep_logging('ERROR: No unique positions found. Check if vcf files are empty?', 'ERROR: No unique positions found. Check if vcf files are empty?', logger, 'info') + exit() + return unique_indel_position_file \ No newline at end of file diff --git a/modules/variant_diagnostics/core_pipeline_core_prep_label.pyc b/modules/variant_diagnostics/core_pipeline_core_prep_label.pyc new file mode 100644 index 0000000000000000000000000000000000000000..992c5d8334a5a1472f3bf1dd16763857990aedb8 GIT binary patch literal 8934 zcmd5>O>i8=74F&9kG1;$Tm4yPFc^d#q(C4^IS>do7zmNg2x78T&eUjURvLMCW_Nnl zNL8x?A-7aPRXF8D4ml<{({Se ze_v1WUz4T3H9opiRo%Z~{Jv}}rT<_l<>O5~GRV-!kxuEI=RVhdv@-Zt%&E#;uJOF8%x4-bsLDc?UsRRFEMHcYa+Y6Gl_klKsQR*M zEUU_LKGuq=tYqz0Rb@5Huc^wK#H-hXdOFIHg1UDU_$P+;(jW~Rfhn0m zlWTEQ3-@+{Y6^)z@tUTFx?DC{`qoa+dgEqw3^}UHk+0eCwztr%W6|4|Jm_wzjy!h? 
zsxzWK98$+)Dn-7pu%yzWI)MM7%2H+c%kj8Mhg4@=`9=2ZoMUIpN70ZfA5W-sm@PY_ zGSryrOh}UvWu<7GSCmdxjCOt1A6Bpg{T9_ldL{+x6)S}_BMQ4>RIOl{?-o>oiV5Y9 z@dHh;c+})n+wr_@*-9rlC{@U{Zk4UpiWR{eQ(WoJ3Hd!mvMo|8s4`7e5HB*$u&rtF ztu$yfHND}I-nzVbuUdoAl)|s(5^SZ7rZjO;t*1;g-EK6zD4)2Ng^ zL36l%n4~Lgo&@k?w;Ah@UaL91K&L@7eWX_lK|y^`8|t%Hk9Xl!7~I81N{Jxoheve{~35_yflVKH1tsk=eYbnEfn9;^cgQYvXjoH_JN zWRxoQsce6Ek*ustgM*lLG8TW9(V=)ec;4^}LL#y8B+_Lt|=5-JlAym>;y0tceHd0!q7sWM&ri($!wYX$zZbDj=)j z*5br9)@8hazf$)NWR)cP1LU@%n%)X@6sC8>Xlpl&T$$8XGi(O+FbcMAgz0{Jck3Q} z%!^W&CWATMtlss5ds~gzZ)5rMMtSUG&grFx9;r*_08sy7^kn;hGP10 zB&t{{E?SGld27B{Dy$S%iWjXFYXtwJ)=J@gp=e#Qs`RD{_;MX2DE<{CdrBP-sl$IT zU|qlScXf;?fGCKw6+6G6KK`RR9$~yh2wZ|=As8MY*bOs0Eg=Rj9V66@s^fDEfCmVE z2#7eyImD+Sa-ru*EQL@i?GxVk&0l3y3IX-srg zpOV>6%Y-Ea8f_1srkv|{E~(=g)tONT5Cx2R{=Bzrb(HPh7 z{9GN+DV{KM>h7pYDq@v6hFZ#*SClhv?~!vNU_ZM3#;`7)IkShcYjJ@7h+h{mT$c7!=iXx>ORsatLpH!G0IIEh3@f{ z46&LC?-}A(g_DMe3%64stQmfAzWFc*PeB-FRgAGFhFO#RdBvuX4&}~^a;F}8-F+AV zGdW%tMb|Sw#)@5H&+B*A#duhb0S0S5S3IF$E(_kxL;Is5bpSqu4n1|t;=VYEJL_ua zYqr;)PpZ@UwD{23%OCm@d;MkTf%d}o-q~10Ow<1c$Sc|SduJbqKhBf=*thp^0s>Fi zJKMMH?0f)5NaBVac-4KNg~+Shm)$7FK^`B54W8FH!w6&uB?5Z`1L(l^lE6mp#31Ru zm)cE0j=;COai2YKYN9m3=|~_W3W>O^N%v^5v!mfD8z-IpeiEh-5Z_0Y93F^Hz%%bc zC8OA2-TOLlvY5=_tb?0TqEJFeM-4o=bds?6YO-q4sSvxyx2cDNbX0{8|SZxic4 z!^t6=o6igcP|$3P2K`ncII?jfo7K@?qgf9&jY>(7wv*`U{goEvle~*KPuxkUlv_b- zqN4Y&2nqq*3L7wYqS^RKDssH=V2YcAQD>O|BNVBY)GrwjWgQ+99Uj-8oto z9dI~3dTC<`V+fdxn?d9dd^u$%Qcj$>>*;`gCD;zgKZtxG5>Lsau zABankv1}d5RqHV@KY`A690dp+J@_e@l&&Z;2&i?MgiR3m`YIBx6xQVG)?M zfvH4mEOkY13iW(H>z~gjHbcL%8+g$lHwV12cQ;;Db1j_4IM%-zjen3tAja@3^P)Xqsk~|zl!Xan;F~0hD zNYvPfHEGRR7r+%Jt!Zl!X$ikm)`B%-O%*3lH-hI$YrM@sp8s_pjnG1Ny z0Z^~76=G~*E~Fi}PBC0&l-E1}!%Nu4ATneuX<4 z?o5f0)+Y*|+bkRJa+yhT83;4a8JC$9F7rt-o5o~Ubs*kpb>nq{#~HcbnpS}2Mr9HW zGA$6ASs@pw5T-(M0~=*Dg0pQs17x3T!Qtjk5*i?55Sn@63@tGY?mdm11tByGM$RJH z$bpfA{9GnSD2mAW4dj6HbZ6JHS}hP7fcQDu?C0Ggaq$yv;Vg&v3^H;%CZ`u7p!M44>1w0 z#7o4^1w&oFM&o>TQk~XsL0$fz#&N(??rWUi4?WO0Amck5ggp9loG?9}l3j1+Oyvz; 
z2cDoAXSs^G&JrXoH06KJQ2G^+^ZYa?>1&c7w_Q5$P0R!)K`5V2)y>XI9lYE0IU8w z$O^Zq+?C^MzwT87=i4agBP|X&obxP`?=X3e$@5IU%Y=4uzK`TIxHc-&5$Gwz^v+eb z`~i~}n7qh@lW<;ULd5U<2+4+Uia`{Eo|ZY9T*05Vi!qA<EaXDinw3r3~0Sl_vYw$9s3 z9CoFb4(xh=w!rPv*G1UE{Y0Lgt7(gO}?F95#}{P#nD(1`)ie{d^n<0)t>-s<8lATECb*QEz^$@q~L z77L1w%PMepM#dyC*9zQ{(o?s0aj#be-IS{avtjvaPWHFAtK}8hITSz!c7yB6E3j&e zSJU!NQag;Y4hI+RiZ@X8aMvNJc6ieuFJ>BeyB&7rkWlkgRk~09?eP7k=*cJHxn#^i zl0>n?oZLv6w(Q9v4|E*Kn>q7}>lSC1h@7gt19y2*fyUar!=r@m>pQnMU6}^ouyx-$ zIV7oad+|ybJDhMzTHkx6q&>XdI&!PL#?kT$^wujoZgy$tV%|DEpZ7TXrmEqEk;7{z zVVDlRt+xdpqJ1EX@q)3+54o(Gad$?1kVKXL&lP4sf6FJzvicc#Isd!Ag|RDRt^WdI CeVcLs literal 0 HcmV?d00001 diff --git a/modules/variant_diagnostics/core_pipeline_core_prep_main.py b/modules/variant_diagnostics/core_pipeline_core_prep_main.py new file mode 100644 index 0000000..1a54436 --- /dev/null +++ b/modules/variant_diagnostics/core_pipeline_core_prep_main.py @@ -0,0 +1,2439 @@ +# System wide imports +from __future__ import division +import sys +import argparse +import re +import os +import csv +import subprocess +sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) +""" Hacky way to append. Instead Add this path to PYTHONPATH Variable """ +from collections import OrderedDict +from collections import defaultdict +from joblib import Parallel, delayed +import multiprocessing +import thread +import glob +import readline +import errno +from datetime import datetime +import threading +import json +import ConfigParser +from config_settings import ConfigSectionMap +from modules.logging_subprocess import * +from modules.log_modules import * +from modules.tabix import * +from Bio import SeqIO +from modules.core_prep_sanity_checks import * +from PBS_generate_jobs import * + + +"""core methods + + This block contains methods that are respnsible for running the second part of core_All step of the pipeline. + It uses intermediate files generated during the first step, finds core SNPs and annotates variants using snpEff. 
+ It will generate all types of SNP matrices that is required for downstream pathways / Association analysis. + Output: + - + +""" + + +def generate_paste_command(): + """ + This Function will take all the *label file and generate/paste it column wise to generate a matrix. These matrix will be used in downstream analysis. + :param: null + :return: null + """ + + """ Paste/Generate and sort SNP Filter Label Matrix """ + paste_file = args.filter2_only_snp_vcf_dir + "/paste_label_files.sh" + f4 = open(paste_file, 'w+') + paste_command = "paste %s/unique_positions_file" % args.filter2_only_snp_vcf_dir + for i in vcf_filenames: + label_file = i.replace('_filter2_final.vcf_no_proximate_snp.vcf', + '_filter2_final.vcf_no_proximate_snp.vcf_positions_label') + paste_command = paste_command + " " + label_file + header_awk_cmd = "awk \'{ORS=\"\t\";}{print $1}\' %s > %s/header.txt" % ( + args.filter2_only_snp_vcf_filenames, args.filter2_only_snp_vcf_dir) + sed_header = "sed -i \'s/^/\t/\' %s/header.txt" % args.filter2_only_snp_vcf_dir + sed_header_2 = "sed -i -e \'$a\\' %s/header.txt" % args.filter2_only_snp_vcf_dir + + call("%s" % header_awk_cmd, logger) + call("%s" % sed_header, logger) + call("%s" % sed_header_2, logger) + + temp_paste_command = paste_command + " > %s/temp_label_final_raw.txt" % args.filter2_only_snp_vcf_dir + paste_command = paste_command + " > %s/All_label_final_raw" % args.filter2_only_snp_vcf_dir + f4.write(paste_command) + f4.close() + sort_All_label_cmd = "sort -n -k1,1 %s/All_label_final_raw > %s/All_label_final_sorted.txt" % ( + args.filter2_only_snp_vcf_dir, args.filter2_only_snp_vcf_dir) + paste_command_header = "cat %s/header.txt %s/All_label_final_sorted.txt > %s/All_label_final_sorted_header.txt" % ( + args.filter2_only_snp_vcf_dir, args.filter2_only_snp_vcf_dir, args.filter2_only_snp_vcf_dir) + + ls = [] + for i in vcf_filenames: + label_file = i.replace('_filter2_final.vcf_no_proximate_snp.vcf', + 
'_filter2_final.vcf_no_proximate_snp.vcf_positions_label') + ls.append(label_file) + ls.insert(0, "%s/unique_positions_file" % args.filter2_only_snp_vcf_dir) + + with open('%s/All_label_final_raw.sh' % args.filter2_only_snp_vcf_dir, 'w') as outfile: + outfile.write(paste_command) + outfile.close() + + with open('%s/temp_label_final_raw.txt.sh' % args.filter2_only_snp_vcf_dir, 'w') as outfile: + outfile.write(temp_paste_command) + outfile.close() + + call("bash %s/All_label_final_raw.sh" % args.filter2_only_snp_vcf_dir, logger) + call("bash %s/temp_label_final_raw.txt.sh" % args.filter2_only_snp_vcf_dir, logger) + call("%s" % sort_All_label_cmd, logger) + call("%s" % paste_command_header, logger) + + """ Assign numeric code to each variant filter reason""" + subprocess.call([ + "sed -i 's/reference_unmapped_position/0/g' %s/All_label_final_sorted_header.txt" % args.filter2_only_snp_vcf_dir], + shell=True) + subprocess.call( + ["sed -i 's/reference_allele/1/g' %s/All_label_final_sorted_header.txt" % args.filter2_only_snp_vcf_dir], + shell=True) + subprocess.call(["sed -i 's/VARIANT/1TRUE/g' %s/All_label_final_sorted_header.txt" % args.filter2_only_snp_vcf_dir], + shell=True) + subprocess.call([ + "sed -i 's/LowFQ_QUAL_DP_proximate_SNP/2/g' %s/All_label_final_sorted_header.txt" % args.filter2_only_snp_vcf_dir], + shell=True) + subprocess.call([ + "sed -i 's/LowFQ_DP_QUAL_proximate_SNP/2/g' %s/All_label_final_sorted_header.txt" % args.filter2_only_snp_vcf_dir], + shell=True) + subprocess.call([ + "sed -i 's/LowFQ_QUAL_proximate_SNP/2/g' %s/All_label_final_sorted_header.txt" % args.filter2_only_snp_vcf_dir], + shell=True) + subprocess.call( + ["sed -i 's/LowFQ_DP_proximate_SNP/2/g' %s/All_label_final_sorted_header.txt" % args.filter2_only_snp_vcf_dir], + shell=True) + subprocess.call( + ["sed -i 's/LowFQ_proximate_SNP/2/g' %s/All_label_final_sorted_header.txt" % args.filter2_only_snp_vcf_dir], + shell=True) + subprocess.call( + ["sed -i 's/LowFQ_QUAL_DP/2/g' 
%s/All_label_final_sorted_header.txt" % args.filter2_only_snp_vcf_dir], + shell=True) + subprocess.call( + ["sed -i 's/LowFQ_DP_QUAL/2/g' %s/All_label_final_sorted_header.txt" % args.filter2_only_snp_vcf_dir], + shell=True) + subprocess.call(["sed -i 's/LowFQ_QUAL/2/g' %s/All_label_final_sorted_header.txt" % args.filter2_only_snp_vcf_dir], + shell=True) + subprocess.call(["sed -i 's/LowFQ_DP/2/g' %s/All_label_final_sorted_header.txt" % args.filter2_only_snp_vcf_dir], + shell=True) + subprocess.call([ + "sed -i 's/HighFQ_QUAL_DP_proximate_SNP/4/g' %s/All_label_final_sorted_header.txt" % args.filter2_only_snp_vcf_dir], + shell=True) + subprocess.call([ + "sed -i 's/HighFQ_DP_QUAL_proximate_SNP/4/g' %s/All_label_final_sorted_header.txt" % args.filter2_only_snp_vcf_dir], + shell=True) + subprocess.call([ + "sed -i 's/HighFQ_QUAL_proximate_SNP/4/g' %s/All_label_final_sorted_header.txt" % args.filter2_only_snp_vcf_dir], + shell=True) + subprocess.call( + ["sed -i 's/HighFQ_DP_proximate_SNP/4/g' %s/All_label_final_sorted_header.txt" % args.filter2_only_snp_vcf_dir], + shell=True) + subprocess.call( + ["sed -i 's/HighFQ_proximate_SNP/7/g' %s/All_label_final_sorted_header.txt" % args.filter2_only_snp_vcf_dir], + shell=True) + subprocess.call( + ["sed -i 's/HighFQ_QUAL_DP/3/g' %s/All_label_final_sorted_header.txt" % args.filter2_only_snp_vcf_dir], + shell=True) + subprocess.call( + ["sed -i 's/HighFQ_DP_QUAL/3/g' %s/All_label_final_sorted_header.txt" % args.filter2_only_snp_vcf_dir], + shell=True) + subprocess.call(["sed -i 's/HighFQ_QUAL/3/g' %s/All_label_final_sorted_header.txt" % args.filter2_only_snp_vcf_dir], + shell=True) + subprocess.call(["sed -i 's/HighFQ_DP/3/g' %s/All_label_final_sorted_header.txt" % args.filter2_only_snp_vcf_dir], + shell=True) + subprocess.call(["sed -i 's/LowFQ/5/g' %s/All_label_final_sorted_header.txt" % args.filter2_only_snp_vcf_dir], + shell=True) + subprocess.call(["sed -i 's/HighFQ/6/g' %s/All_label_final_sorted_header.txt" % 
args.filter2_only_snp_vcf_dir], + shell=True) + remove_unwanted_text = "sed -i \'s/_filter2_final.vcf_no_proximate_snp.vcf//g\' %s/All_label_final_sorted_header.txt" % args.filter2_only_snp_vcf_dir + call("%s" % remove_unwanted_text, logger) + +def generate_paste_command_outgroup(): + """ + This Function will take all the *label file and generate/paste it column wise to generate a matrix. These matrix will be used in downstream analysis. + :param: null + :return: null + """ + + if args.outgroup: + """ Paste/Generate and sort SNP Filter Label Matrix """ + paste_file = args.filter2_only_snp_vcf_dir + "/paste_label_files_outgroup.sh" + f4 = open(paste_file, 'w+') + paste_command = "paste %s/unique_positions_file" % args.filter2_only_snp_vcf_dir + for i in vcf_filenames: + if "%s_filter2_final.vcf_no_proximate_snp.vcf" % outgroup not in i: + label_file = i.replace('_filter2_final.vcf_no_proximate_snp.vcf', + '_filter2_final.vcf_no_proximate_snp.vcf_positions_label') + paste_command = paste_command + " " + label_file + + """Exclude outgroup sample name in header + + header_awk_cmd = "awk \'{ORS=\"\t\";}{print $1}\' %s > %s/header.txt" % (args.filter2_only_snp_vcf_filenames, args.filter2_only_snp_vcf_dir) + sed_header = "sed -i \'s/^/\t/\' %s/header.txt" % args.filter2_only_snp_vcf_dir + sed_header_2 = "sed -i -e \'$a\\' %s/header.txt" % args.filter2_only_snp_vcf_dir + + """ + + header_awk_cmd = "grep -v \'%s\' %s | awk \'{ORS=\"\t\";}{print $1}\' > %s/header_outgroup.txt" % ( + outgroup, args.filter2_only_snp_vcf_filenames, args.filter2_only_snp_vcf_dir) + sed_header = "sed -i \'s/^/\t/\' %s/header_outgroup.txt" % args.filter2_only_snp_vcf_dir + sed_header_2 = "sed -i -e \'$a\\' %s/header_outgroup.txt" % args.filter2_only_snp_vcf_dir + + call("%s" % header_awk_cmd, logger) + call("%s" % sed_header, logger) + call("%s" % sed_header_2, logger) + + temp_paste_command = paste_command + " > %s/temp_label_final_raw_outgroup.txt" % args.filter2_only_snp_vcf_dir + paste_command 
= paste_command + " > %s/All_label_final_raw_outgroup" % args.filter2_only_snp_vcf_dir + f4.write(paste_command) + f4.close() + sort_All_label_cmd = "sort -n -k1,1 %s/All_label_final_raw_outgroup > %s/All_label_final_sorted_outgroup.txt" % ( + args.filter2_only_snp_vcf_dir, args.filter2_only_snp_vcf_dir) + paste_command_header = "cat %s/header_outgroup.txt %s/All_label_final_sorted_outgroup.txt > %s/All_label_final_sorted_header_outgroup.txt" % ( + args.filter2_only_snp_vcf_dir, args.filter2_only_snp_vcf_dir, args.filter2_only_snp_vcf_dir) + + ls = [] + for i in vcf_filenames: + label_file = i.replace('_filter2_final.vcf_no_proximate_snp.vcf', + '_filter2_final.vcf_no_proximate_snp.vcf_positions_label') + ls.append(label_file) + ls.insert(0, "%s/unique_positions_file" % args.filter2_only_snp_vcf_dir) + + with open('%s/All_label_final_raw_outgroup.sh' % args.filter2_only_snp_vcf_dir, 'w') as outfile: + outfile.write(paste_command) + outfile.close() + + with open('%s/temp_label_final_raw_outgroup.txt.sh' % args.filter2_only_snp_vcf_dir, 'w') as outfile: + outfile.write(temp_paste_command) + outfile.close() + call("bash %s/All_label_final_raw_outgroup.sh" % args.filter2_only_snp_vcf_dir, logger) + call("bash %s/temp_label_final_raw_outgroup.txt.sh" % args.filter2_only_snp_vcf_dir, logger) + + """ + remove this lines + #subprocess.call(["%s" % paste_command], shell=True) + #subprocess.call(["%s" % temp_paste_command], shell=True) + #subprocess.check_call('%s' % paste_command) + #subprocess.check_call('%s' % temp_paste_command) + #os.system(paste_command) change + #os.system(temp_paste_command) change + """ + + call("%s" % sort_All_label_cmd, logger) + call("%s" % paste_command_header, logger) + + """ Assign numeric code to each variant filter reason""" + subprocess.call([ + "sed -i 's/reference_unmapped_position/0/g' %s/All_label_final_sorted_header_outgroup.txt" % args.filter2_only_snp_vcf_dir], + shell=True) + subprocess.call([ + "sed -i 's/reference_allele/1/g' 
%s/All_label_final_sorted_header_outgroup.txt" % args.filter2_only_snp_vcf_dir], + shell=True) + subprocess.call([ + "sed -i 's/VARIANT/1TRUE/g' %s/All_label_final_sorted_header_outgroup.txt" % args.filter2_only_snp_vcf_dir], + shell=True) + subprocess.call([ + "sed -i 's/LowFQ_QUAL_DP_proximate_SNP/2/g' %s/All_label_final_sorted_header_outgroup.txt" % args.filter2_only_snp_vcf_dir], + shell=True) + subprocess.call([ + "sed -i 's/LowFQ_DP_QUAL_proximate_SNP/2/g' %s/All_label_final_sorted_header_outgroup.txt" % args.filter2_only_snp_vcf_dir], + shell=True) + subprocess.call([ + "sed -i 's/LowFQ_QUAL_proximate_SNP/2/g' %s/All_label_final_sorted_header_outgroup.txt" % args.filter2_only_snp_vcf_dir], + shell=True) + subprocess.call([ + "sed -i 's/LowFQ_DP_proximate_SNP/2/g' %s/All_label_final_sorted_header_outgroup.txt" % args.filter2_only_snp_vcf_dir], + shell=True) + subprocess.call([ + "sed -i 's/LowFQ_proximate_SNP/2/g' %s/All_label_final_sorted_header_outgroup.txt" % args.filter2_only_snp_vcf_dir], + shell=True) + subprocess.call([ + "sed -i 's/LowFQ_QUAL_DP/2/g' %s/All_label_final_sorted_header_outgroup.txt" % args.filter2_only_snp_vcf_dir], + shell=True) + subprocess.call([ + "sed -i 's/LowFQ_DP_QUAL/2/g' %s/All_label_final_sorted_header_outgroup.txt" % args.filter2_only_snp_vcf_dir], + shell=True) + subprocess.call( + ["sed -i 's/LowFQ_QUAL/2/g' %s/All_label_final_sorted_header_outgroup.txt" % args.filter2_only_snp_vcf_dir], + shell=True) + subprocess.call( + ["sed -i 's/LowFQ_DP/2/g' %s/All_label_final_sorted_header_outgroup.txt" % args.filter2_only_snp_vcf_dir], + shell=True) + subprocess.call([ + "sed -i 's/HighFQ_QUAL_DP_proximate_SNP/4/g' %s/All_label_final_sorted_header_outgroup.txt" % args.filter2_only_snp_vcf_dir], + shell=True) + subprocess.call([ + "sed -i 's/HighFQ_DP_QUAL_proximate_SNP/4/g' %s/All_label_final_sorted_header_outgroup.txt" % args.filter2_only_snp_vcf_dir], + shell=True) + subprocess.call([ + "sed -i 's/HighFQ_QUAL_proximate_SNP/4/g' 
%s/All_label_final_sorted_header_outgroup.txt" % args.filter2_only_snp_vcf_dir], + shell=True) + subprocess.call([ + "sed -i 's/HighFQ_DP_proximate_SNP/4/g' %s/All_label_final_sorted_header_outgroup.txt" % args.filter2_only_snp_vcf_dir], + shell=True) + subprocess.call([ + "sed -i 's/HighFQ_proximate_SNP/7/g' %s/All_label_final_sorted_header_outgroup.txt" % args.filter2_only_snp_vcf_dir], + shell=True) + subprocess.call([ + "sed -i 's/HighFQ_QUAL_DP/3/g' %s/All_label_final_sorted_header_outgroup.txt" % args.filter2_only_snp_vcf_dir], + shell=True) + subprocess.call([ + "sed -i 's/HighFQ_DP_QUAL/3/g' %s/All_label_final_sorted_header_outgroup.txt" % args.filter2_only_snp_vcf_dir], + shell=True) + subprocess.call([ + "sed -i 's/HighFQ_QUAL/3/g' %s/All_label_final_sorted_header_outgroup.txt" % args.filter2_only_snp_vcf_dir], + shell=True) + subprocess.call( + ["sed -i 's/HighFQ_DP/3/g' %s/All_label_final_sorted_header_outgroup.txt" % args.filter2_only_snp_vcf_dir], + shell=True) + subprocess.call( + ["sed -i 's/LowFQ/5/g' %s/All_label_final_sorted_header_outgroup.txt" % args.filter2_only_snp_vcf_dir], + shell=True) + subprocess.call( + ["sed -i 's/HighFQ/6/g' %s/All_label_final_sorted_header_outgroup.txt" % args.filter2_only_snp_vcf_dir], + shell=True) + remove_unwanted_text = "sed -i \'s/_filter2_final.vcf_no_proximate_snp.vcf//g\' %s/All_label_final_sorted_header_outgroup.txt" % args.filter2_only_snp_vcf_dir + call("%s" % remove_unwanted_text, logger) + + else: + print "Skip generating seperate intermediate files for outgroup" + + +def generate_indel_paste_command(): + """ + This Function will take all the *label file and generate/paste it column wise to generate a matrix. These matrix will be used in downstream analysis. 
+ :param: null + :return: null + """ + + """ Paste/Generate and sort SNP Filter Label Matrix """ + paste_file = args.filter2_only_snp_vcf_dir + "/paste_indel_label_files.sh" + f4 = open(paste_file, 'w+') + paste_command = "paste %s/unique_indel_positions_file" % args.filter2_only_snp_vcf_dir + for i in vcf_filenames: + label_file = i.replace('_filter2_final.vcf_no_proximate_snp.vcf', + '_filter2_indel_final.vcf_indel_positions_label') + paste_command = paste_command + " " + label_file + header_awk_cmd = "awk \'{ORS=\"\t\";}{print $1}\' %s > %s/header.txt" % ( + args.filter2_only_snp_vcf_filenames, args.filter2_only_snp_vcf_dir) + sed_header = "sed -i \'s/^/\t/\' %s/header.txt" % args.filter2_only_snp_vcf_dir + sed_header_2 = "sed -i -e \'$a\\' %s/header.txt" % args.filter2_only_snp_vcf_dir + + # os.system(header_awk_cmd) + # os.system(sed_header) + # os.system(sed_header_2) + + call("%s" % header_awk_cmd, logger) + call("%s" % sed_header, logger) + call("%s" % sed_header_2, logger) + + temp_paste_command = paste_command + " > %s/temp_indel_label_final_raw.txt" % args.filter2_only_snp_vcf_dir + paste_command = paste_command + " > %s/All_indel_label_final_raw" % args.filter2_only_snp_vcf_dir + f4.write(paste_command) + f4.close() + + call("bash %s" % paste_file, logger) + + sort_All_label_cmd = "sort -n -k1,1 %s/All_indel_label_final_raw > %s/All_indel_label_final_sorted.txt" % ( + args.filter2_only_snp_vcf_dir, args.filter2_only_snp_vcf_dir) + paste_command_header = "cat %s/header.txt %s/All_indel_label_final_sorted.txt > %s/All_indel_label_final_sorted_header.txt" % ( + args.filter2_only_snp_vcf_dir, args.filter2_only_snp_vcf_dir, args.filter2_only_snp_vcf_dir) + + ls = [] + for i in vcf_filenames: + label_file = i.replace('_filter2_final.vcf_no_proximate_snp.vcf', + '_filter2_indel_final.vcf_indel_positions_label') + ls.append(label_file) + ls.insert(0, "%s/unique_indel_positions_file" % args.filter2_only_snp_vcf_dir) + + with 
open('%s/All_indel_label_final_raw.sh' % args.filter2_only_snp_vcf_dir, 'w') as outfile2: + outfile2.write(paste_command) + outfile2.close() + + with open('%s/temp_indel_label_final_raw.txt.sh' % args.filter2_only_snp_vcf_dir, 'w') as outfile2: + outfile2.write(temp_paste_command) + outfile2.close() + + # Why is this not working? + call("bash %s/All_indel_label_final_raw.sh" % args.filter2_only_snp_vcf_dir, logger) + call("bash %s/temp_indel_label_final_raw.txt.sh" % args.filter2_only_snp_vcf_dir, logger) + keep_logging('Finished pasting...DONE', 'Finished pasting...DONE', logger, 'info') + + """ + remove this lines + #subprocess.call(["%s" % paste_command], shell=True) + #subprocess.call(["%s" % temp_paste_command], shell=True) + #subprocess.check_call('%s' % paste_command) + #subprocess.check_call('%s' % temp_paste_command) + #os.system(paste_command) change + #os.system(temp_paste_command) change + """ + + call("%s" % sort_All_label_cmd, logger) + call("%s" % paste_command_header, logger) + + """ Assign numeric code to each variant filter reason""" + subprocess.call([ + "sed -i 's/reference_unmapped_position/0/g' %s/All_indel_label_final_sorted_header.txt" % args.filter2_only_snp_vcf_dir], + shell=True) + subprocess.call( + ["sed -i 's/reference_allele/1/g' %s/All_indel_label_final_sorted_header.txt" % args.filter2_only_snp_vcf_dir], + shell=True) + subprocess.call( + ["sed -i 's/VARIANT/1TRUE/g' %s/All_indel_label_final_sorted_header.txt" % args.filter2_only_snp_vcf_dir], + shell=True) + subprocess.call([ + "sed -i 's/LowAF_QUAL_DP_proximate_SNP/2/g' %s/All_indel_label_final_sorted_header.txt" % args.filter2_only_snp_vcf_dir], + shell=True) + subprocess.call([ + "sed -i 's/LowAF_DP_QUAL_proximate_SNP/2/g' %s/All_indel_label_final_sorted_header.txt" % args.filter2_only_snp_vcf_dir], + shell=True) + subprocess.call([ + "sed -i 's/LowAF_QUAL_proximate_SNP/2/g' %s/All_indel_label_final_sorted_header.txt" % args.filter2_only_snp_vcf_dir], + shell=True) + 
subprocess.call([ + "sed -i 's/LowAF_DP_proximate_SNP/2/g' %s/All_indel_label_final_sorted_header.txt" % args.filter2_only_snp_vcf_dir], + shell=True) + subprocess.call([ + "sed -i 's/LowAF_proximate_SNP/2/g' %s/All_indel_label_final_sorted_header.txt" % args.filter2_only_snp_vcf_dir], + shell=True) + subprocess.call( + ["sed -i 's/LowAF_QUAL_DP/2/g' %s/All_indel_label_final_sorted_header.txt" % args.filter2_only_snp_vcf_dir], + shell=True) + subprocess.call( + ["sed -i 's/LowAF_DP_QUAL/2/g' %s/All_indel_label_final_sorted_header.txt" % args.filter2_only_snp_vcf_dir], + shell=True) + subprocess.call( + ["sed -i 's/LowAF_QUAL/2/g' %s/All_indel_label_final_sorted_header.txt" % args.filter2_only_snp_vcf_dir], + shell=True) + subprocess.call( + ["sed -i 's/LowAF_DP/2/g' %s/All_indel_label_final_sorted_header.txt" % args.filter2_only_snp_vcf_dir], + shell=True) + subprocess.call([ + "sed -i 's/HighAF_QUAL_DP_proximate_SNP/4/g' %s/All_indel_label_final_sorted_header.txt" % args.filter2_only_snp_vcf_dir], + shell=True) + subprocess.call([ + "sed -i 's/HighAF_DP_QUAL_proximate_SNP/4/g' %s/All_indel_label_final_sorted_header.txt" % args.filter2_only_snp_vcf_dir], + shell=True) + subprocess.call([ + "sed -i 's/HighAF_QUAL_proximate_SNP/4/g' %s/All_indel_label_final_sorted_header.txt" % args.filter2_only_snp_vcf_dir], + shell=True) + subprocess.call([ + "sed -i 's/HighAF_DP_proximate_SNP/4/g' %s/All_indel_label_final_sorted_header.txt" % args.filter2_only_snp_vcf_dir], + shell=True) + subprocess.call([ + "sed -i 's/HighAF_proximate_SNP/7/g' %s/All_indel_label_final_sorted_header.txt" % args.filter2_only_snp_vcf_dir], + shell=True) + subprocess.call( + ["sed -i 's/HighAF_QUAL_DP/3/g' %s/All_indel_label_final_sorted_header.txt" % args.filter2_only_snp_vcf_dir], + shell=True) + subprocess.call( + ["sed -i 's/HighAF_DP_QUAL/3/g' %s/All_indel_label_final_sorted_header.txt" % args.filter2_only_snp_vcf_dir], + shell=True) + subprocess.call( + ["sed -i 's/HighAF_QUAL/3/g' 
%s/All_indel_label_final_sorted_header.txt" % args.filter2_only_snp_vcf_dir], + shell=True) + subprocess.call( + ["sed -i 's/HighAF_DP/3/g' %s/All_indel_label_final_sorted_header.txt" % args.filter2_only_snp_vcf_dir], + shell=True) + subprocess.call(["sed -i 's/LowAF/5/g' %s/All_indel_label_final_sorted_header.txt" % args.filter2_only_snp_vcf_dir], + shell=True) + subprocess.call( + ["sed -i 's/HighAF/6/g' %s/All_indel_label_final_sorted_header.txt" % args.filter2_only_snp_vcf_dir], + shell=True) + remove_unwanted_text = "sed -i \'s/_filter2_final.vcf_no_proximate_snp.vcf//g\' %s/All_indel_label_final_sorted_header.txt" % args.filter2_only_snp_vcf_dir + call("%s" % remove_unwanted_text, logger) + + +def generate_indel_paste_command_outgroup(): + """ + This Function will take all the *label file and generate/paste it column wise to generate a matrix. These matrix will be used in downstream analysis. + :param: null + :return: null + """ + + if args.outgroup: + """ Paste/Generate and sort SNP Filter Label Matrix """ + # define a file name where the paste commands will be saved. 
+ paste_file = args.filter2_only_snp_vcf_dir + "/paste_indel_label_files_outgroup.sh" + f4 = open(paste_file, 'w+') + + # initiate paste command string + paste_command = "paste %s/unique_indel_positions_file" % args.filter2_only_snp_vcf_dir + + # Generate paste command + for i in vcf_filenames: + if "%s_filter2_final.vcf_no_proximate_snp.vcf" % outgroup not in i: + label_file = i.replace('_filter2_final.vcf_no_proximate_snp.vcf', + '_filter2_indel_final.vcf_indel_positions_label') + paste_command = paste_command + " " + label_file + # Change header awk command to exclude outgroup + # header_awk_cmd = "awk \'{ORS=\"\t\";}{print $1}\' %s > %s/header.txt" % (args.filter2_only_snp_vcf_filenames, args.filter2_only_snp_vcf_dir) + header_awk_cmd = "grep -v \'%s\' %s | awk \'{ORS=\"\t\";}{print $1}\' > %s/header_outgroup.txt" % ( + outgroup, args.filter2_only_snp_vcf_filenames, args.filter2_only_snp_vcf_dir) + sed_header = "sed -i \'s/^/\t/\' %s/header_outgroup.txt" % args.filter2_only_snp_vcf_dir + sed_header_2 = "sed -i -e \'$a\\' %s/header_outgroup.txt" % args.filter2_only_snp_vcf_dir + + call("%s" % header_awk_cmd, logger) + call("%s" % sed_header, logger) + call("%s" % sed_header_2, logger) + + temp_paste_command = paste_command + " > %s/temp_indel_label_final_raw_outgroup.txt" % args.filter2_only_snp_vcf_dir + paste_command = paste_command + " > %s/All_indel_label_final_raw_outgroup" % args.filter2_only_snp_vcf_dir + f4.write(paste_command) + f4.close() + + call("bash %s" % paste_file, logger) + + sort_All_label_cmd = "sort -n -k1,1 %s/All_indel_label_final_raw_outgroup > %s/All_indel_label_final_sorted_outgroup.txt" % ( + args.filter2_only_snp_vcf_dir, args.filter2_only_snp_vcf_dir) + paste_command_header = "cat %s/header_outgroup.txt %s/All_indel_label_final_sorted_outgroup.txt > %s/All_indel_label_final_sorted_header_outgroup.txt" % ( + args.filter2_only_snp_vcf_dir, args.filter2_only_snp_vcf_dir, args.filter2_only_snp_vcf_dir) + + ls = [] + for i in 
vcf_filenames: + label_file = i.replace('_filter2_final.vcf_no_proximate_snp.vcf', + '_filter2_indel_final.vcf_indel_positions_label') + ls.append(label_file) + ls.insert(0, "%s/unique_indel_positions_file" % args.filter2_only_snp_vcf_dir) + + with open('%s/All_indel_label_final_raw_outgroup.sh' % args.filter2_only_snp_vcf_dir, 'w') as outfile2: + outfile2.write(paste_command) + outfile2.close() + + with open('%s/temp_indel_label_final_raw_outgroup.txt.sh' % args.filter2_only_snp_vcf_dir, 'w') as outfile2: + outfile2.write(temp_paste_command) + outfile2.close() + + # Why is this not working? + call("bash %s/All_indel_label_final_raw_outgroup.sh" % args.filter2_only_snp_vcf_dir, logger) + call("bash %s/temp_indel_label_final_raw_outgroup.txt.sh" % args.filter2_only_snp_vcf_dir, logger) + keep_logging('Finished pasting...DONE', 'Finished pasting...DONE', logger, 'info') + + """ + remove this lines + #subprocess.call(["%s" % paste_command], shell=True) + #subprocess.call(["%s" % temp_paste_command], shell=True) + #subprocess.check_call('%s' % paste_command) + #subprocess.check_call('%s' % temp_paste_command) + #os.system(paste_command) change + #os.system(temp_paste_command) change + """ + + call("%s" % sort_All_label_cmd, logger) + call("%s" % paste_command_header, logger) + + """ Assign numeric code to each variant filter reason""" + subprocess.call([ + "sed -i 's/reference_unmapped_position/0/g' %s/All_indel_label_final_sorted_header_outgroup.txt" % args.filter2_only_snp_vcf_dir], + shell=True) + subprocess.call([ + "sed -i 's/reference_allele/1/g' %s/All_indel_label_final_sorted_header_outgroup.txt" % args.filter2_only_snp_vcf_dir], + shell=True) + subprocess.call([ + "sed -i 's/VARIANT/1TRUE/g' %s/All_indel_label_final_sorted_header_outgroup.txt" % args.filter2_only_snp_vcf_dir], + shell=True) + subprocess.call([ + "sed -i 's/LowAF_QUAL_DP_proximate_SNP/2/g' %s/All_indel_label_final_sorted_header_outgroup.txt" % args.filter2_only_snp_vcf_dir], + shell=True) + 
subprocess.call([ + "sed -i 's/LowAF_DP_QUAL_proximate_SNP/2/g' %s/All_indel_label_final_sorted_header_outgroup.txt" % args.filter2_only_snp_vcf_dir], + shell=True) + subprocess.call([ + "sed -i 's/LowAF_QUAL_proximate_SNP/2/g' %s/All_indel_label_final_sorted_header_outgroup.txt" % args.filter2_only_snp_vcf_dir], + shell=True) + subprocess.call([ + "sed -i 's/LowAF_DP_proximate_SNP/2/g' %s/All_indel_label_final_sorted_header_outgroup.txt" % args.filter2_only_snp_vcf_dir], + shell=True) + subprocess.call([ + "sed -i 's/LowAF_proximate_SNP/2/g' %s/All_indel_label_final_sorted_header_outgroup.txt" % args.filter2_only_snp_vcf_dir], + shell=True) + subprocess.call([ + "sed -i 's/LowAF_QUAL_DP/2/g' %s/All_indel_label_final_sorted_header_outgroup.txt" % args.filter2_only_snp_vcf_dir], + shell=True) + subprocess.call([ + "sed -i 's/LowAF_DP_QUAL/2/g' %s/All_indel_label_final_sorted_header_outgroup.txt" % args.filter2_only_snp_vcf_dir], + shell=True) + subprocess.call([ + "sed -i 's/LowAF_QUAL/2/g' %s/All_indel_label_final_sorted_header_outgroup.txt" % args.filter2_only_snp_vcf_dir], + shell=True) + subprocess.call([ + "sed -i 's/LowAF_DP/2/g' %s/All_indel_label_final_sorted_header_outgroup.txt" % args.filter2_only_snp_vcf_dir], + shell=True) + subprocess.call([ + "sed -i 's/HighAF_QUAL_DP_proximate_SNP/4/g' %s/All_indel_label_final_sorted_header_outgroup.txt" % args.filter2_only_snp_vcf_dir], + shell=True) + subprocess.call([ + "sed -i 's/HighAF_DP_QUAL_proximate_SNP/4/g' %s/All_indel_label_final_sorted_header_outgroup.txt" % args.filter2_only_snp_vcf_dir], + shell=True) + subprocess.call([ + "sed -i 's/HighAF_QUAL_proximate_SNP/4/g' %s/All_indel_label_final_sorted_header_outgroup.txt" % args.filter2_only_snp_vcf_dir], + shell=True) + subprocess.call([ + "sed -i 's/HighAF_DP_proximate_SNP/4/g' %s/All_indel_label_final_sorted_header_outgroup.txt" % args.filter2_only_snp_vcf_dir], + shell=True) + subprocess.call([ + "sed -i 's/HighAF_proximate_SNP/7/g' 
%s/All_indel_label_final_sorted_header_outgroup.txt" % args.filter2_only_snp_vcf_dir], + shell=True) + subprocess.call([ + "sed -i 's/HighAF_QUAL_DP/3/g' %s/All_indel_label_final_sorted_header_outgroup.txt" % args.filter2_only_snp_vcf_dir], + shell=True) + subprocess.call([ + "sed -i 's/HighAF_DP_QUAL/3/g' %s/All_indel_label_final_sorted_header_outgroup.txt" % args.filter2_only_snp_vcf_dir], + shell=True) + subprocess.call([ + "sed -i 's/HighAF_QUAL/3/g' %s/All_indel_label_final_sorted_header_outgroup.txt" % args.filter2_only_snp_vcf_dir], + shell=True) + subprocess.call([ + "sed -i 's/HighAF_DP/3/g' %s/All_indel_label_final_sorted_header_outgroup.txt" % args.filter2_only_snp_vcf_dir], + shell=True) + subprocess.call([ + "sed -i 's/LowAF/5/g' %s/All_indel_label_final_sorted_header_outgroup.txt" % args.filter2_only_snp_vcf_dir], + shell=True) + subprocess.call([ + "sed -i 's/HighAF/6/g' %s/All_indel_label_final_sorted_header_outgroup.txt" % args.filter2_only_snp_vcf_dir], + shell=True) + remove_unwanted_text = "sed -i \'s/_filter2_final.vcf_no_proximate_snp.vcf//g\' %s/All_indel_label_final_sorted_header_outgroup.txt" % args.filter2_only_snp_vcf_dir + call("%s" % remove_unwanted_text, logger) + else: + print "Skip generating seperate intermediate files for outgroup" + + +def generate_position_label_data_matrix(): + """ + Generate different list of Positions using the matrix All_label_final_sorted_header.txt. + + (Defining Core Variant Position: Variant Position which was not filtered out in any of the other samples due to variant filter parameter and also this position was present in all the samples(not unmapped)). + + Filtered Position label matrix: + List of non-core positions. These positions didn't make it to the final core list because it was filtered out in one of the samples. + + Only_ref_variant_positions_for_closely_matrix.txt : + Those Positions where the variant was either reference allele or a variant that passed all the variant filter parameters. 
+ + :param: null + :return: null + + """ + + def generate_position_label_data_matrix_All_label(): + position_label = OrderedDict() + f1 = open("%s/Only_ref_variant_positions_for_closely" % args.filter2_only_snp_vcf_dir, 'w+') + f2 = open("%s/Only_ref_variant_positions_for_closely_matrix.txt" % args.filter2_only_snp_vcf_dir, 'w+') + f3 = open("%s/Only_filtered_positions_for_closely_matrix.txt" % args.filter2_only_snp_vcf_dir, 'w+') + f4 = open( + "%s/Only_filtered_positions_for_closely_matrix_TRUE_variants_filtered_out.txt" % args.filter2_only_snp_vcf_dir, + 'w+') + if args.outgroup: + with open("%s/All_label_final_sorted_header_outgroup.txt" % args.filter2_only_snp_vcf_dir, + 'rU') as csv_file: + keep_logging( + 'Reading All label positions file: %s/All_label_final_sorted_header.txt \n' % args.filter2_only_snp_vcf_dir, + 'Reading All label positions file: %s/All_label_final_sorted_header.txt \n' % args.filter2_only_snp_vcf_dir, + logger, 'info') + csv_reader = csv.reader(csv_file, delimiter='\t') + next(csv_reader, None) + for row in csv_reader: + position_label[row[0]] = row[1:] + keep_logging('Generating different list of Positions and heatmap data matrix... \n', + 'Generating different list of Positions and heatmap data matrix... 
\n', logger, 'info') + print_string_header = "\t" + for i in vcf_filenames: + print_string_header = print_string_header + os.path.basename(i) + "\t" + f2.write('\t' + print_string_header.strip() + '\n') + f3.write('\t' + print_string_header.strip() + '\n') + f4.write('\t' + print_string_header.strip() + '\n') + for value in position_label: + lll = ['0', '2', '3', '4', '5', '6', '7'] + ref_var = ['1', '1TRUE'] + if set(ref_var) & set(position_label[value]): + if set(lll) & set(position_label[value]): + if int(value) not in outgroup_specific_positions: + print_string = "" + for i in position_label[value]: + print_string = print_string + "\t" + i + STRR2 = value + print_string + "\n" + f3.write(STRR2) + if position_label[value].count('1TRUE') >= 2: + f4.write('1\n') + else: + f4.write('0\n') + else: + if int(value) not in outgroup_specific_positions: + strr = value + "\n" + f1.write(strr) + STRR3 = value + "\t" + str(position_label[value]) + "\n" + f2.write(STRR3) + csv_file.close() + f1.close() + f2.close() + f3.close() + f4.close() + subprocess.call([ + "sed -i 's/_filter2_final.vcf_no_proximate_snp.vcf//g' %s/Only_ref_variant_positions_for_closely" % args.filter2_only_snp_vcf_dir], + shell=True) + subprocess.call([ + "sed -i 's/_filter2_final.vcf_no_proximate_snp.vcf//g' %s/Only_ref_variant_positions_for_closely_matrix.txt" % args.filter2_only_snp_vcf_dir], + shell=True) + subprocess.call([ + "sed -i 's/_filter2_final.vcf_no_proximate_snp.vcf//g' %s/Only_filtered_positions_for_closely_matrix.txt" % args.filter2_only_snp_vcf_dir], + shell=True) + subprocess.call([ + "sed -i 's/_filter2_final.vcf_no_proximate_snp.vcf//g' %s/Only_filtered_positions_for_closely_matrix_TRUE_variants_filtered_out.txt" % args.filter2_only_snp_vcf_dir], + shell=True) + subprocess.call([ + "sed -i 's/1TRUE/-1/g' %s/Only_filtered_positions_for_closely_matrix.txt" % args.filter2_only_snp_vcf_dir], + shell=True) + + else: + with open("%s/All_label_final_sorted_header.txt" % 
args.filter2_only_snp_vcf_dir, 'rU') as csv_file: + keep_logging( + 'Reading All label positions file: %s/All_label_final_sorted_header.txt \n' % args.filter2_only_snp_vcf_dir, + 'Reading All label positions file: %s/All_label_final_sorted_header.txt \n' % args.filter2_only_snp_vcf_dir, + logger, 'info') + csv_reader = csv.reader(csv_file, delimiter='\t') + next(csv_reader, None) + for row in csv_reader: + position_label[row[0]] = row[1:] + keep_logging('Generating different list of Positions and heatmap data matrix... \n', + 'Generating different list of Positions and heatmap data matrix... \n', logger, 'info') + print_string_header = "\t" + for i in vcf_filenames: + print_string_header = print_string_header + os.path.basename(i) + "\t" + f2.write('\t' + print_string_header.strip() + '\n') + f3.write('\t' + print_string_header.strip() + '\n') + f4.write('\t' + print_string_header.strip() + '\n') + for value in position_label: + lll = ['0', '2', '3', '4', '5', '6', '7'] + ref_var = ['1', '1TRUE'] + if set(ref_var) & set(position_label[value]): + if set(lll) & set(position_label[value]): + + print_string = "" + for i in position_label[value]: + print_string = print_string + "\t" + i + STRR2 = value + print_string + "\n" + f3.write(STRR2) + if position_label[value].count('1TRUE') >= 2: + f4.write('1\n') + else: + f4.write('0\n') + else: + + strr = value + "\n" + f1.write(strr) + STRR3 = value + "\t" + str(position_label[value]) + "\n" + f2.write(STRR3) + csv_file.close() + f1.close() + f2.close() + f3.close() + f4.close() + subprocess.call([ + "sed -i 's/_filter2_final.vcf_no_proximate_snp.vcf//g' %s/Only_ref_variant_positions_for_closely" % args.filter2_only_snp_vcf_dir], + shell=True) + subprocess.call([ + "sed -i 's/_filter2_final.vcf_no_proximate_snp.vcf//g' %s/Only_ref_variant_positions_for_closely_matrix.txt" % args.filter2_only_snp_vcf_dir], + shell=True) + subprocess.call([ + "sed -i 's/_filter2_final.vcf_no_proximate_snp.vcf//g' 
%s/Only_filtered_positions_for_closely_matrix.txt" % args.filter2_only_snp_vcf_dir], + shell=True) + subprocess.call([ + "sed -i 's/_filter2_final.vcf_no_proximate_snp.vcf//g' %s/Only_filtered_positions_for_closely_matrix_TRUE_variants_filtered_out.txt" % args.filter2_only_snp_vcf_dir], + shell=True) + subprocess.call([ + "sed -i 's/1TRUE/-1/g' %s/Only_filtered_positions_for_closely_matrix.txt" % args.filter2_only_snp_vcf_dir], + shell=True) + + def temp_generate_position_label_data_matrix_All_label(): + + """ + Read temp_label_final_raw.txt SNP position label data matrix for generating barplot statistics. + """ + temp_position_label = OrderedDict() + f33 = open("%s/temp_Only_filtered_positions_for_closely_matrix.txt" % args.filter2_only_snp_vcf_dir, 'w+') + print_string_header = "\t" + + if args.outgroup: + for i in vcf_filenames: + if "%s_filter2_final.vcf_no_proximate_snp.vcf" % outgroup not in i: + print_string_header = print_string_header + os.path.basename(i) + "\t" + else: + for i in vcf_filenames: + print_string_header = print_string_header + os.path.basename(i) + "\t" + + f33.write('\t' + print_string_header.strip() + '\n') + keep_logging( + 'Reading temporary label positions file: %s/temp_label_final_raw.txt \n' % args.filter2_only_snp_vcf_dir, + 'Reading temporary label positions file: %s/temp_label_final_raw.txt \n' % args.filter2_only_snp_vcf_dir, + logger, 'info') + lll = ['reference_unmapped_position', 'LowFQ', 'LowFQ_DP', 'LowFQ_QUAL', 'LowFQ_DP_QUAL', 'LowFQ_QUAL_DP', + 'HighFQ_DP', 'HighFQ_QUAL', 'HighFQ_DP_QUAL', 'HighFQ_QUAL_DP', 'HighFQ', 'LowFQ_proximate_SNP', + 'LowFQ_DP_proximate_SNP', 'LowFQ_QUAL_proximate_SNP', 'LowFQ_DP_QUAL_proximate_SNP', + 'LowFQ_QUAL_DP_proximate_SNP', 'HighFQ_DP_proximate_SNP', 'HighFQ_QUAL_proximate_SNP', + 'HighFQ_DP_QUAL_proximate_SNP', 'HighFQ_QUAL_DP_proximate_SNP', 'HighFQ_proximate_SNP', '_proximate_SNP'] + ref_var = ['reference_allele', 'VARIANT'] + + if args.outgroup: + print "here" + with 
open("%s/temp_label_final_raw_outgroup.txt" % args.filter2_only_snp_vcf_dir, 'r') as csv_file: + csv_reader = csv.reader(csv_file, delimiter='\t') + next(csv_reader, None) + for row in csv_reader: + if set(ref_var) & set(row[1:]): + if set(lll) & set(row[1:]): + if int(row[0]) not in outgroup_specific_positions: + + print_string = "" + for i in row[1:]: + print_string = print_string + "\t" + i + STRR2 = row[0] + print_string + "\n" + f33.write(STRR2) + csv_file.close() + f33.close() + + else: + with open("%s/temp_label_final_raw.txt" % args.filter2_only_snp_vcf_dir, 'r') as csv_file: + csv_reader = csv.reader(csv_file, delimiter='\t') + next(csv_reader, None) + for row in csv_reader: + if set(ref_var) & set(row[1:]): + if set(lll) & set(row[1:]): + + print_string = "" + for i in row[1:]: + print_string = print_string + "\t" + i + STRR2 = row[0] + print_string + "\n" + f33.write(STRR2) + csv_file.close() + f33.close() + """ + Read temp_Only_filtered_positions_for_closely_matrix file and generate a matrix of positions that are being filtered just because of FQ + """ + temp_position_label_FQ = OrderedDict() + f44 = open("%s/temp_Only_filtered_positions_for_closely_matrix_FQ.txt" % args.filter2_only_snp_vcf_dir, 'w+') + with open("%s/temp_Only_filtered_positions_for_closely_matrix.txt" % args.filter2_only_snp_vcf_dir, + 'rU') as csv_file: + keep_logging( + 'Reading temporary Only_filtered_positions label file: %s/temp_Only_filtered_positions_for_closely_matrix.txt \n' % args.filter2_only_snp_vcf_dir, + 'Reading temporary Only_filtered_positions label file: %s/temp_Only_filtered_positions_for_closely_matrix.txt \n' % args.filter2_only_snp_vcf_dir, + logger, 'info') + csv_reader = csv.reader(csv_file, delimiter='\t') + next(csv_reader, None) + + for row in csv_reader: + temp_position_label_FQ[row[0]] = row[1:] + print_string_header = "\t" + for i in vcf_filenames: + print_string_header = print_string_header + os.path.basename(i) + "\t" + f44.write('\t' + 
print_string_header.strip() + '\n') + for value in temp_position_label_FQ: + lll = ['LowFQ'] + if set(lll) & set(temp_position_label_FQ[value]): + + print_string = "" + for i in temp_position_label_FQ[value]: + print_string = print_string + "\t" + i + STRR2 = value + print_string + "\n" + f44.write(STRR2) + f44.close() + csv_file.close() + f44.close() + + """ + Perform Sed on temp files. Find a faster way to do this. + """ + subprocess.call([ + "sed -i 's/_filter2_final.vcf_no_proximate_snp.vcf//g' %s/temp_Only_filtered_positions_for_closely_matrix_FQ.txt" % args.filter2_only_snp_vcf_dir], + shell=True) + subprocess.call([ + "sed -i 's/reference_unmapped_position/0/g' %s/temp_Only_filtered_positions_for_closely_matrix_FQ.txt" % args.filter2_only_snp_vcf_dir], + shell=True) + subprocess.call([ + "sed -i 's/reference_allele/1/g' %s/temp_Only_filtered_positions_for_closely_matrix_FQ.txt" % args.filter2_only_snp_vcf_dir], + shell=True) + subprocess.call([ + "sed -i 's/VARIANT/2/g' %s/temp_Only_filtered_positions_for_closely_matrix_FQ.txt" % args.filter2_only_snp_vcf_dir], + shell=True) + subprocess.call([ + "sed -i 's/LowFQ_QUAL_DP_proximate_SNP/4/g' %s/temp_Only_filtered_positions_for_closely_matrix_FQ.txt" % args.filter2_only_snp_vcf_dir], + shell=True) + subprocess.call([ + "sed -i 's/LowFQ_DP_QUAL_proximate_SNP/4/g' %s/temp_Only_filtered_positions_for_closely_matrix_FQ.txt" % args.filter2_only_snp_vcf_dir], + shell=True) + subprocess.call([ + "sed -i 's/LowFQ_QUAL_proximate_SNP/4/g' %s/temp_Only_filtered_positions_for_closely_matrix_FQ.txt" % args.filter2_only_snp_vcf_dir], + shell=True) + subprocess.call([ + "sed -i 's/LowFQ_DP_proximate_SNP/4/g' %s/temp_Only_filtered_positions_for_closely_matrix_FQ.txt" % args.filter2_only_snp_vcf_dir], + shell=True) + subprocess.call([ + "sed -i 's/LowFQ_proximate_SNP/4/g' %s/temp_Only_filtered_positions_for_closely_matrix_FQ.txt" % args.filter2_only_snp_vcf_dir], + shell=True) + subprocess.call([ + "sed -i 's/LowFQ_QUAL_DP/4/g' 
%s/temp_Only_filtered_positions_for_closely_matrix_FQ.txt" % args.filter2_only_snp_vcf_dir], + shell=True) + subprocess.call([ + "sed -i 's/LowFQ_DP_QUAL/4/g' %s/temp_Only_filtered_positions_for_closely_matrix_FQ.txt" % args.filter2_only_snp_vcf_dir], + shell=True) + subprocess.call([ + "sed -i 's/LowFQ_QUAL/4/g' %s/temp_Only_filtered_positions_for_closely_matrix_FQ.txt" % args.filter2_only_snp_vcf_dir], + shell=True) + subprocess.call([ + "sed -i 's/LowFQ_DP/4/g' %s/temp_Only_filtered_positions_for_closely_matrix_FQ.txt" % args.filter2_only_snp_vcf_dir], + shell=True) + subprocess.call([ + "sed -i 's/HighFQ_QUAL_DP_proximate_SNP/4/g' %s/temp_Only_filtered_positions_for_closely_matrix_FQ.txt" % args.filter2_only_snp_vcf_dir], + shell=True) + subprocess.call([ + "sed -i 's/HighFQ_DP_QUAL_proximate_SNP/4/g' %s/temp_Only_filtered_positions_for_closely_matrix_FQ.txt" % args.filter2_only_snp_vcf_dir], + shell=True) + subprocess.call([ + "sed -i 's/HighFQ_QUAL_proximate_SNP/4/g' %s/temp_Only_filtered_positions_for_closely_matrix_FQ.txt" % args.filter2_only_snp_vcf_dir], + shell=True) + subprocess.call([ + "sed -i 's/HighFQ_DP_proximate_SNP/4/g' %s/temp_Only_filtered_positions_for_closely_matrix_FQ.txt" % args.filter2_only_snp_vcf_dir], + shell=True) + subprocess.call([ + "sed -i 's/HighFQ_proximate_SNP/4/g' %s/temp_Only_filtered_positions_for_closely_matrix_FQ.txt" % args.filter2_only_snp_vcf_dir], + shell=True) + subprocess.call([ + "sed -i 's/HighFQ_QUAL_DP/4/g' %s/temp_Only_filtered_positions_for_closely_matrix_FQ.txt" % args.filter2_only_snp_vcf_dir], + shell=True) + subprocess.call([ + "sed -i 's/HighFQ_DP_QUAL/4/g' %s/temp_Only_filtered_positions_for_closely_matrix_FQ.txt" % args.filter2_only_snp_vcf_dir], + shell=True) + subprocess.call([ + "sed -i 's/HighFQ_QUAL/4/g' %s/temp_Only_filtered_positions_for_closely_matrix_FQ.txt" % args.filter2_only_snp_vcf_dir], + shell=True) + subprocess.call([ + "sed -i 's/HighFQ_DP/4/g' 
%s/temp_Only_filtered_positions_for_closely_matrix_FQ.txt" % args.filter2_only_snp_vcf_dir], + shell=True) + subprocess.call([ + "sed -i 's/LowFQ/3/g' %s/temp_Only_filtered_positions_for_closely_matrix_FQ.txt" % args.filter2_only_snp_vcf_dir], + shell=True) + subprocess.call([ + "sed -i 's/HighFQ/4/g' %s/temp_Only_filtered_positions_for_closely_matrix_FQ.txt" % args.filter2_only_snp_vcf_dir], + shell=True) + + """ + Read temp_Only_filtered_positions_for_closely_matrix file and generate a matrix of positions that are being filtered just because of Dp + """ + temp_position_label_DP = OrderedDict() + f44 = open("%s/temp_Only_filtered_positions_for_closely_matrix_DP.txt" % args.filter2_only_snp_vcf_dir, 'w+') + with open("%s/temp_Only_filtered_positions_for_closely_matrix.txt" % args.filter2_only_snp_vcf_dir, + 'rU') as csv_file: + keep_logging( + 'Reading temporary Only_filtered_positions label file: %s/temp_Only_filtered_positions_for_closely_matrix.txt \n' % args.filter2_only_snp_vcf_dir, + 'Reading temporary Only_filtered_positions label file: %s/temp_Only_filtered_positions_for_closely_matrix.txt \n' % args.filter2_only_snp_vcf_dir, + logger, 'info') + csv_reader = csv.reader(csv_file, delimiter='\t') + next(csv_reader, None) + for row in csv_reader: + temp_position_label_DP[row[0]] = row[1:] + print_string_header = "\t" + for i in vcf_filenames: + print_string_header = print_string_header + os.path.basename(i) + "\t" + f44.write('\t' + print_string_header.strip() + '\n') + for value in temp_position_label_DP: + lll = ['HighFQ_DP'] + ref_var = ['reference_allele', 'VARIANT'] + if set(lll) & set(temp_position_label_FQ[value]): + + print_string = "" + for i in temp_position_label_FQ[value]: + print_string = print_string + "\t" + i + STRR2 = value + print_string + "\n" + f44.write(STRR2) + f44.close() + csv_file.close() + + """ + Perform Sed on temp files. Find a faster way to do this. 
+ """ + subprocess.call([ + "sed -i 's/_filter2_final.vcf_no_proximate_snp.vcf//g' %s/temp_Only_filtered_positions_for_closely_matrix_DP.txt" % args.filter2_only_snp_vcf_dir], + shell=True) + subprocess.call([ + "sed -i 's/reference_unmapped_position/0/g' %s/temp_Only_filtered_positions_for_closely_matrix_DP.txt" % args.filter2_only_snp_vcf_dir], + shell=True) + subprocess.call([ + "sed -i 's/reference_allele/1/g' %s/temp_Only_filtered_positions_for_closely_matrix_DP.txt" % args.filter2_only_snp_vcf_dir], + shell=True) + subprocess.call([ + "sed -i 's/VARIANT/2/g' %s/temp_Only_filtered_positions_for_closely_matrix_DP.txt" % args.filter2_only_snp_vcf_dir], + shell=True) + subprocess.call([ + "sed -i 's/LowFQ_QUAL_DP_proximate_SNP/4/g' %s/temp_Only_filtered_positions_for_closely_matrix_DP.txt" % args.filter2_only_snp_vcf_dir], + shell=True) + subprocess.call([ + "sed -i 's/LowFQ_DP_QUAL_proximate_SNP/4/g' %s/temp_Only_filtered_positions_for_closely_matrix_DP.txt" % args.filter2_only_snp_vcf_dir], + shell=True) + subprocess.call([ + "sed -i 's/LowFQ_QUAL_proximate_SNP/4/g' %s/temp_Only_filtered_positions_for_closely_matrix_DP.txt" % args.filter2_only_snp_vcf_dir], + shell=True) + subprocess.call([ + "sed -i 's/LowFQ_DP_proximate_SNP/4/g' %s/temp_Only_filtered_positions_for_closely_matrix_DP.txt" % args.filter2_only_snp_vcf_dir], + shell=True) + subprocess.call([ + "sed -i 's/LowFQ_proximate_SNP/4/g' %s/temp_Only_filtered_positions_for_closely_matrix_DP.txt" % args.filter2_only_snp_vcf_dir], + shell=True) + subprocess.call([ + "sed -i 's/LowFQ_QUAL_DP/4/g' %s/temp_Only_filtered_positions_for_closely_matrix_DP.txt" % args.filter2_only_snp_vcf_dir], + shell=True) + subprocess.call([ + "sed -i 's/LowFQ_DP_QUAL/4/g' %s/temp_Only_filtered_positions_for_closely_matrix_DP.txt" % args.filter2_only_snp_vcf_dir], + shell=True) + subprocess.call([ + "sed -i 's/LowFQ_QUAL/4/g' %s/temp_Only_filtered_positions_for_closely_matrix_DP.txt" % args.filter2_only_snp_vcf_dir], + 
shell=True) + subprocess.call([ + "sed -i 's/LowFQ_DP/4/g' %s/temp_Only_filtered_positions_for_closely_matrix_DP.txt" % args.filter2_only_snp_vcf_dir], + shell=True) + subprocess.call([ + "sed -i 's/HighFQ_QUAL_DP_proximate_SNP/4/g' %s/temp_Only_filtered_positions_for_closely_matrix_DP.txt" % args.filter2_only_snp_vcf_dir], + shell=True) + subprocess.call([ + "sed -i 's/HighFQ_DP_QUAL_proximate_SNP/4/g' %s/temp_Only_filtered_positions_for_closely_matrix_DP.txt" % args.filter2_only_snp_vcf_dir], + shell=True) + subprocess.call([ + "sed -i 's/HighFQ_QUAL_proximate_SNP/4/g' %s/temp_Only_filtered_positions_for_closely_matrix_DP.txt" % args.filter2_only_snp_vcf_dir], + shell=True) + subprocess.call([ + "sed -i 's/HighFQ_DP_proximate_SNP/4/g' %s/temp_Only_filtered_positions_for_closely_matrix_DP.txt" % args.filter2_only_snp_vcf_dir], + shell=True) + subprocess.call([ + "sed -i 's/HighFQ_proximate_SNP/4/g' %s/temp_Only_filtered_positions_for_closely_matrix_DP.txt" % args.filter2_only_snp_vcf_dir], + shell=True) + subprocess.call([ + "sed -i 's/HighFQ_QUAL_DP/4/g' %s/temp_Only_filtered_positions_for_closely_matrix_DP.txt" % args.filter2_only_snp_vcf_dir], + shell=True) + subprocess.call([ + "sed -i 's/HighFQ_DP_QUAL/4/g' %s/temp_Only_filtered_positions_for_closely_matrix_DP.txt" % args.filter2_only_snp_vcf_dir], + shell=True) + subprocess.call([ + "sed -i 's/HighFQ_QUAL/4/g' %s/temp_Only_filtered_positions_for_closely_matrix_DP.txt" % args.filter2_only_snp_vcf_dir], + shell=True) + subprocess.call([ + "sed -i 's/HighFQ_DP/3/g' %s/temp_Only_filtered_positions_for_closely_matrix_DP.txt" % args.filter2_only_snp_vcf_dir], + shell=True) + subprocess.call([ + "sed -i 's/LowFQ/4/g' %s/temp_Only_filtered_positions_for_closely_matrix_DP.txt" % args.filter2_only_snp_vcf_dir], + shell=True) + subprocess.call([ + "sed -i 's/HighFQ/4/g' %s/temp_Only_filtered_positions_for_closely_matrix_DP.txt" % args.filter2_only_snp_vcf_dir], + shell=True) + + def barplot_stats(): + keep_logging( + 
'\nRead each Sample columns and calculate the percentage of each label to generate barplot statistics.\n', + '\nRead each Sample columns and calculate the percentage of each label to generate barplot statistics.\n', + logger, 'info') + """ + Read each Sample columns and calculate the percentage of each label to generate barplot statistics. + This will give a visual explanation of how many positions in each samples were filtered out because of different reason + """ + + c_reader = csv.reader( + open('%s/temp_Only_filtered_positions_for_closely_matrix.txt' % args.filter2_only_snp_vcf_dir, 'r'), + delimiter='\t') + columns = list(zip(*c_reader)) + keep_logging('Finished reading columns...', 'Finished reading columns...', logger, 'info') + counts = 1 + + if args.outgroup: + end = len(vcf_filenames) + 1 + end = end - 1 + else: + end = len(vcf_filenames) + 1 + + f_bar_count = open("%s/bargraph_counts.txt" % args.filter2_only_snp_vcf_dir, 'w+') + f_bar_perc = open("%s/bargraph_percentage.txt" % args.filter2_only_snp_vcf_dir, 'w+') + f_bar_count.write( + "Sample\tunmapped_positions\treference_allele\ttrue_variant\tOnly_low_FQ\tOnly_DP\tOnly_low_MQ\tother\n") + f_bar_perc.write( + "Sample\tunmapped_positions_perc\ttrue_variant_perc\tOnly_low_FQ_perc\tOnly_DP_perc\tOnly_low_MQ_perc\tother_perc\n") + + for i in xrange(1, end, 1): + """ Bar Count Statistics: Variant Position Count Statistics """ + true_variant = columns[i].count('VARIANT') + unmapped_positions = columns[i].count('reference_unmapped_position') + reference_allele = columns[i].count('reference_allele') + Only_low_FQ = columns[i].count('LowFQ') + Only_DP = columns[i].count('HighFQ_DP') + Only_low_MQ = columns[i].count('HighFQ') + low_FQ_other_parameters = columns[i].count('LowFQ_QUAL_DP_proximate_SNP') + columns[i].count( + 'LowFQ_DP_QUAL_proximate_SNP') + columns[i].count('LowFQ_QUAL_proximate_SNP') + columns[i].count( + 'LowFQ_DP_proximate_SNP') + columns[i].count('LowFQ_proximate_SNP') + columns[i].count( + 
'LowFQ_QUAL_DP') + columns[i].count('LowFQ_DP_QUAL') + columns[i].count('LowFQ_QUAL') + columns[ + i].count('LowFQ_DP') + high_FQ_other_parameters = columns[i].count('HighFQ_QUAL_DP_proximate_SNP') + columns[i].count( + 'HighFQ_DP_QUAL_proximate_SNP') + columns[i].count('HighFQ_QUAL_proximate_SNP') + columns[i].count( + 'HighFQ_DP_proximate_SNP') + columns[i].count('HighFQ_proximate_SNP') + columns[i].count( + 'HighFQ_QUAL_DP') + columns[i].count('HighFQ_DP_QUAL') + columns[i].count('HighFQ_QUAL') + other = low_FQ_other_parameters + high_FQ_other_parameters + + total = true_variant + unmapped_positions + reference_allele + Only_low_FQ + Only_DP + low_FQ_other_parameters + high_FQ_other_parameters + Only_low_MQ + + filename_count = i - 1 + + if args.outgroup: + bar_string = "%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\n" % (os.path.basename( + vcf_filenames_outgroup[filename_count].replace('_filter2_final.vcf_no_proximate_snp.vcf', '')), + unmapped_positions, reference_allele, true_variant, + Only_low_FQ, Only_DP, Only_low_MQ, other) + f_bar_count.write(bar_string) + else: + bar_string = "%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\n" % (os.path.basename( + vcf_filenames[filename_count].replace('_filter2_final.vcf_no_proximate_snp.vcf', '')), + unmapped_positions, reference_allele, true_variant, + Only_low_FQ, Only_DP, Only_low_MQ, other) + # f_bar_count.write(bar_string) + """ Bar Count Percentage Statistics: Variant Position Percentage Statistics """ + try: + true_variant_perc = float((columns[i].count('VARIANT') * 100) / total) + except ZeroDivisionError: + true_variant_perc = 0 + try: + unmapped_positions_perc = float((columns[i].count('reference_unmapped_position') * 100) / total) + except ZeroDivisionError: + unmapped_positions_perc = 0 + try: + reference_allele_perc = float((columns[i].count('reference_allele') * 100) / total) + except ZeroDivisionError: + reference_allele_perc = 0 + try: + Only_low_FQ_perc = float((columns[i].count('LowFQ') * 100) / total) + except ZeroDivisionError: 
+ Only_low_FQ_perc = 0 + try: + Only_DP_perc = float((columns[i].count('HighFQ_DP') * 100) / total) + except ZeroDivisionError: + Only_DP_perc = 0 + try: + Only_low_MQ_perc = float((columns[i].count('HighFQ') * 100) / total) + except ZeroDivisionError: + Only_low_MQ_perc = 0 + try: + low_FQ_other_parameters_perc = float(((columns[i].count('LowFQ_QUAL_DP_proximate_SNP') + columns[ + i].count('LowFQ_DP_QUAL_proximate_SNP') + columns[i].count('LowFQ_QUAL_proximate_SNP') + columns[ + i].count('LowFQ_DP_proximate_SNP') + columns[i].count( + 'LowFQ_proximate_SNP') + columns[i].count('LowFQ_QUAL_DP') + columns[i].count('LowFQ_DP_QUAL') + + columns[i].count('LowFQ_QUAL') + columns[i].count( + 'LowFQ_DP')) * 100) / total) + except ZeroDivisionError: + low_FQ_other_parameters_perc = 0 + try: + high_FQ_other_parameters_perc = float(((columns[i].count('HighFQ_QUAL_DP_proximate_SNP') + columns[ + i].count('HighFQ_DP_QUAL_proximate_SNP') + columns[i].count('HighFQ_QUAL_proximate_SNP') + columns[ + i].count('HighFQ_DP_proximate_SNP') + columns[i].count( + 'HighFQ_proximate_SNP') + columns[i].count('HighFQ_QUAL_DP') + columns[i].count('HighFQ_DP_QUAL') + + columns[i].count('HighFQ_QUAL')) * 100) / total) + except ZeroDivisionError: + high_FQ_other_parameters_perc = 0 + + other_perc = float(low_FQ_other_parameters_perc + high_FQ_other_parameters_perc) + if args.outgroup: + bar_perc_string = "%s\t%s\t%s\t%s\t%s\t%s\t%s\n" % (os.path.basename( + vcf_filenames_outgroup[filename_count].replace('_filter2_final.vcf_no_proximate_snp.vcf', '')), + unmapped_positions_perc, true_variant_perc, + Only_low_FQ_perc, Only_DP_perc, Only_low_MQ_perc, + other_perc) + else: + bar_perc_string = "%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\n" % (os.path.basename( + vcf_filenames[filename_count].replace('_filter2_final.vcf_no_proximate_snp.vcf', '')), + unmapped_positions_perc, reference_allele_perc, + true_variant_perc, + Only_low_FQ_perc, Only_DP_perc, + Only_low_MQ_perc, other_perc) + 
f_bar_count.write(bar_string) + f_bar_perc.write(bar_perc_string) + f_bar_count.close() + f_bar_perc.close() + bargraph_R_script = "library(ggplot2)\nlibrary(reshape)\nx1 <- read.table(\"bargraph_percentage.txt\", header=TRUE)\nx1$Sample <- reorder(x1$Sample, rowSums(x1[-1]))\nmdf1=melt(x1,id.vars=\"Sample\")\npdf(\"%s/%s_barplot.pdf\", width = 30, height = 30)\nggplot(mdf1, aes(Sample, value, fill=variable)) + geom_bar(stat=\"identity\") + ylab(\"Percentage of Filtered Positions\") + xlab(\"Samples\") + theme(text = element_text(size=9)) + scale_fill_manual(name=\"Reason for filtered out positions\", values=c(\"#08306b\", \"black\", \"orange\", \"darkgrey\", \"#fdd0a2\", \"#7f2704\")) + ggtitle(\"Title Here\") + ylim(0, 100) + theme(text = element_text(size=10), panel.background = element_rect(fill = 'white', colour = 'white'), plot.title = element_text(size=20, face=\"bold\", margin = margin(10, 0, 10, 0)), axis.ticks.y = element_blank(), axis.ticks.x = element_blank(), axis.text.x = element_text(colour = \"black\", face= \"bold.italic\", angle = 90)) + theme(legend.position = c(0.6, 0.7), legend.direction = \"horizontal\")\ndev.off()" % ( + args.filter2_only_snp_vcf_dir, os.path.basename(os.path.normpath(args.results_dir))) + barplot_R_file = open("%s/bargraph.R" % args.filter2_only_snp_vcf_dir, 'w+') + barplot_R_file.write(bargraph_R_script) + keep_logging('Run this R script to generate bargraph plot: %s/bargraph.R' % args.filter2_only_snp_vcf_dir, + 'Run this R script to generate bargraph plot: %s/bargraph.R' % args.filter2_only_snp_vcf_dir, + logger, 'info') + + """ Methods Steps""" + keep_logging('Running: Generating data matrices...', 'Running: Generating data matrices...', logger, 'info') + generate_position_label_data_matrix_All_label() + keep_logging('Running: Changing variables in data matrices to codes for faster processing...', + 'Running: Changing variables in data matrices to codes for faster processing...', logger, 'info') + 
temp_generate_position_label_data_matrix_All_label() + keep_logging('Running: Generating Barplot statistics data matrices...', + 'Running: Generating Barplot statistics data matrices...', logger, 'info') + barplot_stats() + + +def generate_indel_position_label_data_matrix(): + """ + Generate different list of Positions using the matrix All_label_final_sorted_header.txt. + + (Defining Core Variant Position: Variant Position which was not filtered out in any of the other samples due to variant filter parameter and also this position was present in all the samples(not unmapped)). + + Filtered Position label matrix: + List of non-core positions. These positions didn't make it to the final core list because it was filtered out in one of the samples. + + Only_ref_variant_positions_for_closely_matrix.txt : + Those Positions where the variant was either reference allele or a variant that passed all the variant filter parameters. + + :param: null + :return: null + + """ + + def generate_indel_position_label_data_matrix_All_label(): + position_label = OrderedDict() + print "Generating Only_ref_indel_positions_for_closely" + f1 = open("%s/Only_ref_indel_positions_for_closely" % args.filter2_only_snp_vcf_dir, 'w+') + f2 = open("%s/Only_ref_indel_positions_for_closely_matrix.txt" % args.filter2_only_snp_vcf_dir, 'w+') + f3 = open("%s/Only_filtered_indel_positions_for_closely_matrix.txt" % args.filter2_only_snp_vcf_dir, 'w+') + f4 = open( + "%s/Only_filtered_indel_positions_for_closely_matrix_TRUE_variants_filtered_out.txt" % args.filter2_only_snp_vcf_dir, + 'w+') + + if args.outgroup: + with open("%s/All_indel_label_final_sorted_header_outgroup.txt" % args.filter2_only_snp_vcf_dir, + 'rU') as csv_file: + keep_logging( + 'Reading All label positions file: %s/All_indel_label_final_sorted_header.txt' % args.filter2_only_snp_vcf_dir, + 'Reading All label positions file: %s/All_indel_label_final_sorted_header.txt' % args.filter2_only_snp_vcf_dir, + logger, 'info') + csv_reader = 
csv.reader(csv_file, delimiter='\t') + next(csv_reader, None) + for row in csv_reader: + position_label[row[0]] = row[1:] + keep_logging('Generating different list of Positions and heatmap data matrix...', + 'Generating different list of Positions and heatmap data matrix...', logger, 'info') + print_string_header = "\t" + for i in vcf_filenames: + print_string_header = print_string_header + os.path.basename(i) + "\t" + # f.write('\t' + print_string_header.strip() + '\n') + f2.write('\t' + print_string_header.strip() + '\n') + f3.write('\t' + print_string_header.strip() + '\n') + f4.write('\t' + print_string_header.strip() + '\n') + for value in position_label: + lll = ['0', '2', '3', '4', '5', '6', '7'] + ref_var = ['1', '1TRUE'] + if set(ref_var) & set(position_label[value]): + if set(lll) & set(position_label[value]): + if int(value) not in outgroup_indel_specific_positions: + print_string = "" + for i in position_label[value]: + print_string = print_string + "\t" + i + STRR2 = value + print_string + "\n" + f3.write(STRR2) + if position_label[value].count('1TRUE') >= 2: + f4.write('1\n') + else: + f4.write('0\n') + else: + if int(value) not in outgroup_indel_specific_positions: + strr = value + "\n" + f1.write(strr) + STRR3 = value + "\t" + str(position_label[value]) + "\n" + f2.write(STRR3) + csv_file.close() + f1.close() + f2.close() + f3.close() + f4.close() + subprocess.call([ + "sed -i 's/_filter2_final.vcf_no_proximate_snp.vcf//g' %s/Only_ref_indel_positions_for_closely" % args.filter2_only_snp_vcf_dir], + shell=True) + subprocess.call([ + "sed -i 's/_filter2_final.vcf_no_proximate_snp.vcf//g' %s/Only_ref_indel_positions_for_closely_matrix.txt" % args.filter2_only_snp_vcf_dir], + shell=True) + subprocess.call([ + "sed -i 's/_filter2_final.vcf_no_proximate_snp.vcf//g' %s/Only_filtered_indel_positions_for_closely_matrix.txt" % args.filter2_only_snp_vcf_dir], + shell=True) + subprocess.call([ + "sed -i 's/_filter2_final.vcf_no_proximate_snp.vcf//g' 
%s/Only_filtered_indel_positions_for_closely_matrix_TRUE_variants_filtered_out.txt" % args.filter2_only_snp_vcf_dir], + shell=True) + subprocess.call([ + "sed -i 's/1TRUE/-1/g' %s/Only_filtered_indel_positions_for_closely_matrix.txt" % args.filter2_only_snp_vcf_dir], + shell=True) + else: + with open("%s/All_indel_label_final_sorted_header.txt" % args.filter2_only_snp_vcf_dir, 'rU') as csv_file: + keep_logging( + 'Reading All label positions file: %s/All_indel_label_final_sorted_header.txt' % args.filter2_only_snp_vcf_dir, + 'Reading All label positions file: %s/All_indel_label_final_sorted_header.txt' % args.filter2_only_snp_vcf_dir, + logger, 'info') + csv_reader = csv.reader(csv_file, delimiter='\t') + next(csv_reader, None) + for row in csv_reader: + position_label[row[0]] = row[1:] + keep_logging('Generating different list of Positions and heatmap data matrix...', + 'Generating different list of Positions and heatmap data matrix...', logger, 'info') + print_string_header = "\t" + for i in vcf_filenames: + print_string_header = print_string_header + os.path.basename(i) + "\t" + # f.write('\t' + print_string_header.strip() + '\n') + f2.write('\t' + print_string_header.strip() + '\n') + f3.write('\t' + print_string_header.strip() + '\n') + f4.write('\t' + print_string_header.strip() + '\n') + for value in position_label: + + lll = ['0', '2', '3', '4', '5', '6', '7'] + ref_var = ['1', '1TRUE'] + if set(ref_var) & set(position_label[value]): + if set(lll) & set(position_label[value]): + print_string = "" + for i in position_label[value]: + print_string = print_string + "\t" + i + STRR2 = value + print_string + "\n" + f3.write(STRR2) + if position_label[value].count('1TRUE') >= 2: + f4.write('1\n') + else: + f4.write('0\n') + else: + strr = value + "\n" + f1.write(strr) + STRR3 = value + "\t" + str(position_label[value]) + "\n" + f2.write(STRR3) + csv_file.close() + f1.close() + f2.close() + f3.close() + f4.close() + subprocess.call([ + "sed -i 
's/_filter2_final.vcf_no_proximate_snp.vcf//g' %s/Only_ref_indel_positions_for_closely" % args.filter2_only_snp_vcf_dir], + shell=True) + subprocess.call([ + "sed -i 's/_filter2_final.vcf_no_proximate_snp.vcf//g' %s/Only_ref_indel_positions_for_closely_matrix.txt" % args.filter2_only_snp_vcf_dir], + shell=True) + subprocess.call([ + "sed -i 's/_filter2_final.vcf_no_proximate_snp.vcf//g' %s/Only_filtered_indel_positions_for_closely_matrix.txt" % args.filter2_only_snp_vcf_dir], + shell=True) + subprocess.call([ + "sed -i 's/_filter2_final.vcf_no_proximate_snp.vcf//g' %s/Only_filtered_indel_positions_for_closely_matrix_TRUE_variants_filtered_out.txt" % args.filter2_only_snp_vcf_dir], + shell=True) + subprocess.call([ + "sed -i 's/1TRUE/-1/g' %s/Only_filtered_indel_positions_for_closely_matrix.txt" % args.filter2_only_snp_vcf_dir], + shell=True) + + def temp_generate_indel_position_label_data_matrix_All_label(): + + """ + Read **temp_label_final_raw.txt** SNP position label data matrix for generating barplot statistics. 
+ """ + temp_position_label = OrderedDict() + f33 = open("%s/temp_Only_filtered_indel_positions_for_closely_matrix.txt" % args.filter2_only_snp_vcf_dir, 'w+') + print_string_header = "\t" + if args.outgroup: + for i in vcf_filenames: + + if "%s_filter2_final.vcf_no_proximate_snp.vcf" % outgroup not in i: + print_string_header = print_string_header + os.path.basename(i) + "\t" + else: + for i in vcf_filenames: + print_string_header = print_string_header + os.path.basename(i) + "\t" + + f33.write('\t' + print_string_header.strip() + '\n') + keep_logging( + 'Reading temporary label positions file: %s/temp_label_final_raw.txt' % args.filter2_only_snp_vcf_dir, + 'Reading temporary label positions file: %s/temp_label_final_raw.txt' % args.filter2_only_snp_vcf_dir, + logger, 'info') + # lll = ['reference_unmapped_position', 'LowFQ', 'LowFQ_DP', 'LowFQ_QUAL', 'LowFQ_DP_QUAL', 'LowFQ_QUAL_DP', 'HighFQ_DP', 'HighFQ_QUAL', 'HighFQ_DP_QUAL', 'HighFQ_QUAL_DP', 'HighFQ', 'LowFQ_proximate_SNP', 'LowFQ_DP_proximate_SNP', 'LowFQ_QUAL_proximate_SNP', 'LowFQ_DP_QUAL_proximate_SNP', 'LowFQ_QUAL_DP_proximate_SNP', 'HighFQ_DP_proximate_SNP', 'HighFQ_QUAL_proximate_SNP', 'HighFQ_DP_QUAL_proximate_SNP', 'HighFQ_QUAL_DP_proximate_SNP', 'HighFQ_proximate_SNP', '_proximate_SNP'] + lll = ['reference_unmapped_position', 'LowAF', 'LowAF_DP', 'LowAF_QUAL', 'LowAF_DP_QUAL', 'LowAF_QUAL_DP', + 'HighAF_DP', 'HighAF_QUAL', 'HighAF_DP_QUAL', 'HighAF_QUAL_DP', 'HighAF', 'LowAF_proximate_SNP', + 'LowAF_DP_proximate_SNP', 'LowAF_QUAL_proximate_SNP', 'LowAF_DP_QUAL_proximate_SNP', + 'LowAF_QUAL_DP_proximate_SNP', 'HighAF_DP_proximate_SNP', 'HighAF_QUAL_proximate_SNP', + 'HighAF_DP_QUAL_proximate_SNP', 'HighAF_QUAL_DP_proximate_SNP', 'HighAF_proximate_SNP', '_proximate_SNP'] + ref_var = ['reference_allele', 'VARIANT'] + + if args.outgroup: + with open("%s/temp_indel_label_final_raw_outgroup.txt" % args.filter2_only_snp_vcf_dir, 'r') as csv_file: + csv_reader = csv.reader(csv_file, delimiter='\t') + 
next(csv_reader, None) + for row in csv_reader: + if set(ref_var) & set(row[1:]): + if set(lll) & set(row[1:]): + if int(row[0]) not in outgroup_indel_specific_positions: + print_string = "" + for i in row[1:]: + print_string = print_string + "\t" + i + STRR2 = row[0] + print_string + "\n" + f33.write(STRR2) + csv_file.close() + f33.close() + else: + with open("%s/temp_indel_label_final_raw.txt" % args.filter2_only_snp_vcf_dir, 'r') as csv_file: + csv_reader = csv.reader(csv_file, delimiter='\t') + next(csv_reader, None) + for row in csv_reader: + if set(ref_var) & set(row[1:]): + if set(lll) & set(row[1:]): + + print_string = "" + for i in row[1:]: + print_string = print_string + "\t" + i + STRR2 = row[0] + print_string + "\n" + f33.write(STRR2) + csv_file.close() + f33.close() + """ + Read temp_Only_filtered_positions_for_closely_matrix file and generate a matrix of positions that are being filtered just because of AF + """ + temp_position_label_AF = OrderedDict() + f44 = open("%s/temp_Only_filtered_indel_positions_for_closely_matrix_AF.txt" % args.filter2_only_snp_vcf_dir, + 'w+') + with open("%s/temp_Only_filtered_indel_positions_for_closely_matrix.txt" % args.filter2_only_snp_vcf_dir, + 'rU') as csv_file: + keep_logging( + 'Reading temporary Only_filtered_indel_positions label file: %s/temp_Only_filtered_indel_positions_for_closely_matrix.txt ' % args.filter2_only_snp_vcf_dir, + 'Reading temporary Only_filtered_indel_positions label file: %s/temp_Only_filtered_indel_positions_for_closely_matrix.txt ' % args.filter2_only_snp_vcf_dir, + logger, 'info') + csv_reader = csv.reader(csv_file, delimiter='\t') + next(csv_reader, None) + + for row in csv_reader: + temp_position_label_AF[row[0]] = row[1:] + print_string_header = "\t" + for i in vcf_filenames: + print_string_header = print_string_header + os.path.basename(i) + "\t" + f44.write('\t' + print_string_header.strip() + '\n') + for value in temp_position_label_AF: + lll = ['LowAF'] + if set(lll) & 
set(temp_position_label_AF[value]): + + print_string = "" + for i in temp_position_label_AF[value]: + print_string = print_string + "\t" + i + STRR2 = value + print_string + "\n" + f44.write(STRR2) + f44.close() + csv_file.close() + f44.close() + + """ + Perform Sed on temp files. Find a faster way to do this. + """ + subprocess.call([ + "sed -i 's/_filter2_final.vcf_no_proximate_snp.vcf//g' %s/temp_Only_filtered_indel_positions_for_closely_matrix_AF.txt" % args.filter2_only_snp_vcf_dir], + shell=True) + subprocess.call([ + "sed -i 's/reference_unmapped_position/0/g' %s/temp_Only_filtered_indel_positions_for_closely_matrix_AF.txt" % args.filter2_only_snp_vcf_dir], + shell=True) + subprocess.call([ + "sed -i 's/reference_allele/1/g' %s/temp_Only_filtered_indel_positions_for_closely_matrix_AF.txt" % args.filter2_only_snp_vcf_dir], + shell=True) + subprocess.call([ + "sed -i 's/VARIANT/2/g' %s/temp_Only_filtered_indel_positions_for_closely_matrix_AF.txt" % args.filter2_only_snp_vcf_dir], + shell=True) + subprocess.call([ + "sed -i 's/LowAF_QUAL_DP_proximate_SNP/4/g' %s/temp_Only_filtered_indel_positions_for_closely_matrix_AF.txt" % args.filter2_only_snp_vcf_dir], + shell=True) + subprocess.call([ + "sed -i 's/LowAF_DP_QUAL_proximate_SNP/4/g' %s/temp_Only_filtered_indel_positions_for_closely_matrix_AF.txt" % args.filter2_only_snp_vcf_dir], + shell=True) + subprocess.call([ + "sed -i 's/LowAF_QUAL_proximate_SNP/4/g' %s/temp_Only_filtered_indel_positions_for_closely_matrix_AF.txt" % args.filter2_only_snp_vcf_dir], + shell=True) + subprocess.call([ + "sed -i 's/LowAF_DP_proximate_SNP/4/g' %s/temp_Only_filtered_indel_positions_for_closely_matrix_AF.txt" % args.filter2_only_snp_vcf_dir], + shell=True) + subprocess.call([ + "sed -i 's/LowAF_proximate_SNP/4/g' %s/temp_Only_filtered_indel_positions_for_closely_matrix_AF.txt" % args.filter2_only_snp_vcf_dir], + shell=True) + subprocess.call([ + "sed -i 's/LowAF_QUAL_DP/4/g' 
%s/temp_Only_filtered_indel_positions_for_closely_matrix_AF.txt" % args.filter2_only_snp_vcf_dir], + shell=True) + subprocess.call([ + "sed -i 's/LowAF_DP_QUAL/4/g' %s/temp_Only_filtered_indel_positions_for_closely_matrix_AF.txt" % args.filter2_only_snp_vcf_dir], + shell=True) + subprocess.call([ + "sed -i 's/LowAF_QUAL/4/g' %s/temp_Only_filtered_indel_positions_for_closely_matrix_AF.txt" % args.filter2_only_snp_vcf_dir], + shell=True) + subprocess.call([ + "sed -i 's/LowAF_DP/4/g' %s/temp_Only_filtered_indel_positions_for_closely_matrix_AF.txt" % args.filter2_only_snp_vcf_dir], + shell=True) + subprocess.call([ + "sed -i 's/HighAF_QUAL_DP_proximate_SNP/4/g' %s/temp_Only_filtered_indel_positions_for_closely_matrix_AF.txt" % args.filter2_only_snp_vcf_dir], + shell=True) + subprocess.call([ + "sed -i 's/HighAF_DP_QUAL_proximate_SNP/4/g' %s/temp_Only_filtered_indel_positions_for_closely_matrix_AF.txt" % args.filter2_only_snp_vcf_dir], + shell=True) + subprocess.call([ + "sed -i 's/HighAF_QUAL_proximate_SNP/4/g' %s/temp_Only_filtered_indel_positions_for_closely_matrix_AF.txt" % args.filter2_only_snp_vcf_dir], + shell=True) + subprocess.call([ + "sed -i 's/HighAF_DP_proximate_SNP/4/g' %s/temp_Only_filtered_indel_positions_for_closely_matrix_AF.txt" % args.filter2_only_snp_vcf_dir], + shell=True) + subprocess.call([ + "sed -i 's/HighAF_proximate_SNP/4/g' %s/temp_Only_filtered_indel_positions_for_closely_matrix_AF.txt" % args.filter2_only_snp_vcf_dir], + shell=True) + subprocess.call([ + "sed -i 's/HighAF_QUAL_DP/4/g' %s/temp_Only_filtered_indel_positions_for_closely_matrix_AF.txt" % args.filter2_only_snp_vcf_dir], + shell=True) + subprocess.call([ + "sed -i 's/HighAF_DP_QUAL/4/g' %s/temp_Only_filtered_indel_positions_for_closely_matrix_AF.txt" % args.filter2_only_snp_vcf_dir], + shell=True) + subprocess.call([ + "sed -i 's/HighAF_QUAL/4/g' %s/temp_Only_filtered_indel_positions_for_closely_matrix_AF.txt" % args.filter2_only_snp_vcf_dir], + shell=True) + subprocess.call([ 
+ "sed -i 's/HighAF_DP/4/g' %s/temp_Only_filtered_indel_positions_for_closely_matrix_AF.txt" % args.filter2_only_snp_vcf_dir], + shell=True) + subprocess.call([ + "sed -i 's/LowAF/3/g' %s/temp_Only_filtered_indel_positions_for_closely_matrix_AF.txt" % args.filter2_only_snp_vcf_dir], + shell=True) + subprocess.call([ + "sed -i 's/HighAF/4/g' %s/temp_Only_filtered_indel_positions_for_closely_matrix_AF.txt" % args.filter2_only_snp_vcf_dir], + shell=True) + + """ + Read temp_Only_filtered_positions_for_closely_matrix file and generate a matrix of positions that are being filtered just because of Dp + """ + temp_position_label_DP = OrderedDict() + f44 = open("%s/temp_Only_filtered_indel_positions_for_closely_matrix_DP.txt" % args.filter2_only_snp_vcf_dir, + 'w+') + with open("%s/temp_Only_filtered_indel_positions_for_closely_matrix.txt" % args.filter2_only_snp_vcf_dir, + 'rU') as csv_file: + keep_logging( + 'Reading temporary Only_filtered_positions label file: %s/temp_Only_filtered_indel_positions_for_closely_matrix.txt ' % args.filter2_only_snp_vcf_dir, + 'Reading temporary Only_filtered_positions label file: %s/temp_Only_filtered_indel_positions_for_closely_matrix.txt ' % args.filter2_only_snp_vcf_dir, + logger, 'info') + csv_reader = csv.reader(csv_file, delimiter='\t') + next(csv_reader, None) + for row in csv_reader: + temp_position_label_DP[row[0]] = row[1:] + print_string_header = "\t" + for i in vcf_filenames: + print_string_header = print_string_header + os.path.basename(i) + "\t" + f44.write('\t' + print_string_header.strip() + '\n') + for value in temp_position_label_DP: + lll = ['HighAF_DP'] + ref_var = ['reference_allele', 'VARIANT'] + if set(lll) & set(temp_position_label_AF[value]): + print_string = "" + for i in temp_position_label_AF[value]: + print_string = print_string + "\t" + i + STRR2 = value + print_string + "\n" + f44.write(STRR2) + f44.close() + csv_file.close() + + """ + Perform Sed on temp files. Find a faster way to do this. 
+ """ + subprocess.call([ + "sed -i 's/_filter2_final.vcf_no_proximate_snp.vcf//g' %s/temp_Only_filtered_indel_positions_for_closely_matrix_DP.txt" % args.filter2_only_snp_vcf_dir], + shell=True) + subprocess.call([ + "sed -i 's/reference_unmapped_position/0/g' %s/temp_Only_filtered_indel_positions_for_closely_matrix_DP.txt" % args.filter2_only_snp_vcf_dir], + shell=True) + subprocess.call([ + "sed -i 's/reference_allele/1/g' %s/temp_Only_filtered_indel_positions_for_closely_matrix_DP.txt" % args.filter2_only_snp_vcf_dir], + shell=True) + subprocess.call([ + "sed -i 's/VARIANT/2/g' %s/temp_Only_filtered_indel_positions_for_closely_matrix_DP.txt" % args.filter2_only_snp_vcf_dir], + shell=True) + subprocess.call([ + "sed -i 's/LowAF_QUAL_DP_proximate_SNP/4/g' %s/temp_Only_filtered_indel_positions_for_closely_matrix_DP.txt" % args.filter2_only_snp_vcf_dir], + shell=True) + subprocess.call([ + "sed -i 's/LowAF_DP_QUAL_proximate_SNP/4/g' %s/temp_Only_filtered_indel_positions_for_closely_matrix_DP.txt" % args.filter2_only_snp_vcf_dir], + shell=True) + subprocess.call([ + "sed -i 's/LowAF_QUAL_proximate_SNP/4/g' %s/temp_Only_filtered_indel_positions_for_closely_matrix_DP.txt" % args.filter2_only_snp_vcf_dir], + shell=True) + subprocess.call([ + "sed -i 's/LowAF_DP_proximate_SNP/4/g' %s/temp_Only_filtered_indel_positions_for_closely_matrix_DP.txt" % args.filter2_only_snp_vcf_dir], + shell=True) + subprocess.call([ + "sed -i 's/LowAF_proximate_SNP/4/g' %s/temp_Only_filtered_indel_positions_for_closely_matrix_DP.txt" % args.filter2_only_snp_vcf_dir], + shell=True) + subprocess.call([ + "sed -i 's/LowAF_QUAL_DP/4/g' %s/temp_Only_filtered_indel_positions_for_closely_matrix_DP.txt" % args.filter2_only_snp_vcf_dir], + shell=True) + subprocess.call([ + "sed -i 's/LowAF_DP_QUAL/4/g' %s/temp_Only_filtered_indel_positions_for_closely_matrix_DP.txt" % args.filter2_only_snp_vcf_dir], + shell=True) + subprocess.call([ + "sed -i 's/LowAF_QUAL/4/g' 
%s/temp_Only_filtered_indel_positions_for_closely_matrix_DP.txt" % args.filter2_only_snp_vcf_dir], + shell=True) + subprocess.call([ + "sed -i 's/LowAF_DP/4/g' %s/temp_Only_filtered_indel_positions_for_closely_matrix_DP.txt" % args.filter2_only_snp_vcf_dir], + shell=True) + subprocess.call([ + "sed -i 's/HighAF_QUAL_DP_proximate_SNP/4/g' %s/temp_Only_filtered_indel_positions_for_closely_matrix_DP.txt" % args.filter2_only_snp_vcf_dir], + shell=True) + subprocess.call([ + "sed -i 's/HighAF_DP_QUAL_proximate_SNP/4/g' %s/temp_Only_filtered_indel_positions_for_closely_matrix_DP.txt" % args.filter2_only_snp_vcf_dir], + shell=True) + subprocess.call([ + "sed -i 's/HighAF_QUAL_proximate_SNP/4/g' %s/temp_Only_filtered_indel_positions_for_closely_matrix_DP.txt" % args.filter2_only_snp_vcf_dir], + shell=True) + subprocess.call([ + "sed -i 's/HighAF_DP_proximate_SNP/4/g' %s/temp_Only_filtered_indel_positions_for_closely_matrix_DP.txt" % args.filter2_only_snp_vcf_dir], + shell=True) + subprocess.call([ + "sed -i 's/HighAF_proximate_SNP/4/g' %s/temp_Only_filtered_indel_positions_for_closely_matrix_DP.txt" % args.filter2_only_snp_vcf_dir], + shell=True) + subprocess.call([ + "sed -i 's/HighAF_QUAL_DP/4/g' %s/temp_Only_filtered_indel_positions_for_closely_matrix_DP.txt" % args.filter2_only_snp_vcf_dir], + shell=True) + subprocess.call([ + "sed -i 's/HighAF_DP_QUAL/4/g' %s/temp_Only_filtered_indel_positions_for_closely_matrix_DP.txt" % args.filter2_only_snp_vcf_dir], + shell=True) + subprocess.call([ + "sed -i 's/HighAF_QUAL/4/g' %s/temp_Only_filtered_indel_positions_for_closely_matrix_DP.txt" % args.filter2_only_snp_vcf_dir], + shell=True) + subprocess.call([ + "sed -i 's/HighAF_DP/3/g' %s/temp_Only_filtered_indel_positions_for_closely_matrix_DP.txt" % args.filter2_only_snp_vcf_dir], + shell=True) + subprocess.call([ + "sed -i 's/LowAF/4/g' %s/temp_Only_filtered_indel_positions_for_closely_matrix_DP.txt" % args.filter2_only_snp_vcf_dir], + shell=True) + subprocess.call([ + "sed -i 
's/HighAF/4/g' %s/temp_Only_filtered_indel_positions_for_closely_matrix_DP.txt" % args.filter2_only_snp_vcf_dir], + shell=True) + + def barplot_indel_stats(): + keep_logging( + 'Read each Sample columns and calculate the percentage of each label to generate barplot statistics.', + 'Read each Sample columns and calculate the percentage of each label to generate barplot statistics.', + logger, 'info') + """ + Read each Sample columns and calculate the percentage of each label to generate barplot statistics. + This will give a visual explanation of how many positions in each samples were filtered out because of different reason + """ + + c_reader = csv.reader( + open('%s/temp_Only_filtered_indel_positions_for_closely_matrix.txt' % args.filter2_only_snp_vcf_dir, + 'r'), delimiter='\t') + columns = list(zip(*c_reader)) + print len(columns) + keep_logging('Finished reading columns...', 'Finished reading columns...', logger, 'info') + counts = 1 + + if args.outgroup: + end = len(vcf_filenames) + 1 + end = end - 1 + else: + end = len(vcf_filenames) + 1 + print end + + f_bar_count = open("%s/bargraph_indel_counts.txt" % args.filter2_only_snp_vcf_dir, 'w+') + f_bar_perc = open("%s/bargraph_indel_percentage.txt" % args.filter2_only_snp_vcf_dir, 'w+') + f_bar_count.write( + "Sample\tunmapped_positions\treference_allele\ttrue_variant\tOnly_low_AF\tOnly_DP\tOnly_low_MQ\tother\n") + f_bar_perc.write( + "Sample\tunmapped_positions_perc\ttrue_variant_perc\tOnly_low_AF_perc\tOnly_DP_perc\tOnly_low_MQ_perc\tother_perc\n") + for i in xrange(1, end, 1): + """ Bar Count Statistics: Variant Position Count Statistics """ + print i + true_variant = columns[i].count('VARIANT') + unmapped_positions = columns[i].count('reference_unmapped_position') + reference_allele = columns[i].count('reference_allele') + Only_low_AF = columns[i].count('LowAF') + Only_DP = columns[i].count('HighAF_DP') + Only_low_MQ = columns[i].count('HighAF') + low_AF_other_parameters = 
columns[i].count('LowAF_QUAL_DP_proximate_SNP') + columns[i].count( + 'LowAF_DP_QUAL_proximate_SNP') + columns[i].count('LowAF_QUAL_proximate_SNP') + columns[i].count( + 'LowAF_DP_proximate_SNP') + columns[i].count('LowAF_proximate_SNP') + columns[i].count( + 'LowAF_QUAL_DP') + columns[i].count('LowAF_DP_QUAL') + columns[i].count('LowAF_QUAL') + columns[ + i].count('LowAF_DP') + high_AF_other_parameters = columns[i].count('HighAF_QUAL_DP_proximate_SNP') + columns[i].count( + 'HighAF_DP_QUAL_proximate_SNP') + columns[i].count('HighAF_QUAL_proximate_SNP') + columns[i].count( + 'HighAF_DP_proximate_SNP') + columns[i].count('HighAF_proximate_SNP') + columns[i].count( + 'HighAF_QUAL_DP') + columns[i].count('HighAF_DP_QUAL') + columns[i].count('HighAF_QUAL') + other = low_AF_other_parameters + high_AF_other_parameters + total = true_variant + unmapped_positions + reference_allele + Only_low_AF + Only_DP + low_AF_other_parameters + high_AF_other_parameters + Only_low_MQ + filename_count = i - 1 + # bar_string = "%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\n" % (os.path.basename(vcf_filenames_outgroup[filename_count].replace('_filter2_final.vcf_no_proximate_snp.vcf', '')), unmapped_positions, reference_allele, true_variant, Only_low_AF, Only_DP, Only_low_MQ, other) + if args.outgroup: + ### + + bar_string = "%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\n" % (os.path.basename( + vcf_filenames_outgroup[filename_count].replace('_filter2_final.vcf_no_proximate_snp.vcf', '')), + unmapped_positions, reference_allele, true_variant, + Only_low_AF, Only_DP, Only_low_MQ, other) + else: + bar_string = "%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\n" % (os.path.basename( + vcf_filenames[filename_count].replace('_filter2_final.vcf_no_proximate_snp.vcf', '')), + unmapped_positions, reference_allele, true_variant, + Only_low_AF, Only_DP, Only_low_MQ, other) + + f_bar_count.write(bar_string) + + """ Bar Count Percentage Statistics: Variant Position Percentage Statistics """ + try: + true_variant_perc = 
float((columns[i].count('VARIANT') * 100) / total) + except ZeroDivisionError: + true_variant_perc = 0 + try: + unmapped_positions_perc = float((columns[i].count('reference_unmapped_position') * 100) / total) + except ZeroDivisionError: + unmapped_positions_perc = 0 + try: + reference_allele_perc = float((columns[i].count('reference_allele') * 100) / total) + except ZeroDivisionError: + reference_allele_perc = 0 + try: + Only_low_AF_perc = float((columns[i].count('LowAF') * 100) / total) + except ZeroDivisionError: + Only_low_AF_perc = 0 + try: + Only_DP_perc = float((columns[i].count('HighAF_DP') * 100) / total) + except ZeroDivisionError: + Only_DP_perc = 0 + try: + Only_low_MQ_perc = float((columns[i].count('HighAF') * 100) / total) + except ZeroDivisionError: + Only_low_MQ_perc = 0 + try: + low_AF_other_parameters_perc = float(((columns[i].count('LowAF_QUAL_DP_proximate_SNP') + columns[ + i].count('LowAF_DP_QUAL_proximate_SNP') + columns[i].count('LowAF_QUAL_proximate_SNP') + columns[ + i].count('LowAF_DP_proximate_SNP') + columns[i].count( + 'LowAF_proximate_SNP') + columns[i].count('LowAF_QUAL_DP') + columns[i].count('LowAF_DP_QUAL') + + columns[i].count('LowAF_QUAL') + columns[i].count( + 'LowAF_DP')) * 100) / total) + except ZeroDivisionError: + low_AF_other_parameters_perc = 0 + try: + high_AF_other_parameters_perc = float(((columns[i].count('HighAF_QUAL_DP_proximate_SNP') + columns[ + i].count('HighAF_DP_QUAL_proximate_SNP') + columns[i].count('HighAF_QUAL_proximate_SNP') + columns[ + i].count('HighAF_DP_proximate_SNP') + columns[i].count( + 'HighAF_proximate_SNP') + columns[i].count('HighAF_QUAL_DP') + columns[i].count('HighAF_DP_QUAL') + + columns[i].count('HighAF_QUAL')) * 100) / total) + except ZeroDivisionError: + high_AF_other_parameters_perc = 0 + + other_perc = float(low_AF_other_parameters_perc + high_AF_other_parameters_perc) + if args.outgroup: + ### + bar_perc_string = "%s\t%s\t%s\t%s\t%s\t%s\t%s\n" % ( + os.path.basename( + 
vcf_filenames_outgroup[filename_count].replace('_filter2_final.vcf_no_proximate_snp.vcf', '')), + unmapped_positions_perc, true_variant_perc, Only_low_AF_perc, Only_DP_perc, Only_low_MQ_perc, + other_perc) + f_bar_perc.write(bar_perc_string) + else: + bar_perc_string = "%s\t%s\t%s\t%s\t%s\t%s\t%s\n" % ( + os.path.basename( + vcf_filenames[filename_count].replace('_filter2_final.vcf_no_proximate_snp.vcf', '')), + unmapped_positions_perc, true_variant_perc, Only_low_AF_perc, Only_DP_perc, Only_low_MQ_perc, + other_perc) + f_bar_perc.write(bar_perc_string) + + f_bar_count.close() + f_bar_perc.close() + bargraph_R_script = "library(ggplot2)\nlibrary(reshape)\nx1 <- read.table(\"bargraph_indel_percentage.txt\", header=TRUE)\nx1$Sample <- reorder(x1$Sample, rowSums(x1[-1]))\nmdf1=melt(x1,id.vars=\"Sample\")\npdf(\"%s/%s_barplot_indel.pdf\", width = 30, height = 30)\nggplot(mdf1, aes(Sample, value, fill=variable)) + geom_bar(stat=\"identity\") + ylab(\"Percentage of Filtered Positions\") + xlab(\"Samples\") + theme(text = element_text(size=9)) + scale_fill_manual(name=\"Reason for filtered out positions\", values=c(\"#08306b\", \"black\", \"orange\", \"darkgrey\", \"#fdd0a2\", \"#7f2704\")) + ggtitle(\"Title Here\") + ylim(0, 100) + theme(text = element_text(size=10), panel.background = element_rect(fill = 'white', colour = 'white'), plot.title = element_text(size=20, face=\"bold\", margin = margin(10, 0, 10, 0)), axis.ticks.y = element_blank(), axis.ticks.x = element_blank(), axis.text.x = element_text(colour = \"black\", face= \"bold.italic\", angle = 90)) + theme(legend.position = c(0.6, 0.7), legend.direction = \"horizontal\")\ndev.off()" % ( + args.filter2_only_snp_vcf_dir, os.path.basename(os.path.normpath(args.results_dir))) + barplot_R_file = open("%s/bargraph_indel.R" % args.filter2_only_snp_vcf_dir, 'w+') + barplot_R_file.write(bargraph_R_script) + keep_logging('Run this R script to generate bargraph plot: %s/bargraph_indel.R' % args.filter2_only_snp_vcf_dir, + 
'Run this R script to generate bargraph plot: %s/bargraph_indel.R' % args.filter2_only_snp_vcf_dir, + logger, 'info') + + """ Methods Steps""" + keep_logging('Running: Generating data matrices...', 'Running: Generating data matrices...', logger, 'info') + # if args.outgroup: + # f_outgroup = open("%s/outgroup_indel_specific_positions.txt" % args.filter2_only_snp_vcf_dir, 'r+') + # global outgroup_indel_specific_positions + # outgroup_indel_specific_positions = [] + # for i in f_outgroup: + # outgroup_indel_specific_positions.append(i) + # f_outgroup.close() + # + # f_outgroup = open("%s/outgroup_specific_positions.txt" % args.filter2_only_snp_vcf_dir, 'r+') + # global outgroup_specific_positions + # outgroup_specific_positions = [] + # for i in f_outgroup: + # outgroup_specific_positions.append(i) + # f_outgroup.close() + # else: + # global outgroup_specific_positions + # global outgroup_indel_specific_positions + # outgroup_indel_specific_positions = [] + # outgroup_specific_positions = [] + generate_indel_position_label_data_matrix_All_label() + keep_logging('Running: Changing variables in data matrices to codes for faster processing...', + 'Running: Changing variables in data matrices to codes for faster processing...', logger, 'info') + temp_generate_indel_position_label_data_matrix_All_label() + keep_logging('Running: Generating Barplot statistics data matrices...', + 'Running: Generating Barplot statistics data matrices...', logger, 'info') + barplot_indel_stats() + + +def create_job_fasta(jobrun, vcf_filenames, core_vcf_fasta_dir, functional_filter): + """ Generate jobs/scripts that creates core consensus fasta file. + + This function will generate and run scripts/jobs to create core consensus fasta file of only core variant positions. + Input for Fasttree, Beast and pairwise variant analysis. 
+ + :param jobrun: Based on this value all the job/scripts will run on "cluster": either on single cluster, "parallel-local": run in parallel on local system, "local": run on local system, "parallel-cluster": submit parallel jobs on cluster. + :param vcf_filenames: list of final vcf filenames i.e *_no_proximate_snp.vcf. These files are the final output of variant calling step for each sample. + :return: + :raises: + """ + if jobrun == "parallel-cluster": + """ + Supports only PBS clusters for now. + """ + for i in vcf_filenames: + job_name = os.path.basename(i) + job_print_string = "#PBS -N %s_fasta\n#PBS -M %s\n#PBS -m %s\n#PBS -V\n#PBS -l %s\n#PBS -q %s\n#PBS -A %s\n#PBS -l qos=flux\n\n/nfs/esnitkin/bin_group/anaconda2/bin/python /nfs/esnitkin/bin_group/pipeline/Github/variant_calling_pipeline_dev/modules/variant_diagnostics/extract_only_ref_variant_fasta.py -filter2_only_snp_vcf_dir %s -filter2_only_snp_vcf_file %s -reference %s -out_core %s -functional_filter %s\n" % ( + job_name, ConfigSectionMap("scheduler", Config)['email'], + ConfigSectionMap("scheduler", Config)['notification'], ConfigSectionMap("scheduler", Config)['resources'], + ConfigSectionMap("scheduler", Config)['queue'], ConfigSectionMap("scheduler", Config)['flux_account'], + args.filter2_only_snp_vcf_dir, i, args.reference, core_vcf_fasta_dir, functional_filter) + job_file_name = "%s_fasta.pbs" % (i) + f1 = open(job_file_name, 'w+') + f1.write(job_print_string) + f1.close() + # os.system("mv %s/*.pbs %s/temp" % (args.filter2_only_snp_vcf_dir, args.filter2_only_snp_vcf_dir)) + pbs_dir = args.filter2_only_snp_vcf_dir + "/*_fasta.pbs" + pbs_scripts = glob.glob(pbs_dir) + for i in pbs_scripts: + keep_logging('Running: qsub %s' % i, 'Running: qsub %s' % i, logger, 'info') + # os.system("qsub %s" % i) + call("qsub %s" % i, logger) + + + elif jobrun == "parallel-local" or jobrun == "cluster": + """ + Generate a Command list of each job and run it in parallel on different cores available on local system 
+ """ + command_array = [] + command_file = "%s/commands_list_fasta.sh" % args.filter2_only_snp_vcf_dir + f3 = open(command_file, 'w+') + for i in vcf_filenames: + job_name = os.path.basename(i) + job_print_string = "#PBS -N %s_fasta\n#PBS -M %s\n#PBS -m %s\n#PBS -V\n#PBS -l %s\n#PBS -q %s\n#PBS -A %s\n#PBS -l qos=flux\n\n/nfs/esnitkin/bin_group/anaconda2/bin/python /nfs/esnitkin/bin_group/pipeline/Github/variant_calling_pipeline_dev/modules/variant_diagnostics/extract_only_ref_variant_fasta.py -filter2_only_snp_vcf_dir %s -filter2_only_snp_vcf_file %s -reference %s -out_core %s -functional_filter %s\n" % ( + job_name, ConfigSectionMap("scheduler", Config)['email'], + ConfigSectionMap("scheduler", Config)['notification'], ConfigSectionMap("scheduler", Config)['resources'], + ConfigSectionMap("scheduler", Config)['queue'], ConfigSectionMap("scheduler", Config)['flux_account'], + args.filter2_only_snp_vcf_dir, i, args.reference, core_vcf_fasta_dir, functional_filter) + job_file_name = "%s_fasta.pbs" % (i) + f1 = open(job_file_name, 'w+') + f1.write(job_print_string) + f1.close() + pbs_dir = args.filter2_only_snp_vcf_dir + "/*_fasta.pbs" + pbs_scripts = glob.glob(pbs_dir) + for i in pbs_scripts: + f3.write("bash %s\n" % i) + f3.close() + with open(command_file, 'r') as fpp: + for lines in fpp: + lines = lines.strip() + command_array.append(lines) + fpp.close() + if args.numcores: + num_cores = int(num_cores) + else: + num_cores = multiprocessing.cpu_count() + results = Parallel(n_jobs=num_cores)(delayed(run_command)(command) for command in command_array) + + # elif jobrun == "cluster": + # command_array = [] + # command_file = "%s/commands_list_fasta.sh" % args.filter2_only_snp_vcf_dir + # f3 = open(command_file, 'w+') + # for i in vcf_filenames: + # job_name = os.path.basename(i) + # job_print_string = "#PBS -N %s_fasta\n#PBS -M %s\n#PBS -m %s\n#PBS -V\n#PBS -l %s\n#PBS -q %s\n#PBS -A %s\n#PBS -l qos=flux\n\n/nfs/esnitkin/bin_group/anaconda2/bin/python 
/nfs/esnitkin/bin_group/pipeline/Github/variant_calling_pipeline_dev/modules/variant_diagnostics/extract_only_ref_variant_fasta.py -filter2_only_snp_vcf_dir %s -filter2_only_snp_vcf_file %s -reference %s -out_core %s\n" % (job_name, ConfigSectionMap("scheduler", Config)['email'], ConfigSectionMap("scheduler", Config)['notification'], ConfigSectionMap("scheduler", Config)['resources'], ConfigSectionMap("scheduler", Config)['queue'], ConfigSectionMap("scheduler", Config)['flux_account'],args.filter2_only_snp_vcf_dir, i, args.reference, core_vcf_fasta_dir) + # job_file_name = "%s_fasta.pbs" % (i) + # f1=open(job_file_name, 'w+') + # f1.write(job_print_string) + # f1.close() + # pbs_dir = args.filter2_only_snp_vcf_dir + "/*_fasta.pbs" + # pbs_scripts = glob.glob(pbs_dir) + # for i in pbs_scripts: + # f3.write("bash %s\n" % i) + # f3.close() + # with open(command_file, 'r') as fpp: + # for lines in fpp: + # lines = lines.strip() + # command_array.append(lines) + # fpp.close() + # os.system("bash %s/command_file" % args.filter2_only_snp_vcf_dir) + else: + """ + Generate a Command list of each job and run it on local system one at a time + """ + command_array = [] + command_file = "%s/commands_list_fasta.sh" % args.filter2_only_snp_vcf_dir + f3 = open(command_file, 'w+') + + for i in vcf_filenames: + job_name = os.path.basename(i) + job_print_string = "#PBS -N %s_fasta\n#PBS -M %s\n#PBS -m %s\n#PBS -V\n#PBS -l %s\n#PBS -q %s\n#PBS -A %s\n#PBS -l qos=flux\n\n/nfs/esnitkin/bin_group/anaconda2/bin/python /nfs/esnitkin/bin_group/pipeline/Github/variant_calling_pipeline_dev/modules/variant_diagnostics/extract_only_ref_variant_fasta.py -filter2_only_snp_vcf_dir %s -filter2_only_snp_vcf_file %s -reference %s -out_core %s -functional_filter %s\n" % ( + job_name, ConfigSectionMap("scheduler", Config)['email'], + ConfigSectionMap("scheduler", Config)['notification'], ConfigSectionMap("scheduler", Config)['resources'], + ConfigSectionMap("scheduler", Config)['queue'], 
ConfigSectionMap("scheduler", Config)['flux_account'], + args.filter2_only_snp_vcf_dir, i, args.reference, core_vcf_fasta_dir, functional_filter) + job_file_name = "%s_fasta.pbs" % (i) + f1 = open(job_file_name, 'w+') + f1.write(job_print_string) + f1.close() + # os.system("mv %s/*.pbs %s/temp" % (args.filter2_only_snp_vcf_dir, args.filter2_only_snp_vcf_dir)) + pbs_dir = args.filter2_only_snp_vcf_dir + "/*_fasta.pbs" + pbs_scripts = glob.glob(pbs_dir) + + for i in pbs_scripts: + f3.write("bash %s\n" % i) + f3.close() + with open(command_file, 'r') as fpp: + for lines in fpp: + lines = lines.strip() + command_array.append(lines) + fpp.close() + # os.system("bash command_file") + call("bash %s" % command_file, logger) + + +def create_job_allele_variant_fasta(jobrun, vcf_filenames, core_vcf_fasta_dir, config_file): + """ Generate jobs/scripts that creates core consensus fasta file. + + This function will generate and run scripts/jobs to create core consensus fasta file of only core variant positions. + Input for Fasttree, Beast and pairwise variant analysis. + + :param jobrun: Based on this value all the job/scripts will run on "cluster": either on single cluster, "parallel-local": run in parallel on local system, "local": run on local system, "parallel-cluster": submit parallel jobs on cluster. + :param vcf_filenames: list of final vcf filenames i.e *_no_proximate_snp.vcf. These files are the final output of variant calling step for each sample. + :return: + :raises: + """ + if jobrun == "parallel-cluster": + """ + Supports only PBS clusters for now. 
+ """ + for i in vcf_filenames: + job_name = os.path.basename(i) + job_print_string = "#PBS -N %s_fasta\n#PBS -M %s\n#PBS -m %s\n#PBS -V\n#PBS -l %s\n#PBS -q %s\n#PBS -A %s\n#PBS -l qos=flux\n\n/nfs/esnitkin/bin_group/anaconda2/bin/python /nfs/esnitkin/bin_group/pipeline/Github/variant_calling_pipeline_dev/modules/variant_diagnostics/extract_only_ref_variant_fasta_unique_positions.py -filter2_only_snp_vcf_dir %s -filter2_only_snp_vcf_file %s -reference %s -out_core %s -config %s\n" % ( + job_name, ConfigSectionMap("scheduler", Config)['email'], + ConfigSectionMap("scheduler", Config)['notification'], ConfigSectionMap("scheduler", Config)['resources'], + ConfigSectionMap("scheduler", Config)['queue'], ConfigSectionMap("scheduler", Config)['flux_account'], + args.filter2_only_snp_vcf_dir, i, args.reference, core_vcf_fasta_dir, config_file) + job_file_name = "%s_ref_allele_variants_fasta.pbs" % (i) + f1 = open(job_file_name, 'w+') + f1.write(job_print_string) + f1.close() + # os.system("mv %s/*.pbs %s/temp" % (args.filter2_only_snp_vcf_dir, args.filter2_only_snp_vcf_dir)) + pbs_dir = args.filter2_only_snp_vcf_dir + "/*_fasta.pbs" + pbs_scripts = glob.glob(pbs_dir) + for i in pbs_scripts: + keep_logging('Running: qsub %s' % i, 'Running: qsub %s' % i, logger, 'info') + # os.system("qsub %s" % i) + call("qsub %s" % i, logger) + + + elif jobrun == "parallel-local" or jobrun == "cluster": + """ + Generate a Command list of each job and run it in parallel on different cores available on local system + """ + command_array = [] + command_file = "%s/commands_list_ref_allele_variants_fasta.sh" % args.filter2_only_snp_vcf_dir + f3 = open(command_file, 'w+') + for i in vcf_filenames: + job_name = os.path.basename(i) + job_print_string = "#PBS -N %s_fasta\n#PBS -M %s\n#PBS -m %s\n#PBS -V\n#PBS -l %s\n#PBS -q %s\n#PBS -A %s\n#PBS -l qos=flux\n\n/nfs/esnitkin/bin_group/anaconda2/bin/python 
/nfs/esnitkin/bin_group/pipeline/Github/variant_calling_pipeline_dev/modules/variant_diagnostics/extract_only_ref_variant_fasta_unique_positions.py -filter2_only_snp_vcf_dir %s -filter2_only_snp_vcf_file %s -reference %s -out_core %s -config %s\n" % ( + job_name, ConfigSectionMap("scheduler", Config)['email'], + ConfigSectionMap("scheduler", Config)['notification'], ConfigSectionMap("scheduler", Config)['resources'], + ConfigSectionMap("scheduler", Config)['queue'], ConfigSectionMap("scheduler", Config)['flux_account'], + args.filter2_only_snp_vcf_dir, i, args.reference, core_vcf_fasta_dir, config_file) + job_file_name = "%s_ref_allele_variants_fasta.pbs" % (i) + f1 = open(job_file_name, 'w+') + f1.write(job_print_string) + f1.close() + pbs_dir = args.filter2_only_snp_vcf_dir + "/*_ref_allele_variants_fasta.pbs" + pbs_scripts = glob.glob(pbs_dir) + for i in pbs_scripts: + f3.write("bash %s\n" % i) + f3.close() + with open(command_file, 'r') as fpp: + for lines in fpp: + lines = lines.strip() + command_array.append(lines) + fpp.close() + if args.numcores: + num_cores = int(num_cores) + else: + num_cores = multiprocessing.cpu_count() + results = Parallel(n_jobs=num_cores)(delayed(run_command)(command) for command in command_array) + + # elif jobrun == "cluster": + # command_array = [] + # command_file = "%s/commands_list_fasta.sh" % args.filter2_only_snp_vcf_dir + # f3 = open(command_file, 'w+') + # for i in vcf_filenames: + # job_name = os.path.basename(i) + # job_print_string = "#PBS -N %s_fasta\n#PBS -M %s\n#PBS -m %s\n#PBS -V\n#PBS -l %s\n#PBS -q %s\n#PBS -A %s\n#PBS -l qos=flux\n\n/nfs/esnitkin/bin_group/anaconda2/bin/python /nfs/esnitkin/bin_group/pipeline/Github/variant_calling_pipeline_dev/modules/variant_diagnostics/extract_only_ref_variant_fasta.py -filter2_only_snp_vcf_dir %s -filter2_only_snp_vcf_file %s -reference %s -out_core %s\n" % (job_name, ConfigSectionMap("scheduler", Config)['email'], ConfigSectionMap("scheduler", Config)['notification'], 
ConfigSectionMap("scheduler", Config)['resources'], ConfigSectionMap("scheduler", Config)['queue'], ConfigSectionMap("scheduler", Config)['flux_account'],args.filter2_only_snp_vcf_dir, i, args.reference, core_vcf_fasta_dir) + # job_file_name = "%s_fasta.pbs" % (i) + # f1=open(job_file_name, 'w+') + # f1.write(job_print_string) + # f1.close() + # pbs_dir = args.filter2_only_snp_vcf_dir + "/*_fasta.pbs" + # pbs_scripts = glob.glob(pbs_dir) + # for i in pbs_scripts: + # f3.write("bash %s\n" % i) + # f3.close() + # with open(command_file, 'r') as fpp: + # for lines in fpp: + # lines = lines.strip() + # command_array.append(lines) + # fpp.close() + # os.system("bash %s/command_file" % args.filter2_only_snp_vcf_dir) + else: + """ + Generate a Command list of each job and run it on local system one at a time + """ + command_array = [] + command_file = "%s/commands_list_ref_allele_variants_fasta.sh" % args.filter2_only_snp_vcf_dir + f3 = open(command_file, 'w+') + + for i in vcf_filenames: + job_name = os.path.basename(i) + job_print_string = "#PBS -N %s_fasta\n#PBS -M %s\n#PBS -m %s\n#PBS -V\n#PBS -l %s\n#PBS -q %s\n#PBS -A %s\n#PBS -l qos=flux\n\n/nfs/esnitkin/bin_group/anaconda2/bin/python /nfs/esnitkin/bin_group/pipeline/Github/variant_calling_pipeline_dev/modules/variant_diagnostics/extract_only_ref_variant_fasta_unique_positions.py -filter2_only_snp_vcf_dir %s -filter2_only_snp_vcf_file %s -reference %s -out_core %s -config %s\n" % ( + job_name, ConfigSectionMap("scheduler", Config)['email'], + ConfigSectionMap("scheduler", Config)['notification'], ConfigSectionMap("scheduler", Config)['resources'], + ConfigSectionMap("scheduler", Config)['queue'], ConfigSectionMap("scheduler", Config)['flux_account'], + args.filter2_only_snp_vcf_dir, i, args.reference, core_vcf_fasta_dir, config_file) + job_file_name = "%s_ref_allele_variants_fasta.pbs" % (i) + f1 = open(job_file_name, 'w+') + f1.write(job_print_string) + f1.close() + # os.system("mv %s/*.pbs %s/temp" % 
(args.filter2_only_snp_vcf_dir, args.filter2_only_snp_vcf_dir)) + pbs_dir = args.filter2_only_snp_vcf_dir + "/*_ref_allele_variants_fasta.pbs" + pbs_scripts = glob.glob(pbs_dir) + + for i in pbs_scripts: + f3.write("bash %s\n" % i) + f3.close() + with open(command_file, 'r') as fpp: + for lines in fpp: + lines = lines.strip() + command_array.append(lines) + fpp.close() + # os.system("bash command_file") + call("bash %s" % command_file, logger) + + +def create_job_DP(jobrun, vcf_filenames): + """ + Based on type of jobrun; generate jobs and run accordingly. + :param jobrun: Based on this value all the job/scripts will run on "cluster": either on single cluster, "parallel-local": run in parallel on local system, "local": run on local system, "parallel-cluster": submit parallel jobs on cluster. + :param vcf_filenames: + :return: + """ + + if jobrun == "parallel-cluster": + """ + Supports only PBS clusters for now. + """ + for i in vcf_filenames: + job_name = os.path.basename(i) + job_print_string = "#PBS -N %s\n#PBS -M apirani@med.umich.edu\n#PBS -m a\n#PBS -V\n#PBS -l nodes=1:ppn=1,mem=4000mb,walltime=76:00:00\n#PBS -q fluxod\n#PBS -A esnitkin_fluxod\n#PBS -l qos=flux\n\ncd %s\n/nfs/esnitkin/bin_group/anaconda2/bin/python /nfs/esnitkin/bin_group/pipeline/Github/variant_calling_pipeline_dev/modules/variant_diagnostics/DP_analysis.py -filter2_only_snp_vcf_dir %s -filter2_only_snp_vcf_file %s\n" % ( + job_name, args.filter2_only_snp_vcf_dir, args.filter2_only_snp_vcf_dir, i) + job_file_name = "%s_DP.pbs" % (i) + f1 = open(job_file_name, 'w+') + f1.write(job_print_string) + f1.close() + # os.system("mv %s/*.pbs %s/temp" % (args.filter2_only_snp_vcf_dir, args.filter2_only_snp_vcf_dir)) + pbs_dir = args.filter2_only_snp_vcf_dir + "/*_DP.pbs" + pbs_scripts = glob.glob(pbs_dir) + for i in pbs_scripts: + keep_logging('Running: qsub %s' % i, 'Running: qsub %s' % i, logger, 'info') + # os.system("qsub %s" % i) + call("qsub %s" % i, logger) + + + elif jobrun == "parallel-local" 
or jobrun == "cluster": + """ + Generate a Command list of each job and run it in parallel on different cores available on local system + """ + command_array = [] + command_file = "%s/commands_list_DP.sh" % args.filter2_only_snp_vcf_dir + f3 = open(command_file, 'w+') + + for i in vcf_filenames: + job_name = os.path.basename(i) + job_print_string = "#PBS -N %s\n#PBS -M apirani@med.umich.edu\n#PBS -m a\n#PBS -V\n#PBS -l nodes=1:ppn=1,mem=4000mb,walltime=76:00:00\n#PBS -q fluxod\n#PBS -A esnitkin_fluxod\n#PBS -l qos=flux\n\ncd %s\n/nfs/esnitkin/bin_group/anaconda2/bin/python /nfs/esnitkin/bin_group/pipeline/Github/variant_calling_pipeline_dev/modules/variant_diagnostics/DP_analysis.py -filter2_only_snp_vcf_dir %s -filter2_only_snp_vcf_file %s\n" % ( + job_name, args.filter2_only_snp_vcf_dir, args.filter2_only_snp_vcf_dir, i) + job_file_name = "%s_DP.pbs" % (i) + f1 = open(job_file_name, 'w+') + f1.write(job_print_string) + f1.close() + # os.system("mv %s/*.pbs %s/temp" % (args.filter2_only_snp_vcf_dir, args.filter2_only_snp_vcf_dir)) + pbs_dir = args.filter2_only_snp_vcf_dir + "/*_DP.pbs" + pbs_scripts = glob.glob(pbs_dir) + + for i in pbs_scripts: + f3.write("bash %s\n" % i) + f3.close() + with open(command_file, 'r') as fpp: + for lines in fpp: + lines = lines.strip() + command_array.append(lines) + fpp.close() + print len(command_array) + if args.numcores: + num_cores = int(num_cores) + else: + num_cores = multiprocessing.cpu_count() + results = Parallel(n_jobs=num_cores)(delayed(run_command)(command) for command in command_array) + + # elif jobrun == "cluster": + # """ Test pending """ + # command_file = "%s/commands_list_DP.sh" % args.filter2_only_snp_vcf_dir + # f3 = open(command_file, 'w+') + # for i in vcf_filenames: + # job_name = os.path.basename(i) + # job_print_string = "#PBS -N %s\n#PBS -M apirani@med.umich.edu\n#PBS -m a\n#PBS -V\n#PBS -l nodes=1:ppn=1,mem=4000mb,walltime=76:00:00\n#PBS -q fluxod\n#PBS -A esnitkin_fluxod\n#PBS -l qos=flux\n\ncd 
%s\n/nfs/esnitkin/bin_group/anaconda2/bin/python /nfs/esnitkin/bin_group/pipeline/Github/variant_calling_pipeline_dev/modules/variant_diagnostics/DP_analysis.py -filter2_only_snp_vcf_dir %s -filter2_only_snp_vcf_file %s\n" % (job_name, args.filter2_only_snp_vcf_dir, args.filter2_only_snp_vcf_dir, i) + # job_file_name = "%s_DP.pbs" % (i) + # f1=open(job_file_name, 'w+') + # f1.write(job_print_string) + # f1.close() + # pbs_dir = args.filter2_only_snp_vcf_dir + "/*_DP.pbs" + # pbs_scripts = glob.glob(pbs_dir) + # for i in pbs_scripts: + # f3.write("bash %s\n" % i) + # f3.close() + # os.system("bash %s/commands_list_DP.sh" % args.filter2_only_snp_vcf_dir) + + else: + """ + Generate a Command list of each job and run it on local system one at a time + """ + command_file = "%s/commands_list_DP.sh" % args.filter2_only_snp_vcf_dir + f3 = open(command_file, 'w+') + for i in vcf_filenames: + job_name = os.path.basename(i) + job_print_string = "#PBS -N %s\n#PBS -M apirani@med.umich.edu\n#PBS -m a\n#PBS -V\n#PBS -l nodes=1:ppn=1,mem=4000mb,walltime=76:00:00\n#PBS -q fluxod\n#PBS -A esnitkin_fluxod\n#PBS -l qos=flux\n\ncd %s\n/nfs/esnitkin/bin_group/anaconda2/bin/python /nfs/esnitkin/bin_group/pipeline/Github/variant_calling_pipeline_dev/modules/variant_diagnostics/DP_analysis.py -filter2_only_snp_vcf_dir %s -filter2_only_snp_vcf_file %s\n" % ( + job_name, args.filter2_only_snp_vcf_dir, args.filter2_only_snp_vcf_dir, i) + job_file_name = "%s_DP.pbs" % (i) + f1 = open(job_file_name, 'w+') + f1.write(job_print_string) + f1.close() + pbs_dir = args.filter2_only_snp_vcf_dir + "/*_DP.pbs" + pbs_scripts = glob.glob(pbs_dir) + for i in pbs_scripts: + f3.write("bash %s\n" % i) + f3.close() + # os.system("bash %s/commands_list_DP.sh" % args.filter2_only_snp_vcf_dir) + call("bash %s/commands_list_DP.sh" % args.filter2_only_snp_vcf_dir, logger) + + +def generate_vcf_files(): + if ConfigSectionMap("functional_filters", Config)['apply_functional_filters'] == "yes": + keep_logging( + 
'Removing Variants falling in Functional filters positions file: %s\n' % functional_class_filter_positions, + 'Removing Variants falling in Functional filters positions file: %s\n' % functional_class_filter_positions, + logger, + 'info') + # phage_positions = [] + # phage_region_positions = "%s/phage_region_positions.txt" % args.filter2_only_snp_vcf_dir + # with open(phage_region_positions, 'rU') as fp: + # for line in fp: + # phage_positions.append(line.strip()) + # fp.close() + + functional_filter_pos_array = [] + with open(functional_class_filter_positions, 'rU') as f_functional: + for line_func in f_functional: + functional_filter_pos_array.append(line_func.strip()) + + ref_variant_position_array = [] + ffp = open("%s/Only_ref_variant_positions_for_closely" % args.filter2_only_snp_vcf_dir, 'r+') + for line in ffp: + line = line.strip() + if line not in functional_filter_pos_array: + ref_variant_position_array.append(line) + ffp.close() + + # Adding core indel support: 2018-07-24 + ref_indel_variant_position_array = [] + ffp = open("%s/Only_ref_indel_positions_for_closely" % args.filter2_only_snp_vcf_dir, 'r+') + for line in ffp: + line = line.strip() + if line not in functional_filter_pos_array: + ref_indel_variant_position_array.append(line) + ffp.close() + + else: + functional_filter_pos_array = [] + ref_variant_position_array = [] + ffp = open("%s/Only_ref_variant_positions_for_closely" % args.filter2_only_snp_vcf_dir, 'r+') + for line in ffp: + line = line.strip() + ref_variant_position_array.append(line) + ffp.close() + + # Adding core indel support: 2018-07-24 + ref_indel_variant_position_array = [] + ffp = open("%s/Only_ref_indel_positions_for_closely" % args.filter2_only_snp_vcf_dir, 'r+') + for line in ffp: + line = line.strip() + if line not in functional_filter_pos_array: + ref_indel_variant_position_array.append(line) + ffp.close() + + print "No. of core SNPs: %s" % len(ref_variant_position_array) + print "No. 
of core INDELs: %s" % len(ref_indel_variant_position_array) + + f_file = open( + "%s/Only_ref_variant_positions_for_closely_without_functional_filtered_positions" % args.filter2_only_snp_vcf_dir, + 'w+') + for pos in ref_variant_position_array: + f_file.write(pos + '\n') + f_file.close() + + # Adding core indel support: 2018-07-24 + f_file = open( + "%s/Only_ref_indel_variant_positions_for_closely_without_functional_filtered_positions" % args.filter2_only_snp_vcf_dir, + 'w+') + for pos in ref_indel_variant_position_array: + f_file.write(pos + '\n') + f_file.close() + + base_vcftools_bin = ConfigSectionMap("bin_path", Config)['binbase'] + "/" + ConfigSectionMap("vcftools", Config)[ + 'vcftools_bin'] + filter2_files_array = [] + for i in vcf_filenames: + filter2_file = i.replace('_no_proximate_snp.vcf', '') + filter2_files_array.append(filter2_file) + + filtered_out_vcf_files = [] + for i in filter2_files_array: + print_array = [] + with open(i) as file_open: + for line in file_open: + line = line.strip() + if line.startswith("#"): + print_array.append(line) + else: + split_array = re.split(r'\t+', line) + if split_array[1] in ref_variant_position_array and 'INDEL' not in split_array[7]: + print_array.append(line) + file_open.close() + file_name = i + "_core.vcf" + keep_logging('Generating %s' % file_name, 'Generating %s' % file_name, logger, 'info') + filtered_out_vcf_files.append(file_name) + f1 = open(file_name, 'w+') + for ios in print_array: + print_string = str(ios) + "\n" + f1.write(print_string) + f1.close() + + filename = "%s/consensus.sh" % args.filter2_only_snp_vcf_dir + keep_logging('Generating Consensus...', 'Generating Consensus...', logger, 'info') + for file in filtered_out_vcf_files: + f1 = open(filename, 'a+') + bgzip_cmd = "%s/%s/bgzip -f %s\n" % ( + ConfigSectionMap("bin_path", Config)['binbase'], ConfigSectionMap("vcftools", Config)['tabix_bin'], file) + f1.write(bgzip_cmd) + subprocess.call([bgzip_cmd], shell=True) + tabix_cmd = "%s/%s/tabix -f 
-p vcf %s.gz\n" % ( + ConfigSectionMap("bin_path", Config)['binbase'], ConfigSectionMap("vcftools", Config)['tabix_bin'], file) + f1.write(tabix_cmd) + subprocess.call([tabix_cmd], shell=True) + fasta_cmd = "cat %s | %s/vcf-consensus %s.gz > %s.fa\n" % ( + args.reference, base_vcftools_bin, file, file.replace('_filter2_final.vcf_core.vcf', '')) + f1.write(fasta_cmd) + subprocess.call([fasta_cmd], shell=True) + base = os.path.basename(file) + header = base.replace('_filter2_final.vcf_core.vcf', '') + sed_command = "sed -i 's/>.*/>%s/g' %s.fa\n" % (header, file.replace('_filter2_final.vcf_core.vcf', '')) + subprocess.call([sed_command], shell=True) + f1.write(sed_command) + keep_logging('The consensus commands are in : %s' % filename, 'The consensus commands are in : %s' % filename, + logger, 'info') + sequence_lgth_cmd = "for i in %s/*.fa; do %s/%s/bioawk -c fastx \'{ print $name, length($seq) }\' < $i; done" % ( + args.filter2_only_snp_vcf_dir, ConfigSectionMap("bin_path", Config)['binbase'], + ConfigSectionMap("bioawk", Config)['bioawk_bin']) + # os.system(sequence_lgth_cmd) + call("%s" % sequence_lgth_cmd, logger) + + +def gatk_filter2(final_raw_vcf, out_path, analysis, reference): + gatk_filter2_parameter_expression = "MQ > 50 && QUAL > 100 && DP > 9" + gatk_filter2_command = "java -jar %s/%s/GenomeAnalysisTK.jar -T VariantFiltration -R %s -o %s/%s_filter2_gatk.vcf --variant %s --filterExpression \"%s\" --filterName PASS_filter2" % ( + ConfigSectionMap("bin_path", Config)['binbase'], ConfigSectionMap("gatk", Config)['gatk_bin'], reference, out_path, + analysis, final_raw_vcf, gatk_filter2_parameter_expression) + keep_logging('Running Command: [%s]' % gatk_filter2_command, 'Running Command: [%s]' % gatk_filter2_command, logger, + 'info') + # os.system(gatk_filter2_command) + call("%s" % gatk_filter2_command, logger) + filter_flag_command = "grep '#\|PASS_filter2' %s/%s_filter2_gatk.vcf > %s/%s_filter2_final.vcf" % ( + out_path, analysis, out_path, analysis) + 
call("%s" % filter_flag_command, logger) + gatk_filter2_final_vcf = "%s/%s_filter2_final.vcf" % (out_path, analysis) + return gatk_filter2_final_vcf + + +def remove_proximate_snps(gatk_filter2_final_vcf_file, out_path, analysis, reference): + all_position = [] + remove_proximate_position_array = [] + gatk_filter2_final_vcf_file_no_proximate_snp = gatk_filter2_final_vcf_file + "_no_proximate_snp.vcf" + with open(gatk_filter2_final_vcf_file, 'rU') as csv_file: + for line in csv_file: + if not line.startswith('#'): + line_array = line.split('\t') + all_position.append(line_array[1]) + for position in all_position: + position_index = all_position.index(position) + next_position_index = position_index + 1 + + if next_position_index < len(all_position): + diff = int(all_position[next_position_index]) - int(position) + if diff < 10: + # print position + " " + all_position[next_position_index] + if position not in remove_proximate_position_array and all_position[ + next_position_index] not in remove_proximate_position_array: + remove_proximate_position_array.append(int(position)) + remove_proximate_position_array.append(int(all_position[next_position_index])) + f1 = open(gatk_filter2_final_vcf_file_no_proximate_snp, 'w+') + with open(gatk_filter2_final_vcf_file, 'rU') as csv_file2: + for line in csv_file2: + if line.startswith('gi') or line.startswith('MRSA_8058'): ##change this! 
+ line_array = line.split('\t') + if int(line_array[1]) not in remove_proximate_position_array: + print_string = line + f1.write(print_string) + else: + print_string = line + f1.write(print_string) + gatk_filter2_final_vcf_file_no_proximate_snp_positions = gatk_filter2_final_vcf_file + "_no_proximate_snp.vcf_positions_array" + f2 = open(gatk_filter2_final_vcf_file_no_proximate_snp_positions, 'w+') + for i in remove_proximate_position_array: + position_print_string = str(i) + "\n" + f2.write(position_print_string) + return gatk_filter2_final_vcf_file_no_proximate_snp + + +def FQ_analysis(): + for i in vcf_filenames: + filename_base = os.path.basename(i) + aln_mpileup_vcf_file = i.replace('_filter2_final.vcf_no_proximate_snp.vcf', + '_aln_mpileup_raw.vcf_5bp_indel_removed.vcf') + analysis = filename_base.replace('_filter2_final.vcf_no_proximate_snp.vcf', '') + # print aln_mpileup_vcf_file + grep_reference_file = "grep \'^##reference\' %s" % aln_mpileup_vcf_file + proc = subprocess.Popen([grep_reference_file], stdout=subprocess.PIPE, shell=True) + (out, err) = proc.communicate() + out = out.strip() + reference_file = out.split(':') + # Change it to multiprocessing + gatk_filter2_final_vcf_file = gatk_filter2(aln_mpileup_vcf_file, temp_dir, analysis, reference_file[1]) + # print gatk_filter2_final_vcf_file + gatk_filter2_final_vcf_file_no_proximate_snp = remove_proximate_snps(gatk_filter2_final_vcf_file, temp_dir, + analysis, reference_file[1]) + grep_fq_field = "awk -F\'\\t\' \'{print $8}\' %s | grep -o \'FQ=.*\' | sed \'s/FQ=//g\' | awk -F\';\' \'{print $1}\' > %s/%s_FQ_values" % ( + gatk_filter2_final_vcf_file_no_proximate_snp, os.path.dirname(i), analysis) + # os.system(grep_fq_field) + call("%s" % grep_fq_field, logger) + # print grep_fq_field + + +def DP_analysis(): + create_job_DP(args.jobrun, vcf_filenames) + paste_command = "paste %s/extract_DP_positions.txt" % args.filter2_only_snp_vcf_dir + for i in vcf_filenames: + label_file = 
i.replace('_filter2_final.vcf_no_proximate_snp.vcf', '_DP_values') + paste_command = paste_command + " " + label_file + + paste_file = args.filter2_only_snp_vcf_dir + "/paste_DP_files.sh" + f2 = open(paste_file, 'w+') + paste_command = paste_command + " > %s/filtered_DP_values_temp.txt" % args.filter2_only_snp_vcf_dir + # os.system(paste_command) + f2.write(paste_command + '\n') + cat_header = "cat %s/header.txt %s/filtered_DP_values_temp.txt > %s/filtered_DP_values.txt" % ( + args.filter2_only_snp_vcf_dir, args.filter2_only_snp_vcf_dir, args.filter2_only_snp_vcf_dir) + # os.system(cat_header) + f2.write(cat_header + '\n') + sed_command = "sed -i \'s/_filter2_final.vcf_no_proximate_snp.vcf//g\' %s/filtered_DP_values.txt" % ( + args.filter2_only_snp_vcf_dir) + # os.system(sed_command) + f2.write(sed_command + '\n') + cmd = "bash %s" % paste_file + # os.system("bash %s/paste_DP_files.sh" % args.filter2_only_snp_vcf_dir) + + +def DP_analysis_barplot(): + # os.system("bash %s/paste_DP_files.sh" % args.filter2_only_snp_vcf_dir) + call("bash %s/paste_DP_files.sh" % args.filter2_only_snp_vcf_dir, logger) + keep_logging('Generating DP barplots data...', 'Generating DP barplots data...', logger, 'info') + c_reader = csv.reader(open('%s/filtered_DP_values.txt' % args.filter2_only_snp_vcf_dir, 'r'), delimiter='\t') + columns = list(zip(*c_reader)) + counts = 1 + end = len(vcf_filenames) + 1 + f_bar_count = open("%s/DP_bargraph_counts.txt" % args.filter2_only_snp_vcf_dir, 'w+') + f_bar_perc = open("%s/DP_bargraph_percentage.txt" % args.filter2_only_snp_vcf_dir, 'w+') + f_bar_count.write("Sample\treference_position\toneto5\tsixto10\televento14\tfifteenorabove\n") + f_bar_perc.write("Sample\treference_position\toneto5\tsixto10\televento14\tfifteenorabove\n") + for i in xrange(1, end, 1): + """ Bar Count Statistics: Variant Position Count Statistics """ + reference_position = columns[i].count('NA') + oneto5 = 0 + for k in list(columns[i][1:]): + if k != "": + if k != "NA": + if 
int(k) < 5: + oneto5 += 1 + sixto10 = 0 + for k in list(columns[i][1:]): + if k != "": + if k != "NA": + if int(k) >= 5 and int(k) <= 10: + sixto10 += 1 + elevento14 = 0 + for k in list(columns[i][1:]): + if k != "": + if k != "NA": + if int(k) >= 11 and int(k) <= 14: + elevento14 += 1 + fifteenorabove = 0 + for k in list(columns[i][1:]): + if k != "": + if k != "NA": + if int(k) >= 15: + fifteenorabove += 1 + total = reference_position + oneto5 + sixto10 + elevento14 + fifteenorabove + filename_count = i - 1 + bar_string = "%s\t%s\t%s\t%s\t%s\t%s\n" % ( + os.path.basename(vcf_filenames[filename_count].replace('_filter2_final.vcf_no_proximate_snp.vcf', '')), + reference_position, oneto5, sixto10, elevento14, fifteenorabove) + f_bar_count.write(bar_string) + + """ Bar Count Percentage Statistics: Variant Position Percentage Statistics """ + try: + reference_position_perc = float(reference_position * 100 / total) + except ZeroDivisionError: + reference_position_perc = 0 + try: + oneto5_perc = float(oneto5 * 100 / total) + except ZeroDivisionError: + oneto5_perc = 0 + try: + sixto10_perc = float(sixto10 * 100 / total) + except ZeroDivisionError: + sixto10_perc = 0 + try: + elevento14_perc = float(elevento14 * 100 / total) + except ZeroDivisionError: + elevento14_perc = 0 + try: + fifteenorabove_perc = float(fifteenorabove * 100 / total) + except ZeroDivisionError: + fifteenorabove_perc = 0 + bar_perc_string = "%s\t%s\t%s\t%s\t%s\t%s\n" % ( + os.path.basename(vcf_filenames[filename_count].replace('_filter2_final.vcf_no_proximate_snp.vcf', '')), + reference_position_perc, oneto5_perc, sixto10_perc, elevento14_perc, fifteenorabove_perc) + f_bar_perc.write(bar_perc_string) diff --git a/modules/variant_diagnostics/core_pipeline_core_prep_main.pyc b/modules/variant_diagnostics/core_pipeline_core_prep_main.pyc new file mode 100644 index 0000000000000000000000000000000000000000..ba3029d19a157813b9af888aef318ec7a646dd0d GIT binary patch literal 80696 
zcmeHwX>=UfbzXI2ApsI3K!N}<Ky6@e$zq+(9w&05#i zTi1Flw@WU1n=Uq3?ndj)O!zB#cUYx9Yi^Tud6Ozjr`&J0E^k)%T~?{znj5e#4_I?s ztjk-15^uFGZ}m$sXk8xkueVv3xB1sY*5x7pdf2)=EZ5yuX}dMI-MYLz;5A}h9`WPd zV_m+-zusY8-XYiPtkOo`-3Z6UG-R{2dueK>+)z2dsh%= zH?DfE4_MaaJyv0ZTs&wMHp<0btI#JGV^(34TX&Q(wP5#@BHJYAf*;7&D=<4gIfqOdIc)g?#o1-JIx%QBuf;f|b7+ZfM((uqth z>sp3GdiXYOveu%E=Qp#~Z9MJJTgc3F(uX)R*- zcU!}YonVx;YKN^$g>{m;OY@k{T0^%n{?`XQF#&pl8$`x^ueI1^4WaH}n$Zus;}6ya z518o&pT+R)^-_wB;R94B!`Xy*m^c~U^jL3N7*%~blB&Lxt%qYq-ABXe4wSpsnz5|g z8?1#xsuA|IXoL;cHB_Nfa;rU(xwMD}8#n|p_!QG2sTs;W;6K19#IuhNTDLb^iyI@w zy@X+T{xv>Qg+}jSd}=lcsn$!jO{vhV+x%3KmK{oXhO+krjjNl#H++DSfZ4F0a0Vk} zyVdAJwwX*r5l7O9_oNYbq!D+f5${bS-j_zaKaFUo5g$k+j;0ZJr4e_h5%;7KA50_e zO(Tw_5%;AL_oopLq!ACM5i!SPQ5s4bU|59`h=mFN@omg(wUG1$0~hn?!q{P~dfw*{ z^xF7$akl8$XXeYYY}z-ArIOvqUw3V+y>?^PwfC3uSKX35T`b{QxnR$@Ww(ySccPm2 z8m?Vz*i)6#{9GAHJ=bnjf(Ulrp367t#f5R?;ew{_R2ElVd){*kcCl<1DmTksqwePC zz&~HQ5Y4{9^WH34|wxz8!1|YuOiz!gfM^ey1n;(FFO}bjP{Ja=&Suf4~!pF}V*>*n#I z9B(W%ynB&?ZFQ(<@AW3$Gto1_Xf0wKa_zml^RLHaur>C0(i)0?!=0-JO@=Dx>iL_J z8(-rp%BfN*9>betSt|90eW+|7x_;!~5tbw&VloG&bPJNrt6(&hZSe?WJGt&o zLrN-7xw-lBT)tX$3&CibI6N^Uy&G?J)liX!Xi(tsX5CUrbz3lmLhhL4ZpocE(k@Hy zoklTVJ>@)i>g>gdBNv@lo^G3mS=ARSH_x2Uoqy%ji@C{jkr{X4?74}fE8+=u!Zw`6 zOXK;XQP)U0m&@?^1coc9^D_zTmRqz_3A}WlETI>YxaeM30+%OSlw}Dljwi5~T*`Pl zXE1oSI5XRFkUre@AWdYNJWAVV`cli1nq#4TjxQu}Y#OKS^LsXlpINxa+HS+Al9>4; z^`Vuq@rP>rY|P~_GE64g=W!ybMi{K^b9lnwAPd#RBW*XuF(b9A!^ER)C%}8;Qml*hc~@11M3G0ObWmhbn1`jr>giNvZVqC$2q8sb70R^@Hii;(au6YN zQ*#BC8N>ti7`ms35&AZFw6O&bMD8%z^Dza6Tqs45+aMkr5q-WaXGyhVWxm0dlDKtu zu5yDy%FTQkl3lLhE;Pmv)sn#bdR$GEr@aZ+D;FEri{*){#d2<@UYV~>REt%&R4ls_ z&lDT8^H(QsDp zhrZ6u<%{L<>aE5Q3gIg)Iiqq1K=FPBfwjIbvpLg;pC0^d$#i74@-91&!Hv#j1;p_-v_UdpjGsyTI-I@N(x{L~kf~)A$R@aQZycB-QzDzT?wHX6esfzXP zE?xjV(I^-2t7epo@Dxz4Q=E5`SDqli1AGf3atL78^>tSL2|>MpVBxnrf_kkBQLiVg z+7kf20C8m`?`{dRmS9Lorr<8-Br2*Z)zEEVU+V~D3HAjPhuD{TKQ&g6LbGcxr1G(^SSqxl!nu$t(vS+m 
zGM}OOiG3NZP3%iQ=(DbY8Q?C}zJw-#b!}4Emm(^73tW?k3f^i#RPa^{qJp7hu!rwf*HKo?L#;0y?Z>t$`9EnRH!T%7;K6fSQ|K8z=JSd60oIlrYM)x?6kldjo$`D$CgQk=)wYevkt}Sv(n@( zpt>+Ns>7+k?n%My!uW`LFcLQjybB|1=C2_^IIu#TZJ8*Z!QbKbsb{MDVGfX*+eY#X zww|e;FB#G^nVAg`!OWWQgSN1}J2Tfh`*MIolldz~*F{QhA{EOh_Jst_tI-@LoTN&; z;=(3jls>$&jJhYW(_PjcoM(1Y66DF!Er%ChC*fRG&DSnQXqM_-2B!!L)Qo7ig_^a& zJ$cwKgS)(1f>&X*?B-Bf2J4GS&C)dRm%;da5@WNtZ2^jAJ14@P{?Km=jBA6RKlYcw z&s?V?J=GpkXcq2FQWLfdI-0DWGFZtxY7c!h*_<}mECq%%`4ImIr}b*$k!C)RBcBV` zi&cNW8<>^txmDR4$F?@4=Q$TJ6}HHUmU;GcrEdE&(AZ`qc8JwMH?b`Lu|wd}A;{-Q zgnnKPFd$s#Y-KRWfVFH9&VE&Ih7lj20&v#%OV(VT=|h8pddG zqC)G?xWpFYM1|J1AR5MK@sx%!TAXMYqs57aF4Xc(i#iH0#+oM;%M z#fgS7TAXMYqs55|t!vS?hA~<^Wnhw))MW~@Yr*!20D5s^;i&&O(Z-GV-a~vBd=9}^ z0lvpqxyJX3Wf(#SMp>q%8(-qGy+zPQTe_+EE3kJP2h%`?Qrtodxj1*(sZ?0qA*4$| z=GNBAjV%#D;LJvg+PYDw2(hcgc-}h1(Rh(38g~*7mzXq-vjWk?67DK8Fc7#X@Yc*Y zOnGPi5o0g~xls(f!{}g}XyC0mb7j<~Q8zZTkGdsC)u6Ylo9rZ)jzw&a`1JXFl?Flml(ZWcU_!kfT#Jc3_*+$_j2K@x4@V!v~dGYMzgl{M8E_vQ|3fzhH;z3Fb?2HXcz~8Y_l?qQ$)jp zElxBn*y2RPf-O!oEZE{i!-6eNG%VQSM8kqDPBbjo;zYxOElxBn*y2RPf-O!oEZE{i z!-6eNG%VQSM8kqDPBbjo;zYxOElxBn*y2RPf-O!A?AQpH#fb_N)3$GMqC&>v#KJ@V zploFr_b?6PwgEVMgm3(>65x!uLo47c*foln$1#3e2pVyUwK0-gF5X5N_A9_PjW3Z^xe;=g(gWTID!`zjSY3Fq|fmzLeyNlbNzE(HKIM@bTR+GNJp zymS~Cyvflt-ts$b`6bL5gDipxH6zDMz}Cz%C+3knS(cs0>n-bH4!UJ$U>^U}4b?PV zmd*ZBQu)oIF9C=&+dmOH^(V{{K*Tn<`ZH$PT+O#ZWIQefTQm#$Vp7vC4U07ynh3AV z!KI*9Zt(Cz4xDqp_ElCdNe1a;fX&sAx~TbOt0<4PY^*Xdw<07q3WIEaU=e95aIDcoxL zhBuN=wGJm(aPvg=+P9YGI>sXSdda?hx=v^OGWqReOH1gcIVSa{1Fsi#GnOmmLp=Q+ zBwsYXd3u&cKuDVlsc;{jaHc-5ADrK~aY8+hw zUE-JNkpUV|&Ogf^WSoQbb549FAcpn|hTgNoT3nokLxrHjJoFFxLrUkjHH+fHR{&Z$ z(6ZQ)IQC@2wmKhlN@Es&rGVpb!S`F*XQJv7b{JfZNkiVj*OKVwYW%yy~+?j@LeKU1Bbq#4}{T7rfwVasjaH2BftuoSt7q_Hcj* z<56rfAa^(#KrEihuk~OL0A+*=NSrW-hu3~K+%UEuhQ%c%9BH;9b+#leT;Mu2dF6!+ zJX5L@_AtMN1ugajfM>!yc4i&jRRZ>h3G58I1fBv0=v@+d@&a}2wMKArvi`vgAFz4i zl!Yw(XdoUbTg3&!=d;)y0GP^lQ!FO0z;7k$-^c0=qw2#T*Lt#-kPZ)}p108^Qq4u0 z(aI9b+^?am+xY-{4cEJ@x|@ab05--BiB+5T?{}*E&u44D-c+Jr&w>>j^Y&)(^@)&M 
z56DFNe7LdAFDV|X()80M0`gfzPaqpAkf zT`QgrKhs(Jna;1Cxqvx+VQe>EU1!D461Z-A?`z+a=EF$ZcB05j-a8UW=*tp;J43h{ zNE8h`s;7Q-ZKcZ53861~_#O=4lL#W6AoWyXz%W&$473HNz?xxM^;hJ8v;%jX6fcol zi(!FmL;1)(X5Wc5IoiE~P2ne#a29nqa$psc{jwh8r%cJ9j=&lm*Hbf zF3v&`k%N*lQQ-}&6A7yM#;h1^VNXe%zsZS78P#wyT&xOLxNeibIHuX)=?Z^GFXvU= zsp52TDqNl<_)yc?@GZ+@K(p|Ppy85O^IcLU3m_vfqyj(ZNtPX|1< z(VJUeW+TkmdgW&ukAQ+o4{rQ_UuK8=^da1sxd%U6@O%K_CQwH*{n@SAElAmy*_pXl z^B|`v+!DX;?HFl4KlqZ1=GNb#^17$Zyrx)V9u?+UV9`i_0!~8ippilyg-bMyMSQoQ zUa=O6)E_6WJmmX%>kRz7Jwp8e(+~c>`00QSY6-(gF5fkH#Hu}FT`C_C^^Xjo{=uDG z>jXEfOX{-&;6S}Y%+ZDcDT(?A4KkwK25K(GF=ck>u+VU!%_;>J+9}jhYNJq+koqv?VL)g~i$tk+TleGUWc>}%FXfX6j1r;j z5N!?*_J}H~bBm9)haEuPBtiXVb#=-fkm_1 zo~`Z3*51hA7Xs~GDtG8s7~5gOFs8aH|A{~lQ0KL_q}t$LwAlvVs@mYgX>IWSEUTW> z2HhR))%F*mQQx7q-5s^}XYrd-RXs;GNQGfJrYo#a8$bjY3g|!mbiWJ*M?ez{Q<#fs zLU0uh(>BHmAOnsSKrNV5s$+pV=t>>0eWK$MdY2v;(Gl%TtY7Q6q&|&-WX%(6VS$*& zSVwCc==eH4%n%t%)$CYfXe9U27uD=~@#X zP9r{&Mtn4l_*fe8@igKSX~bh`#CN0-kEan&q!FJ?Bfc|@cruN6DvkJ58u4@*aWa{x zVBbx$R6OlR(Xemw0&;RK7Z4zeYE?X5`{Q*>TtNEQr!F8LumD$1URjg{WRqS%jMr#% zNslZhudZ)zG5KEPpR$;|y1w@6dVehsmXGiSrdNqGekUX?Ak}2i$8`j`(r99v@QgyGIS4R- zwq@x%;jk&Z*Csy#3PhqX6?P4ED$(#&mWEN2I4uu~Vv>kFgoQQ@HwDcI(n*=k#B8WY z;<%v}4y7vmRT`pD9su1iC{2tm{1yp|?Mia%avnwR0G$ zy)uI43DJU?o+ttEyo@Ofe7M4VmkX~hF-yO$oG@O&$T;jq-vlB+--2%)EiX6kT`e+F z<0bg)(K5N4rm?<-wr^>>Or~brq)gQb-==aEGfJGeE6pfxbV8&?#w1^9F6O30umrAZ z9I%obYj8E+%;rmGO>$b3 zDR#AGomO>tzBefwRA^N*xssI0s^-vaQ@(oBERpvr&9jS1EkjXir32MTV!ZO1cGh4k z>q20yzRZ`_%LYp|c~@HdWeRVT=S1{-ONA(%rzs<>h5V#5S|j;Mb6;KgDY0$Bmr;{C zv+QA$lF{le?mDA2mY;MkYbrnKJl0Nr(itQPIa=3TX?G{FUg^57OTFsyljgh1@{{KJ z7RXO&IjzYQTWk4ASLnS7Z?9Efm)1~z($z?maaL)XCGuXSdA9oUlWve!@{@nsZ>hqL zW}75GEm!%`ELT;2Qas1R5{lqiqB+-Q5>7HW#o#Fh1p1uQ3<6^>=QwXqFnE%|I~hze zc$xt}SCD9TMfL)sN*kM|LSh#+{m?@W1r6l}pFQPpy>(t%3mSaQ)w3h7`c1tS;B`Y5)`VR!-t z;1nDhRGeVhMT0c(=&|bGY++Xh`znZy=fgB9!v`k@@!o`NP@?CwySt zf~PR;(WO#PyJ&WF4TfWK^e4l^En-DR`>2J{Zfx}+7nRqW{0_jTZdiW7HVXU9BRI4C&hoizra=wN}JmDibNyB46;iCabC>B{huPyQjJLO?4eEt*cH%1@9bO!V=$~N8) 
zM;_qNjro99Ge!(KRTNFaqc}zkcsDa2$eE*P5;;)8!%UH!?`5jU{e5vR_s1XD@dppY zAHdkmEXl6;gWd9ghp14PO&AV%cU^jbC zDY=ZvX!1cL`Ir3UgUQKd{6>>+Gm;N=>fE;_Czr7vO+I8Kf5lHel$>1JG@5+aNd6su z^5Nvf>a83FGD9m}7V_lD44omI=$O3J))ncrME?=CJtG#HmQU zmSE^835H-&$!U9-Kk6vKRruNk_IF`y9Pj3{9yRe^u@-UjILm%P zj;P}pcWHjE?8!-Vd`o?5zQk`(@t8NhO*MrB==m8rzAb6giErrIq zZ-Tejmu_*E%c&!&?5t~x5$>l4p8Mgs`dF4en{w=UxB!SdUb&RJjSUUtLe03!lAo zzDK+Q^m^CRvdFPK23z$c!aMQ+O0L8Ys4tyY4~*Th&mk)7R2b0tjzi?mq1fik zGTRQ*G1mM@%V-ERH(mwyg!u`Gc&YbI=Zjde*lY)*?iUA*Sm1U zK7PpN;2Ce^ua?}g(H7%i^dOJO;q=f69uQ>0-Fm7>`br(7vEboB8wYPL%+Gmv^zK7P zuI$^_J6D)Ka$?RcHSp+Qu`mu_0^W&H#bb0|Z?!NzHi}^ghmhLbY8;P{>&;@JF>9Z& zA3Dquz-Fi+H~V^3rN+qjpq+QUF~#d3JQ(0O&p{q2ES(UK1*q4)ef9xN&&nLRkI|6p z#AvaAd0cGV8YS%(Ch^$lIb(94iTjT5BqN?B~qKnxvd_-eR^Mvy9bC#J?mcO8EG zp~H_}#k0|?rTo-&Ucl_DJmd1Jkgs2d1It^y-Zfn)9L^t=%g3gVK6d!wQPm1Fs5E=} zBLCRW!n{YfKyhvijeO+rVK&Ge>U8ArzJqW8QFcq?S5XXpBO9kE!`j!~sm2&9i)VY` zCZyr+J;+tHGG7m$kOcb6CiVWyDn%V@D7{3XMPe zz%Rd0#OhX@y6%nNif}|*m9LMP^o1n4W`MpMkJ6TdzzOZxlT=%G33FMI9Uz1H^KrJ|_niX%2-IZNAj- za%jOZD&Fd3pnnvQXZrM;QHrPDf>L($iWneHb7fGkN3m#B_k5YCgMw^_=*tZ%k7YH# z5#^O441-q?FfZmW_uASqpuKcnj0M#@GN)fsz8@T~!Pmwt);zpGn~7{FNg++vs9>Oo zog`+RoY{<2hE-FCAEi;F^^kX>6r#B@ipFRv=>Rdp4E*h=_XhwMlfsCA)5E9;N+oRd>CLmCNBx%A*6o9k`^8#&+n67pr` zZNlX9eictGV21qZ?d}F9)&tydtDuVgnZ9g)mWMXGGeq##Wk<4GLDN5--;x=?|K99q zwigKFMoGIBM>F*me0S{u%GL|c?f&iP{5If}Dosbnjm$1yXXy)!JiGC?Pq=r3Cy$Bt z;gBYeeRgCHfX@5gjM$Z8>7vuAWA-!3yOkW*502qaxgLaBB|Hn#?8iPmi-neP{dc@a zr%<68HxhCUoT?P`cVnmVaVxkzf)xx%^pOR=-eYLeqyjwEWbBh)t>YZP#dl@2$68quFn}Mx$Sn6}d+HbksE(W|ul%4VYgYoW!O9(GRtu0KndO zidaDZyJAn3f3)CaxaOeJ;w1Zzh^Hs)H)vM!ag)$L8a4?uA-IA(MG(gDkA~yD+#k5J zf3&{9KiVexNBe^EkA~yun<++WzTk!Jfjjy~BTxnd7W0SphJUXxwAvT4wJ$c6=!@bXjhSHA07u>thW^n|BH-)Trvl#3 zgV4k9Dj=H&7&C~d{iA&`TlaA1!J8V?(di)F7VQ zYStM3(N;EVKHJhi+V^#}H%E?!{?Xc>BOiG%a@odWsY~63ojXkN%g6wKiBB{zizR8=IsYZO!zj3 zf4lzG5>4(1Ma5vxh`HV*sZS$lJb)nZnx?AJ>^n_?sLwJHkvQd#p5UWH;C754sq-9z z=NY`fK-L0o55^7j&hZ(MHL<>PxQ;19zKgtig#k^p9U73{ZO{c5i2%AV`@9*z2%VaJ 
zipbetAT6Dmbpd(ekGM4xTf7vvX0XrW&#JmL%ci(B^Yg=BBifhCcYVTQfSiR(dlu;>WU7qXT^czRf7x$2^*i(teK$Z{X4FK}O3gq zH+eMkDMP+Tv)xgTW?Qo`mI^(ZA#$>Jsozu{&5$ez$n9?kap%8PD;m`AggZII^C%&+OKCXZ%5B|}5AHfsCx(1FDbSnJwmI%>DF_)V#* zo*poJM5l)?+>$+-?USJpcr=5}jRr@8t0a$RE!!#X(F`r1J({5lkJo;1sUFRKX%#)1 z^>>E7pnMZ0kGbd5J(~4**81=4(Tw9zyCZ^SP^jd?>c{YC2E(4#M8n-#Jf-38EKW4s zoyCcUyR$gaaCa6b8t%^GM8n-#oM^Z^ixUlZXK|w8?kr9;+?~aVhP$&k(QtPbCmQa~ z;zYyUS)6FNJBt$ycV}^;;qEL>Jef93PNfl_N+TNX&cZyS9?d5G^esG^sRbnPX!djv zJ-Ax&0-`*c$pZ3+OZ8~BVU0YRonPPHV)C8o9?j0LubrnyGkI;b>Cr6gC*?8m8NbO1 z=*5YKN3%HbxwMqerx9OBBfgkMd?}4+cr=Tb*6?T+CmJ5j;zYxvS)6EiG>a1rk7jYA z;n6HkG(4KciH1kBIMMKE7AG1W&EiDEqgkA2cr=R>4UcAVqT$gjPBc841w;&J?a^!= zT8rt?Z2x{agOuPtY5#tE6}_4rN6t`|B{Km?mI@|laE>RRdY0we>@-sPcovJXOy6c= zI3UKJVkLv4?5EE7?#oV{p&v1tk;r4iS((t{PxSH3z${;9v-72npG)D$#^EXb74>8j_Se>Z} zW`;1P_DV(!#vkd4jKm81osn2eoe#NFXF{0v77LvTVaonF)s&ka`4Dv^_2; z%LWyfwHWx7=Opp28QF(6Wlff6X)ep|TvwfE>=PSdc6s^SS(@Xr_VXZI7Pe>`?#tR| zZMIfqw#O%BnpcLEWtTXKS0_7K*oO@lshu(HS0^twA+NP*#H$H8twDuePspYX-vR9p zhNKMiU|5|lc~^@XtPXeL*YUxX?ux1XrnzqLPJ5ZHPF)i7Tf>QI?s7%zw*~{5M{L55 z3Pp!C$ciS5Oa>DQ9juU6}DD=RF!yjap4 zrnNuuxjhy#Rvo)K?Xe>1QadH_P`Wz_mVwR1SGGv0Yi!C#p?J62(xlEVXLNs<-Kt2F zI*+xGCKFpLB5$p-Ae55bTD-J$c5jI^sq=aZq)DCAT1%5Un>#geKOw3_bXXjc; zle&poNs}p#np8!8OI*qQAf?T142VQKp*fnEzKJXfQwnQ! 
zoW`9xCV0`q$uN8AtKm^fw)&Uvkh zk2vw41~V@l8cTB*2X|QCVPQ)v#a&!85jz$Nia~e0h%J4*0&;mghL`nxA}m zj^lpZPrf|Iaev|`|H^h8_fubyj^o@rJC4KL{H2vRjx$}oWiy5kx@3=zt$fhMS>1hJ zk{uzUR#NZz?_;CbZ1}}2I-?2h3a#|raMgx3gy~V!d)OpIc zJ<54SIv0D}qukpb<)R+tXn%wa2yuqVsXG_*Mqr#^Bo-{A~u`!Qk&8Xs{~s zj@KE81G?`d`MXHwOSwhX$|)LaGE0wf3HeJ5=kJl(_cHkV489M+-G--eK>F^_)C)wM zpgq@(ci2y4ghXLnazNo`Xko^ILi4|?+ERO&`-`!Gx%8qciRx-`qDO9`~5saOrRU?(B6bQ zwBeqQY#pSRy}%tBB3N7SzYqUU+lc*l@dS$&fV}Hjt9~f6@HzU+nY{AP<;x0|bq(h* zv-Fh>fDVUEaX<)44;)hB94U}BoE^nyAV!S)uGl@ks9@07#qQVRUOZqU6JEd#KQigC zrZ>P%H=HX09_+*wPOJhf!N(+&Cv9BAX%cAs_=J_=4G5TVHF(fh{NSi&7zZ-3Kwg7d z*CRFPwHAJ;1E)$SuN;*M$`>NYpZ=8ahaZOMQ(mIvD-ik_&&byPCtRGn9c--4Y%K5T z46pm8;kr9!a9SCjany$g@Ocb;-GK#02CBe))>nShmNV&1dp{5CPF@*i)8fMn=$?Xn zL19P^@xF|cuXw1sNw*E0_~VrF5esPD!d`ynVe-oT7JW9su{sesM z-{P^@L7o8K6!5~R8L;ZVC8v`{6CEq*Kpu%*c?X`Q7ok`OVy?lx|3;aAoNJ(8ko`gLhqIRDJKS{b zYn7|sgz}CGrtFpzOJ2Z{CA1X1s~%THDor?Jm;i@br9Hc`d$4 zhtK}9D$eukkRLVU3Faa$IfZv?)^=suu9Qo+R19(NS#H>Y@!Gf?iF&SFoo|TO>@z4+ zqwd1x_fv4C-H>dn`C|QM(Q}n^P^8Y6^QBuJd_(t2>SI+p*=4;^z+?7Pc^dY??XtK~ z7H``&?7HX#n{k4sk@~Sph&nn|nx{k2(POq-M8rBD(yb_*KWh@aa5EnsH#k(P!2dVm zGIg;G*V*bh6A8)oZh_kzl0;)RQ~iX8!tA~It8;L+=u@OO$ydiv?WJpMcwRnc)1Nb& zd%B4Bu|br+6}QZGaon}{=gO7H2_3Io9Y>^b`{JyN#^pG`Hxe*H*iecG{F$(0z|-$I z)VoBlM5ta>I!rJ#Pd>Du%T{+A^Y!vE)iw1zI?hu!9_3-bVt$pq_hY+s*SV)I*oV&A za2PLxvR6HL2@mw;TzL7aepCt{)xyhD;bqCLRlF0^rTK;4-ih+GH{p8aV&i(TJaM&H z&e6+xbpk^P6QYnm%4ZYRTa8(?b*sqLV%0_M-HB%~M&_?h=tjxuM#=f~92|U4%vB2W zXwe{2p_rd3S3I~gM?n@E_54&L$B6;A*3Wtn|6$M?DV`sx5HmcWx z$PN^0V9qFa=tDSIOm4ncX-{Eh_!il-j#@K=UjeDVTw%YcjkEYs^?*aqJJ+wpV;qL6VFB4^SuDq zF!)g*1(dEsRQo8o3Y3)g}Fo{j$x-~uuWU(oVxE;x_C>kslC!dEjjNOkmz#$?)OFvUet zFtHk-rKJqwO&AUhENlF*Dyn{tCrW(wz8+U=KG4|l;jmP9b+?LhWW_Runb(6t9OoLp zP^~}tFq&LCs@w9|`nu7^hH&kiWN?bXQw*TJDd+H~E9L3pOvJN1Sq39eeE?Ckek$_f zc7CQ*xf(iJR^}$B$&>lWanADm9E<8H$4Uea=HL&$2>X8R>{$llRCQi?MR(YDW0csX zW7T^=@CNbgF141WlJ7gtA@Q5;zvqIfpj^Xqw17j7%cdysys6fU_y~k{x=yu#4>{i? 
znmB8cUdBil{u>a5*ZO4%ftfDrWn|>?^?Lr6%ozU(8Ht-G@nJt*t;)>eLgPtA(Ht^^ zJ;_5m$G_WjFivD8XK-Gr+4n!F<9`eUYs1D)>h>Mb*=_vngvPuX3fCqWtnlxS?DmfB zom9bimk>`=wjtdXTyM+_Z>uoge&Kld5iydt$O$AC8~GMudrIw*yWR9 z&gXeWOu+0zVuox#6j5a?>o7kEqM&iM@lqCu;{Dw>tr z#{DA6zscZB44!B3TMT}i!GB=zI}BcA@VgBDBZJ>#@ShOeWgXJEA7zSYN#^)gi$jIo zbQda31T(1Q5g9mbqm1X!mE7D>yJBCm7D{SYtRP-zgIzJW+r;hxU=!J}f{J?$TUZie z3u~IYRxm!tH_fxS!mb%UN{$^g*)`+W#=H3$bqLRtoq4m4TNP^mN_gO#D8p*oiyvM9 z6?x(NBA?D;vrG1`@Z}Wjuz&~n>p$=kg%7UFS5B}Qg=8S%88P+R_hJ8PKFezL$o|!T zmn!zk{*{}Z!8QrF!K0{TK6JZR3JiX#z#HDuM%a#>rnE+YBEm)hkcFnl{t)&Fv4Mxm zvN3Bv43DySs2Y^bj#^+>PbnfBWv6MQ+G)b4#j&G-kHDbzxbCs8$rIu72lO?&iHZoE z_Gw;lQz#pWI9Gt;Z5Fq?*qc&T|C?mfX~4RMFI$7(CT>7suaH!1Y4nRu(i3btZMMpP zeAl%1hqEhb@2&9Q8n(CE1y!R+k3BTw1wpaA_dsG2_&nUoxK));6a!iT%J`0OTLupR z+T&}T+=?LjTIWXtO1F%C0+s4(cV|RzMMU-`lD8s0Xh^ETCPcnk#1}A&C+FP4`21XP zYIYn*d9W{$*H_`TM44ZyJ8|S#wOT%LCap`;jS0|yk{xr3 zf$S870_2ipM=>01DL_i+hXSOS{C%~_in3`SyX%6ePoDd4lnm==SZloxr;oG-(gAxD z5I17Pp>UYmX%KyT2)zd>LeM=s#a!>h|6cqT>8}^qenX@`ZjiymDQk@sIz&!-EQ6gd zNJM!159vQVVAv%h7ey$fRQ6G@OM<*{fIDpq4=CwzM5Gl+k0P=ukwPSm3l<~|3z7yo zz_1EZqL@3ufM!U2qzi)N!lM)xC$Egt4vQyGL?lty5bCb4v+CbXyD4%e z-Wtsy6(2!MwTCGAnHD9Ua(fgnbqP zU<@1-0RnSBKR`Hw7QEhV)pvB@(`bCO!y4f*-qFONwxdJO5E_Q5Rz=AQJE0-L;tmaP zueCUU$)f&*QiZUQ|Wpz*v)d5?qg#*gqY?pN#?Fd6P7^C4xGrI6L4t#B)TCfQ8 zn~n5eA>iVH%DDD_vX=_@l9}qy+uJ1n`z?OkFIWl|W;4io^2&QNw}+$|(f+nnev5_A zQn2q}lcxF({b}^&C>mzSn#Kxq8%9y89MZHIWze>W&p4u-_{QHhlrEDg>~5PMxk9Ze2rMj6CSB z=;M0~R=0Op1*`@;nzEq2yi=R+E$%eA>I&>s9dicX6h!ZjS&Q4y`|3~F3x$2Umy+5W zAHPuTf4~}%#=h5T4AQy!;=RzBMd{68QNA!n2=ARypt1qADt5w*#BoI50@)o|~ zd>Vfpcdl}S=0&e6xRf?(%H9Z8R%Zg+71}ECbF85mnOH9!6O{7+`5Z=|JynO_pYzRi z0^_sj5jzoa>Iate(Yit8a<(!~`zRV2&}TPL`aPaU=`u=q?(F2#FDlZ@AVKhyV*!LO zQwx>52}2_q@HDlXTL{r(-?#?ai`DDB63al@+^<45n9Gp=(TwxMfqVf7Hn0N=_5ZDPCP4rz}updA?4MXH;Pr*R&f-G-9;mR zwYb2-t1rgw3}Om_F%MP6%x;f2KJ!MeN3aowQb=zvBA7rq4*}8C)|L{3pw8 zQCizz9mQYhO{`Z~LDEA9g-?#}pLh~1XZHH(M?r_-9tNJDfp7jKHhh>bG7>o&rYm*3 zNIOL^*pJNLVHYaA#_!c)C4cj}eP~LIK^N@3@53k6Ve4Y=rv1x7yX2N<8na`&J-4>c 
zzP;BzZtpHKdD#_H8NmsLF_ju}~YqS$mKr&Hc`AAx1EKim49<~=`HizR` zVacK`a3VRfGK6XbN0K^M#QCQXWaBzj45ks3D_3R$-NSfX{H3$0ibQLQok7S}XZ5=;( zw(3-DOymi3A&AznFoHGV0RD0g(mYUR9(w?lo$)M8u3s`Ew~ zPUc>2YOWBOc>bN*GUFYS$b!R$P^V5+8BlIL`lUfITIb#Jl$$GI@RBQM=3TTeBkxJy zvN~+-SFynABT@Xa+&cUqT;DAwPxofKarlU)Pa89Xncj}*T_5iJ`+*p(H@g`oQ1XpD zq=q3BjzrOPO0KhZX1$zz*^(Im|2|yP0Ba+{et0Rem9HK7ZXTcaX9mP%Yb&ldqip-} zm*3y>|BlK%k8th8^8x(uVAN1%Seaa*dKUkmwo%Vd-~w9*1pfii;~FZR5;b^M2KUeq z)jiI}KnAAA6u{Wna_F?C*Qvb%2sJne(*>bM{lToDCdkj1gDnIzknjxRbO!2+<{*4T zK!cX2qTwWp9}T*Iia_xST-QOi;e9tuei$`-a z`Nnm=8|*`e^jln_X$#?}VYvzE3D&vx=pJtrkua$s9G%4!w9lQoaKY!}{4omW{0Rb? zN8~CKk{7`nS_5kKX=SW<%zpPC?~1a=Rja!93>eJ7gw)=<>-EJbf4-SeUq8FB*nH(h z3Bk)wUe7IPV|@FIm1+3)2=^GrIE=)qQkdoVi3l!$e9Q$*BoyBGc%-cJ^7fJHswO&$ zcsMxVqP21~fK|;1`4%-~qxiA>r6pWdfy?G+f+dG9DIA-j_>j-SPbxg* z6V%)p6-TF_XJYvf-CZ^Tp@;&3K!Jqib-aETXb+5AJSR)uNTTiB#lji3Zi}(*qehRF zLX_%^jvn4aYsbpuCuPY%(~knyQs5n#+I^YYf`#8pYzu6+0eylh4~!t%G#?U};I@JZ z<|9pmr7L53WjUvZmXbZR`cKGhq@`GvpqX?zf0mI5830tkq9w<>GPR%0)SgcyJkL!# zLTT*0UIh=Jgi!lcftA|dg`QJtKRQpT{X8tF)PCfm)&4&PO4J>!k{m0^Vm?(e=g$yC zKS52&QL8T^ugw%i0C>r{a4Prs;YS|#_O#*`ii=A4P@kY4<2Y4D3V+F~|H**!Q^Y_D zR|`sXe42zo2G1fGW3ff#i_Q)#09dty_p>tyJ;&L`fTj?l;y^nM3L=~Pb!xD%>Zg8!0k18qcQ9N2o5rYgo^}%9-@jzX|(i!ISciWQ4f;M#;&f9o@%C&J4 zR10fkHzXMvo zV_A~rj`)%oQeY{-8i$wSo0+V}k+G&g!^WD_&Gi}cR>5`J(gI*1769bEo}(Ll@rtGB z1vv6r(gq)~eTr;Chj|P^-Ll#C^bl4=c%u_8aM3Fdod`I+FG_?G6 z>vdVU%(%{dD46pAOUK-W+jI?V+|pgA$8qSQ|9mNP0IR8I4LU>I|D7?EqcA%dv2g7#5LfWcxR`)yb&_}<6nnPScal&P>D;PHC3Im68_AxA@* z$|7MgG78{FR`Z^=T(Sa@3hBqELPjG_kJX6NovYwkIb~PHc`P1F430y>fWUuQdE#ko zFXFbjD9Gy(wgGxFIef7RxHlo$kivoN5f`PoxYOh(J|e zM5wU62?acm#tNlwZw zS-!;wM8_GCuot1cpD8DDQp$vU5>pcs5^rD50K8)Lq{LFjd53o+<`ZTg!3KHrVlCxn zw~CAxFy3*PZ}LLTV1{Awg=mWz_EG=!j`;z@JhU#BZM<|jT{u(0M$J9si8m${2(ZOd zyhAl}^;Atnan6C{qLu_V3%5J4dV(cp9>O=dauho_J0Vs8P~mPTw)pU>2jfj*GKp`d zBDx|Y$^9TsUOCNIKGh9)^XRbZnGCP{4#zL zu+<5{{yLnCj&*QP72OISA{py7odUfdNikrgxa_BZACWKx`Yw`Ui;?1^eu^#86lnQK 
z3iz>!lEd+51@>9bUxg&zE9(8(Q$=vIL?SN|;VV+9EppW^@@-V)8%l4<_3ZJYzr9P*5Q>mA zEOl;Oq*}@x>cuC=6b|zCID-=m>Cd#!On_T6j?%6^*BlUm{5ayAtPx3obD&zPG)_?> zV7FM=LFny!AQ5y!C?CQP<%5yzR^0E9U6w7-;Qi~}Ev^}7NOn2yCkO1UYzG(adfe$R zI#eaKK8$=7FqQ+uxSSGaLbGLW$=JDD;PDY!UCD3(zI z%wP2a{5vN?Z!V`bLpZMy-#(th;pI>cmN5qd@lx@soXv3{_o(waLTSvx0y8s>=U9b0UapWElrIU2 zo7!c{c&Rco14<6=K(x;XQ7j|moW2$n(SI)6^3rrq6)O%)E0CN|7C#U``Kvh(*4&L- zIe0vox~{fR;eRF--0E5pm_ %s/header.txt" % (args.filter2_only_snp_vcf_filenames, args.filter2_only_snp_vcf_dir) +# sed_header = "sed -i \'s/^/\t/\' %s/header.txt" % args.filter2_only_snp_vcf_dir +# sed_header_2 = "sed -i -e \'$a\\' %s/header.txt" % args.filter2_only_snp_vcf_dir +# +# call("%s" % header_awk_cmd, logger) +# call("%s" % sed_header, logger) +# call("%s" % sed_header_2, logger) +# +# temp_paste_command = paste_command + " > %s/temp_label_final_raw.txt" % args.filter2_only_snp_vcf_dir +# paste_command = paste_command + " > %s/All_label_final_raw" % args.filter2_only_snp_vcf_dir +# f4.write(paste_command) +# f4.close() +# sort_All_label_cmd = "sort -n -k1,1 %s/All_label_final_raw > %s/All_label_final_sorted.txt" % (args.filter2_only_snp_vcf_dir, args.filter2_only_snp_vcf_dir) +# paste_command_header = "cat %s/header.txt %s/All_label_final_sorted.txt > %s/All_label_final_sorted_header.txt" % (args.filter2_only_snp_vcf_dir, args.filter2_only_snp_vcf_dir, args.filter2_only_snp_vcf_dir) +# +# ls = [] +# for i in vcf_filenames: +# label_file = i.replace('_filter2_final.vcf_no_proximate_snp.vcf', '_filter2_final.vcf_no_proximate_snp.vcf_positions_label') +# ls.append(label_file) +# ls.insert(0, "%s/unique_positions_file" % args.filter2_only_snp_vcf_dir) +# +# with open('%s/All_label_final_raw.sh' % args.filter2_only_snp_vcf_dir, 'w') as outfile: +# outfile.write(paste_command) +# outfile.close() +# +# with open('%s/temp_label_final_raw.txt.sh' % args.filter2_only_snp_vcf_dir, 'w') as outfile: +# outfile.write(temp_paste_command) +# outfile.close() +# +# call("bash 
%s/All_label_final_raw.sh" % args.filter2_only_snp_vcf_dir, logger) +# call("bash %s/temp_label_final_raw.txt.sh" % args.filter2_only_snp_vcf_dir, logger) +# call("%s" % sort_All_label_cmd, logger) +# call("%s" % paste_command_header, logger) +# +# """ Assign numeric code to each variant filter reason""" +# subprocess.call(["sed -i 's/reference_unmapped_position/0/g' %s/All_label_final_sorted_header.txt" % args.filter2_only_snp_vcf_dir], shell=True) +# subprocess.call(["sed -i 's/reference_allele/1/g' %s/All_label_final_sorted_header.txt" % args.filter2_only_snp_vcf_dir], shell=True) +# subprocess.call(["sed -i 's/VARIANT/1TRUE/g' %s/All_label_final_sorted_header.txt" % args.filter2_only_snp_vcf_dir], shell=True) +# subprocess.call(["sed -i 's/LowFQ_QUAL_DP_proximate_SNP/2/g' %s/All_label_final_sorted_header.txt" % args.filter2_only_snp_vcf_dir], shell=True) +# subprocess.call(["sed -i 's/LowFQ_DP_QUAL_proximate_SNP/2/g' %s/All_label_final_sorted_header.txt" % args.filter2_only_snp_vcf_dir], shell=True) +# subprocess.call(["sed -i 's/LowFQ_QUAL_proximate_SNP/2/g' %s/All_label_final_sorted_header.txt" % args.filter2_only_snp_vcf_dir], shell=True) +# subprocess.call(["sed -i 's/LowFQ_DP_proximate_SNP/2/g' %s/All_label_final_sorted_header.txt" % args.filter2_only_snp_vcf_dir], shell=True) +# subprocess.call(["sed -i 's/LowFQ_proximate_SNP/2/g' %s/All_label_final_sorted_header.txt" % args.filter2_only_snp_vcf_dir], shell=True) +# subprocess.call(["sed -i 's/LowFQ_QUAL_DP/2/g' %s/All_label_final_sorted_header.txt" % args.filter2_only_snp_vcf_dir], shell=True) +# subprocess.call(["sed -i 's/LowFQ_DP_QUAL/2/g' %s/All_label_final_sorted_header.txt" % args.filter2_only_snp_vcf_dir], shell=True) +# subprocess.call(["sed -i 's/LowFQ_QUAL/2/g' %s/All_label_final_sorted_header.txt" % args.filter2_only_snp_vcf_dir], shell=True) +# subprocess.call(["sed -i 's/LowFQ_DP/2/g' %s/All_label_final_sorted_header.txt" % args.filter2_only_snp_vcf_dir], shell=True) +# subprocess.call(["sed 
-i 's/HighFQ_QUAL_DP_proximate_SNP/4/g' %s/All_label_final_sorted_header.txt" % args.filter2_only_snp_vcf_dir], shell=True) +# subprocess.call(["sed -i 's/HighFQ_DP_QUAL_proximate_SNP/4/g' %s/All_label_final_sorted_header.txt" % args.filter2_only_snp_vcf_dir], shell=True) +# subprocess.call(["sed -i 's/HighFQ_QUAL_proximate_SNP/4/g' %s/All_label_final_sorted_header.txt" % args.filter2_only_snp_vcf_dir], shell=True) +# subprocess.call(["sed -i 's/HighFQ_DP_proximate_SNP/4/g' %s/All_label_final_sorted_header.txt" % args.filter2_only_snp_vcf_dir], shell=True) +# subprocess.call(["sed -i 's/HighFQ_proximate_SNP/7/g' %s/All_label_final_sorted_header.txt" % args.filter2_only_snp_vcf_dir], shell=True) +# subprocess.call(["sed -i 's/HighFQ_QUAL_DP/3/g' %s/All_label_final_sorted_header.txt" % args.filter2_only_snp_vcf_dir], shell=True) +# subprocess.call(["sed -i 's/HighFQ_DP_QUAL/3/g' %s/All_label_final_sorted_header.txt" % args.filter2_only_snp_vcf_dir], shell=True) +# subprocess.call(["sed -i 's/HighFQ_QUAL/3/g' %s/All_label_final_sorted_header.txt" % args.filter2_only_snp_vcf_dir], shell=True) +# subprocess.call(["sed -i 's/HighFQ_DP/3/g' %s/All_label_final_sorted_header.txt" % args.filter2_only_snp_vcf_dir], shell=True) +# subprocess.call(["sed -i 's/LowFQ/5/g' %s/All_label_final_sorted_header.txt" % args.filter2_only_snp_vcf_dir], shell=True) +# subprocess.call(["sed -i 's/HighFQ/6/g' %s/All_label_final_sorted_header.txt" % args.filter2_only_snp_vcf_dir], shell=True) +# remove_unwanted_text = "sed -i \'s/_filter2_final.vcf_no_proximate_snp.vcf//g\' %s/All_label_final_sorted_header.txt" % args.filter2_only_snp_vcf_dir +# call("%s" % remove_unwanted_text, logger) +# +# def generate_paste_command_outgroup(): +# """ +# This Function will take all the *label file and generate/paste it column wise to generate a matrix. These matrix will be used in downstream analysis. 
+# :param: null +# :return: null +# """ +# +# if args.outgroup: +# """ Paste/Generate and sort SNP Filter Label Matrix """ +# paste_file = args.filter2_only_snp_vcf_dir + "/paste_label_files_outgroup.sh" +# f4=open(paste_file, 'w+') +# paste_command = "paste %s/unique_positions_file" % args.filter2_only_snp_vcf_dir +# for i in vcf_filenames: +# if "%s_filter2_final.vcf_no_proximate_snp.vcf" % outgroup not in i: +# label_file = i.replace('_filter2_final.vcf_no_proximate_snp.vcf', '_filter2_final.vcf_no_proximate_snp.vcf_positions_label') +# paste_command = paste_command + " " + label_file +# +# +# """Exclude outgroup sample name in header +# +# header_awk_cmd = "awk \'{ORS=\"\t\";}{print $1}\' %s > %s/header.txt" % (args.filter2_only_snp_vcf_filenames, args.filter2_only_snp_vcf_dir) +# sed_header = "sed -i \'s/^/\t/\' %s/header.txt" % args.filter2_only_snp_vcf_dir +# sed_header_2 = "sed -i -e \'$a\\' %s/header.txt" % args.filter2_only_snp_vcf_dir +# +# """ +# +# header_awk_cmd = "grep -v \'%s\' %s | awk \'{ORS=\"\t\";}{print $1}\' > %s/header_outgroup.txt" % (outgroup, args.filter2_only_snp_vcf_filenames, args.filter2_only_snp_vcf_dir) +# sed_header = "sed -i \'s/^/\t/\' %s/header_outgroup.txt" % args.filter2_only_snp_vcf_dir +# sed_header_2 = "sed -i -e \'$a\\' %s/header_outgroup.txt" % args.filter2_only_snp_vcf_dir +# +# call("%s" % header_awk_cmd, logger) +# call("%s" % sed_header, logger) +# call("%s" % sed_header_2, logger) +# +# temp_paste_command = paste_command + " > %s/temp_label_final_raw_outgroup.txt" % args.filter2_only_snp_vcf_dir +# paste_command = paste_command + " > %s/All_label_final_raw_outgroup" % args.filter2_only_snp_vcf_dir +# f4.write(paste_command) +# f4.close() +# sort_All_label_cmd = "sort -n -k1,1 %s/All_label_final_raw_outgroup > %s/All_label_final_sorted_outgroup.txt" % (args.filter2_only_snp_vcf_dir, args.filter2_only_snp_vcf_dir) +# paste_command_header = "cat %s/header_outgroup.txt %s/All_label_final_sorted_outgroup.txt > 
%s/All_label_final_sorted_header_outgroup.txt" % (args.filter2_only_snp_vcf_dir, args.filter2_only_snp_vcf_dir, args.filter2_only_snp_vcf_dir) +# +# ls = [] +# for i in vcf_filenames: +# label_file = i.replace('_filter2_final.vcf_no_proximate_snp.vcf', '_filter2_final.vcf_no_proximate_snp.vcf_positions_label') +# ls.append(label_file) +# ls.insert(0, "%s/unique_positions_file" % args.filter2_only_snp_vcf_dir) +# +# with open('%s/All_label_final_raw_outgroup.sh' % args.filter2_only_snp_vcf_dir, 'w') as outfile: +# outfile.write(paste_command) +# outfile.close() +# +# with open('%s/temp_label_final_raw_outgroup.txt.sh' % args.filter2_only_snp_vcf_dir, 'w') as outfile: +# outfile.write(temp_paste_command) +# outfile.close() +# call("bash %s/All_label_final_raw_outgroup.sh" % args.filter2_only_snp_vcf_dir, logger) +# call("bash %s/temp_label_final_raw_outgroup.txt.sh" % args.filter2_only_snp_vcf_dir, logger) +# +# +# """ +# remove this lines +# #subprocess.call(["%s" % paste_command], shell=True) +# #subprocess.call(["%s" % temp_paste_command], shell=True) +# #subprocess.check_call('%s' % paste_command) +# #subprocess.check_call('%s' % temp_paste_command) +# #os.system(paste_command) change +# #os.system(temp_paste_command) change +# """ +# +# call("%s" % sort_All_label_cmd, logger) +# call("%s" % paste_command_header, logger) +# +# """ Assign numeric code to each variant filter reason""" +# subprocess.call(["sed -i 's/reference_unmapped_position/0/g' %s/All_label_final_sorted_header_outgroup.txt" % args.filter2_only_snp_vcf_dir], shell=True) +# subprocess.call(["sed -i 's/reference_allele/1/g' %s/All_label_final_sorted_header_outgroup.txt" % args.filter2_only_snp_vcf_dir], shell=True) +# subprocess.call(["sed -i 's/VARIANT/1TRUE/g' %s/All_label_final_sorted_header_outgroup.txt" % args.filter2_only_snp_vcf_dir], shell=True) +# subprocess.call(["sed -i 's/LowFQ_QUAL_DP_proximate_SNP/2/g' %s/All_label_final_sorted_header_outgroup.txt" % args.filter2_only_snp_vcf_dir], 
shell=True) +# subprocess.call(["sed -i 's/LowFQ_DP_QUAL_proximate_SNP/2/g' %s/All_label_final_sorted_header_outgroup.txt" % args.filter2_only_snp_vcf_dir], shell=True) +# subprocess.call(["sed -i 's/LowFQ_QUAL_proximate_SNP/2/g' %s/All_label_final_sorted_header_outgroup.txt" % args.filter2_only_snp_vcf_dir], shell=True) +# subprocess.call(["sed -i 's/LowFQ_DP_proximate_SNP/2/g' %s/All_label_final_sorted_header_outgroup.txt" % args.filter2_only_snp_vcf_dir], shell=True) +# subprocess.call(["sed -i 's/LowFQ_proximate_SNP/2/g' %s/All_label_final_sorted_header_outgroup.txt" % args.filter2_only_snp_vcf_dir], shell=True) +# subprocess.call(["sed -i 's/LowFQ_QUAL_DP/2/g' %s/All_label_final_sorted_header_outgroup.txt" % args.filter2_only_snp_vcf_dir], shell=True) +# subprocess.call(["sed -i 's/LowFQ_DP_QUAL/2/g' %s/All_label_final_sorted_header_outgroup.txt" % args.filter2_only_snp_vcf_dir], shell=True) +# subprocess.call(["sed -i 's/LowFQ_QUAL/2/g' %s/All_label_final_sorted_header_outgroup.txt" % args.filter2_only_snp_vcf_dir], shell=True) +# subprocess.call(["sed -i 's/LowFQ_DP/2/g' %s/All_label_final_sorted_header_outgroup.txt" % args.filter2_only_snp_vcf_dir], shell=True) +# subprocess.call(["sed -i 's/HighFQ_QUAL_DP_proximate_SNP/4/g' %s/All_label_final_sorted_header_outgroup.txt" % args.filter2_only_snp_vcf_dir], shell=True) +# subprocess.call(["sed -i 's/HighFQ_DP_QUAL_proximate_SNP/4/g' %s/All_label_final_sorted_header_outgroup.txt" % args.filter2_only_snp_vcf_dir], shell=True) +# subprocess.call(["sed -i 's/HighFQ_QUAL_proximate_SNP/4/g' %s/All_label_final_sorted_header_outgroup.txt" % args.filter2_only_snp_vcf_dir], shell=True) +# subprocess.call(["sed -i 's/HighFQ_DP_proximate_SNP/4/g' %s/All_label_final_sorted_header_outgroup.txt" % args.filter2_only_snp_vcf_dir], shell=True) +# subprocess.call(["sed -i 's/HighFQ_proximate_SNP/7/g' %s/All_label_final_sorted_header_outgroup.txt" % args.filter2_only_snp_vcf_dir], shell=True) +# subprocess.call(["sed -i 
's/HighFQ_QUAL_DP/3/g' %s/All_label_final_sorted_header_outgroup.txt" % args.filter2_only_snp_vcf_dir], shell=True) +# subprocess.call(["sed -i 's/HighFQ_DP_QUAL/3/g' %s/All_label_final_sorted_header_outgroup.txt" % args.filter2_only_snp_vcf_dir], shell=True) +# subprocess.call(["sed -i 's/HighFQ_QUAL/3/g' %s/All_label_final_sorted_header_outgroup.txt" % args.filter2_only_snp_vcf_dir], shell=True) +# subprocess.call(["sed -i 's/HighFQ_DP/3/g' %s/All_label_final_sorted_header_outgroup.txt" % args.filter2_only_snp_vcf_dir], shell=True) +# subprocess.call(["sed -i 's/LowFQ/5/g' %s/All_label_final_sorted_header_outgroup.txt" % args.filter2_only_snp_vcf_dir], shell=True) +# subprocess.call(["sed -i 's/HighFQ/6/g' %s/All_label_final_sorted_header_outgroup.txt" % args.filter2_only_snp_vcf_dir], shell=True) +# remove_unwanted_text = "sed -i \'s/_filter2_final.vcf_no_proximate_snp.vcf//g\' %s/All_label_final_sorted_header_outgroup.txt" % args.filter2_only_snp_vcf_dir +# call("%s" % remove_unwanted_text, logger) +# +# else: +# print "Skip generating seperate intermediate files for outgroup" +# +# def generate_indel_paste_command(): +# """ +# This Function will take all the *label file and generate/paste it column wise to generate a matrix. These matrix will be used in downstream analysis. 
+# :param: null +# :return: null +# """ +# +# """ Paste/Generate and sort SNP Filter Label Matrix """ +# paste_file = args.filter2_only_snp_vcf_dir + "/paste_indel_label_files.sh" +# f4=open(paste_file, 'w+') +# paste_command = "paste %s/unique_indel_positions_file" % args.filter2_only_snp_vcf_dir +# for i in vcf_filenames: +# label_file = i.replace('_filter2_final.vcf_no_proximate_snp.vcf', '_filter2_indel_final.vcf_indel_positions_label') +# paste_command = paste_command + " " + label_file +# header_awk_cmd = "awk \'{ORS=\"\t\";}{print $1}\' %s > %s/header.txt" % (args.filter2_only_snp_vcf_filenames, args.filter2_only_snp_vcf_dir) +# sed_header = "sed -i \'s/^/\t/\' %s/header.txt" % args.filter2_only_snp_vcf_dir +# sed_header_2 = "sed -i -e \'$a\\' %s/header.txt" % args.filter2_only_snp_vcf_dir +# +# #os.system(header_awk_cmd) +# #os.system(sed_header) +# #os.system(sed_header_2) +# +# call("%s" % header_awk_cmd, logger) +# call("%s" % sed_header, logger) +# call("%s" % sed_header_2, logger) +# +# +# +# temp_paste_command = paste_command + " > %s/temp_indel_label_final_raw.txt" % args.filter2_only_snp_vcf_dir +# paste_command = paste_command + " > %s/All_indel_label_final_raw" % args.filter2_only_snp_vcf_dir +# f4.write(paste_command) +# f4.close() +# +# call("bash %s" % paste_file, logger) +# +# sort_All_label_cmd = "sort -n -k1,1 %s/All_indel_label_final_raw > %s/All_indel_label_final_sorted.txt" % (args.filter2_only_snp_vcf_dir, args.filter2_only_snp_vcf_dir) +# paste_command_header = "cat %s/header.txt %s/All_indel_label_final_sorted.txt > %s/All_indel_label_final_sorted_header.txt" % (args.filter2_only_snp_vcf_dir, args.filter2_only_snp_vcf_dir, args.filter2_only_snp_vcf_dir) +# +# ls = [] +# for i in vcf_filenames: +# label_file = i.replace('_filter2_final.vcf_no_proximate_snp.vcf', '_filter2_indel_final.vcf_indel_positions_label') +# ls.append(label_file) +# ls.insert(0, "%s/unique_indel_positions_file" % args.filter2_only_snp_vcf_dir) +# +# with 
open('%s/All_indel_label_final_raw.sh' % args.filter2_only_snp_vcf_dir, 'w') as outfile2: +# outfile2.write(paste_command) +# outfile2.close() +# +# with open('%s/temp_indel_label_final_raw.txt.sh' % args.filter2_only_snp_vcf_dir, 'w') as outfile2: +# outfile2.write(temp_paste_command) +# outfile2.close() +# +# # Why is this not working? +# call("bash %s/All_indel_label_final_raw.sh" % args.filter2_only_snp_vcf_dir, logger) +# call("bash %s/temp_indel_label_final_raw.txt.sh" % args.filter2_only_snp_vcf_dir, logger) +# keep_logging('Finished pasting...DONE', 'Finished pasting...DONE', logger, 'info') +# +# """ +# remove this lines +# #subprocess.call(["%s" % paste_command], shell=True) +# #subprocess.call(["%s" % temp_paste_command], shell=True) +# #subprocess.check_call('%s' % paste_command) +# #subprocess.check_call('%s' % temp_paste_command) +# #os.system(paste_command) change +# #os.system(temp_paste_command) change +# """ +# +# call("%s" % sort_All_label_cmd, logger) +# call("%s" % paste_command_header, logger) +# +# """ Assign numeric code to each variant filter reason""" +# subprocess.call(["sed -i 's/reference_unmapped_position/0/g' %s/All_indel_label_final_sorted_header.txt" % args.filter2_only_snp_vcf_dir], shell=True) +# subprocess.call(["sed -i 's/reference_allele/1/g' %s/All_indel_label_final_sorted_header.txt" % args.filter2_only_snp_vcf_dir], shell=True) +# subprocess.call(["sed -i 's/VARIANT/1TRUE/g' %s/All_indel_label_final_sorted_header.txt" % args.filter2_only_snp_vcf_dir], shell=True) +# subprocess.call(["sed -i 's/LowAF_QUAL_DP_proximate_SNP/2/g' %s/All_indel_label_final_sorted_header.txt" % args.filter2_only_snp_vcf_dir], shell=True) +# subprocess.call(["sed -i 's/LowAF_DP_QUAL_proximate_SNP/2/g' %s/All_indel_label_final_sorted_header.txt" % args.filter2_only_snp_vcf_dir], shell=True) +# subprocess.call(["sed -i 's/LowAF_QUAL_proximate_SNP/2/g' %s/All_indel_label_final_sorted_header.txt" % args.filter2_only_snp_vcf_dir], shell=True) +# 
subprocess.call(["sed -i 's/LowAF_DP_proximate_SNP/2/g' %s/All_indel_label_final_sorted_header.txt" % args.filter2_only_snp_vcf_dir], shell=True) +# subprocess.call(["sed -i 's/LowAF_proximate_SNP/2/g' %s/All_indel_label_final_sorted_header.txt" % args.filter2_only_snp_vcf_dir], shell=True) +# subprocess.call(["sed -i 's/LowAF_QUAL_DP/2/g' %s/All_indel_label_final_sorted_header.txt" % args.filter2_only_snp_vcf_dir], shell=True) +# subprocess.call(["sed -i 's/LowAF_DP_QUAL/2/g' %s/All_indel_label_final_sorted_header.txt" % args.filter2_only_snp_vcf_dir], shell=True) +# subprocess.call(["sed -i 's/LowAF_QUAL/2/g' %s/All_indel_label_final_sorted_header.txt" % args.filter2_only_snp_vcf_dir], shell=True) +# subprocess.call(["sed -i 's/LowAF_DP/2/g' %s/All_indel_label_final_sorted_header.txt" % args.filter2_only_snp_vcf_dir], shell=True) +# subprocess.call(["sed -i 's/HighAF_QUAL_DP_proximate_SNP/4/g' %s/All_indel_label_final_sorted_header.txt" % args.filter2_only_snp_vcf_dir], shell=True) +# subprocess.call(["sed -i 's/HighAF_DP_QUAL_proximate_SNP/4/g' %s/All_indel_label_final_sorted_header.txt" % args.filter2_only_snp_vcf_dir], shell=True) +# subprocess.call(["sed -i 's/HighAF_QUAL_proximate_SNP/4/g' %s/All_indel_label_final_sorted_header.txt" % args.filter2_only_snp_vcf_dir], shell=True) +# subprocess.call(["sed -i 's/HighAF_DP_proximate_SNP/4/g' %s/All_indel_label_final_sorted_header.txt" % args.filter2_only_snp_vcf_dir], shell=True) +# subprocess.call(["sed -i 's/HighAF_proximate_SNP/7/g' %s/All_indel_label_final_sorted_header.txt" % args.filter2_only_snp_vcf_dir], shell=True) +# subprocess.call(["sed -i 's/HighAF_QUAL_DP/3/g' %s/All_indel_label_final_sorted_header.txt" % args.filter2_only_snp_vcf_dir], shell=True) +# subprocess.call(["sed -i 's/HighAF_DP_QUAL/3/g' %s/All_indel_label_final_sorted_header.txt" % args.filter2_only_snp_vcf_dir], shell=True) +# subprocess.call(["sed -i 's/HighAF_QUAL/3/g' %s/All_indel_label_final_sorted_header.txt" % 
args.filter2_only_snp_vcf_dir], shell=True) +# subprocess.call(["sed -i 's/HighAF_DP/3/g' %s/All_indel_label_final_sorted_header.txt" % args.filter2_only_snp_vcf_dir], shell=True) +# subprocess.call(["sed -i 's/LowAF/5/g' %s/All_indel_label_final_sorted_header.txt" % args.filter2_only_snp_vcf_dir], shell=True) +# subprocess.call(["sed -i 's/HighAF/6/g' %s/All_indel_label_final_sorted_header.txt" % args.filter2_only_snp_vcf_dir], shell=True) +# remove_unwanted_text = "sed -i \'s/_filter2_final.vcf_no_proximate_snp.vcf//g\' %s/All_indel_label_final_sorted_header.txt" % args.filter2_only_snp_vcf_dir +# call("%s" % remove_unwanted_text, logger) +# +# def generate_indel_paste_command_outgroup(): +# """ +# This Function will take all the *label file and generate/paste it column wise to generate a matrix. These matrix will be used in downstream analysis. +# :param: null +# :return: null +# """ +# +# if args.outgroup: +# """ Paste/Generate and sort SNP Filter Label Matrix """ +# # define a file name where the paste commands will be saved. 
+# paste_file = args.filter2_only_snp_vcf_dir + "/paste_indel_label_files_outgroup.sh" +# f4=open(paste_file, 'w+') +# +# # initiate paste command string +# paste_command = "paste %s/unique_indel_positions_file" % args.filter2_only_snp_vcf_dir +# +# +# # Generate paste command +# for i in vcf_filenames: +# if "%s_filter2_final.vcf_no_proximate_snp.vcf" % outgroup not in i: +# label_file = i.replace('_filter2_final.vcf_no_proximate_snp.vcf', '_filter2_indel_final.vcf_indel_positions_label') +# paste_command = paste_command + " " + label_file +# # Change header awk command to exclude outgroup +# #header_awk_cmd = "awk \'{ORS=\"\t\";}{print $1}\' %s > %s/header.txt" % (args.filter2_only_snp_vcf_filenames, args.filter2_only_snp_vcf_dir) +# header_awk_cmd = "grep -v \'%s\' %s | awk \'{ORS=\"\t\";}{print $1}\' > %s/header_outgroup.txt" % (outgroup, args.filter2_only_snp_vcf_filenames, args.filter2_only_snp_vcf_dir) +# sed_header = "sed -i \'s/^/\t/\' %s/header_outgroup.txt" % args.filter2_only_snp_vcf_dir +# sed_header_2 = "sed -i -e \'$a\\' %s/header_outgroup.txt" % args.filter2_only_snp_vcf_dir +# +# +# +# call("%s" % header_awk_cmd, logger) +# call("%s" % sed_header, logger) +# call("%s" % sed_header_2, logger) +# +# +# +# temp_paste_command = paste_command + " > %s/temp_indel_label_final_raw_outgroup.txt" % args.filter2_only_snp_vcf_dir +# paste_command = paste_command + " > %s/All_indel_label_final_raw_outgroup" % args.filter2_only_snp_vcf_dir +# f4.write(paste_command) +# f4.close() +# +# call("bash %s" % paste_file, logger) +# +# sort_All_label_cmd = "sort -n -k1,1 %s/All_indel_label_final_raw_outgroup > %s/All_indel_label_final_sorted_outgroup.txt" % (args.filter2_only_snp_vcf_dir, args.filter2_only_snp_vcf_dir) +# paste_command_header = "cat %s/header_outgroup.txt %s/All_indel_label_final_sorted_outgroup.txt > %s/All_indel_label_final_sorted_header_outgroup.txt" % (args.filter2_only_snp_vcf_dir, args.filter2_only_snp_vcf_dir, args.filter2_only_snp_vcf_dir) +# +# 
ls = [] +# for i in vcf_filenames: +# label_file = i.replace('_filter2_final.vcf_no_proximate_snp.vcf', '_filter2_indel_final.vcf_indel_positions_label') +# ls.append(label_file) +# ls.insert(0, "%s/unique_indel_positions_file" % args.filter2_only_snp_vcf_dir) +# +# with open('%s/All_indel_label_final_raw_outgroup.sh' % args.filter2_only_snp_vcf_dir, 'w') as outfile2: +# outfile2.write(paste_command) +# outfile2.close() +# +# with open('%s/temp_indel_label_final_raw_outgroup.txt.sh' % args.filter2_only_snp_vcf_dir, 'w') as outfile2: +# outfile2.write(temp_paste_command) +# outfile2.close() +# +# # Why is this not working? +# call("bash %s/All_indel_label_final_raw_outgroup.sh" % args.filter2_only_snp_vcf_dir, logger) +# call("bash %s/temp_indel_label_final_raw_outgroup.txt.sh" % args.filter2_only_snp_vcf_dir, logger) +# keep_logging('Finished pasting...DONE', 'Finished pasting...DONE', logger, 'info') +# +# """ +# remove this lines +# #subprocess.call(["%s" % paste_command], shell=True) +# #subprocess.call(["%s" % temp_paste_command], shell=True) +# #subprocess.check_call('%s' % paste_command) +# #subprocess.check_call('%s' % temp_paste_command) +# #os.system(paste_command) change +# #os.system(temp_paste_command) change +# """ +# +# call("%s" % sort_All_label_cmd, logger) +# call("%s" % paste_command_header, logger) +# +# """ Assign numeric code to each variant filter reason""" +# subprocess.call(["sed -i 's/reference_unmapped_position/0/g' %s/All_indel_label_final_sorted_header_outgroup.txt" % args.filter2_only_snp_vcf_dir], shell=True) +# subprocess.call(["sed -i 's/reference_allele/1/g' %s/All_indel_label_final_sorted_header_outgroup.txt" % args.filter2_only_snp_vcf_dir], shell=True) +# subprocess.call(["sed -i 's/VARIANT/1TRUE/g' %s/All_indel_label_final_sorted_header_outgroup.txt" % args.filter2_only_snp_vcf_dir], shell=True) +# subprocess.call(["sed -i 's/LowAF_QUAL_DP_proximate_SNP/2/g' %s/All_indel_label_final_sorted_header_outgroup.txt" % 
args.filter2_only_snp_vcf_dir], shell=True) +# subprocess.call(["sed -i 's/LowAF_DP_QUAL_proximate_SNP/2/g' %s/All_indel_label_final_sorted_header_outgroup.txt" % args.filter2_only_snp_vcf_dir], shell=True) +# subprocess.call(["sed -i 's/LowAF_QUAL_proximate_SNP/2/g' %s/All_indel_label_final_sorted_header_outgroup.txt" % args.filter2_only_snp_vcf_dir], shell=True) +# subprocess.call(["sed -i 's/LowAF_DP_proximate_SNP/2/g' %s/All_indel_label_final_sorted_header_outgroup.txt" % args.filter2_only_snp_vcf_dir], shell=True) +# subprocess.call(["sed -i 's/LowAF_proximate_SNP/2/g' %s/All_indel_label_final_sorted_header_outgroup.txt" % args.filter2_only_snp_vcf_dir], shell=True) +# subprocess.call(["sed -i 's/LowAF_QUAL_DP/2/g' %s/All_indel_label_final_sorted_header_outgroup.txt" % args.filter2_only_snp_vcf_dir], shell=True) +# subprocess.call(["sed -i 's/LowAF_DP_QUAL/2/g' %s/All_indel_label_final_sorted_header_outgroup.txt" % args.filter2_only_snp_vcf_dir], shell=True) +# subprocess.call(["sed -i 's/LowAF_QUAL/2/g' %s/All_indel_label_final_sorted_header_outgroup.txt" % args.filter2_only_snp_vcf_dir], shell=True) +# subprocess.call(["sed -i 's/LowAF_DP/2/g' %s/All_indel_label_final_sorted_header_outgroup.txt" % args.filter2_only_snp_vcf_dir], shell=True) +# subprocess.call(["sed -i 's/HighAF_QUAL_DP_proximate_SNP/4/g' %s/All_indel_label_final_sorted_header_outgroup.txt" % args.filter2_only_snp_vcf_dir], shell=True) +# subprocess.call(["sed -i 's/HighAF_DP_QUAL_proximate_SNP/4/g' %s/All_indel_label_final_sorted_header_outgroup.txt" % args.filter2_only_snp_vcf_dir], shell=True) +# subprocess.call(["sed -i 's/HighAF_QUAL_proximate_SNP/4/g' %s/All_indel_label_final_sorted_header_outgroup.txt" % args.filter2_only_snp_vcf_dir], shell=True) +# subprocess.call(["sed -i 's/HighAF_DP_proximate_SNP/4/g' %s/All_indel_label_final_sorted_header_outgroup.txt" % args.filter2_only_snp_vcf_dir], shell=True) +# subprocess.call(["sed -i 's/HighAF_proximate_SNP/7/g' 
%s/All_indel_label_final_sorted_header_outgroup.txt" % args.filter2_only_snp_vcf_dir], shell=True) +# subprocess.call(["sed -i 's/HighAF_QUAL_DP/3/g' %s/All_indel_label_final_sorted_header_outgroup.txt" % args.filter2_only_snp_vcf_dir], shell=True) +# subprocess.call(["sed -i 's/HighAF_DP_QUAL/3/g' %s/All_indel_label_final_sorted_header_outgroup.txt" % args.filter2_only_snp_vcf_dir], shell=True) +# subprocess.call(["sed -i 's/HighAF_QUAL/3/g' %s/All_indel_label_final_sorted_header_outgroup.txt" % args.filter2_only_snp_vcf_dir], shell=True) +# subprocess.call(["sed -i 's/HighAF_DP/3/g' %s/All_indel_label_final_sorted_header_outgroup.txt" % args.filter2_only_snp_vcf_dir], shell=True) +# subprocess.call(["sed -i 's/LowAF/5/g' %s/All_indel_label_final_sorted_header_outgroup.txt" % args.filter2_only_snp_vcf_dir], shell=True) +# subprocess.call(["sed -i 's/HighAF/6/g' %s/All_indel_label_final_sorted_header_outgroup.txt" % args.filter2_only_snp_vcf_dir], shell=True) +# remove_unwanted_text = "sed -i \'s/_filter2_final.vcf_no_proximate_snp.vcf//g\' %s/All_indel_label_final_sorted_header_outgroup.txt" % args.filter2_only_snp_vcf_dir +# call("%s" % remove_unwanted_text, logger) +# else: +# print "Skip generating seperate intermediate files for outgroup" +# +# def generate_position_label_data_matrix(): +# +# """ +# Generate different list of Positions using the matrix All_label_final_sorted_header.txt. +# +# (Defining Core Variant Position: Variant Position which was not filtered out in any of the other samples due to variant filter parameter and also this position was present in all the samples(not unmapped)). +# +# Filtered Position label matrix: +# List of non-core positions. These positions didn't make it to the final core list because it was filtered out in one of the samples. +# +# Only_ref_variant_positions_for_closely_matrix.txt : +# Those Positions where the variant was either reference allele or a variant that passed all the variant filter parameters. 
+# +# :param: null +# :return: null +# +# """ +# def generate_position_label_data_matrix_All_label(): +# position_label = OrderedDict() +# f1 = open("%s/Only_ref_variant_positions_for_closely" % args.filter2_only_snp_vcf_dir, 'w+') +# f2 = open("%s/Only_ref_variant_positions_for_closely_matrix.txt" % args.filter2_only_snp_vcf_dir, 'w+') +# f3 = open("%s/Only_filtered_positions_for_closely_matrix.txt" % args.filter2_only_snp_vcf_dir, 'w+') +# f4 = open( +# "%s/Only_filtered_positions_for_closely_matrix_TRUE_variants_filtered_out.txt" % args.filter2_only_snp_vcf_dir, +# 'w+') +# if args.outgroup: +# with open("%s/All_label_final_sorted_header_outgroup.txt" % args.filter2_only_snp_vcf_dir, 'rU') as csv_file: +# keep_logging( +# 'Reading All label positions file: %s/All_label_final_sorted_header.txt \n' % args.filter2_only_snp_vcf_dir, +# 'Reading All label positions file: %s/All_label_final_sorted_header.txt \n' % args.filter2_only_snp_vcf_dir, +# logger, 'info') +# csv_reader = csv.reader(csv_file, delimiter='\t') +# next(csv_reader, None) +# for row in csv_reader: +# position_label[row[0]] = row[1:] +# keep_logging('Generating different list of Positions and heatmap data matrix... \n', +# 'Generating different list of Positions and heatmap data matrix... 
\n', logger, 'info') +# print_string_header = "\t" +# for i in vcf_filenames: +# print_string_header = print_string_header + os.path.basename(i) + "\t" +# f2.write('\t' + print_string_header.strip() + '\n') +# f3.write('\t' + print_string_header.strip() + '\n') +# f4.write('\t' + print_string_header.strip() + '\n') +# for value in position_label: +# lll = ['0', '2', '3', '4', '5', '6', '7'] +# ref_var = ['1', '1TRUE'] +# if set(ref_var) & set(position_label[value]): +# if set(lll) & set(position_label[value]): +# if int(value) not in outgroup_specific_positions: +# print_string = "" +# for i in position_label[value]: +# print_string = print_string + "\t" + i +# STRR2 = value + print_string + "\n" +# f3.write(STRR2) +# if position_label[value].count('1TRUE') >= 2: +# f4.write('1\n') +# else: +# f4.write('0\n') +# else: +# if int(value) not in outgroup_specific_positions: +# strr = value + "\n" +# f1.write(strr) +# STRR3 = value + "\t" + str(position_label[value]) + "\n" +# f2.write(STRR3) +# csv_file.close() +# f1.close() +# f2.close() +# f3.close() +# f4.close() +# subprocess.call(["sed -i 's/_filter2_final.vcf_no_proximate_snp.vcf//g' %s/Only_ref_variant_positions_for_closely" % args.filter2_only_snp_vcf_dir], shell=True) +# subprocess.call(["sed -i 's/_filter2_final.vcf_no_proximate_snp.vcf//g' %s/Only_ref_variant_positions_for_closely_matrix.txt" % args.filter2_only_snp_vcf_dir], shell=True) +# subprocess.call(["sed -i 's/_filter2_final.vcf_no_proximate_snp.vcf//g' %s/Only_filtered_positions_for_closely_matrix.txt" % args.filter2_only_snp_vcf_dir], shell=True) +# subprocess.call(["sed -i 's/_filter2_final.vcf_no_proximate_snp.vcf//g' %s/Only_filtered_positions_for_closely_matrix_TRUE_variants_filtered_out.txt" % args.filter2_only_snp_vcf_dir], shell=True) +# subprocess.call(["sed -i 's/1TRUE/-1/g' %s/Only_filtered_positions_for_closely_matrix.txt" % args.filter2_only_snp_vcf_dir], shell=True) +# +# else: +# with open("%s/All_label_final_sorted_header.txt" % 
args.filter2_only_snp_vcf_dir, 'rU') as csv_file: +# keep_logging( +# 'Reading All label positions file: %s/All_label_final_sorted_header.txt \n' % args.filter2_only_snp_vcf_dir, +# 'Reading All label positions file: %s/All_label_final_sorted_header.txt \n' % args.filter2_only_snp_vcf_dir, +# logger, 'info') +# csv_reader = csv.reader(csv_file, delimiter='\t') +# next(csv_reader, None) +# for row in csv_reader: +# position_label[row[0]] = row[1:] +# keep_logging('Generating different list of Positions and heatmap data matrix... \n', +# 'Generating different list of Positions and heatmap data matrix... \n', logger, 'info') +# print_string_header = "\t" +# for i in vcf_filenames: +# print_string_header = print_string_header + os.path.basename(i) + "\t" +# f2.write('\t' + print_string_header.strip() + '\n') +# f3.write('\t' + print_string_header.strip() + '\n') +# f4.write('\t' + print_string_header.strip() + '\n') +# for value in position_label: +# lll = ['0', '2', '3', '4', '5', '6', '7'] +# ref_var = ['1', '1TRUE'] +# if set(ref_var) & set(position_label[value]): +# if set(lll) & set(position_label[value]): +# +# print_string = "" +# for i in position_label[value]: +# print_string = print_string + "\t" + i +# STRR2 = value + print_string + "\n" +# f3.write(STRR2) +# if position_label[value].count('1TRUE') >= 2: +# f4.write('1\n') +# else: +# f4.write('0\n') +# else: +# +# strr = value + "\n" +# f1.write(strr) +# STRR3 = value + "\t" + str(position_label[value]) + "\n" +# f2.write(STRR3) +# csv_file.close() +# f1.close() +# f2.close() +# f3.close() +# f4.close() +# subprocess.call(["sed -i 's/_filter2_final.vcf_no_proximate_snp.vcf//g' %s/Only_ref_variant_positions_for_closely" % args.filter2_only_snp_vcf_dir], +# shell=True) +# subprocess.call(["sed -i 's/_filter2_final.vcf_no_proximate_snp.vcf//g' %s/Only_ref_variant_positions_for_closely_matrix.txt" % args.filter2_only_snp_vcf_dir], +# shell=True) +# subprocess.call(["sed -i 
's/_filter2_final.vcf_no_proximate_snp.vcf//g' %s/Only_filtered_positions_for_closely_matrix.txt" % args.filter2_only_snp_vcf_dir], +# shell=True) +# subprocess.call(["sed -i 's/_filter2_final.vcf_no_proximate_snp.vcf//g' %s/Only_filtered_positions_for_closely_matrix_TRUE_variants_filtered_out.txt" % args.filter2_only_snp_vcf_dir], +# shell=True) +# subprocess.call(["sed -i 's/1TRUE/-1/g' %s/Only_filtered_positions_for_closely_matrix.txt" % args.filter2_only_snp_vcf_dir], +# shell=True) +# +# def temp_generate_position_label_data_matrix_All_label(): +# +# """ +# Read temp_label_final_raw.txt SNP position label data matrix for generating barplot statistics. +# """ +# temp_position_label = OrderedDict() +# f33=open("%s/temp_Only_filtered_positions_for_closely_matrix.txt" % args.filter2_only_snp_vcf_dir, 'w+') +# print_string_header = "\t" +# +# if args.outgroup: +# for i in vcf_filenames: +# if "%s_filter2_final.vcf_no_proximate_snp.vcf" % outgroup not in i: +# print_string_header = print_string_header + os.path.basename(i) + "\t" +# else: +# for i in vcf_filenames: +# print_string_header = print_string_header + os.path.basename(i) + "\t" +# +# f33.write('\t' + print_string_header.strip() + '\n') +# keep_logging('Reading temporary label positions file: %s/temp_label_final_raw.txt \n' % args.filter2_only_snp_vcf_dir, 'Reading temporary label positions file: %s/temp_label_final_raw.txt \n' % args.filter2_only_snp_vcf_dir, logger, 'info') +# lll = ['reference_unmapped_position', 'LowFQ', 'LowFQ_DP', 'LowFQ_QUAL', 'LowFQ_DP_QUAL', 'LowFQ_QUAL_DP', 'HighFQ_DP', 'HighFQ_QUAL', 'HighFQ_DP_QUAL', 'HighFQ_QUAL_DP', 'HighFQ', 'LowFQ_proximate_SNP', 'LowFQ_DP_proximate_SNP', 'LowFQ_QUAL_proximate_SNP', 'LowFQ_DP_QUAL_proximate_SNP', 'LowFQ_QUAL_DP_proximate_SNP', 'HighFQ_DP_proximate_SNP', 'HighFQ_QUAL_proximate_SNP', 'HighFQ_DP_QUAL_proximate_SNP', 'HighFQ_QUAL_DP_proximate_SNP', 'HighFQ_proximate_SNP', '_proximate_SNP'] +# ref_var = ['reference_allele', 'VARIANT'] +# +# if 
args.outgroup: +# print "here" +# with open("%s/temp_label_final_raw_outgroup.txt" % args.filter2_only_snp_vcf_dir, 'r') as csv_file: +# csv_reader = csv.reader(csv_file, delimiter='\t') +# next(csv_reader, None) +# for row in csv_reader: +# if set(ref_var) & set(row[1:]): +# if set(lll) & set(row[1:]): +# if int(row[0]) not in outgroup_specific_positions: +# +# print_string = "" +# for i in row[1:]: +# print_string = print_string + "\t" + i +# STRR2 = row[0] + print_string + "\n" +# f33.write(STRR2) +# csv_file.close() +# f33.close() +# +# else: +# with open("%s/temp_label_final_raw.txt" % args.filter2_only_snp_vcf_dir, 'r') as csv_file: +# csv_reader = csv.reader(csv_file, delimiter='\t') +# next(csv_reader, None) +# for row in csv_reader: +# if set(ref_var) & set(row[1:]): +# if set(lll) & set(row[1:]): +# +# print_string = "" +# for i in row[1:]: +# print_string = print_string + "\t" + i +# STRR2 = row[0] + print_string + "\n" +# f33.write(STRR2) +# csv_file.close() +# f33.close() +# """ +# Read temp_Only_filtered_positions_for_closely_matrix file and generate a matrix of positions that are being filtered just because of FQ +# """ +# temp_position_label_FQ = OrderedDict() +# f44=open("%s/temp_Only_filtered_positions_for_closely_matrix_FQ.txt" % args.filter2_only_snp_vcf_dir, 'w+') +# with open("%s/temp_Only_filtered_positions_for_closely_matrix.txt" % args.filter2_only_snp_vcf_dir, 'rU') as csv_file: +# keep_logging('Reading temporary Only_filtered_positions label file: %s/temp_Only_filtered_positions_for_closely_matrix.txt \n' % args.filter2_only_snp_vcf_dir, 'Reading temporary Only_filtered_positions label file: %s/temp_Only_filtered_positions_for_closely_matrix.txt \n' % args.filter2_only_snp_vcf_dir, logger, 'info') +# csv_reader = csv.reader(csv_file, delimiter='\t') +# next(csv_reader, None) +# +# for row in csv_reader: +# temp_position_label_FQ[row[0]] = row[1:] +# print_string_header = "\t" +# for i in vcf_filenames: +# print_string_header = 
print_string_header + os.path.basename(i) + "\t" +# f44.write('\t' + print_string_header.strip() + '\n') +# for value in temp_position_label_FQ: +# lll = ['LowFQ'] +# if set(lll) & set(temp_position_label_FQ[value]): +# +# print_string = "" +# for i in temp_position_label_FQ[value]: +# print_string = print_string + "\t" + i +# STRR2 = value + print_string + "\n" +# f44.write(STRR2) +# f44.close() +# csv_file.close() +# f44.close() +# +# """ +# Perform Sed on temp files. Find a faster way to do this. +# """ +# subprocess.call(["sed -i 's/_filter2_final.vcf_no_proximate_snp.vcf//g' %s/temp_Only_filtered_positions_for_closely_matrix_FQ.txt" % args.filter2_only_snp_vcf_dir], shell=True) +# subprocess.call(["sed -i 's/reference_unmapped_position/0/g' %s/temp_Only_filtered_positions_for_closely_matrix_FQ.txt" % args.filter2_only_snp_vcf_dir], shell=True) +# subprocess.call(["sed -i 's/reference_allele/1/g' %s/temp_Only_filtered_positions_for_closely_matrix_FQ.txt" % args.filter2_only_snp_vcf_dir], shell=True) +# subprocess.call(["sed -i 's/VARIANT/2/g' %s/temp_Only_filtered_positions_for_closely_matrix_FQ.txt" % args.filter2_only_snp_vcf_dir], shell=True) +# subprocess.call(["sed -i 's/LowFQ_QUAL_DP_proximate_SNP/4/g' %s/temp_Only_filtered_positions_for_closely_matrix_FQ.txt" % args.filter2_only_snp_vcf_dir], shell=True) +# subprocess.call(["sed -i 's/LowFQ_DP_QUAL_proximate_SNP/4/g' %s/temp_Only_filtered_positions_for_closely_matrix_FQ.txt" % args.filter2_only_snp_vcf_dir], shell=True) +# subprocess.call(["sed -i 's/LowFQ_QUAL_proximate_SNP/4/g' %s/temp_Only_filtered_positions_for_closely_matrix_FQ.txt" % args.filter2_only_snp_vcf_dir], shell=True) +# subprocess.call(["sed -i 's/LowFQ_DP_proximate_SNP/4/g' %s/temp_Only_filtered_positions_for_closely_matrix_FQ.txt" % args.filter2_only_snp_vcf_dir], shell=True) +# subprocess.call(["sed -i 's/LowFQ_proximate_SNP/4/g' %s/temp_Only_filtered_positions_for_closely_matrix_FQ.txt" % args.filter2_only_snp_vcf_dir], shell=True) +# 
subprocess.call(["sed -i 's/LowFQ_QUAL_DP/4/g' %s/temp_Only_filtered_positions_for_closely_matrix_FQ.txt" % args.filter2_only_snp_vcf_dir], shell=True) +# subprocess.call(["sed -i 's/LowFQ_DP_QUAL/4/g' %s/temp_Only_filtered_positions_for_closely_matrix_FQ.txt" % args.filter2_only_snp_vcf_dir], shell=True) +# subprocess.call(["sed -i 's/LowFQ_QUAL/4/g' %s/temp_Only_filtered_positions_for_closely_matrix_FQ.txt" % args.filter2_only_snp_vcf_dir], shell=True) +# subprocess.call(["sed -i 's/LowFQ_DP/4/g' %s/temp_Only_filtered_positions_for_closely_matrix_FQ.txt" % args.filter2_only_snp_vcf_dir], shell=True) +# subprocess.call(["sed -i 's/HighFQ_QUAL_DP_proximate_SNP/4/g' %s/temp_Only_filtered_positions_for_closely_matrix_FQ.txt" % args.filter2_only_snp_vcf_dir], shell=True) +# subprocess.call(["sed -i 's/HighFQ_DP_QUAL_proximate_SNP/4/g' %s/temp_Only_filtered_positions_for_closely_matrix_FQ.txt" % args.filter2_only_snp_vcf_dir], shell=True) +# subprocess.call(["sed -i 's/HighFQ_QUAL_proximate_SNP/4/g' %s/temp_Only_filtered_positions_for_closely_matrix_FQ.txt" % args.filter2_only_snp_vcf_dir], shell=True) +# subprocess.call(["sed -i 's/HighFQ_DP_proximate_SNP/4/g' %s/temp_Only_filtered_positions_for_closely_matrix_FQ.txt" % args.filter2_only_snp_vcf_dir], shell=True) +# subprocess.call(["sed -i 's/HighFQ_proximate_SNP/4/g' %s/temp_Only_filtered_positions_for_closely_matrix_FQ.txt" % args.filter2_only_snp_vcf_dir], shell=True) +# subprocess.call(["sed -i 's/HighFQ_QUAL_DP/4/g' %s/temp_Only_filtered_positions_for_closely_matrix_FQ.txt" % args.filter2_only_snp_vcf_dir], shell=True) +# subprocess.call(["sed -i 's/HighFQ_DP_QUAL/4/g' %s/temp_Only_filtered_positions_for_closely_matrix_FQ.txt" % args.filter2_only_snp_vcf_dir], shell=True) +# subprocess.call(["sed -i 's/HighFQ_QUAL/4/g' %s/temp_Only_filtered_positions_for_closely_matrix_FQ.txt" % args.filter2_only_snp_vcf_dir], shell=True) +# subprocess.call(["sed -i 's/HighFQ_DP/4/g' 
%s/temp_Only_filtered_positions_for_closely_matrix_FQ.txt" % args.filter2_only_snp_vcf_dir], shell=True) +# subprocess.call(["sed -i 's/LowFQ/3/g' %s/temp_Only_filtered_positions_for_closely_matrix_FQ.txt" % args.filter2_only_snp_vcf_dir], shell=True) +# subprocess.call(["sed -i 's/HighFQ/4/g' %s/temp_Only_filtered_positions_for_closely_matrix_FQ.txt" % args.filter2_only_snp_vcf_dir], shell=True) +# +# +# """ +# Read temp_Only_filtered_positions_for_closely_matrix file and generate a matrix of positions that are being filtered just because of Dp +# """ +# temp_position_label_DP = OrderedDict() +# f44=open("%s/temp_Only_filtered_positions_for_closely_matrix_DP.txt" % args.filter2_only_snp_vcf_dir, 'w+') +# with open("%s/temp_Only_filtered_positions_for_closely_matrix.txt" % args.filter2_only_snp_vcf_dir, 'rU') as csv_file: +# keep_logging('Reading temporary Only_filtered_positions label file: %s/temp_Only_filtered_positions_for_closely_matrix.txt \n' % args.filter2_only_snp_vcf_dir, 'Reading temporary Only_filtered_positions label file: %s/temp_Only_filtered_positions_for_closely_matrix.txt \n' % args.filter2_only_snp_vcf_dir, logger, 'info') +# csv_reader = csv.reader(csv_file, delimiter='\t') +# next(csv_reader, None) +# for row in csv_reader: +# temp_position_label_DP[row[0]] = row[1:] +# print_string_header = "\t" +# for i in vcf_filenames: +# print_string_header = print_string_header + os.path.basename(i) + "\t" +# f44.write('\t' + print_string_header.strip() + '\n') +# for value in temp_position_label_DP: +# lll = ['HighFQ_DP'] +# ref_var = ['reference_allele', 'VARIANT'] +# if set(lll) & set(temp_position_label_FQ[value]): +# +# print_string = "" +# for i in temp_position_label_FQ[value]: +# print_string = print_string + "\t" + i +# STRR2 = value + print_string + "\n" +# f44.write(STRR2) +# f44.close() +# csv_file.close() +# +# """ +# Perform Sed on temp files. Find a faster way to do this. 
+# """ +# subprocess.call(["sed -i 's/_filter2_final.vcf_no_proximate_snp.vcf//g' %s/temp_Only_filtered_positions_for_closely_matrix_DP.txt" % args.filter2_only_snp_vcf_dir], shell=True) +# subprocess.call(["sed -i 's/reference_unmapped_position/0/g' %s/temp_Only_filtered_positions_for_closely_matrix_DP.txt" % args.filter2_only_snp_vcf_dir], shell=True) +# subprocess.call(["sed -i 's/reference_allele/1/g' %s/temp_Only_filtered_positions_for_closely_matrix_DP.txt" % args.filter2_only_snp_vcf_dir], shell=True) +# subprocess.call(["sed -i 's/VARIANT/2/g' %s/temp_Only_filtered_positions_for_closely_matrix_DP.txt" % args.filter2_only_snp_vcf_dir], shell=True) +# subprocess.call(["sed -i 's/LowFQ_QUAL_DP_proximate_SNP/4/g' %s/temp_Only_filtered_positions_for_closely_matrix_DP.txt" % args.filter2_only_snp_vcf_dir], shell=True) +# subprocess.call(["sed -i 's/LowFQ_DP_QUAL_proximate_SNP/4/g' %s/temp_Only_filtered_positions_for_closely_matrix_DP.txt" % args.filter2_only_snp_vcf_dir], shell=True) +# subprocess.call(["sed -i 's/LowFQ_QUAL_proximate_SNP/4/g' %s/temp_Only_filtered_positions_for_closely_matrix_DP.txt" % args.filter2_only_snp_vcf_dir], shell=True) +# subprocess.call(["sed -i 's/LowFQ_DP_proximate_SNP/4/g' %s/temp_Only_filtered_positions_for_closely_matrix_DP.txt" % args.filter2_only_snp_vcf_dir], shell=True) +# subprocess.call(["sed -i 's/LowFQ_proximate_SNP/4/g' %s/temp_Only_filtered_positions_for_closely_matrix_DP.txt" % args.filter2_only_snp_vcf_dir], shell=True) +# subprocess.call(["sed -i 's/LowFQ_QUAL_DP/4/g' %s/temp_Only_filtered_positions_for_closely_matrix_DP.txt" % args.filter2_only_snp_vcf_dir], shell=True) +# subprocess.call(["sed -i 's/LowFQ_DP_QUAL/4/g' %s/temp_Only_filtered_positions_for_closely_matrix_DP.txt" % args.filter2_only_snp_vcf_dir], shell=True) +# subprocess.call(["sed -i 's/LowFQ_QUAL/4/g' %s/temp_Only_filtered_positions_for_closely_matrix_DP.txt" % args.filter2_only_snp_vcf_dir], shell=True) +# subprocess.call(["sed -i 's/LowFQ_DP/4/g' 
%s/temp_Only_filtered_positions_for_closely_matrix_DP.txt" % args.filter2_only_snp_vcf_dir], shell=True) +# subprocess.call(["sed -i 's/HighFQ_QUAL_DP_proximate_SNP/4/g' %s/temp_Only_filtered_positions_for_closely_matrix_DP.txt" % args.filter2_only_snp_vcf_dir], shell=True) +# subprocess.call(["sed -i 's/HighFQ_DP_QUAL_proximate_SNP/4/g' %s/temp_Only_filtered_positions_for_closely_matrix_DP.txt" % args.filter2_only_snp_vcf_dir], shell=True) +# subprocess.call(["sed -i 's/HighFQ_QUAL_proximate_SNP/4/g' %s/temp_Only_filtered_positions_for_closely_matrix_DP.txt" % args.filter2_only_snp_vcf_dir], shell=True) +# subprocess.call(["sed -i 's/HighFQ_DP_proximate_SNP/4/g' %s/temp_Only_filtered_positions_for_closely_matrix_DP.txt" % args.filter2_only_snp_vcf_dir], shell=True) +# subprocess.call(["sed -i 's/HighFQ_proximate_SNP/4/g' %s/temp_Only_filtered_positions_for_closely_matrix_DP.txt" % args.filter2_only_snp_vcf_dir], shell=True) +# subprocess.call(["sed -i 's/HighFQ_QUAL_DP/4/g' %s/temp_Only_filtered_positions_for_closely_matrix_DP.txt" % args.filter2_only_snp_vcf_dir], shell=True) +# subprocess.call(["sed -i 's/HighFQ_DP_QUAL/4/g' %s/temp_Only_filtered_positions_for_closely_matrix_DP.txt" % args.filter2_only_snp_vcf_dir], shell=True) +# subprocess.call(["sed -i 's/HighFQ_QUAL/4/g' %s/temp_Only_filtered_positions_for_closely_matrix_DP.txt" % args.filter2_only_snp_vcf_dir], shell=True) +# subprocess.call(["sed -i 's/HighFQ_DP/3/g' %s/temp_Only_filtered_positions_for_closely_matrix_DP.txt" % args.filter2_only_snp_vcf_dir], shell=True) +# subprocess.call(["sed -i 's/LowFQ/4/g' %s/temp_Only_filtered_positions_for_closely_matrix_DP.txt" % args.filter2_only_snp_vcf_dir], shell=True) +# subprocess.call(["sed -i 's/HighFQ/4/g' %s/temp_Only_filtered_positions_for_closely_matrix_DP.txt" % args.filter2_only_snp_vcf_dir], shell=True) +# +# def barplot_stats(): +# keep_logging('\nRead each Sample columns and calculate the percentage of each label to generate barplot statistics.\n', 
'\nRead each Sample columns and calculate the percentage of each label to generate barplot statistics.\n', logger, 'info') +# """ +# Read each Sample columns and calculate the percentage of each label to generate barplot statistics. +# This will give a visual explanation of how many positions in each samples were filtered out because of different reason +# """ +# +# c_reader = csv.reader(open('%s/temp_Only_filtered_positions_for_closely_matrix.txt' % args.filter2_only_snp_vcf_dir, 'r'), delimiter='\t') +# columns = list(zip(*c_reader)) +# keep_logging('Finished reading columns...', 'Finished reading columns...', logger, 'info') +# counts = 1 +# +# if args.outgroup: +# end = len(vcf_filenames) + 1 +# end = end - 1 +# else: +# end = len(vcf_filenames) + 1 +# +# f_bar_count = open("%s/bargraph_counts.txt" % args.filter2_only_snp_vcf_dir, 'w+') +# f_bar_perc = open("%s/bargraph_percentage.txt" % args.filter2_only_snp_vcf_dir, 'w+') +# f_bar_count.write("Sample\tunmapped_positions\treference_allele\ttrue_variant\tOnly_low_FQ\tOnly_DP\tOnly_low_MQ\tother\n") +# f_bar_perc.write("Sample\tunmapped_positions_perc\ttrue_variant_perc\tOnly_low_FQ_perc\tOnly_DP_perc\tOnly_low_MQ_perc\tother_perc\n") +# +# for i in xrange(1, end, 1): +# """ Bar Count Statistics: Variant Position Count Statistics """ +# true_variant = columns[i].count('VARIANT') +# unmapped_positions = columns[i].count('reference_unmapped_position') +# reference_allele = columns[i].count('reference_allele') +# Only_low_FQ = columns[i].count('LowFQ') +# Only_DP = columns[i].count('HighFQ_DP') +# Only_low_MQ = columns[i].count('HighFQ') +# low_FQ_other_parameters = columns[i].count('LowFQ_QUAL_DP_proximate_SNP') + columns[i].count('LowFQ_DP_QUAL_proximate_SNP') + columns[i].count('LowFQ_QUAL_proximate_SNP') + columns[i].count('LowFQ_DP_proximate_SNP') + columns[i].count('LowFQ_proximate_SNP') + columns[i].count('LowFQ_QUAL_DP') + columns[i].count('LowFQ_DP_QUAL') + columns[i].count('LowFQ_QUAL') + 
columns[i].count('LowFQ_DP') +# high_FQ_other_parameters = columns[i].count('HighFQ_QUAL_DP_proximate_SNP') + columns[i].count('HighFQ_DP_QUAL_proximate_SNP') + columns[i].count('HighFQ_QUAL_proximate_SNP') + columns[i].count('HighFQ_DP_proximate_SNP') + columns[i].count('HighFQ_proximate_SNP') + columns[i].count('HighFQ_QUAL_DP') + columns[i].count('HighFQ_DP_QUAL') + columns[i].count('HighFQ_QUAL') +# other = low_FQ_other_parameters + high_FQ_other_parameters +# +# total = true_variant + unmapped_positions + reference_allele + Only_low_FQ + Only_DP + low_FQ_other_parameters + high_FQ_other_parameters + Only_low_MQ +# +# filename_count = i - 1 +# +# if args.outgroup: +# bar_string = "%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\n" % (os.path.basename(vcf_filenames_outgroup[filename_count].replace('_filter2_final.vcf_no_proximate_snp.vcf', '')), unmapped_positions, reference_allele, true_variant, Only_low_FQ, Only_DP, Only_low_MQ, other) +# f_bar_count.write(bar_string) +# else: +# bar_string = "%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\n" % (os.path.basename( +# vcf_filenames[filename_count].replace('_filter2_final.vcf_no_proximate_snp.vcf', '')), +# unmapped_positions, reference_allele, true_variant, +# Only_low_FQ, Only_DP, Only_low_MQ, other) +# #f_bar_count.write(bar_string) +# """ Bar Count Percentage Statistics: Variant Position Percentage Statistics """ +# try: +# true_variant_perc = float((columns[i].count('VARIANT') * 100) / total) +# except ZeroDivisionError: +# true_variant_perc = 0 +# try: +# unmapped_positions_perc = float((columns[i].count('reference_unmapped_position') * 100) / total) +# except ZeroDivisionError: +# unmapped_positions_perc = 0 +# try: +# reference_allele_perc = float((columns[i].count('reference_allele') * 100) / total) +# except ZeroDivisionError: +# reference_allele_perc = 0 +# try: +# Only_low_FQ_perc = float((columns[i].count('LowFQ') * 100) / total) +# except ZeroDivisionError: +# Only_low_FQ_perc = 0 +# try: +# Only_DP_perc = 
float((columns[i].count('HighFQ_DP') * 100) / total) +# except ZeroDivisionError: +# Only_DP_perc = 0 +# try: +# Only_low_MQ_perc = float((columns[i].count('HighFQ') * 100) / total) +# except ZeroDivisionError: +# Only_low_MQ_perc = 0 +# try: +# low_FQ_other_parameters_perc = float(((columns[i].count('LowFQ_QUAL_DP_proximate_SNP') + columns[i].count('LowFQ_DP_QUAL_proximate_SNP') + columns[i].count('LowFQ_QUAL_proximate_SNP') + columns[i].count('LowFQ_DP_proximate_SNP') + columns[i].count('LowFQ_proximate_SNP') + columns[i].count('LowFQ_QUAL_DP') + columns[i].count('LowFQ_DP_QUAL') + columns[i].count('LowFQ_QUAL') + columns[i].count('LowFQ_DP')) * 100) / total) +# except ZeroDivisionError: +# low_FQ_other_parameters_perc = 0 +# try: +# high_FQ_other_parameters_perc = float(((columns[i].count('HighFQ_QUAL_DP_proximate_SNP') + columns[i].count('HighFQ_DP_QUAL_proximate_SNP') + columns[i].count('HighFQ_QUAL_proximate_SNP') + columns[i].count('HighFQ_DP_proximate_SNP') + columns[i].count('HighFQ_proximate_SNP') + columns[i].count('HighFQ_QUAL_DP') + columns[i].count('HighFQ_DP_QUAL') + columns[i].count('HighFQ_QUAL')) * 100) / total) +# except ZeroDivisionError: +# high_FQ_other_parameters_perc = 0 +# +# other_perc = float(low_FQ_other_parameters_perc + high_FQ_other_parameters_perc) +# if args.outgroup: +# bar_perc_string = "%s\t%s\t%s\t%s\t%s\t%s\t%s\n" % (os.path.basename(vcf_filenames_outgroup[filename_count].replace('_filter2_final.vcf_no_proximate_snp.vcf', '')), unmapped_positions_perc, true_variant_perc, Only_low_FQ_perc, Only_DP_perc, Only_low_MQ_perc, other_perc) +# else: +# bar_perc_string = "%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\n" % (os.path.basename( +# vcf_filenames[filename_count].replace('_filter2_final.vcf_no_proximate_snp.vcf', '')), +# unmapped_positions_perc, reference_allele_perc, true_variant_perc, +# Only_low_FQ_perc, Only_DP_perc, Only_low_MQ_perc, other_perc) +# f_bar_count.write(bar_string) +# f_bar_perc.write(bar_perc_string) +# f_bar_count.close() 
+# f_bar_perc.close() +# bargraph_R_script = "library(ggplot2)\nlibrary(reshape)\nx1 <- read.table(\"bargraph_percentage.txt\", header=TRUE)\nx1$Sample <- reorder(x1$Sample, rowSums(x1[-1]))\nmdf1=melt(x1,id.vars=\"Sample\")\npdf(\"%s/%s_barplot.pdf\", width = 30, height = 30)\nggplot(mdf1, aes(Sample, value, fill=variable)) + geom_bar(stat=\"identity\") + ylab(\"Percentage of Filtered Positions\") + xlab(\"Samples\") + theme(text = element_text(size=9)) + scale_fill_manual(name=\"Reason for filtered out positions\", values=c(\"#08306b\", \"black\", \"orange\", \"darkgrey\", \"#fdd0a2\", \"#7f2704\")) + ggtitle(\"Title Here\") + ylim(0, 100) + theme(text = element_text(size=10), panel.background = element_rect(fill = 'white', colour = 'white'), plot.title = element_text(size=20, face=\"bold\", margin = margin(10, 0, 10, 0)), axis.ticks.y = element_blank(), axis.ticks.x = element_blank(), axis.text.x = element_text(colour = \"black\", face= \"bold.italic\", angle = 90)) + theme(legend.position = c(0.6, 0.7), legend.direction = \"horizontal\")\ndev.off()" % (args.filter2_only_snp_vcf_dir, os.path.basename(os.path.normpath(args.results_dir))) +# barplot_R_file = open("%s/bargraph.R" % args.filter2_only_snp_vcf_dir, 'w+') +# barplot_R_file.write(bargraph_R_script) +# keep_logging('Run this R script to generate bargraph plot: %s/bargraph.R' % args.filter2_only_snp_vcf_dir, 'Run this R script to generate bargraph plot: %s/bargraph.R' % args.filter2_only_snp_vcf_dir, logger, 'info') +# +# """ Methods Steps""" +# keep_logging('Running: Generating data matrices...', 'Running: Generating data matrices...', logger, 'info') +# generate_position_label_data_matrix_All_label() +# keep_logging('Running: Changing variables in data matrices to codes for faster processing...', 'Running: Changing variables in data matrices to codes for faster processing...', logger, 'info') +# temp_generate_position_label_data_matrix_All_label() +# keep_logging('Running: Generating Barplot statistics 
data matrices...', 'Running: Generating Barplot statistics data matrices...', logger, 'info') +# barplot_stats() +# +# def generate_indel_position_label_data_matrix(): +# +# """ +# Generate different list of Positions using the matrix All_label_final_sorted_header.txt. +# +# (Defining Core Variant Position: Variant Position which was not filtered out in any of the other samples due to variant filter parameter and also this position was present in all the samples(not unmapped)). +# +# Filtered Position label matrix: +# List of non-core positions. These positions didn't make it to the final core list because it was filtered out in one of the samples. +# +# Only_ref_variant_positions_for_closely_matrix.txt : +# Those Positions where the variant was either reference allele or a variant that passed all the variant filter parameters. +# +# :param: null +# :return: null +# +# """ +# def generate_indel_position_label_data_matrix_All_label(): +# position_label = OrderedDict() +# print "Generating Only_ref_indel_positions_for_closely" +# f1=open("%s/Only_ref_indel_positions_for_closely" % args.filter2_only_snp_vcf_dir, 'w+') +# f2=open("%s/Only_ref_indel_positions_for_closely_matrix.txt" % args.filter2_only_snp_vcf_dir, 'w+') +# f3=open("%s/Only_filtered_indel_positions_for_closely_matrix.txt" % args.filter2_only_snp_vcf_dir, 'w+') +# f4=open("%s/Only_filtered_indel_positions_for_closely_matrix_TRUE_variants_filtered_out.txt" % args.filter2_only_snp_vcf_dir, 'w+') +# +# if args.outgroup: +# with open("%s/All_indel_label_final_sorted_header_outgroup.txt" % args.filter2_only_snp_vcf_dir, 'rU') as csv_file: +# keep_logging( +# 'Reading All label positions file: %s/All_indel_label_final_sorted_header.txt' % args.filter2_only_snp_vcf_dir, +# 'Reading All label positions file: %s/All_indel_label_final_sorted_header.txt' % args.filter2_only_snp_vcf_dir, +# logger, 'info') +# csv_reader = csv.reader(csv_file, delimiter='\t') +# next(csv_reader, None) +# for row in csv_reader: +# 
position_label[row[0]] = row[1:] +# keep_logging('Generating different list of Positions and heatmap data matrix...', +# 'Generating different list of Positions and heatmap data matrix...', logger, 'info') +# print_string_header = "\t" +# for i in vcf_filenames: +# print_string_header = print_string_header + os.path.basename(i) + "\t" +# # f.write('\t' + print_string_header.strip() + '\n') +# f2.write('\t' + print_string_header.strip() + '\n') +# f3.write('\t' + print_string_header.strip() + '\n') +# f4.write('\t' + print_string_header.strip() + '\n') +# for value in position_label: +# lll = ['0', '2', '3', '4', '5', '6', '7'] +# ref_var = ['1', '1TRUE'] +# if set(ref_var) & set(position_label[value]): +# if set(lll) & set(position_label[value]): +# if int(value) not in outgroup_indel_specific_positions: +# print_string = "" +# for i in position_label[value]: +# print_string = print_string + "\t" + i +# STRR2 = value + print_string + "\n" +# f3.write(STRR2) +# if position_label[value].count('1TRUE') >= 2: +# f4.write('1\n') +# else: +# f4.write('0\n') +# else: +# if int(value) not in outgroup_indel_specific_positions: +# strr = value + "\n" +# f1.write(strr) +# STRR3 = value + "\t" + str(position_label[value]) + "\n" +# f2.write(STRR3) +# csv_file.close() +# f1.close() +# f2.close() +# f3.close() +# f4.close() +# subprocess.call(["sed -i 's/_filter2_final.vcf_no_proximate_snp.vcf//g' %s/Only_ref_indel_positions_for_closely" % args.filter2_only_snp_vcf_dir], +# shell=True) +# subprocess.call(["sed -i 's/_filter2_final.vcf_no_proximate_snp.vcf//g' %s/Only_ref_indel_positions_for_closely_matrix.txt" % args.filter2_only_snp_vcf_dir], +# shell=True) +# subprocess.call(["sed -i 's/_filter2_final.vcf_no_proximate_snp.vcf//g' %s/Only_filtered_indel_positions_for_closely_matrix.txt" % args.filter2_only_snp_vcf_dir], +# shell=True) +# subprocess.call(["sed -i 's/_filter2_final.vcf_no_proximate_snp.vcf//g' 
%s/Only_filtered_indel_positions_for_closely_matrix_TRUE_variants_filtered_out.txt" % args.filter2_only_snp_vcf_dir], +# shell=True) +# subprocess.call(["sed -i 's/1TRUE/-1/g' %s/Only_filtered_indel_positions_for_closely_matrix.txt" % args.filter2_only_snp_vcf_dir], +# shell=True) +# else: +# with open("%s/All_indel_label_final_sorted_header.txt" % args.filter2_only_snp_vcf_dir, 'rU') as csv_file: +# keep_logging('Reading All label positions file: %s/All_indel_label_final_sorted_header.txt' % args.filter2_only_snp_vcf_dir, 'Reading All label positions file: %s/All_indel_label_final_sorted_header.txt' % args.filter2_only_snp_vcf_dir, logger, 'info') +# csv_reader = csv.reader(csv_file, delimiter='\t') +# next(csv_reader, None) +# for row in csv_reader: +# position_label[row[0]] = row[1:] +# keep_logging('Generating different list of Positions and heatmap data matrix...', 'Generating different list of Positions and heatmap data matrix...', logger, 'info') +# print_string_header = "\t" +# for i in vcf_filenames: +# print_string_header = print_string_header + os.path.basename(i) + "\t" +# #f.write('\t' + print_string_header.strip() + '\n') +# f2.write('\t' + print_string_header.strip() + '\n') +# f3.write('\t' + print_string_header.strip() + '\n') +# f4.write('\t' + print_string_header.strip() + '\n') +# for value in position_label: +# +# lll = ['0', '2', '3', '4', '5', '6', '7'] +# ref_var = ['1', '1TRUE'] +# if set(ref_var) & set(position_label[value]): +# if set(lll) & set(position_label[value]): +# print_string = "" +# for i in position_label[value]: +# print_string = print_string + "\t" + i +# STRR2 = value + print_string + "\n" +# f3.write(STRR2) +# if position_label[value].count('1TRUE') >= 2: +# f4.write('1\n') +# else: +# f4.write('0\n') +# else: +# strr = value + "\n" +# f1.write(strr) +# STRR3 = value + "\t" + str(position_label[value]) + "\n" +# f2.write(STRR3) +# csv_file.close() +# f1.close() +# f2.close() +# f3.close() +# f4.close() +# 
subprocess.call(["sed -i 's/_filter2_final.vcf_no_proximate_snp.vcf//g' %s/Only_ref_indel_positions_for_closely" % args.filter2_only_snp_vcf_dir], shell=True) +# subprocess.call(["sed -i 's/_filter2_final.vcf_no_proximate_snp.vcf//g' %s/Only_ref_indel_positions_for_closely_matrix.txt" % args.filter2_only_snp_vcf_dir], shell=True) +# subprocess.call(["sed -i 's/_filter2_final.vcf_no_proximate_snp.vcf//g' %s/Only_filtered_indel_positions_for_closely_matrix.txt" % args.filter2_only_snp_vcf_dir], shell=True) +# subprocess.call(["sed -i 's/_filter2_final.vcf_no_proximate_snp.vcf//g' %s/Only_filtered_indel_positions_for_closely_matrix_TRUE_variants_filtered_out.txt" % args.filter2_only_snp_vcf_dir], shell=True) +# subprocess.call(["sed -i 's/1TRUE/-1/g' %s/Only_filtered_indel_positions_for_closely_matrix.txt" % args.filter2_only_snp_vcf_dir], shell=True) +# +# def temp_generate_indel_position_label_data_matrix_All_label(): +# +# """ +# Read **temp_label_final_raw.txt** SNP position label data matrix for generating barplot statistics. 
+# """ +# temp_position_label = OrderedDict() +# f33=open("%s/temp_Only_filtered_indel_positions_for_closely_matrix.txt" % args.filter2_only_snp_vcf_dir, 'w+') +# print_string_header = "\t" +# if args.outgroup: +# for i in vcf_filenames: +# +# if "%s_filter2_final.vcf_no_proximate_snp.vcf" % outgroup not in i: +# print_string_header = print_string_header + os.path.basename(i) + "\t" +# else: +# for i in vcf_filenames: +# print_string_header = print_string_header + os.path.basename(i) + "\t" +# +# f33.write('\t' + print_string_header.strip() + '\n') +# keep_logging('Reading temporary label positions file: %s/temp_label_final_raw.txt' % args.filter2_only_snp_vcf_dir, 'Reading temporary label positions file: %s/temp_label_final_raw.txt' % args.filter2_only_snp_vcf_dir, logger, 'info') +# # lll = ['reference_unmapped_position', 'LowFQ', 'LowFQ_DP', 'LowFQ_QUAL', 'LowFQ_DP_QUAL', 'LowFQ_QUAL_DP', 'HighFQ_DP', 'HighFQ_QUAL', 'HighFQ_DP_QUAL', 'HighFQ_QUAL_DP', 'HighFQ', 'LowFQ_proximate_SNP', 'LowFQ_DP_proximate_SNP', 'LowFQ_QUAL_proximate_SNP', 'LowFQ_DP_QUAL_proximate_SNP', 'LowFQ_QUAL_DP_proximate_SNP', 'HighFQ_DP_proximate_SNP', 'HighFQ_QUAL_proximate_SNP', 'HighFQ_DP_QUAL_proximate_SNP', 'HighFQ_QUAL_DP_proximate_SNP', 'HighFQ_proximate_SNP', '_proximate_SNP'] +# lll = ['reference_unmapped_position', 'LowAF', 'LowAF_DP', 'LowAF_QUAL', 'LowAF_DP_QUAL', 'LowAF_QUAL_DP', +# 'HighAF_DP', 'HighAF_QUAL', 'HighAF_DP_QUAL', 'HighAF_QUAL_DP', 'HighAF', 'LowAF_proximate_SNP', +# 'LowAF_DP_proximate_SNP', 'LowAF_QUAL_proximate_SNP', 'LowAF_DP_QUAL_proximate_SNP', +# 'LowAF_QUAL_DP_proximate_SNP', 'HighAF_DP_proximate_SNP', 'HighAF_QUAL_proximate_SNP', +# 'HighAF_DP_QUAL_proximate_SNP', 'HighAF_QUAL_DP_proximate_SNP', 'HighAF_proximate_SNP', '_proximate_SNP'] +# ref_var = ['reference_allele', 'VARIANT'] +# +# if args.outgroup: +# with open("%s/temp_indel_label_final_raw_outgroup.txt" % args.filter2_only_snp_vcf_dir, 'r') as csv_file: +# csv_reader = csv.reader(csv_file, 
delimiter='\t') +# next(csv_reader, None) +# for row in csv_reader: +# if set(ref_var) & set(row[1:]): +# if set(lll) & set(row[1:]): +# if int(row[0]) not in outgroup_indel_specific_positions: +# print_string = "" +# for i in row[1:]: +# print_string = print_string + "\t" + i +# STRR2 = row[0] + print_string + "\n" +# f33.write(STRR2) +# csv_file.close() +# f33.close() +# else: +# with open("%s/temp_indel_label_final_raw.txt" % args.filter2_only_snp_vcf_dir, 'r') as csv_file: +# csv_reader = csv.reader(csv_file, delimiter='\t') +# next(csv_reader, None) +# for row in csv_reader: +# if set(ref_var) & set(row[1:]): +# if set(lll) & set(row[1:]): +# +# print_string = "" +# for i in row[1:]: +# print_string = print_string + "\t" + i +# STRR2 = row[0] + print_string + "\n" +# f33.write(STRR2) +# csv_file.close() +# f33.close() +# """ +# Read temp_Only_filtered_positions_for_closely_matrix file and generate a matrix of positions that are being filtered just because of AF +# """ +# temp_position_label_AF = OrderedDict() +# f44=open("%s/temp_Only_filtered_indel_positions_for_closely_matrix_AF.txt" % args.filter2_only_snp_vcf_dir, 'w+') +# with open("%s/temp_Only_filtered_indel_positions_for_closely_matrix.txt" % args.filter2_only_snp_vcf_dir, 'rU') as csv_file: +# keep_logging('Reading temporary Only_filtered_indel_positions label file: %s/temp_Only_filtered_indel_positions_for_closely_matrix.txt ' % args.filter2_only_snp_vcf_dir, 'Reading temporary Only_filtered_indel_positions label file: %s/temp_Only_filtered_indel_positions_for_closely_matrix.txt ' % args.filter2_only_snp_vcf_dir, logger, 'info') +# csv_reader = csv.reader(csv_file, delimiter='\t') +# next(csv_reader, None) +# +# for row in csv_reader: +# temp_position_label_AF[row[0]] = row[1:] +# print_string_header = "\t" +# for i in vcf_filenames: +# print_string_header = print_string_header + os.path.basename(i) + "\t" +# f44.write('\t' + print_string_header.strip() + '\n') +# for value in temp_position_label_AF: 
+# lll = ['LowAF'] +# if set(lll) & set(temp_position_label_AF[value]): +# +# print_string = "" +# for i in temp_position_label_AF[value]: +# print_string = print_string + "\t" + i +# STRR2 = value + print_string + "\n" +# f44.write(STRR2) +# f44.close() +# csv_file.close() +# f44.close() +# +# """ +# Perform Sed on temp files. Find a faster way to do this. +# """ +# subprocess.call(["sed -i 's/_filter2_final.vcf_no_proximate_snp.vcf//g' %s/temp_Only_filtered_indel_positions_for_closely_matrix_AF.txt" % args.filter2_only_snp_vcf_dir], shell=True) +# subprocess.call(["sed -i 's/reference_unmapped_position/0/g' %s/temp_Only_filtered_indel_positions_for_closely_matrix_AF.txt" % args.filter2_only_snp_vcf_dir], shell=True) +# subprocess.call(["sed -i 's/reference_allele/1/g' %s/temp_Only_filtered_indel_positions_for_closely_matrix_AF.txt" % args.filter2_only_snp_vcf_dir], shell=True) +# subprocess.call(["sed -i 's/VARIANT/2/g' %s/temp_Only_filtered_indel_positions_for_closely_matrix_AF.txt" % args.filter2_only_snp_vcf_dir], shell=True) +# subprocess.call(["sed -i 's/LowAF_QUAL_DP_proximate_SNP/4/g' %s/temp_Only_filtered_indel_positions_for_closely_matrix_AF.txt" % args.filter2_only_snp_vcf_dir], shell=True) +# subprocess.call(["sed -i 's/LowAF_DP_QUAL_proximate_SNP/4/g' %s/temp_Only_filtered_indel_positions_for_closely_matrix_AF.txt" % args.filter2_only_snp_vcf_dir], shell=True) +# subprocess.call(["sed -i 's/LowAF_QUAL_proximate_SNP/4/g' %s/temp_Only_filtered_indel_positions_for_closely_matrix_AF.txt" % args.filter2_only_snp_vcf_dir], shell=True) +# subprocess.call(["sed -i 's/LowAF_DP_proximate_SNP/4/g' %s/temp_Only_filtered_indel_positions_for_closely_matrix_AF.txt" % args.filter2_only_snp_vcf_dir], shell=True) +# subprocess.call(["sed -i 's/LowAF_proximate_SNP/4/g' %s/temp_Only_filtered_indel_positions_for_closely_matrix_AF.txt" % args.filter2_only_snp_vcf_dir], shell=True) +# subprocess.call(["sed -i 's/LowAF_QUAL_DP/4/g' 
%s/temp_Only_filtered_indel_positions_for_closely_matrix_AF.txt" % args.filter2_only_snp_vcf_dir], shell=True) +# subprocess.call(["sed -i 's/LowAF_DP_QUAL/4/g' %s/temp_Only_filtered_indel_positions_for_closely_matrix_AF.txt" % args.filter2_only_snp_vcf_dir], shell=True) +# subprocess.call(["sed -i 's/LowAF_QUAL/4/g' %s/temp_Only_filtered_indel_positions_for_closely_matrix_AF.txt" % args.filter2_only_snp_vcf_dir], shell=True) +# subprocess.call(["sed -i 's/LowAF_DP/4/g' %s/temp_Only_filtered_indel_positions_for_closely_matrix_AF.txt" % args.filter2_only_snp_vcf_dir], shell=True) +# subprocess.call(["sed -i 's/HighAF_QUAL_DP_proximate_SNP/4/g' %s/temp_Only_filtered_indel_positions_for_closely_matrix_AF.txt" % args.filter2_only_snp_vcf_dir], shell=True) +# subprocess.call(["sed -i 's/HighAF_DP_QUAL_proximate_SNP/4/g' %s/temp_Only_filtered_indel_positions_for_closely_matrix_AF.txt" % args.filter2_only_snp_vcf_dir], shell=True) +# subprocess.call(["sed -i 's/HighAF_QUAL_proximate_SNP/4/g' %s/temp_Only_filtered_indel_positions_for_closely_matrix_AF.txt" % args.filter2_only_snp_vcf_dir], shell=True) +# subprocess.call(["sed -i 's/HighAF_DP_proximate_SNP/4/g' %s/temp_Only_filtered_indel_positions_for_closely_matrix_AF.txt" % args.filter2_only_snp_vcf_dir], shell=True) +# subprocess.call(["sed -i 's/HighAF_proximate_SNP/4/g' %s/temp_Only_filtered_indel_positions_for_closely_matrix_AF.txt" % args.filter2_only_snp_vcf_dir], shell=True) +# subprocess.call(["sed -i 's/HighAF_QUAL_DP/4/g' %s/temp_Only_filtered_indel_positions_for_closely_matrix_AF.txt" % args.filter2_only_snp_vcf_dir], shell=True) +# subprocess.call(["sed -i 's/HighAF_DP_QUAL/4/g' %s/temp_Only_filtered_indel_positions_for_closely_matrix_AF.txt" % args.filter2_only_snp_vcf_dir], shell=True) +# subprocess.call(["sed -i 's/HighAF_QUAL/4/g' %s/temp_Only_filtered_indel_positions_for_closely_matrix_AF.txt" % args.filter2_only_snp_vcf_dir], shell=True) +# subprocess.call(["sed -i 's/HighAF_DP/4/g' 
%s/temp_Only_filtered_indel_positions_for_closely_matrix_AF.txt" % args.filter2_only_snp_vcf_dir], shell=True) +# subprocess.call(["sed -i 's/LowAF/3/g' %s/temp_Only_filtered_indel_positions_for_closely_matrix_AF.txt" % args.filter2_only_snp_vcf_dir], shell=True) +# subprocess.call(["sed -i 's/HighAF/4/g' %s/temp_Only_filtered_indel_positions_for_closely_matrix_AF.txt" % args.filter2_only_snp_vcf_dir], shell=True) +# +# +# """ +# Read temp_Only_filtered_positions_for_closely_matrix file and generate a matrix of positions that are being filtered just because of Dp +# """ +# temp_position_label_DP = OrderedDict() +# f44=open("%s/temp_Only_filtered_indel_positions_for_closely_matrix_DP.txt" % args.filter2_only_snp_vcf_dir, 'w+') +# with open("%s/temp_Only_filtered_indel_positions_for_closely_matrix.txt" % args.filter2_only_snp_vcf_dir, 'rU') as csv_file: +# keep_logging('Reading temporary Only_filtered_positions label file: %s/temp_Only_filtered_indel_positions_for_closely_matrix.txt ' % args.filter2_only_snp_vcf_dir, 'Reading temporary Only_filtered_positions label file: %s/temp_Only_filtered_indel_positions_for_closely_matrix.txt ' % args.filter2_only_snp_vcf_dir, logger, 'info') +# csv_reader = csv.reader(csv_file, delimiter='\t') +# next(csv_reader, None) +# for row in csv_reader: +# temp_position_label_DP[row[0]] = row[1:] +# print_string_header = "\t" +# for i in vcf_filenames: +# print_string_header = print_string_header + os.path.basename(i) + "\t" +# f44.write('\t' + print_string_header.strip() + '\n') +# for value in temp_position_label_DP: +# lll = ['HighAF_DP'] +# ref_var = ['reference_allele', 'VARIANT'] +# if set(lll) & set(temp_position_label_AF[value]): +# print_string = "" +# for i in temp_position_label_AF[value]: +# print_string = print_string + "\t" + i +# STRR2 = value + print_string + "\n" +# f44.write(STRR2) +# f44.close() +# csv_file.close() +# +# """ +# Perform Sed on temp files. Find a faster way to do this. 
+# """ +# subprocess.call(["sed -i 's/_filter2_final.vcf_no_proximate_snp.vcf//g' %s/temp_Only_filtered_indel_positions_for_closely_matrix_DP.txt" % args.filter2_only_snp_vcf_dir], shell=True) +# subprocess.call(["sed -i 's/reference_unmapped_position/0/g' %s/temp_Only_filtered_indel_positions_for_closely_matrix_DP.txt" % args.filter2_only_snp_vcf_dir], shell=True) +# subprocess.call(["sed -i 's/reference_allele/1/g' %s/temp_Only_filtered_indel_positions_for_closely_matrix_DP.txt" % args.filter2_only_snp_vcf_dir], shell=True) +# subprocess.call(["sed -i 's/VARIANT/2/g' %s/temp_Only_filtered_indel_positions_for_closely_matrix_DP.txt" % args.filter2_only_snp_vcf_dir], shell=True) +# subprocess.call(["sed -i 's/LowAF_QUAL_DP_proximate_SNP/4/g' %s/temp_Only_filtered_indel_positions_for_closely_matrix_DP.txt" % args.filter2_only_snp_vcf_dir], shell=True) +# subprocess.call(["sed -i 's/LowAF_DP_QUAL_proximate_SNP/4/g' %s/temp_Only_filtered_indel_positions_for_closely_matrix_DP.txt" % args.filter2_only_snp_vcf_dir], shell=True) +# subprocess.call(["sed -i 's/LowAF_QUAL_proximate_SNP/4/g' %s/temp_Only_filtered_indel_positions_for_closely_matrix_DP.txt" % args.filter2_only_snp_vcf_dir], shell=True) +# subprocess.call(["sed -i 's/LowAF_DP_proximate_SNP/4/g' %s/temp_Only_filtered_indel_positions_for_closely_matrix_DP.txt" % args.filter2_only_snp_vcf_dir], shell=True) +# subprocess.call(["sed -i 's/LowAF_proximate_SNP/4/g' %s/temp_Only_filtered_indel_positions_for_closely_matrix_DP.txt" % args.filter2_only_snp_vcf_dir], shell=True) +# subprocess.call(["sed -i 's/LowAF_QUAL_DP/4/g' %s/temp_Only_filtered_indel_positions_for_closely_matrix_DP.txt" % args.filter2_only_snp_vcf_dir], shell=True) +# subprocess.call(["sed -i 's/LowAF_DP_QUAL/4/g' %s/temp_Only_filtered_indel_positions_for_closely_matrix_DP.txt" % args.filter2_only_snp_vcf_dir], shell=True) +# subprocess.call(["sed -i 's/LowAF_QUAL/4/g' %s/temp_Only_filtered_indel_positions_for_closely_matrix_DP.txt" % 
args.filter2_only_snp_vcf_dir], shell=True) +# subprocess.call(["sed -i 's/LowAF_DP/4/g' %s/temp_Only_filtered_indel_positions_for_closely_matrix_DP.txt" % args.filter2_only_snp_vcf_dir], shell=True) +# subprocess.call(["sed -i 's/HighAF_QUAL_DP_proximate_SNP/4/g' %s/temp_Only_filtered_indel_positions_for_closely_matrix_DP.txt" % args.filter2_only_snp_vcf_dir], shell=True) +# subprocess.call(["sed -i 's/HighAF_DP_QUAL_proximate_SNP/4/g' %s/temp_Only_filtered_indel_positions_for_closely_matrix_DP.txt" % args.filter2_only_snp_vcf_dir], shell=True) +# subprocess.call(["sed -i 's/HighAF_QUAL_proximate_SNP/4/g' %s/temp_Only_filtered_indel_positions_for_closely_matrix_DP.txt" % args.filter2_only_snp_vcf_dir], shell=True) +# subprocess.call(["sed -i 's/HighAF_DP_proximate_SNP/4/g' %s/temp_Only_filtered_indel_positions_for_closely_matrix_DP.txt" % args.filter2_only_snp_vcf_dir], shell=True) +# subprocess.call(["sed -i 's/HighAF_proximate_SNP/4/g' %s/temp_Only_filtered_indel_positions_for_closely_matrix_DP.txt" % args.filter2_only_snp_vcf_dir], shell=True) +# subprocess.call(["sed -i 's/HighAF_QUAL_DP/4/g' %s/temp_Only_filtered_indel_positions_for_closely_matrix_DP.txt" % args.filter2_only_snp_vcf_dir], shell=True) +# subprocess.call(["sed -i 's/HighAF_DP_QUAL/4/g' %s/temp_Only_filtered_indel_positions_for_closely_matrix_DP.txt" % args.filter2_only_snp_vcf_dir], shell=True) +# subprocess.call(["sed -i 's/HighAF_QUAL/4/g' %s/temp_Only_filtered_indel_positions_for_closely_matrix_DP.txt" % args.filter2_only_snp_vcf_dir], shell=True) +# subprocess.call(["sed -i 's/HighAF_DP/3/g' %s/temp_Only_filtered_indel_positions_for_closely_matrix_DP.txt" % args.filter2_only_snp_vcf_dir], shell=True) +# subprocess.call(["sed -i 's/LowAF/4/g' %s/temp_Only_filtered_indel_positions_for_closely_matrix_DP.txt" % args.filter2_only_snp_vcf_dir], shell=True) +# subprocess.call(["sed -i 's/HighAF/4/g' %s/temp_Only_filtered_indel_positions_for_closely_matrix_DP.txt" % args.filter2_only_snp_vcf_dir], 
shell=True) +# +# +# def barplot_indel_stats(): +# keep_logging('Read each Sample columns and calculate the percentage of each label to generate barplot statistics.', 'Read each Sample columns and calculate the percentage of each label to generate barplot statistics.', logger, 'info') +# """ +# Read each Sample columns and calculate the percentage of each label to generate barplot statistics. +# This will give a visual explanation of how many positions in each samples were filtered out because of different reason +# """ +# +# c_reader = csv.reader( +# open('%s/temp_Only_filtered_indel_positions_for_closely_matrix.txt' % args.filter2_only_snp_vcf_dir, +# 'r'), delimiter='\t') +# columns = list(zip(*c_reader)) +# print len(columns) +# keep_logging('Finished reading columns...', 'Finished reading columns...', logger, 'info') +# counts = 1 +# +# if args.outgroup: +# end = len(vcf_filenames) + 1 +# end = end - 1 +# else: +# end = len(vcf_filenames) + 1 +# print end +# +# f_bar_count = open("%s/bargraph_indel_counts.txt" % args.filter2_only_snp_vcf_dir, 'w+') +# f_bar_perc = open("%s/bargraph_indel_percentage.txt" % args.filter2_only_snp_vcf_dir, 'w+') +# f_bar_count.write("Sample\tunmapped_positions\treference_allele\ttrue_variant\tOnly_low_AF\tOnly_DP\tOnly_low_MQ\tother\n") +# f_bar_perc.write("Sample\tunmapped_positions_perc\ttrue_variant_perc\tOnly_low_AF_perc\tOnly_DP_perc\tOnly_low_MQ_perc\tother_perc\n") +# for i in xrange(1, end, 1): +# """ Bar Count Statistics: Variant Position Count Statistics """ +# print i +# true_variant = columns[i].count('VARIANT') +# unmapped_positions = columns[i].count('reference_unmapped_position') +# reference_allele = columns[i].count('reference_allele') +# Only_low_AF = columns[i].count('LowAF') +# Only_DP = columns[i].count('HighAF_DP') +# Only_low_MQ = columns[i].count('HighAF') +# low_AF_other_parameters = columns[i].count('LowAF_QUAL_DP_proximate_SNP') + columns[i].count('LowAF_DP_QUAL_proximate_SNP') + 
columns[i].count('LowAF_QUAL_proximate_SNP') + columns[i].count('LowAF_DP_proximate_SNP') + columns[i].count('LowAF_proximate_SNP') + columns[i].count('LowAF_QUAL_DP') + columns[i].count('LowAF_DP_QUAL') + columns[i].count('LowAF_QUAL') + columns[i].count('LowAF_DP') +# high_AF_other_parameters = columns[i].count('HighAF_QUAL_DP_proximate_SNP') + columns[i].count('HighAF_DP_QUAL_proximate_SNP') + columns[i].count('HighAF_QUAL_proximate_SNP') + columns[i].count('HighAF_DP_proximate_SNP') + columns[i].count('HighAF_proximate_SNP') + columns[i].count('HighAF_QUAL_DP') + columns[i].count('HighAF_DP_QUAL') + columns[i].count('HighAF_QUAL') +# other = low_AF_other_parameters + high_AF_other_parameters +# total = true_variant + unmapped_positions + reference_allele + Only_low_AF + Only_DP + low_AF_other_parameters + high_AF_other_parameters + Only_low_MQ +# filename_count = i - 1 +# # bar_string = "%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\n" % (os.path.basename(vcf_filenames_outgroup[filename_count].replace('_filter2_final.vcf_no_proximate_snp.vcf', '')), unmapped_positions, reference_allele, true_variant, Only_low_AF, Only_DP, Only_low_MQ, other) +# if args.outgroup: +# ### +# +# bar_string = "%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\n" % (os.path.basename( +# vcf_filenames_outgroup[filename_count].replace('_filter2_final.vcf_no_proximate_snp.vcf', '')), +# unmapped_positions, reference_allele, true_variant, +# Only_low_AF, Only_DP, Only_low_MQ, other) +# else: +# bar_string = "%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\n" % (os.path.basename( +# vcf_filenames[filename_count].replace('_filter2_final.vcf_no_proximate_snp.vcf', '')), +# unmapped_positions, reference_allele, true_variant, +# Only_low_AF, Only_DP, Only_low_MQ, other) +# +# f_bar_count.write(bar_string) +# +# """ Bar Count Percentage Statistics: Variant Position Percentage Statistics """ +# try: +# true_variant_perc = float((columns[i].count('VARIANT') * 100) / total) +# except ZeroDivisionError: +# true_variant_perc = 0 +# try: +# 
unmapped_positions_perc = float((columns[i].count('reference_unmapped_position') * 100) / total) +# except ZeroDivisionError: +# unmapped_positions_perc = 0 +# try: +# reference_allele_perc = float((columns[i].count('reference_allele') * 100) / total) +# except ZeroDivisionError: +# reference_allele_perc = 0 +# try: +# Only_low_AF_perc = float((columns[i].count('LowAF') * 100) / total) +# except ZeroDivisionError: +# Only_low_AF_perc = 0 +# try: +# Only_DP_perc = float((columns[i].count('HighAF_DP') * 100) / total) +# except ZeroDivisionError: +# Only_DP_perc = 0 +# try: +# Only_low_MQ_perc = float((columns[i].count('HighAF') * 100) / total) +# except ZeroDivisionError: +# Only_low_MQ_perc = 0 +# try: +# low_AF_other_parameters_perc = float(((columns[i].count('LowAF_QUAL_DP_proximate_SNP') + columns[i].count('LowAF_DP_QUAL_proximate_SNP') + columns[i].count('LowAF_QUAL_proximate_SNP') + columns[i].count('LowAF_DP_proximate_SNP') + columns[i].count('LowAF_proximate_SNP') + columns[i].count('LowAF_QUAL_DP') + columns[i].count('LowAF_DP_QUAL') + columns[i].count('LowAF_QUAL') + columns[i].count('LowAF_DP')) * 100) / total) +# except ZeroDivisionError: +# low_AF_other_parameters_perc = 0 +# try: +# high_AF_other_parameters_perc = float(((columns[i].count('HighAF_QUAL_DP_proximate_SNP') + columns[i].count('HighAF_DP_QUAL_proximate_SNP') + columns[i].count('HighAF_QUAL_proximate_SNP') + columns[i].count('HighAF_DP_proximate_SNP') + columns[i].count('HighAF_proximate_SNP') + columns[i].count('HighAF_QUAL_DP') + columns[i].count('HighAF_DP_QUAL') + columns[i].count('HighAF_QUAL')) * 100) / total) +# except ZeroDivisionError: +# high_AF_other_parameters_perc = 0 +# +# other_perc = float(low_AF_other_parameters_perc + high_AF_other_parameters_perc) +# if args.outgroup: +# ### +# bar_perc_string = "%s\t%s\t%s\t%s\t%s\t%s\t%s\n" % ( +# os.path.basename(vcf_filenames_outgroup[filename_count].replace('_filter2_final.vcf_no_proximate_snp.vcf', '')), +# unmapped_positions_perc, 
true_variant_perc, Only_low_AF_perc, Only_DP_perc, Only_low_MQ_perc, +# other_perc) +# f_bar_perc.write(bar_perc_string) +# else: +# bar_perc_string = "%s\t%s\t%s\t%s\t%s\t%s\t%s\n" % ( +# os.path.basename(vcf_filenames[filename_count].replace('_filter2_final.vcf_no_proximate_snp.vcf', '')), +# unmapped_positions_perc, true_variant_perc, Only_low_AF_perc, Only_DP_perc, Only_low_MQ_perc, +# other_perc) +# f_bar_perc.write(bar_perc_string) +# +# f_bar_count.close() +# f_bar_perc.close() +# bargraph_R_script = "library(ggplot2)\nlibrary(reshape)\nx1 <- read.table(\"bargraph_indel_percentage.txt\", header=TRUE)\nx1$Sample <- reorder(x1$Sample, rowSums(x1[-1]))\nmdf1=melt(x1,id.vars=\"Sample\")\npdf(\"%s/%s_barplot_indel.pdf\", width = 30, height = 30)\nggplot(mdf1, aes(Sample, value, fill=variable)) + geom_bar(stat=\"identity\") + ylab(\"Percentage of Filtered Positions\") + xlab(\"Samples\") + theme(text = element_text(size=9)) + scale_fill_manual(name=\"Reason for filtered out positions\", values=c(\"#08306b\", \"black\", \"orange\", \"darkgrey\", \"#fdd0a2\", \"#7f2704\")) + ggtitle(\"Title Here\") + ylim(0, 100) + theme(text = element_text(size=10), panel.background = element_rect(fill = 'white', colour = 'white'), plot.title = element_text(size=20, face=\"bold\", margin = margin(10, 0, 10, 0)), axis.ticks.y = element_blank(), axis.ticks.x = element_blank(), axis.text.x = element_text(colour = \"black\", face= \"bold.italic\", angle = 90)) + theme(legend.position = c(0.6, 0.7), legend.direction = \"horizontal\")\ndev.off()" % (args.filter2_only_snp_vcf_dir, os.path.basename(os.path.normpath(args.results_dir))) +# barplot_R_file = open("%s/bargraph_indel.R" % args.filter2_only_snp_vcf_dir, 'w+') +# barplot_R_file.write(bargraph_R_script) +# keep_logging('Run this R script to generate bargraph plot: %s/bargraph_indel.R' % args.filter2_only_snp_vcf_dir, 'Run this R script to generate bargraph plot: %s/bargraph_indel.R' % args.filter2_only_snp_vcf_dir, logger, 'info') 
+# +# +# """ Methods Steps""" +# keep_logging('Running: Generating data matrices...', 'Running: Generating data matrices...', logger, 'info') +# # if args.outgroup: +# # f_outgroup = open("%s/outgroup_indel_specific_positions.txt" % args.filter2_only_snp_vcf_dir, 'r+') +# # global outgroup_indel_specific_positions +# # outgroup_indel_specific_positions = [] +# # for i in f_outgroup: +# # outgroup_indel_specific_positions.append(i) +# # f_outgroup.close() +# # +# # f_outgroup = open("%s/outgroup_specific_positions.txt" % args.filter2_only_snp_vcf_dir, 'r+') +# # global outgroup_specific_positions +# # outgroup_specific_positions = [] +# # for i in f_outgroup: +# # outgroup_specific_positions.append(i) +# # f_outgroup.close() +# # else: +# # global outgroup_specific_positions +# # global outgroup_indel_specific_positions +# # outgroup_indel_specific_positions = [] +# # outgroup_specific_positions = [] +# generate_indel_position_label_data_matrix_All_label() +# keep_logging('Running: Changing variables in data matrices to codes for faster processing...', 'Running: Changing variables in data matrices to codes for faster processing...', logger, 'info') +# temp_generate_indel_position_label_data_matrix_All_label() +# keep_logging('Running: Generating Barplot statistics data matrices...', 'Running: Generating Barplot statistics data matrices...', logger, 'info') +# barplot_indel_stats() +# +# def create_job_fasta(jobrun, vcf_filenames, core_vcf_fasta_dir, functional_filter): +# +# """ Generate jobs/scripts that creates core consensus fasta file. +# +# This function will generate and run scripts/jobs to create core consensus fasta file of only core variant positions. +# Input for Fasttree, Beast and pairwise variant analysis. +# +# :param jobrun: Based on this value all the job/scripts will run on "cluster": either on single cluster, "parallel-local": run in parallel on local system, "local": run on local system, "parallel-cluster": submit parallel jobs on cluster. 
+# :param vcf_filenames: list of final vcf filenames i.e *_no_proximate_snp.vcf. These files are the final output of variant calling step for each sample. +# :return: +# :raises: +# """ +# if jobrun == "parallel-cluster": +# """ +# Supports only PBS clusters for now. +# """ +# for i in vcf_filenames: +# job_name = os.path.basename(i) +# job_print_string = "#PBS -N %s_fasta\n#PBS -M %s\n#PBS -m %s\n#PBS -V\n#PBS -l %s\n#PBS -q %s\n#PBS -A %s\n#PBS -l qos=flux\n\n/nfs/esnitkin/bin_group/anaconda2/bin/python /nfs/esnitkin/bin_group/pipeline/Github/variant_calling_pipeline_dev/modules/variant_diagnostics/extract_only_ref_variant_fasta.py -filter2_only_snp_vcf_dir %s -filter2_only_snp_vcf_file %s -reference %s -out_core %s -functional_filter %s\n" % (job_name, ConfigSectionMap("scheduler", Config)['email'], ConfigSectionMap("scheduler", Config)['notification'], ConfigSectionMap("scheduler", Config)['resources'], ConfigSectionMap("scheduler", Config)['queue'], ConfigSectionMap("scheduler", Config)['flux_account'], args.filter2_only_snp_vcf_dir, i, args.reference, core_vcf_fasta_dir, functional_filter) +# job_file_name = "%s_fasta.pbs" % (i) +# f1=open(job_file_name, 'w+') +# f1.write(job_print_string) +# f1.close() +# #os.system("mv %s/*.pbs %s/temp" % (args.filter2_only_snp_vcf_dir, args.filter2_only_snp_vcf_dir)) +# pbs_dir = args.filter2_only_snp_vcf_dir + "/*_fasta.pbs" +# pbs_scripts = glob.glob(pbs_dir) +# for i in pbs_scripts: +# keep_logging('Running: qsub %s' % i, 'Running: qsub %s' % i, logger, 'info') +# #os.system("qsub %s" % i) +# call("qsub %s" % i, logger) +# +# +# elif jobrun == "parallel-local" or jobrun == "cluster": +# """ +# Generate a Command list of each job and run it in parallel on different cores available on local system +# """ +# command_array = [] +# command_file = "%s/commands_list_fasta.sh" % args.filter2_only_snp_vcf_dir +# f3 = open(command_file, 'w+') +# for i in vcf_filenames: +# job_name = os.path.basename(i) +# job_print_string = "#PBS 
-N %s_fasta\n#PBS -M %s\n#PBS -m %s\n#PBS -V\n#PBS -l %s\n#PBS -q %s\n#PBS -A %s\n#PBS -l qos=flux\n\n/nfs/esnitkin/bin_group/anaconda2/bin/python /nfs/esnitkin/bin_group/pipeline/Github/variant_calling_pipeline_dev/modules/variant_diagnostics/extract_only_ref_variant_fasta.py -filter2_only_snp_vcf_dir %s -filter2_only_snp_vcf_file %s -reference %s -out_core %s -functional_filter %s\n" % (job_name, ConfigSectionMap("scheduler", Config)['email'], ConfigSectionMap("scheduler", Config)['notification'], ConfigSectionMap("scheduler", Config)['resources'], ConfigSectionMap("scheduler", Config)['queue'], ConfigSectionMap("scheduler", Config)['flux_account'], args.filter2_only_snp_vcf_dir, i, args.reference, core_vcf_fasta_dir, functional_filter) +# job_file_name = "%s_fasta.pbs" % (i) +# f1=open(job_file_name, 'w+') +# f1.write(job_print_string) +# f1.close() +# pbs_dir = args.filter2_only_snp_vcf_dir + "/*_fasta.pbs" +# pbs_scripts = glob.glob(pbs_dir) +# for i in pbs_scripts: +# f3.write("bash %s\n" % i) +# f3.close() +# with open(command_file, 'r') as fpp: +# for lines in fpp: +# lines = lines.strip() +# command_array.append(lines) +# fpp.close() +# if args.numcores: +# num_cores = int(num_cores) +# else: +# num_cores = multiprocessing.cpu_count() +# results = Parallel(n_jobs=num_cores)(delayed(run_command)(command) for command in command_array) +# +# # elif jobrun == "cluster": +# # command_array = [] +# # command_file = "%s/commands_list_fasta.sh" % args.filter2_only_snp_vcf_dir +# # f3 = open(command_file, 'w+') +# # for i in vcf_filenames: +# # job_name = os.path.basename(i) +# # job_print_string = "#PBS -N %s_fasta\n#PBS -M %s\n#PBS -m %s\n#PBS -V\n#PBS -l %s\n#PBS -q %s\n#PBS -A %s\n#PBS -l qos=flux\n\n/nfs/esnitkin/bin_group/anaconda2/bin/python /nfs/esnitkin/bin_group/pipeline/Github/variant_calling_pipeline_dev/modules/variant_diagnostics/extract_only_ref_variant_fasta.py -filter2_only_snp_vcf_dir %s -filter2_only_snp_vcf_file %s -reference %s -out_core %s\n" 
% (job_name, ConfigSectionMap("scheduler", Config)['email'], ConfigSectionMap("scheduler", Config)['notification'], ConfigSectionMap("scheduler", Config)['resources'], ConfigSectionMap("scheduler", Config)['queue'], ConfigSectionMap("scheduler", Config)['flux_account'],args.filter2_only_snp_vcf_dir, i, args.reference, core_vcf_fasta_dir) +# # job_file_name = "%s_fasta.pbs" % (i) +# # f1=open(job_file_name, 'w+') +# # f1.write(job_print_string) +# # f1.close() +# # pbs_dir = args.filter2_only_snp_vcf_dir + "/*_fasta.pbs" +# # pbs_scripts = glob.glob(pbs_dir) +# # for i in pbs_scripts: +# # f3.write("bash %s\n" % i) +# # f3.close() +# # with open(command_file, 'r') as fpp: +# # for lines in fpp: +# # lines = lines.strip() +# # command_array.append(lines) +# # fpp.close() +# # os.system("bash %s/command_file" % args.filter2_only_snp_vcf_dir) +# else: +# """ +# Generate a Command list of each job and run it on local system one at a time +# """ +# command_array = [] +# command_file = "%s/commands_list_fasta.sh" % args.filter2_only_snp_vcf_dir +# f3 = open(command_file, 'w+') +# +# +# for i in vcf_filenames: +# job_name = os.path.basename(i) +# job_print_string = "#PBS -N %s_fasta\n#PBS -M %s\n#PBS -m %s\n#PBS -V\n#PBS -l %s\n#PBS -q %s\n#PBS -A %s\n#PBS -l qos=flux\n\n/nfs/esnitkin/bin_group/anaconda2/bin/python /nfs/esnitkin/bin_group/pipeline/Github/variant_calling_pipeline_dev/modules/variant_diagnostics/extract_only_ref_variant_fasta.py -filter2_only_snp_vcf_dir %s -filter2_only_snp_vcf_file %s -reference %s -out_core %s -functional_filter %s\n" % (job_name, ConfigSectionMap("scheduler", Config)['email'], ConfigSectionMap("scheduler", Config)['notification'], ConfigSectionMap("scheduler", Config)['resources'], ConfigSectionMap("scheduler", Config)['queue'], ConfigSectionMap("scheduler", Config)['flux_account'], args.filter2_only_snp_vcf_dir, i, args.reference, core_vcf_fasta_dir, functional_filter) +# job_file_name = "%s_fasta.pbs" % (i) +# f1=open(job_file_name, 
'w+') +# f1.write(job_print_string) +# f1.close() +# #os.system("mv %s/*.pbs %s/temp" % (args.filter2_only_snp_vcf_dir, args.filter2_only_snp_vcf_dir)) +# pbs_dir = args.filter2_only_snp_vcf_dir + "/*_fasta.pbs" +# pbs_scripts = glob.glob(pbs_dir) +# +# +# for i in pbs_scripts: +# f3.write("bash %s\n" % i) +# f3.close() +# with open(command_file, 'r') as fpp: +# for lines in fpp: +# lines = lines.strip() +# command_array.append(lines) +# fpp.close() +# #os.system("bash command_file") +# call("bash %s" % command_file, logger) +# +# def create_job_allele_variant_fasta(jobrun, vcf_filenames, core_vcf_fasta_dir, config_file): +# +# """ Generate jobs/scripts that creates core consensus fasta file. +# +# This function will generate and run scripts/jobs to create core consensus fasta file of only core variant positions. +# Input for Fasttree, Beast and pairwise variant analysis. +# +# :param jobrun: Based on this value all the job/scripts will run on "cluster": either on single cluster, "parallel-local": run in parallel on local system, "local": run on local system, "parallel-cluster": submit parallel jobs on cluster. +# :param vcf_filenames: list of final vcf filenames i.e *_no_proximate_snp.vcf. These files are the final output of variant calling step for each sample. +# :return: +# :raises: +# """ +# if jobrun == "parallel-cluster": +# """ +# Supports only PBS clusters for now. 
+# """ +# for i in vcf_filenames: +# job_name = os.path.basename(i) +# job_print_string = "#PBS -N %s_fasta\n#PBS -M %s\n#PBS -m %s\n#PBS -V\n#PBS -l %s\n#PBS -q %s\n#PBS -A %s\n#PBS -l qos=flux\n\n/nfs/esnitkin/bin_group/anaconda2/bin/python /nfs/esnitkin/bin_group/pipeline/Github/variant_calling_pipeline_dev/modules/variant_diagnostics/extract_only_ref_variant_fasta_unique_positions.py -filter2_only_snp_vcf_dir %s -filter2_only_snp_vcf_file %s -reference %s -out_core %s -config %s\n" % (job_name, ConfigSectionMap("scheduler", Config)['email'], ConfigSectionMap("scheduler", Config)['notification'], ConfigSectionMap("scheduler", Config)['resources'], ConfigSectionMap("scheduler", Config)['queue'], ConfigSectionMap("scheduler", Config)['flux_account'], args.filter2_only_snp_vcf_dir, i, args.reference, core_vcf_fasta_dir, config_file) +# job_file_name = "%s_ref_allele_variants_fasta.pbs" % (i) +# f1=open(job_file_name, 'w+') +# f1.write(job_print_string) +# f1.close() +# #os.system("mv %s/*.pbs %s/temp" % (args.filter2_only_snp_vcf_dir, args.filter2_only_snp_vcf_dir)) +# pbs_dir = args.filter2_only_snp_vcf_dir + "/*_fasta.pbs" +# pbs_scripts = glob.glob(pbs_dir) +# for i in pbs_scripts: +# keep_logging('Running: qsub %s' % i, 'Running: qsub %s' % i, logger, 'info') +# #os.system("qsub %s" % i) +# call("qsub %s" % i, logger) +# +# +# elif jobrun == "parallel-local" or jobrun == "cluster": +# """ +# Generate a Command list of each job and run it in parallel on different cores available on local system +# """ +# command_array = [] +# command_file = "%s/commands_list_ref_allele_variants_fasta.sh" % args.filter2_only_snp_vcf_dir +# f3 = open(command_file, 'w+') +# for i in vcf_filenames: +# job_name = os.path.basename(i) +# job_print_string = "#PBS -N %s_fasta\n#PBS -M %s\n#PBS -m %s\n#PBS -V\n#PBS -l %s\n#PBS -q %s\n#PBS -A %s\n#PBS -l qos=flux\n\n/nfs/esnitkin/bin_group/anaconda2/bin/python 
/nfs/esnitkin/bin_group/pipeline/Github/variant_calling_pipeline_dev/modules/variant_diagnostics/extract_only_ref_variant_fasta_unique_positions.py -filter2_only_snp_vcf_dir %s -filter2_only_snp_vcf_file %s -reference %s -out_core %s -config %s\n" % (job_name, ConfigSectionMap("scheduler", Config)['email'], ConfigSectionMap("scheduler", Config)['notification'], ConfigSectionMap("scheduler", Config)['resources'], ConfigSectionMap("scheduler", Config)['queue'], ConfigSectionMap("scheduler", Config)['flux_account'], args.filter2_only_snp_vcf_dir, i, args.reference, core_vcf_fasta_dir, config_file) +# job_file_name = "%s_ref_allele_variants_fasta.pbs" % (i) +# f1=open(job_file_name, 'w+') +# f1.write(job_print_string) +# f1.close() +# pbs_dir = args.filter2_only_snp_vcf_dir + "/*_ref_allele_variants_fasta.pbs" +# pbs_scripts = glob.glob(pbs_dir) +# for i in pbs_scripts: +# f3.write("bash %s\n" % i) +# f3.close() +# with open(command_file, 'r') as fpp: +# for lines in fpp: +# lines = lines.strip() +# command_array.append(lines) +# fpp.close() +# if args.numcores: +# num_cores = int(num_cores) +# else: +# num_cores = multiprocessing.cpu_count() +# results = Parallel(n_jobs=num_cores)(delayed(run_command)(command) for command in command_array) +# +# # elif jobrun == "cluster": +# # command_array = [] +# # command_file = "%s/commands_list_fasta.sh" % args.filter2_only_snp_vcf_dir +# # f3 = open(command_file, 'w+') +# # for i in vcf_filenames: +# # job_name = os.path.basename(i) +# # job_print_string = "#PBS -N %s_fasta\n#PBS -M %s\n#PBS -m %s\n#PBS -V\n#PBS -l %s\n#PBS -q %s\n#PBS -A %s\n#PBS -l qos=flux\n\n/nfs/esnitkin/bin_group/anaconda2/bin/python /nfs/esnitkin/bin_group/pipeline/Github/variant_calling_pipeline_dev/modules/variant_diagnostics/extract_only_ref_variant_fasta.py -filter2_only_snp_vcf_dir %s -filter2_only_snp_vcf_file %s -reference %s -out_core %s\n" % (job_name, ConfigSectionMap("scheduler", Config)['email'], ConfigSectionMap("scheduler", 
Config)['notification'], ConfigSectionMap("scheduler", Config)['resources'], ConfigSectionMap("scheduler", Config)['queue'], ConfigSectionMap("scheduler", Config)['flux_account'],args.filter2_only_snp_vcf_dir, i, args.reference, core_vcf_fasta_dir) +# # job_file_name = "%s_fasta.pbs" % (i) +# # f1=open(job_file_name, 'w+') +# # f1.write(job_print_string) +# # f1.close() +# # pbs_dir = args.filter2_only_snp_vcf_dir + "/*_fasta.pbs" +# # pbs_scripts = glob.glob(pbs_dir) +# # for i in pbs_scripts: +# # f3.write("bash %s\n" % i) +# # f3.close() +# # with open(command_file, 'r') as fpp: +# # for lines in fpp: +# # lines = lines.strip() +# # command_array.append(lines) +# # fpp.close() +# # os.system("bash %s/command_file" % args.filter2_only_snp_vcf_dir) +# else: +# """ +# Generate a Command list of each job and run it on local system one at a time +# """ +# command_array = [] +# command_file = "%s/commands_list_ref_allele_variants_fasta.sh" % args.filter2_only_snp_vcf_dir +# f3 = open(command_file, 'w+') +# +# +# for i in vcf_filenames: +# job_name = os.path.basename(i) +# job_print_string = "#PBS -N %s_fasta\n#PBS -M %s\n#PBS -m %s\n#PBS -V\n#PBS -l %s\n#PBS -q %s\n#PBS -A %s\n#PBS -l qos=flux\n\n/nfs/esnitkin/bin_group/anaconda2/bin/python /nfs/esnitkin/bin_group/pipeline/Github/variant_calling_pipeline_dev/modules/variant_diagnostics/extract_only_ref_variant_fasta_unique_positions.py -filter2_only_snp_vcf_dir %s -filter2_only_snp_vcf_file %s -reference %s -out_core %s -config %s\n" % (job_name, ConfigSectionMap("scheduler", Config)['email'], ConfigSectionMap("scheduler", Config)['notification'], ConfigSectionMap("scheduler", Config)['resources'], ConfigSectionMap("scheduler", Config)['queue'], ConfigSectionMap("scheduler", Config)['flux_account'], args.filter2_only_snp_vcf_dir, i, args.reference, core_vcf_fasta_dir, config_file) +# job_file_name = "%s_ref_allele_variants_fasta.pbs" % (i) +# f1=open(job_file_name, 'w+') +# f1.write(job_print_string) +# f1.close() +# 
#os.system("mv %s/*.pbs %s/temp" % (args.filter2_only_snp_vcf_dir, args.filter2_only_snp_vcf_dir)) +# pbs_dir = args.filter2_only_snp_vcf_dir + "/*_ref_allele_variants_fasta.pbs" +# pbs_scripts = glob.glob(pbs_dir) +# +# +# for i in pbs_scripts: +# f3.write("bash %s\n" % i) +# f3.close() +# with open(command_file, 'r') as fpp: +# for lines in fpp: +# lines = lines.strip() +# command_array.append(lines) +# fpp.close() +# #os.system("bash command_file") +# call("bash %s" % command_file, logger) +# +# def create_job_DP(jobrun, vcf_filenames): +# """ +# Based on type of jobrun; generate jobs and run accordingly. +# :param jobrun: Based on this value all the job/scripts will run on "cluster": either on single cluster, "parallel-local": run in parallel on local system, "local": run on local system, "parallel-cluster": submit parallel jobs on cluster. +# :param vcf_filenames: +# :return: +# """ +# +# if jobrun == "parallel-cluster": +# """ +# Supports only PBS clusters for now. +# """ +# for i in vcf_filenames: +# job_name = os.path.basename(i) +# job_print_string = "#PBS -N %s\n#PBS -M apirani@med.umich.edu\n#PBS -m a\n#PBS -V\n#PBS -l nodes=1:ppn=1,mem=4000mb,walltime=76:00:00\n#PBS -q fluxod\n#PBS -A esnitkin_fluxod\n#PBS -l qos=flux\n\ncd %s\n/nfs/esnitkin/bin_group/anaconda2/bin/python /nfs/esnitkin/bin_group/pipeline/Github/variant_calling_pipeline_dev/modules/variant_diagnostics/DP_analysis.py -filter2_only_snp_vcf_dir %s -filter2_only_snp_vcf_file %s\n" % (job_name, args.filter2_only_snp_vcf_dir, args.filter2_only_snp_vcf_dir, i) +# job_file_name = "%s_DP.pbs" % (i) +# f1=open(job_file_name, 'w+') +# f1.write(job_print_string) +# f1.close() +# #os.system("mv %s/*.pbs %s/temp" % (args.filter2_only_snp_vcf_dir, args.filter2_only_snp_vcf_dir)) +# pbs_dir = args.filter2_only_snp_vcf_dir + "/*_DP.pbs" +# pbs_scripts = glob.glob(pbs_dir) +# for i in pbs_scripts: +# keep_logging('Running: qsub %s' % i, 'Running: qsub %s' % i, logger, 'info') +# #os.system("qsub %s" % i) 
+# call("qsub %s" % i, logger) +# +# +# elif jobrun == "parallel-local" or jobrun == "cluster" : +# """ +# Generate a Command list of each job and run it in parallel on different cores available on local system +# """ +# command_array = [] +# command_file = "%s/commands_list_DP.sh" % args.filter2_only_snp_vcf_dir +# f3 = open(command_file, 'w+') +# +# +# for i in vcf_filenames: +# job_name = os.path.basename(i) +# job_print_string = "#PBS -N %s\n#PBS -M apirani@med.umich.edu\n#PBS -m a\n#PBS -V\n#PBS -l nodes=1:ppn=1,mem=4000mb,walltime=76:00:00\n#PBS -q fluxod\n#PBS -A esnitkin_fluxod\n#PBS -l qos=flux\n\ncd %s\n/nfs/esnitkin/bin_group/anaconda2/bin/python /nfs/esnitkin/bin_group/pipeline/Github/variant_calling_pipeline_dev/modules/variant_diagnostics/DP_analysis.py -filter2_only_snp_vcf_dir %s -filter2_only_snp_vcf_file %s\n" % (job_name, args.filter2_only_snp_vcf_dir, args.filter2_only_snp_vcf_dir, i) +# job_file_name = "%s_DP.pbs" % (i) +# f1=open(job_file_name, 'w+') +# f1.write(job_print_string) +# f1.close() +# #os.system("mv %s/*.pbs %s/temp" % (args.filter2_only_snp_vcf_dir, args.filter2_only_snp_vcf_dir)) +# pbs_dir = args.filter2_only_snp_vcf_dir + "/*_DP.pbs" +# pbs_scripts = glob.glob(pbs_dir) +# +# +# for i in pbs_scripts: +# f3.write("bash %s\n" % i) +# f3.close() +# with open(command_file, 'r') as fpp: +# for lines in fpp: +# lines = lines.strip() +# command_array.append(lines) +# fpp.close() +# print len(command_array) +# if args.numcores: +# num_cores = int(num_cores) +# else: +# num_cores = multiprocessing.cpu_count() +# results = Parallel(n_jobs=num_cores)(delayed(run_command)(command) for command in command_array) +# +# # elif jobrun == "cluster": +# # """ Test pending """ +# # command_file = "%s/commands_list_DP.sh" % args.filter2_only_snp_vcf_dir +# # f3 = open(command_file, 'w+') +# # for i in vcf_filenames: +# # job_name = os.path.basename(i) +# # job_print_string = "#PBS -N %s\n#PBS -M apirani@med.umich.edu\n#PBS -m a\n#PBS -V\n#PBS -l 
nodes=1:ppn=1,mem=4000mb,walltime=76:00:00\n#PBS -q fluxod\n#PBS -A esnitkin_fluxod\n#PBS -l qos=flux\n\ncd %s\n/nfs/esnitkin/bin_group/anaconda2/bin/python /nfs/esnitkin/bin_group/pipeline/Github/variant_calling_pipeline_dev/modules/variant_diagnostics/DP_analysis.py -filter2_only_snp_vcf_dir %s -filter2_only_snp_vcf_file %s\n" % (job_name, args.filter2_only_snp_vcf_dir, args.filter2_only_snp_vcf_dir, i) +# # job_file_name = "%s_DP.pbs" % (i) +# # f1=open(job_file_name, 'w+') +# # f1.write(job_print_string) +# # f1.close() +# # pbs_dir = args.filter2_only_snp_vcf_dir + "/*_DP.pbs" +# # pbs_scripts = glob.glob(pbs_dir) +# # for i in pbs_scripts: +# # f3.write("bash %s\n" % i) +# # f3.close() +# # os.system("bash %s/commands_list_DP.sh" % args.filter2_only_snp_vcf_dir) +# +# else: +# """ +# Generate a Command list of each job and run it on local system one at a time +# """ +# command_file = "%s/commands_list_DP.sh" % args.filter2_only_snp_vcf_dir +# f3 = open(command_file, 'w+') +# for i in vcf_filenames: +# job_name = os.path.basename(i) +# job_print_string = "#PBS -N %s\n#PBS -M apirani@med.umich.edu\n#PBS -m a\n#PBS -V\n#PBS -l nodes=1:ppn=1,mem=4000mb,walltime=76:00:00\n#PBS -q fluxod\n#PBS -A esnitkin_fluxod\n#PBS -l qos=flux\n\ncd %s\n/nfs/esnitkin/bin_group/anaconda2/bin/python /nfs/esnitkin/bin_group/pipeline/Github/variant_calling_pipeline_dev/modules/variant_diagnostics/DP_analysis.py -filter2_only_snp_vcf_dir %s -filter2_only_snp_vcf_file %s\n" % (job_name, args.filter2_only_snp_vcf_dir, args.filter2_only_snp_vcf_dir, i) +# job_file_name = "%s_DP.pbs" % (i) +# f1=open(job_file_name, 'w+') +# f1.write(job_print_string) +# f1.close() +# pbs_dir = args.filter2_only_snp_vcf_dir + "/*_DP.pbs" +# pbs_scripts = glob.glob(pbs_dir) +# for i in pbs_scripts: +# f3.write("bash %s\n" % i) +# f3.close() +# #os.system("bash %s/commands_list_DP.sh" % args.filter2_only_snp_vcf_dir) +# call("bash %s/commands_list_DP.sh" % args.filter2_only_snp_vcf_dir, logger) +# +# def 
generate_vcf_files(): +# if ConfigSectionMap("functional_filters", Config)['apply_functional_filters'] == "yes": +# keep_logging('Removing Variants falling in Functional filters positions file: %s\n' % functional_class_filter_positions, 'Removing Variants falling in Functional filters positions file: %s\n' % functional_class_filter_positions, logger, +# 'info') +# # phage_positions = [] +# # phage_region_positions = "%s/phage_region_positions.txt" % args.filter2_only_snp_vcf_dir +# # with open(phage_region_positions, 'rU') as fp: +# # for line in fp: +# # phage_positions.append(line.strip()) +# # fp.close() +# +# +# functional_filter_pos_array = [] +# with open(functional_class_filter_positions, 'rU') as f_functional: +# for line_func in f_functional: +# functional_filter_pos_array.append(line_func.strip()) +# +# ref_variant_position_array = [] +# ffp = open("%s/Only_ref_variant_positions_for_closely" % args.filter2_only_snp_vcf_dir, 'r+') +# for line in ffp: +# line = line.strip() +# if line not in functional_filter_pos_array: +# ref_variant_position_array.append(line) +# ffp.close() +# +# # Adding core indel support: 2018-07-24 +# ref_indel_variant_position_array = [] +# ffp = open("%s/Only_ref_indel_positions_for_closely" % args.filter2_only_snp_vcf_dir, 'r+') +# for line in ffp: +# line = line.strip() +# if line not in functional_filter_pos_array: +# ref_indel_variant_position_array.append(line) +# ffp.close() +# +# else: +# functional_filter_pos_array = [] +# ref_variant_position_array = [] +# ffp = open("%s/Only_ref_variant_positions_for_closely" % args.filter2_only_snp_vcf_dir, 'r+') +# for line in ffp: +# line = line.strip() +# ref_variant_position_array.append(line) +# ffp.close() +# +# # Adding core indel support: 2018-07-24 +# ref_indel_variant_position_array = [] +# ffp = open("%s/Only_ref_indel_positions_for_closely" % args.filter2_only_snp_vcf_dir, 'r+') +# for line in ffp: +# line = line.strip() +# if line not in functional_filter_pos_array: +# 
ref_indel_variant_position_array.append(line) +# ffp.close() +# +# print "No. of core SNPs: %s" % len(ref_variant_position_array) +# print "No. of core INDELs: %s" % len(ref_indel_variant_position_array) +# +# f_file = open("%s/Only_ref_variant_positions_for_closely_without_functional_filtered_positions" % args.filter2_only_snp_vcf_dir, 'w+') +# for pos in ref_variant_position_array: +# f_file.write(pos + '\n') +# f_file.close() +# +# # Adding core indel support: 2018-07-24 +# f_file = open( +# "%s/Only_ref_indel_variant_positions_for_closely_without_functional_filtered_positions" % args.filter2_only_snp_vcf_dir, +# 'w+') +# for pos in ref_indel_variant_position_array: +# f_file.write(pos + '\n') +# f_file.close() +# +# base_vcftools_bin = ConfigSectionMap("bin_path", Config)['binbase'] + "/" + ConfigSectionMap("vcftools", Config)['vcftools_bin'] +# filter2_files_array = [] +# for i in vcf_filenames: +# filter2_file = i.replace('_no_proximate_snp.vcf', '') +# filter2_files_array.append(filter2_file) +# +# +# filtered_out_vcf_files = [] +# for i in filter2_files_array: +# print_array =[] +# with open(i) as file_open: +# for line in file_open: +# line = line.strip() +# if line.startswith("#"): +# print_array.append(line) +# else: +# split_array = re.split(r'\t+', line) +# if split_array[1] in ref_variant_position_array and 'INDEL' not in split_array[7]: +# print_array.append(line) +# file_open.close() +# file_name = i + "_core.vcf" +# keep_logging('Generating %s' % file_name, 'Generating %s' % file_name, logger, 'info') +# filtered_out_vcf_files.append(file_name) +# f1 = open(file_name, 'w+') +# for ios in print_array: +# print_string = str(ios) + "\n" +# f1.write(print_string) +# f1.close() +# +# filename = "%s/consensus.sh" % args.filter2_only_snp_vcf_dir +# keep_logging('Generating Consensus...', 'Generating Consensus...', logger, 'info') +# for file in filtered_out_vcf_files: +# f1 = open(filename, 'a+') +# bgzip_cmd = "%s/%s/bgzip -f %s\n" % 
(ConfigSectionMap("bin_path", Config)['binbase'], ConfigSectionMap("vcftools", Config)['tabix_bin'], file) +# f1.write(bgzip_cmd) +# subprocess.call([bgzip_cmd], shell=True) +# tabix_cmd = "%s/%s/tabix -f -p vcf %s.gz\n" % (ConfigSectionMap("bin_path", Config)['binbase'], ConfigSectionMap("vcftools", Config)['tabix_bin'], file) +# f1.write(tabix_cmd) +# subprocess.call([tabix_cmd], shell=True) +# fasta_cmd = "cat %s | %s/vcf-consensus %s.gz > %s.fa\n" % (args.reference, base_vcftools_bin, file, file.replace('_filter2_final.vcf_core.vcf', '')) +# f1.write(fasta_cmd) +# subprocess.call([fasta_cmd], shell=True) +# base = os.path.basename(file) +# header = base.replace('_filter2_final.vcf_core.vcf', '') +# sed_command = "sed -i 's/>.*/>%s/g' %s.fa\n" % (header, file.replace('_filter2_final.vcf_core.vcf', '')) +# subprocess.call([sed_command], shell=True) +# f1.write(sed_command) +# keep_logging('The consensus commands are in : %s' % filename, 'The consensus commands are in : %s' % filename, logger, 'info') +# sequence_lgth_cmd = "for i in %s/*.fa; do %s/%s/bioawk -c fastx \'{ print $name, length($seq) }\' < $i; done" % (args.filter2_only_snp_vcf_dir, ConfigSectionMap("bin_path", Config)['binbase'], ConfigSectionMap("bioawk", Config)['bioawk_bin']) +# #os.system(sequence_lgth_cmd) +# call("%s" % sequence_lgth_cmd, logger) +# +# def gatk_filter2(final_raw_vcf, out_path, analysis, reference): +# gatk_filter2_parameter_expression = "MQ > 50 && QUAL > 100 && DP > 9" +# gatk_filter2_command = "java -jar %s/%s/GenomeAnalysisTK.jar -T VariantFiltration -R %s -o %s/%s_filter2_gatk.vcf --variant %s --filterExpression \"%s\" --filterName PASS_filter2" % (ConfigSectionMap("bin_path", Config)['binbase'], ConfigSectionMap("gatk", Config)['gatk_bin'], reference, out_path, analysis, final_raw_vcf, gatk_filter2_parameter_expression) +# keep_logging('Running Command: [%s]' % gatk_filter2_command, 'Running Command: [%s]' % gatk_filter2_command, logger, 'info') +# 
#os.system(gatk_filter2_command) +# call("%s" % gatk_filter2_command, logger) +# filter_flag_command = "grep '#\|PASS_filter2' %s/%s_filter2_gatk.vcf > %s/%s_filter2_final.vcf" % (out_path, analysis, out_path, analysis) +# call("%s" % filter_flag_command, logger) +# gatk_filter2_final_vcf = "%s/%s_filter2_final.vcf" % (out_path, analysis) +# return gatk_filter2_final_vcf +# +# def remove_proximate_snps(gatk_filter2_final_vcf_file, out_path, analysis, reference): +# all_position = [] +# remove_proximate_position_array = [] +# gatk_filter2_final_vcf_file_no_proximate_snp = gatk_filter2_final_vcf_file + "_no_proximate_snp.vcf" +# with open(gatk_filter2_final_vcf_file, 'rU') as csv_file: +# for line in csv_file: +# if not line.startswith('#'): +# line_array = line.split('\t') +# all_position.append(line_array[1]) +# for position in all_position: +# position_index = all_position.index(position) +# next_position_index = position_index + 1 +# +# if next_position_index < len(all_position): +# diff = int(all_position[next_position_index]) - int(position) +# if diff < 10: +# #print position + " " + all_position[next_position_index] +# if position not in remove_proximate_position_array and all_position[next_position_index] not in remove_proximate_position_array: +# remove_proximate_position_array.append(int(position)) +# remove_proximate_position_array.append(int(all_position[next_position_index])) +# f1=open(gatk_filter2_final_vcf_file_no_proximate_snp, 'w+') +# with open(gatk_filter2_final_vcf_file, 'rU') as csv_file2: +# for line in csv_file2: +# if line.startswith('gi') or line.startswith('MRSA_8058'): ##change this! 
+# line_array = line.split('\t') +# if int(line_array[1]) not in remove_proximate_position_array: +# print_string = line +# f1.write(print_string) +# else: +# print_string = line +# f1.write(print_string) +# gatk_filter2_final_vcf_file_no_proximate_snp_positions = gatk_filter2_final_vcf_file + "_no_proximate_snp.vcf_positions_array" +# f2=open(gatk_filter2_final_vcf_file_no_proximate_snp_positions, 'w+') +# for i in remove_proximate_position_array: +# position_print_string = str(i) + "\n" +# f2.write(position_print_string) +# return gatk_filter2_final_vcf_file_no_proximate_snp +# +# def FQ_analysis(): +# for i in vcf_filenames: +# filename_base = os.path.basename(i) +# aln_mpileup_vcf_file = i.replace('_filter2_final.vcf_no_proximate_snp.vcf', '_aln_mpileup_raw.vcf_5bp_indel_removed.vcf') +# analysis = filename_base.replace('_filter2_final.vcf_no_proximate_snp.vcf', '') +# #print aln_mpileup_vcf_file +# grep_reference_file = "grep \'^##reference\' %s" % aln_mpileup_vcf_file +# proc = subprocess.Popen([grep_reference_file], stdout=subprocess.PIPE, shell=True) +# (out, err) = proc.communicate() +# out = out.strip() +# reference_file = out.split(':') +# # Change it to multiprocessing +# gatk_filter2_final_vcf_file = gatk_filter2(aln_mpileup_vcf_file, temp_dir, analysis, reference_file[1]) +# #print gatk_filter2_final_vcf_file +# gatk_filter2_final_vcf_file_no_proximate_snp = remove_proximate_snps(gatk_filter2_final_vcf_file, temp_dir, analysis, reference_file[1]) +# grep_fq_field = "awk -F\'\\t\' \'{print $8}\' %s | grep -o \'FQ=.*\' | sed \'s/FQ=//g\' | awk -F\';\' \'{print $1}\' > %s/%s_FQ_values" % (gatk_filter2_final_vcf_file_no_proximate_snp, os.path.dirname(i), analysis) +# #os.system(grep_fq_field) +# call("%s" % grep_fq_field, logger) +# #print grep_fq_field +# +# def DP_analysis(): +# create_job_DP(args.jobrun, vcf_filenames) +# paste_command = "paste %s/extract_DP_positions.txt" % args.filter2_only_snp_vcf_dir +# for i in vcf_filenames: +# label_file = 
i.replace('_filter2_final.vcf_no_proximate_snp.vcf', '_DP_values') +# paste_command = paste_command + " " + label_file +# +# paste_file = args.filter2_only_snp_vcf_dir + "/paste_DP_files.sh" +# f2=open(paste_file, 'w+') +# paste_command = paste_command + " > %s/filtered_DP_values_temp.txt" % args.filter2_only_snp_vcf_dir +# #os.system(paste_command) +# f2.write(paste_command + '\n') +# cat_header = "cat %s/header.txt %s/filtered_DP_values_temp.txt > %s/filtered_DP_values.txt" % (args.filter2_only_snp_vcf_dir, args.filter2_only_snp_vcf_dir, args.filter2_only_snp_vcf_dir) +# #os.system(cat_header) +# f2.write(cat_header + '\n') +# sed_command = "sed -i \'s/_filter2_final.vcf_no_proximate_snp.vcf//g\' %s/filtered_DP_values.txt" % (args.filter2_only_snp_vcf_dir) +# #os.system(sed_command) +# f2.write(sed_command + '\n') +# cmd = "bash %s" % paste_file +# # os.system("bash %s/paste_DP_files.sh" % args.filter2_only_snp_vcf_dir) +# +# def DP_analysis_barplot(): +# #os.system("bash %s/paste_DP_files.sh" % args.filter2_only_snp_vcf_dir) +# call("bash %s/paste_DP_files.sh" % args.filter2_only_snp_vcf_dir, logger) +# keep_logging('Generating DP barplots data...', 'Generating DP barplots data...', logger, 'info') +# c_reader = csv.reader(open('%s/filtered_DP_values.txt' % args.filter2_only_snp_vcf_dir, 'r'), delimiter='\t') +# columns = list(zip(*c_reader)) +# counts = 1 +# end = len(vcf_filenames) + 1 +# f_bar_count = open("%s/DP_bargraph_counts.txt" % args.filter2_only_snp_vcf_dir, 'w+') +# f_bar_perc = open("%s/DP_bargraph_percentage.txt" % args.filter2_only_snp_vcf_dir, 'w+') +# f_bar_count.write("Sample\treference_position\toneto5\tsixto10\televento14\tfifteenorabove\n") +# f_bar_perc.write("Sample\treference_position\toneto5\tsixto10\televento14\tfifteenorabove\n") +# for i in xrange(1, end, 1): +# """ Bar Count Statistics: Variant Position Count Statistics """ +# reference_position = columns[i].count('NA') +# oneto5 = 0 +# for k in list(columns[i][1:]): +# if k != "": 
+# if k != "NA": +# if int(k) < 5: +# oneto5 += 1 +# sixto10 = 0 +# for k in list(columns[i][1:]): +# if k != "": +# if k != "NA": +# if int(k) >= 5 and int(k) <= 10: +# sixto10 += 1 +# elevento14 = 0 +# for k in list(columns[i][1:]): +# if k != "": +# if k != "NA": +# if int(k) >= 11 and int(k) <= 14: +# elevento14 += 1 +# fifteenorabove = 0 +# for k in list(columns[i][1:]): +# if k != "": +# if k != "NA": +# if int(k) >= 15: +# fifteenorabove += 1 +# total = reference_position + oneto5 + sixto10 + elevento14 + fifteenorabove +# filename_count = i - 1 +# bar_string = "%s\t%s\t%s\t%s\t%s\t%s\n" % (os.path.basename(vcf_filenames[filename_count].replace('_filter2_final.vcf_no_proximate_snp.vcf', '')), reference_position, oneto5, sixto10, elevento14, fifteenorabove) +# f_bar_count.write(bar_string) +# +# """ Bar Count Percentage Statistics: Variant Position Percentage Statistics """ +# try: +# reference_position_perc = float(reference_position * 100 / total) +# except ZeroDivisionError: +# reference_position_perc = 0 +# try: +# oneto5_perc = float(oneto5 * 100 / total) +# except ZeroDivisionError: +# oneto5_perc = 0 +# try: +# sixto10_perc = float(sixto10 * 100 / total) +# except ZeroDivisionError: +# sixto10_perc = 0 +# try: +# elevento14_perc = float(elevento14 * 100 / total) +# except ZeroDivisionError: +# elevento14_perc = 0 +# try: +# fifteenorabove_perc = float(fifteenorabove * 100 / total) +# except ZeroDivisionError: +# fifteenorabove_perc = 0 +# bar_perc_string = "%s\t%s\t%s\t%s\t%s\t%s\n" % (os.path.basename(vcf_filenames[filename_count].replace('_filter2_final.vcf_no_proximate_snp.vcf', '')), reference_position_perc, oneto5_perc, sixto10_perc, elevento14_perc, fifteenorabove_perc) +# f_bar_perc.write(bar_perc_string) + +def extract_only_ref_variant_fasta(core_vcf_fasta_dir): + if ConfigSectionMap("functional_filters", Config)['apply_functional_filters'] == "yes" and ConfigSectionMap("functional_filters", Config)['apply_to_calls'] == "yes": + 
functional_filter = "yes" + create_job_fasta(args.jobrun, vcf_filenames, core_vcf_fasta_dir, functional_filter) + +def extract_only_ref_variant_fasta_from_reference(): + if ConfigSectionMap("functional_filters", Config)['apply_functional_filters'] == "yes" and \ + ConfigSectionMap("functional_filters", Config)['apply_to_calls'] == "yes": + ffp = open("%s/Only_ref_variant_positions_for_closely_without_functional_filtered_positions" % args.filter2_only_snp_vcf_dir).readlines() + else: + ffp = open("%s/Only_ref_variant_positions_for_closely" % args.filter2_only_snp_vcf_dir).readlines() + fasta_string = "" + #firstLine = ffp.pop(0) + for lines in ffp: + lines = lines.strip() + extract_base = "grep -v \'>\' %s | tr -d \'\\n\'| cut -b%s" % (args.reference, lines) + proc = subprocess.Popen([extract_base], stdout=subprocess.PIPE, shell=True) + (out, err) = proc.communicate() + out = out.strip() + fasta_string = fasta_string + out + if not out: + print lines + keep_logging('Error extracting reference allele', 'Error extracting reference allele', logger, 'info') + exit() + + pattern = re.compile(r'\s+') + fasta_string = re.sub(pattern, '', fasta_string) + final_fasta_string = ">%s\n" % os.path.basename(args.reference.replace('.fasta', '').replace('.fa', '')) + fasta_string + "\n" + fp = open("%s/%s_variants.fa" % (args.filter2_only_snp_vcf_dir, os.path.basename(args.reference.replace('.fasta', '').replace('.fa', ''))), 'w+') + fp.write(final_fasta_string) + fp.close() + +def extract_only_ref_variant_fasta_from_reference_allele_variant(): + ffp = open("%s/unique_positions_file" % args.filter2_only_snp_vcf_dir).readlines() + #unique_positions_array = [] + + fasta_string = "" + #firstLine = ffp.pop(0) + for lines in ffp: + lines = lines.strip() + #unique_positions_array.append(lines) + extract_base = "grep -v \'>\' %s | tr -d \'\\n\'| cut -b%s" % (args.reference, lines) + proc = subprocess.Popen([extract_base], stdout=subprocess.PIPE, shell=True) + (out, err) = 
proc.communicate() + out = out.strip() + fasta_string = fasta_string + out + if not out: + print lines + keep_logging('Error extracting reference allele', 'Error extracting reference allele', logger, 'info') + exit() + + pattern = re.compile(r'\s+') + fasta_string = re.sub(pattern, '', fasta_string) + final_fasta_string = ">%s\n" % os.path.basename(args.reference.replace('.fasta', '').replace('.fa', '')) + fasta_string + "\n" + fp = open("%s/%s_allele_variants.fa" % (args.filter2_only_snp_vcf_dir, os.path.basename(args.reference.replace('.fasta', '').replace('.fa', ''))), 'w+') + fp.write(final_fasta_string) + fp.close() + +def prepare_snpEff_db(reference_basename): + keep_logging('Preparing snpEff database requirements.', 'Preparing snpEff database requirements.', logger, 'info') + reference_basename = (os.path.basename(args.reference)).split(".") + if os.path.isfile("%s/%s/snpEff.config" % (ConfigSectionMap("bin_path", Config)['binbase'], ConfigSectionMap("snpeff", Config)['snpeff_bin'])): + #os.system("cp %s/%s/snpEff.config %s" % (ConfigSectionMap("bin_path", Config)['binbase'], ConfigSectionMap("snpeff", Config)['snpeff_bin'], args.filter2_only_snp_vcf_dir)) + keep_logging("cp %s/%s/snpEff.config %s" % (ConfigSectionMap("bin_path", Config)['binbase'], ConfigSectionMap("snpeff", Config)['snpeff_bin'], args.filter2_only_snp_vcf_dir), "cp %s/%s/snpEff.config %s" % (ConfigSectionMap("bin_path", Config)['binbase'], ConfigSectionMap("snpeff", Config)['snpeff_bin'], args.filter2_only_snp_vcf_dir), logger, 'debug') + call("cp %s/%s/snpEff.config %s" % (ConfigSectionMap("bin_path", Config)['binbase'], ConfigSectionMap("snpeff", Config)['snpeff_bin'], args.filter2_only_snp_vcf_dir), logger) + else: + keep_logging("Error: %s/%s/snpEff.config doesn't exists.\nExiting..." % (ConfigSectionMap("bin_path", Config)['binbase'], ConfigSectionMap("snpeff", Config)['snpeff_bin']),"Error: %s/%s/snpEff.config doesn't exists.\nExiting..." 
def prepare_snpEff_db(reference_basename):
    """Build a custom snpEff database for the reference genome.

    Copies snpEff.config into the working directory, stages the reference
    fasta and its GFF/GenBank annotation under the snpEff data tree, registers
    the genome in the local snpEff.config, and runs `snpEff build -genbank`.
    Exits the program when snpEff.config or the matching .gff file is missing.

    NOTE(review): the incoming `reference_basename` argument is immediately
    recomputed from args.reference (preserving historical behaviour), so the
    parameter is effectively ignored — confirm before relying on it.
    """
    keep_logging('Preparing snpEff database requirements.', 'Preparing snpEff database requirements.', logger, 'info')
    reference_basename = (os.path.basename(args.reference)).split(".")
    # Hoist the repeated bin-path join; identical string to the original calls.
    bin_dir = "%s/%s" % (ConfigSectionMap("bin_path", Config)['binbase'], ConfigSectionMap("snpeff", Config)['snpeff_bin'])
    ref_dir = os.path.dirname(args.reference)

    if os.path.isfile("%s/snpEff.config" % bin_dir):
        copy_config_cmd = "cp %s/snpEff.config %s" % (bin_dir, args.filter2_only_snp_vcf_dir)
        keep_logging(copy_config_cmd, copy_config_cmd, logger, 'debug')
        call(copy_config_cmd, logger)
    else:
        keep_logging("Error: %s/snpEff.config doesn't exists.\nExiting..." % bin_dir,
                     "Error: %s/snpEff.config doesn't exists.\nExiting..." % bin_dir, logger, 'exception')
        exit()

    make_sure_path_exists("%s/data/%s" % (bin_dir, reference_basename[0]))
    make_sure_path_exists("%s/data/genomes/" % bin_dir)
    copy_fasta_cmd = "cp %s %s/data/genomes/%s.fa" % (args.reference, bin_dir, reference_basename[0])
    keep_logging(copy_fasta_cmd, copy_fasta_cmd, logger, 'debug')
    call(copy_fasta_cmd, logger)

    # Register the custom genome in the local snpEff.config copy.
    with open("%s/snpEff.config" % args.filter2_only_snp_vcf_dir, "a") as conf_file:
        conf_file.write("\n\n##Building Custom Database###\n%s.genome\t: %s\n\n" % (reference_basename[0], reference_basename[0]))

    if os.path.isfile("%s/%s.gff" % (ref_dir, reference_basename[0])):
        copy_gff_cmd = "cp %s/%s.gff %s/data/%s/genes.gff" % (ref_dir, reference_basename[0], bin_dir, reference_basename[0])
        copy_gbk_cmd = "cp %s/%s.gb* %s/data/%s/genes.gbk" % (ref_dir, reference_basename[0], bin_dir, reference_basename[0])
        keep_logging(copy_gff_cmd, copy_gff_cmd, logger, 'debug')
        # Fixed: the second keep_logging previously logged the gff copy command
        # while executing the genbank copy; log the command actually run.
        keep_logging(copy_gbk_cmd, copy_gbk_cmd, logger, 'debug')
        call(copy_gff_cmd, logger)
        call(copy_gbk_cmd, logger)
    else:
        keep_logging("Error: %s/%s.gff file doesn't exists. Make sure the GFF file has the same prefix as reference fasta file\nExiting..." % (ref_dir, reference_basename[0]),
                     "Error: %s/%s.gff file doesn't exists. Make sure the GFF file has the same prefix as reference fasta file\nExiting..." % (ref_dir, reference_basename[0]),
                     logger, 'exception')
        exit()

    # Build from the GenBank annotation (the gff3 build path is retired).
    build_cmd = "java -jar %s/%s build -genbank -v %s -c %s/snpEff.config -dataDir %s/data" % (
        bin_dir, ConfigSectionMap("snpeff", Config)['base_cmd'], reference_basename[0],
        args.filter2_only_snp_vcf_dir, bin_dir)
    keep_logging(build_cmd, build_cmd, logger, 'debug')
    call(build_cmd, logger)
    keep_logging('Finished Preparing snpEff database requirements.', 'Finished Preparing snpEff database requirements.', logger, 'info')
ConfigSectionMap("bin_path", Config)['binbase'], ConfigSectionMap("snpeff", Config)['snpeff_bin']), logger) + call("java -jar %s/%s/%s build -genbank -v %s -c %s/snpEff.config -dataDir %s/%s/data" % (ConfigSectionMap("bin_path", Config)['binbase'], ConfigSectionMap("snpeff", Config)['snpeff_bin'], ConfigSectionMap("snpeff", Config)['base_cmd'], reference_basename[0], args.filter2_only_snp_vcf_dir, ConfigSectionMap("bin_path", Config)['binbase'], ConfigSectionMap("snpeff", Config)['snpeff_bin']), logger) + keep_logging('Finished Preparing snpEff database requirements.', 'Finished Preparing snpEff database requirements.', logger, 'info') + +def variant_annotation(): + keep_logging('Annotating Variants using snpEff.', 'Annotating Variants using snpEff.', logger, 'info') + + if ConfigSectionMap("snpeff", Config)['prebuild'] == "yes": + if ConfigSectionMap("snpeff", Config)['db']: + print "Using pre-built snpEff database: %s" % ConfigSectionMap("snpeff", Config)['db'] + proc = subprocess.Popen(["java -jar %s/%s/%s databases | grep %s" % (ConfigSectionMap("bin_path", Config)['binbase'], ConfigSectionMap("snpeff", Config)['snpeff_bin'], ConfigSectionMap("snpeff", Config)['base_cmd'], ConfigSectionMap("snpeff", Config)['db'])], + stdout=subprocess.PIPE, shell=True) + (out2, err2) = proc.communicate() + if out2: + snpeffdb = ConfigSectionMap("snpeff", Config)['db'] + else: + print "The database name %s provided was not found. 
Check the name and try again" % ConfigSectionMap("snpeff", Config)['db'] + exit() + else: + print "snpEff db section is not set in config file" + exit() + else: + reference_basename = (os.path.basename(args.reference)).split(".") + snpeffdb = reference_basename[0] + prepare_snpEff_db(reference_basename) + + annotate_vcf_cmd_array = [] + annotate_final_vcf_cmd_array = [] + for i in vcf_filenames: + raw_vcf = i.replace('_filter2_final.vcf_no_proximate_snp.vcf', '_aln_mpileup_raw.vcf') + annotate_vcf_cmd = "java -Xmx4g -jar %s/%s/%s -csvStats %s_ANN.csv -dataDir %s/%s/data/ %s -c %s/snpEff.config %s %s > %s_ANN.vcf" % \ + (ConfigSectionMap("bin_path", Config)['binbase'], ConfigSectionMap("snpeff", Config)['snpeff_bin'], ConfigSectionMap("snpeff", Config)['base_cmd'], raw_vcf, ConfigSectionMap("bin_path", Config)['binbase'], ConfigSectionMap("snpeff", Config)['snpeff_bin'], ConfigSectionMap("snpeff", Config)['snpeff_parameters'], args.filter2_only_snp_vcf_dir, snpeffdb, raw_vcf, raw_vcf) + print annotate_vcf_cmd + annotate_vcf_cmd_array.append(annotate_vcf_cmd) + final_vcf = i + annotate_final_vcf_cmd = "java -Xmx4g -jar %s/%s/%s -csvStats %s_ANN.csv -dataDir %s/%s/data/ %s -c %s/snpEff.config %s %s > %s_ANN.vcf" % \ + (ConfigSectionMap("bin_path", Config)['binbase'], ConfigSectionMap("snpeff", Config)['snpeff_bin'], ConfigSectionMap("snpeff", Config)['base_cmd'], final_vcf, ConfigSectionMap("bin_path", Config)['binbase'], ConfigSectionMap("snpeff", Config)['snpeff_bin'], ConfigSectionMap("snpeff", Config)['snpeff_parameters'], args.filter2_only_snp_vcf_dir, snpeffdb, final_vcf, final_vcf) + annotate_final_vcf_cmd_array.append(annotate_final_vcf_cmd) + if args.numcores: + num_cores = int(num_cores) + else: + num_cores = multiprocessing.cpu_count() + #print annotate_vcf_cmd_array + results = Parallel(n_jobs=num_cores)(delayed(run_command)(command) for command in annotate_vcf_cmd_array) + results_2 = Parallel(n_jobs=num_cores)(delayed(run_command)(command) for command in 
annotate_final_vcf_cmd_array) + +def indel_annotation(): + keep_logging('Annotating indels using snpEff.', 'Annotating indels using snpEff.', logger, 'info') + + if ConfigSectionMap("snpeff", Config)['prebuild'] == "yes": + if ConfigSectionMap("snpeff", Config)['db']: + print "Using pre-built snpEff database: %s" % ConfigSectionMap("snpeff", Config)['db'] + proc = subprocess.Popen(["java -jar %s/%s/%s databases | grep %s" % (ConfigSectionMap("bin_path", Config)['binbase'], ConfigSectionMap("snpeff", Config)['snpeff_bin'], ConfigSectionMap("snpeff", Config)['base_cmd'], ConfigSectionMap("snpeff", Config)['db'])], + stdout=subprocess.PIPE, shell=True) + (out2, err2) = proc.communicate() + if out2: + snpeffdb = ConfigSectionMap("snpeff", Config)['db'] + else: + print "The database name %s provided was not found. Check the name and try again" % ConfigSectionMap("snpeff", Config)['db'] + exit() + else: + print "snpEff db section is not set in config file" + exit() + else: + reference_basename = (os.path.basename(args.reference)).split(".") + snpeffdb = reference_basename[0] + prepare_snpEff_db(reference_basename) + + + annotate_vcf_cmd_array = [] + annotate_final_vcf_cmd_array = [] + for i in vcf_filenames: + raw_vcf = i.replace('_filter2_final.vcf_no_proximate_snp.vcf', '_aln_mpileup_raw.vcf') + annotate_vcf_cmd = "java -Xmx4g -jar %s/%s/%s -csvStats %s_ANN.csv -dataDir %s/%s/data/ %s -c %s/snpEff.config %s %s > %s_ANN.vcf" % \ + (ConfigSectionMap("bin_path", Config)['binbase'], ConfigSectionMap("snpeff", Config)['snpeff_bin'], ConfigSectionMap("snpeff", Config)['base_cmd'], raw_vcf, ConfigSectionMap("bin_path", Config)['binbase'], ConfigSectionMap("snpeff", Config)['snpeff_bin'], ConfigSectionMap("snpeff", Config)['snpeff_parameters'], args.filter2_only_snp_vcf_dir, snpeffdb, raw_vcf, raw_vcf) + annotate_vcf_cmd_array.append(annotate_vcf_cmd) + final_vcf = i.replace('_filter2_final.vcf_no_proximate_snp.vcf', '_filter2_indel_final.vcf') + annotate_final_vcf_cmd = "java 
-Xmx4g -jar %s/%s/%s -csvStats %s_ANN.csv -dataDir %s/%s/data/ %s -c %s/snpEff.config %s %s > %s_ANN.vcf" % \ + (ConfigSectionMap("bin_path", Config)['binbase'], ConfigSectionMap("snpeff", Config)['snpeff_bin'], ConfigSectionMap("snpeff", Config)['base_cmd'], final_vcf, ConfigSectionMap("bin_path", Config)['binbase'], ConfigSectionMap("snpeff", Config)['snpeff_bin'], ConfigSectionMap("snpeff", Config)['snpeff_parameters'], args.filter2_only_snp_vcf_dir, snpeffdb, final_vcf, final_vcf) + annotate_final_vcf_cmd_array.append(annotate_final_vcf_cmd) + if args.numcores: + num_cores = int(num_cores) + else: + num_cores = multiprocessing.cpu_count() + results = Parallel(n_jobs=num_cores)(delayed(run_command)(command) for command in annotate_vcf_cmd_array) + results_2 = Parallel(n_jobs=num_cores)(delayed(run_command)(command) for command in annotate_final_vcf_cmd_array) + +def gatk_combine_variants(files_gatk, reference, out_path, merged_file_suffix, logger, Config): + base_cmd = ConfigSectionMap("bin_path", Config)['binbase'] + "/" + ConfigSectionMap("gatk", Config)[ + 'gatk_bin'] + "/" + ConfigSectionMap("gatk", Config)['base_cmd'] + #files_gatk = "--variant " + ' --variant '.join(vcf_files_array) + keep_logging("java -jar %s -T CombineVariants -R %s %s -o %s/Final_vcf_gatk%s" % (base_cmd, reference, files_gatk, out_path, merged_file_suffix), "java -jar %s -T CombineVariants -R %s %s -o %s/Final_vcf_gatk%s" % (base_cmd, reference, files_gatk, out_path, merged_file_suffix), logger, 'debug') + merge_gatk_commands_file = "%s/gatk_merge.sh" % args.filter2_only_snp_vcf_dir + with open(merge_gatk_commands_file, 'w+') as fopen: + fopen.write("java -jar %s -T CombineVariants -R %s %s -o %s/Final_vcf_gatk%s" % (base_cmd, reference, files_gatk, out_path, merged_file_suffix) + '\n') + fopen.close() + # Commenting out calling gatk combine variants with a custom logging call method, problem with python subprocess, OSError: [Errno 7] Argument list too long + os.system("bash %s" % 
merge_gatk_commands_file) + return "%s/Final_vcf_gatk%s" % (out_path, merged_file_suffix) + +def annotated_snp_matrix(): + """ + :return: Annotate core vcf files generated at core_prep steps. + Read Genbank file and return a dictionary of Prokka ID mapped to Gene Name, Prokka ID mapped to Product Name. + This dictionary will then be used to insert annotation into SNP/Indel matrix + """ + + """Annotate all VCF file formats with SNPeff""" + # Commented for debugging + variant_annotation() + + indel_annotation() + + + """ Start of Extract Annotation information from Genbank file + + Extract Annotation information from Genbank file + + - Check if Reference genome Genbank file exists. + - Initiate dictionaries that maps locus tag to gene name and product. This information will be used for annotating SNP/Indel Matrix + - Read the locus tag and gene annotations into a dictionary that maps locus tags to gene name/product name + + """ + + reference_basename = (os.path.basename(args.reference)).split(".") + if os.path.isfile("%s/%s.gbf" % (os.path.dirname(args.reference), reference_basename[0])): + handle = open("%s/%s.gbf" % (os.path.dirname(args.reference), reference_basename[0]), 'rU') + else: + raise IOError('%s/%s.gbf does not exist.' 
% (os.path.dirname(args.reference), reference_basename[0])) + exit() + + locus_tag_to_gene_name = {} + locus_tag_to_product = {} + locus_tag_to_strand = {} + #locus_tag_to_uniprot = {} + #locus_tag_to_ec_number = {} + + keep_logging( + 'Reading annotations from Reference genome genbank file: %s/%s.gbf' % (os.path.dirname(args.reference), reference_basename[0]), + 'Reading annotations from Reference genome genbank file: %s/%s.gbf' % (os.path.dirname(args.reference), reference_basename[0]), + logger, 'info') + for record in SeqIO.parse(handle, 'genbank') : + for feature in record.features: + location = str(feature.location) + strand = location.split('(')[1].replace(')', '') + if 'locus_tag' in feature.qualifiers: + locus_tag_to_strand[str(feature.qualifiers['locus_tag'][0])] = strand + if 'gene' in feature.qualifiers: + locus_tag_to_gene_name[str(feature.qualifiers['locus_tag'][0])] = str(feature.qualifiers['gene'][0]) + else: + locus_tag_to_gene_name[str(feature.qualifiers['locus_tag'][0])] = "null or hypothetical protein" + if 'product' in feature.qualifiers: + locus_tag_to_product[str(feature.qualifiers['locus_tag'][0])] = str(feature.qualifiers['product'][0]) + else: + locus_tag_to_product[str(feature.qualifiers['locus_tag'][0])] = "null or hypothetical protein" + else: + keep_logging( + 'Error: locus_tag specifications for the below feature doesnt exists. Please check the format of genbank file\n%s' % str(feature), + 'Error: locus_tag specifications for the below feature doesnt exists. 
Please check the format of genbank file\n%s' % str(feature), + logger, 'exception') + + # Annotation Bug fix 1 + first_locus_tag = record.features[1].qualifiers['locus_tag'][0] + last_element = len(record.features) - 1 + last_locus_tag = record.features[last_element].qualifiers['locus_tag'][0] + + # #Debugging prints + # print first_locus_tag + # print locus_tag_to_gene_name[first_locus_tag] + # print last_locus_tag + # print locus_tag_to_gene_name[last_locus_tag] + + """ End of Extract Annotation information from Genbank file + + Extract Annotation information from Genbank file + + - Check if Reference genome Genbank file exists. + - Initiate dictionaries that maps locus tag to gene name and product. This information will be used for annotating SNP/Indel Matrix + - Read the locus tag and gene annotations into a dictionary that maps locus tags to gene name/product name + + """ + + + + """ Start of Merging Step: + + - Merge Individual Annotated raw and filtered vcf files to generate a Final merged vcf file using Gatk combine variants method. + - Parse this merged Final_vcf* file and generate a SNP/Indel matrix + + """ + + keep_logging('Merging Final Annotated VCF files into %s/Final_vcf_no_proximate_snp.vcf using bcftools' % args.filter2_only_snp_vcf_dir, 'Merging Final Annotated VCF files into %s/Final_vcf_no_proximate_snp.vcf using bcftools' % args.filter2_only_snp_vcf_dir, logger, 'info') + + #Commented for debugging + files_for_tabix = glob.glob("%s/*.vcf_no_proximate_snp.vcf_ANN.vcf" % args.filter2_only_snp_vcf_dir) + tabix(files_for_tabix, "vcf", logger, Config) + files_for_tabix = glob.glob("%s/*_filter2_indel_final.vcf_ANN.vcf" % args.filter2_only_snp_vcf_dir) + tabix(files_for_tabix, "vcf", logger, Config) + + files = ' '.join(vcf_filenames) + + + """ bcftools merging is deprecated. 
Replaced with GATK combinevariants """ + merge_commands_file = "%s/bcftools_merge.sh" % args.filter2_only_snp_vcf_dir + + with open(merge_commands_file, 'w+') as fopen: + fopen.write("%s/%s/bcftools merge -i ANN:join -m both -o %s/Final_vcf_no_proximate_snp.vcf -O v %s" % (ConfigSectionMap("bin_path", Config)['binbase'], ConfigSectionMap("bcftools", Config)['bcftools_bin'], args.filter2_only_snp_vcf_dir, files.replace("_filter2_final.vcf_no_proximate_snp.vcf", "_filter2_final.vcf_no_proximate_snp.vcf_ANN.vcf.gz")) + '\n') + fopen.write("%s/%s/bcftools merge -i ANN:join -m both -o %s/Final_vcf_indel.vcf -O v %s" % (ConfigSectionMap("bin_path", Config)['binbase'], ConfigSectionMap("bcftools", Config)['bcftools_bin'], args.filter2_only_snp_vcf_dir,files.replace("_filter2_final.vcf_no_proximate_snp.vcf","_filter2_indel_final.vcf_ANN.vcf.gz")) + '\n') + + fopen.close() + + os.system("bash %s" % merge_commands_file) + + + """ Merge with Gatk combine variants method """ + # #Commented for debugging + merged_file_suffix = "_no_proximate_snp.vcf" + + annotated_no_proximate_snp_file = "%s/annotated_no_proximate_snp_list.txt" % args.filter2_only_snp_vcf_dir + annotated_no_proximate_snp_indel_file = "%s/annotated_no_proximate_snp_indel_list.txt" % args.filter2_only_snp_vcf_dir + + with open(annotated_no_proximate_snp_file, 'w+') as fopen: + for i in vcf_filenames: + fopen.write(i.replace('_filter2_final.vcf_no_proximate_snp.vcf', '_filter2_final.vcf_no_proximate_snp.vcf_ANN.vcf.gz') + '\n') + fopen.close() + + with open(annotated_no_proximate_snp_indel_file, 'w+') as fopen: + for i in vcf_filenames: + fopen.write(i.replace('_filter2_final.vcf_no_proximate_snp.vcf', '_filter2_indel_final.vcf_ANN.vcf.gz') + '\n') + fopen.close() + + #files_gatk = "--variant " + ' --variant '.join(vcf_filenames) + files_gatk = "" + for i in vcf_filenames: + files_gatk = files_gatk + " --variant " + i + final_gatk_snp_merged_vcf = 
gatk_combine_variants(files_gatk.replace('_filter2_final.vcf_no_proximate_snp.vcf', '_filter2_final.vcf_no_proximate_snp.vcf_ANN.vcf.gz'), args.reference, args.filter2_only_snp_vcf_dir, merged_file_suffix, logger, Config) + + # Test this merge and annotate this merged file - Testing Mode Right now. + #merged_file_suffix = "_no_proximate_snp_1.vcf" + #final_gatk_snp_merged_vcf_1 = gatk_combine_variants(files_gatk,args.reference, args.filter2_only_snp_vcf_dir, merged_file_suffix, logger, Config) + merged_file_suffix = "_indel.vcf" + final_gatk_indel_merged_vcf = gatk_combine_variants(files_gatk.replace('_filter2_final.vcf_no_proximate_snp.vcf', + '_filter2_indel_final.vcf_ANN.vcf.gz'), + args.reference, args.filter2_only_snp_vcf_dir, merged_file_suffix, + logger, Config) + + """ Tabix index the combined GATK Final vcf file """ + files_for_tabix = glob.glob("%s/Final_vcf_*.vcf" % args.filter2_only_snp_vcf_dir) + tabix(files_for_tabix, "vcf", logger, Config) + + + """ End of Merging Step. """ + + + """ Extract ANN information from bcftools Final vcf file. (There is a reason why i am using bcftools merged file to extract ANN information) """ + snp_var_ann_dict = {} + indel_var_ann_dict = {} + + for variants in VCF("%s/Final_vcf_no_proximate_snp.vcf.gz" % args.filter2_only_snp_vcf_dir): + snp_var_ann_dict[variants.POS] = variants.INFO.get('ANN') + + for variants in VCF("%s/Final_vcf_indel.vcf.gz" % args.filter2_only_snp_vcf_dir): + indel_var_ann_dict[variants.POS] = variants.INFO.get('ANN') + + """ End of Extract ANN information from bcftools Final vcf file""" + + + + """ This step is no longer required: Remove this after testing. print_string_header will be the column names of SNP matrix. Column names = Sample names""" + print_string_header = "\t" + for i in vcf_filenames: + print_string_header = print_string_header + os.path.basename(i) + "\t" + + + + """ Generate an array of core positions. 
Read Only_ref_variant_positions_for_closely* to get final core variant positions into core_positions array""" + core_positions = [] + if ConfigSectionMap("functional_filters", Config)['apply_to_calls'] == "yes": + core_positions_file = "%s/Only_ref_variant_positions_for_closely_without_functional_filtered_positions" % args.filter2_only_snp_vcf_dir + else: + core_positions_file = "%s/Only_ref_variant_positions_for_closely" % args.filter2_only_snp_vcf_dir + with open(core_positions_file) as fp: + for line in fp: + line = line.strip() + core_positions.append(line) + fp.close() + + indel_core_positions = [] + if ConfigSectionMap("functional_filters", Config)['apply_to_calls'] == "yes": + core_positions_file = "%s/Only_ref_indel_variant_positions_for_closely_without_functional_filtered_positions" % args.filter2_only_snp_vcf_dir + else: + core_positions_file = "%s/Only_ref_indel_positions_for_closely" % args.filter2_only_snp_vcf_dir + with open(core_positions_file) as fp: + for line in fp: + line = line.strip() + indel_core_positions.append(line) + fp.close() + + """ End: Generate an array of core positions. """ + + + + """ Generate a list of functional class positions from Phaster, Mummer and Custom Masking results/files""" + """ Read in functional class filter positions. 
""" + functional_filter_pos_array = [] + with open(functional_class_filter_positions, 'rU') as f_functional: + for line_func in f_functional: + functional_filter_pos_array.append(line_func.strip()) + + """ GET individual PHAGE/Repetitive/masked region positions to assign functional class group string """ + phage_positions = [] + repetitive_positions = [] + mask_positions = [] + if ConfigSectionMap("functional_filters", Config)['apply_functional_filters'] == "yes": + if ConfigSectionMap("functional_filters", Config)['find_phage_region'] == "yes": + phage_region_positions = "%s/phage_region_positions.txt" % args.filter2_only_snp_vcf_dir + if os.path.isfile(phage_region_positions): + with open(phage_region_positions, 'rU') as fphage: + for line in fphage: + phage_positions.append(line.strip()) + fphage.close() + else: + raise IOError('%s/phage_region_positions.txt does not exist.' % args.filter2_only_snp_vcf_dir) + exit() + # GET REPETITIVE REGIONS + if ConfigSectionMap("functional_filters", Config)['find_repetitive_region'] == "yes": + repetitive_positions_file = "%s/repeat_region_positions.txt" % args.filter2_only_snp_vcf_dir + if os.path.isfile(repetitive_positions_file): + with open(repetitive_positions_file, 'rU') as frep: + for line in frep: + repetitive_positions.append(line.strip()) + frep.close() + else: + raise IOError('%s/repeat_region_positions.txt does not exist.' % args.filter2_only_snp_vcf_dir) + exit() + # GET MASK REGIONS + if ConfigSectionMap("functional_filters", Config)['mask_region'] == "yes": + mask_positions_file = "%s/mask_positions.txt" % args.filter2_only_snp_vcf_dir + if os.path.isfile(mask_positions_file): + with open(mask_positions_file, 'rU') as fmask: + for line in fmask: + mask_positions.append(line.strip()) + fmask.close() + else: + raise IOError('%s/mask_positions.txt does not exist.' 
% args.filter2_only_snp_vcf_dir) + exit() + + """ End: Generate a list of functional class positions from Phaster, Mummer and Custom Masking results/files""" + + + + + """ Read and parse final GATK merged vcf file cyvcf library; Generate a header string from the sample lis fo this merged vcf file""" + + final_merge_anno_file = VCF("%s/Final_vcf_gatk_no_proximate_snp.vcf.gz" % args.filter2_only_snp_vcf_dir) + + """ Prepare SNP/Indel Matrix print strings and add matrix row information subsequently """ + header_print_string = "Type of SNP at POS > ALT functional=PHAGE_REPEAT_MASK locus_tag=locus_id strand=strand; ALT|Effect|Impact|GeneID|Nrchange|Aachange|Nrgenepos|AAgenepos|gene_symbol|product" + for sample in final_merge_anno_file.samples: + # header_print_string = header_print_string + "," + sample + header_print_string = header_print_string + "\t" + sample + header_print_string = header_print_string + "\n" + + """ End """ + + + + + """ Prepare a All_indel_label_final_ordered_sorted.txt file with sorted unique variant positions. 
""" + paste_label_command = "paste %s/unique_positions_file " % args.filter2_only_snp_vcf_dir + paste_indel_label_command = "paste %s/unique_indel_positions_file " % args.filter2_only_snp_vcf_dir + paste_label_command_exclude_outgroup = "paste %s/unique_positions_file " % args.filter2_only_snp_vcf_dir + paste_indel_label_command_exclude_outgroup = "paste %s/unique_indel_positions_file " % args.filter2_only_snp_vcf_dir + + for filename_base in final_merge_anno_file.samples: + if "R1_001_final.fastq.gz" in filename_base: + second_part = filename_base.replace("R1_001_final.fastq.gz", "R2_001_final.fastq.gz") + first_part_split = filename_base.split('R1_001_final.fastq.gz') + first_part = first_part_split[0].replace('_L001', '') + first_part = re.sub("_S.*_", "", first_part) + elif "_R1.fastq.gz" in filename_base: + second_part = filename_base.replace("_R1.fastq.gz", "_R2.fastq.gz") + first_part_split = filename_base.split('_R1.fastq.gz') + first_part = first_part_split[0].replace('_L001', '') + first_part = re.sub("_S.*_", "", first_part) + # Changed on 03/15/2019 + elif "R1.fastq.gz" in filename_base: + second_part = filename_base.replace("R1.fastq.gz", "R2.fastq.gz") + first_part_split = filename_base.split('R1.fastq.gz') + first_part = first_part_split[0].replace('_L001', '') + first_part = re.sub("_S.*_", "", first_part) + # Changed on 03/15/2019 + first_part = re.sub("_S.*", "", first_part) + elif "1_combine.fastq.gz" in filename_base: + second_part = filename_base.replace("1_combine.fastq.gz", "2_combine.fastq.gz") + first_part_split = filename_base.split('1_combine.fastq.gz') + first_part = first_part_split[0].replace('_L001', '') + first_part = re.sub("_S.*_", "", first_part) + elif "1_sequence.fastq.gz" in filename_base: + second_part = filename_base.replace("1_sequence.fastq.gz", "2_sequence.fastq.gz") + first_part_split = filename_base.split('1_sequence.fastq.gz') + first_part = first_part_split[0].replace('_L001', '') + first_part = re.sub("_S.*_", "", 
first_part) + elif "_forward.fastq.gz" in filename_base: + second_part = filename_base.replace("_forward.fastq.gz", "_reverse.fastq.gz") + first_part_split = filename_base.split('_forward.fastq.gz') + first_part = first_part_split[0].replace('_L001', '') + first_part = re.sub("_S.*_", "", first_part) + elif "R1_001.fastq.gz" in filename_base: + second_part = filename_base.replace("R1_001.fastq.gz", "R2_001.fastq.gz") + first_part_split = filename_base.split('R1_001.fastq.gz') + first_part = first_part_split[0].replace('_L001', '') + first_part = re.sub("_S.*_", "", first_part) + elif "_1.fastq.gz" in filename_base: + second_part = filename_base.replace("_1.fastq.gz", "_2.fastq.gz") + first_part_split = filename_base.split('_1.fastq.gz') + first_part = first_part_split[0].replace('_L001', '') + first_part = re.sub("_S.*_", "", first_part) + elif ".1.fastq.gz" in filename_base: + second_part = filename_base.replace(".1.fastq.gz", ".2.fastq.gz") + first_part_split = filename_base.split('.1.fastq.gz') + first_part = first_part_split[0].replace('_L001', '') + first_part = re.sub("_S.*_", "", first_part) + sample_label_file = "%s/%s_filter2_final.vcf_no_proximate_snp.vcf_positions_label" % ( + args.filter2_only_snp_vcf_dir, first_part) + sample_indel_label_file = "%s/%s_filter2_indel_final.vcf_indel_positions_label" % ( + args.filter2_only_snp_vcf_dir, first_part) + paste_label_command = paste_label_command + sample_label_file + " " + paste_indel_label_command = paste_indel_label_command + sample_indel_label_file + " " + if args.outgroup: + if outgroup not in sample_label_file: + paste_label_command_exclude_outgroup = paste_label_command_exclude_outgroup + sample_label_file + " " + paste_indel_label_command_exclude_outgroup = paste_indel_label_command_exclude_outgroup + sample_indel_label_file + " " + + paste_label_command = paste_label_command + " > %s/All_label_final_ordered.txt" % args.filter2_only_snp_vcf_dir + paste_indel_label_command = paste_indel_label_command + 
" > %s/All_indel_label_final_ordered.txt" % args.filter2_only_snp_vcf_dir + sort_ordered_label_cmd = "sort -n -k1,1 %s/All_label_final_ordered.txt > %s/All_label_final_ordered_sorted.txt" % ( + args.filter2_only_snp_vcf_dir, args.filter2_only_snp_vcf_dir) + sort_ordered_indel_label_cmd = "sort -n -k1,1 %s/All_indel_label_final_ordered.txt > %s/All_indel_label_final_ordered_sorted.txt" % ( + args.filter2_only_snp_vcf_dir, args.filter2_only_snp_vcf_dir) + + if args.outgroup: + paste_label_command_exclude_outgroup = paste_label_command_exclude_outgroup + " > %s/All_label_final_ordered_exclude_outgroup.txt" % args.filter2_only_snp_vcf_dir + paste_indel_label_command_exclude_outgroup = paste_indel_label_command_exclude_outgroup + " > %s/All_indel_label_final_ordered_exclude_outgroup.txt" % args.filter2_only_snp_vcf_dir + sort_ordered_label_cmd_exclude_outgroup = "sort -n -k1,1 %s/All_label_final_ordered_exclude_outgroup.txt > %s/All_label_final_ordered_exclude_outgroup_sorted.txt" % ( + args.filter2_only_snp_vcf_dir, args.filter2_only_snp_vcf_dir) + sort_ordered_indel_label_cmd_exclude_outgroup = "sort -n -k1,1 %s/All_indel_label_final_ordered_exclude_outgroup.txt > %s/All_indel_label_final_ordered_exclude_outgroup_sorted.txt" % ( + args.filter2_only_snp_vcf_dir, args.filter2_only_snp_vcf_dir) + + + with open('%s/All_label_final_ordered.sh' % args.filter2_only_snp_vcf_dir, 'w') as outfile: + outfile.write(paste_label_command + '\n') + outfile.write(sort_ordered_label_cmd + '\n') + outfile.write(paste_indel_label_command + '\n') + outfile.write(sort_ordered_indel_label_cmd + '\n') + outfile.close() + + os.system("bash %s/All_label_final_ordered.sh" % args.filter2_only_snp_vcf_dir) + + if args.outgroup: + # Just in case if os.system past commands doesn't work + with open('%s/All_label_final_ordered_exclude_outgroup.sh' % args.filter2_only_snp_vcf_dir, 'w') as outfile: + outfile.write(paste_label_command_exclude_outgroup + '\n') + 
outfile.write(sort_ordered_label_cmd_exclude_outgroup + '\n') + outfile.write(paste_indel_label_command_exclude_outgroup + '\n') + outfile.write(sort_ordered_indel_label_cmd_exclude_outgroup + '\n') + outfile.close() + + # Changed: Uncomment this + os.system("bash %s/All_label_final_ordered_exclude_outgroup.sh" % args.filter2_only_snp_vcf_dir) + + """ End: Prepare a All_indel_label_final_ordered_sorted.txt file with sorted unique variant positions. """ + + + + + + + + """ Generate a position_label and position_indel_label dictionary that will contain information about each unique variant position that passed variant filters in any sample and reasons for being filtered out in any sample """ + position_label = OrderedDict() + with open("%s/All_label_final_ordered_sorted.txt" % args.filter2_only_snp_vcf_dir, 'rU') as csv_file: + keep_logging('Reading All label positions file: %s/All_label_final_ordered_sorted.txt' % args.filter2_only_snp_vcf_dir, + 'Reading All label positions file: %s/All_label_final_ordered_sorted.txt' % args.filter2_only_snp_vcf_dir, + logger, 'info') + csv_reader = csv.reader(csv_file, delimiter='\t') + for row in csv_reader: + position_label[row[0]] = ','.join(row[1:]) + csv_file.close() + + # #Commented for debugging + position_indel_label = OrderedDict() + with open("%s/All_indel_label_final_ordered_sorted.txt" % args.filter2_only_snp_vcf_dir, 'rU') as csv_file: + keep_logging( + 'Reading All label positions file: %s/All_indel_label_final_ordered_sorted.txt' % args.filter2_only_snp_vcf_dir, + 'Reading All label positions file: %s/All_indel_label_final_ordered_sorted.txt' % args.filter2_only_snp_vcf_dir, + logger, 'info') + csv_reader = csv.reader(csv_file, delimiter='\t') + for row in csv_reader: + if row[0] not in position_label.keys(): + position_indel_label[row[0]] = ','.join(row[1:]) + else: + position_indel_label[row[0]] = ','.join(row[1:]) + keep_logging('Warning: position %s already present as a SNP' % row[0], + 'Warning: position %s 
already present as a SNP' % row[0], logger, 'info') + csv_file.close() + + """ End: Generate a position_label and position_indel_label dictionary """ + + + + + + """ Generate mask_fq_mq_positions array with positions where a variant was filtered because of LowFQ or LowMQ """ + mask_fq_mq_positions = [] + mask_fq_mq_positions_outgroup_specific = [] + if args.outgroup: + position_label_exclude_outgroup = OrderedDict() + with open("%s/All_label_final_ordered_exclude_outgroup_sorted.txt" % args.filter2_only_snp_vcf_dir, 'rU') as csv_file: + keep_logging( + 'Reading All label positions file: %s/All_label_final_ordered_exclude_outgroup_sorted.txt' % args.filter2_only_snp_vcf_dir, + 'Reading All label positions file: %s/All_label_final_ordered_exclude_outgroup_sorted.txt' % args.filter2_only_snp_vcf_dir, + logger, 'info') + csv_reader = csv.reader(csv_file, delimiter='\t') + for row in csv_reader: + position_label_exclude_outgroup[row[0]] = ','.join(row[1:]) + csv_file.close() + + #Commented for debugging + position_indel_label_exclude_outgroup = OrderedDict() + with open("%s/All_indel_label_final_ordered_exclude_outgroup_sorted.txt" % args.filter2_only_snp_vcf_dir, 'rU') as csv_file: + keep_logging( + 'Reading All label positions file: %s/All_indel_label_final_ordered_exclude_outgroup_sorted.txt' % args.filter2_only_snp_vcf_dir, + 'Reading All label positions file: %s/All_indel_label_final_ordered_exclude_outgroup_sorted.txt' % args.filter2_only_snp_vcf_dir, + logger, 'info') + csv_reader = csv.reader(csv_file, delimiter='\t') + for row in csv_reader: + if row[0] not in position_label_exclude_outgroup.keys(): + position_indel_label_exclude_outgroup[row[0]] = ','.join(row[1:]) + else: + position_indel_label_exclude_outgroup[row[0]] = ','.join(row[1:]) + keep_logging('Warning: position %s already present as a SNP' % row[0], + 'Warning: position %s already present as a SNP' % row[0], logger, 'info') + csv_file.close() + + for key in position_label_exclude_outgroup.keys(): + 
label_sep_array = position_label_exclude_outgroup[key].split(',') + for i in label_sep_array: + if "LowFQ" in str(i): + if key not in mask_fq_mq_positions: + if int(key) not in outgroup_specific_positions: + mask_fq_mq_positions.append(key) + elif int(key) in outgroup_specific_positions: + mask_fq_mq_positions_outgroup_specific.append(key) + if i == "HighFQ": + if key not in mask_fq_mq_positions: + if int(key) not in outgroup_specific_positions: + mask_fq_mq_positions.append(key) + elif int(key) in outgroup_specific_positions: + mask_fq_mq_positions_outgroup_specific.append(key) + else: + for key in position_label.keys(): + label_sep_array = position_label[key].split(',') + for i in label_sep_array: + if "LowFQ" in str(i): + if key not in mask_fq_mq_positions: + mask_fq_mq_positions.append(key) + if i == "HighFQ": + if key not in mask_fq_mq_positions: + mask_fq_mq_positions.append(key) + + fp = open("%s/mask_fq_mq_positions.txt" % (args.filter2_only_snp_vcf_dir), 'w+') + for i in mask_fq_mq_positions: + fp.write(i + '\n') + fp.close() + + fp = open("%s/mask_fq_mq_positions_outgroup_specific.txt" % (args.filter2_only_snp_vcf_dir), 'w+') + for i in mask_fq_mq_positions_outgroup_specific: + fp.write(i + '\n') + fp.close() + + print "Length of mask_fq_mq_positions:%s" % len(mask_fq_mq_positions) + print "Length of mask_fq_mq_positions specific to outgroup:%s" % len(mask_fq_mq_positions_outgroup_specific) + + """ End: Generate mask_fq_mq_positions array """ + + + + + + + + + + + """ Main: Generate SNP Matrix """ + + + """ Open Matrix files to write strings """ + fp_code = open("%s/SNP_matrix_code.csv" % args.filter2_only_snp_vcf_dir, 'w+') + fp_allele = open("%s/SNP_matrix_allele_outdated.csv" % args.filter2_only_snp_vcf_dir, 'w+') + fp_allele_new = open("%s/SNP_matrix_allele_new.csv" % args.filter2_only_snp_vcf_dir, 'w+') + fp_allele_new_phage = open("%s/SNP_matrix_allele_unmasked.csv" % args.filter2_only_snp_vcf_dir, 'w+') + fp_code.write(header_print_string) + 
fp_allele.write(header_print_string) + fp_allele_new.write(header_print_string) + fp_allele_new_phage.write(header_print_string) + + """ Parse variant positions from the loaded cyvcf VCF object and generate the matrix row information """ + for variants in VCF("%s/Final_vcf_gatk_no_proximate_snp.vcf.gz" % args.filter2_only_snp_vcf_dir): + # Initiate print_string variable to add matrix row information. + # print_string generator no. 1 + print_string = "" + + # Initiate and assign Functional Field filter string => PHAGE/REPEAT/MASK/NULL + functional_field = "" + if str(variants.POS) in phage_positions: + functional_field = functional_field + "PHAGE_" + else: + functional_field = functional_field + "NULL_" + if str(variants.POS) in repetitive_positions: + functional_field = functional_field + "REPEATS_" + else: + functional_field = functional_field + "NULL_" + if str(variants.POS) in mask_positions: + functional_field = functional_field + "MASK" + else: + functional_field = functional_field + "NULL" + + # Initiate variant code string where the code means: + # REF allele = 0, core = 1, Filtered = 2, unmapped = -1, True but non-core = 3 + # This will be used as row information for SNP_matrix_code file + + code_string = position_label[str(variants.POS)] + code_string = code_string.replace('reference_allele', '0') + code_string = code_string.replace('reference_unmapped_position', '-1') + # Changing LowFQ code from 2 to -3 + # Changing HighFQ but LowMQ code from 2 to -4 + code_string = code_string.replace('LowFQ_QUAL_DP_proximate_SNP', '-3') + code_string = code_string.replace('LowFQ_DP_QUAL_proximate_SNP', '-3') + code_string = code_string.replace('LowFQ_QUAL_proximate_SNP', '-3') + code_string = code_string.replace('LowFQ_DP_proximate_SNP', '-3') + code_string = code_string.replace('LowFQ_proximate_SNP', '-3') + code_string = code_string.replace('LowFQ_QUAL_DP', '-3') + code_string = code_string.replace('LowFQ_DP_QUAL', '-3') + code_string = 
code_string.replace('LowFQ_QUAL', '-3') + code_string = code_string.replace('LowFQ_DP', '-3') + code_string = code_string.replace('HighFQ_QUAL_DP_proximate_SNP', '2') + code_string = code_string.replace('HighFQ_DP_QUAL_proximate_SNP', '2') + code_string = code_string.replace('HighFQ_QUAL_proximate_SNP', '2') + code_string = code_string.replace('HighFQ_DP_proximate_SNP', '2') + code_string = code_string.replace('HighFQ_proximate_SNP', '2') + code_string = code_string.replace('HighFQ_QUAL_DP', '2') + code_string = code_string.replace('HighFQ_DP_QUAL', '2') + code_string = code_string.replace('HighFQ_QUAL', '2') + code_string = code_string.replace('HighFQ_DP', '2') + code_string = code_string.replace('LowFQ', '-3') + code_string = code_string.replace('HighFQ', '-4') + + + if str(variants.POS) in core_positions: + code_string = code_string.replace('VARIANT', '1') + # Adding functional class status code to SNP matrix: 2018-07-24 + elif str(variants.POS) in functional_filter_pos_array: + # Changing Functional class filter code to -2 from 2: 2018-12-04 + code_string = code_string.replace('VARIANT', '-2') + else: + code_string = code_string.replace('VARIANT', '3') + + # Remove this commented section: Deprecated + # Changing SNP type: Date 28/05/2019 + # Assign type of snp: coding / non-coding + # if variants.INFO.get('ANN'): + # if "protein_coding" in variants.INFO.get('ANN'): + # snp_type = "Coding SNP" + # else: + # snp_type = "Non-coding SNP" + # else: + # if len(variants.ALT) > 1 and snp_var_ann_dict[variants.POS]: + # #print variants.ALT + # #print ';'.join(set(snp_var_ann_dict[variants.POS].split(','))) + # #print variants.POS + # #print set(snp_var_ann_dict[variants.POS]) + # if "protein_coding" in set(snp_var_ann_dict[variants.POS].split(',')): + # snp_type = "Coding SNP" + # else: + # snp_type = "Non-coding SNP" + # else: + # snp_type = "Non-coding SNP" + # Remove this commented section: Deprecated + + # Annotation Bug fix 2 + # Changing SNP type: Date 28/05/2019 
+ if variants.POS in snp_var_ann_dict.keys(): + if snp_var_ann_dict[variants.POS] is not None: + if "protein_coding" in set(snp_var_ann_dict[variants.POS].split('|')) and "intergenic_region" not in set(snp_var_ann_dict[variants.POS].split('|')): + snp_type = "Coding SNP" + elif "protein_coding" in set(snp_var_ann_dict[variants.POS].split('|')) and "intergenic_region" in set(snp_var_ann_dict[variants.POS].split('|')): + snp_type = "Coding and Non-coding SNP" + elif "protein_coding" not in set(snp_var_ann_dict[variants.POS].split('|')) and "intergenic_region" in set(snp_var_ann_dict[variants.POS].split('|')): + snp_type = "Non-Coding SNP" + elif "protein_coding" not in set(snp_var_ann_dict[variants.POS].split('|')) and "intragenic_variant" in set(snp_var_ann_dict[variants.POS].split('|')): + snp_type = "Non-Coding SNP" + else: + print set((snp_var_ann_dict[variants.POS].split('|'))) + snp_type = "No_protein_coding/intergenic_region_field_in_ANN SNP" + #print snp_type + else: + keep_logging('Warning: position %s not found in snp_var_ann_dict dictionary. Assigning Not found as SNP type.' % variants.POS, 'Warning: position %s not found in snp_var_ann_dict dictionary. Assigning Not found as SNP type.' % variants.POS, logger, 'info') + print set((snp_var_ann_dict[variants.POS].split('|'))) + snp_type = "Not Found in Annotated VCF file" + + #print snp_type + + # print_string generator no. 2 + print_string = print_string + snp_type + " at %s > " % str(variants.POS) + str(",".join(variants.ALT)) + " functional=%s" % functional_field + + # Annotation Bug fix 3 + # Get ANN field from variant INFO column and save it as an array. 
Split and Go through each elements, add bells and whistles + if variants.INFO.get('ANN'): + + ann_array = (variants.INFO.get('ANN')).split(',') + + # Generate tag string before generating ann_string + if len(ann_array) > 1: + # print variants.INFO.get('ANN') + # print list(set(ann_array)) + tag_list = [] + + for i_again in set(snp_var_ann_dict[variants.POS].split(',')): + i_split_again = i_again.split('|') + + + + + if "-" not in i_split_again[4]: + if i_split_again[4] not in tag_list: + tag_list.append(i_split_again[4]) + + else: + split_tags = i_split_again[4].split('-') + for splittagsindividual in split_tags: + if splittagsindividual not in tag_list: + tag_list.append(splittagsindividual) + + if len(tag_list) == 1: + tag = tag_list[0] + + elif len(tag_list) == 2: + tag = str(tag_list[0]) + "-" + str(tag_list[1]) + + elif len(tag_list) > 2: + print tag_list + tag = tag.replace('CHR_START-', '') + tag = tag.replace('-CHR_END', '') + else: + for i in list(set(ann_array)): + i_split = i.split('|') + tag = str(i_split[4]).replace('CHR_START-', '') + tag = str(tag).replace('-CHR_END', '') + + + ann_string = ";" + for i in list(set(ann_array)): + i_split = i.split('|') + #ann_string = ann_string + '|'.join([i_split[0],i_split[1],i_split[2],i_split[3],i_split[9], i_split[10], i_split[11], i_split[13]]) + ";" + + # MOve this tag before this for loop because of multiple tags associated. 
+ # tag = str(i_split[4]).replace('CHR_START-', '') + # tag = str(tag).replace('-CHR_END', '') + + if "-" in tag: + #print tag + extra_tags = "" + tag_split = tag.split('-') + for i in tag_split: + if i in locus_tag_to_gene_name.keys(): + extra_tags = extra_tags + locus_tag_to_gene_name[i] + "," + else: + extra_tags = extra_tags + "None" + "," + extra_tags_prot = "" + for i in tag_split: + if i in locus_tag_to_product.keys(): + extra_tags_prot = extra_tags_prot + locus_tag_to_product[i] + "," + else: + extra_tags_prot = extra_tags_prot + "None" + "," + ann_string = ann_string + '|'.join([i_split[0],i_split[1],i_split[2],i_split[3],i_split[9], i_split[10], i_split[11], i_split[13], extra_tags, extra_tags_prot]) + ";" + # Changing SNP type: Date 28/05/2019 + elif tag == "": + print "ERROR: Issues with this locus tag. Check this tag in genbank file" + print list(set(ann_array)) + # Adding this so that Ann string is not empty: 30/05/2019 + if tag in locus_tag_to_gene_name.keys() and tag in locus_tag_to_product.keys(): + extra_tags = str(locus_tag_to_gene_name[tag]) + "|" + str(locus_tag_to_product[tag]) + else: + print "tag key not found: %s" % tag + extra_tags = "NULL" + "|" + "NULL" + # ann_string = ann_string + '|'.join([i_split[0],i_split[1],i_split[2],i_split[3],i_split[9], i_split[10], i_split[11], i_split[13], extra_tags]) + ";" + # Added 2019-31-05 + if "ERROR_OUT_OF_CHROMOSOME_RANGE" in i: + ann_string = ann_string + '|'.join( + [i_split[0], "intergenic_region", i_split[2], "ERROR_OUT_OF_CHROMOSOME_RANGE", i_split[9], i_split[10], i_split[11], + i_split[13], extra_tags]) + ";" + else: + ann_string = ann_string + '|'.join([i_split[0],i_split[1],i_split[2],i_split[3],i_split[9], i_split[10], i_split[11], i_split[13], extra_tags]) + ";" + # Debugging + if i_split[3] == "CD630_00290": + print ann_string + # Changing SNP type: Date 28/05/2019 + else: + if tag in locus_tag_to_gene_name.keys() and tag in locus_tag_to_product.keys(): + extra_tags = 
str(locus_tag_to_gene_name[tag]) + "|" + str(locus_tag_to_product[tag]) + else: + print "tag key not found: %s" % tag + extra_tags = "NULL" + "|" + "NULL" + # ann_string = ann_string + '|'.join([i_split[0],i_split[1],i_split[2],i_split[3],i_split[9], i_split[10], i_split[11], i_split[13], extra_tags]) + ";" + ann_string = ann_string + '|'.join([i_split[0],i_split[1],i_split[2],i_split[3],i_split[9], i_split[10], i_split[11], i_split[13], extra_tags]) + ";" + + # Annotation Bug fix 4 + # Changing SNP type: Date 28/05/2019 + # Working/Testing + else: + if len(variants.ALT) > 1 and snp_var_ann_dict[variants.POS]: + #print variants.ALT + #print ';'.join(set(snp_var_ann_dict[variants.POS].split(','))) + + ann_string = ";%s" % ';'.join(set(snp_var_ann_dict[variants.POS].split(','))) + # Get Tag here; Multiple tag names. + tag_list = [] + + + for i in set(snp_var_ann_dict[variants.POS].split(',')): + i_split = i.split('|') + if i_split[4] not in tag_list: + tag_list.append(i_split[4]) + if len(tag_list) > 1: + tag = str(tag_list[0]) + "-" + str(tag_list[1]) + else: + tag = tag_list[0] + + # if len(set(snp_var_ann_dict[variants.POS].split(','))) > 2: + # print tag + # print set(snp_var_ann_dict[variants.POS].split(',')) + + else: + ann_string = ";None" + + # Annotation Bug fix 5 + # Changing SNP type: Date 28/05/2019 + ann_string = ann_string.replace('ERROR_OUT_OF_CHROMOSOME_RANGE', '%s-%s' % (locus_tag_to_gene_name[last_locus_tag], locus_tag_to_gene_name[first_locus_tag])) + ann_string = ann_string.replace('CHR_END', '%s' % locus_tag_to_gene_name[first_locus_tag]) + + # SNP Matrix Bug + # No changes here: 28/05/2019 + ann_string_split = ann_string.split(';') + #print len(ann_string_split) + if len(ann_string_split) == 3: + first_allele_ann_string_split = ann_string_split[1].split('|') + second_allele_ann_string_split = ann_string_split[2].split('|') + if len(first_allele_ann_string_split) == 10 and len(second_allele_ann_string_split) == 10: + ann_string = ann_string + 
elif len(first_allele_ann_string_split) > 10 and len(second_allele_ann_string_split) == 10: + if first_allele_ann_string_split[14] == "" and first_allele_ann_string_split[15] == "": + prod = first_allele_ann_string_split[3] + first_allele_ann_string_split[15] + else: + prod = first_allele_ann_string_split[14] + "|" + first_allele_ann_string_split[15] + new_first_allele_ann_string = ";" + first_allele_ann_string_split[0] + "|" + first_allele_ann_string_split[1] + "|" + first_allele_ann_string_split[2] + "|" + first_allele_ann_string_split[4] + "|" + first_allele_ann_string_split[9] + "|" + first_allele_ann_string_split[10] + "|" + first_allele_ann_string_split[11] + "|" + first_allele_ann_string_split[13] + "|" + prod + "|" + prod + ";" + + ann_string = new_first_allele_ann_string + str(ann_string_split[2]) + + elif len(first_allele_ann_string_split) == 10 and len(second_allele_ann_string_split) > 10: + + if second_allele_ann_string_split[14] == "" and second_allele_ann_string_split[15] == "": + prod = second_allele_ann_string_split[3] + second_allele_ann_string_split[15] + else: + prod = second_allele_ann_string_split[14] + "|" + second_allele_ann_string_split[15] + new_second_allele_ann_string = second_allele_ann_string_split[0] + "|" + second_allele_ann_string_split[1] + "|" + second_allele_ann_string_split[2] + "|" + \ + second_allele_ann_string_split[4] + "|" + second_allele_ann_string_split[9] + "|" + \ + second_allele_ann_string_split[10] + "|" + second_allele_ann_string_split[11] + "|" + \ + second_allele_ann_string_split[13] + "|" + prod + "|" + prod + ";" + + ann_string = str(ann_string_split[1]) + new_second_allele_ann_string + elif len(first_allele_ann_string_split) > 10 and len(second_allele_ann_string_split) > 10: + + + if first_allele_ann_string_split[14] == "" and first_allele_ann_string_split[15] == "": + prod = first_allele_ann_string_split[3] + first_allele_ann_string_split[15] + else: + prod = first_allele_ann_string_split[14] + "|" + 
first_allele_ann_string_split[15] + new_first_allele_ann_string = ";" + first_allele_ann_string_split[0] + "|" + first_allele_ann_string_split[1] + "|" + first_allele_ann_string_split[2] + "|" + first_allele_ann_string_split[4] + "|" + first_allele_ann_string_split[9] + "|" + first_allele_ann_string_split[10] + "|" + first_allele_ann_string_split[11] + "|" + first_allele_ann_string_split[13] + "|" + prod + "|" + prod + ";" + + if second_allele_ann_string_split[14] == "" and second_allele_ann_string_split[15] == "": + prod = second_allele_ann_string_split[3] + second_allele_ann_string_split[15] + else: + prod = second_allele_ann_string_split[14] + "|" + second_allele_ann_string_split[15] + new_second_allele_ann_string = second_allele_ann_string_split[0] + "|" + second_allele_ann_string_split[1] + "|" + second_allele_ann_string_split[2] + "|" + \ + second_allele_ann_string_split[4] + "|" + second_allele_ann_string_split[9] + "|" + \ + second_allele_ann_string_split[10] + "|" + second_allele_ann_string_split[11] + "|" + \ + second_allele_ann_string_split[13] + "|" + prod + "|" + prod + ";" + + ann_string = new_first_allele_ann_string + new_second_allele_ann_string + + + if len(ann_string_split) > 3: + first_allele_ann_string_split = ann_string_split[1].split('|') + second_allele_ann_string_split = ann_string_split[2].split('|') + third_allele_ann_string_split = ann_string_split[3].split('|') + if len(first_allele_ann_string_split) == 10 and len(second_allele_ann_string_split) == 10 and len(third_allele_ann_string_split) == 10: + ann_string = ann_string + + elif len(first_allele_ann_string_split) > 10 and len(second_allele_ann_string_split) == 10 and len(third_allele_ann_string_split) == 10: + if first_allele_ann_string_split[14] == "" and first_allele_ann_string_split[15] == "": + prod = first_allele_ann_string_split[3] + first_allele_ann_string_split[15] + else: + prod = first_allele_ann_string_split[14] + "|" + first_allele_ann_string_split[15] + 
new_first_allele_ann_string = ";" + first_allele_ann_string_split[0] + "|" + first_allele_ann_string_split[1] + "|" + first_allele_ann_string_split[2] + "|" + first_allele_ann_string_split[4] + "|" + first_allele_ann_string_split[9] + "|" + first_allele_ann_string_split[10] + "|" + first_allele_ann_string_split[11] + "|" + first_allele_ann_string_split[13] + "|" + prod + "|" + prod + ";" + + ann_string = new_first_allele_ann_string + str(ann_string_split[2]) + str(ann_string_split[3]) + + elif len(first_allele_ann_string_split) == 10 and len(second_allele_ann_string_split) > 10 and len(third_allele_ann_string_split) == 10: + + if second_allele_ann_string_split[14] == "" and second_allele_ann_string_split[15] == "": + prod = second_allele_ann_string_split[3] + second_allele_ann_string_split[15] + else: + prod = second_allele_ann_string_split[14] + "|" + second_allele_ann_string_split[15] + new_second_allele_ann_string = second_allele_ann_string_split[0] + "|" + second_allele_ann_string_split[1] + "|" + second_allele_ann_string_split[2] + "|" + \ + second_allele_ann_string_split[4] + "|" + second_allele_ann_string_split[9] + "|" + \ + second_allele_ann_string_split[10] + "|" + second_allele_ann_string_split[11] + "|" + \ + second_allele_ann_string_split[13] + "|" + prod + "|" + prod + ";" + + ann_string = str(ann_string_split[1]) + new_second_allele_ann_string + str(ann_string_split[3]) + + elif len(first_allele_ann_string_split) == 10 and len(second_allele_ann_string_split) == 10 and len(third_allele_ann_string_split) > 10: + + if third_allele_ann_string_split[14] == "" and third_allele_ann_string_split[15] == "": + prod = third_allele_ann_string_split[3] + third_allele_ann_string_split[15] + else: + prod = third_allele_ann_string_split[14] + "|" + third_allele_ann_string_split[15] + new_third_allele_ann_string = third_allele_ann_string_split[0] + "|" + third_allele_ann_string_split[1] + "|" + third_allele_ann_string_split[2] + "|" + \ + 
third_allele_ann_string_split[4] + "|" + third_allele_ann_string_split[9] + "|" + \ + third_allele_ann_string_split[10] + "|" + third_allele_ann_string_split[11] + "|" + \ + third_allele_ann_string_split[13] + "|" + prod + "|" + prod + ";" + + ann_string = str(ann_string_split[1]) + str(ann_string_split[2]) + new_third_allele_ann_string + + elif len(first_allele_ann_string_split) > 10 and len(second_allele_ann_string_split) > 10 and len(third_allele_ann_string_split) > 10: + #print ann_string + if first_allele_ann_string_split[14] == "" and first_allele_ann_string_split[15] == "": + prod = first_allele_ann_string_split[3] + first_allele_ann_string_split[15] + else: + prod = first_allele_ann_string_split[14] + "|" + first_allele_ann_string_split[15] + new_first_allele_ann_string = ";" + first_allele_ann_string_split[0] + "|" + first_allele_ann_string_split[1] + "|" + first_allele_ann_string_split[2] + "|" + first_allele_ann_string_split[4] + "|" + first_allele_ann_string_split[9] + "|" + first_allele_ann_string_split[10] + "|" + first_allele_ann_string_split[11] + "|" + first_allele_ann_string_split[13] + "|" + prod + "|" + prod + ";" + + if second_allele_ann_string_split[14] == "" and second_allele_ann_string_split[15] == "": + prod = second_allele_ann_string_split[3] + second_allele_ann_string_split[15] + else: + prod = second_allele_ann_string_split[14] + "|" + second_allele_ann_string_split[15] + new_second_allele_ann_string = second_allele_ann_string_split[0] + "|" + second_allele_ann_string_split[1] + "|" + second_allele_ann_string_split[2] + "|" + \ + second_allele_ann_string_split[4] + "|" + second_allele_ann_string_split[9] + "|" + \ + second_allele_ann_string_split[10] + "|" + second_allele_ann_string_split[11] + "|" + \ + second_allele_ann_string_split[13] + "|" + prod + "|" + prod + ";" + + if third_allele_ann_string_split[14] == "" and third_allele_ann_string_split[15] == "": + prod = third_allele_ann_string_split[3] + third_allele_ann_string_split[15] 
+ else: + prod = third_allele_ann_string_split[14] + "|" + third_allele_ann_string_split[15] + new_third_allele_ann_string = third_allele_ann_string_split[0] + "|" + third_allele_ann_string_split[1] + "|" + third_allele_ann_string_split[2] + "|" + \ + third_allele_ann_string_split[4] + "|" + third_allele_ann_string_split[9] + "|" + \ + third_allele_ann_string_split[10] + "|" + third_allele_ann_string_split[11] + "|" + \ + third_allele_ann_string_split[13] + "|" + prod + "|" + prod + ";" + + ann_string = new_first_allele_ann_string + new_second_allele_ann_string + new_third_allele_ann_string + + + # print_string generator no. 3 + + # Annotation Bug fix 6 + # Changing Strandness string: Date 28/05/2019 + # Each Locus ID with a strand information + strandness = " Strand Information: " + if "-" in tag: + tagsplit = tag.split('-') + for i in tagsplit: + if i in locus_tag_to_strand.keys(): + if "," in locus_tag_to_strand[i]: + locus_tag_to_strand_split = locus_tag_to_strand[i].split(',') + strand = locus_tag_to_strand_split[0] + else: + strand = locus_tag_to_strand[i] + strandness = strandness + i + "=" + strand + "/" + else: + if i == "" or i == "None": + strandness = strandness + "NULL=" + "No Strand Information found" + "/" + else: + strandness = strandness + i + "=" + "No Strand Information found" + "/" + else: + if tag in locus_tag_to_strand.keys(): + # strandness = strandness + locus_tag_to_strand[tag] + if "," in locus_tag_to_strand[tag]: + locus_tag_to_strand_split = locus_tag_to_strand[tag].split(',') + strand = locus_tag_to_strand_split[0] + else: + strand = locus_tag_to_strand[tag] + strandness = strandness + tag + "=" + strand + else: + if tag == "" or tag == "None": + strandness = strandness + "NULL=" + "No Strand Information found" + else: + strandness = strandness + tag + "=" + "No Strand Information found" + + # Annotation Bug fix 7 + # Changing tag equals NULL: Date 30/05/2019 + if tag == "" or tag == "None": + tag = "NULL" + + print_string = 
print_string + " locus_tag=" + tag + strandness + ann_string + print_string_phage = print_string + + + + """ Go over each genotype for a variant and generate a gt_string variable """ + gt_string = "" + for gt in variants.gt_bases: + gt = gt.replace('./.', '.') + gt_string = gt_string + "," + gt + gt_string = gt_string.replace('A/A', 'A') + gt_string = gt_string.replace('G/G', 'G') + gt_string = gt_string.replace('C/C', 'C') + gt_string = gt_string.replace('T/T', 'T') + gt_string = gt_string.replace('.', variants.REF) + + + # print_string generator no. 4 + # Replace various seperators that were used in old matrix. Clean up this block of code + final_allele_string = print_string + gt_string.replace(',', '\t') + '\n' + # Replace code at Phage Positions with -2 + if str(variants.POS) in functional_filter_pos_array: + code_string_array = code_string.split(',') + for (i, item) in enumerate(code_string_array): + if item == "0": + code_string_array[i] = "-2" + for (i, item) in enumerate(code_string_array): + if item == "1": + code_string_array[i] = "-2" + for (i, item) in enumerate(code_string_array): + if item == "2": + code_string_array[i] = "-2" + for (i, item) in enumerate(code_string_array): + if item == "3": + code_string_array[i] = "-2" + for (i, item) in enumerate(code_string_array): + if item == "4": + code_string_array[i] = "-2" + for (i, item) in enumerate(code_string_array): + if item == "-1": + code_string_array[i] = "-2" + for (i, item) in enumerate(code_string_array): + if item == "-2": + code_string_array[i] = "-2" + for (i, item) in enumerate(code_string_array): + if item == "-3": + code_string_array[i] = "-2" + for (i, item) in enumerate(code_string_array): + if item == "-4": + code_string_array[i] = "-2" + code_string = ','.join(code_string_array) + + final_code_string = print_string + "\t" + code_string.replace(',', '\t') + '\n' + final_allele_string = final_allele_string.replace(',|', '|') + + final_allele_string = final_allele_string.replace(',;,', 
':::') + final_allele_string = final_allele_string.replace(';,', ':::') + final_code_string = final_code_string.replace(',|', '|') + + + final_code_string = final_code_string.replace(',;,', ':::') + final_code_string = final_code_string.replace(';,', ':::') + final_code_string = final_code_string.replace(';\t\t', ';\t') + final_code_string = final_code_string.replace('\t\t', '\t') + final_allele_string = final_allele_string.replace('\t\t', '\t') + fp_allele.write(final_allele_string) + fp_code.write(final_code_string) + + + + ntd_string = "" + ntd_string_phage = "" + count = 0 + code_string_array = code_string.split(',') + gt_string_array = gt_string[1:].split(',') + + + for i in gt_string_array: + if str(code_string_array[count]) == "0" or str(code_string_array[count]) == "1" or str(code_string_array[count]) == "3": + ntd_string = ntd_string + "\t" + str(i) + ntd_string_phage = ntd_string_phage + "\t" + str(i) + if code_string_array[count] == "-1": + ntd_string = ntd_string + "\t" + "-" + ntd_string_phage = ntd_string_phage + "\t" + "-" + # Changing Functional class filter code to -2 from 2 and replacing variant allele with N: 2018-12-04 + if str(code_string_array[count]) == "2" or str(code_string_array[count]) == "-2" or str(code_string_array[count]) == "-3" or str(code_string_array[count]) == "-4": + + ntd_string = ntd_string + "\t" + "N" + if str(code_string_array[count]) == "2": + ntd_string_phage = ntd_string_phage + "\t" + "N" + if str(code_string_array[count]) == "-2": + ntd_string_phage = ntd_string_phage + "\t" + str(i) + count += 1 + + # Annotation Bug fix 8 + """ Mask Phage positions and LowFQ/MQ positions in SNP_matrix_allele_new.csv. This is the default matrix. 
""" + if str(variants.POS) in functional_filter_pos_array: + ntd_string_array = ntd_string.split('\t') + #print ntd_string_array + ntd_string = "" + for i in ntd_string_array[1:]: + ntd_string = ntd_string + "\t" + "N" + ntd_string_array = ntd_string.split('\t') + #print ntd_string_array + + + if str(variants.POS) in mask_fq_mq_positions: + ntd_string_array = ntd_string.split('\t') + #print ntd_string_array + ntd_string = "" + for i in ntd_string_array[1:]: + ntd_string = ntd_string + "\t" + "N" + ntd_string_array = ntd_string.split('\t') + #print ntd_string_array + + + """ Generate a print_string for each of the matrix - SNP_matrix_allele_new.csv and SNP_matrix_allele_phage.csv """ + print_string = print_string + ntd_string + "\n" + + print_string_phage = print_string_phage + ntd_string_phage + "\n" + + """ This is a hardcoded solution. Find the root cause of these strings getting into the print_strint variable """ + print_string.replace(',;,', '\t') + print_string.replace(';,', '\t') + print_string_phage.replace(',;,', '\t') + print_string_phage.replace(';,', '\t') + + fp_allele_new.write(print_string) + fp_allele_new_phage.write(print_string_phage) + + fp_code.close() + fp_allele.close() + fp_allele_new.close() + fp_allele_new_phage.close() + +###################################### + """ Indel matrix """ + """ Prepare SNP/Indel Matrix print strings and add matrix row information subsequently """ + header_print_string = "Type of SNP at POS > ALT functional=PHAGE_REPEAT_MASK locus_tag=locus_id strand=strand; ALT|Effect|Impact|GeneID|Nrchange|Aachange|Nrgenepos|AAgenepos|gene_symbol|product" + final_merge_anno_file = VCF("%s/Final_vcf_gatk_indel.vcf.gz" % args.filter2_only_snp_vcf_dir) + for sample in final_merge_anno_file.samples: + # header_print_string = header_print_string + "," + sample + header_print_string = header_print_string + "\t" + sample + header_print_string = header_print_string + "\n" + #header_print_string = header_print_string.replace(':::,', 
':::') + #header_print_string = header_print_string.replace(':::,', '\t') + fp_code = open("%s/Indel_matrix_code.csv" % args.filter2_only_snp_vcf_dir, 'w+') + fp_allele = open("%s/Indel_matrix_allele.csv" % args.filter2_only_snp_vcf_dir, 'w+') + fp_code.write(header_print_string) + fp_allele.write(header_print_string) + + # """ Generate mask_fq_mq_positions array with positions where a variant was filtered because of LowFQ or LowMQ""" + # mask_fq_mq_positions = [] + # for key in position_indel_label.keys(): + # label_sep_array = position_indel_label[key].split(',') + # for i in label_sep_array: + # if "LowAF" in i: + # if key not in mask_fq_mq_positions: + # mask_fq_mq_positions.append(key) + # if i == "HighAF": + # if key not in mask_fq_mq_positions: + # mask_fq_mq_positions.append(key) + # + # print "Length of indel mask_fq_mq_positions array:%s" % len(mask_fq_mq_positions) + + """ Generate mask_fq_mq_positions array with positions where a variant was filtered because of LowFQ or LowMQ""" + mask_fq_mq_positions = [] + mask_fq_mq_positions_outgroup_specific = [] + + if args.outgroup: + position_label_exclude_outgroup = OrderedDict() + with open("%s/All_label_final_ordered_exclude_outgroup_sorted.txt" % args.filter2_only_snp_vcf_dir, + 'rU') as csv_file: + keep_logging( + 'Reading All label positions file: %s/All_label_final_ordered_exclude_outgroup_sorted.txt' % args.filter2_only_snp_vcf_dir, + 'Reading All label positions file: %s/All_label_final_ordered_exclude_outgroup_sorted.txt' % args.filter2_only_snp_vcf_dir, + logger, 'info') + csv_reader = csv.reader(csv_file, delimiter='\t') + for row in csv_reader: + position_label_exclude_outgroup[row[0]] = ','.join(row[1:]) + csv_file.close() + + position_indel_label_exclude_outgroup = OrderedDict() + with open("%s/All_indel_label_final_ordered_exclude_outgroup_sorted.txt" % args.filter2_only_snp_vcf_dir, + 'rU') as csv_file: + keep_logging( + 'Reading All label positions file: 
%s/All_indel_label_final_ordered_exclude_outgroup_sorted.txt' % args.filter2_only_snp_vcf_dir, + 'Reading All label positions file: %s/All_indel_label_final_ordered_exclude_outgroup_sorted.txt' % args.filter2_only_snp_vcf_dir, + logger, 'info') + csv_reader = csv.reader(csv_file, delimiter='\t') + for row in csv_reader: + if row[0] not in position_label_exclude_outgroup.keys(): + position_indel_label_exclude_outgroup[row[0]] = ','.join(row[1:]) + else: + position_indel_label_exclude_outgroup[row[0]] = ','.join(row[1:]) + keep_logging('Warning: position %s already present as a SNP' % row[0], + 'Warning: position %s already present as a SNP' % row[0], logger, 'info') + csv_file.close() + for key in position_label_exclude_outgroup.keys(): + label_sep_array = position_label_exclude_outgroup[key].split(',') + for i in label_sep_array: + if "LowFQ" in str(i): + if key not in mask_fq_mq_positions: + if int(key) not in outgroup_specific_positions: + mask_fq_mq_positions.append(key) + elif int(key) in outgroup_specific_positions: + mask_fq_mq_positions_outgroup_specific.append(key) + if i == "HighFQ": + if key not in mask_fq_mq_positions: + if int(key) not in outgroup_specific_positions: + mask_fq_mq_positions.append(key) + elif int(key) in outgroup_specific_positions: + mask_fq_mq_positions_outgroup_specific.append(key) + else: + for key in position_label.keys(): + label_sep_array = position_label[key].split(',') + for i in label_sep_array: + if "LowFQ" in str(i): + if key not in mask_fq_mq_positions: + mask_fq_mq_positions.append(key) + if i == "HighFQ": + if key not in mask_fq_mq_positions: + mask_fq_mq_positions.append(key) + + + + print "Length of Indel mask_fq_mq_positions:%s" % len(mask_fq_mq_positions) + print "Length of Indel mask_fq_mq_positions specific to outgroup:%s" % len(mask_fq_mq_positions_outgroup_specific) + + + + + + + + for variants in VCF("%s/Final_vcf_gatk_indel.vcf.gz" % args.filter2_only_snp_vcf_dir): + print_string = "" + + functional_field = "" + 
if str(variants.POS) in phage_positions: + functional_field = functional_field + "PHAGE_" + else: + functional_field = functional_field + "NULL_" + if str(variants.POS) in repetitive_positions: + functional_field = functional_field + "REPEATS_" + else: + functional_field = functional_field + "NULL_" + if str(variants.POS) in mask_positions: + functional_field = functional_field + "MASK" + else: + functional_field = functional_field + "NULL" + + code_string = position_indel_label[str(variants.POS)] + code_string = code_string.replace('reference_allele', '0') + code_string = code_string.replace('reference_unmapped_position', '-1') + code_string = code_string.replace('LowAF_QUAL_DP_proximate_SNP', '2') + code_string = code_string.replace('LowAF_DP_QUAL_proximate_SNP', '2') + code_string = code_string.replace('LowAF_QUAL_proximate_SNP', '2') + code_string = code_string.replace('LowAF_DP_proximate_SNP', '2') + code_string = code_string.replace('LowAF_proximate_SNP', '2') + code_string = code_string.replace('LowAF_QUAL_DP', '2') + code_string = code_string.replace('LowAF_DP_QUAL', '2') + code_string = code_string.replace('LowAF_QUAL', '2') + code_string = code_string.replace('LowAF_DP', '2') + code_string = code_string.replace('HighAF_QUAL_DP_proximate_SNP', '2') + code_string = code_string.replace('HighAF_DP_QUAL_proximate_SNP', '2') + code_string = code_string.replace('HighAF_QUAL_proximate_SNP', '2') + code_string = code_string.replace('HighAF_DP_proximate_SNP', '2') + code_string = code_string.replace('HighAF_proximate_SNP', '2') + code_string = code_string.replace('HighAF_QUAL_DP', '2') + code_string = code_string.replace('HighAF_DP_QUAL', '2') + code_string = code_string.replace('HighAF_QUAL', '2') + code_string = code_string.replace('HighAF_DP', '2') + code_string = code_string.replace('LowAF', '-3') + code_string = code_string.replace('HighAF', '-4') + + if str(variants.POS) in indel_core_positions: + code_string = code_string.replace('VARIANT', '1') + # Adding 
functional class status code to SNP matrix: 2018-07-24 + elif str(variants.POS) in functional_filter_pos_array: + # Changing Functional class filter code to -2 from 2: 2018-12-04 + code_string = code_string.replace('VARIANT', '-2') + else: + code_string = code_string.replace('VARIANT', '3') + + + + + # Changing SNP type: Date 28/05/2019 + # Assign type of snp: coding / non-coding + if variants.POS in indel_var_ann_dict.keys(): + if indel_var_ann_dict[variants.POS] is not None: + if "protein_coding" in set(indel_var_ann_dict[variants.POS].split('|')) and "intergenic_region" not in set(indel_var_ann_dict[variants.POS].split('|')): + snp_type = "Coding Indel" + elif "protein_coding" in set(indel_var_ann_dict[variants.POS].split('|')) and "intergenic_region" in set(indel_var_ann_dict[variants.POS].split('|')): + snp_type = "Coding and Non-coding Indel" + elif "protein_coding" not in set(indel_var_ann_dict[variants.POS].split('|')) and "intergenic_region" in set(indel_var_ann_dict[variants.POS].split('|')): + snp_type = "Non-Coding Indel" + elif "protein_coding" not in set(indel_var_ann_dict[variants.POS].split('|')) and "intragenic_variant" in set(indel_var_ann_dict[variants.POS].split('|')): + snp_type = "Non-Coding Indel" + else: + print set((indel_var_ann_dict[variants.POS].split('|'))) + snp_type = "No_protein_coding/intergenic_region_field_in_ANN SNP" + #print snp_type + else: + keep_logging('Warning: position %s not found in snp_var_ann_dict dictionary. Assigning Not found as SNP type.' % variants.POS, 'Warning: position %s not found in snp_var_ann_dict dictionary. Assigning Not found as SNP type.' % variants.POS, logger, 'info') + print set((indel_var_ann_dict[variants.POS].split('|'))) + snp_type = "Not Found in Annotated VCF file" + + + + + print_string = print_string + snp_type + " at %s > " % str(variants.POS) + str(",".join(variants.ALT)) + " functional=%s" % functional_field + + # Get ANN field from variant INFO column and save it as an array. 
Split and Go through each elements, add bells and whistles + if variants.INFO.get('ANN'): + + ann_array = (variants.INFO.get('ANN')).split(',') + + # Generate tag string before generating ann_string + if len(ann_array) > 1: + # print variants.INFO.get('ANN') + # print list(set(ann_array)) + tag_list = [] + + for i_again in set(indel_var_ann_dict[variants.POS].split(',')): + i_split_again = i_again.split('|') + + if "-" not in i_split_again[4]: + if i_split_again[4] not in tag_list: + tag_list.append(i_split_again[4]) + + else: + split_tags = i_split_again[4].split('-') + for splittagsindividual in split_tags: + if splittagsindividual not in tag_list: + tag_list.append(splittagsindividual) + + if len(tag_list) == 1: + tag = tag_list[0] + + elif len(tag_list) == 2: + tag = str(tag_list[0]) + "-" + str(tag_list[1]) + + elif len(tag_list) > 2: + print tag_list + tag = tag.replace('CHR_START-', '') + tag = tag.replace('-CHR_END', '') + else: + for i in list(set(ann_array)): + i_split = i.split('|') + tag = str(i_split[4]).replace('CHR_START-', '') + tag = str(tag).replace('-CHR_END', '') + + ann_string = ";" + for i in list(set(ann_array)): + i_split = i.split('|') + # ann_string = ann_string + '|'.join([i_split[0],i_split[1],i_split[2],i_split[3],i_split[9], i_split[10], i_split[11], i_split[13]]) + ";" + + # MOve this tag before this for loop because of multiple tags associated. 
+ # tag = str(i_split[4]).replace('CHR_START-', '') + # tag = str(tag).replace('-CHR_END', '') + + if "-" in tag: + # print tag + extra_tags = "" + tag_split = tag.split('-') + for i in tag_split: + if i in locus_tag_to_gene_name.keys(): + extra_tags = extra_tags + locus_tag_to_gene_name[i] + "," + else: + extra_tags = extra_tags + "None" + "," + extra_tags_prot = "" + for i in tag_split: + if i in locus_tag_to_product.keys(): + extra_tags_prot = extra_tags_prot + locus_tag_to_product[i] + "," + else: + extra_tags_prot = extra_tags_prot + "None" + "," + ann_string = ann_string + '|'.join( + [i_split[0], i_split[1], i_split[2], i_split[3], i_split[9], i_split[10], i_split[11], + i_split[13], extra_tags, extra_tags_prot]) + ";" + # Changing SNP type: Date 28/05/2019 + elif tag == "": + print "ERROR: Issues with this locus tag. Check this tag in genbank file" + print list(set(ann_array)) + # Adding this so that Ann string is not empty: 30/05/2019 + if tag in locus_tag_to_gene_name.keys() and tag in locus_tag_to_product.keys(): + extra_tags = str(locus_tag_to_gene_name[tag]) + "|" + str(locus_tag_to_product[tag]) + else: + print "tag key not found: %s" % tag + extra_tags = "NULL" + "|" + "NULL" + # ann_string = ann_string + '|'.join([i_split[0],i_split[1],i_split[2],i_split[3],i_split[9], i_split[10], i_split[11], i_split[13], extra_tags]) + ";" + # Added 2019-31-05 + if "ERROR_OUT_OF_CHROMOSOME_RANGE" in i: + ann_string = ann_string + '|'.join( + [i_split[0], "intergenic_region", i_split[2], "ERROR_OUT_OF_CHROMOSOME_RANGE", i_split[9], + i_split[10], i_split[11], + i_split[13], extra_tags]) + ";" + else: + ann_string = ann_string + '|'.join( + [i_split[0], i_split[1], i_split[2], i_split[3], i_split[9], i_split[10], i_split[11], + i_split[13], extra_tags]) + ";" + # Debugging + if i_split[3] == "CD630_00290": + print ann_string + # Changing SNP type: Date 28/05/2019 + else: + if tag in locus_tag_to_gene_name.keys() and tag in locus_tag_to_product.keys(): + extra_tags 
= str(locus_tag_to_gene_name[tag]) + "|" + str(locus_tag_to_product[tag]) + else: + print "tag key not found: %s" % tag + extra_tags = "NULL" + "|" + "NULL" + # ann_string = ann_string + '|'.join([i_split[0],i_split[1],i_split[2],i_split[3],i_split[9], i_split[10], i_split[11], i_split[13], extra_tags]) + ";" + ann_string = ann_string + '|'.join( + [i_split[0], i_split[1], i_split[2], i_split[3], i_split[9], i_split[10], i_split[11], + i_split[13], extra_tags]) + ";" + + + # Changing SNP type: Date 28/05/2019 + # Working/Testing + else: + if len(variants.ALT) > 1 and indel_var_ann_dict[variants.POS]: + # print variants.ALT + # print ';'.join(set(snp_var_ann_dict[variants.POS].split(','))) + + ann_string = ";%s" % ';'.join(set(indel_var_ann_dict[variants.POS].split(','))) + # Get Tag here; Multiple tag names. + tag_list = [] + + for i in set(indel_var_ann_dict[variants.POS].split(',')): + i_split = i.split('|') + if i_split[4] not in tag_list: + tag_list.append(i_split[4]) + if len(tag_list) > 1: + tag = str(tag_list[0]) + "-" + str(tag_list[1]) + else: + tag = tag_list[0] + + # if len(set(snp_var_ann_dict[variants.POS].split(','))) > 2: + # print tag + # print set(snp_var_ann_dict[variants.POS].split(',')) + + else: + ann_string = ";None" + + + # Changing SNP type: Date 28/05/2019 + ann_string = ann_string.replace('ERROR_OUT_OF_CHROMOSOME_RANGE', '%s-%s' % (locus_tag_to_gene_name[last_locus_tag], locus_tag_to_gene_name[first_locus_tag])) + ann_string = ann_string.replace('CHR_END', '%s' % locus_tag_to_gene_name[first_locus_tag]) + + + # SNP Matrix Bug + ann_string_split = ann_string.split(';') + if len(ann_string_split) == 3: + first_allele_ann_string_split = ann_string_split[1].split('|') + second_allele_ann_string_split = ann_string_split[2].split('|') + if len(first_allele_ann_string_split) == 10 and len(second_allele_ann_string_split) == 10: + ann_string = ann_string + elif len(first_allele_ann_string_split) > 10 and len(second_allele_ann_string_split) == 10: + 
if first_allele_ann_string_split[14] == "" and first_allele_ann_string_split[15] == "": + prod = first_allele_ann_string_split[3] + first_allele_ann_string_split[15] + else: + prod = first_allele_ann_string_split[14] + first_allele_ann_string_split[15] + new_first_allele_ann_string = ";" + first_allele_ann_string_split[0] + "|" + \ + first_allele_ann_string_split[1] + "|" + \ + first_allele_ann_string_split[2] + "|" + \ + first_allele_ann_string_split[4] + "|" + \ + first_allele_ann_string_split[9] + "|" + \ + first_allele_ann_string_split[10] + "|" + \ + first_allele_ann_string_split[11] + "|" + \ + first_allele_ann_string_split[13] + "|" + prod + "|" + prod + ";" + + ann_string = new_first_allele_ann_string + str(ann_string_split[2]) + + elif len(first_allele_ann_string_split) == 10 and len(second_allele_ann_string_split) > 10: + + if second_allele_ann_string_split[14] == "" and second_allele_ann_string_split[15] == "": + prod = second_allele_ann_string_split[3] + second_allele_ann_string_split[15] + else: + prod = second_allele_ann_string_split[14] + second_allele_ann_string_split[15] + new_second_allele_ann_string = second_allele_ann_string_split[0] + "|" + \ + second_allele_ann_string_split[1] + "|" + \ + second_allele_ann_string_split[2] + "|" + \ + second_allele_ann_string_split[4] + "|" + \ + second_allele_ann_string_split[9] + "|" + \ + second_allele_ann_string_split[10] + "|" + \ + second_allele_ann_string_split[11] + "|" + \ + second_allele_ann_string_split[ + 13] + "|" + prod + "|" + prod + ";" + + ann_string = str(ann_string_split[1]) + new_second_allele_ann_string + elif len(first_allele_ann_string_split) > 10 and len(second_allele_ann_string_split) > 10: + + if first_allele_ann_string_split[14] == "" and first_allele_ann_string_split[15] == "": + prod = first_allele_ann_string_split[3] + first_allele_ann_string_split[15] + else: + prod = first_allele_ann_string_split[14] + first_allele_ann_string_split[15] + new_first_allele_ann_string = ";" + 
first_allele_ann_string_split[0] + "|" + \ + first_allele_ann_string_split[1] + "|" + \ + first_allele_ann_string_split[2] + "|" + \ + first_allele_ann_string_split[4] + "|" + \ + first_allele_ann_string_split[9] + "|" + \ + first_allele_ann_string_split[10] + "|" + \ + first_allele_ann_string_split[11] + "|" + \ + first_allele_ann_string_split[13] + "|" + prod + "|" + prod + ";" + + if second_allele_ann_string_split[14] == "" and second_allele_ann_string_split[15] == "": + prod = second_allele_ann_string_split[3] + second_allele_ann_string_split[15] + else: + prod = second_allele_ann_string_split[14] + second_allele_ann_string_split[15] + new_second_allele_ann_string = second_allele_ann_string_split[0] + "|" + \ + second_allele_ann_string_split[1] + "|" + \ + second_allele_ann_string_split[2] + "|" + \ + second_allele_ann_string_split[4] + "|" + \ + second_allele_ann_string_split[9] + "|" + \ + second_allele_ann_string_split[10] + "|" + \ + second_allele_ann_string_split[11] + "|" + \ + second_allele_ann_string_split[ + 13] + "|" + prod + "|" + prod + ";" + + ann_string = new_first_allele_ann_string + new_second_allele_ann_string + + + if len(ann_string_split) > 3: + + first_allele_ann_string_split = ann_string_split[1].split('|') + second_allele_ann_string_split = ann_string_split[2].split('|') + third_allele_ann_string_split = ann_string_split[3].split('|') + + if len(first_allele_ann_string_split) == 10 and len(second_allele_ann_string_split) == 10 and len( + third_allele_ann_string_split) == 10: + ann_string = ann_string + + elif len(first_allele_ann_string_split) > 10 and len(second_allele_ann_string_split) == 10 and len( + third_allele_ann_string_split) == 10: + if first_allele_ann_string_split[14] == "" and first_allele_ann_string_split[15] == "": + prod = first_allele_ann_string_split[3] + first_allele_ann_string_split[15] + else: + prod = first_allele_ann_string_split[14] + first_allele_ann_string_split[15] + new_first_allele_ann_string = ";" + 
first_allele_ann_string_split[0] + "|" + \ + first_allele_ann_string_split[1] + "|" + \ + first_allele_ann_string_split[2] + "|" + \ + first_allele_ann_string_split[4] + "|" + \ + first_allele_ann_string_split[9] + "|" + \ + first_allele_ann_string_split[10] + "|" + \ + first_allele_ann_string_split[11] + "|" + \ + first_allele_ann_string_split[13] + "|" + prod + "|" + prod + ";" + + ann_string = new_first_allele_ann_string + str(ann_string_split[2]) + str(ann_string_split[3]) + + elif len(first_allele_ann_string_split) == 10 and len(second_allele_ann_string_split) > 10 and len( + third_allele_ann_string_split) == 10: + + if second_allele_ann_string_split[14] == "" and second_allele_ann_string_split[15] == "": + prod = second_allele_ann_string_split[3] + second_allele_ann_string_split[15] + else: + prod = second_allele_ann_string_split[14] + second_allele_ann_string_split[15] + new_second_allele_ann_string = second_allele_ann_string_split[0] + "|" + \ + second_allele_ann_string_split[1] + "|" + \ + second_allele_ann_string_split[2] + "|" + \ + second_allele_ann_string_split[4] + "|" + \ + second_allele_ann_string_split[9] + "|" + \ + second_allele_ann_string_split[10] + "|" + \ + second_allele_ann_string_split[11] + "|" + \ + second_allele_ann_string_split[ + 13] + "|" + prod + "|" + prod + ";" + + ann_string = str(ann_string_split[1]) + new_second_allele_ann_string + str(ann_string_split[3]) + + elif len(first_allele_ann_string_split) == 10 and len(second_allele_ann_string_split) == 10 and len( + third_allele_ann_string_split) > 10: + + if third_allele_ann_string_split[14] == "" and third_allele_ann_string_split[15] == "": + prod = third_allele_ann_string_split[3] + third_allele_ann_string_split[15] + else: + prod = third_allele_ann_string_split[14] + third_allele_ann_string_split[15] + new_third_allele_ann_string = third_allele_ann_string_split[0] + "|" + \ + third_allele_ann_string_split[1] + "|" + \ + third_allele_ann_string_split[2] + "|" + \ + 
third_allele_ann_string_split[4] + "|" + \ + third_allele_ann_string_split[9] + "|" + \ + third_allele_ann_string_split[10] + "|" + \ + third_allele_ann_string_split[11] + "|" + \ + third_allele_ann_string_split[13] + "|" + prod + "|" + prod + ";" + + ann_string = str(ann_string_split[1]) + str(ann_string_split[2]) + new_third_allele_ann_string + + elif len(first_allele_ann_string_split) > 10 and len(second_allele_ann_string_split) > 10 and len( + third_allele_ann_string_split) > 10: + # print ann_string + if first_allele_ann_string_split[14] == "" and first_allele_ann_string_split[15] == "": + prod = first_allele_ann_string_split[3] + first_allele_ann_string_split[15] + else: + prod = first_allele_ann_string_split[14] + first_allele_ann_string_split[15] + new_first_allele_ann_string = ";" + first_allele_ann_string_split[0] + "|" + \ + first_allele_ann_string_split[1] + "|" + \ + first_allele_ann_string_split[2] + "|" + \ + first_allele_ann_string_split[4] + "|" + \ + first_allele_ann_string_split[9] + "|" + \ + first_allele_ann_string_split[10] + "|" + \ + first_allele_ann_string_split[11] + "|" + \ + first_allele_ann_string_split[13] + "|" + prod + "|" + prod + ";" + + if second_allele_ann_string_split[14] == "" and second_allele_ann_string_split[15] == "": + prod = second_allele_ann_string_split[3] + second_allele_ann_string_split[15] + else: + prod = second_allele_ann_string_split[14] + second_allele_ann_string_split[15] + new_second_allele_ann_string = second_allele_ann_string_split[0] + "|" + \ + second_allele_ann_string_split[1] + "|" + \ + second_allele_ann_string_split[2] + "|" + \ + second_allele_ann_string_split[4] + "|" + \ + second_allele_ann_string_split[9] + "|" + \ + second_allele_ann_string_split[10] + "|" + \ + second_allele_ann_string_split[11] + "|" + \ + second_allele_ann_string_split[ + 13] + "|" + prod + "|" + prod + ";" + + if third_allele_ann_string_split[14] == "" and third_allele_ann_string_split[15] == "": + prod = 
third_allele_ann_string_split[3] + third_allele_ann_string_split[15] + else: + prod = third_allele_ann_string_split[14] + third_allele_ann_string_split[15] + new_third_allele_ann_string = third_allele_ann_string_split[0] + "|" + \ + third_allele_ann_string_split[1] + "|" + \ + third_allele_ann_string_split[2] + "|" + \ + third_allele_ann_string_split[4] + "|" + \ + third_allele_ann_string_split[9] + "|" + \ + third_allele_ann_string_split[10] + "|" + \ + third_allele_ann_string_split[11] + "|" + \ + third_allele_ann_string_split[13] + "|" + prod + "|" + prod + ";" + + ann_string = new_first_allele_ann_string + new_second_allele_ann_string + new_third_allele_ann_string + + # print ann_string + + # # JUST FOR THE SAKE OF DEBUGGING + # ann_string_split = ann_string.split(';') + # for i in ann_string_split: + # if len(i.split('|')) != 10 and len(i.split('|')) != 1: + # print ann_string + + # Changing Strandness string: Date 28/05/2019 + # Each Locus ID with a strand information + strandness = " Strand Information: " + if "-" in tag: + tagsplit = tag.split('-') + for i in tagsplit: + if i in locus_tag_to_strand.keys(): + if "," in locus_tag_to_strand[i]: + locus_tag_to_strand_split = locus_tag_to_strand[i].split(',') + strand = locus_tag_to_strand_split[0] + else: + strand = locus_tag_to_strand[i] + strandness = strandness + i + "=" + strand + "/" + else: + if i == "" or i == "None": + strandness = strandness + "NULL=" + "No Strand Information found" + "/" + else: + strandness = strandness + i + "=" + "No Strand Information found" + "/" + else: + if tag in locus_tag_to_strand.keys(): + # strandness = strandness + locus_tag_to_strand[tag] + if "," in locus_tag_to_strand[tag]: + locus_tag_to_strand_split = locus_tag_to_strand[tag].split(',') + strand = locus_tag_to_strand_split[0] + else: + strand = locus_tag_to_strand[tag] + strandness = strandness + tag + "=" + strand + else: + if tag == "" or tag == "None": + strandness = strandness + "NULL=" + "No Strand Information 
found" + else: + strandness = strandness + tag + "=" + "No Strand Information found" + + + # Changing tag equals NULL: Date 30/05/2019 + if tag == "" or tag == "None": + tag = "NULL" + + print_string = print_string + " locus_tag=" + tag + strandness + ann_string + + gt_string = "" + for gt in variants.gt_bases: + gt = gt.replace('./.', '.') + if "/" in gt: + gt_split = gt.split('/') + gt = gt_split[1] + gt_string = gt_string + "," + gt + gt_string = gt_string.replace('.', variants.REF) + + """Replacing Phage/Functional filter position code""" + if str(variants.POS) in functional_filter_pos_array: + code_string_array = code_string.split(',') + code_string = "" + for i in code_string_array: + code_string = code_string + "," + "-2" + + final_allele_string = print_string + gt_string.replace(',', '\t') + '\n' + final_code_string = print_string + "\t" + code_string.replace(',', '\t') + '\n' + final_allele_string = final_allele_string.replace(',|', '|') + # final_allele_string = final_allele_string.replace(',;,', ':::') + # final_allele_string = final_allele_string.replace(';,', ':::') + final_allele_string = final_allele_string.replace(',;,', ':::') + final_allele_string = final_allele_string.replace(';,', ':::') + final_code_string = final_code_string.replace(',|', '|') + # final_code_string = final_code_string.replace(',;,', ':::') + # final_code_string = final_code_string.replace(';,', ':::') + final_code_string = final_code_string.replace(',;,', ':::') + final_code_string = final_code_string.replace(';,', ':::') + final_code_string = final_code_string.replace('\t\t', '\t') + final_allele_string = final_allele_string.replace('\t\t', '\t') + fp_allele.write(final_allele_string) + fp_code.write(final_code_string) + fp_code.close() + fp_allele.close() + +def core_prep_snp(core_vcf_fasta_dir): + """ Generate SNP Filter Label Matrix """ + generate_paste_command() + + generate_paste_command_outgroup() + + """ Generate different list of Positions from the 
**All_label_final_sorted_header.txt** SNP position label data matrix. """ + generate_position_label_data_matrix() + + """ Generate VCF files from final list of variants in Only_ref_variant_positions_for_closely; generate commands for consensus generation """ + generate_vcf_files() + + """ Generate consensus fasta file from core vcf files """ + extract_only_ref_variant_fasta_from_reference() + + """ Generate consensus fasta file with only reference and variant position bases """ + extract_only_ref_variant_fasta(core_vcf_fasta_dir) + + # """ Analyze the positions that were filtered out only due to insufficient depth""" + # DP_analysis() + +def core_prep_indel(core_vcf_fasta_dir): + """ Generate SNP Filter Label Matrix """ + generate_indel_paste_command() + + generate_indel_paste_command_outgroup() + + """ Generate different list of Positions from the **All_label_final_sorted_header.txt** SNP position label data matrix. """ + generate_indel_position_label_data_matrix() + +""" report methods """ +def alignment_report(data_matrix_dir): + keep_logging('Generating Alignment report...', 'Generating Alignment report...', logger, 'info') + varcall_dir = os.path.dirname(args.results_dir) + print varcall_dir + report_string = "" + header = "Sample,QC-passed reads,Mapped reads,% mapped reads,mean depth,%_bases_above_5,%_bases_above_10,%_bases_above_15,unmapped_positions,READ_PAIR_DUPLICATES,READ_PAIR_OPTICAL_DUPLICATES,unmapped reads,% unmapped reads" + fp = open("%s/Report_alignment.txt" % (data_matrix_dir), 'w+') + fp.write(header + '\n') + for vcf in vcf_filenames: + sample = os.path.basename(vcf.replace('_filter2_final.vcf_no_proximate_snp.vcf', '')) + #print sample + report_string = sample + "," + qc = (subprocess.check_output("grep \'QC-passed\' %s/%s/%s_alignment_stats | sed \'s/ + 0 in total (QC-passed reads + QC-failed reads)//g\'" % (varcall_dir, sample, sample), shell=True)).strip() + mapped = (subprocess.check_output("grep \'mapped (\' %s/%s/%s_alignment_stats | awk 
-F\' \' \'{print $1}\'" % (varcall_dir, sample, sample), shell=True)).strip() + replace = "%:-nan%)" + perc_mapped = (subprocess.check_output("grep \'mapped (\' %s/%s/%s_alignment_stats | awk -F\' \' \'{print $5}\' | sed \'s/%s//g\' | sed \'s/(//g\'" % (varcall_dir, sample, sample, replace), shell=True)).strip() + depth_of_coverage = (subprocess.check_output("awk -F\'\\t\' \'{OFS=\",\"};FNR==2{print $3,$7,$8,$9}\' %s/%s/%s_depth_of_coverage.sample_summary" % (varcall_dir, sample, sample), shell=True)).strip() + unmapped_positions = (subprocess.check_output("wc -l %s/%s/%s_unmapped.bed_positions | cut -d\' \' -f1" % (varcall_dir, sample, sample), shell=True)).strip() + opt_dup = (subprocess.check_output("awk -F\'\\t\' \'{OFS=\",\"};FNR==8{print $7,$8,$5}\' %s/%s/%s_markduplicates_metrics" % (varcall_dir, sample, sample), shell=True)).strip() + perc_unmapped = str(100 - float(perc_mapped)) + myList = ','.join(map(str, (sample, qc, mapped, perc_mapped, depth_of_coverage, unmapped_positions, opt_dup, perc_unmapped))) + #print myList + fp.write(myList + '\n') + fp.close() + keep_logging('Alignment report can be found in %s/Report_alignment.txt' % data_matrix_dir, 'Alignment report can be found in %s/Report_alignment.txt' % data_matrix_dir, logger, 'info') + +def variant_report(data_matrix_dir): + keep_logging('Generating Variants report...', 'Generating Variants report...', logger, 'info') + varcall_dir = os.path.dirname(os.path.abspath(args.results_dir)) + report_string = "" + header = "Sample,Total Unique Variants,core SNPs,unmapped_positions,reference_allele,true_variant,Only_low_FQ,Only_DP,Only_low_MQ,other,unmapped_positions_perc,true_variant_perc,Only_low_FQ_perc,Only_DP_perc,Only_low_MQ_perc,other_perc" + fp = open("%s/Report_variants.txt" % (data_matrix_dir), 'w+') + fp.write(header + '\n') + + for vcf in vcf_filenames: + sample = os.path.basename(vcf.replace('_filter2_final.vcf_no_proximate_snp.vcf', '')) + report_string = sample + "," + unmapped_positions = 
(subprocess.check_output("wc -l %s/core_temp_dir/unique_positions_file | cut -d\' \' -f1" % (varcall_dir), shell=True)).strip() + core_snps = (subprocess.check_output("wc -l %s/core_temp_dir/Only_ref_variant_positions_for_closely | cut -d\' \' -f1" % (varcall_dir), shell=True)).strip() + filtered_snp_count = (subprocess.check_output("grep -w \'^%s\' %s/core_temp_dir/bargraph_counts.txt | awk -F\'\\t\' \'{OFS=\",\"};{print $2,$3,$4,$5,$6,$7}\'" % (sample, varcall_dir), shell=True)).strip() + filtered_snp_perc = (subprocess.check_output("grep -w \'^%s\' %s/core_temp_dir/bargraph_percentage.txt | awk -F\'\\t\' \'{OFS=\",\"};{print $2,$3,$4,$5,$6,$7}\'" % (sample, varcall_dir), shell=True)).strip() + myList = ','.join(map(str, (sample, unmapped_positions, core_snps, filtered_snp_count, filtered_snp_perc))) + fp.write(myList + '\n') + fp.close() + keep_logging('Variant call report can be found in %s/Report_variants.txt' % data_matrix_dir, 'Variant call report can be found in %s/Report_variants.txt' % data_matrix_dir, logger, 'info') + +def gubbins(gubbins_dir, input_fasta, jobrun, logger, Config): + keep_logging('\nRunning Gubbins on input: %s\n' % input_fasta, '\nRunning Gubbins on input: %s\n' % input_fasta, + logger, + 'info') + + + call("module load bioperl python-anaconda2/201607 biopython dendropy reportlab fasttree RAxML fastml/gub gubbins", logger) + #os.system("module load bioperl python-anaconda2/201607 biopython dendropy reportlab fasttree RAxML fastml/gub gubbins") + #gubbins_cmd = "%s/%s --prefix %s/%s %s" % ( + # ConfigSectionMap("gubbins", Config)['gubbins_bin'], ConfigSectionMap("gubbins", Config)['base_cmd'], gubbins_dir, + # (os.path.basename(input_fasta)).replace('.fa', ''), input_fasta) + + load_module = "module load bioperl python-anaconda2/201607 biopython dendropy reportlab fasttree RAxML fastml/gub gubbins" + gubbins_cmd = "%s --threads 6 --prefix %s/%s %s" % ( + ConfigSectionMap("gubbins", Config)['base_cmd'], gubbins_dir, + 
(os.path.basename(input_fasta)).replace('.fa', ''), input_fasta) + keep_logging('\nRunning Gubbins on: %s' % input_fasta, '\nRunning Gubbins: %s\n' % input_fasta, + logger, + 'info') + + keep_logging('Running: %s' % gubbins_cmd, '%s' % gubbins_cmd, logger, 'info') + if jobrun == "parallel-local" or jobrun == "local": + call("cd %s" % gubbins_dir, logger) + call(gubbins_cmd, logger) + elif jobrun == "cluster": + call("cd %s" % gubbins_dir, logger) + call(gubbins_cmd, logger) + elif jobrun == "parallel-cluster": + job_file_name = "%s/gubbins_%s.pbs" % (gubbins_dir, os.path.basename(input_fasta)) + job_name = os.path.basename(job_file_name) + job_print_string = "#PBS -N %s\n#PBS -M %s\n#PBS -m %s\n#PBS -V\n#PBS -l nodes=1:ppn=12,mem=47000mb,walltime=250:00:00\n#PBS -q %s\n#PBS -A %s\n#PBS -l qos=flux\ncd %s\n%s\n%s" % (job_name, ConfigSectionMap("scheduler", Config)['email'], ConfigSectionMap("scheduler", Config)['notification'], ConfigSectionMap("scheduler", Config)['queue'], ConfigSectionMap("scheduler", Config)['flux_account'], gubbins_dir, load_module, gubbins_cmd) + f1=open(job_file_name, 'w+') + f1.write(job_print_string) + f1.close() + #os.system("qsub %s" % job_file_name) + call("qsub %s" % job_file_name, logger) + +def get_outgroup(): + """ + Prepare Outgroup Sample name from the argument. 
+ """ + if args.outgroup: + if "R1_001_final.fastq.gz" in args.outgroup: + first_part_split = args.outgroup.split('R1_001_final.fastq.gz') + first_part = first_part_split[0].replace('_L001', '') + outgroup = re.sub("_S.*_", "", first_part) + + elif "_R1.fastq.gz" in args.outgroup: + first_part_split = args.outgroup.split('_R1.fastq.gz') + first_part = first_part_split[0].replace('_L001', '') + outgroup = re.sub("_S.*_", "", first_part) + + elif "R1.fastq.gz" in args.outgroup: + first_part_split = args.outgroup.split('R1.fastq.gz') + first_part = first_part_split[0].replace('_L001', '') + first_part = re.sub("_S.*_", "", first_part) + outgroup = re.sub("_S.*", "", first_part) + + elif "1_combine.fastq.gz" in args.outgroup: + first_part_split = args.outgroup.split('1_combine.fastq.gz') + first_part = first_part_split[0].replace('_L001', '') + outgroup = re.sub("_S.*_", "", first_part) + + elif "1_sequence.fastq.gz" in args.outgroup: + first_part_split = args.outgroup.split('1_sequence.fastq.gz') + first_part = first_part_split[0].replace('_L001', '') + outgroup = re.sub("_S.*_", "", first_part) + + elif "_forward.fastq.gz" in args.outgroup: + first_part_split = args.outgroup.split('_forward.fastq.gz') + first_part = first_part_split[0].replace('_L001', '') + outgroup = re.sub("_S.*_", "", first_part) + + elif "R1_001.fastq.gz" in args.outgroup: + first_part_split = args.outgroup.split('R1_001.fastq.gz') + first_part = first_part_split[0].replace('_L001', '') + outgroup = re.sub("_S.*_", "", first_part) + + elif "_1.fastq.gz" in args.outgroup: + first_part_split = args.outgroup.split('_1.fastq.gz') + first_part = first_part_split[0].replace('_L001', '') + outgroup = re.sub("_S.*_", "", first_part) + + elif ".1.fastq.gz" in args.outgroup: + first_part_split = args.outgroup.split('.1.fastq.gz') + first_part = first_part_split[0].replace('_L001', '') + outgroup = re.sub("_S.*_", "", first_part) + + keep_logging( + 'Using %s as Outgroup Sample Name' % outgroup, + 'Using 
%s as Outgroup Sample Name' % outgroup, + logger, 'info') + + return outgroup + else: + keep_logging('Outgroup Sample Name not provided\n', 'Outgroup Sample Name not provided\n', logger, 'info') + outgroup = "" + +def mask_fq_mq_positions_specific_to_outgroup(): + """ Generate mask_fq_mq_positions array with positions where a variant was filtered because of LowFQ or LowMQ""" + mask_fq_mq_positions = [] + mask_fq_mq_positions_outgroup_specific = [] + if args.outgroup: + position_label_exclude_outgroup = OrderedDict() + with open("%s/All_label_final_ordered_exclude_outgroup_sorted.txt" % args.filter2_only_snp_vcf_dir, + 'rU') as csv_file: + keep_logging( + 'Reading All label positions file: %s/All_label_final_ordered_exclude_outgroup_sorted.txt' % args.filter2_only_snp_vcf_dir, + 'Reading All label positions file: %s/All_label_final_ordered_exclude_outgroup_sorted.txt' % args.filter2_only_snp_vcf_dir, + logger, 'info') + csv_reader = csv.reader(csv_file, delimiter='\t') + for row in csv_reader: + position_label_exclude_outgroup[row[0]] = ','.join(row[1:]) + csv_file.close() + + position_indel_label_exclude_outgroup = OrderedDict() + with open("%s/All_indel_label_final_ordered_exclude_outgroup_sorted.txt" % args.filter2_only_snp_vcf_dir, + 'rU') as csv_file: + keep_logging( + 'Reading All label positions file: %s/All_indel_label_final_ordered_exclude_outgroup_sorted.txt' % args.filter2_only_snp_vcf_dir, + 'Reading All label positions file: %s/All_indel_label_final_ordered_exclude_outgroup_sorted.txt' % args.filter2_only_snp_vcf_dir, + logger, 'info') + csv_reader = csv.reader(csv_file, delimiter='\t') + for row in csv_reader: + if row[0] not in position_label_exclude_outgroup.keys(): + position_indel_label_exclude_outgroup[row[0]] = ','.join(row[1:]) + else: + position_indel_label_exclude_outgroup[row[0]] = ','.join(row[1:]) + keep_logging('Warning: position %s already present as a SNP' % row[0], + 'Warning: position %s already present as a SNP' % row[0], logger, 
'info') + csv_file.close() + for key in position_label_exclude_outgroup.keys(): + label_sep_array = position_label_exclude_outgroup[key].split(',') + for i in label_sep_array: + if "LowFQ" in str(i): + if key not in mask_fq_mq_positions: + if int(key) not in outgroup_specific_positions: + mask_fq_mq_positions.append(key) + elif int(key) in outgroup_specific_positions: + mask_fq_mq_positions_outgroup_specific.append(key) + if i == "HighFQ": + if key not in mask_fq_mq_positions: + if int(key) not in outgroup_specific_positions: + mask_fq_mq_positions.append(key) + elif int(key) in outgroup_specific_positions: + mask_fq_mq_positions_outgroup_specific.append(key) + + fp = open("%s/mask_fq_mq_positions_outgroup_specific.txt" % (args.filter2_only_snp_vcf_dir), 'w+') + for i in mask_fq_mq_positions_outgroup_specific: + fp.write(i + '\n') + fp.close() + print "Length of mask_fq_mq_positions specific to outgroup:%s" % len(mask_fq_mq_positions_outgroup_specific) + + outgroup = get_outgroup() + fqmqpositionsspecifictooutgroup = [] + + fopen = open("%s/mask_fq_mq_positions_outgroup_specific.txt" % (args.filter2_only_snp_vcf_dir), 'r+') + for i in fopen: + i = i.strip() + fqmqpositionsspecifictooutgroup.append(i) + fopen.close() + + print "Length of low MQ/FQ positions specific to outgroup: %s" % len(fqmqpositionsspecifictooutgroup) + + vcf_filename_unmapped = "%s/%s_ref_allele_unmapped_masked.vcf" % (args.filter2_only_snp_vcf_dir, outgroup) + + fp = open("%s/%s_ref_allele_unmapped_masked.vcf" % (args.filter2_only_snp_vcf_dir, outgroup), 'w+') + + vcf_header = "##fileformat=VCFv4.2\n#CHROM\tPOS\tID\tREF\tALT\tQUAL\tFILTER\tINFO\tFORMAT\t%s\n" % outgroup + fp.write(vcf_header) + + for variants in VCF("%s/%s_ref_allele_unmapped.vcf.gz" % (args.filter2_only_snp_vcf_dir, outgroup)): + print_string = "" + if str(variants.POS) in fqmqpositionsspecifictooutgroup: + print_string_array = [str(variants.CHROM), str(variants.POS), '.', str(variants.REF), 'N', '221.999', + '.', '.', '.', 
'.', '.'] + + + else: + print_string_array = [str(variants.CHROM), str(variants.POS), '.', str(variants.REF), + str(variants.ALT[0]), '221.999', '.', '.', '.', '.', '.'] + print_string = '\t'.join(print_string_array) + fp.write(print_string + '\n') + fp.close() + base_vcftools_bin = ConfigSectionMap("bin_path", Config)['binbase'] + "/" + \ + ConfigSectionMap("vcftools", Config)[ + 'vcftools_bin'] + bgzip_cmd = "%s/%s/bgzip -f %s\n" % ( + ConfigSectionMap("bin_path", Config)['binbase'], ConfigSectionMap("vcftools", Config)['tabix_bin'], + vcf_filename_unmapped) + + tabix_cmd = "%s/%s/tabix -f -p vcf %s.gz\n" % ( + ConfigSectionMap("bin_path", Config)['binbase'], ConfigSectionMap("vcftools", Config)['tabix_bin'], + vcf_filename_unmapped) + + fasta_cmd = "cat %s | %s/vcf-consensus %s.gz > %s_ref_allele_unmapped_variants.fa\n" % ( + args.reference, base_vcftools_bin, vcf_filename_unmapped, outgroup) + + # print bgzip_cmd + # print tabix_cmd + # print fasta_cmd + + subprocess.call([bgzip_cmd], shell=True) + subprocess.call([tabix_cmd], shell=True) + subprocess.call([fasta_cmd], shell=True) + sed_command = "sed -i 's/>.*/>%s/g' %s_ref_allele_unmapped_variants.fa\n" % (outgroup, outgroup) + subprocess.call([sed_command], shell=True) + # print sed_command + + + else: + for key in position_label.keys(): + label_sep_array = position_label[key].split(',') + for i in label_sep_array: + if "LowFQ" in str(i): + if key not in mask_fq_mq_positions: + mask_fq_mq_positions.append(key) + if i == "HighFQ": + if key not in mask_fq_mq_positions: + mask_fq_mq_positions.append(key) + + fp = open("%s/mask_fq_mq_positions.txt" % (args.filter2_only_snp_vcf_dir), 'w+') + for i in mask_fq_mq_positions: + fp.write(i + '\n') + fp.close() + + print "Length of mask_fq_mq_positions:%s" % len(mask_fq_mq_positions) + +""" +Pending inclusion + +class FuncThread(threading.Thread): + def __init__(self, target, *args): + self._target = target + self._args = args + threading.Thread.__init__(self) + def 
run(self): + self._target(*self._args) + +def someOtherFunc(data, key): + print "someOtherFunc was called : data=%s; key=%s" % (str(data), str(key)) + +Pending inclusion +""" + + + +if __name__ == '__main__': + + """ + Main Function for Variant Calling Core Pipeline + :param: + :return: + + This function runs "core_prep" step to generate intermediate files required for extracting core variants at "core" step. + Using these core variants, a "report" step will generate the final reports and output results of the pipeline as well as runs "tree" step to generate fasttree and raxml results + using the core variants consensus in Date_Time_core_results folder. + Steps: + 1. core_prep + 2. core + 3. report + 4. tree + """ + + # Start Timer to use it for generating folder names and Log prefixes. + start_time = datetime.now().strftime('%Y-%m-%d %H:%M:%S') + start_time_2 = datetime.now() + log_unique_time = datetime.now().strftime('%Y_%m_%d_%H_%M_%S') + global logger + analysis_name_log = "step_" + str(args.steps) + logger = generate_logger(args.filter2_only_snp_vcf_dir, analysis_name_log, log_unique_time) + keep_logging('\nThe Script started at: %s' % start_time, '\nThe Script started at: %s' % start_time, logger, 'info') + print_details = "This step will parse final vcf files(*_no_proximate_snp.vcf) generated at the end of Variant Calling Pipeline. At the end of this step, the following results will be generated and placed in output directory:\n\n" \ + "1. Final Core SNP Positions list(Variant positions that were not filtered out in any of the samples and passed all the filters)\n" \ + "2. SNP Positions that were filtered out with labels indicating the reason (Depth, FQ, MQ, Unmapped in one or other samples, Proximate SNPS, Quality of Variant) why they were filtered out.\n" \ + "3. Barplot Statistics about the filtered variants and their reason for getting filtered.\n" \ + "4. 
Final Consensus fasta file using only Core SNP Positions\n" + keep_logging('%s' % print_details, '%s' % print_details, logger, 'info') + + # Create temporary Directory core_temp_dir/temp for storing temporary intermediate files. Check if core_temp_dir contains all the required files to run these pipeline. + global temp_dir + temp_dir = args.filter2_only_snp_vcf_dir + "/temp" + + # Read Config file into Config object that will be used to extract configuration settings set up in config file. + global config_file + if args.config: + config_file = args.config + else: + config_file = os.path.dirname(os.path.abspath(__file__)) + "/config" + global Config + Config = ConfigParser.ConfigParser() + Config.read(config_file) + keep_logging('Path to config file: %s' % config_file, 'Path to config file: %s' % config_file, logger, 'info') + + make_sure_path_exists(temp_dir) + + # Get outgroup_Sample name + outgroup = get_outgroup() + outgroup_vcf_filename = str(outgroup) + "_filter2_final.vcf_no_proximate_snp.vcf" + outgroup_indel_vcf_filename = str(outgroup) + "_filter2_indel_final.vcf" + + # Read filenames. Core variants and final results will be extracted considering only these files. 
+ filter2_only_snp_vcf_filenames = args.filter2_only_snp_vcf_filenames + vcf_filenames_temp = [] + vcf_filenames_temp_outgroup = [] + + with open(filter2_only_snp_vcf_filenames) as fp: + for line in fp: + line = line.strip() + line = args.filter2_only_snp_vcf_dir + line + vcf_filenames_temp.append(line) + if args.outgroup: + if "%s_filter2_final.vcf_no_proximate_snp.vcf" % outgroup not in line: + vcf_filenames_temp_outgroup.append(line) + fp.close() + vcf_filenames = sorted(vcf_filenames_temp) + vcf_filenames_outgroup = sorted(vcf_filenames_temp_outgroup) + + make_sure_files_exists(vcf_filenames, Config, logger) + + log_file_handle = "%s/%s_%s.log.txt" % (args.filter2_only_snp_vcf_dir, log_unique_time, analysis_name_log) + + # Start Variant Calling Core Pipeline steps based on steps argument supplied. + if "1" in args.steps: + """ + core_prep step + """ + + # Gather SNP positions from each final *_no_proximate_snp.vcf file (that passed the variant filter parameters from variant calling pipeline) and write to *_no_proximate_snp.vcf_position files for use in downstream methods + keep_logging('Gathering SNP position information from each final *_no_proximate_snp.vcf file...', 'Gathering SNP position information from each final *_no_proximate_snp.vcf file...', logger, 'info') + + core_prep_label(vcf_filenames, args.filter2_only_snp_vcf_dir, args.outgroup, args.reference, log_unique_time, log_file_handle, logger, args.jobrun, Config) + + if "2" in args.steps: + """ + core step + """ + + # Set variables; check if the output from core_prep steps (*label files) exists and was completed without any errors. 
+ snp_unique_positions_file = args.filter2_only_snp_vcf_dir + "/unique_positions_file" + indel_unique_positions_file = args.filter2_only_snp_vcf_dir + "/unique_indel_positions_file" + uniq_snp_positions = sum(1 for line in open('%s' % snp_unique_positions_file)) + uniq_indel_positions = sum(1 for line in open('%s' % indel_unique_positions_file)) + if not os.path.isfile(snp_unique_positions_file) and not os.path.isfile(indel_unique_positions_file): + keep_logging('Error finding unique_positions_file/unique_indel_positions_file. Please rerun core_prep step.','Error finding unique_positions_file/unique_indel_positions_file. Please rerun core_prep step.', logger,'exception') + exit() + + make_sure_label_files_exists(vcf_filenames, uniq_snp_positions, uniq_indel_positions, Config, logger) + + # Set up Report and results directories to transfer the final results. + data_matrix_dir = args.results_dir + '/data_matrix' + core_vcf_fasta_dir = args.results_dir + '/core_snp_consensus' + make_sure_path_exists(data_matrix_dir) + make_sure_path_exists(core_vcf_fasta_dir) + + functional_class_filter_positions = "%s/Functional_class_filter_positions.txt" % args.filter2_only_snp_vcf_dir + + global outgroup_specific_positions + global outgroup_indel_specific_positions + + # Get outgroup specific variant positions + if args.outgroup: + f_outgroup = open("%s/outgroup_indel_specific_positions.txt" % args.filter2_only_snp_vcf_dir, 'r+') + + outgroup_indel_specific_positions = [] + for i in f_outgroup: + i = i.strip() + outgroup_indel_specific_positions.append(int(i)) + f_outgroup.close() + + f_outgroup = open("%s/outgroup_specific_positions.txt" % args.filter2_only_snp_vcf_dir, 'r+') + + outgroup_specific_positions = [] + for i in f_outgroup: + i = i.strip() + outgroup_specific_positions.append(int(i)) + f_outgroup.close() + + print "No. of outgroup specific variant positions: %s" % len(outgroup_specific_positions) + print "No. 
of outgroup specific Indel variant positions: %s" % len(outgroup_indel_specific_positions) + else: + + outgroup_indel_specific_positions = [] + outgroup_specific_positions = [] + print "No. of outgroup specific variant positions: %s" % len(outgroup_specific_positions) + print "No. of outgroup specific Indel variant positions: %s" % len(outgroup_indel_specific_positions) + + # Run core steps. Generate SNP and data Matrix results. Extract core SNPS and consensus files. + core_prep_indel(core_vcf_fasta_dir) + + core_prep_snp(core_vcf_fasta_dir) + + # Moving this up before core_prep_snp; for some weird reason, it is failing to generate Only_ref_indel + #core_prep_indel(core_vcf_fasta_dir) + + # Annotate core variants. Generate SNP and Indel matrix. + annotated_snp_matrix() + + # Read new allele matrix and generate fasta; generate a seperate function + keep_logging('Generating Fasta from Variant Alleles...\n', 'Generating Fasta from Variant Alleles...\n', logger, 'info') + + create_job_allele_variant_fasta(args.jobrun, vcf_filenames, args.filter2_only_snp_vcf_dir, config_file) + + #extract_only_ref_variant_fasta_from_reference_allele_variant() + + mask_fq_mq_positions_specific_to_outgroup() + + call("cp %s %s/Logs/core/" % ( + log_file_handle, os.path.dirname(os.path.dirname(args.filter2_only_snp_vcf_dir))), logger) + + if "3" in args.steps: + """ + report step + """ + + # Get outgroup_Sample name + outgroup = get_outgroup() + + keep_logging('Step 3: Generate Reports and Results folder.', 'Step 3: Generate Reports and Results folder.', logger, 'info') + + ## Temporary fix. 
A bug was introduced that is causing the pipeline to generate *vcf_no_proximate_snp.vcf_filter2_consensus.fa + call("rm %s/*vcf_no_proximate_snp.vcf_filter2_consensus.fa" % args.filter2_only_snp_vcf_dir, logger) + + # Generate DP barplots data and Analyze the FQ values of all the unique variant + # DP_analysis_barplot() + # FQ_analysis() + + # Set up Report and results directories to transfer the final results. + # Set up Report and results directories to transfer the final results. + data_matrix_dir = args.results_dir + '/data_matrix' + core_vcf_fasta_dir = args.results_dir + '/core_snp_consensus' + make_sure_path_exists(args.results_dir) + make_sure_path_exists(data_matrix_dir) + make_sure_path_exists(core_vcf_fasta_dir) + data_matrix_dir = args.results_dir + '/data_matrix' + data_matrix_snpeff_dir = data_matrix_dir + '/snpEff_results' + core_vcf_fasta_dir = args.results_dir + '/core_snp_consensus' + consensus_var_dir = core_vcf_fasta_dir + '/consensus_variant_positions' + core_vcf_dir = core_vcf_fasta_dir + '/core_vcf' + consensus_allele_var_dir = core_vcf_fasta_dir + '/consensus_allele_variant_positions' + consensus_ref_allele_var_dir = core_vcf_fasta_dir + '/consensus_ref_allele_variant_positions' + consensus_ref_var_dir = core_vcf_fasta_dir + '/consensus_ref_variant_positions' + consensus_ref_allele_unmapped_variant_dir = core_vcf_fasta_dir + '/consensus_ref_allele_unmapped_variant' + make_sure_path_exists(data_matrix_dir) + make_sure_path_exists(data_matrix_snpeff_dir) + make_sure_path_exists(core_vcf_fasta_dir) + make_sure_path_exists(consensus_var_dir) + make_sure_path_exists(core_vcf_dir) + make_sure_path_exists(consensus_allele_var_dir) + #make_sure_path_exists(consensus_ref_allele_var_dir) + make_sure_path_exists(consensus_ref_var_dir) + make_sure_path_exists(consensus_ref_allele_unmapped_variant_dir) + reference_base = os.path.basename(args.reference).split('.')[0] + # Move results to the results directory + move_data_matrix_results = "cp -r 
%s/unique_positions_file %s/unique_indel_positions_file %s/*.csv %s/*.txt %s/temp_* %s/All* %s/Only* %s/*.R %s/R_scripts/generate_diagnostics_plots.R %s/" % (args.filter2_only_snp_vcf_dir, args.filter2_only_snp_vcf_dir, args.filter2_only_snp_vcf_dir, args.filter2_only_snp_vcf_dir, args.filter2_only_snp_vcf_dir, args.filter2_only_snp_vcf_dir, args.filter2_only_snp_vcf_dir, args.filter2_only_snp_vcf_dir, os.path.dirname(os.path.abspath(__file__)), data_matrix_dir) + #move_core_vcf_fasta_results = "cp %s/*_core.vcf.gz %s/*.fa %s/*_variants.fa %s/" % (args.filter2_only_snp_vcf_dir, args.filter2_only_snp_vcf_dir, args.filter2_only_snp_vcf_dir, core_vcf_fasta_dir) + move_core_vcf_fasta_results = "mv %s/*_core.vcf.gz* %s/*_ANN* %s/*.fa %s/" % (args.filter2_only_snp_vcf_dir, args.filter2_only_snp_vcf_dir, args.filter2_only_snp_vcf_dir, core_vcf_fasta_dir) + + + move_consensus_var_fasta_results = "mv %s/*_variants.fa %s/" % (core_vcf_fasta_dir, consensus_var_dir) + move_consensus_ref_var_fasta_results = "mv %s/*.fa %s/" % (core_vcf_fasta_dir, consensus_ref_var_dir) + move_core_vcf = "mv %s/*_core.vcf.gz %s/*vcf_core.vcf.gz.tbi %s/" % (core_vcf_fasta_dir, core_vcf_fasta_dir, core_vcf_dir) + move_consensus_allele_var_fasta_results = "mv %s/*allele_variants.fa %s/" % (consensus_var_dir, consensus_allele_var_dir) + remove_ref_allele = "rm %s/*_ref_allele_variants.fa" % consensus_allele_var_dir + #move_consensus_ref_allele_var_fasta_results = "mv %s/*_ref_allele_variants.fa %s/" % (consensus_allele_var_dir, consensus_ref_allele_var_dir) + move_consensus_ref_allele_unmapped_var_fasta_results = "mv %s/*_ref_allele_unmapped_variants.fa %s/" % (consensus_var_dir, consensus_ref_allele_unmapped_variant_dir) + move_snpeff_results = "mv %s/*ANN* %s/" % (data_matrix_dir, data_matrix_snpeff_dir) + move_snpeff_vcf_results = "mv %s/*ANN* %s/" % (core_vcf_fasta_dir, data_matrix_snpeff_dir) + copy_reference = "cp %s %s/%s.fa" % (args.reference, consensus_ref_var_dir, reference_base) + 
#copy_reference_2 = "cp %s %s/%s.fa" % (args.reference, consensus_ref_allele_var_dir, reference_base) + + call("%s" % move_data_matrix_results, logger) + call("%s" % move_core_vcf_fasta_results, logger) + call("%s" % move_consensus_var_fasta_results, logger) + call("%s" % move_consensus_ref_var_fasta_results, logger) + call("%s" % move_core_vcf, logger) + call("%s" % move_consensus_allele_var_fasta_results, logger) + call("%s" % remove_ref_allele, logger) + #call("%s" % move_consensus_ref_allele_var_fasta_results, logger) + call("%s" % move_consensus_ref_allele_unmapped_var_fasta_results, logger) + call("%s" % copy_reference, logger) + #call("%s" % copy_reference_2, logger) + call("%s" % move_snpeff_results, logger) + call("%s" % move_snpeff_vcf_results, logger) + subprocess.call(["sed -i 's/title_here/%s/g' %s/generate_diagnostics_plots.R" % (os.path.basename(args.results_dir), data_matrix_dir)], shell=True) + + # Sanity Check if the variant consensus files generated are of same length + count = 0 + for line in open("%s/Only_ref_variant_positions_for_closely_matrix.txt" % data_matrix_dir).xreadlines(): + count += 1 + ref_variants = count - 1 + variant_consensus_files = glob.glob("%s/*_variants.fa" % core_vcf_fasta_dir) + for f in variant_consensus_files: + cmd2 = "%s/%s/bioawk -c fastx '{ print length($seq) }' < %s" % ( + ConfigSectionMap("bin_path", Config)['binbase'], ConfigSectionMap("bioawk", Config)['bioawk_bin'], f) + proc = subprocess.Popen([cmd2], stdout=subprocess.PIPE, shell=True) + (out2, err2) = proc.communicate() + + try: + int(out2) != int(ref_variants) + except OSError as exception: + if exception.errno != errno.EEXIST: + keep_logging('Error generating variant consensus position file: %s' % f, + 'Error generating variant consensus position file: %s' % f, logger, 'info') + keep_logging('Error generating variant consensus position file: %s' % f, + 'Error generating variant consensus position file: %s' % f, logger, 'exception') + exit() + + # Move and 
organize data_matrix_dir directory + os.chdir(data_matrix_dir) + plots_dir = "%s/plots" % data_matrix_dir + matrices_dir = "%s/matrices" % data_matrix_dir + functional_ann_dir = "%s/Functional_annotation_results" % data_matrix_dir + logs_dir = "%s/logs" % data_matrix_dir + make_sure_path_exists(plots_dir) + make_sure_path_exists(matrices_dir) + make_sure_path_exists(functional_ann_dir) + make_sure_path_exists(logs_dir) + call("mv *.log.txt %s" % logs_dir, logger) + call("mv summary.txt detail.txt Functional_class_filter_positions.txt inexact_repeat_region_positions.txt phage_region_positions.txt repeat_region_positions.txt %s" % functional_ann_dir, logger) + call("mv temp_* All* Only* SNP_matrix_* Indel* extract_DP_positions.txt header.txt unique_indel_positions_file unique_positions_file %s" % matrices_dir, logger) + call("mv annotated_no_proximate_snp_* %s/snpEff_results/" % data_matrix_dir, logger) + call("mv bargraph* generate_diagnostics_plots.R %s" % plots_dir, logger) + call("cp %s/temp_Only_filtered_positions_for_closely_matrix_FQ.txt %s/" % (matrices_dir, plots_dir), logger) + + # """ Generate alignment report """ + # alignment_report(data_matrix_dir) + # + # """ Generate core snps report """ + # variant_report(data_matrix_dir) + + """ Generating Gubbins MFA files""" + reference_base = os.path.basename(args.reference).split('.')[0] + gubbins_dir = args.results_dir + '/gubbins' + tree_dir = args.results_dir + '/trees' + + make_sure_path_exists(gubbins_dir) + #make_sure_path_exists(tree_dir) + + + prepare_ref_var_consensus_input = "%s/gubbins/%s_%s_genome_aln_w_ref_allele.fa" % (args.results_dir, (os.path.basename(os.path.normpath(args.results_dir))).replace('_core_results', ''), reference_base) + prepare_var_consensus_input = "%s/gubbins/%s_%s_core_var_aln.fa" % (args.results_dir, (os.path.basename(os.path.normpath(args.results_dir))).replace('_core_results', ''), reference_base) + prepare_allele_var_consensus_input = 
"%s/gubbins/%s_%s_noncore_plus_core_variants_aln.fa" % ( + args.results_dir, (os.path.basename(os.path.normpath(args.results_dir))).replace('_core_results', ''), + reference_base) + #prepare_ref_allele_var_consensus_input = "%s/gubbins/%s_%s_ref_allele_var_consensus.fa" % (args.results_dir, (os.path.basename(os.path.normpath(args.results_dir))).replace('_core_results', ''),reference_base) + prepare_ref_allele_unmapped_consensus_input = "%s/gubbins/%s_%s_genome_aln_w_alt_allele_unmapped.fa" % (args.results_dir, (os.path.basename(os.path.normpath(args.results_dir))).replace('_core_results', ''), reference_base) + + prepare_ref_var_consensus_input_cmd = "cat %s/core_snp_consensus/consensus_ref_variant_positions/*.fa > %s" % (args.results_dir, prepare_ref_var_consensus_input) + prepare_var_consensus_input_cmd = "cat %s/core_snp_consensus/consensus_variant_positions/*_variants.fa > %s" % (args.results_dir, prepare_var_consensus_input) + prepare_allele_var_consensus_input_cmd = "cat %s/core_snp_consensus/consensus_allele_variant_positions/*_allele_variants.fa > %s" % ( + args.results_dir, prepare_allele_var_consensus_input) + #prepare_ref_allele_var_consensus_input_cmd = "cat %s/core_snp_consensus/consensus_ref_allele_variant_positions/*.fa > %s" % (args.results_dir, prepare_ref_allele_var_consensus_input) + prepare_ref_allele_unmapped_consensus_input_cmd = "cat %s %s/core_snp_consensus/consensus_ref_allele_unmapped_variant/*.fa > %s" % (args.reference, args.results_dir, prepare_ref_allele_unmapped_consensus_input) + call("%s" % prepare_ref_var_consensus_input_cmd, logger) + call("%s" % prepare_var_consensus_input_cmd, logger) + call("%s" % prepare_allele_var_consensus_input_cmd, logger) + #call("%s" % prepare_ref_allele_var_consensus_input_cmd, logger) + call("%s" % prepare_ref_allele_unmapped_consensus_input_cmd, logger) + # os.system(prepare_ref_var_consensus_input_cmd) + # os.system(prepare_var_consensus_input_cmd) + + print_details = "Results for core pipeline can 
be found in: %s\n" \ + "Description of Results:\n" \ + "1. data_matrix folder contains all the data matrices and other temporary files generated during the core pipeline. bargraph_counts.txt and bargraph_percentage.txt: contains counts/percentage of unique positions filtered out due to different filter parameters for each sample. Run bargraph.R to plot bargraph statistics." \ + "2. core_snp_consensus contains all the core vcf and fasta files. *_core.vcf.gz: core vcf files, *.fa and *_variants.fa: core consensus fasta file and core consensus fasta with only variant positions." % (args.results_dir) + keep_logging(print_details, print_details, logger, 'info') + + call("cp %s %s/Logs/report/" % ( + log_file_handle, os.path.dirname(os.path.dirname(args.filter2_only_snp_vcf_dir))), logger) + + if "4" in args.steps: + """ + Gubbins/Raxml step + """ + + + keep_logging('Step 4: Run Gubbins on core alignments and generate iqtree/RaxML trees.', 'Step 4: Run Gubbins on core alignments and generate iqtree/RaxML trees.', logger, 'info') + + #parse_phaster(args.reference) + reference_base = os.path.basename(args.reference).split('.')[0] + gubbins_dir = args.results_dir + '/gubbins' + tree_dir = args.results_dir + '/trees' + + make_sure_path_exists(gubbins_dir) + #make_sure_path_exists(tree_dir) + + + prepare_ref_var_consensus_input = "%s/gubbins/%s_%s_genome_aln_w_ref_allele.fa" % (args.results_dir, (os.path.basename(os.path.normpath(args.results_dir))).replace('_core_results', ''), reference_base) + prepare_var_consensus_input = "%s/gubbins/%s_%s_core_var_aln.fa" % (args.results_dir, (os.path.basename(os.path.normpath(args.results_dir))).replace('_core_results', ''), reference_base) + prepare_allele_var_consensus_input = "%s/gubbins/%s_%s_noncore_plus_core_variants_aln.fa" % ( + args.results_dir, (os.path.basename(os.path.normpath(args.results_dir))).replace('_core_results', ''), + reference_base) + #prepare_ref_allele_var_consensus_input = 
"%s/gubbins/%s_%s_ref_allele_var_consensus.fa" % (args.results_dir, (os.path.basename(os.path.normpath(args.results_dir))).replace('_core_results', ''),reference_base) + prepare_ref_allele_unmapped_consensus_input = "%s/gubbins/%s_%s_genome_aln_w_alt_allele_unmapped.fa" % (args.results_dir, (os.path.basename(os.path.normpath(args.results_dir))).replace('_core_results', ''), reference_base) + + prepare_ref_var_consensus_input_cmd = "cat %s/core_snp_consensus/consensus_ref_variant_positions/*.fa > %s" % (args.results_dir, prepare_ref_var_consensus_input) + prepare_var_consensus_input_cmd = "cat %s/core_snp_consensus/consensus_variant_positions/*_variants.fa > %s" % (args.results_dir, prepare_var_consensus_input) + prepare_allele_var_consensus_input_cmd = "cat %s/core_snp_consensus/consensus_allele_variant_positions/*_allele_variants.fa > %s" % ( + args.results_dir, prepare_allele_var_consensus_input) + #prepare_ref_allele_var_consensus_input_cmd = "cat %s/core_snp_consensus/consensus_ref_allele_variant_positions/*.fa > %s" % (args.results_dir, prepare_ref_allele_var_consensus_input) + prepare_ref_allele_unmapped_consensus_input_cmd = "cat %s %s/core_snp_consensus/consensus_ref_allele_unmapped_variant/*.fa > %s" % (args.reference, args.results_dir, prepare_ref_allele_unmapped_consensus_input) + call("%s" % prepare_ref_var_consensus_input_cmd, logger) + call("%s" % prepare_var_consensus_input_cmd, logger) + call("%s" % prepare_allele_var_consensus_input_cmd, logger) + call("%s" % prepare_ref_allele_unmapped_consensus_input_cmd, logger) + + + if args.gubbins and args.gubbins == "yes": + os.chdir(gubbins_dir) + if args.outgroup: + # Get outgroup_Sample name + outgroup = get_outgroup() + keep_logging('%s/scripts/gubbins_iqtree_raxml.sh %s 1 esnitkin_flux \'%s\'' % (os.path.dirname(os.path.abspath(__file__)), prepare_ref_var_consensus_input, outgroup), + '%s/scripts/gubbins_iqtree_raxml.sh %s 1 esnitkin_flux \'%s\'' % (os.path.dirname(os.path.abspath(__file__)), 
prepare_ref_var_consensus_input, outgroup), logger, 'info') + call("%s/scripts/gubbins_iqtree_raxml.sh %s 1 esnitkin_flux \'%s\'" % (os.path.dirname(os.path.abspath(__file__)), prepare_ref_var_consensus_input, outgroup), logger) + keep_logging('%s/scripts/gubbins_iqtree_raxml.sh %s 1 esnitkin_flux \'%s\'' % ( + os.path.dirname(os.path.abspath(__file__)), prepare_ref_allele_unmapped_consensus_input, outgroup), + '%s/scripts/gubbins_iqtree_raxml.sh %s 1 esnitkin_flux \'%s\'' % ( + os.path.dirname(os.path.abspath(__file__)), prepare_ref_allele_unmapped_consensus_input, outgroup), + logger, 'info') + call("%s/scripts/gubbins_iqtree_raxml.sh %s 1 esnitkin_flux \'%s\'" % (os.path.dirname(os.path.abspath(__file__)), prepare_ref_allele_unmapped_consensus_input, outgroup), logger) + # call("%s/scripts/gubbins_iqtree_raxml.sh %s 1" % (os.path.dirname(os.path.abspath(__file__)), prepare_ref_allele_var_consensus_input), logger) + else: + keep_logging('%s/scripts/gubbins_iqtree_raxml.sh %s 1' % ( + os.path.dirname(os.path.abspath(__file__)), prepare_ref_var_consensus_input), + '%s/scripts/gubbins_iqtree_raxml.sh %s 1' % ( + os.path.dirname(os.path.abspath(__file__)), prepare_ref_var_consensus_input), + logger, 'info') + call("%s/scripts/gubbins_iqtree_raxml.sh %s 1" % (os.path.dirname(os.path.abspath(__file__)), prepare_ref_var_consensus_input), logger) + keep_logging('%s/scripts/gubbins_iqtree_raxml.sh %s 1' % ( + os.path.dirname(os.path.abspath(__file__)), prepare_ref_allele_unmapped_consensus_input), + '%s/scripts/gubbins_iqtree_raxml.sh %s 1' % ( + os.path.dirname(os.path.abspath(__file__)), + prepare_ref_allele_unmapped_consensus_input), + logger, 'info') + call("%s/scripts/gubbins_iqtree_raxml.sh %s 1" % (os.path.dirname(os.path.abspath(__file__)), prepare_ref_allele_unmapped_consensus_input), logger) + #call("%s/scripts/gubbins_iqtree_raxml.sh %s 1" % (os.path.dirname(os.path.abspath(__file__)), prepare_ref_allele_var_consensus_input), logger) + else: + if args.outgroup: 
+ # Get outgroup_Sample name + outgroup = get_outgroup() + keep_logging('The gubbins argument is set to No.', 'The gubbins argument is set to No.', logger, 'info') + keep_logging('%s/scripts/gubbins_iqtree_raxml.sh %s 0 esnitkin_flux \'%s\'' % ( + os.path.dirname(os.path.abspath(__file__)), prepare_ref_var_consensus_input, outgroup), + '%s/scripts/gubbins_iqtree_raxml.sh %s 0 esnitkin_flux \'%s\'' % ( + os.path.dirname(os.path.abspath(__file__)), prepare_ref_var_consensus_input, outgroup), + logger, 'info') + print "%s/scripts/gubbins_iqtree_raxml.sh %s 0 esnitkin_flux \'%s\'" % (os.path.dirname(os.path.abspath(__file__)), prepare_ref_var_consensus_input, outgroup) + keep_logging('%s/scripts/gubbins_iqtree_raxml.sh %s 0 esnitkin_flux \'%s\'' % ( + os.path.dirname(os.path.abspath(__file__)), prepare_ref_allele_unmapped_consensus_input, outgroup), + '%s/scripts/gubbins_iqtree_raxml.sh %s 0 esnitkin_flux \'%s\'' % ( + os.path.dirname(os.path.abspath(__file__)), prepare_ref_allele_unmapped_consensus_input, outgroup), + logger, 'info') + print "%s/scripts/gubbins_iqtree_raxml.sh %s 0 esnitkin_flux \'%s\'" % (os.path.dirname(os.path.abspath(__file__)), prepare_ref_allele_unmapped_consensus_input, outgroup) + else: + keep_logging('The gubbins argument is set to No.', 'The gubbins argument is set to No.', logger, 'info') + print "%s/scripts/gubbins_iqtree_raxml.sh %s 0" % (os.path.dirname(os.path.abspath(__file__)), prepare_ref_var_consensus_input) + print "%s/scripts/gubbins_iqtree_raxml.sh %s 0" % (os.path.dirname(os.path.abspath(__file__)), prepare_ref_allele_unmapped_consensus_input) + + call("cp %s %s/Logs/tree/" % ( + log_file_handle, os.path.dirname(os.path.dirname(args.filter2_only_snp_vcf_dir))), logger) + + """ The below steps are for debugging purpose only.""" + if "5" in args.steps: + """ + Debugging Purposes only: Run only SNP matrix annotation step + """ + + keep_logging('Step 5: Running SNP matrix annotation step.', 'Step 5: Running SNP matrix annotation 
step.', logger, 'info') + + functional_class_filter_positions = "%s/Functional_class_filter_positions.txt" % args.filter2_only_snp_vcf_dir + + global outgroup_specific_positions + global outgroup_indel_specific_positions + + # Get outgroup specific variant positions + if args.outgroup: + f_outgroup = open("%s/outgroup_indel_specific_positions.txt" % args.filter2_only_snp_vcf_dir, 'r+') + + outgroup_indel_specific_positions = [] + for i in f_outgroup: + i = i.strip() + outgroup_indel_specific_positions.append(int(i)) + f_outgroup.close() + + f_outgroup = open("%s/outgroup_specific_positions.txt" % args.filter2_only_snp_vcf_dir, 'r+') + + outgroup_specific_positions = [] + for i in f_outgroup: + i = i.strip() + outgroup_specific_positions.append(int(i)) + f_outgroup.close() + + print "No. of outgroup specific variant positions: %s" % len(outgroup_specific_positions) + print "No. of outgroup specific Indel variant positions: %s" % len(outgroup_indel_specific_positions) + else: + + outgroup_indel_specific_positions = [] + outgroup_specific_positions = [] + print "No. of outgroup specific variant positions: %s" % len(outgroup_specific_positions) + print "No. of outgroup specific Indel variant positions: %s" % len(outgroup_indel_specific_positions) + + # Annotate core variants. Generate SNP and Indel matrix. 
+ annotated_snp_matrix() + + # # Read new allele matrix and generate fasta; generate a seperate function + keep_logging('Generating Fasta from Variant Alleles...\n', 'Generating Fasta from Variant Alleles...\n', logger, 'info') + + create_job_allele_variant_fasta(args.jobrun, vcf_filenames, args.filter2_only_snp_vcf_dir, config_file) + + extract_only_ref_variant_fasta_from_reference_allele_variant() + + mask_fq_mq_positions_specific_to_outgroup() + + call("cp %s %s/Logs/core/" % ( + log_file_handle, os.path.dirname(os.path.dirname(args.filter2_only_snp_vcf_dir))), logger) + + if "6" in args.steps: + """ + Debugging Purposes only: Run only Gubbins + """ + reference_base = os.path.basename(args.reference).split('.')[0] + gubbins_dir = args.results_dir + '/gubbins' + tree_dir = args.results_dir + '/trees' + + make_sure_path_exists(gubbins_dir) + #make_sure_path_exists(tree_dir) + + + prepare_ref_var_consensus_input = "%s/gubbins/%s_%s_ref_var_consensus.fa" % (args.results_dir, (os.path.basename(os.path.normpath(args.results_dir))).replace('_core_results', ''), reference_base) + prepare_var_consensus_input = "%s/gubbins/%s_%s_var_consensus.fa" % (args.results_dir, (os.path.basename(os.path.normpath(args.results_dir))).replace('_core_results', ''), reference_base) + prepare_allele_var_consensus_input = "%s/gubbins/%s_%s_allele_var_consensus.fa" % ( + args.results_dir, (os.path.basename(os.path.normpath(args.results_dir))).replace('_core_results', ''), + reference_base) + prepare_ref_allele_var_consensus_input = "%s/gubbins/%s_%s_ref_allele_var_consensus.fa" % ( + args.results_dir, (os.path.basename(os.path.normpath(args.results_dir))).replace('_core_results', ''), + reference_base) + prepare_ref_allele_unmapped_consensus_input = "%s/gubbins/%s_%s_ref_allele_unmapped_consensus.fa" % ( + args.results_dir, (os.path.basename(os.path.normpath(args.results_dir))).replace('_core_results', ''), + reference_base) + + if args.gubbins and args.gubbins == "yes": + 
gubbins(gubbins_dir, prepare_ref_var_consensus_input, args.jobrun, logger, Config) + #gubbins(gubbins_dir, prepare_ref_allele_var_consensus_input, logger, Config) + gubbins(gubbins_dir, prepare_ref_allele_unmapped_consensus_input,args.jobrun, logger, Config) + call("cp %s %s/Logs/tree/" % ( + log_file_handle, os.path.dirname(os.path.dirname(args.filter2_only_snp_vcf_dir))), logger) + + if "7" in args.steps: + """ + Debugging Purposes only: Run iqtree + """ + reference_base = os.path.basename(args.reference).split('.')[0] + gubbins_dir = args.results_dir + '/gubbins' + tree_dir = args.results_dir + '/trees' + + make_sure_path_exists(gubbins_dir) + #make_sure_path_exists(tree_dir) + + + prepare_ref_var_consensus_input = "%s/gubbins/%s_%s_ref_var_consensus.fa" % (args.results_dir, (os.path.basename(os.path.normpath(args.results_dir))).replace('_core_results', ''), reference_base) + prepare_var_consensus_input = "%s/gubbins/%s_%s_var_consensus.fa" % (args.results_dir, (os.path.basename(os.path.normpath(args.results_dir))).replace('_core_results', ''), reference_base) + prepare_allele_var_consensus_input = "%s/gubbins/%s_%s_allele_var_consensus.fa" % ( + args.results_dir, (os.path.basename(os.path.normpath(args.results_dir))).replace('_core_results', ''), + reference_base) + prepare_ref_allele_var_consensus_input = "%s/gubbins/%s_%s_ref_allele_var_consensus.fa" % ( + args.results_dir, (os.path.basename(os.path.normpath(args.results_dir))).replace('_core_results', ''), + reference_base) + prepare_ref_allele_unmapped_consensus_input = "%s/gubbins/%s_%s_ref_allele_unmapped_consensus.fa" % ( + args.results_dir, (os.path.basename(os.path.normpath(args.results_dir))).replace('_core_results', ''), + reference_base) + iqtree(tree_dir, prepare_ref_allele_var_consensus_input, args.jobrun, logger, Config) + iqtree(tree_dir, prepare_ref_var_consensus_input, args.jobrun, logger, Config) + iqtree(tree_dir, prepare_var_consensus_input, args.jobrun, logger, Config) + iqtree(tree_dir, 
prepare_ref_allele_unmapped_consensus_input, args.jobrun, logger, Config) + + time_taken = datetime.now() - start_time_2 + if args.remove_temp: + del_command = "rm -r %s" % temp_dir + os.system(del_command) + + + +