-
Notifications
You must be signed in to change notification settings - Fork 35
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
Showing
4 changed files
with
265 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,131 @@ | ||
import argparse | ||
import pandas as pd | ||
import seaborn as sns | ||
import matplotlib.pyplot as plt | ||
|
||
|
||
def get_args():
    """
    Build the command-line parser for the plotting script and parse sys.argv.

    Every option is required. The returned namespace carries `input`,
    `label`, the three integer thresholds (`completeness`, `contamination`,
    `contigs`) and the six plot paths `output1` .. `output6`.
    """
    cli = argparse.ArgumentParser(
        prog='Metabat-Plot.py',
        description="""Plot bin information from MetaBAT2.""")

    cli.add_argument("-i", "--input", required=True,
                     help="The o2 format summary file from CheckM.")
    cli.add_argument("-l", "--label", required=True,
                     help="A label for the plot.")

    # The three thresholds differ only in their flags and help text.
    thresholds = (
        ("-c", "--completeness", "Completeness threshold."),
        ("-m", "--contamination", "Contamination threshold."),
        ("-g", "--contigs", "Contigs threshold."),
    )
    for short_flag, long_flag, help_text in thresholds:
        cli.add_argument(short_flag, long_flag,
                         required=True, type=int, help=help_text)

    # -o1/--output1 through -o6/--output6 all share the same help string.
    for number in range(1, 7):
        cli.add_argument("-o{}".format(number), "--output{}".format(number),
                         required=True,
                         help="The name of the output file (a plot).")

    return cli.parse_args()
|
||
|
||
def create_unfiltered_joint_scatter(df, output, completeness=None, contamination=None):
    """
    Draw a seaborn joint plot of Completeness vs. Contamination and save it.

    df: DataFrame providing the 'Completeness' and 'Contamination' columns.
    output: path the figure is written to.
    completeness, contamination: optional and unused; accepted so this
        function can be called with the same trailing arguments as the other
        plotting functions in this script.
    """
    sns.jointplot(data=df, x='Completeness', y='Contamination',
                  s=50, alpha=0.7, xlim=(-2, 102), ylim=(-2, 102))
    plt.savefig("{}".format(output))
    # Close the current figure so later plots start from a clean state.
    plt.close()
|
||
def create_unfiltered_scatter_bins(df, label, output, completeness, contamination):
    """
    Scatter Completeness vs. Contamination for every row of `df`, annotate
    each point with its 'Bin Id', and save the figure to `output`.

    `label` is inserted into the plot title. `completeness` and
    `contamination` are accepted but not used by this function.
    """
    axes = df.plot.scatter(x='Completeness', y='Contamination',
                           s=40, alpha=0.9, xlim=(-2, 102), ylim=(-2, 102))
    xs = df['Completeness']
    ys = df['Contamination']
    for pos, bin_name in enumerate(df['Bin Id']):
        x = xs.iat[pos]
        y = ys.iat[pos]
        # Text is offset by +0.15 on both axes so it does not sit on the marker.
        axes.annotate(bin_name, (x, y), xytext=(x + 0.15, y + 0.15), fontsize=5)
    plt.xlabel('Genome Completeness')
    plt.ylabel('Genome Contamination')
    plt.title("Unfiltered Genome Bins: {}\n(labeled by bin name)".format(label))
    plt.savefig(str(output))
    plt.close()
|
||
def create_unfiltered_scatter_contigs(df, label, output, completeness, contamination):
    """
    Scatter Completeness vs. Contamination for every row of `df`, annotate
    each point with its '# contigs' value, and save the figure to `output`.

    `label` is inserted into the plot title. `completeness` and
    `contamination` are accepted but not used by this function.
    """
    axes = df.plot.scatter(x='Completeness', y='Contamination',
                           s=40, alpha=0.9, xlim=(-2, 102), ylim=(-2, 102))
    xs = df['Completeness']
    ys = df['Contamination']
    for pos, contig_count in enumerate(df['# contigs']):
        x = xs.iat[pos]
        y = ys.iat[pos]
        # Text is offset by +0.15 on both axes so it does not sit on the marker.
        axes.annotate(contig_count, (x, y), xytext=(x + 0.15, y + 0.15), fontsize=5)
    plt.xlabel('Genome Completeness')
    plt.ylabel('Genome Contamination')
    plt.title("Unfiltered Genome Bins: {}\n(labeled with numbers of contigs in bins)".format(label))
    plt.savefig(str(output))
    plt.close()
|
||
def create_filtered_scatter(df, label, output, completeness, contamination):
    """
    Scatter Completeness vs. Contamination for the rows of `df` without
    point labels, and save the figure to `output`.

    The axes are cropped to the thresholds: x runs from `completeness - 2`
    to 102 and y from -0.5 to `contamination + 0.5`. `label` goes into the
    plot title.
    """
    x_range = ((completeness - 2), 102)
    y_range = (-0.5, (contamination + 0.5))
    df.plot.scatter(x='Completeness', y='Contamination',
                    s=40, alpha=0.9, xlim=x_range, ylim=y_range)
    plt.xlabel('Genome Completeness')
    plt.ylabel('Genome Contamination')
    plt.title("Filtered Genome Bins: {}".format(label))
    plt.savefig(str(output))
    plt.close()
|
||
def create_filtered_scatter_contigs(df, label, output, completeness, contamination):
    """
    Scatter Completeness vs. Contamination for the rows of `df`, annotate
    each point with its '# contigs' value, and save the figure to `output`.

    The axes are cropped to the thresholds: x runs from `completeness - 2`
    to 102 and y from -0.5 to `contamination + 0.5`. `label` goes into the
    plot title.
    """
    x_range = ((completeness - 2), 102)
    y_range = (-0.5, (contamination + 0.5))
    axes = df.plot.scatter(x='Completeness', y='Contamination',
                           s=40, alpha=0.9, xlim=x_range, ylim=y_range)
    xs = df['Completeness']
    ys = df['Contamination']
    for pos, contig_count in enumerate(df['# contigs']):
        x = xs.iat[pos]
        y = ys.iat[pos]
        # Smaller offsets than the unfiltered plots because the axes are zoomed in.
        axes.annotate(contig_count, (x, y), xytext=(x + 0.1, y + 0.15), fontsize=6)
    plt.xlabel('Genome Completeness')
    plt.ylabel('Genome Contamination')
    plt.title("Filtered Genome Bins: {}\n(labeled with numbers of contigs in bins)".format(label))
    plt.savefig(str(output))
    plt.close()
|
||
def create_filtered_scatter_bins(df, label, output, completeness, contamination):
    """
    Scatter Completeness vs. Contamination for the rows of `df`, annotate
    each point with its 'Bin Id', and save the figure to `output`.

    The axes are cropped to the thresholds: x runs from `completeness - 2`
    to 102 and y from -0.5 to `contamination + 0.5`. `label` goes into the
    plot title.
    """
    x_range = ((completeness - 2), 102)
    y_range = (-0.5, (contamination + 0.5))
    axes = df.plot.scatter(x='Completeness', y='Contamination',
                           s=40, alpha=0.9, xlim=x_range, ylim=y_range)
    xs = df['Completeness']
    ys = df['Contamination']
    for pos, bin_name in enumerate(df['Bin Id']):
        x = xs.iat[pos]
        y = ys.iat[pos]
        # Smaller offsets than the unfiltered plots because the axes are zoomed in.
        axes.annotate(bin_name, (x, y), xytext=(x + 0.1, y + 0.12), fontsize=5)
    plt.xlabel('Genome Completeness')
    plt.ylabel('Genome Contamination')
    plt.title("Filtered Genome Bins: {}\n(labeled by bin name)".format(label))
    plt.savefig(str(output))
    plt.close()
|
||
|
||
def main():
    """
    Read the tab-separated summary table and write all six plots.

    Three plots use every bin; the other three use only bins with
    Completeness >= the completeness threshold, Contamination <= the
    contamination threshold and '# contigs' strictly below the contigs
    threshold.
    """
    args = get_args()
    df = pd.read_csv(args.input, sep='\t')
    scatfilt = (
        (df['Completeness'] >= args.completeness)
        & (df['Contamination'] <= args.contamination)
        & (df['# contigs'] < args.contigs)
    )
    filt = df[scatfilt]
    # The joint plot takes only the frame and the output path; passing the
    # thresholds positionally as well raises TypeError.
    create_unfiltered_joint_scatter(df, args.output1)
    create_unfiltered_scatter_contigs(df, args.label, args.output2, args.completeness, args.contamination)
    create_unfiltered_scatter_bins(df, args.label, args.output3, args.completeness, args.contamination)
    create_filtered_scatter(filt, args.label, args.output4, args.completeness, args.contamination)
    create_filtered_scatter_contigs(filt, args.label, args.output5, args.completeness, args.contamination)
    create_filtered_scatter_bins(filt, args.label, args.output6, args.completeness, args.contamination)


if __name__ == '__main__':
    main()
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,32 @@ | ||
import argparse | ||
import os | ||
import shutil | ||
|
||
def get_args():
    """
    Parse the command line of the concoct relabeling script.

    Returns the argparse namespace; its single attribute is the required
    `indir` option (-i/--indir).
    """
    options = argparse.ArgumentParser(
        description="""Relabel bin files from concoct.""",
        prog='Concoct-organize-outputs.py')
    options.add_argument("-i", "--indir",
                         help="Name of output directory.",
                         required=True)
    namespace = options.parse_args()
    return namespace
|
||
def relabel_outputs(indir):
    """
    Rename every '*.fa' file inside `indir` to 'concoct.{first field}.fa'.

    The first field is the part of the file name before its first dot, so
    '12.fa' becomes 'concoct.12.fa'. Files are renamed in place and stay in
    `indir`, which may be a relative or an absolute path. The process working
    directory is not changed.

    Files whose name already starts with 'concoct.' are skipped, so running
    the function a second time leaves earlier results untouched.
    """
    fasta_files = sorted(f for f in os.listdir(indir) if f.endswith('.fa'))
    for f in fasta_files:
        if f.startswith('concoct.'):
            # Relabeling again would map every such file onto the single name
            # 'concoct.concoct.fa' and overwrite all but one of them.
            continue
        parts = f.split('.')
        outname = "concoct.{}.{}".format(parts[0], parts[-1])
        print("Relabeling file: {}".format(f))
        print("\t{}".format(outname))
        # Both ends are joined with indir so the move does not depend on the
        # current working directory.
        shutil.move(os.path.join(indir, f), os.path.join(indir, outname))
|
||
def main():
    """Relabel the fasta files in the directory given on the command line."""
    target_dir = get_args().indir
    relabel_outputs(target_dir)
    print("Finished.")


if __name__ == "__main__":
    main()
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,53 @@ | ||
import argparse | ||
import os | ||
|
||
def get_args():
    """
    Parse the command line of the maxbin depth-filtering script.

    Returns the argparse namespace with the two required options `infile`
    (-i/--infile) and `outfile` (-o/--outfile).
    """
    options = argparse.ArgumentParser(
        description="""Filter JGI depths for maxbin.""",
        prog='Make-maxbin-depths.py')

    options.add_argument("-i", "--infile",
                         help="Path to JGI depth file.",
                         required=True)
    options.add_argument("-o", "--outfile",
                         help="Name of output depth file.",
                         required=True)
    namespace = options.parse_args()
    return namespace
|
||
def write_new_depth_file(input_depth_file, output_depth_file):
    """
    Reduce a tab-separated JGI depth table to two columns for maxbin.

    Example input:

        contigName      contigLen    totalAvgDepth  sludge.bam  sludge.bam-var
        s0.ctg000001c   4.13709e+06  61.7612        61.7612     103.936
        s1.ctg000002c   2.23849e+06  14.3086        14.3086     33.5774
        s2.ctg000003l   40246        10.6046        10.6046     38.4902

    For each data line the first and third tab-separated fields (contig name
    and total average depth in the example) are written to
    `output_depth_file`, separated by a tab. Lines starting with "contigName"
    and blank lines are skipped. An existing output file is replaced. The
    number of contigs written is printed at the end.

    Raises ValueError if a non-blank data line has fewer than three fields.
    """
    if os.path.exists(output_depth_file):
        print("Removing existing version of file.")

    kept_count = 0
    # Mode 'w' truncates any previous file, so no separate delete is needed.
    with open(input_depth_file, 'r') as fh_in, open(output_depth_file, 'w') as fh_out:
        for line_number, raw_line in enumerate(fh_in, start=1):
            # Drop the newline first so a three-field line does not carry it
            # into the depth value and double the line ending on output.
            line = raw_line.rstrip('\n')
            if not line.strip() or line.startswith("contigName"):
                continue
            fields = line.split('\t')
            if len(fields) < 3:
                raise ValueError(
                    "Line {} of {} has fewer than 3 tab-separated fields: {!r}".format(
                        line_number, input_depth_file, line))
            fh_out.write("{}\t{}\n".format(fields[0], fields[2]))
            kept_count += 1
    print("Parsed {:,} contigs".format(kept_count))
|
||
def main():
    """Write the reduced depth file using the paths given on the command line."""
    cli_args = get_args()
    source_path, target_path = cli_args.infile, cli_args.outfile
    write_new_depth_file(source_path, target_path)


if __name__ == "__main__":
    main()
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,49 @@ | ||
import argparse | ||
import os | ||
import shutil | ||
|
||
def get_args():
    """
    Parse the command line of the maxbin2 output-organizing script.

    Returns the argparse namespace with the two required options `sample`
    (-s/--sample) and `outdir` (-o/--outdir).
    """
    options = argparse.ArgumentParser(
        description="""Organize output files from maxbin2.""",
        prog='Maxbin2-organize-outputs.py')
    options.add_argument("-s", "--sample",
                         help="The sample name.",
                         required=True)
    options.add_argument("-o", "--outdir",
                         help="Name of output directory.",
                         required=True)
    namespace = options.parse_args()
    return namespace
|
||
def make_outdir(outdir):
    """
    Ensure the output directory exists and return its full path.

    `outdir` is joined onto the current working directory (an absolute
    `outdir` is returned as-is by os.path.join). Missing parent directories
    are created too, so nested paths such as 'results/maxbin' work. A message
    is printed only when the directory is newly created.

    Raises FileExistsError if the path exists but is not a directory.
    """
    fulldir = os.path.join(os.getcwd(), outdir)
    if not os.path.isdir(fulldir):
        # makedirs creates intermediate directories; it raises if the path
        # is an existing non-directory rather than silently accepting it.
        os.makedirs(fulldir, exist_ok=True)
        print("Created directory: {}".format(fulldir))
    return fulldir
|
||
def move_outputs(sample, fulldir):
    """
    Move the maxbin2 outputs for `sample` from the current directory to `fulldir`.

    Files whose name starts with `sample` and ends with one of the auxiliary
    suffixes (.seed, .log, .marker, .tar.gz, .noclass, .summary, .tooshort)
    are moved unchanged. Files whose name starts with `sample` and ends with
    '.fasta' are moved and renamed to 'maxbin.{bin}.fasta', where {bin} is
    the dot-separated field immediately before the extension
    ('S1.001.fasta' becomes 'maxbin.001.fasta').
    """
    aux_suffixes = ('.seed', '.log', '.marker', '.tar.gz',
                    '.noclass', '.summary', '.tooshort')

    target_files = [f for f in os.listdir('.')
                    if f.startswith(sample) and f.endswith(aux_suffixes)]
    for f in target_files:
        print("\tMoving file: {}".format(f))
        shutil.move(f, fulldir)

    fasta_files = [f for f in os.listdir('.')
                   if f.startswith(sample) and f.endswith('.fasta')]
    for f in fasta_files:
        parts = f.split('.')
        # Count from the end: a sample name that itself contains dots would
        # otherwise give every bin the same name and the moves would
        # overwrite one another.
        outname = "maxbin.{}.{}".format(parts[-2], parts[-1])
        print("Relabeling file: {}".format(f))
        print("\t{}".format(outname))
        shutil.move(f, os.path.join(fulldir, outname))
|
||
def main():
    """Create the output directory, then move the sample's maxbin2 files into it."""
    cli_args = get_args()
    destination = make_outdir(cli_args.outdir)
    move_outputs(cli_args.sample, destination)
    print("Finished.")


if __name__ == "__main__":
    main()