Skip to content

Commit

Permalink
Move scripts
Browse files Browse the repository at this point in the history
  • Loading branch information
dportik committed Jan 30, 2023
1 parent a2fed92 commit ce9a9c6
Show file tree
Hide file tree
Showing 4 changed files with 265 additions and 0 deletions.
131 changes: 131 additions & 0 deletions HiFi-MAG-Pipeline/scripts/Checkm-Plot.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,131 @@
import argparse
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt


def get_args():
    """
    Get arguments from command line with argparse.

    Returns:
        The parsed argparse namespace with attributes ``input``, ``label``,
        ``completeness``, ``contamination``, ``contigs`` and
        ``output1`` through ``output6``.
    """
    # prog/description previously named the MetaBAT plotting script; this
    # script reads a CheckM summary table.
    parser = argparse.ArgumentParser(
        prog='Checkm-Plot.py',
        description="""Plot bin completeness and contamination from CheckM.""")

    parser.add_argument("-i", "--input",
                        required=True,
                        help="The o2 format summary file from CheckM.")
    parser.add_argument("-l", "--label",
                        required=True,
                        help="A label for the plot.")
    parser.add_argument("-c", "--completeness",
                        required=True,
                        type=int,
                        help="Completeness threshold (bins with completeness "
                             ">= this value pass the filter).")
    parser.add_argument("-m", "--contamination",
                        required=True,
                        type=int,
                        help="Contamination threshold (bins with contamination "
                             "<= this value pass the filter).")
    parser.add_argument("-g", "--contigs",
                        required=True,
                        type=int,
                        help="Contigs threshold (bins with fewer contigs than "
                             "this value pass the filter).")
    parser.add_argument("-o1", "--output1",
                        required=True,
                        help="Output file for the unfiltered joint scatter plot.")
    parser.add_argument("-o2", "--output2",
                        required=True,
                        help="Output file for the unfiltered scatter plot "
                             "labeled with contig counts.")
    parser.add_argument("-o3", "--output3",
                        required=True,
                        help="Output file for the unfiltered scatter plot "
                             "labeled with bin names.")
    parser.add_argument("-o4", "--output4",
                        required=True,
                        help="Output file for the filtered scatter plot.")
    parser.add_argument("-o5", "--output5",
                        required=True,
                        help="Output file for the filtered scatter plot "
                             "labeled with contig counts.")
    parser.add_argument("-o6", "--output6",
                        required=True,
                        help="Output file for the filtered scatter plot "
                             "labeled with bin names.")

    return parser.parse_args()


def create_unfiltered_joint_scatter(df, output, completeness=None, contamination=None):
    """
    Save a seaborn joint plot of Completeness vs. Contamination for all bins.

    Args:
        df: Table with 'Completeness' and 'Contamination' columns.
        output: Path of the image file to write.
        completeness: Unused. Accepted (optionally) so this function can be
            called with the same trailing arguments as the other plotting
            functions in this script without raising a TypeError.
        contamination: Unused; accepted for the same reason.
    """
    sns.jointplot(data=df, x='Completeness', y='Contamination', s=50, alpha=0.7,
                  xlim=(-2, 102), ylim=(-2, 102))
    plt.savefig("{}".format(output))
    plt.close()

def create_unfiltered_scatter_bins(df, label, output, completeness, contamination):
    """
    Scatter plot of all bins (Completeness vs. Contamination) with every point
    annotated by its 'Bin Id' value, written to `output`.

    `label` is inserted into the plot title. `completeness` and
    `contamination` are accepted but not used by this function.
    """
    axes = df.plot.scatter(x='Completeness', y='Contamination', s=40, alpha=0.9,
                           xlim=(-2, 102), ylim=(-2, 102))
    points = zip(df['Bin Id'], df['Completeness'], df['Contamination'])
    for bin_name, comp, contam in points:
        # Offset the text slightly so it does not sit on top of its marker.
        axes.annotate(bin_name, (comp, contam),
                      xytext=(comp + 0.15, contam + 0.15),
                      fontsize=5)
    plt.title("Unfiltered Genome Bins: {}\n(labeled by bin name)".format(label))
    plt.xlabel('Genome Completeness')
    plt.ylabel('Genome Contamination')
    plt.savefig(str(output))
    plt.close()

def create_unfiltered_scatter_contigs(df, label, output, completeness, contamination):
    """
    Scatter plot of all bins (Completeness vs. Contamination) with every point
    annotated by its '# contigs' value, written to `output`.

    `label` is inserted into the plot title. `completeness` and
    `contamination` are accepted but not used by this function.
    """
    axes = df.plot.scatter(x='Completeness', y='Contamination', s=40, alpha=0.9,
                           xlim=(-2, 102), ylim=(-2, 102))
    points = zip(df['# contigs'], df['Completeness'], df['Contamination'])
    for n_contigs, comp, contam in points:
        # Offset the text slightly so it does not sit on top of its marker.
        axes.annotate(n_contigs, (comp, contam),
                      xytext=(comp + 0.15, contam + 0.15),
                      fontsize=5)
    plt.title("Unfiltered Genome Bins: {}\n(labeled with numbers of contigs in bins)".format(label))
    plt.xlabel('Genome Completeness')
    plt.ylabel('Genome Contamination')
    plt.savefig(str(output))
    plt.close()

def create_filtered_scatter(df, label, output, completeness, contamination):
    """
    Unlabeled scatter plot (Completeness vs. Contamination) of the bins in
    `df`, written to `output`.

    The axes are zoomed to the threshold region: x runs from
    `completeness - 2` to 102 and y from -0.5 to `contamination + 0.5`.
    `label` is inserted into the plot title.
    """
    x_range = ((completeness - 2), 102)
    y_range = (-0.5, (contamination + 0.5))
    df.plot.scatter(x='Completeness', y='Contamination', s=40, alpha=0.9,
                    xlim=x_range, ylim=y_range)
    plt.title("Filtered Genome Bins: {}".format(label))
    plt.xlabel('Genome Completeness')
    plt.ylabel('Genome Contamination')
    plt.savefig(str(output))
    plt.close()

def create_filtered_scatter_contigs(df, label, output, completeness, contamination):
    """
    Scatter plot (Completeness vs. Contamination) of the bins in `df`, each
    point annotated by its '# contigs' value, written to `output`.

    The axes are zoomed to the threshold region: x runs from
    `completeness - 2` to 102 and y from -0.5 to `contamination + 0.5`.
    `label` is inserted into the plot title.
    """
    x_range = ((completeness - 2), 102)
    y_range = (-0.5, (contamination + 0.5))
    axes = df.plot.scatter(x='Completeness', y='Contamination', s=40, alpha=0.9,
                           xlim=x_range, ylim=y_range)
    points = zip(df['# contigs'], df['Completeness'], df['Contamination'])
    for n_contigs, comp, contam in points:
        # Offset the text slightly so it does not sit on top of its marker.
        axes.annotate(n_contigs, (comp, contam),
                      xytext=(comp + 0.1, contam + 0.15),
                      fontsize=6)
    plt.title("Filtered Genome Bins: {}\n(labeled with numbers of contigs in bins)".format(label))
    plt.xlabel('Genome Completeness')
    plt.ylabel('Genome Contamination')
    plt.savefig(str(output))
    plt.close()

def create_filtered_scatter_bins(df, label, output, completeness, contamination):
    """
    Scatter plot (Completeness vs. Contamination) of the bins in `df`, each
    point annotated by its 'Bin Id' value, written to `output`.

    The axes are zoomed to the threshold region: x runs from
    `completeness - 2` to 102 and y from -0.5 to `contamination + 0.5`.
    `label` is inserted into the plot title.
    """
    x_range = ((completeness - 2), 102)
    y_range = (-0.5, (contamination + 0.5))
    axes = df.plot.scatter(x='Completeness', y='Contamination', s=40, alpha=0.9,
                           xlim=x_range, ylim=y_range)
    points = zip(df['Bin Id'], df['Completeness'], df['Contamination'])
    for bin_name, comp, contam in points:
        # Offset the text slightly so it does not sit on top of its marker.
        axes.annotate(bin_name, (comp, contam),
                      xytext=(comp + 0.1, contam + 0.12),
                      fontsize=5)
    plt.title("Filtered Genome Bins: {}\n(labeled by bin name)".format(label))
    plt.xlabel('Genome Completeness')
    plt.ylabel('Genome Contamination')
    plt.savefig(str(output))
    plt.close()


def main():
    """
    Read the CheckM summary table and write the six plots.

    Three plots use every bin in the table; the other three use only the bins
    that pass the completeness, contamination and contig-count thresholds
    given on the command line.
    """
    args = get_args()
    df = pd.read_csv(args.input, sep='\t')
    # A bin passes when completeness >= threshold, contamination <= threshold
    # and its contig count is strictly below the contigs threshold.
    scatfilt = (
        (df['Completeness'] >= args.completeness)
        & (df['Contamination'] <= args.contamination)
        & (df['# contigs'] < args.contigs)
    )
    filt = df[scatfilt]
    # The joint plot only needs the table and the output path; passing the
    # two thresholds as extra positional arguments raised a TypeError.
    create_unfiltered_joint_scatter(df, args.output1)
    create_unfiltered_scatter_contigs(df, args.label, args.output2, args.completeness, args.contamination)
    create_unfiltered_scatter_bins(df, args.label, args.output3, args.completeness, args.contamination)
    create_filtered_scatter(filt, args.label, args.output4, args.completeness, args.contamination)
    create_filtered_scatter_contigs(filt, args.label, args.output5, args.completeness, args.contamination)
    create_filtered_scatter_bins(filt, args.label, args.output6, args.completeness, args.contamination)


if __name__ == '__main__':
    main()

32 changes: 32 additions & 0 deletions HiFi-MAG-Pipeline/scripts/Concoct-organize-outputs.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,32 @@
import argparse
import os
import shutil

def get_args():
    """
    Parse the command line options for this script.

    Returns the argparse namespace; its single attribute is ``indir``.
    """
    cli = argparse.ArgumentParser(
        prog='Concoct-organize-outputs.py',
        description="""Relabel bin files from concoct.""")
    cli.add_argument(
        "-i", "--indir",
        required=True,
        help="Name of output directory.")
    parsed = cli.parse_args()
    return parsed

def relabel_outputs(indir):
    """
    Rename every '*.fa' file in `indir` to 'concoct.<bin>.fa', in place.

    <bin> is the part of the file name before its first '.', so '12.fa'
    becomes 'concoct.12.fa'.

    Args:
        indir: Directory holding the bin fasta files (relative or absolute).

    Notes:
        The working directory is no longer changed. Previously the function
        did ``os.chdir(indir)`` and then built the destination from `indir`
        again, so a relative `indir` pointed at a non-existent nested path.
    """
    fasta_files = sorted(f for f in os.listdir(indir) if f.endswith('.fa'))
    for f in fasta_files:
        if f.startswith('concoct.'):
            # Already relabeled. Renaming again would map every such file to
            # 'concoct.concoct.fa' and overwrite bins on a rerun.
            continue
        parts = f.split('.')
        outname = "concoct.{}.{}".format(parts[0], parts[-1])
        print("Relabeling file: {}".format(f))
        print("\t{}".format(outname))
        shutil.move(os.path.join(indir, f), os.path.join(indir, outname))

def main():
    """Relabel the concoct bin files in the directory given on the command line."""
    indir = get_args().indir
    relabel_outputs(indir)
    print("Finished.")


if __name__ == '__main__':
    main()
53 changes: 53 additions & 0 deletions HiFi-MAG-Pipeline/scripts/Make-maxbin-depths.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,53 @@
import argparse
import os

def get_args():
    """
    Parse the command line options for this script.

    Returns the argparse namespace with attributes ``infile`` and ``outfile``.
    """
    cli = argparse.ArgumentParser(
        prog='Make-maxbin-depths.py',
        description="""Filter JGI depths for maxbin.""")

    cli.add_argument(
        "-i", "--infile",
        required=True,
        help="Path to JGI depth file.")
    cli.add_argument(
        "-o", "--outfile",
        required=True,
        help="Name of output depth file.")
    parsed = cli.parse_args()
    return parsed

def write_new_depth_file(input_depth_file, output_depth_file):
    """
    Write a two-column depth file from a tab-delimited JGI depth file.

    For every data line the first column (contig name) and third column
    (total average depth) are written, tab-separated. The header line, which
    starts with 'contigName', and blank lines are skipped. Example input
    (columns are tab-separated):

        contigName     contigLen    totalAvgDepth  sludge.bam  sludge.bam-var
        s0.ctg000001c  4.13709e+06  61.7612        61.7612     103.936
        s1.ctg000002c  2.23849e+06  14.3086        14.3086     33.5774

    Args:
        input_depth_file: Path of the JGI depth file to read.
        output_depth_file: Path of the file to write; an existing file is
            replaced.

    Raises:
        IndexError: If a non-blank data line has fewer than three columns.
    """
    if os.path.exists(output_depth_file):
        print("Removing existing version of file.")
        os.remove(output_depth_file)

    kept_count = 0
    with open(input_depth_file, 'r') as fh_in, open(output_depth_file, 'w') as fh_out:
        for line in fh_in:
            if line.startswith("contigName"):
                continue
            if not line.strip():
                # Blank lines (e.g. a trailing newline) carry no contig.
                continue
            # Drop the line terminator first so that a depth value in the
            # last column does not bring its newline into the output.
            fields = line.rstrip('\n').split('\t')
            fh_out.write("{}\t{}\n".format(fields[0], fields[2]))
            kept_count += 1
    print("Parsed {:,} contigs".format(kept_count))

def main():
    """Convert the JGI depth file named on the command line into the reduced depth file."""
    options = get_args()
    source_path, target_path = options.infile, options.outfile
    write_new_depth_file(source_path, target_path)


if __name__ == '__main__':
    main()

49 changes: 49 additions & 0 deletions HiFi-MAG-Pipeline/scripts/Maxbin2-organize-outputs.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,49 @@
import argparse
import os
import shutil

def get_args():
    """
    Parse the command line options for this script.

    Returns the argparse namespace with attributes ``sample`` and ``outdir``.
    """
    cli = argparse.ArgumentParser(
        prog='Maxbin2-organize-outputs.py',
        description="""Organize output files from maxbin2.""")
    cli.add_argument(
        "-s", "--sample",
        required=True,
        help="The sample name.")
    cli.add_argument(
        "-o", "--outdir",
        required=True,
        help="Name of output directory.")
    parsed = cli.parse_args()
    return parsed

def make_outdir(outdir):
    """
    Ensure the output directory exists and return its full path.

    Args:
        outdir: Directory name or path, joined onto the current working
            directory (an absolute `outdir` is used as given).

    Returns:
        The joined path of the output directory.
    """
    fulldir = os.path.join(os.getcwd(), outdir)
    if not os.path.exists(fulldir):
        # makedirs also creates missing parent directories, so a nested
        # outdir such as 'results/bins' works; os.mkdir raised
        # FileNotFoundError in that case.
        os.makedirs(fulldir, exist_ok=True)
        print("Created directory: {}".format(fulldir))
    return fulldir

def move_outputs(sample, fulldir):
    """
    Move the maxbin2 output files for `sample` from the current directory
    into `fulldir`, relabeling the bin fasta files.

    Auxiliary files (names starting with `sample` and ending in one of the
    suffixes below) are moved unchanged. Fasta files starting with `sample`
    are moved as 'maxbin.<bin>.fasta'.

    Args:
        sample: Prefix shared by the files to move.
        fulldir: Destination directory.
    """
    aux_suffixes = ('.seed', '.log', '.marker', '.tar.gz', '.noclass',
                    '.summary', '.tooshort')
    # Snapshot the directory once so both selections see the same listing.
    candidates = sorted(f for f in os.listdir('.') if f.startswith(sample))

    for f in candidates:
        if f.endswith(aux_suffixes):
            print("\tMoving file: {}".format(f))
            shutil.move(f, fulldir)

    for f in candidates:
        if not f.endswith('.fasta'):
            continue
        parts = f.split('.')
        # The bin id is the component just before the extension. Indexing
        # from the front picked up a piece of the sample name whenever the
        # sample itself contained a '.', giving every bin the same output
        # name so that bins overwrote each other.
        outname = "maxbin.{}.{}".format(parts[-2], parts[-1])
        print("Relabeling file: {}".format(f))
        print("\t{}".format(outname))
        shutil.move(f, os.path.join(fulldir, outname))

def main():
    """Create the output directory and move the sample's maxbin2 files into it."""
    options = get_args()
    destination = make_outdir(options.outdir)
    move_outputs(options.sample, destination)
    print("Finished.")


if __name__ == '__main__':
    main()

0 comments on commit ce9a9c6

Please sign in to comment.