Skip to content

Commit

Permalink
added --outfile to filter_contigs
Browse files (browse the repository at this point in the history)
  • Loading branch information
Nicholas Youngblut committed Aug 31, 2022
1 parent 2c1bef9 commit a1f2227
Show file tree
Hide file tree
Showing 3 changed files with 21 additions and 5 deletions.
2 changes: 2 additions & 0 deletions resmico/commands/filter_contigs.py
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,8 @@ def parse_args(test_args=None, subparsers=None):
'The contig names must match those in the prediction_table')
parser.add_argument('--outdir', default='resmico-filter', type=str,
help='Output directory (default: %(default)s)')
parser.add_argument('--outfile', default=None, type=str,
help='Output file name. If None, then the input file name will be used (default: %(default)s)')
parser.add_argument('--score-cutoff', default=0.8, type=float,
help='Prediction score cutoff for filtering: >=[score] will be filtered'
' (default: %(default)s)')
Expand Down
14 changes: 9 additions & 5 deletions resmico/filter_contigs.py
Original file line number Diff line number Diff line change
Expand Up @@ -110,12 +110,15 @@ def parse_fasta_list(fasta_list):
return fasta_files

def _filter_fasta(fasta_file, predictions, outdir, pred_score_cutoff, add_score=False,
error_on_missing=False, min_length=0, max_length=0):
error_on_missing=False, min_length=0, max_length=0, outfile=None):
"""
Filtering fasta based on contig prediction scores
"""
logging.info(f'Filtering fasta: {fasta_file}')
outfile = os.path.join(outdir, os.path.split(fasta_file)[1])
if outfile is None:
outfile = os.path.join(outdir, os.path.split(fasta_file)[1])
else:
outfile = os.path.join(outdir, os.path.split(outfile)[1])
if outfile == fasta_file:
msg = 'Input path cannot equal output path: {} <=> {}'
raise ValueError(msg.format(fasta_file, outfile))
Expand Down Expand Up @@ -189,7 +192,7 @@ def set_logger(level):

def filter_fasta(fasta_files, predictions, outdir, pred_score_cutoff,
add_score=False, error_on_missing=False,
min_length=0, max_length=0, n_proc=1):
min_length=0, max_length=0, n_proc=1, outfile=None):
"""
Filter >=1 fasta based on prediction scores
"""
Expand All @@ -202,7 +205,8 @@ def filter_fasta(fasta_files, predictions, outdir, pred_score_cutoff,
add_score = add_score,
error_on_missing = error_on_missing,
min_length = min_length,
max_length = max_length)
max_length = max_length,
outfile = outfile)
if n_proc < 2:
res = map(func, fasta_files)
else:
Expand All @@ -226,7 +230,7 @@ def main(args):
# fasta filter
filter_fasta(args.fasta, predictions, args.outdir, args.score_cutoff,
args.add_score, args.error_on_missing, args.min_length,
args.max_length, args.n_proc)
args.max_length, args.n_proc, args.outfile)

# Script entry guard: executing this module directly is a no-op.
if __name__ == '__main__':
    pass
10 changes: 10 additions & 0 deletions resmico/tests/test_filter.py
Original file line number Diff line number Diff line change
Expand Up @@ -31,3 +31,13 @@ def test_filter_list(tmpdir, script_runner):
pred_file, contig_files)
assert ret.success, ret.print()

def test_filter_outfile(tmpdir, script_runner):
    """
    Run `resmico filter` with an explicit --outfile name and
    check that the command exits successfully
    """
    # per-test output directory, passed to the command via --outdir
    out_dir = tmpdir.mkdir('save_dir')
    # input fixtures bundled with the test data
    dataset_dir = os.path.join(data_dir, 'UHGG-n9')
    predictions_csv = os.path.join(dataset_dir, 'resmico_predictions.csv')
    fasta_list_txt = os.path.join(dataset_dir, 'contig_files.txt')
    # full command line: options first, then the two positional inputs
    cmd = [
        'resmico', 'filter',
        '--score-cutoff', '0.02',
        '--outdir', str(out_dir),
        '--outfile', 'contigs-filtered.fna',
        predictions_csv, fasta_list_txt,
    ]
    ret = script_runner.run(*cmd)
    # NOTE(review): only the exit status is checked; consider also asserting
    # that the file named by --outfile was written to the output directory
    assert ret.success, ret.print()

0 comments on commit a1f2227

Please sign in to comment.