Skip to content

Commit

Permalink
Merge pull request #17 from jodjo86/master
Browse files Browse the repository at this point in the history
add option to force minimap2 to consider the forward transcript
  • Loading branch information
kdc10 authored Aug 26, 2024
2 parents 2dee2f3 + 0334f2c commit cd52336
Show file tree
Hide file tree
Showing 2 changed files with 16 additions and 5 deletions.
1 change: 1 addition & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -93,6 +93,7 @@ Each step of the installation process is expected to take a matter of seconds.
|--db| $EMU_DATABASE_DIR| path to emu database; directory must include the following files: names_df.tsv, nodes_df.tsv, species_taxid.fasta, unqiue_taxids.tsv|
|--N| 50| max number of alignments utilized for each read in minimap2|
|--K| 500M| minibatch size for mapping in minimap2|
|--mm2-forward-only| FALSE| force minimap2 to consider the forward transcript strand only ([for long mRNA/cDNA reads](https://github.com/lh3/minimap2?tab=readme-ov-file#map-long-mrnacdna-reads))|
|--output-dir| ./results| directory for output results|
|--output-basename| stem of input_file(s)| basename of all output files saved in output-dir; default utilizes basename from input file(s)|
|--keep-files| FALSE| keep working files in output-dir (alignments [.sam], reads of specified length [.fa])|
Expand Down
20 changes: 15 additions & 5 deletions emu
Original file line number Diff line number Diff line change
Expand Up @@ -417,10 +417,17 @@ def generate_alignments(in_file_list, out_basename, database):
sam_align_file = "{}_emu_alignments.sam".format(out_basename)
db_sequence_file = os.path.join(database, 'species_taxid.fasta')

subprocess.check_output("minimap2 -ax {} -t {} -N {} -p .9 -K {} {} {} -o {}".
format(args.type, args.threads, args.N, args.K,
db_sequence_file, input_file, sam_align_file),
shell=True)
# force minimap2 to consider the forward transcript strand only
if args.mm2_forward_only:
subprocess.check_output("minimap2 -ax {} -t {} -N {} -p .9 -u f -K {} {} {} -o {}".
format(args.type, args.threads, args.N, args.K,
db_sequence_file, input_file, sam_align_file),
shell=True)
else:
subprocess.check_output("minimap2 -ax {} -t {} -N {} -p .9 -K {} {} {} -o {}".
format(args.type, args.threads, args.N, args.K,
db_sequence_file, input_file, sam_align_file),
shell=True)

return sam_align_file

Expand Down Expand Up @@ -686,7 +693,7 @@ def combine_outputs(dir_path, rank, split_files=False, count_table=False):
return df_combined_full

if __name__ == "__main__":
__version__ = "3.4.5"
__version__ = "3.4.6"
parser = argparse.ArgumentParser()
parser.add_argument('--version', '-v', action='version', version='%(prog)s v' + __version__)
subparsers = parser.add_subparsers(dest="subparser_name", help='sub-commands')
Expand All @@ -711,6 +718,9 @@ if __name__ == "__main__":
abundance_parser.add_argument(
'--K', '-K', type=int, default=500000000,
help='minibatch size for minimap2 mapping [500M]')
abundance_parser.add_argument(
'--mm2-forward-only', action="store_true",
help='force minimap2 to consider the forward transcript strand only')
abundance_parser.add_argument(
'--output-dir', type=str, default="./results",
help='output directory name [./results]')
Expand Down

0 comments on commit cd52336

Please sign in to comment.