From 7df586e9d5021ee51a0df25c79ee625568f7f49b Mon Sep 17 00:00:00 2001 From: Max Brown Date: Thu, 15 Jul 2021 21:20:13 +0100 Subject: [PATCH] Minor edits --- README.md | 22 +++++++++++----------- src/main.rs | 6 +++--- 2 files changed, 14 insertions(+), 14 deletions(-) diff --git a/README.md b/README.md index 4f72c6c..9504b1c 100644 --- a/README.md +++ b/README.md @@ -4,13 +4,14 @@ Fast statistics in windows over a genome in fasta format. - GC content - GC proportion - GC skew -- Shannon entropy - Proportion of G's, C's, A's, T's, N's +- Shannon entropy - Di/tri/tetranucleotide shannon diversity +- Di/tri/tetranucleotide frequency arrays ## Usage -Fewer options than previous versions, as di/tri/tetranucleotide diversity is calculated instead of user input for kmer length. +The masked (-m) flag only affects the first four output options above; kmers are always coerced to uppercase, and how Shannon entropy should treat masked (lowercase) bases still needs attention. ``` Fasta windows 0.2.1 @@ -21,17 +22,15 @@ USAGE: fasta_windows [FLAGS] [OPTIONS] --fasta --output FLAGS: - -h, --help Prints help information - -m, --masked Consider only uppercase nucleotides in the calculations. - -V, --version Prints version information + -c, --canonical_kmers Should the canonical kmers be calculated? + -h, --help Prints help information + -m, --masked Consider only uppercase nucleotides in the calculations. + -V, --version Prints version information OPTIONS: - -c, --canonical_kmers Should the canonical kmers be calculated? Boolean, input true or false. - [default: false] - -f, --fasta The input fasta file. - -o, --output Output filename for the CSV (without extension). - -w, --window_size Integer size of window for statistics to be computed over. [default: - 1000] + -f, --fasta The input fasta file. + -o, --output Output filename for the CSV (without extension). + -w, --window_size Integer size of window for statistics to be computed over. 
[default: 1000] ``` ## Building @@ -75,6 +74,7 @@ Also output (non-optional at the moment), are three more TSV's, which are the ar e.g. for dinucleotide frequencies: ``` +ID start end AA AC AG AT CA CC CG CT GA GC GG GT TA TC TG TT SUPER_1 0 1000 122 120 45 73 134 68 39 46 50 55 45 15 54 44 36 53 SUPER_1 1000 2000 140 83 32 90 85 54 22 66 30 25 19 39 91 65 40 118 SUPER_1 2000 3000 216 181 4 5 4 181 5 181 3 8 3 3 183 1 516 diff --git a/src/main.rs b/src/main.rs index a41d042..3bf3a27 100644 --- a/src/main.rs +++ b/src/main.rs @@ -240,7 +240,7 @@ fn main() { } writeln!( window_file_2, - "id\tstart\tend\t{}", + "ID\tstart\tend\t{}", kmer_maps::WriteKmerValues(dinuc_headers) ) .unwrap_or_else(|_| eprintln!("[-]\tError in writing to file.")); @@ -265,7 +265,7 @@ fn main() { } writeln!( window_file_3, - "id\tstart\tend\t{}", + "ID\tstart\tend\t{}", kmer_maps::WriteKmerValues(trinuc_headers) ) .unwrap_or_else(|_| eprintln!("[-]\tError in writing to file.")); @@ -289,7 +289,7 @@ fn main() { } writeln!( window_file_4, - "id\tstart\tend\t{}", + "ID\tstart\tend\t{}", kmer_maps::WriteKmerValues(tetranuc_headers) ) .unwrap_or_else(|_| eprintln!("[-]\tError in writing to file."));