Skip to content

Commit

Permalink
Fixes tolkit#5 - headers for output kmer frequencies done.
Browse files Browse the repository at this point in the history
  • Loading branch information
Euphrasiologist committed Jul 15, 2021
1 parent 02b1f6b commit 05d6d28
Show file tree
Hide file tree
Showing 4 changed files with 112 additions and 40 deletions.
12 changes: 11 additions & 1 deletion Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

3 changes: 2 additions & 1 deletion Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -8,4 +8,5 @@ edition = "2018"
bio = "*"
clap = "~2.27.0"
rayon = "1.5.0"
indicatif = "*"
indicatif = "*"
itertools = "0.10.1"
21 changes: 20 additions & 1 deletion src/kmer_maps.rs
Original file line number Diff line number Diff line change
Expand Up @@ -87,7 +87,7 @@ pub mod kmer_maps {
output
}

// display for Vec<i32>
// hacky display for Vec<i32>
#[derive(Clone)]
pub struct WriteArray(pub Vec<i32>);

Expand All @@ -104,4 +104,23 @@ pub mod kmer_maps {
write!(f, "{}", tab_separated)
}
}
/// Newtype wrapper that gives a list of borrowed k-mers a `Display` impl.
///
/// Each k-mer is held as a reference to its raw bytes (`&Vec<u8>`). Formatting
/// the wrapper prints every k-mer as text, separated by single tab characters,
/// with no leading or trailing tab. An empty list formats as an empty string.
#[derive(Clone)]
pub struct WriteKmerValues<'a>(pub Vec<&'a Vec<u8>>);

impl<'a> Display for WriteKmerValues<'a> {
    /// Writes the wrapped k-mers to `f` as one tab-separated line.
    ///
    /// # Panics
    ///
    /// Panics if any k-mer is not valid UTF-8.
    fn fmt(&self, f: &mut Formatter) -> Result<(), Error> {
        for (index, kmer) in self.0.iter().enumerate() {
            // Emit the separator *before* every element except the first, so
            // there is never a trailing tab and an empty list is handled
            // without any `len() - 1` arithmetic (which underflows on empty).
            if index > 0 {
                f.write_str("\t")?;
            }
            let kmer_str =
                std::str::from_utf8(kmer).expect("kmer bytes should be valid UTF-8");
            f.write_str(kmer_str)?;
        }
        Ok(())
    }
}
}
116 changes: 79 additions & 37 deletions src/main.rs
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@ use std::sync::mpsc::channel;
use bio::io::fasta;
use clap::{value_t, App, Arg};
use indicatif::{ProgressBar, ProgressStyle};
use itertools::Itertools;
use rayon::prelude::*;

// internal imports
Expand Down Expand Up @@ -201,6 +202,7 @@ fn main() {
// write files
// I pass over &res four times here...
// pretty inefficient.
// I should really separate these functions from main. It's just lazy.

eprintln!("[+]\tWriting output to files");
for i in &res {
Expand Down Expand Up @@ -228,44 +230,84 @@ fn main() {
window_file.flush().unwrap();

// these are the arrays, tab separated bed-like format.
for i in &res {
writeln!(
window_file_2,
"{}\t{}\t{}\t{}",
i.id,
i.start,
i.end,
kmer_maps::WriteArray(i.divalues.clone())
)
.unwrap_or_else(|_| eprintln!("[-]\tError in writing to file."));
}
window_file_2.flush().unwrap();
//
for i in &res {
writeln!(
window_file_3,
"{}\t{}\t{}\t{}",
i.id,
i.start,
i.end,
kmer_maps::WriteArray(i.trivalues.clone())
)
.unwrap_or_else(|_| eprintln!("[-]\tError in writing to file."));
}
window_file_3.flush().unwrap();
//
for i in &res {
writeln!(
window_file_4,
"{}\t{}\t{}\t{}",
i.id,
i.start,
i.end,
kmer_maps::WriteArray(i.tetravalues.clone())
)
.unwrap_or_else(|_| eprintln!("[-]\tError in writing to file."));
// TODO: add headers for each of these
match kmer_maps.as_slice() {
[two, three, four] => {
// headers for dinucs
let mut dinuc_headers = Vec::new();
for key in two.map.keys().sorted() {
dinuc_headers.push(key)
}
writeln!(
window_file_2,
"id\tstart\tend\t{}",
kmer_maps::WriteKmerValues(dinuc_headers)
)
.unwrap_or_else(|_| eprintln!("[-]\tError in writing to file."));

for i in &res {
writeln!(
window_file_2,
"{}\t{}\t{}\t{}",
i.id,
i.start,
i.end,
kmer_maps::WriteArray(i.divalues.clone())
)
.unwrap_or_else(|_| eprintln!("[-]\tError in writing to file."));
}
window_file_2.flush().unwrap();

// headers for trinucs
let mut trinuc_headers = Vec::new();
for key in three.map.keys().sorted() {
trinuc_headers.push(key)
}
writeln!(
window_file_3,
"id\tstart\tend\t{}",
kmer_maps::WriteKmerValues(trinuc_headers)
)
.unwrap_or_else(|_| eprintln!("[-]\tError in writing to file."));
for i in &res {
writeln!(
window_file_3,
"{}\t{}\t{}\t{}",
i.id,
i.start,
i.end,
kmer_maps::WriteArray(i.trivalues.clone())
)
.unwrap_or_else(|_| eprintln!("[-]\tError in writing to file."));
}
window_file_3.flush().unwrap();
//
// headers for tetranucs
let mut tetranuc_headers = Vec::new();
for key in four.map.keys().sorted() {
tetranuc_headers.push(key)
}
writeln!(
window_file_4,
"id\tstart\tend\t{}",
kmer_maps::WriteKmerValues(tetranuc_headers)
)
.unwrap_or_else(|_| eprintln!("[-]\tError in writing to file."));
for i in &res {
writeln!(
window_file_4,
"{}\t{}\t{}\t{}",
i.id,
i.start,
i.end,
kmer_maps::WriteArray(i.tetravalues.clone())
)
.unwrap_or_else(|_| eprintln!("[-]\tError in writing to file."));
}
window_file_4.flush().unwrap();
}
[..] => {} // Needed to make the patterns exhaustive
}
window_file_4.flush().unwrap();

eprintln!("[+]\tOutput written to directory: ./fw_out/{}", output);
}

0 comments on commit 05d6d28

Please sign in to comment.