From 7cab4bee04059a8e2804ee14a973362620d8efbe Mon Sep 17 00:00:00 2001 From: Kiran Garimella Date: Thu, 13 Jun 2024 23:38:39 -0400 Subject: [PATCH] Fixed issue with contig name --- Cargo.lock | 4 ++-- src/hidive/src/build.rs | 29 ++++++++++++++++------------- 2 files changed, 18 insertions(+), 15 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index fb52c9cf..c3d26e3d 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1602,7 +1602,7 @@ checksum = "7f24254aa9a54b5c858eaee2f5bccdb46aaf0e486a595ed5fd8f86ba55232a70" [[package]] name = "hidive" -version = "0.1.6" +version = "0.1.7" dependencies = [ "anyhow", "bio", @@ -3371,7 +3371,7 @@ dependencies = [ [[package]] name = "skydive" -version = "0.1.6" +version = "0.1.7" dependencies = [ "anyhow", "backoff", diff --git a/src/hidive/src/build.rs b/src/hidive/src/build.rs index c8cfeed6..d5b18ae8 100644 --- a/src/hidive/src/build.rs +++ b/src/hidive/src/build.rs @@ -58,7 +58,7 @@ pub fn reverse_complement(kmer: &str) -> String { .collect() } -pub fn find_sequences_between_sanchor_eanchor(reads: Vec, ref_hla: String) -> (HashSet, HashMap){ +pub fn find_sequences_between_sanchor_eanchor(reads: Vec, ref_hla: String, stem: &String) -> (HashSet, HashMap){ let mut hla_samples = HashSet::new(); let mut hla_seq = HashMap::new(); @@ -71,8 +71,11 @@ pub fn find_sequences_between_sanchor_eanchor(reads: Vec, ref_hla: Strin hla_seq.insert(h.clone(), seq_upper); } - hla_samples.insert("MHC-CHM13".to_string()); - hla_seq.insert("MHC-CHM13".to_string(), ref_hla); + // hla_samples.insert("MHC-CHM13".to_string()); + // hla_seq.insert("MHC-CHM13".to_string(), ref_hla); + + hla_samples.insert(stem.to_owned()); + hla_seq.insert(stem.to_owned(), ref_hla); (hla_samples, hla_seq) } @@ -146,13 +149,14 @@ pub fn get_anchor_information( sample_dict:&HashMap>, hla_samples:&HashSet, position_dict:&HashMap>>, - k: usize) + k: usize, + stem: &String) -> Vec { let mut anchorlist = Vec::new(); for (kmer, samplelist) in sample_dict.iter(){ if samplelist.len() == hla_samples.len(){ if let Some(positions) = position_dict.get(kmer){ - if positions.len()>1 && positions.get("MHC-CHM13").map_or(false, |v| v.len() == 1){ + if positions.len()>1 && positions.get(stem).map_or(false, |v| v.len() == 1){ anchorlist.push(kmer.clone()); } } @@ -163,11 +167,11 @@ pub fn get_anchor_information( } -pub fn get_anchors(anchorlist:&Vec, position_dict: &HashMap>>, k: usize) -> HashMap{ +pub fn get_anchors(anchorlist:&Vec, position_dict: &HashMap>>, k: usize, stem: &String) -> HashMap{ let mut anchor_info = HashMap::new(); for kmer in anchorlist.iter(){ if let Some(positions) = position_dict.get(kmer){ - if let Some(pos) = positions.get("MHC-CHM13").and_then(|v| v.first()){ + if let Some(pos) = positions.get(stem).and_then(|v| v.first()){ let anchor_name = format!("A{:06}", pos / k + 1); anchor_info.insert(anchor_name, AnchorInfo { seq: kmer.clone(), @@ -439,7 +443,7 @@ pub fn write_gfa(final_anchor: &HashMap, edge_info: &HashMap Ok(()) } -pub fn filter_undersupported_edges(edge_info: &HashMap, reference: &str, threshold: i32) -> HashMap { +pub fn filter_undersupported_edges(edge_info: &HashMap, reference: &String, threshold: i32) -> HashMap { let mut filtered_edges = edge_info.clone(); // Clone the input HashMap to create an owned version for (edge, data) in edge_info.iter() { @@ -479,11 +483,11 @@ pub fn start(output: &PathBuf, loci_list: &Vec, k: usize, fasta_path: &P .collect(); let unique_kmer_list = get_reference_kmer_profile(&reference_hla, k); - let (hla_samples, hla_seq) = find_sequences_between_sanchor_eanchor(reads, reference_hla); + let (hla_samples, hla_seq) = find_sequences_between_sanchor_eanchor(reads, reference_hla, &stem); let (sample_dict, position_dict) = map_reference_unique_kmers_to_seq(unique_kmer_list, &hla_seq, k); - let anchorlist = get_anchor_information(&sample_dict.lock().unwrap(), &hla_samples, &position_dict.lock().unwrap(), k); - let anchors = get_anchors(&anchorlist, &position_dict.lock().unwrap(), k); + let anchorlist = get_anchor_information(&sample_dict.lock().unwrap(), &hla_samples, &position_dict.lock().unwrap(), k, &stem); + let anchors = get_anchors(&anchorlist, &position_dict.lock().unwrap(), k, &stem); let final_anchor = get_final_anchor(&anchors, k); let (edge_info, outgoing) = create_edge_file(&hla_seq, &final_anchor, k); @@ -493,8 +497,7 @@ pub fn start(output: &PathBuf, loci_list: &Vec, k: usize, fasta_path: &P .map(|(k, v)| (k.clone(), (*v).clone())) .collect(); - let reference = "MHC-CHM13"; - let filtered_edges = filter_undersupported_edges(&edge_info,reference, 4); + let filtered_edges = filter_undersupported_edges(&edge_info, &stem, 4); write_gfa(&dereferenced_final_anchor, &filtered_edges, output);