-
Notifications
You must be signed in to change notification settings - Fork 0
/
Catch_ChrPosRefAlt.R
27 lines (26 loc) · 974 Bytes
/
Catch_ChrPosRefAlt.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
library(dplyr)
# Catch_ChrPosRefAlt: flag which variants of an input CSV occur in each TSV.
#
# Usage: Rscript Catch_ChrPosRefAlt.R <input.csv>
#
# Every "*.tsv" file in the current working directory is read as one table.
# For each of them a column named after the file is appended to the input
# table; it holds 1 when a row with the same CHR, POS, REF and ALT exists in
# that TSV and 0 otherwise. The result is written next to the input file as
# "<input without .csv>_ChrPosRefAltCount.csv".

# Helpers ----

# Pull the key columns out of a table as a named list. Lookup follows the
# semantics of `$` on a data frame (exact name first, then a unique partial
# match); a column that cannot be found is returned as NULL.
extract_keys <- function(df, cols) {
  keys <- lapply(cols, function(col) df[[col, exact = FALSE]])
  names(keys) <- cols
  keys
}

# Names of the key columns that could not be found by extract_keys().
absent_keys <- function(keys) {
  names(keys)[vapply(keys, is.null, logical(1))]
}

# For every query row return 1L if some reference row agrees on all key
# columns, else 0L. `query` and `reference` are lists from extract_keys().
variant_present <- function(query, reference) {
  n_query <- length(query[[1L]])
  n_reference <- length(reference[[1L]])
  is_query <- rep(c(TRUE, FALSE), times = c(n_query, n_reference))
  has_na <- logical(n_query + n_reference)
  codes <- vector("list", length(query))
  for (k in seq_along(query)) {
    # c() applies the same type coercion as `==` (logical < integer <
    # double < character), so equal codes mean the values compare equal.
    both <- c(query[[k]], reference[[k]])
    # A missing value never equals anything, exactly as with `==`.
    has_na <- has_na | is.na(both)
    # Index of first occurrence: equal values share one code.
    codes[[k]] <- match(both, both)
  }
  row_key <- do.call(paste, c(codes, sep = "_"))
  known <- row_key[!is_query & !has_na]
  as.integer(!has_na[is_query] & row_key[is_query] %in% known)
}

# Step 1: Collect input files ----
tsv_files <- list.files(pattern = "\\.tsv$")

# Only the first trailing argument is used: the path of the input CSV.
args <- commandArgs(trailingOnly = TRUE)
if (length(args) < 1L) {
  stop("Usage: Rscript Catch_ChrPosRefAlt.R <input.csv>", call. = FALSE)
}
vcf_rawdata <- args[1]

key_cols <- c("CHR", "POS", "REF", "ALT")
# Read alleles as text: with type guessing a column holding only "T" is
# parsed as logical TRUE and then never equals the text "T" of another file.
allele_classes <- c(REF = "character", ALT = "character")

input_csv <- read.csv(
  vcf_rawdata,
  header = TRUE,
  colClasses = allele_classes,
  stringsAsFactors = FALSE
)

# Extract the key columns once, before any per-file column is appended, so
# that a file name can never interfere with the key column lookup.
input_keys <- extract_keys(input_csv, key_cols)
missing_input <- absent_keys(input_keys)
if (length(missing_input) > 0L) {
  stop(
    "Input file '", vcf_rawdata, "' lacks required column(s): ",
    paste(missing_input, collapse = ", "),
    call. = FALSE
  )
}

# Step 2: Flag presence (1) or absence (0) of each variant per TSV ----
for (tsv_file in tsv_files) {
  tsv_data <- read.delim(
    tsv_file,
    sep = "\t",
    header = TRUE,
    colClasses = allele_classes,
    stringsAsFactors = FALSE
  )
  tsv_keys <- extract_keys(tsv_data, key_cols)
  missing_tsv <- absent_keys(tsv_keys)
  if (length(missing_tsv) > 0L) {
    # An unrelated TSV cannot match anything; keep going but say why.
    warning(
      "'", tsv_file, "' lacks column(s) ",
      paste(missing_tsv, collapse = ", "),
      "; all variants flagged 0",
      call. = FALSE
    )
    input_csv[[tsv_file]] <- rep(0L, nrow(input_csv))
  } else {
    input_csv[[tsv_file]] <- variant_present(input_keys, tsv_keys)
  }
  rm(tsv_data, tsv_keys) # Release the TSV before reading the next one
}

# Step 3: Write output CSV ----
# Strip only a literal trailing ".csv"; an unescaped, unanchored pattern
# would also eat e.g. "_csv" in the middle of the path.
samplename <- sub("\\.csv$", "", vcf_rawdata)
write.csv(
  input_csv,
  paste0(samplename, "_ChrPosRefAltCount.csv"),
  row.names = FALSE
)