-
Notifications
You must be signed in to change notification settings - Fork 0
/
import.R
executable file
·75 lines (67 loc) · 2.95 KB
/
import.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
## Make expressionSet with count matrix.
## module load destiny
setwd("~/Shuo_Katja")
source("~/My_R_functions/make.transparent.R")
library(Biobase)
files <- list.files("Data/Star/Star_output",
pattern = "ReadsPerGene",
full.names = TRUE)
samples <- gsub("Data/Star/Star_output/", "", files)
samples <- gsub("_ReadsPerGene.out.tab", "", samples)
system(paste("wc -l", files[1]))
## 46752 Data/Star/Star_output/SHPool20-10-CGAGGCTG_S10_ReadsPerGene.out.tab
count.matrix <- matrix(nrow = 46752,
ncol = length(files),
dimnames = list(NULL, samples))
## counts for unstranded RNA-seq in 2nd column.
for(i in seq(along = files)){
data.i <- read.table(files[i],
colClasses = c("NULL", "integer","NULL", "NULL"),
sep = "\t")[,1]
count.matrix[, samples[i]] <- data.i
rm(data.i)
}
genes <- read.table(files[i],
colClasses = c("character", "NULL","NULL", "NULL"),
sep = "\t")[,1]
rownames(count.matrix) <- genes
## Get Sample info:
pData <- read.table("Data/sample_info.txt",
sep = "\t",
header = TRUE)
pData <- pData[match(colnames(count.matrix), pData$file), ]
pData$group <- paste(pData$tissue, pData$condition)
source("~/My_R_functions/make.transparent.R")
pData$col[pData$tissue == "gonad"
& pData$condition == "empty vector"] <- "orangered1"
pData$col[pData$tissue == "gonad"
& pData$condition == "ash-2 RNAi"] <- "orangered4"
pData$col[pData$tissue == "intestine"
& pData$condition == "empty vector"] <- "deepskyblue3"
pData$col[pData$tissue == "intestine"
& pData$condition == "ash-2 RNAi"] <- "midnightblue"
pData$bg <- make.transparent(pData$col, alpha = 200)
pData$dissection <- as.factor(pData$dissection)
pData$plate <- as.factor(pData$plate)
pData$condition <- relevel(as.factor(pData$condition), ref = "empty vector")
pData$tissue <- as.factor(pData$tissue)
## Get gene info:
tab <- read.table("Data/gene_info.txt",
sep = ";",
stringsAsFactors = FALSE)
fData <- data.frame(gene.id = c(rownames(count.matrix)[1:4], gsub("gene_id ", "", tab$V1)),
gene.name = c(rownames(count.matrix)[1:4], gsub(" gene_name ", "", tab$V2)),
source = c(rownames(count.matrix)[1:4], gsub(" gene_source ", "", tab$V3)),
biotype = c(rownames(count.matrix)[1:4], gsub(" gene_biotype ", "", tab$V4)),
stringsAsFactors = FALSE)
rownames(fData) <- fData$gene.name
identical(fData$gene.id, rownames(count.matrix))
## TRUE
eset <- ExpressionSet(assayData = count.matrix)
pData(eset) <- pData
fData(eset) <- fData
rownames(eset) <- fData(eset)$gene.name
colnames(eset) <- paste(pData(eset)$tissue, pData(eset)$condition, pData(eset)$plate)
eset <- eset[, order(colnames(eset))]
save(eset,
file = "Data/eset_counts.RData")