-
Notifications
You must be signed in to change notification settings - Fork 1
/
CrossPeak_Step1.R
68 lines (66 loc) · 3.55 KB
/
CrossPeak_Step1.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
#Load packages and functions----
library(GenomicRanges)
library(rtracklayer)
library(ggplot2)
library(BiocParallel)
library(progress)
library(data.table)
library(stringr)
library(dplyr)
library(tidyr)
source('CrossPeak_functions.R')
source('CrossPeak_parameters.R')
#Setup blacklists----
human_blacklist = Signac::blacklist_hg38_unified
chimp_blacklist_filename = 'chimp_blacklist.bed'
rhesus_blacklist_filename = 'rhesus_blacklist.bed'
species_blacklists <- vector(mode = 'list',length = length(species)); names(species_blacklists) <- species
for (sp in species){
if (sp == 'human'){
species_blacklists[[sp]] = human_blacklist
} else if (sp == 'chimp'){
species_blacklists[[sp]] = import.bed(paste0(folder,chimp_blacklist_filename))
} else if (sp == 'rhesus'){
species_blacklists[[sp]] = import.bed(paste0(folder,rhesus_blacklist_filename))
} else {stop('Unsupported species - no blacklist available. To use CrossPeak with this species, you will need to create your own blacklist and update the section Setup blacklists in CrossPeak_Step1.R')
}
genome(species_blacklists[[sp]]) = genome_names[[sp]]
}
saveRDS(species_blacklists,paste0(folder,'species_blacklists.rds'))
#Step 1: Read peak file, make summits, and export summit bed files for halLiftover----
peaks_orig <- vector(mode = 'list', length = length(species)); names(peaks_orig) <- species
peaks <- vector(mode = 'list', length = length(species)); names(peaks) <- species
summits = vector(mode = 'list', length = length(species)); names(summits) <- species
for (sp in species){
filename = paste0(sp,'_peaks')
if (peaks_format == 'bed') {peaks_orig[[sp]] = import(paste0(folder,filename,'.bed'))
} else if (peaks_format == 'rds'){peaks_orig[[sp]] = readRDS(paste0(folder,filename,'.rds'))
} else {stop('Species peak sets must be in .rds or .bed format')}
genome(peaks_orig[[sp]]) = genome_names[[sp]]
#Make sure that peaks are all the same width (and width should be odd so that summit is perfect centered)
if(max(width(peaks_orig[[sp]]))!=min(width(peaks_orig[[sp]]))){
stop('This pipeline only works for fixed width peaks with summits centered and half width given by peak_half_width.')
}
if(!is_odd(max(width(peaks_orig[[sp]])))){
warning('Peaks must have odd widths so summits can be perfectly centered. Subtracting one base pair from starts to ensure odd width.')
current_starts = start(peaks_orig[[sp]])
new_starts = current_starts - 1
peaks_orig[[sp]] <- GRanges(seqnames = seqnames(peaks_orig[[sp]]),
ranges = IRanges(start = new_starts, end = end(peaks_orig[[sp]])),
strand = strand(peaks_orig[[sp]]))
genome(peaks_orig[[sp]]) = genome_names[[sp]]
}
peaks[[sp]] <- labelPeaksWithPrefixes(peaks_orig[[sp]],sp, prefixes)
peaks[[sp]] <- suppressWarnings(excludeBlacklistPeaks(peaks[[sp]],species_blacklists[[sp]]))
if (remove_non_standard_chr){
non_std_chr = findNonStandardChromosomes(peaks[[sp]])
peaks[[sp]] = dropSeqlevels(peaks[[sp]],non_std_chr, pruning.mode = 'coarse')
}
peaks[[sp]] = dropSeqlevels(peaks[[sp]],chr_to_remove, pruning.mode = 'coarse')
strand(peaks[[sp]]) <- '+' #strand information not captured in peak calling but needed to keep track of strand flipping between original and ancestral genomes
summits[[sp]] <- makeSummitsFromPeaks(peaks[[sp]], peak_half_width,summit_half_width)
export.bed(summits[[sp]],paste0(folder,sp,'_summits.bed'))
}
saveRDS(peaks_orig, paste0(folder,'peaks_orig.rds'))
saveRDS(peaks, paste0(folder,'peaks.rds'))
saveRDS(summits, paste0(folder,'summits.rds'))