Skip to content

Commit

Permalink
Merge pull request #62 from uclahs-cds/nzeltser-handle-overlapping-de…
Browse files Browse the repository at this point in the history
…letions

handle overlapping deletions
  • Loading branch information
alkaZeltser authored Aug 27, 2024
2 parents 5da468a + 8b2f718 commit b681d82
Show file tree
Hide file tree
Showing 9 changed files with 66 additions and 13 deletions.
2 changes: 1 addition & 1 deletion DESCRIPTION
Original file line number Diff line number Diff line change
Expand Up @@ -29,5 +29,5 @@ Config/testthat/edition: 3
License: GPL-2
Encoding: UTF-8
LazyData: true
RoxygenNote: 7.3.1
RoxygenNote: 7.3.2
VignetteBuilder: knitr
3 changes: 3 additions & 0 deletions NEWS.md
Original file line number Diff line number Diff line change
@@ -1,3 +1,6 @@
# Unreleased
* Added handling of overlapping deletion allele notation

# ApplyPolygenicScore 2.0.0 (2024-07-31)

## Changed
Expand Down
5 changes: 5 additions & 0 deletions R/apply-pgs.R
Original file line number Diff line number Diff line change
Expand Up @@ -345,7 +345,12 @@ apply.polygenic.score <- function(
sample.coordinate.to.row.dict.hash <- new.env(hash = TRUE, parent = emptyenv());

for (i in 1:nrow(merged.vcf.with.pgs.data)) {
# skip if the variant is missing from all samples
if (is.na(merged.vcf.with.pgs.data[i, 'Indiv'])) {
next;
}
key <- paste(merged.vcf.with.pgs.data[i, 'Indiv'], merged.vcf.with.pgs.data[i, 'CHROM'], merged.vcf.with.pgs.data[i, 'POS'], sep = '_');
# save all row indexes that have the same sample:coordinate combination under one key
sample.coordinate.to.row.dict.hash[[key]] <- c(sample.coordinate.to.row.dict.hash[[key]], i);
}

Expand Down
5 changes: 3 additions & 2 deletions R/calculate-dosage.R
Original file line number Diff line number Diff line change
Expand Up @@ -25,11 +25,12 @@ convert.alleles.to.pgs.dosage <- function(called.alleles, risk.alleles) {
split.alleles <- data.frame(called.alleles, called.alleles);
} else {
# check that called.alleles is a vector of genotypes in allelic notation or '.' separated by a slash or pipe
allowed.pattern <- '^((([A-Z]+|\\.)[/\\|]([A-Z]+|\\.))|\\.)$' # '|' are special chars in regular expressions
# "*" characters represent overlapping deletions from an upstream indel and are accepted VCF format
allowed.pattern <- '^((([A-Z]+|\\.|\\*)[/\\|]([A-Z]+|\\.|\\*))|\\.)$' # '|' are special chars in regular expressions
passing.alleles <- grepl(allowed.pattern, called.alleles);
passing.alleles[is.na(called.alleles)] <- TRUE; # NA allowed
if (!all(passing.alleles)) {
stop('unrecognized called.alleles format, must be capitalized letters or "." separated by a slash or pipe.');
stop('unrecognized called.alleles format, must be capitalized letters, "." or "*" separated by a slash or pipe.');
}
split.alleles <- data.table::tstrsplit(called.alleles, split = c('/|\\|'), keep = c(1,2)); # '|' are special chars in regular expressions
}
Expand Down
4 changes: 3 additions & 1 deletion R/plot-pgs-rank.R
Original file line number Diff line number Diff line change
Expand Up @@ -250,12 +250,14 @@ create.pgs.rank.plot <- function(
missing.barplot.formula <- percent.missing.genotypes ~ Indiv;
missing.barplot.ylimits <- c(0, 1.05);
missing.barplot.ylabel <- 'Missing GT\n(%)';
missing.barplot.yat <- seq(0, 1, 0.2);

} else {
# count barplot formatting
missing.barplot.formula <- n.missing.genotypes ~ Indiv;
missing.barplot.ylimits <- c(0, max(pgs.data$n.missing.genotypes) * 1.05);
missing.barplot.ylabel <- 'Missing GT\ncount';
missing.barplot.yat <- 'auto';
}

missing.genotypes.barplot <- BoutrosLab.plotting.general::create.barplot(
Expand All @@ -265,7 +267,7 @@ create.pgs.rank.plot <- function(
xlab.label = '',
ylab.label = missing.barplot.ylabel,
xaxis.lab = '',
yat = 'auto',
yat = missing.barplot.yat,
main = '',
main.cex = 0,
ylab.cex = titles.cex,
Expand Down
9 changes: 8 additions & 1 deletion R/plot-pgs.R
Original file line number Diff line number Diff line change
Expand Up @@ -380,6 +380,7 @@ create.pgs.density.plot <- function(
#' @param point.cex numeric size for plot points
#' @param border.padding numeric padding for plot borders
#' @return If no output directory is provided, a multipanel lattice plot object is returned, otherwise a plot is written to the indicated path and \code{NULL} is returned.
#' If no continuous phenotype variables are detected, a warning is issued and \code{NULL} is returned.
#' @examples
#' set.seed(100);
#'
Expand Down Expand Up @@ -442,6 +443,11 @@ create.pgs.with.continuous.phenotype.plot <- function(
# check input
pgs.distribution.plotting.input.checks(pgs.data = pgs.data, phenotype.columns = phenotype.columns, output.dir = output.dir);

if (is.null(phenotype.columns)) {
warning('No continuous phenotype variables detected; returning NULL');
return(NULL);
}

recognized.pgs.colnames <- c('PGS', 'PGS.with.replaced.missing', 'PGS.with.normalized.missing');
pgs.columns <- colnames(pgs.data)[colnames(pgs.data) %in% recognized.pgs.colnames];

Expand All @@ -451,7 +457,8 @@ create.pgs.with.continuous.phenotype.plot <- function(
phenotype.data.for.plotting <- subset(phenotype.data, select = phenotype.index.by.type$continuous);

if (length(phenotype.data.for.plotting) == 0) {
stop('No continuous phenotype variables detected');
warning('No continuous phenotype variables detected; returning NULL');
return(NULL);
}

# Plotting
Expand Down
1 change: 1 addition & 0 deletions man/create.pgs.with.continuous.phenotype.plot.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

24 changes: 18 additions & 6 deletions tests/testthat/test-dosage-calculator.R
Original file line number Diff line number Diff line change
Expand Up @@ -46,28 +46,28 @@ test_that(
called.alleles = c('A/A', 'A'),
risk.alleles = c('A', 'T')
),
'unrecognized called.alleles format, must be capitalized letters or "." separated by a slash or pipe.'
'unrecognized called.alleles format, must be capitalized letters, "." or "\\*" separated by a slash or pipe.'
);
expect_error(
convert.alleles.to.pgs.dosage(
called.alleles = c('A/A', 'A,'),
risk.alleles = c('A', 'T')
),
'unrecognized called.alleles format, must be capitalized letters or "." separated by a slash or pipe.'
'unrecognized called.alleles format, must be capitalized letters, "." or "\\*" separated by a slash or pipe.'
);
expect_error(
convert.alleles.to.pgs.dosage(
called.alleles = c('A/A', 'A-A'),
risk.alleles = c('A', 'T')
),
'unrecognized called.alleles format, must be capitalized letters or "." separated by a slash or pipe.'
'unrecognized called.alleles format, must be capitalized letters, "." or "\\*" separated by a slash or pipe.'
);
expect_error(
convert.alleles.to.pgs.dosage(
called.alleles = c('A/A', 'a/A'),
risk.alleles = c('A', 'T')
),
'unrecognized called.alleles format, must be capitalized letters or "." separated by a slash or pipe.'
'unrecognized called.alleles format, must be capitalized letters, "." or "\\*" separated by a slash or pipe.'
);
expect_error(
convert.alleles.to.pgs.dosage(
Expand Down Expand Up @@ -108,8 +108,8 @@ test_that(
# check that correct input is accepted
expect_silent(
convert.alleles.to.pgs.dosage(
called.alleles = c('A/A', 'A|T', 'TA/T', 'A/ATTTT', './.', '.'),
risk.alleles = c('A', 'T', 'A', 'T', 'A', 'T')
called.alleles = c('A/A', 'A|T', 'TA/T', 'A/ATTTT', './.', '.', '*/T', 'T/*', '*/*'),
risk.alleles = c('A', 'T', 'A', 'T', 'A', 'T', 'A', 'T', 'A')
)
);
}
Expand Down Expand Up @@ -163,6 +163,18 @@ test_that(
}
);

test_that(
'convert.alleles.to.pgs.dosage calculates dosage correctly for overlapping deletion alleles', {
expect_equal(
convert.alleles.to.pgs.dosage(
called.alleles = c('A/*', '*/A', '*/*', 'A/*', '*/A'),
risk.alleles = c('A', 'A', 'A', 'T', 'T')
),
c(1, 1, 0, 0, 0)
);
}
);

test_that(
'convert.alleles.to.pgs.dosage correctly handles a scenario where no genotypes are provided', {
expect_equal(
Expand Down
26 changes: 24 additions & 2 deletions tests/testthat/test-plotting.R
Original file line number Diff line number Diff line change
Expand Up @@ -273,12 +273,34 @@ test_that(
'phenotype.columns cannot contain recognized PGS column names'
);
# check that at least one of the phenotype columns provided is a continuous variable
expect_error(
expect_warning(
create.pgs.with.continuous.phenotype.plot(
pgs.data = pgs.test,
phenotype.columns = 'binary.phenotype'
),
'No continuous phenotype variables detected'
'No continuous phenotype variables detected; returning NULL'
);
expect_equal(
create.pgs.with.continuous.phenotype.plot(
pgs.data = pgs.test,
phenotype.columns = 'binary.phenotype'
),
NULL
);
# handle NULL phenotype.columns
expect_warning(
create.pgs.with.continuous.phenotype.plot(
pgs.data = pgs.test,
phenotype.columns = NULL
),
'No continuous phenotype variables detected; returning NULL'
);
expect_equal(
create.pgs.with.continuous.phenotype.plot(
pgs.data = pgs.test,
phenotype.columns = NULL
),
NULL
);
# check that output.dir is a real directory
expect_error(
Expand Down

0 comments on commit b681d82

Please sign in to comment.