-
Notifications
You must be signed in to change notification settings - Fork 0
/
calTanimotoSimMat.R
57 lines (39 loc) · 1.13 KB
/
calTanimotoSimMat.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
#' Calculate a Tanimoto (Jaccard) similarity matrix between rows
#'
#' For every pair of rows \code{i}, \code{j} of a binary matrix the
#' similarity is \code{common / (n_i + n_j - common)}, where \code{common}
#' is the number of columns set to 1 in both rows and \code{n_i}, \code{n_j}
#' are the numbers of columns set to 1 in each row. Pairs that share no set
#' column get similarity 0; this includes the diagonal entry of a row that
#' contains only zeros.
#'
#' @param descMat A matrix containing only the values 0 and 1, usually a
#'   descriptor (fingerprint) matrix with one row per object. Data frames
#'   and matrices containing \code{NA} are rejected.
#'
#' @return A square numeric matrix with \code{nrow(descMat)} rows and
#'   columns and no dimnames, holding the pairwise row similarities.
#'
#' @examples
#' fp <- matrix(c(1, 1, 0,
#'                1, 0, 0), nrow = 2, byrow = TRUE)
#' calTanimotoSimMat(fp)
calTanimotoSimMat <- function(descMat) {
  ## the input must be a real matrix (a data.frame is not accepted)
  if (!is.matrix(descMat)) {
    stop("Input should be Matrix, not data.frame and others!")
  }

  ## the input must contain only 0 and 1; as.vector() is needed so that
  ## unique() works on the values rather than on whole rows, and NA is
  ## rejected explicitly because it would silently corrupt the counts below
  vals <- unique(as.vector(descMat))
  if (anyNA(vals) || !all(as.character(vals) %in% c("0", "1"))) {
    stop("Input should be {0, 1} matrix!!!")
  }

  ## number of set columns shared by each pair of rows
  common <- tcrossprod(descMat)
  ## number of set columns in each row
  n_set <- rowSums(descMat)

  ## Jaccard formula: #common / (#i + #j - #common)
  sim <- common / (outer(n_set, n_set, "+") - common)
  ## pairs without any shared set column have similarity 0; this also
  ## replaces the NaN produced by 0 / 0 when both rows are all zeros
  sim[common == 0] <- 0

  ## drop dimnames so the result is a plain unnamed matrix
  unname(sim)
}