-
Notifications
You must be signed in to change notification settings - Fork 0
/
Bioactivity_Prediction.R
72 lines (54 loc) · 1.43 KB
/
Bioactivity_Prediction.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
### Initialization
# Packages attached for the rest of the script.
library(Rcpi) # readMolFromSmi(), extractDrugMACCSComplete()
library(rpart) # rpart() decision tree
library(dplyr) # NOTE(review): no dplyr function is called in the visible script - confirm it is still needed
library(e1071) # svm()
library(stringr) # str_c() for the accuracy messages
# Fix the RNG seed so the random train/test split drawn by sample() below is reproducible.
set.seed(93)
#' Squared correlation (R^2) between two numeric vectors
#'
#' @param x A numeric vector.
#' @param y A numeric vector of the same length as `x`.
#' @return The square of `cor(x, y)`.
rsq <- function(x, y) {
  # Compute the correlation once and square it, instead of calling cor() twice.
  cor(x, y)^2
}
### Import the chemicals from the SMILES file
x <- readMolFromSmi("Smile_Files.smi")
### Compute MACCS166 chemical fingerprints for the molecules
y <- extractDrugMACCSComplete(x)
y <- as.data.frame(y)
### Load bioactivity data taken from ChEMBL
bio <- read.table("Bio2.csv", header = TRUE)
# Rows of `bio` and `y` are matched by position, so they must line up one-to-one.
if (nrow(bio) != nrow(y)) {
  stop("Bio2.csv has ", nrow(bio), " rows but ", nrow(y),
       " fingerprints were computed; they must match.", call. = FALSE)
}
### Process NAs. Delete them from both data, bio (bioactivity data) and y (MACCS166 matrix)
# A logical keep-mask is used instead of `y[-ind, ]`: when there are no NAs,
# `ind` is integer(0) and a negative empty index would select ZERO rows,
# silently emptying the dataset. complete.cases() also flags exactly the rows
# that na.omit() drops, even if `bio` has more than one column.
keep <- complete.cases(bio)
ind <- which(!keep) # row numbers of the removed records, kept for inspection
bio <- na.omit(bio)
y <- y[keep, , drop = FALSE]
y$bioactivity <- bio[, 1]
### Take a look at the data used for the decision tree algorithm
head(y)
y
dim(y)
### Prepare test and train datasets
# N = number of rows used for training; every remaining row goes to the test set.
# (The original author notes the full dataset had dim(y)[1] = 124 rows.)
N <- 70
# Fail early with a clear message if there are not enough rows to leave a test set.
if (nrow(y) <= N) {
  stop("Need more than N = ", N, " rows to build a train/test split, but y has ",
       nrow(y), ".", call. = FALSE)
}
# Draw N distinct row indices; use N rather than repeating the literal so the
# split size is controlled from one place. seq_len() is safe for any row count.
sind <- sample(seq_len(nrow(y)), N)
train <- y[sind, ]
test <- y[-sind, ]
### Take a look at train and test datasets
head(train)
head(test)
dim(train)
dim(test)
### Implement decision tree
# `bioactivity` is left as stored in `train`; the predictions are rounded
# below to turn them into class labels.
tree <- rpart(bioactivity ~ ., train)
# Select the predictors by name instead of the hard-coded column position 167.
is_response <- names(test) == "bioactivity"
prd <- predict(tree, test[, !is_response, drop = FALSE])
real <- test[["bioactivity"]]
### Print the confusion matrix
prd_class <- round(prd)
# Give both margins the same, identically ordered levels. Otherwise a class
# missing from the predictions (or from the test set) yields a non-square
# table, and diag() would pair up the wrong cells.
lvls <- sort(unique(c(prd_class, real)))
res <- table(
  predicted = factor(prd_class, levels = lvls),
  reality = factor(real, levels = lvls)
)
print(res)
### Accuracy = share of test molecules on the diagonal (predicted == reality)
cat(str_c("Accuracy is ", sum(diag(res)) / sum(res), "\n"))
### Implement SVM
# The response is converted to a factor so svm() fits a classifier;
# the features are passed through unscaled (scale = FALSE).
mod <- svm(as.factor(bioactivity) ~ ., train, scale = FALSE)
# Select the predictors by name instead of the hard-coded column position 167.
prd <- predict(mod, test[, names(test) != "bioactivity", drop = FALSE])
real <- test[["bioactivity"]]
### Print the confusion matrix
# Predicted levels come from the training classes, reality levels from the test
# set. Use their union for both margins so the table is always square and
# aligned, which is what the diag()-based accuracy below requires.
lvls <- union(levels(prd), as.character(sort(unique(real))))
res <- table(
  predicted = factor(prd, levels = lvls),
  reality = factor(as.character(real), levels = lvls)
)
print(res)
### Accuracy = share of test molecules on the diagonal (predicted == reality)
cat(str_c("Accuracy is ", sum(diag(res)) / sum(res), "\n"))