forked from Pedrodpj92/Predicting_DBpedia_types
-
Notifications
You must be signed in to change notification settings - Fork 0
/
modela_pruebaHojas.R
314 lines (235 loc) · 15 KB
/
modela_pruebaHojas.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
#!/usr/bin/env Rscript
#modela_pruebaHojas.R
library(h2o)
#library(reshape2)
#library(randomForest)
#library(caret)
# Initialise H2O, letting it use every available thread.
h2o.init(
  nthreads = -1 # -1: use all available threads
  # max_mem_size = "2G"
)
# h2o.removeAll()

# Approx. 327 options.
# Load the data written by preparaDatos_SoloHojas.R.

# Training frames, one per test split.
df_training_test1 <- h2o.importFile(
  path = normalizePath("/opt/files/proyectoHeiko_adaptado/R_proyectos/amelioratingTypes_ESWC2018/inputData/divisionSDtypes/pruebaHojas/soloTest1/training.csv"),
  header = TRUE
)
df_training_test2 <- h2o.importFile(
  path = normalizePath("/opt/files/proyectoHeiko_adaptado/R_proyectos/amelioratingTypes_ESWC2018/inputData/divisionSDtypes/pruebaHojas/soloTest2/training.csv"),
  header = TRUE
)
df_training_test3 <- h2o.importFile(
  path = normalizePath("/opt/files/proyectoHeiko_adaptado/R_proyectos/amelioratingTypes_ESWC2018/inputData/divisionSDtypes/pruebaHojas/soloTest3/training.csv"),
  header = TRUE
)

# df_validating <- h2o.importFile(path = normalizePath("/opt/files/proyectoHeiko_adaptado/R_proyectos/amelioratingTypes_ESWC2018/inputData/divisionSDtypes/pruebaHojas/validating_test1_2_3.csv"), header = TRUE)

# Validation frames, one per test split.
df_validating_test1 <- h2o.importFile(
  path = normalizePath("/opt/files/proyectoHeiko_adaptado/R_proyectos/amelioratingTypes_ESWC2018/inputData/divisionSDtypes/pruebaHojas/soloTest1/validating_test1.csv"),
  header = TRUE
)
df_validating_test2 <- h2o.importFile(
  path = normalizePath("/opt/files/proyectoHeiko_adaptado/R_proyectos/amelioratingTypes_ESWC2018/inputData/divisionSDtypes/pruebaHojas/soloTest2/validating_test2.csv"),
  header = TRUE
)
df_validating_test3 <- h2o.importFile(
  path = normalizePath("/opt/files/proyectoHeiko_adaptado/R_proyectos/amelioratingTypes_ESWC2018/inputData/divisionSDtypes/pruebaHojas/soloTest3/validating_test3.csv"),
  header = TRUE
)
# Read one validation CSV into a plain R data.frame.
#
# The file is read with header = FALSE and its first row is then promoted to
# the column names and dropped from the data (presumably so that read.csv does
# not sanitise the header text -- TODO confirm). Columns 2..(ncol - 1) are
# converted to numeric; the first and the last column are left as read.
#
# The conversion targets exactly the columns it reads from. Assigning the
# (ncol - 2) converted columns onto positions 2..ncol, as was done before,
# makes the two sides differ in length, so the last column is either
# overwritten by a recycled column or the assignment is rejected.
#
# @param path Path of the CSV file to read.
# @return A data.frame with the header row as column names.
read_validating_csv <- function(path) {
  raw <- read.csv(
    file = path,
    header = FALSE, sep = ",", encoding = "UTF-8", stringsAsFactors = FALSE
  )
  colnames(raw) <- as.character(unlist(raw[1, ], use.names = FALSE))
  raw <- raw[-1, , drop = FALSE]
  # Guard against 2:(ncol - 1) counting downwards on very narrow inputs.
  if (ncol(raw) > 2L) {
    feature_cols <- 2:(ncol(raw) - 1L)
    raw[feature_cols] <- lapply(
      raw[feature_cols],
      function(x) as.numeric(as.character(x))
    )
  }
  raw
}

# Local (non-H2O) copies of the three validation sets.
validating_test1 <- read_validating_csv("/opt/files/proyectoHeiko_adaptado/R_proyectos/amelioratingTypes_ESWC2018/inputData/divisionSDtypes/pruebaHojas/soloTest1/validating_test1.csv")
validating_test2 <- read_validating_csv("/opt/files/proyectoHeiko_adaptado/R_proyectos/amelioratingTypes_ESWC2018/inputData/divisionSDtypes/pruebaHojas/soloTest2/validating_test2.csv")
validating_test3 <- read_validating_csv("/opt/files/proyectoHeiko_adaptado/R_proyectos/amelioratingTypes_ESWC2018/inputData/divisionSDtypes/pruebaHojas/soloTest3/validating_test3.csv")
# Register each imported frame in H2O under an explicit key.
train_test1 <- h2o.assign(data = df_training_test1, key = "train_test1.hex")
train_test2 <- h2o.assign(data = df_training_test2, key = "train_test2.hex")
train_test3 <- h2o.assign(data = df_training_test3, key = "train_test3.hex")
# valid <- h2o.assign(df_validating, "valid.hex")
valid_test1 <- h2o.assign(data = df_validating_test1, key = "valid_test1.hex")
valid_test2 <- h2o.assign(data = df_validating_test2, key = "valid_test2.hex")
valid_test3 <- h2o.assign(data = df_validating_test3, key = "valid_test3.hex")
############
#naiveBayes#
############
# UNCOMMENT WHEN THIS IS FINALLY PRESENTED
#nb_pruebaHojas_test1 <- h2o.naiveBayes(
# model_id="nb_pruebaHojas_test1",
# training_frame=train_test1,
# validation_frame=valid_test1[,2:ncol(valid_test1)],
# x=2:(ncol(train_test1)-1),
# y=ncol(train_test1),
# seed = 1234)
#h2o.saveModel(nb_pruebaHojas_test1, path="/opt/files/proyectoHeiko_adaptado/R_proyectos/amelioratingTypes_ESWC2018/models/pruebaHojas/soloTest1")
#nb_pruebaHojas_test2 <- h2o.naiveBayes(
# model_id="nb_pruebaHojas_test2",
# training_frame=train_test2,
# validation_frame=valid_test2[,2:ncol(valid_test2)],
# x=2:(ncol(train_test2)-1),
# y=ncol(train_test2),
# seed = 1234)
#h2o.saveModel(nb_pruebaHojas_test2, path="/opt/files/proyectoHeiko_adaptado/R_proyectos/amelioratingTypes_ESWC2018/models/pruebaHojas/soloTest2")
#nb_pruebaHojas_test3 <- h2o.naiveBayes(
# model_id="nb_pruebaHojas_test3",
# training_frame=train_test3,
# validation_frame=valid_test3[,2:ncol(valid_test3)],
# x=2:(ncol(train_test3)-1),
# y=ncol(train_test3),
# seed = 1234)
#h2o.saveModel(nb_pruebaHojas_test3, path="/opt/files/proyectoHeiko_adaptado/R_proyectos/amelioratingTypes_ESWC2018/models/pruebaHojas/soloTest3")
#test1 <- h2o.predict(object = nb_pruebaHojas_test1, newdata = valid_test1[,2:(ncol(valid_test1)-1)])[1]
#test2 <- h2o.predict(object = nb_pruebaHojas_test2, newdata = valid_test2[,2:(ncol(valid_test2)-1)])[1]
#test3 <- h2o.predict(object = nb_pruebaHojas_test3, newdata = valid_test3[,2:(ncol(valid_test3)-1)])[1]
#salida_test1 <- cbind(validating_test1[,1],as.data.frame(test1))
#salida_test1$p <- "<http://www.w3.org/1999/02/22-rdf-syntax-ns#type>"
#salida_test1[,c(1,2,3)] <- salida_test1[,c(1,3,2)]
#colnames(salida_test1) <- c("s","o","p")
#colnames(salida_test1) <- c("s","o")
#salida_test2 <- cbind(validating_test2[,1],as.data.frame(test2))
#salida_test2$p <- "<http://www.w3.org/1999/02/22-rdf-syntax-ns#type>"
#salida_test2[,c(1,2,3)] <- salida_test2[,c(1,3,2)]
#colnames(salida_test2) <- c("s","p","o")
#colnames(salida_test2) <- c("s","o")
#salida_test3 <- cbind(validating_test3[,1],as.data.frame(test3))
#salida_test3$p <- "<http://www.w3.org/1999/02/22-rdf-syntax-ns#type>"
#salida_test3[,c(1,2,3)] <- salida_test3[,c(1,3,2)]
#colnames(salida_test3) <- c("s","p","o")
#colnames(salida_test3) <- c("s","o")
#write.csv(salida_test1, file = "/opt/files/proyectoHeiko_adaptado/R_proyectos/amelioratingTypes_ESWC2018/outputData/pruebaHojas/soloTest1/test1_nb.ttl", fileEncoding = "UTF-8", row.names=FALSE)
#write.csv(salida_test2, file = "/opt/files/proyectoHeiko_adaptado/R_proyectos/amelioratingTypes_ESWC2018/outputData/pruebaHojas/soloTest2/test2_nb.ttl", fileEncoding = "UTF-8", row.names=FALSE)
#write.csv(salida_test3, file = "/opt/files/proyectoHeiko_adaptado/R_proyectos/amelioratingTypes_ESWC2018/outputData/pruebaHojas/soloTest3/test3_nb.ttl", fileEncoding = "UTF-8", row.names=FALSE)
################################
#GBM: Gradient Boosting Machine#
################################
#gbm_pruebaHojas_default <- h2o.gbm(
# model_id="gbm_pruebaHojas_default",
# training_frame=train,
# validation_frame=valid[,2:ncol(valid)],
# x=2:ncol(train),
# y=ncol(train),
# ntrees = 50,
# max_depth = 5,
# seed = 1234)
#h2o.saveModel(gbm_pruebaHojas, path="/opt/files/proyectoHeiko_adaptado/R_proyectos/amelioratingTypes_ESWC2018/models/pruebaHojas")
#gbm_pruebaHojas_tunning1 <- h2o.gbm(
# model_id="gbm_pruebaHojas_tunning1",
# training_frame=train,
# validation_frame=valid[,2:ncol(valid)],
# x=2:ncol(train),
# y=ncol(train),
# ntrees = 200,
# max_depth = 120,
# seed = 1234)
#h2o.saveModel(gbm_pruebaHojas_tunning1, path="/opt/files/proyectoHeiko_adaptado/R_proyectos/amelioratingTypes_ESWC2018/models/pruebaHojas")
### default
#test1 <- h2o.predict(object = gbm_pruebaHojas_default, newdata = valid_test1[,2:(ncol(valid_test1)-1)])[1]
#test2 <- h2o.predict(object = gbm_pruebaHojas_default, newdata = valid_test2[,2:(ncol(valid_test2)-1)])[1]
#test3 <- h2o.predict(object = gbm_pruebaHojas_default, newdata = valid_test3[,2:(ncol(valid_test3)-1)])[1]
#salida_test1 <- cbind(validating_test1[,1],as.data.frame(test1))
#colnames(salida_test1) <- c("s","o")
#salida_test2 <- cbind(validating_test2[,1],as.data.frame(test2))
#colnames(salida_test2) <- c("s","o")
#salida_test3 <- cbind(validating_test3[,1],as.data.frame(test3))
#colnames(salida_test3) <- c("s","o")
#write.csv(salida_test1, file = "/opt/files/proyectoHeiko_adaptado/R_proyectos/amelioratingTypes_ESWC2018/outputData/pruebaHojas/test1_gbm_default.ttl", fileEncoding = "UTF-8", row.names=FALSE)
#write.csv(salida_test2, file = "/opt/files/proyectoHeiko_adaptado/R_proyectos/amelioratingTypes_ESWC2018/outputData/pruebaHojas/test2_gbm_default.ttl", fileEncoding = "UTF-8", row.names=FALSE)
#write.csv(salida_test3, file = "/opt/files/proyectoHeiko_adaptado/R_proyectos/amelioratingTypes_ESWC2018/outputData/pruebaHojas/test3_gbm_default.ttl", fileEncoding = "UTF-8", row.names=FALSE)
### tunning1
#test1 <- h2o.predict(object = gbm_pruebaHojas_tunning1, newdata = valid_test1[,2:(ncol(valid_test1)-1)])[1]
#test2 <- h2o.predict(object = gbm_pruebaHojas_tunning1, newdata = valid_test2[,2:(ncol(valid_test2)-1)])[1]
#test3 <- h2o.predict(object = gbm_pruebaHojas_tunning1, newdata = valid_test3[,2:(ncol(valid_test3)-1)])[1]
#salida_test1 <- cbind(validating_test1[,1],as.data.frame(test1))
#colnames(salida_test1) <- c("s","o")
#salida_test2 <- cbind(validating_test2[,1],as.data.frame(test2))
#colnames(salida_test2) <- c("s","o")
#salida_test3 <- cbind(validating_test3[,1],as.data.frame(test3))
#colnames(salida_test3) <- c("s","o")
#write.csv(salida_test1, file = "/opt/files/proyectoHeiko_adaptado/R_proyectos/amelioratingTypes_ESWC2018/outputData/pruebaHojas/test1_gbm_tunning1.ttl", fileEncoding = "UTF-8", row.names=FALSE)
#write.csv(salida_test2, file = "/opt/files/proyectoHeiko_adaptado/R_proyectos/amelioratingTypes_ESWC2018/outputData/pruebaHojas/test2_gbm_tunning1.ttl", fileEncoding = "UTF-8", row.names=FALSE)
#write.csv(salida_test3, file = "/opt/files/proyectoHeiko_adaptado/R_proyectos/amelioratingTypes_ESWC2018/outputData/pruebaHojas/test3_gbm_tunning1.ttl", fileEncoding = "UTF-8", row.names=FALSE)
##############
#RandomForest#
##############
#rf_pruebaHojas_test1 <- h2o.randomForest(
# model_id="rf_pruebaHojas_test1",
# training_frame=train_test1,
# validation_frame=valid_test1[,2:ncol(valid_test1)],
# x=2:(ncol(train_test1)-1),
# y=ncol(train_test1),
# ntrees = 200,
# max_depth = 120,
# stopping_rounds = 3,
# score_each_iteration = T,
# seed = 1234)
#h2o.saveModel(rf_pruebaHojas_test1, path="/opt/files/proyectoHeiko_adaptado/R_proyectos/amelioratingTypes_ESWC2018/models/pruebaHojas/soloTest1")
# Load the test1 random forest from disk.
rf_pruebaHojas_test1 <- h2o.loadModel(
  path = "/opt/files/proyectoHeiko_adaptado/R_proyectos/amelioratingTypes_ESWC2018/models/pruebaHojas/soloTest1/rf_pruebaHojas_test1"
)

# Train the test2 random forest. Column 1 is excluded from the predictors and
# from the validation frame; the last column is the response.
rf_pruebaHojas_test2 <- h2o.randomForest(
  model_id = "rf_pruebaHojas_test2",
  training_frame = train_test2,
  validation_frame = valid_test2[, 2:ncol(valid_test2)],
  x = 2:(ncol(train_test2) - 1),
  y = ncol(train_test2),
  ntrees = 200,
  max_depth = 120,
  stopping_rounds = 3,
  # TRUE rather than T: T is an ordinary variable that can be reassigned.
  score_each_iteration = TRUE,
  seed = 1234
)
h2o.saveModel(
  rf_pruebaHojas_test2,
  path = "/opt/files/proyectoHeiko_adaptado/R_proyectos/amelioratingTypes_ESWC2018/models/pruebaHojas/soloTest2"
)
# Train the test3 random forest. Column 1 is excluded from the predictors and
# from the validation frame; the last column is the response.
rf_pruebaHojas_test3 <- h2o.randomForest(
  model_id = "rf_pruebaHojas_test3",
  # Must be train_test3 so the training data matches the x/y indices and the
  # validation split used for this model (train_test1 here would fit the
  # test3 model on a different split's training data).
  training_frame = train_test3,
  validation_frame = valid_test3[, 2:ncol(valid_test3)],
  x = 2:(ncol(train_test3) - 1),
  y = ncol(train_test3),
  ntrees = 200,
  max_depth = 120,
  stopping_rounds = 3,
  # TRUE rather than T: T is an ordinary variable that can be reassigned.
  score_each_iteration = TRUE,
  seed = 1234
)
h2o.saveModel(
  rf_pruebaHojas_test3,
  path = "/opt/files/proyectoHeiko_adaptado/R_proyectos/amelioratingTypes_ESWC2018/models/pruebaHojas/soloTest3"
)
# Score each validation split with its random forest, keeping only the first
# column of the prediction frame. Column 1 and the last column of the
# validation frame are not passed to the model.
test1 <- h2o.predict(
  object = rf_pruebaHojas_test1,
  newdata = valid_test1[, 2:(ncol(valid_test1) - 1)]
)[1]
test2 <- h2o.predict(
  object = rf_pruebaHojas_test2,
  newdata = valid_test2[, 2:(ncol(valid_test2) - 1)]
)[1]
test3 <- h2o.predict(
  object = rf_pruebaHojas_test3,
  newdata = valid_test3[, 2:(ncol(valid_test3) - 1)]
)[1]

# Pair the first column of the locally read validation data ("s") with the
# prediction ("o").
salida_test1 <- cbind(validating_test1[, 1], as.data.frame(test1))
names(salida_test1) <- c("s", "o")
salida_test2 <- cbind(validating_test2[, 1], as.data.frame(test2))
names(salida_test2) <- c("s", "o")
salida_test3 <- cbind(validating_test3[, 1], as.data.frame(test3))
names(salida_test3) <- c("s", "o")

# Write one output file per split.
write.csv(
  salida_test1,
  file = "/opt/files/proyectoHeiko_adaptado/R_proyectos/amelioratingTypes_ESWC2018/outputData/pruebaHojas/soloTest1/test1_rf.ttl",
  fileEncoding = "UTF-8",
  row.names = FALSE
)
write.csv(
  salida_test2,
  file = "/opt/files/proyectoHeiko_adaptado/R_proyectos/amelioratingTypes_ESWC2018/outputData/pruebaHojas/soloTest2/test2_rf.ttl",
  fileEncoding = "UTF-8",
  row.names = FALSE
)
write.csv(
  salida_test3,
  file = "/opt/files/proyectoHeiko_adaptado/R_proyectos/amelioratingTypes_ESWC2018/outputData/pruebaHojas/soloTest3/test3_rf.ttl",
  fileEncoding = "UTF-8",
  row.names = FALSE
)
###############
#Deep Learning#
###############
# Deep-learning model for split test1: predictors are columns
# 2..(ncol - 1), the response is the last column.
dl_pruebaHojas_test1 <- h2o.deeplearning(
  x = 2:(ncol(train_test1) - 1),
  y = ncol(train_test1),
  training_frame = train_test1,
  validation_frame = valid_test1[, 2:ncol(valid_test1)],
  model_id = "dl_pruebaHojas_test1",
  stopping_rounds = 0, # NOTE(review): presumably disables early stopping -- confirm
  seed = 1234
)
h2o.saveModel(
  dl_pruebaHojas_test1,
  path = "/opt/files/proyectoHeiko_adaptado/R_proyectos/amelioratingTypes_ESWC2018/models/pruebaHojas/soloTest1"
)
# Deep-learning model for split test2: predictors are columns
# 2..(ncol - 1), the response is the last column.
dl_pruebaHojas_test2 <- h2o.deeplearning(
  x = 2:(ncol(train_test2) - 1),
  y = ncol(train_test2),
  training_frame = train_test2,
  validation_frame = valid_test2[, 2:ncol(valid_test2)],
  model_id = "dl_pruebaHojas_test2",
  stopping_rounds = 0, # NOTE(review): presumably disables early stopping -- confirm
  seed = 1234
)
h2o.saveModel(
  dl_pruebaHojas_test2,
  path = "/opt/files/proyectoHeiko_adaptado/R_proyectos/amelioratingTypes_ESWC2018/models/pruebaHojas/soloTest2"
)
# Deep-learning model for split test3: predictors are columns
# 2..(ncol - 1), the response is the last column.
dl_pruebaHojas_test3 <- h2o.deeplearning(
  x = 2:(ncol(train_test3) - 1),
  y = ncol(train_test3),
  training_frame = train_test3,
  validation_frame = valid_test3[, 2:ncol(valid_test3)],
  model_id = "dl_pruebaHojas_test3",
  stopping_rounds = 0, # NOTE(review): presumably disables early stopping -- confirm
  seed = 1234
)
h2o.saveModel(
  dl_pruebaHojas_test3,
  path = "/opt/files/proyectoHeiko_adaptado/R_proyectos/amelioratingTypes_ESWC2018/models/pruebaHojas/soloTest3"
)
# Score each validation split with its deep-learning model, keeping only the
# first column of the prediction frame. Column 1 and the last column of the
# validation frame are not passed to the model.
test1 <- h2o.predict(
  object = dl_pruebaHojas_test1,
  newdata = valid_test1[, 2:(ncol(valid_test1) - 1)]
)[1]
test2 <- h2o.predict(
  object = dl_pruebaHojas_test2,
  newdata = valid_test2[, 2:(ncol(valid_test2) - 1)]
)[1]
test3 <- h2o.predict(
  object = dl_pruebaHojas_test3,
  newdata = valid_test3[, 2:(ncol(valid_test3) - 1)]
)[1]

# Pair the first column of the locally read validation data ("s") with the
# prediction ("o").
salida_test1 <- cbind(validating_test1[, 1], as.data.frame(test1))
names(salida_test1) <- c("s", "o")
salida_test2 <- cbind(validating_test2[, 1], as.data.frame(test2))
names(salida_test2) <- c("s", "o")
salida_test3 <- cbind(validating_test3[, 1], as.data.frame(test3))
names(salida_test3) <- c("s", "o")

# Write one output file per split.
write.csv(
  salida_test1,
  file = "/opt/files/proyectoHeiko_adaptado/R_proyectos/amelioratingTypes_ESWC2018/outputData/pruebaHojas/soloTest1/test1_dl.ttl",
  fileEncoding = "UTF-8",
  row.names = FALSE
)
write.csv(
  salida_test2,
  file = "/opt/files/proyectoHeiko_adaptado/R_proyectos/amelioratingTypes_ESWC2018/outputData/pruebaHojas/soloTest2/test2_dl.ttl",
  fileEncoding = "UTF-8",
  row.names = FALSE
)
write.csv(
  salida_test3,
  file = "/opt/files/proyectoHeiko_adaptado/R_proyectos/amelioratingTypes_ESWC2018/outputData/pruebaHojas/soloTest3/test3_dl.ttl",
  fileEncoding = "UTF-8",
  row.names = FALSE
)
#h2o.shutdown(prompt=FALSE)