custom_input_pipeline #1 (Open)
wants to merge 20 commits into base: master
1 change: 1 addition & 0 deletions .gitignore
@@ -2,3 +2,4 @@
.Rhistory
.RData
.Ruserdata
.DS_Store
40 changes: 32 additions & 8 deletions DESCRIPTION
@@ -1,11 +1,35 @@
Package: reinbo
Package: ReinBo
Type: Package
Title: What the Package Does (Title Case)
Title: Reinforcement Learning in R
Version: 0.1.0
Author: Who wrote it
Maintainer: The package maintainer <[email protected]>
Description: More about what it does (maybe more than one line)
Use four spaces when indenting paragraphs within the Description.
License: What license is it under?
Authors@R: c(
person("Jiali", "lin", email = {"[email protected]"}, role = c("aut", "ctb")),
person("Xudong", "Sun", email = {"[email protected]"}, role = c("aut", "cre"))
)
Maintainer: Xudong Sun <[email protected]>
Description: Automatic machine learning: searches over machine learning pipelines and their hyperparameters by combining reinforcement learning with model-based (Bayesian) optimization.
License: BSD_2_clause + file LICENSE
Encoding: UTF-8
LazyData: true
Depends:
R (>= 3.4.0)
Imports:
R6,
mlr,
mlrCPO,
ParamHelpers,
BBmisc,
rlR,
hash,
mlrMBO,
smoof
LazyData: true
RoxygenNote: 6.1.1
BugReports: https://github.com/smilesun/rlR/issues
URL: https://github.com/smilesun/rlR
SystemRequirements: The following Python package is needed to use the OpenAI gym environment: gym >= 0.10.5. At least one deep learning backend required by keras (tensorflow, cntk, theano) should be installed on your computer, for example tensorflow >= 1.1.0 (tested on Ubuntu 14.04). The backend that keras requires can be installed via keras::install_keras(). Both dependencies can also be installed with the rlR::installDep() function. Note that the user should run 'reticulate::use_python("/usr/local/bin/python")' to specify the Python path and 'reticulate::use_virtualenv("myenv")' to specify which virtual environment to use. By default, the package uses "~/anaconda3/bin/python" as its Python interpreter. For details, please refer to https://rstudio.github.io/reticulate/articles/versions.html
Suggests:
devtools,
testthat,
knitr,
covr,
rmarkdown
VignetteBuilder: knitr
13 changes: 12 additions & 1 deletion NAMESPACE
@@ -1 +1,12 @@
exportPattern("^[[:alpha:]]+")
# Generated by roxygen2: do not edit by hand

export(reinbo)
import(BBmisc)
import(ParamHelpers)
import(R6)
import(hash)
import(mlr)
import(mlrCPO)
import(mlrMBO)
import(rlR)
import(smoof)
25 changes: 25 additions & 0 deletions R/hello.R
@@ -16,3 +16,28 @@
hello <- function() {
print("Hello, world!")
}

getGconf = function() {
flag_debug = TRUE
conf_common = list(
NCVOuterIter = 5L,
NCVInnerIter = 5L,
measures = list(mlr::mmce),
repl = 10L,
prob_seed = 1L,
RLMaxEpisode = 2000L # this number is not critical; it only ensures the RL agent can run for enough episodes
)

conf_debug = list(
budget = 40L,
conf_tpot = list(generations = 1L, population_size = 3L, offspring_size = 3L, config_dict = 'TPOT light')
)

conf_full = list(
budget = 1000L,
# TPOT will evaluate population_size + generations × offspring_size pipelines in total.
conf_tpot = list(generations = 20L, population_size = 10L, offspring_size = 50L)
)
if (flag_debug) return(c(conf_debug, conf_common))
return(c(conf_full, conf_common))
}
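As a point of reference, here is a minimal sketch of how the merged configuration returned by getGconf() might be consumed elsewhere; the field names come from the lists above, everything else is purely illustrative:

conf = getGconf()        # debug block while flag_debug is TRUE, otherwise the full block, plus common settings
conf$NCVInnerIter        # 5L, inner cross-validation folds
conf$budget              # 40L in the debug configuration, 1000L in the full one
conf$measures[[1]]$id    # "mmce", the mlr measure used for evaluation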
136 changes: 136 additions & 0 deletions R/reinbo_table_env.R
@@ -0,0 +1,136 @@
Q_table_Env = R6::R6Class(
"Q_table_Env",
inherit = rlR::Environment,
public = list(
step_cnt = NULL,
s_r_d_info = NULL,
task = NULL,
mbo_cache = NULL, # store pipeline, hyperparameter set and corresponding performance for MBO
model_best_perf = NULL, # best performance of sampled model until now
model_trained = NULL, # performances of all configurations evaluated so far (used to track budget consumption)
budget = NULL, # maximum number of model evaluations allowed (budget)
measure = NULL,
cv_instance = NULL,
ctrl = NULL,
initialize = function(task, budget, measure, cv_instance, ctrl){
self$flag_continous = FALSE # non-continuous action
self$flag_tensor = FALSE # no use of cnn
self$ctrl = ctrl
self$act_cnt = self$ctrl$g_act_cnt # available operators/actions at each stage
self$state_dim = self$ctrl$g_state_dim
self$step_cnt = 0L
self$s_r_d_info = list(
state = "s",
reward = 0,
done = FALSE,
info = list())
self$task = task
self$mbo_cache = hash()
self$model_trained = NULL
self$budget = budget
self$measure = measure
self$cv_instance = cv_instance
},

evaluateArm = function(vec_arm) {
return(vec_arm)
},

# This function will be called at each step of the learning
step = function(action) {
operators = self$ctrl$g_operators[[names(self$ctrl$g_operators)[self$step_cnt + 1]]]
mod = action %% length(operators)
if (mod == 0){
operator = operators[length(operators)]
} else {
operator = operators[mod]
}
self$s_r_d_info[["state"]] = paste0(self$s_r_d_info[["state"]], "-[", operator, "]")
#print(self$s_r_d_info[["state"]])
self$s_r_d_info[["reward"]] = 0
self$step_cnt = self$step_cnt + 1L
if (self$step_cnt >= self$ctrl$g_max_depth) {
model = g_getRLPipeline(self$s_r_d_info[["state"]])
print(paste(model, collapse = " --> "))
# stop the RL agent if there is not enough budget left for this episode:
model_id = paste(model, collapse = "\t")
if (has.key(model_id, self$mbo_cache)){
require_budget = self$ctrl$g_mbo_iter*sum(getParamLengths(g_getParamSetFun(model)))
} else {
require_budget = (self$ctrl$g_init_design + self$ctrl$g_mbo_iter)*sum(getParamLengths(g_getParamSetFun(model)))
}
if (self$budget < require_budget) stop("total budget too small for reinbo table!")
if (self$budget - length(self$model_trained) < require_budget) {
self$agent$interact$idx_episode = self$agent$interact$maxiter
self$s_r_d_info[["done"]] = TRUE
} else {
# train model with hyperparameter tuning:
self$tuning(model)
self$s_r_d_info[["reward"]] = self$model_best_perf # best performance of the model until now
self$s_r_d_info[["done"]] = TRUE
#print(paste("Best Performance:", self$model_best_perf))
}
}
return(self$s_r_d_info)
},


# This function will be called at the beginning of the learning and at the end of each episode
reset = function() {
self$step_cnt = 0
self$s_r_d_info[["state"]] = "s"
self$s_r_d_info[["done"]] = FALSE
self$s_r_d_info
},


# Hyperparameter tuning for the generated model; returns the best performance as reward and updates mbo_cache
tuning = function(model) {
model_id = paste(model, collapse = "\t") # model_id used for lookup in mbo_cache
ps = g_getParamSetFun(model) # generate parameter set

# check if we have already evaluated this model

# if already in mbo_cache:
if (has.key(model_id, self$mbo_cache)){
previous_perf = max(self$mbo_cache[[model_id]][ , "y"]) # best performance until now
epis_unimproved = self$mbo_cache[[model_id]][1, "epis_unimproved"] # number of episodes in which this model's performance has not improved
# if the performance of this model has not improved for more than 2 episodes,
# stop further hyperparameter tuning:
if (epis_unimproved > 2) {
self$model_best_perf = previous_perf
} else {
# else: use parameter set and performance in memory as initial design
design = self$mbo_cache[[model_id]][ , -length(self$mbo_cache[[model_id]])]
# run several iterations of MBO:
run = mbo_fun(self$task, model, design, self$measure, self$cv_instance, self$ctrl)
# best accuracy:
self$model_best_perf = run$y
# update mbo_cache:
self$mbo_cache[[model_id]] = run$opt.path$env$path
# add result to self$model_trained:
new = run$opt.path$env$path$y[run$opt.path$env$dob != 0]
self$model_trained = c(self$model_trained, new)
# check if the performance of this model has been improved in this episode:
if (run$y <= previous_perf) {
self$mbo_cache[[model_id]]["epis_unimproved"] = epis_unimproved + 1
} else {
self$mbo_cache[[model_id]]["epis_unimproved"] = 0
}
}
} else {

# if not in mbo_cache:
design = generateDesign(n = self$ctrl$g_init_design*sum(getParamLengths(ps)), par.set = ps)
run = mbo_fun(self$task, model, design, self$measure, self$cv_instance, self$ctrl) # potential warning: generateDesign could only produce 3 points instead of 1000, see issue 442 of mlrMBO
self$model_best_perf = run$y
self$mbo_cache[[model_id]] = run$opt.path$env$path
self$mbo_cache[[model_id]]["epis_unimproved"] = 0
new = run$opt.path$env$path$y
self$model_trained = c(self$model_trained, new)
}
}
)
)
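To make the action-to-operator mapping in step() concrete, here is a small standalone sketch; the operator vector stands in for one stage of ctrl$g_operators, which is defined elsewhere in the package, so the operator names are only assumptions:

operators_stage1 = c("cpoScale()", "NA", "cpoPca(rank)")   # hypothetical operators for one pipeline stage
# an action index is folded onto the operator list with a modulo; remainder 0 maps to the last entry
pick_operator = function(action, operators) {
  mod = action %% length(operators)
  if (mod == 0) operators[length(operators)] else operators[mod]
}
pick_operator(4L, operators_stage1)   # "cpoScale()", since 4 %% 3 == 1
pick_operator(6L, operators_stage1)   # "cpoPca(rank)", since 6 %% 3 == 0
# after g_max_depth steps the state string reads e.g. "s-[cpoScale()]-[NA]-[classif.kknn]",
# which g_getRLPipeline() converts back into a model character vector before tuning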


109 changes: 109 additions & 0 deletions R/reinbo_table_func.R
@@ -0,0 +1,109 @@
# ML_ReinBo algorithm:
opt.reinbo.table = function(task, budget, measure, init_val, train_set = NULL, conf, ctrl) {
subTask = task
if (!is.null(train_set)) subTask = subsetTask(task, train_set)
inner_loop = makeResampleInstance("CV", iters = getGconf()$NCVInnerIter, stratify = TRUE, subTask)
env = runQTable(subTask, budget, measure, inner_loop, init_val, conf, ctrl)
mmodel = getBestModel(env$mbo_cache)
return(list(mmodel = mmodel, env = env))
}

# Predict function: evaluate best model on test dataset
lock_eval.reinbo.table = function(task, measure, train_set, test_set, best_model){
best_model = best_model$mmodel
lrn = genLearnerForBestModel(task, best_model, measure)
mod = train(lrn, task, subset = train_set)
pred = predict(mod, task, subset = test_set)
perf = performance(pred, measures = measure)
return(perf)
}


# Reinforcement learning part:
#' @param ctrl pipeline configuration
runQTable <- function(task, budget, measure, instance, init_val, conf, ctrl) {
env = Q_table_Env$new(task, budget, measure, instance, ctrl)
agent = initAgent(name = "AgentTable", env = env, conf = conf, q_init = init_val,
state_names = ctrl$g_state_names,
act_names_per_state = get_act_names_perf_state(ctrl$g_operators),
vis_after_episode = FALSE)
agent$learn(getGconf()$RLMaxEpisode)
return(env)
}

# MBO function: hyperparameter tuning
#' @param model character vector
mbo_fun = function(task, model, design, measure, cv_instance, ctrl) {
ps = g_getParamSetFun(model) # get parameter set from string representation of a model
object = makeSingleObjectiveFunction(
fn = function(x) {
-reinbo_mlr_fun(task, model, x, measure, cv_instance) + runif(1)/100000
},
par.set = ps,
has.simple.signature = FALSE,
minimize = FALSE
)
ctrlmbo = setMBOControlTermination(makeMBOControl(), iters = ctrl$g_mbo_iter * sum(getParamLengths(ps))) # g_mbo_iter (2) times the total number of parameters
run = mbo(object, design = design, control = ctrlmbo, show.info = FALSE)
## expected message from genoud during infill optimization: "Stopped because hard maximum generation limit was hit."
## Genoud combines evolutionary search algorithms with derivative-based (Newton or quasi-Newton) methods to solve difficult optimization problems.
## occasional warning: Warning in generateDesign(control$infill.opt.focussearch.points, ps.local, ...): generateDesign could only produce 20 points instead of 1000!
## see https://github.com/mlr-org/mlrMBO/issues/442; a fix is being worked on in https://github.com/mlr-org/mlrMBO/pull/444
return(run)
}


# Mlr function: calculate performance of the generated model given a specific param_set
reinbo_mlr_fun = function(task, model, param_set, measure, cv_instance){
lrn = genLearner.reinbo(task, model, param_set, measure)
perf = resample(lrn, task, resampling = cv_instance, measures = measure, show.info = FALSE)$aggr
return(perf)
}



# To get best model from mbo_cache of environment:
getBestModel = function(cache){
models = keys(cache)
results = data.frame(model = 0, y = 0)
for (i in 1:length(models)) {
results[i, 1] = models[i]
results[i, 2] = max(cache[[models[i]]][, "y"])
}
key = results[results$y == max(results$y), "model"][1]
ps = cache[[key]]
ps = ps[(ps$y == max(ps$y)), (colnames(ps) != "epis_unimproved")][1, ]
return(data.frame(Model = key, ps))
}
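To illustrate the cache layout that getBestModel() relies on, a toy example follows; the hyperparameter columns ("k", "C", "sigma") are invented here, and only the "y" and "epis_unimproved" columns are actually required by the code above:

library(hash)
cache = hash()
cache[["cpoScale()\tNA\tclassif.kknn"]] = data.frame(k = c(3, 7), y = c(0.91, 0.95), epis_unimproved = 0)
cache[["cpoScale()\tNA\tclassif.ksvm"]] = data.frame(C = c(1, 4), sigma = c(0.1, 0.2), y = c(0.93, 0.90), epis_unimproved = 1)
getBestModel(cache)   # returns the kknn entry: Model = "cpoScale()\tNA\tclassif.kknn", k = 7, y = 0.95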

genLearnerForBestModel = function(task, best_model, measure){
model = strsplit(as.character(best_model$Model), "\t")[[1]]
param_set = as.list(best_model)
param_set$Model = NULL
param_set$y = NULL
if (!is.null(param_set$C)) { param_set$C = 2^param_set$C }
if (!is.null(param_set$sigma)) { param_set$sigma = 2^param_set$sigma }
lrn = genLearner.reinbo(task, model, param_set, measure)
return(lrn)
}


genLearner.reinbo = function(task, model, param_set, measure){
p = getTaskNFeats(task)
lrn = sprintf("%s %%>>%% %s %%>>%% makeLearner('%s', par.vals = ps.learner)",
model[1], model[2], model[3])
lrn = gsub(pattern = "perc", x = lrn, replacement = "perc = param_set$perc", fixed = TRUE)
lrn = gsub(pattern = "rank", x = lrn, replacement = "rank = as.integer(max(1, round(p*param_set$rank)))", fixed = TRUE)
lrn = gsub(pattern = "NA %>>%", x = lrn, replacement = "", fixed = TRUE)
ps.learner = param_set
ps.learner$perc = NULL
ps.learner$rank = NULL
if (model[3] == "classif.ranger") {
p1 = p
if (!is.null(param_set$perc)) {p1 = max(1, round(p*param_set$perc))}
if (!is.null(param_set$rank)) {p1 = max(1, round(p*param_set$rank))}
ps.learner$mtry = max(1, as.integer(p1*param_set$mtry))
}
lrn = eval(parse(text = lrn))
return(lrn)
}
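Taken together, the two entry points above appear to be intended to be used per outer-CV split roughly as follows. This is only a sketch: conf (the rlR agent configuration) and ctrl (the pipeline/operator control object) are placeholders for objects defined elsewhere in the package, and init_val is just an arbitrary Q-table initialization value:

library(mlr)
task = iris.task
outer = makeResampleInstance("CV", iters = getGconf()$NCVOuterIter, stratify = TRUE, task)
train_set = outer$train.inds[[1]]
test_set = outer$test.inds[[1]]
# search for the best pipeline and its hyperparameters on the training part of the split
best = opt.reinbo.table(task, budget = getGconf()$budget, measure = mlr::mmce,
                        init_val = 0.5, train_set = train_set, conf = conf, ctrl = ctrl)
# refit the selected pipeline on the training part and score it on the held-out part
lock_eval.reinbo.table(task, measure = mlr::mmce, train_set, test_set, best_model = best)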
42 changes: 42 additions & 0 deletions R/reinbo_table_hyperpara_space.R
@@ -0,0 +1,42 @@
##### Parameter set of operators for hyperparameter tuning:
ps.ksvm = ParamHelpers::makeParamSet(
ParamHelpers::makeNumericParam("C", lower = -15, upper = 15, trafo = function(x) 2^x),
ParamHelpers::makeNumericParam("sigma", lower = -15, upper = 15, trafo = function(x) 2^x))

ps.ranger = ParamHelpers::makeParamSet(
ParamHelpers::makeNumericParam("mtry", lower = 1/10, upper = 1/1.5), ## range(p/10, p/1.5), p is the number of features
ParamHelpers::makeNumericParam("sample.fraction", lower = .1, upper = 1))

ps.xgboost = ParamHelpers::makeParamSet(
ParamHelpers::makeNumericParam("eta", lower = .001, upper = .3),
ParamHelpers::makeIntegerParam("max_depth", lower = 1L, upper = 15L),
ParamHelpers::makeNumericParam("subsample", lower = 0.5, upper = 1),
ParamHelpers::makeNumericParam("colsample_bytree", lower = 0.5, upper = 1),
ParamHelpers::makeNumericParam("min_child_weight", lower = 0, upper = 50)
)

ps.kknn = ParamHelpers::makeParamSet(ParamHelpers::makeIntegerParam("k", lower = 1L, upper = 20L))

ps.naiveBayes = ParamHelpers::makeParamSet(ParamHelpers::makeNumericParam("laplace", lower = 0.01, upper = 100))

ps.filter = ParamHelpers::makeParamSet(ParamHelpers::makeNumericParam("perc", lower = .1, upper = 1))

ps.pca = ParamHelpers::makeParamSet(ParamHelpers::makeNumericParam("rank", lower = .1, upper = 1)) ## range(p/10, p), p is the number of features



##### Get parameter set for generated model:
g_getParamSetFun = function(model) {
ps.classif = sub(pattern = "classif", replacement = "ps", x = model[3])
ps.classif = eval(parse(text = ps.classif)) # hyperparameter set for classifier
if (model[2] == "NA") {
return(ps.classif)
} else if (length(grep(pattern = "perc", x = model)) > 0) {
return(c(ps.classif, ps.filter))
} else {
return(c(ps.classif, ps.pca))
}
}
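A short sketch of what g_getParamSetFun() returns for concrete model vectors; the three elements are assumed to be (preprocessing operator, filter/PCA operator or "NA", mlr classifier), following how the function indexes model[2] and model[3], and the operator names themselves are hypothetical:

g_getParamSetFun(c("cpoScale()", "NA", "classif.ksvm"))
# -> ps.ksvm only (C and sigma), because the second slot is "NA"
g_getParamSetFun(c("cpoScale()", "cpoFilterAnova(perc)", "classif.kknn"))
# -> ps.kknn combined with ps.filter, because the model string contains "perc"
g_getParamSetFun(c("cpoScale()", "cpoPca(rank)", "classif.naiveBayes"))
# -> ps.naiveBayes combined with ps.pca (the remaining branch)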


