custom_input_pipeline #1 (Open)
wants to merge 20 commits into base: master
1 change: 1 addition & 0 deletions .gitignore
@@ -2,3 +2,4 @@
.Rhistory
.RData
.Ruserdata
.DS_Store
40 changes: 32 additions & 8 deletions DESCRIPTION
@@ -1,11 +1,35 @@
Package: reinbo
Package: ReinBo
Type: Package
Title: What the Package Does (Title Case)
Title: Reinforcement Learning in R
Version: 0.1.0
Author: Who wrote it
Maintainer: The package maintainer <[email protected]>
Description: More about what it does (maybe more than one line)
Use four spaces when indenting paragraphs within the Description.
License: What license is it under?
Authors@R: c(
person("Jiali", "lin", email = {"[email protected]"}, role = c("aut", "ctb")),
person("Xudong", "Sun", email = {"[email protected]"}, role = c("aut", "cre"))
)
Maintainer: Xudong Sun <[email protected]>
Description: Automatic machine learning: searches over machine learning pipelines and their hyperparameters by combining reinforcement learning with model-based (Bayesian) optimization.
License: BSD_2_clause + file LICENSE
Encoding: UTF-8
LazyData: true
Depends:
R (>= 3.4.0)
Imports:
R6,
mlr,
mlrCPO,
ParamHelpers,
BBmisc,
rlR,
hash,
mlrMBO,
smoof
LazyData: true
RoxygenNote: 6.1.1
BugReports: https://github.com/smilesun/rlR/issues
URL: https://github.com/smilesun/rlR
SystemRequirements: The following Python package is needed to use the OpenAI gym environment: gym >= 0.10.5. At least one deep learning backend required by keras (tensorflow, cntk, theano) should be installed on your computer, for example tensorflow >= 1.1.0 (tested on Ubuntu 14.04). The backend that keras requires can be installed via keras::install_keras(). Both dependencies can also be installed with the rlR::installDep() function. Note that the user should run 'reticulate::use_python("/usr/local/bin/python")' to specify the Python path and 'reticulate::use_virtualenv("myenv")' to specify which virtual environment to use. By default, the package uses "~/anaconda3/bin/python" as its Python interpreter. For details, please refer to https://rstudio.github.io/reticulate/articles/versions.html
Suggests:
devtools,
testthat,
knitr,
covr,
rmarkdown
VignetteBuilder: knitr
13 changes: 12 additions & 1 deletion NAMESPACE
@@ -1 +1,12 @@
exportPattern("^[[:alpha:]]+")
# Generated by roxygen2: do not edit by hand

export(reinbo)
import(BBmisc)
import(ParamHelpers)
import(R6)
import(hash)
import(mlr)
import(mlrCPO)
import(mlrMBO)
import(rlR)
import(smoof)
25 changes: 25 additions & 0 deletions R/hello.R
@@ -16,3 +16,28 @@
hello <- function() {
print("Hello, world!")
}

getGconf = function() {
flag_debug = TRUE
conf_common = list(
NCVOuterIter = 5L,
NCVInnerIter = 5L,
measures = list(mlr::mmce),
repl = 10L,
prob_seed = 1L,
RLMaxEpisode = 2000L # this number is not critical; it only ensures the RL agent can run for enough episodes
)

conf_debug = list(
budget = 40L,
conf_tpot = list(generations = 1L, population_size = 3L, offspring_size = 3L, config_dict = 'TPOT light')
)

conf_full = list(
budget = 1000L,
# TPOT will evaluate population_size + generations × offspring_size pipelines in total.
conf_tpot = list(generations = 20L, population_size = 10L, offspring_size = 50L)
)
if (flag_debug) return(c(conf_debug, conf_common))
return(c(conf_full, conf_common))
}
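As a point of reference, here is a minimal sketch of how the merged configuration returned by getGconf() might be consumed elsewhere; the field names come from the lists above, everything else is purely illustrative:

conf = getGconf()        # debug block while flag_debug is TRUE, otherwise the full block, plus common settings
conf$NCVInnerIter        # 5L, inner cross-validation folds
conf$budget              # 40L in the debug configuration, 1000L in the full one
conf$measures[[1]]$id    # "mmce", the mlr measure used for evaluation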
136 changes: 136 additions & 0 deletions R/reinbo_table_env.R
@@ -0,0 +1,136 @@
Q_table_Env = R6::R6Class(
"Q_table_Env",
inherit = rlR::Environment,
public = list(
step_cnt = NULL,
s_r_d_info = NULL,
task = NULL,
mbo_cache = NULL, # store pipeline, hyperparameter set and corresponding performance for MBO
model_best_perf = NULL, # best performance of sampled model until now
model_trained = NULL, # performances of all configurations evaluated so far (used to track budget consumption)
budget = NULL, # maximum number of model evaluations allowed (budget)
measure = NULL,
cv_instance = NULL,
ctrl = NULL,
initialize = function(task, budget, measure, cv_instance, ctrl){
self$flag_continous = FALSE # non-continuous action
self$flag_tensor = FALSE # no use of cnn
self$ctrl = ctrl
self$act_cnt = self$ctrl$g_act_cnt # available operators/actions at each stage
self$state_dim = self$ctrl$g_state_dim
self$step_cnt = 0L
self$s_r_d_info = list(
state = "s",
reward = 0,
done = FALSE,
info = list())
self$task = task
self$mbo_cache = hash()
self$model_trained = NULL
self$budget = budget
self$measure = measure
self$cv_instance = cv_instance
},

evaluateArm = function(vec_arm) {
return(vec_arm)
},

# This function will be called at each step of the learning
step = function(action) {
operators = self$ctrl$g_operators[[names(self$ctrl$g_operators)[self$step_cnt + 1]]]
mod = action %% length(operators)
if (mod == 0){
operator = operators[length(operators)]
} else {
operator = operators[mod]
}
self$s_r_d_info[["state"]] = paste0(self$s_r_d_info[["state"]], "-[", operator, "]")
#print(self$s_r_d_info[["state"]])
self$s_r_d_info[["reward"]] = 0
self$step_cnt = self$step_cnt + 1L
if (self$step_cnt >= self$ctrl$g_max_depth) {
model = g_getRLPipeline(self$s_r_d_info[["state"]])
print(paste(model, collapse = " --> "))
# stop the RL agent if there is not enough budget left for this episode:
model_id = paste(model, collapse = "\t")
if (has.key(model_id, self$mbo_cache)){
require_budget = self$ctrl$g_mbo_iter*sum(getParamLengths(g_getParamSetFun(model)))
} else {
require_budget = (self$ctrl$g_init_design + self$ctrl$g_mbo_iter)*sum(getParamLengths(g_getParamSetFun(model)))
}
if (self$budget < require_budget) stop("total budget too small for reinbo table!")
if (self$budget - length(self$model_trained) < require_budget) {
self$agent$interact$idx_episode = self$agent$interact$maxiter
self$s_r_d_info[["done"]] = TRUE
} else {
# train model with hyperparameter tuning:
self$tuning(model)
self$s_r_d_info[["reward"]] = self$model_best_perf # best performance of the model until now
self$s_r_d_info[["done"]] = TRUE
#print(paste("Best Performance:", self$model_best_perf))
}
}
return(self$s_r_d_info)
},


# This function will be called at the beginning of the learning and at the end of each episode
reset = function() {
self$step_cnt = 0
self$s_r_d_info[["state"]] = "s"
self$s_r_d_info[["done"]] = FALSE
self$s_r_d_info
},


# Hyperparameter tuning for the generated model; returns the best performance as reward and updates mbo_cache
tuning = function(model) {
model_id = paste(model, collapse = "\t") # model_id used for lookup in mbo_cache
ps = g_getParamSetFun(model) # generate parameter set

# check if we have already evaluated this model

# if already in mbo_cache:
if (has.key(model_id, self$mbo_cache)){
previous_perf = max(self$mbo_cache[[model_id]][ , "y"]) # best performance until now
epis_unimproved = self$mbo_cache[[model_id]][1, "epis_unimproved"] # number of episodes in which this model's performance has not improved
# if the performance of this model has not improved for more than 2 episodes,
# stop further hyperparameter tuning:
if (epis_unimproved > 2) {
self$model_best_perf = previous_perf
} else {
# else: use parameter set and performance in memory as initial design
design = self$mbo_cache[[model_id]][ , -length(self$mbo_cache[[model_id]])]
# run several iterations of MBO:
run = mbo_fun(self$task, model, design, self$measure, self$cv_instance, self$ctrl)
# best accuracy:
self$model_best_perf = run$y
# update mbo_cache:
self$mbo_cache[[model_id]] = run$opt.path$env$path
# add result to self$model_trained:
new = run$opt.path$env$path$y[run$opt.path$env$dob != 0]
self$model_trained = c(self$model_trained, new)
# check if the performance of this model has been improved in this episode:
if (run$y <= previous_perf) {
self$mbo_cache[[model_id]]["epis_unimproved"] = epis_unimproved + 1
} else {
self$mbo_cache[[model_id]]["epis_unimproved"] = 0
}
}
} else {

# if not in mbo_cache:
design = generateDesign(n = self$ctrl$g_init_design*sum(getParamLengths(ps)), par.set = ps)
run = mbo_fun(self$task, model, design, self$measure, self$cv_instance, self$ctrl) # potential warning: generateDesign could only produce 3 points instead of 1000, see issue 442 of mlrMBO
self$model_best_perf = run$y
self$mbo_cache[[model_id]] = run$opt.path$env$path
self$mbo_cache[[model_id]]["epis_unimproved"] = 0
new = run$opt.path$env$path$y
self$model_trained = c(self$model_trained, new)
}
}
)
)
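To make the action-to-operator mapping in step() concrete, here is a small standalone sketch; the operator vector stands in for one stage of ctrl$g_operators, which is defined elsewhere in the package, so the operator names are only assumptions:

operators_stage1 = c("cpoScale()", "NA", "cpoPca(rank)")   # hypothetical operators for one pipeline stage
# an action index is folded onto the operator list with a modulo; remainder 0 maps to the last entry
pick_operator = function(action, operators) {
  mod = action %% length(operators)
  if (mod == 0) operators[length(operators)] else operators[mod]
}
pick_operator(4L, operators_stage1)   # "cpoScale()", since 4 %% 3 == 1
pick_operator(6L, operators_stage1)   # "cpoPca(rank)", since 6 %% 3 == 0
# after g_max_depth steps the state string reads e.g. "s-[cpoScale()]-[NA]-[classif.kknn]",
# which g_getRLPipeline() converts back into a model character vector before tuning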


109 changes: 109 additions & 0 deletions R/reinbo_table_func.R
@@ -0,0 +1,109 @@
# ML_ReinBo algorithm:
opt.reinbo.table = function(task, budget, measure, init_val, train_set = NULL, conf, ctrl) {
subTask = task
if (!is.null(train_set)) subTask = subsetTask(task, train_set)
inner_loop = makeResampleInstance("CV", iters = getGconf()$NCVInnerIter, stratify = TRUE, subTask)
env = runQTable(subTask, budget, measure, inner_loop, init_val, conf, ctrl)
mmodel = getBestModel(env$mbo_cache)
return(list(mmodel = mmodel, env = env))
}

# Predict function: evaluate best model on test dataset
lock_eval.reinbo.table = function(task, measure, train_set, test_set, best_model){
best_model = best_model$mmodel
lrn = genLearnerForBestModel(task, best_model, measure)
mod = train(lrn, task, subset = train_set)
pred = predict(mod, task, subset = test_set)
perf = performance(pred, measures = measure)
return(perf)
}


# Reinforcement learning part:
#' @param ctrl pipeline configuration
runQTable <- function(task, budget, measure, instance, init_val, conf, ctrl) {
env = Q_table_Env$new(task, budget, measure, instance, ctrl)
agent = initAgent(name = "AgentTable", env = env, conf = conf, q_init = init_val,
state_names = ctrl$g_state_names,
act_names_per_state = get_act_names_perf_state(ctrl$g_operators),
vis_after_episode = FALSE)
agent$learn(getGconf()$RLMaxEpisode)
return(env)
}

# MBO function: hyperparameter tuning
#' @param model character vector
mbo_fun = function(task, model, design, measure, cv_instance, ctrl) {
ps = g_getParamSetFun(model) # get parameter set from string representation of a model
object = makeSingleObjectiveFunction(
fn = function(x) {
-reinbo_mlr_fun(task, model, x, measure, cv_instance) + runif(1)/100000
},
par.set = ps,
has.simple.signature = FALSE,
minimize = FALSE
)
ctrlmbo = setMBOControlTermination(makeMBOControl(), iters = ctrl$g_mbo_iter * sum(getParamLengths(ps))) # g_mbo_iter (2) times the total number of parameters
run = mbo(object, design = design, control = ctrlmbo, show.info = FALSE)
## expected message from genoud during infill optimization: "Stopped because hard maximum generation limit was hit."
## Genoud combines evolutionary search algorithms with derivative-based (Newton or quasi-Newton) methods to solve difficult optimization problems.
## occasional warning: Warning in generateDesign(control$infill.opt.focussearch.points, ps.local, ...): generateDesign could only produce 20 points instead of 1000!
## see https://github.com/mlr-org/mlrMBO/issues/442; a fix is being worked on in https://github.com/mlr-org/mlrMBO/pull/444
return(run)
}


# Mlr function: calculate performance of the generated model given a specific param_set
reinbo_mlr_fun = function(task, model, param_set, measure, cv_instance){
lrn = genLearner.reinbo(task, model, param_set, measure)
perf = resample(lrn, task, resampling = cv_instance, measures = measure, show.info = FALSE)$aggr
return(perf)
}



# To get best model from mbo_cache of environment:
getBestModel = function(cache){
models = keys(cache)
results = data.frame(model = 0, y = 0)
for (i in 1:length(models)) {
results[i, 1] = models[i]
results[i, 2] = max(cache[[models[i]]][, "y"])
}
key = results[results$y == max(results$y), "model"][1]
ps = cache[[key]]
ps = ps[(ps$y == max(ps$y)), (colnames(ps) != "epis_unimproved")][1, ]
return(data.frame(Model = key, ps))
}
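To illustrate the cache layout that getBestModel() relies on, a toy example follows; the hyperparameter columns ("k", "C", "sigma") are invented here, and only the "y" and "epis_unimproved" columns are actually required by the code above:

library(hash)
cache = hash()
cache[["cpoScale()\tNA\tclassif.kknn"]] = data.frame(k = c(3, 7), y = c(0.91, 0.95), epis_unimproved = 0)
cache[["cpoScale()\tNA\tclassif.ksvm"]] = data.frame(C = c(1, 4), sigma = c(0.1, 0.2), y = c(0.93, 0.90), epis_unimproved = 1)
getBestModel(cache)   # returns the kknn entry: Model = "cpoScale()\tNA\tclassif.kknn", k = 7, y = 0.95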

genLearnerForBestModel = function(task, best_model, measure){
model = strsplit(as.character(best_model$Model), "\t")[[1]]
param_set = as.list(best_model)
param_set$Model = NULL
param_set$y = NULL
if (!is.null(param_set$C)) { param_set$C = 2^param_set$C }
if (!is.null(param_set$sigma)) { param_set$sigma = 2^param_set$sigma }
lrn = genLearner.reinbo(task, model, param_set, measure)
return(lrn)
}


genLearner.reinbo = function(task, model, param_set, measure){
p = getTaskNFeats(task)
lrn = sprintf("%s %%>>%% %s %%>>%% makeLearner('%s', par.vals = ps.learner)",
model[1], model[2], model[3])
lrn = gsub(pattern = "perc", x = lrn, replacement = "perc = param_set$perc", fixed = TRUE)
lrn = gsub(pattern = "rank", x = lrn, replacement = "rank = as.integer(max(1, round(p*param_set$rank)))", fixed = TRUE)
lrn = gsub(pattern = "NA %>>%", x = lrn, replacement = "", fixed = TRUE)
ps.learner = param_set
ps.learner$perc = NULL
ps.learner$rank = NULL
if (model[3] == "classif.ranger") {
p1 = p
if (!is.null(param_set$perc)) {p1 = max(1, round(p*param_set$perc))}
if (!is.null(param_set$rank)) {p1 = max(1, round(p*param_set$rank))}
ps.learner$mtry = max(1, as.integer(p1*param_set$mtry))
}
lrn = eval(parse(text = lrn))
return(lrn)
}
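Taken together, the two entry points above appear to be intended to be used per outer-CV split roughly as follows. This is only a sketch: conf (the rlR agent configuration) and ctrl (the pipeline/operator control object) are placeholders for objects defined elsewhere in the package, and init_val is just an arbitrary Q-table initialization value:

library(mlr)
task = iris.task
outer = makeResampleInstance("CV", iters = getGconf()$NCVOuterIter, stratify = TRUE, task)
train_set = outer$train.inds[[1]]
test_set = outer$test.inds[[1]]
# search for the best pipeline and its hyperparameters on the training part of the split
best = opt.reinbo.table(task, budget = getGconf()$budget, measure = mlr::mmce,
                        init_val = 0.5, train_set = train_set, conf = conf, ctrl = ctrl)
# refit the selected pipeline on the training part and score it on the held-out part
lock_eval.reinbo.table(task, measure = mlr::mmce, train_set, test_set, best_model = best)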
42 changes: 42 additions & 0 deletions R/reinbo_table_hyperpara_space.R
@@ -0,0 +1,42 @@
##### Parameter set of operators for hyperparameter tuning:
ps.ksvm = ParamHelpers::makeParamSet(
ParamHelpers::makeNumericParam("C", lower = -15, upper = 15, trafo = function(x) 2^x),
ParamHelpers::makeNumericParam("sigma", lower = -15, upper = 15, trafo = function(x) 2^x))

ps.ranger = ParamHelpers::makeParamSet(
ParamHelpers::makeNumericParam("mtry", lower = 1/10, upper = 1/1.5), ## range(p/10, p/1.5), p is the number of features
ParamHelpers::makeNumericParam("sample.fraction", lower = .1, upper = 1))

ps.xgboost = ParamHelpers::makeParamSet(
ParamHelpers::makeNumericParam("eta", lower = .001, upper = .3),
ParamHelpers::makeIntegerParam("max_depth", lower = 1L, upper = 15L),
ParamHelpers::makeNumericParam("subsample", lower = 0.5, upper = 1),
ParamHelpers::makeNumericParam("colsample_bytree", lower = 0.5, upper = 1),
ParamHelpers::makeNumericParam("min_child_weight", lower = 0, upper = 50)
)

ps.kknn = ParamHelpers::makeParamSet(ParamHelpers::makeIntegerParam("k", lower = 1L, upper = 20L))

ps.naiveBayes = ParamHelpers::makeParamSet(ParamHelpers::makeNumericParam("laplace", lower = 0.01, upper = 100))

ps.filter = ParamHelpers::makeParamSet(ParamHelpers::makeNumericParam("perc", lower = .1, upper = 1))

ps.pca = ParamHelpers::makeParamSet(ParamHelpers::makeNumericParam("rank", lower = .1, upper = 1)) ## range(p/10, p), p is the number of features



##### Get parameter set for generated model:
g_getParamSetFun = function(model) {
ps.classif = sub(pattern = "classif", replacement = "ps", x = model[3])
ps.classif = eval(parse(text = ps.classif)) # hyperparameter set for classifier
if (model[2] == "NA") {
return(ps.classif)
} else if (length(grep(pattern = "perc", x = model)) > 0) {
return(c(ps.classif, ps.filter))
} else {
return(c(ps.classif, ps.pca))
}
}
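A short sketch of what g_getParamSetFun() returns for concrete model vectors; the three elements are assumed to be (preprocessing operator, filter/PCA operator or "NA", mlr classifier), following how the function indexes model[2] and model[3], and the operator names themselves are hypothetical:

g_getParamSetFun(c("cpoScale()", "NA", "classif.ksvm"))
# -> ps.ksvm only (C and sigma), because the second slot is "NA"
g_getParamSetFun(c("cpoScale()", "cpoFilterAnova(perc)", "classif.kknn"))
# -> ps.kknn combined with ps.filter, because the model string contains "perc"
g_getParamSetFun(c("cpoScale()", "cpoPca(rank)", "classif.naiveBayes"))
# -> ps.naiveBayes combined with ps.pca (the remaining branch)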


