From ec4c15663b7ffbee6b48a53e4236e49909db8fea Mon Sep 17 00:00:00 2001
From: Bagrat Ter-Akopyan <bagrat.akopyan@gmail.com>
Date: Tue, 3 Sep 2019 14:56:27 +0200
Subject: [PATCH 1/5] add convert oml task to mlr3

---
 R/convertOMLDataSetToMlr3.R                   | 123 ++++++++++++++++++
 inst/examples/convertOMLDataSetToMlr3.R       |   5 +
 .../test_local_convertOMLDataSetToMlr3.R      |  45 +++++++
 3 files changed, 173 insertions(+)
 create mode 100644 R/convertOMLDataSetToMlr3.R
 create mode 100644 inst/examples/convertOMLDataSetToMlr3.R
 create mode 100644 tests/testthat/test_local_convertOMLDataSetToMlr3.R

diff --git a/R/convertOMLDataSetToMlr3.R b/R/convertOMLDataSetToMlr3.R
new file mode 100644
index 0000000..6e2d154
--- /dev/null
+++ b/R/convertOMLDataSetToMlr3.R
@@ -0,0 +1,123 @@
+#' @title Convert an OpenML data set to mlr3 task.
+#'
+#' @description
+#' Converts an \code{\link{OMLDataSet}} to a \code{\link[mlr3]{Task}}.
+#'
+#' @param obj [\code{\link{OMLDataSet}}]\cr
+#'   The object that should be converted.
+#' @param mlr.task.id [\code{character(1)}]\cr
+#'   Id string for \code{\link[mlr3]{Task}} object.
+#'   The strings \code{<oml.data.name>}, \code{<oml.data.id>} and \code{<oml.data.version>}
+#'   will be replaced by their respective values contained in the \code{\link{OMLDataSet}} object.
+#'   Default is \code{<oml.data.name>}.
+#' @param task.type [\code{character(1)}]\cr
+#'   As we only pass the data set, we need to define the task type manually.
+#'   Possible values are: \dQuote{Supervised Classification}, \dQuote{Supervised Regression},
+#'   \dQuote{Survival Analysis}.
+#'   Default is \code{NULL} which means to guess it from the target column in the
+#'   data set. If that is a factor or a logical, we choose classification.
+#'   If it is numeric we choose regression. In all other cases an error is thrown.
+#' @param target [\code{character}]\cr
+#'   The target for the classification/regression task.
+#'   Default is the \code{default.target.attribute} of the \code{\link{OMLDataSetDescription}}.
+#' @param ignore.flagged.attributes [\code{logical(1)}]\cr
+#'   Should those features that are listed in the data set description slot \dQuote{ignore.attribute}
+#'   be removed?
+#'   Default is \code{TRUE}.
+#' @param drop.levels [\code{logical(1)}]\cr
+#'   Should empty factor levels be dropped in the data?
+#'   Default is \code{TRUE}.
+#' @param fix.colnames [\code{logical(1)}]\cr
+#'   Should colnames of the data be fixed using \code{\link[base]{make.names}}?
+#'   Default is \code{TRUE}.
+#' @template arg_verbosity
+#' @return [\code{\link[mlr3]{Task}}].
+#' @family data set-related functions
+#' @example /inst/examples/convertOMLDataSetToMlr3.R
+#' @export
+convertOMLDataSetToMlr3 = function(
+  obj,
+  mlr.task.id = "<oml.data.name>",
+  task.type = NULL,
+  target = obj$desc$default.target.attribute,
+  ignore.flagged.attributes = TRUE,
+  drop.levels = TRUE,
+  fix.colnames = TRUE,
+  verbosity = NULL) {
+
+  assertClass(obj, "OMLDataSet")
+  assertSubset(target, obj$colnames.new)
+  assertFlag(ignore.flagged.attributes)
+  assertFlag(drop.levels)
+  assertFlag(fix.colnames)
+
+  data = obj$data
+  desc = obj$desc
+
+  # no task type given: guess it from the target column(s) of the raw data
+  if (is.null(task.type))
+    task.type = guessTaskType(data[, target])
+  assertChoice(task.type, getValidTaskTypes())
+
+  # drop the columns listed in desc$ignore.attribute (scalar condition, hence `&&`)
+  if (ignore.flagged.attributes && any(!is.na(desc$ignore.attribute))) {
+    keep.cols = obj$colnames.old %nin% desc$ignore.attribute
+    data = data[, keep.cols, drop = FALSE]
+  }
+
+  # drop unused factor levels
+  if (drop.levels)
+    data = droplevels(data)
+
+  # make the column names syntactically valid and fix the target name(s) the same way
+  if (fix.colnames) {
+    colnames(data) = make.names(colnames(data), unique = TRUE)
+    target = make.names(target, unique = TRUE)
+  }
+
+  # NOTE(review): `verbosity` is kept for interface compatibility but is not used
+  # here; the `fixup` value previously derived from it was never read.
+
+  # "Multilabel" passes assertChoice() above but has no branch here -> stopf() fallback
+  mlr.task = switch(task.type,
+    "Supervised Classification" = mlr3::TaskClassif$new(id = desc$name, backend = data, target = target),
+    "Supervised Regression" = mlr3::TaskRegr$new(id = desc$name, backend = data, target = target),
+    "Survival Analysis" = mlr3survival::TaskSurv$new(id = desc$name, backend = data, target = target),
+    stopf("Encountered currently unsupported task type: %s", task.type)
+  )
+
+  if (!is.null(mlr.task.id))
+    mlr.task$id = replaceOMLDataSetString(mlr.task.id, obj)
+
+  return(mlr.task)
+}
+
+replaceOMLDataSetString = function(string, data.set) {
+  with.id = stri_replace_all_fixed(string, "<oml.data.id>", data.set$desc$id)
+  with.name = stri_replace_all_fixed(with.id, "<oml.data.name>", data.set$desc$name)
+  stri_replace_all_fixed(with.name, "<oml.data.version>", data.set$desc$version) # placeholders filled
+}
+
+# @title Guess the OpenML task type from the target column(s).
+#
+# @param target [vector | data.frame]
+#   Target values; a data.frame is only accepted if all its columns are logical.
+# @return [character(1)] "Multilabel", "Supervised Classification" or
+#   "Supervised Regression"; any other kind of target raises an error.
+guessTaskType = function(target) {
+  if (inherits(target, "data.frame")) {
+    assertDataFrame(target, types = "logical")
+    return("Multilabel")
+  }
+  if (is.factor(target) || is.logical(target))
+    return("Supervised Classification")
+  if (is.numeric(target))
+    return("Supervised Regression")
+
+  stopf("Cannot guess task.type from data!")
+}
+
+getValidTaskTypes = function() { # values accepted for `task.type` in convertOMLDataSetToMlr3
+  c("Supervised Classification", "Supervised Regression", "Survival Analysis", "Multilabel")
+}
+
diff --git a/inst/examples/convertOMLDataSetToMlr3.R b/inst/examples/convertOMLDataSetToMlr3.R
new file mode 100644
index 0000000..b81efe1
--- /dev/null
+++ b/inst/examples/convertOMLDataSetToMlr3.R
@@ -0,0 +1,5 @@
+# \dontrun{
+# 	library("mlr3")
+# 	autosOML = getOMLDataSet(data.id = 9)
+# 	autosMlr3 = convertOMLDataSetToMlr3(autosOML)
+# }
diff --git a/tests/testthat/test_local_convertOMLDataSetToMlr3.R b/tests/testthat/test_local_convertOMLDataSetToMlr3.R
new file mode 100644
index 0000000..c591efa
--- /dev/null
+++ b/tests/testthat/test_local_convertOMLDataSetToMlr3.R
@@ -0,0 +1,45 @@
+context("convertOMLDataSetToMlr3")
+
+test_that("convertOMLDataSetToMlr3", {
+  with_test_cache({
+    ds = getOMLDataSet(10)
+
+    expect_is_mlr_task = function(mlr.task, ds) {
+      expect_equal(mlr.task$task_type, "classif")
+      expect_equal(mlr.task$nrow, nrow(ds$data))
+      expect_equal(ds$desc$default.target.attribute, mlr.task$target_names)
+    }
+
+    # now create the task
+    mlr.task = convertOMLDataSetToMlr3(ds)
+    expect_equal(mlr.task$task_type, "classif")
+
+    # now modify dataset by hand (no more server calls) to check
+    # ignore attributes stuff:
+    # Define the first two attributes as ignored attributes
+    ds$desc$ignore.attribute = colnames(ds$data[, 1:2])
+
+    mlr.task = convertOMLDataSetToMlr3(ds, ignore.flagged.attributes = TRUE)
+    expect_is_mlr_task(mlr.task, ds)
+    # we removed two attributes (and the target column is not considered here)
+    #expect_equal(sum(mlr.task$task.desc$n.feat), ncol(ds$data) - 3L)
+    expect_equal(mlr.task$ncol, ncol(ds$data) - 2L)
+
+    # pass faulty parameters
+    expect_error(convertOMLDataSetToMlr3(ds, task.type = "Nonexistent task type"), "element of")
+
+    # check setting mlr task id
+    expect_equal(convertOMLDataSetToMlr3(ds)$id, ds$desc$name)
+    expect_equal(convertOMLDataSetToMlr3(ds, mlr.task.id = "<oml.data.name>.<oml.data.id>")$id,
+      sprintf("%s.%s", ds$desc$name, ds$desc$id))
+    expect_equal(convertOMLDataSetToMlr3(ds, mlr.task.id = "test")$id, "test")
+    expect_equal(convertOMLDataSetToMlr3(ds, mlr.task.id = "<oml.data.id>")$id, as.character(ds$desc$id))
+    expect_equal(convertOMLDataSetToMlr3(ds, mlr.task.id = "<oml.data.name>")$id, as.character(ds$desc$name))
+    expect_equal(convertOMLDataSetToMlr3(ds, mlr.task.id = "<oml.data.version>")$id, as.character(ds$desc$version))
+    expect_equal(convertOMLDataSetToMlr3(ds, mlr.task.id = "<oml.task.id>")$id, "<oml.task.id>")
+
+    # check if conversion to regression task works
+    ds$desc$target.features = ds$desc$default.target.attribute = "no_of_nodes_in"
+    expect_equal(convertOMLDataSetToMlr3(ds)$task_type, "regr")
+  })
+})

From a63f22d54b5081ca1c9cf29493fed1e4e306ae50 Mon Sep 17 00:00:00 2001
From: Bagrat Ter-Akopyan <bagrat.akopyan@gmail.com>
Date: Mon, 9 Sep 2019 17:03:52 +0200
Subject: [PATCH 2/5] add convert oml splits to mlr3

---
 R/convertOMLSplitsToMlr3.R                    | 24 ++++++++++++++++++
 .../test_local_convertOMLSplitsToMlr3.R       | 25 +++++++++++++++++++
 2 files changed, 49 insertions(+)
 create mode 100644 R/convertOMLSplitsToMlr3.R
 create mode 100644 tests/testthat/test_local_convertOMLSplitsToMlr3.R

diff --git a/R/convertOMLSplitsToMlr3.R b/R/convertOMLSplitsToMlr3.R
new file mode 100644
index 0000000..d64823d
--- /dev/null
+++ b/R/convertOMLSplitsToMlr3.R
@@ -0,0 +1,24 @@
+# Instantiate the mlr3 resampling matching an OpenML estimation procedure (`predict` is unused).
+convertOMLSplitsToMlr3 = function(estim.proc, mlr.task, predict = "both") {
+  type = estim.proc$type
+  n.repeats = estim.proc$parameters[["number_repeats"]]
+  n.folds = estim.proc$parameters[["number_folds"]]
+  percentage = as.numeric(estim.proc$parameters[["percentage"]])
+  stratified = estim.proc$parameters[["stratified_sampling"]]
+  stratified = !is.null(stratified) && stratified == "true"
+
+  if (type == "crossvalidation") {
+    if (n.repeats == 1L)
+      mlr.rdesc = mlr3::rsmp("cv", folds = n.folds, stratify = stratified)
+    else
+      mlr.rdesc = mlr3::rsmp("repeated_cv", reps = n.repeats, folds = n.folds, stratify = stratified)
+  } else if (type == "holdout") {
+    # NOTE(review): assumes `percentage` is the test-set share in percent -- confirm
+    mlr.rdesc = mlr3::rsmp("holdout")
+    if (length(percentage) == 1L && !is.na(percentage))
+      mlr.rdesc$param_set$values$ratio = 1 - percentage / 100
+  } else {
+    stopf("Unsupported estimation procedure type: %s", type)
+  }
+  return(mlr.rdesc$instantiate(mlr.task))
+}
diff --git a/tests/testthat/test_local_convertOMLSplitsToMlr3.R b/tests/testthat/test_local_convertOMLSplitsToMlr3.R
new file mode 100644
index 0000000..09b2462
--- /dev/null
+++ b/tests/testthat/test_local_convertOMLSplitsToMlr3.R
@@ -0,0 +1,25 @@
+context("convertOMLSplitsToMlr3")
+
+test_that("convertOMLSplitsToMlr3", {
+  with_test_cache({
+    task = getOMLTask(59)
+    mlr.task = convertOMLTaskToMlr3(task)$mlr.task
+
+    oml.types = c("crossvalidation", "holdout")
+    mlr.types = c("cv", "holdout")
+
+    for (i in seq_along(oml.types)) {
+      task$input$estimation.procedure[['type']]= oml.types[i]
+      if (oml.types[i] == "holdout") {
+        task$input$estimation.procedure$parameters$percentage = "50"
+      }
+      splits = convertOMLSplitsToMlr3(task$input$estimation.procedure, mlr.task)
+      expect_is(splits, "Resampling")
+      expect_equal(splits$id, mlr.types[i])
+    }
+
+    # pass invalid estim.proc
+    task$input$estimation.procedure$type = "blabla"
+    expect_error(convertOMLSplitsToMlr3(task$input$estimation.procedure, mlr.task), "Unsupported estimation procedure type: blabla")
+  })
+})

From a239b3accdaace8f45fc77dcd1cf68b27f66ed3c Mon Sep 17 00:00:00 2001
From: Bagrat Ter-Akopyan <bagrat.akopyan@gmail.com>
Date: Mon, 9 Sep 2019 17:53:23 +0200
Subject: [PATCH 3/5] fixed lintr

---
 tests/testthat/test_local_convertOMLSplitsToMlr3.R | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tests/testthat/test_local_convertOMLSplitsToMlr3.R b/tests/testthat/test_local_convertOMLSplitsToMlr3.R
index 09b2462..98e713a 100644
--- a/tests/testthat/test_local_convertOMLSplitsToMlr3.R
+++ b/tests/testthat/test_local_convertOMLSplitsToMlr3.R
@@ -9,7 +9,7 @@ test_that("convertOMLSplitsToMlr3", {
     mlr.types = c("cv", "holdout")
 
     for (i in seq_along(oml.types)) {
-      task$input$estimation.procedure[['type']]= oml.types[i]
+      task$input$estimation.procedure$type = oml.types[i]
       if (oml.types[i] == "holdout") {
         task$input$estimation.procedure$parameters$percentage = "50"
       }

From af4fa54deae37e5d6f1b6d2888d1c6edf3ae3ae0 Mon Sep 17 00:00:00 2001
From: Bagrat Ter-Akopyan <bagrat.akopyan@gmail.com>
Date: Tue, 5 Nov 2019 10:21:13 +0100
Subject: [PATCH 4/5] modified DESCRIPTION and NAMESPACE

---
 DESCRIPTION | 4 +++-
 NAMESPACE   | 2 ++
 2 files changed, 5 insertions(+), 1 deletion(-)

diff --git a/DESCRIPTION b/DESCRIPTION
index 085242a..5302a08 100644
--- a/DESCRIPTION
+++ b/DESCRIPTION
@@ -29,7 +29,9 @@ Suggests:
     rmarkdown,
     R.rsp,
     lintr (>= 1.0.1),
-    rex
+    rex,
+    mlr3,
+    mlr3survival
 Imports:
     backports (>= 1.1.0),
     BBmisc (>= 1.11),
diff --git a/NAMESPACE b/NAMESPACE
index 6e399bf..0f3fcf3 100644
--- a/NAMESPACE
+++ b/NAMESPACE
@@ -31,10 +31,12 @@ export(clearOMLCache)
 export(convertMlrLearnerToOMLFlow)
 export(convertMlrTaskToOMLDataSet)
 export(convertOMLDataSetToMlr)
+export(convertOMLDataSetToMlr3)
 export(convertOMLFlowToMlr)
 export(convertOMLMlrRunToBMR)
 export(convertOMLRunToBMR)
 export(convertOMLTaskToMlr)
+export(convertOMLTaskToMlr3)
 export(deleteOMLObject)
 export(extractOMLStudyIds)
 export(getCachedOMLDataSetStatus)

From d9f29afd29bd4711ac159dd58f539dcff1d21075 Mon Sep 17 00:00:00 2001
From: Bagrat Ter-Akopyan <bagrat.akopyan@gmail.com>
Date: Tue, 5 Nov 2019 10:48:20 +0100
Subject: [PATCH 5/5] modified .travis.yml

---
 .travis.yml | 1 +
 1 file changed, 1 insertion(+)

diff --git a/.travis.yml b/.travis.yml
index 294c9de..fd36c96 100644
--- a/.travis.yml
+++ b/.travis.yml
@@ -18,6 +18,7 @@ r_packages:
 r_github_packages:
   - mlr-org/farff
   - mlr-org/mlr
+  - mlr-org/mlr3survival
   - berndbischl/BBmisc
   - berndbischl/ParamHelpers
   - r-lib/httr