openml · pfistfl · Nov 19, 2019 · Nov 20, 2019 · Nov 22, 2019
diff --git a/NAMESPACE b/NAMESPACE
@@ -33,6 +33,7 @@ export(convertMlrTaskToOMLDataSet)
 export(convertOMLDataSetToMlr)
 export(convertOMLFlowToMlr)
 export(convertOMLMlrRunToBMR)
+export(convertOMLRunEvalsToDT)
 export(convertOMLRunToBMR)
 export(convertOMLTaskToMlr)
 export(deleteOMLObject)

diff --git a/R/convertOMLSetupParamsToDT.R b/R/convertOMLSetupParamsToDT.R
@@ -0,0 +1,47 @@
+# https://test.openml.org/api/v1/evaluation/setup/list/flow/2117/task/403/limit/100 works, while https://test.openml.org/api/v1/evaluation/setup/list/flow/6794 returns nothing.
+
+# Result size limits are okay, as long as I can somehow reliably iterate with the offset, e.g.
+# https://test.openml.org/api/v1/evaluation/setup/list/flow/2117/task/403/function/predictive_accuracy/limit/1/offset/2
+
+
+
+#' @title Extract Parameters from an OpenML run into a flat structure
+#'
+#' @description
+#' Expands the per-run parameter tables stored in the \code{setup_parameters}
+#' column into one column per parameter and binds those columns to the
+#' remaining columns of \code{run.evals}.
+#'
+#' @param run.evals [\code{data.frame}]\cr
+#'   Result of calling \code{listOMLRunEvaluations(..., setup = TRUE)}.
+#'   Must contain a \code{setup_parameters} column whose elements provide the
+#'   columns \code{parameter_name}, \code{value} and \code{data_type}.
+#' @param drop.constant [\code{logical(1)}]\cr
+#'  Should constant columns be dropped before returning the result?
+#'
+#' @return [\code{\link{data.table}}].
+#' @family run-related functions
+#' @export
+convertOMLRunEvalsToDT = function(run.evals, drop.constant = TRUE) {
+  assert_data_frame(run.evals)
+  assert_true(!is.null(run.evals$setup_parameters))
+  assert_flag(drop.constant)
+  setup_params = run.evals$setup_parameters
+  out = lapply(setup_params, function(params) {
+    # Drop the boolean "verbose" parameter. The filtered table has to be
+    # assigned back, otherwise the filter has no effect. %in% never returns
+    # NA, so rows with a missing name or type are kept instead of being lost.
+    params = params[
+      !(params$parameter_name %in% "verbose" & params$data_type %in% "boolean"), ]
+    # One single-row table per run: one column per remaining parameter.
+    convertValueByType(params$parameter_name, params$value, params$data_type)
+  })
+  # fill = TRUE: runs may carry different sets of parameters.
+  dt = rbindlist(out, fill = TRUE)
+  # Keep only columns that take more than one distinct value.
+  if (drop.constant) dt = dt[, vlapply(dt, function(x) length(unique(x)) > 1), with = FALSE]
+  run.evals$setup_parameters = NULL
+  # NOTE(review): cbind assumes exactly one row in dt per row of run.evals;
+  # runs without any parameters may not contribute a row - confirm.
+  return(cbind(run.evals, dt))
+}
+
+
+# Cast every parameter value to the R type matching its declared data type
+# and return the values as a data.table with one column per parameter.
+# Note that this is very unreliable.
+convertValueByType = function(parameter_name, value, type) {
+  # Textual spellings of "no value" that are mapped to NA.
+  na.strings = c("None", "none", "Null", "null")
+
+  # Casts a single value according to a single type label; values with an
+  # unrecognised type label are returned as (cleaned) character.
+  castValue = function(val, val.type) {
+    val[val %in% na.strings] = NA
+    # Strip HTML-escaped double quotes.
+    val = gsub("&quot;", "", val)
+    if (val.type %in% c("boolean", "bool")) {
+      return(as.logical(val))
+    }
+    if (val.type %in% c("float", "number")) {
+      return(as.numeric(val))
+    }
+    if (val.type %in% c("int", "integer", "int or None", "integer or None")) {
+      return(suppressWarnings(as.integer(val)))
+    }
+    val
+  }
+
+  converted = Map(castValue, value, type)
+  # Column names are the parameter names without escaped quotes.
+  converted = setNames(converted, gsub("&quot;", "", parameter_name))
+  as.data.table(converted)
+}
diff --git a/R/listOMLRunEvaluations.R b/R/listOMLRunEvaluations.R
@@ -1,19 +1,28 @@
 .listOMLRunEvaluations = function(task.id = NULL, flow.id = NULL, run.id = NULL,
   uploader.id = NULL, tag = NULL, limit = NULL, offset = NULL, verbosity = NULL,
-  evaluation.measure = NULL, show.array.measures = FALSE, extend.flow.name = TRUE) {
+  evaluation.measure = NULL, show.array.measures = FALSE, extend.flow.name = TRUE,
+  setup = FALSE) {
 
   if (is.null(task.id) && is.null(flow.id) && is.null(run.id) && is.null(uploader.id) && is.null(tag))
     stop("Please hand over at least one of the following: task.id, flow.id, run.id, uploader.id, tag")
   if (is.null(evaluation.measure))
     showInfo(verbosity, "Suggestion: Use the 'evaluation.measure' argument to restrict the results to only one measure.")
 
-  api.call = generateAPICall(api.call = "json/evaluation/list", task.id = task.id,
+  if (!setup) api.call = "json/evaluation/list" else api.call = "json/evaluation/setup/list"
+  api.call = generateAPICall(api.call = api.call, task.id = task.id,
     flow.id = flow.id, run.id = run.id, uploader.id = uploader.id,
     tag = tag, evaluation.measure = evaluation.measure, limit = limit, offset = offset)
 
   content = doAPICall(api.call, file = NULL, method = "GET", verbosity = verbosity)
   if (is.null(content)) return(data.frame())
-  evals = fromJSON(txt = content, simplifyVector = FALSE)$evaluations$evaluation
+  lst_content = fromJSON(txt = content, simplifyVector = FALSE)
+  evals = lst_content$evaluations$evaluation
+
+  if (setup) {
+    param_list = lapply(evals, function(x) {
+      parameters = as.data.table(cleanupSetupParameters(x$parameters))
+    })
+  }
 
   evals = rbindlist(lapply(evals, function(x) {
     if (is.null(x$value)) x$value = NA
@@ -75,7 +84,7 @@
       values = list(flow.version = flow.version, flow.source = flow.source, learner.name = learner.name)),
       stringsAsFactors = FALSE)
   }
-
+  if (setup) evals$setup_parameters = param_list
   return(evals)
 }
 
@@ -101,6 +110,9 @@
 #' @param extend.flow.name [\code{logical(1)}]\cr
 #'  Adds a column \code{flow.version} that refers to the version number of the flow and a column \code{flow.source} containing the prefix of the flow that specifies the source of the flow (i.e. weka, R) and a column \code{learner.name} that refers to the learner.
 #'  Default is \code{TRUE}.
+#' @param setup [\code{logical(1)}]\cr
+#'  Adds a column \code{setup_parameters} that contains the run's setup, i.e. the hyperparameters set
+#'  for the run.
 #'
 #' @return [\code{data.frame}].
 #' @family list

diff --git a/R/listOMLSetup.R b/R/listOMLSetup.R
@@ -11,32 +11,7 @@
   setup = fromJSON(txt = content)$setups$setup
   sid = data.frame(join_id = 1:length(setup$setup_id), setup_id = setup$setup_id)
 
-  # Get parameters and clean them up
-  param = setup$parameter
-  if (!is.null(names(param))) {
-    # if elements have a name, it refers to parameter
-    param = param[!vlapply(param, function(x) length(x) == 0)]
-    param = as.data.frame(param, stringsAsFactors = FALSE)
-    param = cbind(param, join_id = 1, stringsAsFactors = FALSE)
-  } else {
-    # add names
-    param = setNames(param, 1:length(param))
-    # filter out NULL or empty elements
-    param = param[!vlapply(param, function(x) length(x) == 0)]
-    # inside each element, replace empty values with NA
-    param = lapply(param, function(x) {
-      replace(x, which(vlapply(x, function(i) length(i) == 0)), NA_character_)
-    })
-    param = rbindlist(param, fill = TRUE, idcol = "join_id")
-    param = as.data.frame(param, stringsAsFactors = FALSE)
-  }
-
-  list.cols = colnames(param)[vlapply(param, is.list)]
-  for (col in list.cols) {
-    ind = which(vlapply(param[[col]], function(i) length(i) == 0))
-    param[[col]][ind] = NA_character_
-    param[[col]] = unlist(param[[col]], recursive = FALSE)
-  }
+  param = cleanupSetupParameters(setup$parameter)
 
   ret = merge(param, sid)
   ret$id = ret$join_id = NULL
@@ -66,3 +41,33 @@
 #' @export
 #' @example inst/examples/listOMLSetup.R
 listOMLSetup = memoise(.listOMLSetup)
+
+
+# Get parameters and clean them up.
+#
+# param: the parsed 'parameter' element(s) of a setup. Either a named object
+#   (a single set of parameters) or an unnamed list with one element per
+#   parameter set.
+# Returns a data.frame in which empty entries are replaced by NA and list
+#   columns are flattened. A 'join_id' column identifies the element of
+#   'param' each row came from. Empty input yields an empty data.frame.
+cleanupSetupParameters = function(param) {
+  # Nothing to clean up. Without this guard, 1:length(param) would be c(1, 0)
+  # and assigning the names below would fail.
+  if (length(param) == 0L) return(data.frame())
+
+  if (!is.null(names(param))) {
+    # if elements have a name, it refers to parameter
+    param = param[!vlapply(param, function(x) length(x) == 0)]
+    param = as.data.frame(param, stringsAsFactors = FALSE)
+    param = cbind(param, join_id = 1, stringsAsFactors = FALSE)
+  } else {
+    # add names
+    param = setNames(param, seq_along(param))
+    # filter out NULL or empty elements
+    param = param[!vlapply(param, function(x) length(x) == 0)]
+    # inside each element, replace empty values with NA
+    param = lapply(param, function(x) {
+      replace(x, which(vlapply(x, function(i) length(i) == 0)), NA_character_)
+    })
+    # the names assigned above end up in the 'join_id' column
+    param = rbindlist(param, fill = TRUE, idcol = "join_id")
+    param = as.data.frame(param, stringsAsFactors = FALSE)
+  }
+
+  # flatten remaining list columns, replacing empty entries with NA first
+  list.cols = colnames(param)[vlapply(param, is.list)]
+  for (col in list.cols) {
+    ind = which(vlapply(param[[col]], function(i) length(i) == 0))
+    param[[col]][ind] = NA_character_
+    param[[col]] = unlist(param[[col]], recursive = FALSE)
+  }
+  return(param)
+}
diff --git a/man/chunkOMLlist.Rd b/man/chunkOMLlist.Rd
diff --git a/man/convertOMLMlrRunToBMR.Rd b/man/convertOMLMlrRunToBMR.Rd
diff --git a/man/convertOMLRunEvalsToDT.Rd b/man/convertOMLRunEvalsToDT.Rd
diff --git a/man/convertOMLRunToBMR.Rd b/man/convertOMLRunToBMR.Rd
diff --git a/man/deleteOMLObject.Rd b/man/deleteOMLObject.Rd
diff --git a/man/getOMLRun.Rd b/man/getOMLRun.Rd
diff --git a/man/listOMLRunEvaluations.Rd b/man/listOMLRunEvaluations.Rd
diff --git a/man/listOMLRuns.Rd b/man/listOMLRuns.Rd
diff --git a/man/makeOMLRun.Rd b/man/makeOMLRun.Rd
diff --git a/man/makeOMLRunParameter.Rd b/man/makeOMLRunParameter.Rd
diff --git a/man/tagging.Rd b/man/tagging.Rd
diff --git a/man/uploadOMLRun.Rd b/man/uploadOMLRun.Rd
diff --git a/tests/testthat/test_server_listOMLRunEvaluations.R b/tests/testthat/test_server_listOMLRunEvaluations.R
@@ -30,3 +30,16 @@ test_that("listOMLRunEvaluations", {
     expect_error(listOMLRunEvaluations(task.id = task.id, evaluation.measure = "m"))
   })
 })
+
+# Distinct description so that a failure here is not confused with the
+# plain "listOMLRunEvaluations" test in this file.
+test_that("listOMLRunEvaluations with setup = TRUE", {
+  with_main_server({
+    setOMLConfig(server = "https://test.openml.org/api/v1")
+    task.id = 6L
+
+    # request the evaluations together with the setup parameters of each run
+    run.evals = .listOMLRunEvaluations(task.id = task.id, evaluation.measure = "area_under_roc_curve", setup = TRUE, limit = 20)
+    expect_data_frame(run.evals, min.rows = 1L, col.names = "unique")
+    # setup = TRUE must add the 'setup_parameters' column to the usual columns
+    expect_subset(c("run.id", "task.id", "setup.id", "flow.id", "flow.name", "flow.source", "data.name", "setup_parameters"),
+      names(run.evals))
+  })
+})