Add tests and make a proposal for changes

openml · Nov 20, 2019 · 957dca1 · giuseppec · Nov 20, 2019 · 957dca1
1 parent 116b271
commit 957dca1
Show file tree

Hide file tree

Showing 5 changed files with 98 additions and 157 deletions.
diff --git a/R/convertOMLSetupParamsToDT.R b/R/convertOMLSetupParamsToDT.R
@@ -0,0 +1,47 @@
+# https://test.openml.org/api/v1/evaluation/setup/list/flow/2117/task/403/limit/100 works, while https://test.openml.org/api/v1/evaluation/setup/list/flow/6794 returns nothing.
+
+# Result size limits are okay, as long as I can somehow reliably iterate with the offset, i.e.
+# https://test.openml.org/api/v1/evaluation/setup/list/flow/2117/task/403/function/predictive_accuracy/limit/1/offset/2
+
+
+
+#' @title Extract parameters from OpenML run evaluations into a flat structure
+#'
+#' @description
+#' Converts every element of the \code{setup_parameters} list column into a
+#' one-row table of typed parameter values (via \code{convertValueByType}),
+#' stacks these rows and binds them column-wise to \code{run.evals}.
+#' Parameters named \code{"verbose"} with data type \code{"boolean"} are
+#' removed before the conversion.
+#'
+#' @param run.evals [\code{data.frame}]\cr
+#'   Result of calling \code{listOMLRunEvaluations(..., setup = TRUE)}; it must
+#'   contain a \code{setup_parameters} column.
+#' @param drop.constant [\code{logical(1)}]\cr
+#'   Should constant parameter columns be dropped before returning the result?
+#'
+#' @return \code{run.evals} without its \code{setup_parameters} column, extended
+#'   by one column per extracted parameter.
+#' @family run-related functions
+#' @export
+convertOMLRunEvalsToDT = function(run.evals, drop.constant = TRUE) {
+  assert_data_frame(run.evals)
+  assert_true(!is.null(run.evals$setup_parameters))
+  assert_flag(drop.constant)
+  setup_params = run.evals$setup_parameters
+  # each element is used with data.table syntax below, i.e. it is presumably a
+  # data.table with columns parameter_name, value and data_type -- TODO confirm
+  out = lapply(setup_params, function(params) {
+    # The filtered table has to be assigned back, otherwise the filter has no
+    # effect. %in% (instead of ==) never yields NA, so rows with missing
+    # names or types are kept rather than silently dropped.
+    params = params[!(parameter_name %in% "verbose" & data_type %in% "boolean"), ]
+    # NOTE(review): a setup that has no parameters left after this filter
+    # contributes no row to rbindlist(), so the cbind() below would no longer
+    # line up with run.evals -- confirm whether such setups can occur.
+    params[, convertValueByType(parameter_name, value, data_type)]
+  })
+  # fill = TRUE: different setups may expose different parameter sets
+  dt = rbindlist(out, fill = TRUE)
+  if (drop.constant) {
+    # keep only the columns that take more than one distinct value
+    dt = dt[, vlapply(dt, function(x) length(unique(x)) > 1), with = FALSE]
+  }
+  run.evals$setup_parameters = NULL
+  return(cbind(run.evals, dt))
+}
+
+
+# Convert parameter values according to each parameter's declared type.
+# Returns a data.table with one column per parameter; "&quot;" is stripped
+# from both the values and the parameter names.
+# Note that this is very unreliable.
+convertValueByType = function(parameter_name, value, type) {
+  logical.types = c("boolean", "bool")
+  numeric.types = c("float", "number")
+  integer.types = c("int", "integer", "int or None", "integer or None")
+
+  # converts a single value `v` whose declared type is `t`
+  convertOne = function(v, t) {
+    is.missing = v == "None" | v == "none" | v == "Null" | v == "null"
+    v[is.missing] = NA
+    v = gsub("&quot;", "", v)
+    if (t %in% logical.types) {
+      v = as.logical(v)
+    } else if (t %in% numeric.types) {
+      v = as.numeric(v)
+    } else if (t %in% integer.types) {
+      # values that cannot be parsed as integer silently become NA
+      v = suppressWarnings(as.integer(v))
+    }
+    v
+  }
+
+  converted = Map(convertOne, value, type)
+  names(converted) = gsub("&quot;", "", parameter_name)
+  as.data.table(converted)
+}
diff --git a/R/getOMLRunEvaluations.R b/R/getOMLRunEvaluations.R
diff --git a/R/listOMLRunEvaluations.R b/R/listOMLRunEvaluations.R
@@ -15,10 +15,15 @@
 
   content = doAPICall(api.call, file = NULL, method = "GET", verbosity = verbosity)
   if (is.null(content)) return(data.frame())
-  browser()
   lst_content = fromJSON(txt = content, simplifyVector = FALSE)
   evals = lst_content$evaluations$evaluation
 
+  if (setup) {
+    param_list = lapply(evals, function(x) {
+      parameters = as.data.table(cleanupSetupParameters(x$parameters))[-25,]
+    })
+  }
+
   evals = rbindlist(lapply(evals, function(x) {
     if (is.null(x$value)) x$value = NA
     if (is.null(x$array_data)) x$array_data = NA else x$array_data = collapse(x$array_data)
@@ -79,7 +84,7 @@
       values = list(flow.version = flow.version, flow.source = flow.source, learner.name = learner.name)),
       stringsAsFactors = FALSE)
   }
-
+  if (setup) evals$setup_parameters = param_list
   return(evals)
 }
 

diff --git a/R/listOMLSetup.R b/R/listOMLSetup.R
@@ -11,32 +11,7 @@
   setup = fromJSON(txt = content)$setups$setup
   sid = data.frame(join_id = 1:length(setup$setup_id), setup_id = setup$setup_id)
 
-  # Get parameters and clean them up
-  param = setup$parameter
-  if (!is.null(names(param))) {
-    # if elements have a name, it refers to parameter
-    param = param[!vlapply(param, function(x) length(x) == 0)]
-    param = as.data.frame(param, stringsAsFactors = FALSE)
-    param = cbind(param, join_id = 1, stringsAsFactors = FALSE)
-  } else {
-    # add names
-    param = setNames(param, 1:length(param))
-    # filter out NULL or empty elements
-    param = param[!vlapply(param, function(x) length(x) == 0)]
-    # inside each element, replace empty values with NA
-    param = lapply(param, function(x) {
-      replace(x, which(vlapply(x, function(i) length(i) == 0)), NA_character_)
-    })
-    param = rbindlist(param, fill = TRUE, idcol = "join_id")
-    param = as.data.frame(param, stringsAsFactors = FALSE)
-  }
-
-  list.cols = colnames(param)[vlapply(param, is.list)]
-  for (col in list.cols) {
-    ind = which(vlapply(param[[col]], function(i) length(i) == 0))
-    param[[col]][ind] = NA_character_
-    param[[col]] = unlist(param[[col]], recursive = FALSE)
-  }
+  param = cleanupSetupParameters(setup$parameter)
 
   ret = merge(param, sid)
   ret$id = ret$join_id = NULL
@@ -66,3 +41,33 @@
 #' @export
 #' @example inst/examples/listOMLSetup.R
 listOMLSetup = memoise(.listOMLSetup)
+
+
+# Get parameters and clean them up.
+#
+# Turns the raw `parameter` element of a parsed setup response into a flat
+# data.frame with an additional `join_id` column.
+#
+# @param param [list]
+#   Either a named list (treated as the parameters of a single setup, which
+#   gets join_id = 1) or an unnamed list with one element per entry; in the
+#   latter case join_id is the position of the entry in the list.
+# @return [data.frame] in which empty values are replaced by NA_character_
+#   and list columns are flattened.
+cleanupSetupParameters = function(param) {
+  if (!is.null(names(param))) {
+    # if elements have a name, it refers to parameter
+    param = param[!vlapply(param, function(x) length(x) == 0)]
+    param = as.data.frame(param, stringsAsFactors = FALSE)
+    param = cbind(param, join_id = 1, stringsAsFactors = FALSE)
+  } else {
+    # add names; seq_along() (unlike 1:length()) also works for an empty list,
+    # where 1:0 would produce two names for zero elements and fail
+    param = setNames(param, seq_along(param))
+    # filter out NULL or empty elements
+    param = param[!vlapply(param, function(x) length(x) == 0)]
+    # inside each element, replace empty values with NA
+    param = lapply(param, function(x) {
+      replace(x, which(vlapply(x, function(i) length(i) == 0)), NA_character_)
+    })
+    # the names assigned above end up in the join_id column
+    param = rbindlist(param, fill = TRUE, idcol = "join_id")
+    param = as.data.frame(param, stringsAsFactors = FALSE)
+  }
+
+  # flatten remaining list columns, filling empty entries with NA first so
+  # that unlist() does not drop them
+  list.cols = colnames(param)[vlapply(param, is.list)]
+  for (col in list.cols) {
+    ind = which(vlapply(param[[col]], function(i) length(i) == 0))
+    param[[col]][ind] = NA_character_
+    param[[col]] = unlist(param[[col]], recursive = FALSE)
+  }
+  return(param)
+}
diff --git a/tests/testthat/test_server_listOMLRunEvaluations.R b/tests/testthat/test_server_listOMLRunEvaluations.R
@@ -30,3 +30,16 @@ test_that("listOMLRunEvaluations", {
     expect_error(listOMLRunEvaluations(task.id = task.id, evaluation.measure = "m"))
   })
 })
+
+# Distinct description so that a failure here can be told apart from the
+# "listOMLRunEvaluations" test defined earlier in this file.
+test_that("listOMLRunEvaluations with setup = TRUE returns setup_parameters", {
+  with_main_server({
+    setOMLConfig(server = "https://test.openml.org/api/v1")
+    task.id = 6L
+
+    # request at most 20 evaluations together with their setup parameters
+    run.evals = .listOMLRunEvaluations(task.id = task.id, evaluation.measure = "area_under_roc_curve", setup = TRUE, limit = 20)
+    expect_data_frame(run.evals, min.rows = 1L, col.names = "unique")
+    expect_subset(c("run.id", "task.id", "setup.id", "flow.id", "flow.name", "flow.source", "data.name", "setup_parameters"),
+      names(run.evals))
+  })
+})