Skip to content

Commit

Permalink
Add tests and make a proposal for changes
Browse files Browse the repository at this point in the history
  • Loading branch information
pfistfl committed Nov 20, 2019
1 parent 116b271 commit 957dca1
Show file tree
Hide file tree
Showing 5 changed files with 98 additions and 157 deletions.
47 changes: 47 additions & 0 deletions R/convertOMLSetupParamsToDT.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,47 @@
# https://test.openml.org/api/v1/evaluation/setup/list/flow/2117/task/403/limit/100 works, while https://test.openml.org/api/v1/evaluation/setup/list/flow/6794 returns nothing.

# Result size limits are okay, as long as I can reliably iterate over the results with the offset, e.g.
# https://test.openml.org/api/v1/evaluation/setup/list/flow/2117/task/403/function/predictive_accuracy/limit/1/offset/2



#' @title Extract Parameters from an OpenML run into a flat structure
#'
#' @description
#' Converts every element of \code{run.evals$setup_parameters} into a one-row
#' table with one column per parameter, stacks these rows and binds them
#' column-wise to the remaining columns of \code{run.evals}.
#'
#' @param run.evals [\code{data.frame}]\cr
#'   Result of calling listOMLRunEvaluations(..., setup = TRUE).
#'   Must contain a \code{setup_parameters} column.
#' @param drop.constant [\code{logical(1)}]\cr
#'   Should constant columns be dropped before returning the result?
#'
#' @return [\code{\link{data.table}}].
#' @family run-related functions
#' @export
convertOMLRunEvalsToDT = function(run.evals, drop.constant = TRUE) {
  assert_data_frame(run.evals)
  assert_true(!is.null(run.evals$setup_parameters))
  assert_flag(drop.constant)
  setup_params = run.evals$setup_parameters
  out = lapply(setup_params, function(params) {
    # Drop the boolean "verbose" parameter. The result has to be assigned,
    # otherwise the filter has no effect. `%in%` never yields NA, so rows with
    # a missing name or type are kept instead of being silently removed.
    params = params[!(parameter_name %in% "verbose" & data_type %in% "boolean"), ]
    params[, convertValueByType(parameter_name, value, data_type)]
  })
  # Setups may have different parameters; missing ones are filled with NA.
  dt = rbindlist(out, fill = TRUE)
  if (drop.constant) {
    dt = dt[, vlapply(dt, function(x) length(unique(x)) > 1), with = FALSE]
  }
  # The nested column is replaced by the flat parameter columns.
  run.evals$setup_parameters = NULL
  return(cbind(run.evals, dt))
}


# Convert values according to a parameter's type.
# Note that this is very unreliable: the type strings are matched against a
# fixed set of known spellings, values of any other type are returned as
# character, and values that cannot be parsed as integer silently become NA.
#
# parameter_name: parameter names, used as column names of the result
#                 (double quotes are removed).
# value:          parameter values, one per parameter name.
# type:           declared data type of each value.
# Returns a data.table with one column per parameter.
convertValueByType = function(parameter_name, value, type) {
  null.tokens = c("None", "none", "Null", "null")
  value = Map(function(v, t) {
    # Unquoted null markers become NA *before* quotes are stripped, so a
    # quoted string such as "\"none\"" is kept as the string none.
    v[v %in% null.tokens] = NA
    v = gsub("\"", "", v, fixed = TRUE)
    if (t %in% c("boolean", "bool")) v = as.logical(v)
    else if (t %in% c("float", "number")) v = as.numeric(v)
    else if (t %in% c("int", "integer", "int or None", "integer or None")) v = suppressWarnings(as.integer(v))
    return(v)
  }, value, type)
  names(value) = gsub("\"", "", parameter_name, fixed = TRUE)
  return(as.data.table(value))
}
129 changes: 0 additions & 129 deletions R/getOMLRunEvaluations.R

This file was deleted.

9 changes: 7 additions & 2 deletions R/listOMLRunEvaluations.R
Original file line number Diff line number Diff line change
Expand Up @@ -15,10 +15,15 @@

content = doAPICall(api.call, file = NULL, method = "GET", verbosity = verbosity)
if (is.null(content)) return(data.frame())
browser()
lst_content = fromJSON(txt = content, simplifyVector = FALSE)
evals = lst_content$evaluations$evaluation

if (setup) {
param_list = lapply(evals, function(x) {
parameters = as.data.table(cleanupSetupParameters(x$parameters))[-25,]

This comment has been minimized.

Copy link
@giuseppec

giuseppec Nov 20, 2019

Member

what is the[-25, ] for :D? Does this make sense in every API call?

})
}

evals = rbindlist(lapply(evals, function(x) {
if (is.null(x$value)) x$value = NA
if (is.null(x$array_data)) x$array_data = NA else x$array_data = collapse(x$array_data)
Expand Down Expand Up @@ -79,7 +84,7 @@
values = list(flow.version = flow.version, flow.source = flow.source, learner.name = learner.name)),
stringsAsFactors = FALSE)
}

if (setup) evals$setup_parameters = param_list
return(evals)
}

Expand Down
57 changes: 31 additions & 26 deletions R/listOMLSetup.R
Original file line number Diff line number Diff line change
Expand Up @@ -11,32 +11,7 @@
setup = fromJSON(txt = content)$setups$setup
sid = data.frame(join_id = 1:length(setup$setup_id), setup_id = setup$setup_id)

# Get parameters and clean them up
param = setup$parameter
if (!is.null(names(param))) {
# if elements have a name, it refers to parameter
param = param[!vlapply(param, function(x) length(x) == 0)]
param = as.data.frame(param, stringsAsFactors = FALSE)
param = cbind(param, join_id = 1, stringsAsFactors = FALSE)
} else {
# add names
param = setNames(param, 1:length(param))
# filter out NULL or empty elements
param = param[!vlapply(param, function(x) length(x) == 0)]
# inside each element, replace empty values with NA
param = lapply(param, function(x) {
replace(x, which(vlapply(x, function(i) length(i) == 0)), NA_character_)
})
param = rbindlist(param, fill = TRUE, idcol = "join_id")
param = as.data.frame(param, stringsAsFactors = FALSE)
}

list.cols = colnames(param)[vlapply(param, is.list)]
for (col in list.cols) {
ind = which(vlapply(param[[col]], function(i) length(i) == 0))
param[[col]][ind] = NA_character_
param[[col]] = unlist(param[[col]], recursive = FALSE)
}
param = cleanupSetupParameters(setup$parameter)

ret = merge(param, sid)
ret$id = ret$join_id = NULL
Expand Down Expand Up @@ -66,3 +41,33 @@
#' @export
#' @example inst/examples/listOMLSetup.R
listOMLSetup = memoise(.listOMLSetup)


# Get parameters and clean them up.
#
# param: the `parameter` element of a parsed setup response. Either a single
#        named parameter entry or an unnamed list with one entry per setup.
# Returns a data.frame with one row per parameter and a `join_id` column that
# identifies the position of the setup the parameter belongs to. Empty input
# yields a zero-row data.frame that only contains `join_id`.
cleanupSetupParameters = function(param) {
  # `1:length(param)` is c(1, 0) for empty input, which makes setNames() fail;
  # there is nothing to clean up in that case.
  if (length(param) == 0L) {
    return(data.frame(join_id = integer(0)))
  }

  if (!is.null(names(param))) {
    # if elements have a name, it refers to parameter
    param = param[!vlapply(param, function(x) length(x) == 0)]
    param = as.data.frame(param, stringsAsFactors = FALSE)
    param = cbind(param, join_id = 1, stringsAsFactors = FALSE)
  } else {
    # add names (before filtering, so that join_id keeps the original position)
    param = setNames(param, seq_along(param))
    # filter out NULL or empty elements
    param = param[!vlapply(param, function(x) length(x) == 0)]
    # inside each element, replace empty values with NA
    param = lapply(param, function(x) {
      replace(x, which(vlapply(x, function(i) length(i) == 0)), NA_character_)
    })
    param = rbindlist(param, fill = TRUE, idcol = "join_id")
    param = as.data.frame(param, stringsAsFactors = FALSE)
  }

  # flatten list columns into atomic vectors, using NA for empty entries
  list.cols = colnames(param)[vlapply(param, is.list)]
  for (col in list.cols) {
    ind = which(vlapply(param[[col]], function(i) length(i) == 0))
    param[[col]][ind] = NA_character_
    param[[col]] = unlist(param[[col]], recursive = FALSE)
  }
  return(param)
}
13 changes: 13 additions & 0 deletions tests/testthat/test_server_listOMLRunEvaluations.R
Original file line number Diff line number Diff line change
Expand Up @@ -30,3 +30,16 @@ test_that("listOMLRunEvaluations", {
expect_error(listOMLRunEvaluations(task.id = task.id, evaluation.measure = "m"))
})
})

# Distinct description: a second test named "listOMLRunEvaluations" already
# exists in this file, which makes failure reports ambiguous.
test_that("listOMLRunEvaluations with setup = TRUE returns setup parameters", {
  with_main_server({
    # NOTE(review): the server is switched to the test instance although we are
    # inside with_main_server() -- confirm this is intended.
    setOMLConfig(server = "https://test.openml.org/api/v1")
    task.id = 6L

    # request evaluations together with the setup parameters of each run
    run.evals = .listOMLRunEvaluations(task.id = task.id, evaluation.measure = "area_under_roc_curve", setup = TRUE, limit = 20)
    expect_data_frame(run.evals, min.rows = 1L, col.names = "unique")
    expect_subset(c("run.id", "task.id", "setup.id", "flow.id", "flow.name", "flow.source", "data.name", "setup_parameters"),
      names(run.evals))
  })
})

1 comment on commit 957dca1

@lintr-bot
Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

R/convertOMLSetupParamsToDT.R:28:85: style: Put spaces around all infix operators (except exponentiation).

if (drop.constant) dt = dt[, vlapply(dt, function(x) length(unique(x)) > 1), with =FALSE]
                                                                                    ^~

R/listOMLRunEvaluations.R:23:76: style: Commas should always have a space after.

parameters = as.data.table(cleanupSetupParameters(x$parameters))[-25,]
                                                                           ^

Please sign in to comment.