Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add runevals #451

Open
wants to merge 3 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
1 change: 1 addition & 0 deletions NAMESPACE
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,7 @@ export(convertMlrTaskToOMLDataSet)
export(convertOMLDataSetToMlr)
export(convertOMLFlowToMlr)
export(convertOMLMlrRunToBMR)
export(convertOMLRunEvalsToDT)
export(convertOMLRunToBMR)
export(convertOMLTaskToMlr)
export(deleteOMLObject)
Expand Down
47 changes: 47 additions & 0 deletions R/convertOMLSetupParamsToDT.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,47 @@
# https://test.openml.org/api/v1/evaluation/setup/list/flow/2117/task/403/limit/100 works, while https://test.openml.org/api/v1/evaluation/setup/list/flow/6794 returns nothing.

# Result size limits are okay, as long as I can somehow reliably iterate with the offset, i.e.
# https://test.openml.org/api/v1/evaluation/setup/list/flow/2117/task/403/function/predictive_accuracy/limit/1/offset/2



#' @title Extract parameters from OpenML run evaluations into a flat structure
#'
#' @description
#' Expands the \code{setup_parameters} list column of \code{run.evals} into one
#' column per parameter and binds these columns to the remaining columns of
#' \code{run.evals}.
#'
#' @param run.evals [\code{data.frame}]\cr
#'   Result of calling \code{listOMLRunEvaluations(..., setup = TRUE)}.
#'   Must contain a column \code{setup_parameters}.
#' @param drop.constant [\code{logical(1)}]\cr
#'   Should constant parameter columns be dropped before returning the result?
#'   Default is \code{TRUE}.
#'
#' @return [\code{\link{data.table}}].
#' @family run-related functions
#' @export
convertOMLRunEvalsToDT = function(run.evals, drop.constant = TRUE) {
  assert_data_frame(run.evals)
  assert_true(!is.null(run.evals$setup_parameters))
  assert_flag(drop.constant)

  setup_params = run.evals$setup_parameters
  out = lapply(setup_params, function(params) {
    # Remove the boolean "verbose" parameter. The filtered table has to be
    # assigned back, otherwise the filter has no effect. %in% is used instead
    # of == so that NA entries never produce NA row indices.
    is.verbose.row = params$parameter_name %in% "verbose" & params$data_type %in% "boolean"
    keep.row = !is.verbose.row
    params = params[keep.row, ]
    # presumably 'params' is a data.table here (the j expression below relies
    # on data.table semantics) -- TODO confirm against the caller
    params[, convertValueByType(params$parameter_name, params$value, params$data_type)]
  })

  # One row per run; parameters missing for a run are filled with NA.
  dt = rbindlist(out, fill = TRUE)
  if (drop.constant) {
    is.varying = vlapply(dt, function(x) length(unique(x)) > 1)
    dt = dt[, is.varying, with = FALSE]
  }

  # The list column is replaced by the flattened parameter columns.
  run.evals$setup_parameters = NULL
  return(cbind(run.evals, dt))
}


# Convert values according to a parameter's type.
# Note that this is very unreliable: the type strings are free text, and only
# the spellings listed below are recognised; everything else stays character.
#
# parameter_name, value, type: parallel vectors, one entry per parameter
#   (presumably character -- TODO confirm against the caller).
# Returns a data.table with one column per parameter, named after the
# (unquoted) parameter name.
convertValueByType = function(parameter_name, value, type) {
  # Quotes may show up either literally or as the HTML entity "&quot;";
  # both are stripped.
  # NOTE(review): the pattern in the original line was not parseable R
  # (three consecutive double quotes) -- confirm which form the server sends.
  quote.pattern = "\"|&quot;"
  missing.tokens = c("None", "none", "Null", "null")

  value = Map(function(v, t) {
    v[v %in% missing.tokens] = NA
    v = gsub(quote.pattern, "", v)
    if (t %in% c("boolean", "bool")) v = as.logical(v)
    else if (t %in% c("float", "number")) v = as.numeric(v)
    else if (t %in% c("int", "integer", "int or None", "integer or None")) v = suppressWarnings(as.integer(v))
    return(v)
  }, value, type)
  names(value) = gsub(quote.pattern, "", parameter_name)
  return(as.data.table(value))
}
20 changes: 16 additions & 4 deletions R/listOMLRunEvaluations.R
Original file line number Diff line number Diff line change
@@ -1,19 +1,28 @@
.listOMLRunEvaluations = function(task.id = NULL, flow.id = NULL, run.id = NULL,
uploader.id = NULL, tag = NULL, limit = NULL, offset = NULL, verbosity = NULL,
evaluation.measure = NULL, show.array.measures = FALSE, extend.flow.name = TRUE) {
evaluation.measure = NULL, show.array.measures = FALSE, extend.flow.name = TRUE,
setup = FALSE) {

if (is.null(task.id) && is.null(flow.id) && is.null(run.id) && is.null(uploader.id) && is.null(tag))
stop("Please hand over at least one of the following: task.id, flow.id, run.id, uploader.id, tag")
if (is.null(evaluation.measure))
showInfo(verbosity, "Suggestion: Use the 'evaluation.measure' argument to restrict the results to only one measure.")

api.call = generateAPICall(api.call = "json/evaluation/list", task.id = task.id,
if (!setup) api.call = "json/evaluation/list" else api.call = "json/evaluation/setup/list"
api.call = generateAPICall(api.call = api.call, task.id = task.id,
flow.id = flow.id, run.id = run.id, uploader.id = uploader.id,
tag = tag, evaluation.measure = evaluation.measure, limit = limit, offset = offset)

content = doAPICall(api.call, file = NULL, method = "GET", verbosity = verbosity)
if (is.null(content)) return(data.frame())
evals = fromJSON(txt = content, simplifyVector = FALSE)$evaluations$evaluation
lst_content = fromJSON(txt = content, simplifyVector = FALSE)
evals = lst_content$evaluations$evaluation

if (setup) {
param_list = lapply(evals, function(x) {
parameters = as.data.table(cleanupSetupParameters(x$parameters))
})
}

evals = rbindlist(lapply(evals, function(x) {
if (is.null(x$value)) x$value = NA
Expand Down Expand Up @@ -75,7 +84,7 @@
values = list(flow.version = flow.version, flow.source = flow.source, learner.name = learner.name)),
stringsAsFactors = FALSE)
}

if (setup) evals$setup_parameters = param_list
return(evals)
}

Expand All @@ -101,6 +110,9 @@
#' @param extend.flow.name [\code{logical(1)}]\cr
#' Adds a column \code{flow.version} that refers to the version number of the flow and a column \code{flow.source} containing the prefix of the flow that specifies the source of the flow (i.e. weka, R) and a column \code{learner.name} that refers to the learner.
#' Default is \code{TRUE}.
#' @param setup [\code{logical(1)}]\cr
#' Adds a column \code{setup_parameters} that contains the run's setup, i.e. the hyperparameters set
#' for the run. Default is \code{FALSE}.
#'
#' @return [\code{data.frame}].
#' @family list
Expand Down
57 changes: 31 additions & 26 deletions R/listOMLSetup.R
Original file line number Diff line number Diff line change
Expand Up @@ -11,32 +11,7 @@
setup = fromJSON(txt = content)$setups$setup
sid = data.frame(join_id = 1:length(setup$setup_id), setup_id = setup$setup_id)

# Get parameters and clean them up
param = setup$parameter
if (!is.null(names(param))) {
# if elements have a name, it refers to parameter
param = param[!vlapply(param, function(x) length(x) == 0)]
param = as.data.frame(param, stringsAsFactors = FALSE)
param = cbind(param, join_id = 1, stringsAsFactors = FALSE)
} else {
# add names
param = setNames(param, 1:length(param))
# filter out NULL or empty elements
param = param[!vlapply(param, function(x) length(x) == 0)]
# inside each element, replace empty values with NA
param = lapply(param, function(x) {
replace(x, which(vlapply(x, function(i) length(i) == 0)), NA_character_)
})
param = rbindlist(param, fill = TRUE, idcol = "join_id")
param = as.data.frame(param, stringsAsFactors = FALSE)
}

list.cols = colnames(param)[vlapply(param, is.list)]
for (col in list.cols) {
ind = which(vlapply(param[[col]], function(i) length(i) == 0))
param[[col]][ind] = NA_character_
param[[col]] = unlist(param[[col]], recursive = FALSE)
}
param = cleanupSetupParameters(setup$parameter)

ret = merge(param, sid)
ret$id = ret$join_id = NULL
Expand Down Expand Up @@ -66,3 +41,33 @@
#' @export
#' @example inst/examples/listOMLSetup.R
listOMLSetup = memoise(.listOMLSetup)


# Get parameters and clean them up.
#
# param: the parsed 'parameter' element of a setup. Either a single named
#   parameter record (names present) or an unnamed list of parameter records.
# Returns a data.frame with one row per parameter and a 'join_id' column that
# identifies the list element the row came from (always 1 for a single
# named record). Empty input yields a data.frame with zero rows.
cleanupSetupParameters = function(param) {
  # Nothing to clean up; without this guard the unnamed branch below would
  # fail when assigning names to an empty object.
  if (length(param) == 0L)
    return(data.frame(join_id = integer(0)))

  is.empty = function(x) length(x) == 0

  if (!is.null(names(param))) {
    # if elements have a name, it refers to parameter
    param = param[!vlapply(param, is.empty)]
    param = as.data.frame(param, stringsAsFactors = FALSE)
    param = cbind(param, join_id = 1, stringsAsFactors = FALSE)
  } else {
    # add names (seq_along is safe for any length, unlike 1:length(param))
    param = setNames(param, seq_along(param))
    # filter out NULL or empty elements
    param = param[!vlapply(param, is.empty)]
    # inside each element, replace empty values with NA
    param = lapply(param, function(x) {
      replace(x, which(vlapply(x, is.empty)), NA_character_)
    })
    # the element names become the 'join_id' column
    param = rbindlist(param, fill = TRUE, idcol = "join_id")
    param = as.data.frame(param, stringsAsFactors = FALSE)
  }

  # Flatten remaining list columns: empty entries become NA, then the list
  # is unlisted one level.
  list.cols = colnames(param)[vlapply(param, is.list)]
  for (col in list.cols) {
    ind = which(vlapply(param[[col]], is.empty))
    param[[col]][ind] = NA_character_
    param[[col]] = unlist(param[[col]], recursive = FALSE)
  }
  return(param)
}
2 changes: 1 addition & 1 deletion man/chunkOMLlist.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

3 changes: 2 additions & 1 deletion man/convertOMLMlrRunToBMR.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

28 changes: 28 additions & 0 deletions man/convertOMLRunEvalsToDT.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

1 change: 1 addition & 0 deletions man/convertOMLRunToBMR.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

1 change: 1 addition & 0 deletions man/deleteOMLObject.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

1 change: 1 addition & 0 deletions man/getOMLRun.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

7 changes: 6 additions & 1 deletion man/listOMLRunEvaluations.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

1 change: 1 addition & 0 deletions man/listOMLRuns.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

1 change: 1 addition & 0 deletions man/makeOMLRun.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

1 change: 1 addition & 0 deletions man/makeOMLRunParameter.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

1 change: 1 addition & 0 deletions man/tagging.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

1 change: 1 addition & 0 deletions man/uploadOMLRun.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

13 changes: 13 additions & 0 deletions tests/testthat/test_server_listOMLRunEvaluations.R
Original file line number Diff line number Diff line change
Expand Up @@ -30,3 +30,16 @@ test_that("listOMLRunEvaluations", {
expect_error(listOMLRunEvaluations(task.id = task.id, evaluation.measure = "m"))
})
})

# Checks that requesting run evaluations with setup = TRUE returns the usual
# evaluation columns plus the 'setup_parameters' column.
# The description differs from the preceding test so that failures can be
# attributed unambiguously.
test_that("listOMLRunEvaluations with setup = TRUE", {
  with_main_server({
    setOMLConfig(server = "https://test.openml.org/api/v1")
    task.id = 6L

    # request the evaluations of one measure together with the run setups
    run.evals = .listOMLRunEvaluations(task.id = task.id, evaluation.measure = "area_under_roc_curve", setup = TRUE, limit = 20)
    expect_data_frame(run.evals, min.rows = 1L, col.names = "unique")
    expect_subset(c("run.id", "task.id", "setup.id", "flow.id", "flow.name", "flow.source", "data.name", "setup_parameters"),
      names(run.evals))
  })
})