Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

filterProposedPoints for discrete values #444

Open
wants to merge 8 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
1 change: 1 addition & 0 deletions NEWS.md
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@

* Bugfix: `plot(opt.state)` now also works for Param Sets with transformations.
* Bugfix: Infill optimization with CMAES now supports restarts.
* Proposed points are now filtered more reliably (if requested) to avoid points that are too close to each other; filtering also works for discrete parameter spaces.

# mlrMBO 1.1.2

Expand Down
4 changes: 0 additions & 4 deletions R/checkStuff.R
Original file line number Diff line number Diff line change
Expand Up @@ -135,9 +135,5 @@ checkStuff = function(fun, design, learner, control) {
stopf("For multi-objective 'mspot' infil.opt must be set to 'nsga2'!")
}

# propose point filtering
# FIXME: implement something that works for integer and discrete params
if (control$filter.proposed.points && hasDiscrete(par.set))
stop("Filtering proposed points currently not implemented for discrete parameters!")
return(control)
}
48 changes: 29 additions & 19 deletions R/filterProposedPoints.R
Original file line number Diff line number Diff line change
Expand Up @@ -15,29 +15,39 @@ filterProposedPoints = function(prop, opt.state) {
# prepare stuff
n = nrow(prop$prop.points)
design = getOptPathX(opt.path)
calcMaxMetric = function(x, y) max(abs(x - y))
to.delete = rep(FALSE, n)
disc.params = getParamIds(filterParamsDiscrete(par.set), repeated = TRUE, with.nr = TRUE)

# look at min distance from i-point to current set (design + accepted)
for (i in seq_len(n)) {
pp = prop$prop.points[i, ]
min.dist = min(apply(design, 1L, calcMaxMetric, y = pp))
# if too close, mark i-point, otherwise add it to set
if (min.dist < control$filter.proposed.points.tol)
to.delete[i] = TRUE
else
design = rbind(design, pp)
# Minimum distance between one proposed point and the rows of a design.
# The distance between two points is the maximum absolute coordinate
# difference (see calcMaxMetric below).
#
# @param pp one-row data.frame holding the proposed point
# @param design data.frame of points to compare against
# @return the minimum distance; it is the value of the last evaluated
#   assignment inside the if/else, so it is returned invisibly
#
# Reads `disc.params` (ids of the discrete parameters) from the enclosing
# function's scope.
calcDistance = function(pp, design) {
calcMaxMetric = function(x, y) max(abs(x - y))
if (length(disc.params) > 0) {
# if we have discrete params, subset the design to the rows that match the discrete values in pp, then calculate the distance on the numeric subset
disc.pp = pp[, disc.params, drop = FALSE]
# merge() joins on the columns shared with disc.pp, keeping only design rows
# whose discrete values equal those of pp
this.design = merge(design, disc.pp)
this.design = this.design[, names(this.design) %nin% disc.params, drop = FALSE]
this.pp = pp[, names(pp) %nin% disc.params, drop = FALSE]
# NOTE(review): if no design row matches, min() presumably receives an empty
# vector (Inf plus a warning) -- confirm this is the intended outcome
min.dist = min(apply(this.design, 1L, calcMaxMetric, y = this.pp))
} else {
min.dist = min(apply(design, 1L, calcMaxMetric, y = pp))
}
}

# for now replace removed design points with random points,
# we leave all other data in prop as it is, we have prop.type "random_filter"
n.replace = sum(to.delete)

if (n.replace > 0) {
# FIXME: we might want to do something smarter here. how about augmenting the current design?
prop$prop.points[to.delete, ] = generateRandomDesign(n.replace, par.set)
prop$prop.type[to.delete] = "random_filter"
# Check each proposed point against the current set (design plus the
# proposals accepted so far). A point that is too close to the set, or whose
# distance is NA, is replaced by a random point; at most 100 random
# replacements are tried per proposed point.
for (i in seq_len(n)) {
  pp = prop$prop.points[i, ]
  min.dist = calcDistance(pp, design)
  trial = 0
  # min.dist can be NA for discrete only subspaces
  # The parentheses are required: `&&` binds tighter than `||`, so without
  # them the trial limit is ignored while min.dist is NA and the loop may
  # never terminate.
  while ((is.na(min.dist) || min.dist < control$filter.proposed.points.tol) && trial < 100) {
    pp = generateRandomDesign(1, par.set)[1, ]
    min.dist = calcDistance(pp, design)
    trial = trial + 1
  }
  # the (possibly replaced) point joins the set that later points are checked against
  design = rbind(design, pp)
  if (trial > 0) {
    # at least one random replacement happened: store it and flag it in prop.type
    prop$prop.points[i, ] = pp
    prop$prop.type[i] = "random_filter"
  }
}

return(prop)

}
2 changes: 1 addition & 1 deletion R/setMBOControlInfill.R
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,7 @@
#' candidate points is smaller than \code{filter.proposed.points.tol}, are replaced by random points.
#' If enabled, the column entry for \code{prop.type} is set to \dQuote{random_filter} in the resulting \code{opt.path},
#' so you can see whether such a replacement happened.
#' This does only work for numeric parameter sets without any discrete parameters.
#' If the parameter set contains discrete values, the distance is calculated on the numeric subset of the design points that match the discrete values of the proposal.
#' Default is \code{FALSE}.
#' @param filter.proposed.points.tol [\code{numeric(1)}]\cr
#' Tolerance value filtering of proposed points. We currently use a maximum metric
Expand Down
2 changes: 1 addition & 1 deletion man/setMBOControlInfill.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

34 changes: 30 additions & 4 deletions tests/testthat/test_filter_proposed.R
Original file line number Diff line number Diff line change
@@ -1,18 +1,44 @@
context("filter proposed points")

test_that("filter proposed points", {
lrn = makeLearner("regr.km", predict.type = "se")

# now check min dist, set to "inf" so we always replace
ctrl = makeMBOControl(propose.points = 2L)
ctrl = setMBOControlTermination(ctrl, iters = 1L)
ctrl = setMBOControlTermination(ctrl, iters = 2L)
ctrl = setMBOControlInfill(ctrl, crit = crit.cb1,
filter.proposed.points = TRUE, filter.proposed.points.tol = 1000,
opt = "focussearch", opt.focussearch.points = 100L, opt.focussearch.maxit = 1L)
ctrl = setMBOControlMultiPoint(ctrl, method = "cb")

res = mbo(testf.fsphere.1d, testd.fsphere.1d, learner = lrn, control = ctrl)
res = mbo(testf.fsphere.1d, testd.fsphere.1d, control = ctrl)
op = as.data.frame(res$opt.path)
expect_true(all(op$prop.type[seq_row(testd.fsphere.1d)] != "random_filter"))
expect_true(all(op$prop.type[-seq_row(testd.fsphere.1d)] == "random_filter"))

# test for functions with discrete values
res = mbo(testf.mixed, testd.mixed, control = ctrl)
op = as.data.frame(res$opt.path)
expect_true(all(op$prop.type[seq_row(testd.mixed)] != "random_filter"))
expect_true(all(op$prop.type[-seq_row(testd.mixed)] == "random_filter"))

# more complicated case with dependencies
fun = function(x) {
if (x$method == "a") return(x$number)
if (x$cat == "Y") return(0.9)
return(0.1)
}
par.set = makeParamSet(
makeDiscreteParam("method", values = c("a", "b")),
makeNumericParam("number", lower = 0, upper = 1, requires = quote(method == "a")),
makeDiscreteParam("cat", values = c("Y", "Z"), requires = quote(method == "b"))
)
smoof.fun = makeSingleObjectiveFunction(
name = "mixed_example", fn = fun, par.set = par.set, has.simple.signature = FALSE
)
ctrl = makeMBOControl()
ctrl = setMBOControlTermination(ctrl, iters = 6L)
ctrl = setMBOControlInfill(ctrl, crit = crit.cb1, filter.proposed.points = TRUE, filter.proposed.points.tol = 0)
res = mbo(smoof.fun, control = ctrl, show.info = FALSE)
op = as.data.frame(res$opt.path)
expect_equal(sum(op$cat == "Y", na.rm = TRUE), 1)
expect_equal(sum(op$cat == "Z", na.rm = TRUE), 1)
})