Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

GH-16056 xgboost support gblinear parameters #16166

Merged
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -67,6 +67,12 @@ public enum DMatrixType {
// Hardware backend requested for XGBoost computation; `auto` is resolved
// at training time (see getActualBackend in createParamsMap's caller path).
public enum Backend {
auto, gpu, cpu
}
// Coordinate-selection strategies for the gblinear booster; the chosen value
// is passed to native XGBoost as the "feature_selector" parameter.
public enum FeatureSelector {
cyclic, shuffle, random, greedy, thrifty
}
// Values accepted by native XGBoost's "updater" parameter; a user-provided
// value overrides the automatic gblinear updater selection.
// NOTE(review): gpu_hist appears to be a tree_method rather than an updater —
// it is used here as a reusable name constant for "tree_method"; confirm intent.
public enum Updater {
    gpu_hist, shotgun, coord_descent, gpu_coord_descent
}

// H2O GBM options
public boolean _quiet_mode = true;
Expand Down Expand Up @@ -141,6 +147,12 @@ public enum Backend {
public int[] _gpu_id; // which GPU to use
public Backend _backend = Backend.auto;

// GBLinear specific (booster == gblinear)
// lambda and alpha are also supported for gbtree
public FeatureSelector _feature_selector = FeatureSelector.cyclic;
public int _top_k;
public Updater _updater;

public String _eval_metric;
public boolean _score_eval_metric_only;

Expand Down Expand Up @@ -378,6 +390,10 @@ public static Map<String, Object> createParamsMap(XGBoostParameters p, int nClas
params.put("one_drop", p._one_drop ? "1" : "0");
params.put("skip_drop", p._skip_drop);
}
if (p._booster == XGBoostParameters.Booster.gblinear) {
params.put("feature_selector", p._feature_selector.toString());
params.put("top_k", p._top_k);
}
XGBoostParameters.Backend actualBackend = getActualBackend(p, true);
XGBoostParameters.TreeMethod actualTreeMethod = getActualTreeMethod(p);
if (actualBackend == XGBoostParameters.Backend.gpu) {
Expand All @@ -387,17 +403,17 @@ public static Map<String, Object> createParamsMap(XGBoostParameters p, int nClas
params.put("gpu_id", 0);
}
// we are setting updater rather than tree_method here to keep CPU predictor, which is faster
if (p._booster == XGBoostParameters.Booster.gblinear) {
if (p._booster == XGBoostParameters.Booster.gblinear && p._updater == null) {
LOG.info("Using gpu_coord_descent updater.");
params.put("updater", "gpu_coord_descent");
params.put("updater", XGBoostParameters.Updater.gpu_coord_descent.toString());
} else {
LOG.info("Using gpu_hist tree method.");
params.put("max_bin", p._max_bins);
params.put("tree_method", "gpu_hist");
params.put("tree_method", XGBoostParameters.Updater.gpu_hist.toString());
}
} else if (p._booster == XGBoostParameters.Booster.gblinear) {
} else if (p._booster == XGBoostParameters.Booster.gblinear && p._updater == null) {
LOG.info("Using coord_descent updater.");
params.put("updater", "coord_descent");
params.put("updater", XGBoostParameters.Updater.coord_descent.toString());
} else if (H2O.CLOUD.size() > 1 && p._tree_method == XGBoostParameters.TreeMethod.auto &&
p._monotone_constraints != null) {
LOG.info("Using hist tree method for distributed computation with monotone_constraints.");
Expand All @@ -410,6 +426,10 @@ public static Map<String, Object> createParamsMap(XGBoostParameters p, int nClas
params.put("max_bin", p._max_bins);
}
}
if (p._updater != null) {
LOG.info("Using user-provided updater.");
params.put("updater", p._updater.toString());
}
if (p._min_child_weight != 1) {
LOG.info("Using user-provided parameter min_child_weight instead of min_rows.");
params.put("min_child_weight", p._min_child_weight);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -3279,5 +3279,73 @@ public void testWarnEvalMetricOnlyWithouEvalMetric() {
Scope.exit();
}
}


@Test
public void testGBLinearTopKAndFeatureSelector() {
    Scope.enter();
    try {
        // Prostate dataset with a binomial (categorical) response.
        String responseColumn = "CAPSULE";
        Frame trainingFrame = parseAndTrackTestFile("./smalldata/logreg/prostate_train.csv");
        trainingFrame.toCategoricalCol(responseColumn);

        XGBoostModel.XGBoostParameters params = new XGBoostModel.XGBoostParameters();
        params._ntrees = 1;
        params._train = trainingFrame._key;
        params._response_column = responseColumn;
        params._booster = XGBoostModel.XGBoostParameters.Booster.gblinear;
        params._top_k = 2;
        params._feature_selector = XGBoostModel.XGBoostParameters.FeatureSelector.greedy;

        // First model: greedy feature selector limited to top_k = 2.
        XGBoostModel lowTopKModel = (XGBoostModel) new hex.tree.xgboost.XGBoost(params).trainModel().get();
        Scope.track_generic(lowTopKModel);
        assertNotNull(lowTopKModel);

        Frame lowTopKPreds = lowTopKModel.score(trainingFrame);
        Scope.track(lowTopKPreds);

        // Second model: identical setup except a much larger top_k.
        params._top_k = 100;
        XGBoostModel highTopKModel = (XGBoostModel) new hex.tree.xgboost.XGBoost(params).trainModel().get();
        Scope.track_generic(highTopKModel);
        assertNotNull(highTopKModel);

        Frame highTopKPreds = highTopKModel.score(trainingFrame);
        Scope.track(highTopKPreds);

        // Differing predictions show top_k was actually forwarded to native XGBoost.
        assertNotEquals("top_k should affect the predictions",
                lowTopKPreds.toTwoDimTable().get(0, 1), highTopKPreds.toTwoDimTable().get(0, 1));
    }
    finally {
        Scope.exit();
    }
}


@Test
public void testGBLinearShotgun() {
    Scope.enter();
    try {
        String response = "CAPSULE";
        Frame train = parseAndTrackTestFile("./smalldata/logreg/prostate_train.csv");
        train.toCategoricalCol(response);

        XGBoostModel.XGBoostParameters parms = new XGBoostModel.XGBoostParameters();
        parms._ntrees = 1;
        parms._train = train._key;
        parms._response_column = response;
        parms._booster = XGBoostModel.XGBoostParameters.Booster.gblinear;
        parms._updater = XGBoostModel.XGBoostParameters.Updater.shotgun;
        parms._feature_selector = XGBoostModel.XGBoostParameters.FeatureSelector.shuffle;

        ModelBuilder job = new hex.tree.xgboost.XGBoost(parms);
        XGBoostModel xgboost = (XGBoostModel) job.trainModel().get();
        assertNotNull(xgboost);
        Scope.track_generic(xgboost);
        // Fixed argument order: JUnit's assertEquals is (message, expected, actual);
        // the expected value (the requested updater) must come before the actual one.
        // NOTE(review): indexing _native_parameters with (1, 1) assumes "updater" is the
        // second row of the native-parameters table — confirm that ordering is stable.
        assertEquals("updater should be changed",
                XGBoostModel.XGBoostParameters.Updater.shotgun.toString(),
                xgboost._output._native_parameters.get(1, 1));
    }
    finally {
        Scope.exit();
    }
}
}