Merge pull request #201 from mandjevant/master
Sklearn 1.3 update
mandjevant committed Dec 20, 2023
2 parents d050420 + 5168d04 commit 4bc2864
Showing 26 changed files with 84 additions and 80 deletions.
2 changes: 1 addition & 1 deletion .github/workflows/tests.yml
@@ -10,7 +10,7 @@ jobs:
     strategy:
       matrix:
         os: [ubuntu-latest, windows-latest]
-        python-version: ['3.7', '3.8', '3.9']
+        python-version: ['3.9', '3.10', '3.11']

     steps:
       - uses: actions/checkout@v2
3 changes: 0 additions & 3 deletions README.md
@@ -5,9 +5,6 @@
 [scikit-learn](http://scikit-learn.org/).

 See how to use hyperopt-sklearn through [examples](http://hyperopt.github.io/hyperopt-sklearn/#documentation)
-or older
-[notebooks](http://nbviewer.ipython.org/github/hyperopt/hyperopt-sklearn/tree/master/notebooks)
-
 More examples can be found in the Example Usage section of the SciPy paper

 Komer B., Bergstra J., and Eliasmith C. "Hyperopt-Sklearn: automatic hyperparameter configuration for Scikit-learn" Proc. SciPy 2014. http://conference.scipy.org/proceedings/scipy2014/pdfs/komer.pdf
6 changes: 3 additions & 3 deletions hpsklearn/components/__init__.py
@@ -189,7 +189,7 @@ def any_classifier(name):
         random_forest_classifier(name + ".random_forest"),
         extra_tree_classifier(name + ".extra_trees"),
         ada_boost_classifier(name + ".ada_boost"),
-        gradient_boosting_classifier(name + ".grad_boosting", loss="deviance"),
+        gradient_boosting_classifier(name + ".grad_boosting"),
         sgd_classifier(name + ".sgd")
     ]

@@ -207,7 +207,7 @@ def any_sparse_classifier(name):
     sparse_classifiers = [
         linear_svc(name + ".linear_svc"),
         sgd_classifier(name + ".sgd"),
-        k_neighbors_classifier(name + ".knn", metric="euclidean", p=2),
+        k_neighbors_classifier(name + ".knn", p=2),
         multinomial_nb(name + ".multinomial_nb")
     ]

@@ -242,7 +242,7 @@ def any_sparse_regressor(name):
     """
     sparse_regressors = [
         sgd_regressor(name + ".sgd"),
-        k_neighbors_regressor(name + ".knn", metric="euclidean", p=2)
+        k_neighbors_regressor(name + ".knn", p=2)
     ]

     return hp.choice(name, sparse_regressors)
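For context, these `hp.choice` spaces are what the top-level estimator samples from on each trial. A minimal usage sketch, following the project's README (the data variables are placeholders, not part of this commit):

import numpy as np
from hpsklearn import HyperoptEstimator, any_classifier
from hyperopt import tpe

# Each trial draws one classifier plus its hyperparameters from any_classifier's space.
estim = HyperoptEstimator(classifier=any_classifier("clf"),
                          algo=tpe.suggest,
                          max_evals=10)
# estim.fit(X_train, y_train); estim.score(X_test, y_test)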
6 changes: 3 additions & 3 deletions hpsklearn/components/cluster/_kmeans.py
@@ -61,8 +61,8 @@ def _kmeans_hp_space(


 @validate(params=["algorithm"],
-          validation_test=lambda param: not isinstance(param, str) or param in ["auto", "full", "elkan"],
-          msg="Invalid parameter '%s' with value '%s'. Value must be 'auto', 'full' or 'elkan'")
+          validation_test=lambda param: not isinstance(param, str) or param in ["lloyd", "elkan"],
+          msg="Invalid parameter '%s' with value '%s'. Value must be 'lloyd' or 'elkan'")
 def k_means(name: str,
             n_init: typing.Union[int, Apply] = None,
             max_iter: typing.Union[int, Apply] = None,
@@ -94,7 +94,7 @@ def _name(msg):
     hp_space["max_iter"] = scope.int(hp.uniform(_name("max_iter"), 100, 500)) if max_iter is None else max_iter
     hp_space["tol"] = hp.uniform(_name("tol"), 1e-5, 1e-3) if tol is None else tol
     hp_space["copy_x"] = copy_x
-    hp_space["algorithm"] = hp.choice(_name("algorithm"), ["auto", "full", "elkan"]) if algorithm is None else algorithm
+    hp_space["algorithm"] = hp.choice(_name("algorithm"), ["lloyd", "elkan"]) if algorithm is None else algorithm

     return scope.sklearn_KMeans(**hp_space)
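The underlying change is scikit-learn's: KMeans dropped the deprecated "auto" and "full" options in 1.3; "lloyd" is the classic EM-style algorithm and "elkan" the triangle-inequality variant. A minimal sketch against sklearn >= 1.1:

import numpy as np
from sklearn.cluster import KMeans

X = np.random.RandomState(0).rand(100, 2)
# algorithm="full" or "auto" raises on sklearn 1.3; use "lloyd" (or "elkan")
km = KMeans(n_clusters=3, algorithm="lloyd", n_init=10).fit(X)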
4 changes: 2 additions & 2 deletions hpsklearn/components/ensemble/_bagging.py
@@ -82,7 +82,7 @@ def _bagging_random_state(name: str):
           msg="Invalid parameter '%s' with value '%s'. Parameter value must exceed 1.")
 def _bagging_hp_space(
         name_func,
-        base_estimator=None,
+        estimator=None,
         n_estimators: typing.Union[int, Apply] = None,
         max_samples: typing.Union[float, Apply] = None,
         max_features: typing.Union[float, Apply] = None,
@@ -100,7 +100,7 @@ def _bagging_hp_space(
     bagging regressor
     """
     hp_space = dict(
-        base_estimator=base_estimator,
+        estimator=estimator,
         n_estimators=_bagging_n_estimators(name_func("n_estimators")) if n_estimators is None else n_estimators,
         max_samples=_bagging_max_samples(name_func("max_samples")) if max_samples is None else max_samples,
         max_features=_bagging_max_features(name_func("max_features")) if max_features is None else max_features,
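This mirrors scikit-learn's own rename: the ensemble meta-estimators (Bagging*, AdaBoost* and RANSACRegressor below) deprecated `base_estimator` in favor of `estimator`. A minimal sketch of the new spelling:

from sklearn.ensemble import BaggingClassifier
from sklearn.tree import DecisionTreeClassifier

# base_estimator=... warns on recent sklearn and is removed in later releases
clf = BaggingClassifier(estimator=DecisionTreeClassifier(), n_estimators=10)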
8 changes: 4 additions & 4 deletions hpsklearn/components/ensemble/_gb.py
@@ -23,7 +23,7 @@ def _gb_clf_loss(name: str):
     """
     Declaration search space 'loss' parameter for _gb classifier
     """
-    return hp.choice(name, ["deviance", "exponential"])
+    return hp.choice(name, ["log_loss", "exponential"])


 def _gb_reg_loss(name: str):
@@ -211,16 +211,16 @@ def _gb_hp_space(


 @validate(params=["loss"],
-          validation_test=lambda param: not isinstance(param, str) or param in ("deviance", "exponential"),
-          msg="Invalid parameter '%s' with value '%s'. Choose 'deviance' or 'exponential'.")
+          validation_test=lambda param: not isinstance(param, str) or param in ("log_loss", "exponential"),
+          msg="Invalid parameter '%s' with value '%s'. Choose 'log_loss' or 'exponential'.")
 def gradient_boosting_classifier(name: str, loss: typing.Union[str, Apply] = None, **kwargs):
     """
     Return a pyll graph with hyperparameters that will construct
     a sklearn.ensemble.GradientBoostingClassifier model.

     Args:
         name: name | str
-        loss: choose 'deviance' or 'exponential' | str
+        loss: choose 'log_loss' or 'exponential' | str

     See help(hpsklearn.components._gb._gb_hp_space) for info on
     additional available GradientBoosting arguments.
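The rename is scikit-learn's: "deviance" was deprecated in 1.1 and removed in 1.3 in favor of the equivalent "log_loss". For example:

from sklearn.ensemble import GradientBoostingClassifier

clf = GradientBoostingClassifier(loss="log_loss")  # formerly loss="deviance"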
11 changes: 7 additions & 4 deletions hpsklearn/components/ensemble/_hist_gradient_boosting.py
@@ -26,7 +26,10 @@ def _hist_gradient_boosting_reg_loss(name: str):
     hist gradient boosting regressor

     Parameter 'poisson' is also available. Not implemented since
-    'poisson' is only available for non-negative y data
+    'poisson' is only available for non-zero, non-negative y data
+
+    Parameter 'gamma' is also available. Not implemented since
+    'gamma' is only available for non-negative y data
     """
     return hp.choice(name, ["squared_error", "absolute_error"])
@@ -141,14 +144,14 @@ def _hist_gradient_boosting_hp_space(
                                                                 "categorical_crossentropy"),
           msg="Invalid parameter '%s' with value '%s'. "
               "Choose 'auto', 'binary_crossentropy', 'categorical_crossentropy'")
-def hist_gradient_boosting_classifier(name: str, loss: typing.Union[str, Apply] = "auto", **kwargs):
+def hist_gradient_boosting_classifier(name: str, loss: typing.Union[str, Apply] = "log_loss", **kwargs):
     """
     Return a pyll graph with hyperparameters that will construct
     a sklearn.ensemble.HistGradientBoostingClassifier model.

     Args:
         name: name | str
-        loss: choose 'auto', 'binary_crossentropy' or 'categorical_crossentropy' | str
+        loss: 'log_loss' | str

     See help(hpsklearn.components._hist_gradient_boosting._hist_gradient_boosting_regressor) for info on
     additional available HistGradientBoosting arguments.
@@ -165,7 +168,7 @@ def _name(msg):

 @validate(params=["loss"],
           validation_test=lambda param: not isinstance(param, str) or param in ("squared_error", "absolute_error",
-                                                                                "poisson"),
+                                                                                "poisson", "quantile", "gamma"),
           msg="Invalid parameter '%s' with value '%s'. "
               "Choose 'squared_error', 'absolute_error', 'poisson'")
 def hist_gradient_boosting_regressor(name: str, loss: typing.Union[str, Apply] = None, **kwargs):
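"quantile" (added in sklearn 1.1) and "gamma" (added in 1.3) are the two losses newly accepted by the validator above. A sketch of the quantile case, which needs the companion `quantile` parameter:

from sklearn.ensemble import HistGradientBoostingRegressor

reg = HistGradientBoostingRegressor(loss="quantile", quantile=0.9)  # 90th-percentile regression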
4 changes: 2 additions & 2 deletions hpsklearn/components/ensemble/_weight_boosting.py
@@ -58,7 +58,7 @@ def _weight_boosting_random_state(name: str):
           msg="Invalid parameter '%s' with value '%s'. Parameter value must be non-negative and greater than 0.")
 def _weight_boosting_hp_space(
         name_func,
-        base_estimator=None,
+        estimator=None,
         n_estimators: typing.Union[int, Apply] = None,
         learning_rate: typing.Union[float, Apply] = None,
         random_state=None
@@ -69,7 +69,7 @@ def _weight_boosting_hp_space(
     AdaBoost regressor
     """
     hp_space = dict(
-        base_estimator=base_estimator,
+        estimator=estimator,
         n_estimators=_weight_boosting_n_estimators(name_func("n_estimators")) if n_estimators is None else n_estimators,
         learning_rate=_weight_boosting_learning_rate(name_func("learning_rate"))
         if learning_rate is None else learning_rate,
4 changes: 3 additions & 1 deletion hpsklearn/components/lightgbm.py
@@ -141,12 +141,14 @@ def _lightgbm_hp_space(
     lightgbm regressor
     """
     hp_space = dict(
-        max_depth=_lightgbm_max_depth(name_func("max_depth")) if max_depth is None else max_depth,
+        # max_depth=_lightgbm_max_depth(name_func("max_depth")) if max_depth is None else max_depth,
+        max_depth=-1,
         num_leaves=_lightgbm_num_leaves(name_func("num_leaves")) if num_leaves is None else num_leaves,
         learning_rate=_lightgbm_learning_rate(name_func("learning_rate")) if learning_rate is None else learning_rate,
         n_estimators=_lightgbm_n_estimators(name_func("n_estimators")) if n_estimators is None else n_estimators,
         min_child_weight=_lightgbm_min_child_weight(name_func("min_child_weight"))
         if min_child_weight is None else min_child_weight,
+        # min_child_samples=5,
         max_delta_step=max_delta_step,
         subsample=_lightgbm_subsample(name_func("subsample")) if subsample is None else subsample,
         colsample_bytree=_lightgbm_colsample_bytree(name_func("colsample_bytree"))
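For context on the hard-coded value (the rationale is my reading, not stated in the commit): in LightGBM, max_depth=-1 means "no depth limit", so tree complexity is governed by num_leaves and searching both parameters is largely redundant. E.g.:

from lightgbm import LGBMClassifier

# depth unconstrained; num_leaves is the effective capacity knob
clf = LGBMClassifier(max_depth=-1, num_leaves=31, n_estimators=100)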
10 changes: 5 additions & 5 deletions hpsklearn/components/linear_model/_bayes.py
@@ -18,9 +18,9 @@ def sklearn_ARDRegression(*args, **kwargs):
     return linear_model.ARDRegression(*args, **kwargs)


-def _bayes_n_iter(name: str):
+def _bayes_max_iter(name: str):
     """
-    Declaration search space 'n_iter' parameter
+    Declaration search space 'max_iter' parameter
     """
     return scope.int(hp.qloguniform(name, low=np.log(150), high=np.log(450), q=1.0))

@@ -40,15 +40,15 @@ def _bayes_alpha_lambda(name: str):
     return hp.lognormal(name, mu=np.log(1e-6), sigma=np.log(10))


-@validate(params=["n_iter"],
+@validate(params=["max_iter"],
           validation_test=lambda param: not isinstance(param, int) or param > 1,
           msg="Invalid parameter '%s' with value '%s'. Parameter value must exceed 1.")
 @validate(params=["alpha_1", "alpha_2", "lambda_1", "lambda_2"],
           validation_test=lambda param: not isinstance(param, float) or param >= 0,
           msg="Invalid parameter '%s' with value '%s'. Parameter value must be equal to or exceed 0.")
 def _bayes_hp_space(
         name_func,
-        n_iter: typing.Union[int, Apply] = None,
+        max_iter: typing.Union[int, Apply] = None,
         tol: typing.Union[float, Apply] = None,
         alpha_1: typing.Union[float, Apply] = None,
         alpha_2: typing.Union[float, Apply] = None,
@@ -65,7 +65,7 @@ def _bayes_hp_space(
     ard regression
     """
     hp_space = dict(
-        n_iter=_bayes_n_iter(name_func("n_iter")) if n_iter is None else n_iter,
+        max_iter=_bayes_max_iter(name_func("max_iter")) if max_iter is None else max_iter,
         tol=_bayes_tol(name_func("tol")) if tol is None else tol,
         alpha_1=_bayes_alpha_lambda(name_func("alpha_1")) if alpha_1 is None else alpha_1,
         alpha_2=_bayes_alpha_lambda(name_func("alpha_2")) if alpha_2 is None else alpha_2,
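Again a scikit-learn 1.3 rename: BayesianRidge and ARDRegression take `max_iter` instead of `n_iter` (the old name is removed in a later release). For example:

from sklearn.linear_model import BayesianRidge

reg = BayesianRidge(max_iter=300, tol=1e-3)  # was n_iter=300 before sklearn 1.3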
4 changes: 2 additions & 2 deletions hpsklearn/components/linear_model/_least_angle.py
@@ -38,7 +38,7 @@ def _least_angle_n_nonzero_coefs(name: str):
     """
     Declaration search space 'n_nonzero_coefs' parameter
     """
-    return hp.qloguniform(name, low=np.log(400), high=np.log(600), q=1.0)
+    return scope.int(hp.qloguniform(name, low=np.log(400), high=np.log(600), q=1.0))


 def _least_angle_alpha(name: str):
@@ -80,7 +80,7 @@ def _least_angle_max_n_alphas(name: str):
     """
     Declaration search space 'max_n_alphas' parameter
     """
-    return hp.loguniform(name, low=np.log(750), high=np.log(1250))
+    return scope.int(hp.loguniform(name, low=np.log(750), high=np.log(1250)))


 def _least_angle_random_state(name: str):
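Why the scope.int wrappers: hp.qloguniform and hp.loguniform yield floats, while sklearn validates n_nonzero_coefs and max_n_alphas as integers, so the cast has to happen inside the pyll graph. A standalone sketch:

import numpy as np
from hyperopt import hp
from hyperopt.pyll import scope

# quantized log-uniform draw, cast to int at graph-evaluation time
n_nonzero_coefs = scope.int(hp.qloguniform("n_nonzero_coefs", np.log(400), np.log(600), 1))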
2 changes: 1 addition & 1 deletion hpsklearn/components/linear_model/_logistic.py
@@ -43,7 +43,7 @@ def _logistic_max_iter(name: str):
     """
     Declaration search space 'max_iter' parameter
     """
-    return scope.int(hp.uniform(name, 250, 750))
+    return scope.int(hp.uniform(name, 500, 1000))


 def _logistic_C(name: str):
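A plausible motivation for the higher range (my inference; the commit does not say): sklearn's default lbfgs solver frequently hits its iteration cap on unscaled data and emits a ConvergenceWarning. E.g.:

from sklearn.linear_model import LogisticRegression

clf = LogisticRegression(solver="lbfgs", max_iter=1000)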
7 changes: 3 additions & 4 deletions hpsklearn/components/linear_model/_quantile.py
@@ -14,10 +14,9 @@ def sklearn_QuantileRegressor(*args, **kwargs):

 @validate(params=["solver"],
           validation_test=lambda param: not isinstance(param, str) or
-          param in ["highs-ds", "highs-ipm", "highs",  # noqa
-                    "interior-point", "revised simplex"],
+          param in ["highs-ds", "highs-ipm", "highs", "revised simplex"],
           msg="Invalid parameter '%s' with value '%s'. Value must be in ['highs-ds', 'highs-ipm', 'highs', "
-              "'interior-point', 'revised simplex'].")
+              "'revised simplex'].")
 def quantile_regression(name: str,
                         quantile: typing.Union[float, Apply] = None,
                         alpha: typing.Union[float, Apply] = None,
@@ -44,7 +43,7 @@ def _name(msg):
         quantile=hp.normal(_name("quantile"), 0.5, 0.075) if quantile is None else quantile,
         alpha=hp.normal(_name("alpha"), mu=1.0, sigma=0.1) if alpha is None else alpha,
         fit_intercept=hp.choice(_name("fit_intercept"), [True, False]) if fit_intercept is None else fit_intercept,
-        solver=hp.choice(_name("solver"), ["highs-ds", "highs-ipm", "highs", "interior-point", "revised simplex"])
+        solver=hp.choice(_name("solver"), ["highs-ds", "highs-ipm", "highs", "revised simplex"])
         if solver is None else solver,
         solver_options=solver_options
     )
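Background (my reading of the change): SciPy 1.11 removed the "interior-point" method from scipy.optimize.linprog, so sklearn's QuantileRegressor can no longer offer it; "highs" is the recommended solver. For example:

from sklearn.linear_model import QuantileRegressor

reg = QuantileRegressor(quantile=0.5, alpha=0.0, solver="highs")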
8 changes: 4 additions & 4 deletions hpsklearn/components/linear_model/_ransac.py
@@ -17,7 +17,7 @@ def sklearn_RANSACRegressor(*args, **kwargs):
           validation_test=lambda param: not isinstance(param, str) or param in ["absolute_error", "squared_error"],
           msg="Invalid parameter '%s' with value '%s'. Value must be in ['absolute_error', 'squared_error'].")
 def ransac_regression(name: str,
-                      base_estimator=None,
+                      estimator=None,
                       min_samples: float = None,
                       residual_threshold: float = None,
                       is_data_valid: callable = None,
@@ -35,7 +35,7 @@ def ransac_regression(name: str,
     Args:
         name: name | str
-        base_estimator: base estimator object
+        estimator: base estimator object
         min_samples: minimum number of samples chosen | float
         residual_threshold: maximum residual | float
         is_data_valid: function called before model is fitted | callable
@@ -53,12 +53,12 @@ def _name(msg):
         return f"{name}.ransac_regression_{msg}"

     hp_space = dict(
-        base_estimator=base_estimator,
+        estimator=estimator,
         min_samples=min_samples,  # default None fits linear model with X.shape[1] + 1
         residual_threshold=residual_threshold,
         is_data_valid=is_data_valid,
         is_model_valid=is_model_valid,
-        max_trials=hp.uniform(_name("max_trials"), 50, 150) if max_trials is None else max_trials,
+        max_trials=scope.int(hp.uniform(_name("max_trials"), 50, 150)) if max_trials is None else max_trials,
         max_skips=np.inf if max_skips is None else max_skips,
         stop_n_inliers=np.inf if stop_n_inliers is None else stop_n_inliers,
         stop_score=np.inf if stop_score is None else stop_score,
6 changes: 3 additions & 3 deletions hpsklearn/components/linear_model/_stochastic_gradient.py
@@ -30,7 +30,7 @@ def _stochastic_gradient_classifier_loss(name: str):
     """
     return hp.pchoice(name, [
         (0.25, "hinge"),
-        (0.25, "log"),
+        (0.25, "log_loss"),
         (0.25, "modified_huber"),
         (0.05, "squared_hinge"),
         (0.05, "perceptron"),
@@ -218,10 +218,10 @@ def _stochastic_gradient_hp_space(

 @validate(params=["loss"],
           validation_test=lambda param: not isinstance(param, str) or
-          param in ["hinge", "log", "modified_huber", "squared_hinge", "perceptron",  # noqa
+          param in ["hinge", "log_loss", "modified_huber", "squared_hinge", "perceptron",  # noqa
                     "squared_error", "huber", "epsilon_insensitive",
                     "squared_epsilon_insensitive"],
-          msg="Invalid parameter '%s' with value '%s'. Value must be in ['hinge', 'log', 'modified_huber', "
+          msg="Invalid parameter '%s' with value '%s'. Value must be in ['hinge', 'log_loss', 'modified_huber', "
               "'squared_hinge', 'perceptron', 'squared_error', 'huber', 'epsilon_insensitive', "
               "'squared_epsilon_insensitive'].")
 @validate(params=["class_weight"],
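Same 1.3 removal as the gradient-boosting loss above: SGDClassifier's "log" became "log_loss". For example:

from sklearn.linear_model import SGDClassifier

clf = SGDClassifier(loss="log_loss")  # logistic regression fit by SGD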
4 changes: 2 additions & 2 deletions hpsklearn/components/neighbors/_nearest_centroid.py
@@ -11,7 +11,7 @@ def sklearn_NearestCentroid(*args, **kwargs):


 def nearest_centroid(name: str,
-                     metric: typing.Union[str, callable, Apply] = None,
+                     metric: typing.Union[str, Apply] = None,
                      shrink_threshold: float = None):
     """
     Return a pyll graph with hyperparameters that will construct
@@ -27,7 +27,7 @@ def _name(msg):
         return f"{name}.nearest_centroid_{msg}"

     hp_space = dict(
-        metric=hp.choice(_name("metric"), ["cityblock", "cosine", "l1", "l2", "minkowski", "euclidean", "manhattan"])
+        metric=hp.choice(_name("metric"), ["euclidean", "manhattan"])
         if metric is None else metric,
         shrink_threshold=shrink_threshold,
     )
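scikit-learn 1.3 deprecated every NearestCentroid metric except these two, callables included, hence the narrowed search space and type hint. For example:

from sklearn.neighbors import NearestCentroid

clf = NearestCentroid(metric="manhattan")  # "euclidean" is the default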
3 changes: 3 additions & 0 deletions hpsklearn/components/preprocessing/_discretization.py
@@ -26,6 +26,7 @@ def k_bins_discretizer(name: str,
                        n_bins: typing.Union[int, npt.ArrayLike, Apply] = None,
                        encode: typing.Union[str, Apply] = None,
                        strategy: typing.Union[str, Apply] = None,
+                       subsample: typing.Union[int, None, Apply] = None,
                        dtype=None):
     """
     Return a pyll graph with hyperparameters that will construct
@@ -36,12 +37,14 @@ def k_bins_discretizer(name: str,
         n_bins: number of bins | int, npt.ArrayLike
         encode: encoding method | str
         strategy: strategy used to define width of bins | str
+        subsample: subsample size of training data | int, None
         dtype: dtype of output | type
     """
     rval = scope.sklearn_KBinsDiscretizer(
         n_bins=scope.int(hp.uniform(name + ".n_bins", 2, 20)) if n_bins is None else n_bins,
         encode=hp.choice(name + ".encode", ["onehot-dense", "ordinal"]) if encode is None else encode,
         strategy=hp.choice(name + ".strategy", ["uniform", "quantile", "kmeans"]) if strategy is None else strategy,
+        subsample=hp.choice(name + ".subsample", [200000, None]) if subsample is None else subsample,
         dtype=dtype
     )
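`subsample` caps how many rows KBinsDiscretizer uses to fit the bin edges; None uses all rows, and 200000 matches sklearn's default cap for the "quantile" strategy. For example:

from sklearn.preprocessing import KBinsDiscretizer

kbd = KBinsDiscretizer(n_bins=5, encode="ordinal", strategy="quantile", subsample=200_000)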
6 changes: 3 additions & 3 deletions hpsklearn/components/preprocessing/_encoders.py
@@ -27,7 +27,7 @@ def sklearn_OrdinalEncoder(*args, **kwargs):
 def one_hot_encoder(name: str,
                     categories: typing.Union[str, list] = "auto",
                     drop: typing.Union[str, np.ndarray, Apply] = None,
-                    sparse: bool = True,
+                    sparse_output: bool = True,
                     dtype: type = np.float64):
     """
     Return a pyll graph with hyperparameters that will construct
@@ -37,13 +37,13 @@ def one_hot_encoder(name: str,
         name: name | str
         categories: categories per feature | str or list
         drop: choose 'first' or 'if_binary' | str or np.ndarray
-        sparse: return sparse matrix or array | bool
+        sparse_output: return sparse matrix or array | bool
         dtype: desired dtype of output | type
     """
     rval = scope.sklearn_OneHotEncoder(
         categories=categories,
         drop=hp.choice(name + ".drop", ["first", "if_binary"]) if drop is None else drop,
-        sparse=sparse,
+        sparse_output=sparse_output,
         dtype=dtype
     )
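The rename follows scikit-learn 1.2, where OneHotEncoder's `sparse` became `sparse_output` (the old name is removed in 1.4). For example:

from sklearn.preprocessing import OneHotEncoder

enc = OneHotEncoder(drop="if_binary", sparse_output=False)  # dense ndarray output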
