Skip to content

Commit

Permalink
Fix: Inaccurate Attribute Listing with dir(obj) (scikit-learn#28558)
Browse files Browse the repository at this point in the history
  • Loading branch information
MiguelParece committed May 6, 2024
1 parent a5203e8 commit 896ec61
Show file tree
Hide file tree
Showing 29 changed files with 134 additions and 40 deletions.
3 changes: 3 additions & 0 deletions sklearn/ensemble/_bagging.py
Expand Up @@ -839,6 +839,9 @@ def __init__(
verbose=verbose,
)

def __dir__(self):
    """List only the attribute names that resolve on this instance.

    Starts from the names reported by ``super().__dir__()`` and omits every
    name for which ``hasattr(self, name)`` is false.
    """
    candidates = super().__dir__()
    return list(filter(lambda name: hasattr(self, name), candidates))

def _get_estimator(self):
"""Resolve which estimator to return (default is DecisionTreeClassifier)"""
if self.estimator is None:
Expand Down
3 changes: 3 additions & 0 deletions sklearn/ensemble/_stacking.py
Expand Up @@ -597,6 +597,9 @@ def __init__(
verbose=verbose,
)

def __dir__(self):
    """List only the attribute names that resolve on this instance.

    Starts from the names reported by ``super().__dir__()`` and omits every
    name for which ``hasattr(self, name)`` is false.
    """
    candidates = super().__dir__()
    return list(filter(lambda name: hasattr(self, name), candidates))

def _validate_final_estimator(self):
self._clone_final_estimator(default=LogisticRegression())
if not is_classifier(self.final_estimator_):
Expand Down
3 changes: 3 additions & 0 deletions sklearn/ensemble/_voting.py
Expand Up @@ -352,6 +352,9 @@ def __init__(
self.flatten_transform = flatten_transform
self.verbose = verbose

def __dir__(self):
    """List only the attribute names that resolve on this instance.

    Starts from the names reported by ``super().__dir__()`` and omits every
    name for which ``hasattr(self, name)`` is false.
    """
    candidates = super().__dir__()
    return list(filter(lambda name: hasattr(self, name), candidates))

@_fit_context(
# estimators in VotingClassifier.estimators are not validated yet
prefer_skip_nested_validation=False
Expand Down
4 changes: 3 additions & 1 deletion sklearn/ensemble/tests/test_bagging.py
Expand Up @@ -447,7 +447,9 @@ def test_error():
# Test support of decision_function
X, y = iris.data, iris.target
base = DecisionTreeClassifier()
assert not hasattr(BaggingClassifier(base).fit(X, y), "decision_function")
assert not hasattr(
BaggingClassifier(base).fit(X, y), "decision_function"
) and "decision_function" not in dir(BaggingClassifier(base).fit(X, y))


def test_parallel_classification():
Expand Down
3 changes: 3 additions & 0 deletions sklearn/ensemble/tests/test_stacking.py
Expand Up @@ -882,8 +882,11 @@ def test_stacking_final_estimator_attribute_error():
estimators=estimators, final_estimator=final_estimator, cv=3
)

assert "decision_function" not in dir(clf.fit(X, y))

outer_msg = "This 'StackingClassifier' has no attribute 'decision_function'"
inner_msg = "'RandomForestClassifier' object has no attribute 'decision_function'"

with pytest.raises(AttributeError, match=outer_msg) as exec_info:
clf.fit(X, y).decision_function(X)
assert isinstance(exec_info.value.__cause__, AttributeError)
Expand Down
4 changes: 2 additions & 2 deletions sklearn/ensemble/tests/test_voting.py
Expand Up @@ -78,9 +78,9 @@ def test_predictproba_hardvoting():
assert isinstance(exec_info.value.__cause__, AttributeError)
assert inner_msg in str(exec_info.value.__cause__)

assert not hasattr(eclf, "predict_proba")
assert not hasattr(eclf, "predict_proba") and "predict_proba" not in dir(eclf)
eclf.fit(X_scaled, y)
assert not hasattr(eclf, "predict_proba")
assert not hasattr(eclf, "predict_proba") and "predict_proba" not in dir(eclf)


def test_notfitted():
Expand Down
3 changes: 3 additions & 0 deletions sklearn/feature_selection/_rfe.py
Expand Up @@ -226,6 +226,9 @@ def __init__(
self.importance_getter = importance_getter
self.verbose = verbose

def __dir__(self):
    """List only the attribute names that resolve on this instance.

    Starts from the names reported by ``super().__dir__()`` and omits every
    name for which ``hasattr(self, name)`` is false.
    """
    candidates = super().__dir__()
    return list(filter(lambda name: hasattr(self, name), candidates))

@property
def _estimator_type(self):
return self.estimator._estimator_type
Expand Down
1 change: 1 addition & 0 deletions sklearn/feature_selection/tests/test_rfe.py
Expand Up @@ -645,6 +645,7 @@ def test_rfe_estimator_attribute_error():

outer_msg = "This 'RFE' has no attribute 'decision_function'"
inner_msg = "'LinearRegression' object has no attribute 'decision_function'"
assert "decision_function" not in dir(rfe.fit(iris.data, iris.target))
with pytest.raises(AttributeError, match=outer_msg) as exec_info:
rfe.fit(iris.data, iris.target).decision_function(iris.data)
assert isinstance(exec_info.value.__cause__, AttributeError)
Expand Down
10 changes: 6 additions & 4 deletions sklearn/linear_model/tests/test_sgd.py
Expand Up @@ -727,8 +727,10 @@ def test_sgd_predict_proba_method_access(klass):
inner_msg = "probability estimates are not available for loss={!r}".format(
loss
)
assert not hasattr(clf, "predict_proba")
assert not hasattr(clf, "predict_log_proba")
assert not hasattr(clf, "predict_proba") and "predict_proba" not in dir(clf)
assert not hasattr(
clf, "predict_log_proba"
) and "predict_log_proba" not in dir(clf)
with pytest.raises(
AttributeError, match="has no attribute 'predict_proba'"
) as exec_info:
Expand All @@ -753,8 +755,8 @@ def test_sgd_proba(klass):
# We cannot use the factory here, because it defines predict_proba
# anyway.
clf = SGDClassifier(loss="hinge", alpha=0.01, max_iter=10, tol=None).fit(X, Y)
assert not hasattr(clf, "predict_proba")
assert not hasattr(clf, "predict_log_proba")
assert not hasattr(clf, "predict_proba") and "predict_proba" not in dir(clf)
assert not hasattr(clf, "predict_log_proba") and "predict_log_proba" not in dir(clf)

# log and modified_huber losses can output probability estimates
# binary case
Expand Down
3 changes: 3 additions & 0 deletions sklearn/model_selection/_search.py
Expand Up @@ -433,6 +433,9 @@ def __init__(
def _estimator_type(self):
return self.estimator._estimator_type

def __dir__(self):
    """List only the attribute names that resolve on this instance.

    Starts from the names reported by ``super().__dir__()`` and omits every
    name for which ``hasattr(self, name)`` is false.
    """
    candidates = super().__dir__()
    return list(filter(lambda name: hasattr(self, name), candidates))

def _more_tags(self):
# allows cross-validation to see 'precomputed' metrics
return {
Expand Down
8 changes: 4 additions & 4 deletions sklearn/model_selection/tests/test_search.py
Expand Up @@ -1556,7 +1556,7 @@ def test_predict_proba_disabled():
y = [0, 0, 1, 1, 1]
clf = SVC(probability=False)
gs = GridSearchCV(clf, {}, cv=2).fit(X, y)
assert not hasattr(gs, "predict_proba")
assert not hasattr(gs, "predict_proba") and "predict_proba" not in dir(gs)


def test_grid_search_allows_nans():
Expand Down Expand Up @@ -1770,7 +1770,7 @@ def test_stochastic_gradient_loss_param():

# When the estimator is not fitted, `predict_proba` is not available as the
# loss is 'hinge'.
assert not hasattr(clf, "predict_proba")
assert not hasattr(clf, "predict_proba") and "predict_proba" not in dir(clf)
clf.fit(X, y)
clf.predict_proba(X)
clf.predict_log_proba(X)
Expand All @@ -1783,9 +1783,9 @@ def test_stochastic_gradient_loss_param():
clf = GridSearchCV(
estimator=SGDClassifier(loss="hinge"), param_grid=param_grid, cv=3
)
assert not hasattr(clf, "predict_proba")
assert not hasattr(clf, "predict_proba") and "predict_proba" not in dir(clf)
clf.fit(X, y)
assert not hasattr(clf, "predict_proba")
assert not hasattr(clf, "predict_proba") and "predict_proba" not in dir(clf)


def test_search_train_scores_set_to_false():
Expand Down
9 changes: 9 additions & 0 deletions sklearn/multiclass.py
Expand Up @@ -321,6 +321,9 @@ def __init__(self, estimator, *, n_jobs=None, verbose=0):
self.n_jobs = n_jobs
self.verbose = verbose

def __dir__(self):
    """List only the attribute names that resolve on this instance.

    Starts from the names reported by ``super().__dir__()`` and omits every
    name for which ``hasattr(self, name)`` is false.
    """
    candidates = super().__dir__()
    return list(filter(lambda name: hasattr(self, name), candidates))

@_fit_context(
# OneVsRestClassifier.estimator is not validated yet
prefer_skip_nested_validation=False
Expand Down Expand Up @@ -752,6 +755,9 @@ def __init__(self, estimator, *, n_jobs=None):
self.estimator = estimator
self.n_jobs = n_jobs

def __dir__(self):
    """List only the attribute names that resolve on this instance.

    Starts from the names reported by ``super().__dir__()`` and omits every
    name for which ``hasattr(self, name)`` is false.
    """
    candidates = super().__dir__()
    return list(filter(lambda name: hasattr(self, name), candidates))

@_fit_context(
# OneVsOneClassifier.estimator is not validated yet
prefer_skip_nested_validation=False
Expand Down Expand Up @@ -1141,6 +1147,9 @@ def __init__(self, estimator, *, code_size=1.5, random_state=None, n_jobs=None):
self.random_state = random_state
self.n_jobs = n_jobs

def __dir__(self):
    """List only the attribute names that resolve on this instance.

    Starts from the names reported by ``super().__dir__()`` and omits every
    name for which ``hasattr(self, name)`` is false.
    """
    candidates = super().__dir__()
    return list(filter(lambda name: hasattr(self, name), candidates))

@_fit_context(
# OutputCodeClassifier.estimator is not validated yet
prefer_skip_nested_validation=False
Expand Down
9 changes: 9 additions & 0 deletions sklearn/multioutput.py
Expand Up @@ -409,6 +409,9 @@ class MultiOutputRegressor(RegressorMixin, _MultiOutputEstimator):
def __init__(self, estimator, *, n_jobs=None):
super().__init__(estimator, n_jobs=n_jobs)

def __dir__(self):
    """List only the attribute names that resolve on this instance.

    Starts from the names reported by ``super().__dir__()`` and omits every
    name for which ``hasattr(self, name)`` is false.
    """
    candidates = super().__dir__()
    return list(filter(lambda name: hasattr(self, name), candidates))

@_available_if_estimator_has("partial_fit")
def partial_fit(self, X, y, sample_weight=None, **partial_fit_params):
"""Incrementally fit the model to data, for each output variable.
Expand Down Expand Up @@ -512,6 +515,9 @@ class MultiOutputClassifier(ClassifierMixin, _MultiOutputEstimator):
[1, 0, 1]])
"""

def __dir__(self):
    """List only the attribute names that resolve on this instance.

    Starts from the names reported by ``super().__dir__()`` and omits every
    name for which ``hasattr(self, name)`` is false.
    """
    candidates = super().__dir__()
    return list(filter(lambda name: hasattr(self, name), candidates))

def __init__(self, estimator, *, n_jobs=None):
super().__init__(estimator, n_jobs=n_jobs)

Expand Down Expand Up @@ -998,6 +1004,9 @@ def __init__(
)
self.chain_method = chain_method

def __dir__(self):
    """List only the attribute names that resolve on this instance.

    Starts from the names reported by ``super().__dir__()`` and omits every
    name for which ``hasattr(self, name)`` is false.
    """
    candidates = super().__dir__()
    return list(filter(lambda name: hasattr(self, name), candidates))

@_fit_context(
# ClassifierChain.base_estimator is not validated yet
prefer_skip_nested_validation=False
Expand Down
3 changes: 3 additions & 0 deletions sklearn/neighbors/_lof.py
Expand Up @@ -218,6 +218,9 @@ def __init__(
self.contamination = contamination
self.novelty = novelty

def __dir__(self):
    """List only the attribute names that resolve on this instance.

    Starts from the names reported by ``super().__dir__()`` and omits every
    name for which ``hasattr(self, name)`` is false.
    """
    candidates = super().__dir__()
    return list(filter(lambda name: hasattr(self, name), candidates))

def _check_novelty_fit_predict(self):
if self.novelty:
msg = (
Expand Down
8 changes: 4 additions & 4 deletions sklearn/neighbors/tests/test_lof.py
Expand Up @@ -212,15 +212,15 @@ def test_hasattr_prediction():
assert hasattr(clf, "predict")
assert hasattr(clf, "decision_function")
assert hasattr(clf, "score_samples")
assert not hasattr(clf, "fit_predict")
assert not hasattr(clf, "fit_predict") and "fit_predict" not in dir(clf)

# when novelty=False
clf = neighbors.LocalOutlierFactor(novelty=False)
clf.fit(X)
assert hasattr(clf, "fit_predict")
assert not hasattr(clf, "predict")
assert not hasattr(clf, "decision_function")
assert not hasattr(clf, "score_samples")
assert not hasattr(clf, "predict") and "predict" not in dir(clf)
assert not hasattr(clf, "decision_function") and "decision_function" not in dir(clf)
assert not hasattr(clf, "score_samples") and "score_samples" not in dir(clf)


@parametrize_with_checks([neighbors.LocalOutlierFactor(novelty=True)])
Expand Down
3 changes: 3 additions & 0 deletions sklearn/neural_network/_multilayer_perceptron.py
Expand Up @@ -149,6 +149,9 @@ def __init__(
self.n_iter_no_change = n_iter_no_change
self.max_fun = max_fun

def __dir__(self):
    """List only the attribute names that resolve on this instance.

    Starts from the names reported by ``super().__dir__()`` and omits every
    name for which ``hasattr(self, name)`` is false.
    """
    candidates = super().__dir__()
    return list(filter(lambda name: hasattr(self, name), candidates))

def _unpack(self, packed_parameters):
"""Extract the coefficients and intercepts from packed_parameters."""
for i in range(self.n_layers_ - 1):
Expand Down
4 changes: 3 additions & 1 deletion sklearn/neural_network/tests/test_mlp.py
Expand Up @@ -499,7 +499,9 @@ def test_partial_fit_errors():
MLPClassifier(solver="sgd").partial_fit(X, y, classes=[2])

# lbfgs doesn't support partial_fit
assert not hasattr(MLPClassifier(solver="lbfgs"), "partial_fit")
# Neither hasattr() nor dir() may expose partial_fit for the lbfgs solver.
# The dir() check must use the correctly spelled method name: a misspelled
# name is never listed, so the assertion would pass regardless of __dir__.
lbfgs_clf = MLPClassifier(solver="lbfgs")
assert not hasattr(lbfgs_clf, "partial_fit")
assert "partial_fit" not in dir(lbfgs_clf)


def test_nonfinite_params():
Expand Down
3 changes: 3 additions & 0 deletions sklearn/pipeline.py
Expand Up @@ -172,6 +172,9 @@ def __init__(self, steps, *, memory=None, verbose=False):
self.memory = memory
self.verbose = verbose

def __dir__(self):
    """List only the attribute names that resolve on this instance.

    Starts from the names reported by ``super().__dir__()`` and omits every
    name for which ``hasattr(self, name)`` is false.
    """
    candidates = super().__dir__()
    return list(filter(lambda name: hasattr(self, name), candidates))

def set_output(self, *, transform=None):
"""Set the output container when `"transform"` and `"fit_transform"` are called.
Expand Down
3 changes: 3 additions & 0 deletions sklearn/preprocessing/_function_transformer.py
Expand Up @@ -170,6 +170,9 @@ def __init__(
self.kw_args = kw_args
self.inv_kw_args = inv_kw_args

def __dir__(self):
    """List only the attribute names that resolve on this instance.

    Starts from the names reported by ``super().__dir__()`` and omits every
    name for which ``hasattr(self, name)`` is false.
    """
    candidates = super().__dir__()
    return list(filter(lambda name: hasattr(self, name), candidates))

def _check_input(self, X, *, reset):
if self.validate:
return self._validate_data(X, accept_sparse=self.accept_sparse, reset=reset)
Expand Down
2 changes: 1 addition & 1 deletion sklearn/preprocessing/tests/test_function_transformer.py
Expand Up @@ -352,7 +352,7 @@ def test_function_transformer_feature_names_out_is_None():
transformer = FunctionTransformer()
X = np.random.rand(100, 2)
transformer.fit_transform(X)

assert "get_feature_names_out" not in dir(transformer)
msg = "This 'FunctionTransformer' has no attribute 'get_feature_names_out'"
with pytest.raises(AttributeError, match=msg):
transformer.get_feature_names_out()
Expand Down
3 changes: 3 additions & 0 deletions sklearn/semi_supervised/_self_training.py
Expand Up @@ -184,6 +184,9 @@ def __init__(
self.max_iter = max_iter
self.verbose = verbose

def __dir__(self):
    """List only the attribute names that resolve on this instance.

    Starts from the names reported by ``super().__dir__()`` and omits every
    name for which ``hasattr(self, name)`` is false.
    """
    candidates = super().__dir__()
    return list(filter(lambda name: hasattr(self, name), candidates))

@_fit_context(
# SelfTrainingClassifier.base_estimator is not validated yet
prefer_skip_nested_validation=False
Expand Down
8 changes: 7 additions & 1 deletion sklearn/semi_supervised/tests/test_self_training.py
Expand Up @@ -310,7 +310,9 @@ def test_base_estimator_meta_estimator():
cv=2,
)

assert not hasattr(base_estimator, "predict_proba")
assert not hasattr(base_estimator, "predict_proba") and "predict_proba" not in dir(
base_estimator
)
clf = SelfTrainingClassifier(base_estimator=base_estimator)
with pytest.raises(AttributeError):
clf.fit(X_train, y_train_missing_labels)
Expand All @@ -337,6 +339,10 @@ def test_self_training_estimator_attribute_error():
# should raise an AttributeError
self_training = SelfTrainingClassifier(base_estimator=DecisionTreeClassifier())

assert "decision_function" not in dir(
self_training.fit(X_train, y_train_missing_labels)
)

outer_msg = "This 'SelfTrainingClassifier' has no attribute 'decision_function'"
inner_msg = "'DecisionTreeClassifier' object has no attribute 'decision_function'"
with pytest.raises(AttributeError, match=outer_msg) as exec_info:
Expand Down
3 changes: 3 additions & 0 deletions sklearn/svm/_base.py
Expand Up @@ -733,6 +733,9 @@ def __init__(
random_state=random_state,
)

def __dir__(self):
    """List only the attribute names that resolve on this instance.

    Starts from the names reported by ``super().__dir__()`` and omits every
    name for which ``hasattr(self, name)`` is false.
    """
    candidates = super().__dir__()
    return list(filter(lambda name: hasattr(self, name), candidates))

def _validate_targets(self, y):
y_ = column_or_1d(y, warn=True)
check_classification_targets(y)
Expand Down
4 changes: 2 additions & 2 deletions sklearn/svm/tests/test_svm.py
Expand Up @@ -1118,9 +1118,9 @@ def test_hasattr_predict_proba():
assert hasattr(G, "predict_proba")

G = svm.SVC(probability=False)
assert not hasattr(G, "predict_proba")
assert not hasattr(G, "predict_proba") and "predict_proba" not in dir(G)
G.fit(iris.data, iris.target)
assert not hasattr(G, "predict_proba")
assert not hasattr(G, "predict_proba") and "predict_proba" not in dir(G)

# Switching to `probability=True` after fitting should make
# predict_proba available, but calling it must not work:
Expand Down

0 comments on commit 896ec61

Please sign in to comment.