Fix: Inaccurate Attribute Listing with dir(obj) (scikit-learn#28558)

MiguelParece · May 6, 2024 · 896ec61 · 896ec61
1 parent a5203e8
commit 896ec61
Show file tree

Hide file tree

Showing 29 changed files with 134 additions and 40 deletions.
diff --git a/sklearn/ensemble/_bagging.py b/sklearn/ensemble/_bagging.py
@@ -839,6 +839,9 @@ def __init__(
             verbose=verbose,
         )
 
+    def __dir__(self):
+        return [attr for attr in super().__dir__() if hasattr(self, attr)]
+
     def _get_estimator(self):
         """Resolve which estimator to return (default is DecisionTreeClassifier)"""
         if self.estimator is None:

diff --git a/sklearn/ensemble/_stacking.py b/sklearn/ensemble/_stacking.py
@@ -597,6 +597,9 @@ def __init__(
             verbose=verbose,
         )
 
+    def __dir__(self):
+        return [attr for attr in super().__dir__() if hasattr(self, attr)]
+
     def _validate_final_estimator(self):
         self._clone_final_estimator(default=LogisticRegression())
         if not is_classifier(self.final_estimator_):

diff --git a/sklearn/ensemble/_voting.py b/sklearn/ensemble/_voting.py
@@ -352,6 +352,9 @@ def __init__(
         self.flatten_transform = flatten_transform
         self.verbose = verbose
 
+    def __dir__(self):
+        return [attr for attr in super().__dir__() if hasattr(self, attr)]
+
     @_fit_context(
         # estimators in VotingClassifier.estimators are not validated yet
         prefer_skip_nested_validation=False

diff --git a/sklearn/ensemble/tests/test_bagging.py b/sklearn/ensemble/tests/test_bagging.py
@@ -447,7 +447,9 @@ def test_error():
     # Test support of decision_function
     X, y = iris.data, iris.target
     base = DecisionTreeClassifier()
-    assert not hasattr(BaggingClassifier(base).fit(X, y), "decision_function")
+    assert not hasattr(
+        BaggingClassifier(base).fit(X, y), "decision_function"
+    ) and "decision_function" not in dir(BaggingClassifier(base).fit(X, y))
 
 
 def test_parallel_classification():

diff --git a/sklearn/ensemble/tests/test_stacking.py b/sklearn/ensemble/tests/test_stacking.py
@@ -882,8 +882,11 @@ def test_stacking_final_estimator_attribute_error():
         estimators=estimators, final_estimator=final_estimator, cv=3
     )
 
+    assert "decision_function" not in dir(clf.fit(X, y))
+
     outer_msg = "This 'StackingClassifier' has no attribute 'decision_function'"
     inner_msg = "'RandomForestClassifier' object has no attribute 'decision_function'"
+
     with pytest.raises(AttributeError, match=outer_msg) as exec_info:
         clf.fit(X, y).decision_function(X)
     assert isinstance(exec_info.value.__cause__, AttributeError)

diff --git a/sklearn/ensemble/tests/test_voting.py b/sklearn/ensemble/tests/test_voting.py
@@ -78,9 +78,9 @@ def test_predictproba_hardvoting():
     assert isinstance(exec_info.value.__cause__, AttributeError)
     assert inner_msg in str(exec_info.value.__cause__)
 
-    assert not hasattr(eclf, "predict_proba")
+    assert not hasattr(eclf, "predict_proba") and "predict_proba" not in dir(eclf)
     eclf.fit(X_scaled, y)
-    assert not hasattr(eclf, "predict_proba")
+    assert not hasattr(eclf, "predict_proba") and "predict_proba" not in dir(eclf)
 
 
 def test_notfitted():

diff --git a/sklearn/feature_selection/_rfe.py b/sklearn/feature_selection/_rfe.py
@@ -226,6 +226,9 @@ def __init__(
         self.importance_getter = importance_getter
         self.verbose = verbose
 
+    def __dir__(self):
+        return [attr for attr in super().__dir__() if hasattr(self, attr)]
+
     @property
     def _estimator_type(self):
         return self.estimator._estimator_type

diff --git a/sklearn/feature_selection/tests/test_rfe.py b/sklearn/feature_selection/tests/test_rfe.py
@@ -645,6 +645,7 @@ def test_rfe_estimator_attribute_error():
 
     outer_msg = "This 'RFE' has no attribute 'decision_function'"
     inner_msg = "'LinearRegression' object has no attribute 'decision_function'"
+    assert "decision_function" not in dir(rfe.fit(iris.data, iris.target))
     with pytest.raises(AttributeError, match=outer_msg) as exec_info:
         rfe.fit(iris.data, iris.target).decision_function(iris.data)
     assert isinstance(exec_info.value.__cause__, AttributeError)

diff --git a/sklearn/linear_model/tests/test_sgd.py b/sklearn/linear_model/tests/test_sgd.py
@@ -727,8 +727,10 @@ def test_sgd_predict_proba_method_access(klass):
             inner_msg = "probability estimates are not available for loss={!r}".format(
                 loss
             )
-            assert not hasattr(clf, "predict_proba")
-            assert not hasattr(clf, "predict_log_proba")
+            assert not hasattr(clf, "predict_proba") and "predict_proba" not in dir(clf)
+            assert not hasattr(
+                clf, "predict_log_proba"
+            ) and "predict_log_proba" not in dir(clf)
             with pytest.raises(
                 AttributeError, match="has no attribute 'predict_proba'"
             ) as exec_info:
@@ -753,8 +755,8 @@ def test_sgd_proba(klass):
     # We cannot use the factory here, because it defines predict_proba
     # anyway.
     clf = SGDClassifier(loss="hinge", alpha=0.01, max_iter=10, tol=None).fit(X, Y)
-    assert not hasattr(clf, "predict_proba")
-    assert not hasattr(clf, "predict_log_proba")
+    assert not hasattr(clf, "predict_proba") and "predict_proba" not in dir(clf)
+    assert not hasattr(clf, "predict_log_proba") and "predict_log_proba" not in dir(clf)
 
     # log and modified_huber losses can output probability estimates
     # binary case

diff --git a/sklearn/model_selection/_search.py b/sklearn/model_selection/_search.py
@@ -433,6 +433,9 @@ def __init__(
     def _estimator_type(self):
         return self.estimator._estimator_type
 
+    def __dir__(self):
+        return [attr for attr in super().__dir__() if hasattr(self, attr)]
+
     def _more_tags(self):
         # allows cross-validation to see 'precomputed' metrics
         return {

diff --git a/sklearn/model_selection/tests/test_search.py b/sklearn/model_selection/tests/test_search.py
@@ -1556,7 +1556,7 @@ def test_predict_proba_disabled():
     y = [0, 0, 1, 1, 1]
     clf = SVC(probability=False)
     gs = GridSearchCV(clf, {}, cv=2).fit(X, y)
-    assert not hasattr(gs, "predict_proba")
+    assert not hasattr(gs, "predict_proba") and "predict_proba" not in dir(gs)
 
 
 def test_grid_search_allows_nans():
@@ -1770,7 +1770,7 @@ def test_stochastic_gradient_loss_param():
 
     # When the estimator is not fitted, `predict_proba` is not available as the
     # loss is 'hinge'.
-    assert not hasattr(clf, "predict_proba")
+    assert not hasattr(clf, "predict_proba") and "predict_proba" not in dir(clf)
     clf.fit(X, y)
     clf.predict_proba(X)
     clf.predict_log_proba(X)
@@ -1783,9 +1783,9 @@ def test_stochastic_gradient_loss_param():
     clf = GridSearchCV(
         estimator=SGDClassifier(loss="hinge"), param_grid=param_grid, cv=3
     )
-    assert not hasattr(clf, "predict_proba")
+    assert not hasattr(clf, "predict_proba") and "predict_proba" not in dir(clf)
     clf.fit(X, y)
-    assert not hasattr(clf, "predict_proba")
+    assert not hasattr(clf, "predict_proba") and "predict_proba" not in dir(clf)
 
 
 def test_search_train_scores_set_to_false():

diff --git a/sklearn/multiclass.py b/sklearn/multiclass.py
@@ -321,6 +321,9 @@ def __init__(self, estimator, *, n_jobs=None, verbose=0):
         self.n_jobs = n_jobs
         self.verbose = verbose
 
+    def __dir__(self):
+        return [attr for attr in super().__dir__() if hasattr(self, attr)]
+
     @_fit_context(
         # OneVsRestClassifier.estimator is not validated yet
         prefer_skip_nested_validation=False
@@ -752,6 +755,9 @@ def __init__(self, estimator, *, n_jobs=None):
         self.estimator = estimator
         self.n_jobs = n_jobs
 
+    def __dir__(self):
+        return [attr for attr in super().__dir__() if hasattr(self, attr)]
+
     @_fit_context(
         # OneVsOneClassifier.estimator is not validated yet
         prefer_skip_nested_validation=False
@@ -1141,6 +1147,9 @@ def __init__(self, estimator, *, code_size=1.5, random_state=None, n_jobs=None):
         self.random_state = random_state
         self.n_jobs = n_jobs
 
+    def __dir__(self):
+        return [attr for attr in super().__dir__() if hasattr(self, attr)]
+
     @_fit_context(
         # OutputCodeClassifier.estimator is not validated yet
         prefer_skip_nested_validation=False

diff --git a/sklearn/multioutput.py b/sklearn/multioutput.py
@@ -409,6 +409,9 @@ class MultiOutputRegressor(RegressorMixin, _MultiOutputEstimator):
     def __init__(self, estimator, *, n_jobs=None):
         super().__init__(estimator, n_jobs=n_jobs)
 
+    def __dir__(self):
+        return [attr for attr in super().__dir__() if hasattr(self, attr)]
+
     @_available_if_estimator_has("partial_fit")
     def partial_fit(self, X, y, sample_weight=None, **partial_fit_params):
         """Incrementally fit the model to data, for each output variable.
@@ -512,6 +515,9 @@ class MultiOutputClassifier(ClassifierMixin, _MultiOutputEstimator):
            [1, 0, 1]])
     """
 
+    def __dir__(self):
+        return [attr for attr in super().__dir__() if hasattr(self, attr)]
+
     def __init__(self, estimator, *, n_jobs=None):
         super().__init__(estimator, n_jobs=n_jobs)
 
@@ -998,6 +1004,9 @@ def __init__(
         )
         self.chain_method = chain_method
 
+    def __dir__(self):
+        return [attr for attr in super().__dir__() if hasattr(self, attr)]
+
     @_fit_context(
         # ClassifierChain.base_estimator is not validated yet
         prefer_skip_nested_validation=False

diff --git a/sklearn/neighbors/_lof.py b/sklearn/neighbors/_lof.py
@@ -218,6 +218,9 @@ def __init__(
         self.contamination = contamination
         self.novelty = novelty
 
+    def __dir__(self):
+        return [attr for attr in super().__dir__() if hasattr(self, attr)]
+
     def _check_novelty_fit_predict(self):
         if self.novelty:
             msg = (

diff --git a/sklearn/neighbors/tests/test_lof.py b/sklearn/neighbors/tests/test_lof.py
@@ -212,15 +212,15 @@ def test_hasattr_prediction():
     assert hasattr(clf, "predict")
     assert hasattr(clf, "decision_function")
     assert hasattr(clf, "score_samples")
-    assert not hasattr(clf, "fit_predict")
+    assert not hasattr(clf, "fit_predict") and "fit_predict" not in dir(clf)
 
     # when novelty=False
     clf = neighbors.LocalOutlierFactor(novelty=False)
     clf.fit(X)
     assert hasattr(clf, "fit_predict")
-    assert not hasattr(clf, "predict")
-    assert not hasattr(clf, "decision_function")
-    assert not hasattr(clf, "score_samples")
+    assert not hasattr(clf, "predict") and "predict" not in dir(clf)
+    assert not hasattr(clf, "decision_function") and "decision_function" not in dir(clf)
+    assert not hasattr(clf, "score_samples") and "score_samples" not in dir(clf)
 
 
 @parametrize_with_checks([neighbors.LocalOutlierFactor(novelty=True)])

diff --git a/sklearn/neural_network/_multilayer_perceptron.py b/sklearn/neural_network/_multilayer_perceptron.py
@@ -149,6 +149,9 @@ def __init__(
         self.n_iter_no_change = n_iter_no_change
         self.max_fun = max_fun
 
+    def __dir__(self):
+        return [attr for attr in super().__dir__() if hasattr(self, attr)]
+
     def _unpack(self, packed_parameters):
         """Extract the coefficients and intercepts from packed_parameters."""
         for i in range(self.n_layers_ - 1):

diff --git a/sklearn/neural_network/tests/test_mlp.py b/sklearn/neural_network/tests/test_mlp.py
@@ -499,7 +499,9 @@ def test_partial_fit_errors():
         MLPClassifier(solver="sgd").partial_fit(X, y, classes=[2])
 
     # lbfgs doesn't support partial_fit
-    assert not hasattr(MLPClassifier(solver="lbfgs"), "partial_fit")
+    assert not hasattr(
+        MLPClassifier(solver="lbfgs"), "partial_fit"
+    ) and "partial_fit" not in dir(MLPClassifier(solver="lbfgs"))
 
 
 def test_nonfinite_params():

diff --git a/sklearn/pipeline.py b/sklearn/pipeline.py
@@ -172,6 +172,9 @@ def __init__(self, steps, *, memory=None, verbose=False):
         self.memory = memory
         self.verbose = verbose
 
+    def __dir__(self):
+        return [attr for attr in super().__dir__() if hasattr(self, attr)]
+
     def set_output(self, *, transform=None):
         """Set the output container when `"transform"` and `"fit_transform"` are called.
 

diff --git a/sklearn/preprocessing/_function_transformer.py b/sklearn/preprocessing/_function_transformer.py
@@ -170,6 +170,9 @@ def __init__(
         self.kw_args = kw_args
         self.inv_kw_args = inv_kw_args
 
+    def __dir__(self):
+        return [attr for attr in super().__dir__() if hasattr(self, attr)]
+
     def _check_input(self, X, *, reset):
         if self.validate:
             return self._validate_data(X, accept_sparse=self.accept_sparse, reset=reset)

diff --git a/sklearn/preprocessing/tests/test_function_transformer.py b/sklearn/preprocessing/tests/test_function_transformer.py
@@ -352,7 +352,7 @@ def test_function_transformer_feature_names_out_is_None():
     transformer = FunctionTransformer()
     X = np.random.rand(100, 2)
     transformer.fit_transform(X)
-
+    assert "get_feature_names_out" not in dir(transformer)
     msg = "This 'FunctionTransformer' has no attribute 'get_feature_names_out'"
     with pytest.raises(AttributeError, match=msg):
         transformer.get_feature_names_out()

diff --git a/sklearn/semi_supervised/_self_training.py b/sklearn/semi_supervised/_self_training.py
@@ -184,6 +184,9 @@ def __init__(
         self.max_iter = max_iter
         self.verbose = verbose
 
+    def __dir__(self):
+        return [attr for attr in super().__dir__() if hasattr(self, attr)]
+
     @_fit_context(
         # SelfTrainingClassifier.base_estimator is not validated yet
         prefer_skip_nested_validation=False

diff --git a/sklearn/semi_supervised/tests/test_self_training.py b/sklearn/semi_supervised/tests/test_self_training.py
@@ -310,7 +310,9 @@ def test_base_estimator_meta_estimator():
         cv=2,
     )
 
-    assert not hasattr(base_estimator, "predict_proba")
+    assert not hasattr(base_estimator, "predict_proba") and "predict_proba" not in dir(
+        base_estimator
+    )
     clf = SelfTrainingClassifier(base_estimator=base_estimator)
     with pytest.raises(AttributeError):
         clf.fit(X_train, y_train_missing_labels)
@@ -337,6 +339,10 @@ def test_self_training_estimator_attribute_error():
     # should raise an AttributeError
     self_training = SelfTrainingClassifier(base_estimator=DecisionTreeClassifier())
 
+    assert "decision_function" not in dir(
+        self_training.fit(X_train, y_train_missing_labels)
+    )
+
     outer_msg = "This 'SelfTrainingClassifier' has no attribute 'decision_function'"
     inner_msg = "'DecisionTreeClassifier' object has no attribute 'decision_function'"
     with pytest.raises(AttributeError, match=outer_msg) as exec_info:

diff --git a/sklearn/svm/_base.py b/sklearn/svm/_base.py
@@ -733,6 +733,9 @@ def __init__(
             random_state=random_state,
         )
 
+    def __dir__(self):
+        return [attr for attr in super().__dir__() if hasattr(self, attr)]
+
     def _validate_targets(self, y):
         y_ = column_or_1d(y, warn=True)
         check_classification_targets(y)

diff --git a/sklearn/svm/tests/test_svm.py b/sklearn/svm/tests/test_svm.py
@@ -1118,9 +1118,9 @@ def test_hasattr_predict_proba():
     assert hasattr(G, "predict_proba")
 
     G = svm.SVC(probability=False)
-    assert not hasattr(G, "predict_proba")
+    assert not hasattr(G, "predict_proba") and "predict_proba" not in dir(G)
     G.fit(iris.data, iris.target)
-    assert not hasattr(G, "predict_proba")
+    assert not hasattr(G, "predict_proba") and "predict_proba" not in dir(G)
 
     # Switching to `probability=True` after fitting should make
     # predict_proba available, but calling it must not work: