Fix: Inaccurate Attribute Listing with dir(obj) #28749

Open: wants to merge 4 commits into base: main
12 changes: 10 additions & 2 deletions sklearn/base.py
@@ -193,6 +193,13 @@ class BaseEstimator(_HTMLDocumentationLinkMixin, _MetadataRequester):
array([3, 3, 3])
"""

def __dir__(self):
"""Filters conditional methods that should be hidden based
on the `available_if` decorator from SciKit Learn."""
with warnings.catch_warnings():
warnings.filterwarnings("ignore", category=FutureWarning)
return [attr for attr in super().__dir__() if hasattr(self, attr)]

@classmethod
def _get_param_names(cls):
"""Get parameter names for the estimator"""
@@ -1353,8 +1360,9 @@ class _UnstableArchMixin:

def _more_tags(self):
return {
"non_deterministic": _IS_32BIT
or platform.machine().startswith(("ppc", "powerpc"))
"non_deterministic": _IS_32BIT or platform.machine().startswith(
("ppc", "powerpc")
)
}


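To illustrate the intended effect of the new `__dir__` (a minimal usage sketch, not part of this diff, mirroring the assertions added to the tests below): `SGDClassifier` only exposes `predict_proba` when `loss` is "log_loss" or "modified_huber", so with the hinge loss the `available_if` guard fails and the method should now be absent from `dir()` as well as failing `hasattr`.

# Minimal sketch, assuming a scikit-learn build with this patch applied.
from sklearn.linear_model import SGDClassifier

clf = SGDClassifier(loss="hinge")
assert not hasattr(clf, "predict_proba")  # already the behaviour before the patch
assert "predict_proba" not in dir(clf)    # new: dir() now agrees with hasattr

The `hasattr` probe in `__dir__` evaluates every `available_if` condition, and probing deprecated attributes can emit `FutureWarning`s, which is presumably why the method wraps the check in a `warnings.catch_warnings()` block.
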
1 change: 1 addition & 0 deletions sklearn/ensemble/tests/test_bagging.py
@@ -448,6 +448,7 @@ def test_error():
X, y = iris.data, iris.target
base = DecisionTreeClassifier()
assert not hasattr(BaggingClassifier(base).fit(X, y), "decision_function")
assert "decision_function" not in dir(BaggingClassifier(base).fit(X, y))


def test_parallel_classification():
3 changes: 3 additions & 0 deletions sklearn/ensemble/tests/test_stacking.py
@@ -882,8 +882,11 @@ def test_stacking_final_estimator_attribute_error():
estimators=estimators, final_estimator=final_estimator, cv=3
)

assert "decision_function" not in dir(clf.fit(X, y))

outer_msg = "This 'StackingClassifier' has no attribute 'decision_function'"
inner_msg = "'RandomForestClassifier' object has no attribute 'decision_function'"

with pytest.raises(AttributeError, match=outer_msg) as exec_info:
clf.fit(X, y).decision_function(X)
assert isinstance(exec_info.value.__cause__, AttributeError)
2 changes: 2 additions & 0 deletions sklearn/ensemble/tests/test_voting.py
@@ -79,8 +79,10 @@ def test_predictproba_hardvoting():
assert inner_msg in str(exec_info.value.__cause__)

assert not hasattr(eclf, "predict_proba")
assert "predict_proba" not in dir(eclf)
eclf.fit(X_scaled, y)
assert not hasattr(eclf, "predict_proba")
assert "predict_proba" not in dir(eclf)


def test_notfitted():
1 change: 1 addition & 0 deletions sklearn/feature_selection/tests/test_rfe.py
@@ -645,6 +645,7 @@ def test_rfe_estimator_attribute_error():

outer_msg = "This 'RFE' has no attribute 'decision_function'"
inner_msg = "'LinearRegression' object has no attribute 'decision_function'"
assert "decision_function" not in dir(rfe.fit(iris.data, iris.target))
with pytest.raises(AttributeError, match=outer_msg) as exec_info:
rfe.fit(iris.data, iris.target).decision_function(iris.data)
assert isinstance(exec_info.value.__cause__, AttributeError)
3 changes: 2 additions & 1 deletion sklearn/linear_model/_stochastic_gradient.py
@@ -1358,7 +1358,8 @@ def predict_proba(self, X):
raise NotImplementedError(
"predict_(log_)proba only supported when"
" loss='log_loss' or loss='modified_huber' "
"(%r given)" % self.loss
"(%r given)"
% self.loss
)

@available_if(_check_proba)
4 changes: 4 additions & 0 deletions sklearn/linear_model/tests/test_sgd.py
@@ -728,7 +728,9 @@ def test_sgd_predict_proba_method_access(klass):
loss
)
assert not hasattr(clf, "predict_proba")
assert "predict_proba" not in dir(clf)
assert not hasattr(clf, "predict_log_proba")
assert "predict_log_proba" not in dir(clf)
with pytest.raises(
AttributeError, match="has no attribute 'predict_proba'"
) as exec_info:
@@ -754,7 +756,9 @@ def test_sgd_proba(klass):
# anyway.
clf = SGDClassifier(loss="hinge", alpha=0.01, max_iter=10, tol=None).fit(X, Y)
assert not hasattr(clf, "predict_proba")
assert "predict_proba" not in dir(clf)
assert not hasattr(clf, "predict_log_proba")
assert "predict_log_proba" not in dir(clf)

# log and modified_huber losses can output probability estimates
# binary case
3 changes: 2 additions & 1 deletion sklearn/model_selection/_search.py
@@ -486,7 +486,8 @@ def score(self, X, y=None, **params):
if self.scorer_ is None:
raise ValueError(
"No score function explicitly defined, "
"and the estimator doesn't provide one %s" % self.best_estimator_
"and the estimator doesn't provide one %s"
% self.best_estimator_
)
if isinstance(self.scorer_, dict):
if self.multimetric_:
4 changes: 4 additions & 0 deletions sklearn/model_selection/tests/test_search.py
@@ -1557,6 +1557,7 @@ def test_predict_proba_disabled():
clf = SVC(probability=False)
gs = GridSearchCV(clf, {}, cv=2).fit(X, y)
assert not hasattr(gs, "predict_proba")
assert "predict_proba" not in dir(gs)


def test_grid_search_allows_nans():
@@ -1771,6 +1772,7 @@ def test_stochastic_gradient_loss_param():
# When the estimator is not fitted, `predict_proba` is not available as the
# loss is 'hinge'.
assert not hasattr(clf, "predict_proba")
assert "predict_proba" not in dir(clf)
clf.fit(X, y)
clf.predict_proba(X)
clf.predict_log_proba(X)
@@ -1784,8 +1786,10 @@ def test_stochastic_gradient_loss_param():
estimator=SGDClassifier(loss="hinge"), param_grid=param_grid, cv=3
)
assert not hasattr(clf, "predict_proba")
assert "predict_proba" not in dir(clf)
clf.fit(X, y)
assert not hasattr(clf, "predict_proba")
assert "predict_proba" not in dir(clf)


def test_search_train_scores_set_to_false():
4 changes: 4 additions & 0 deletions sklearn/neighbors/tests/test_lof.py
@@ -213,14 +213,18 @@ def test_hasattr_prediction():
assert hasattr(clf, "decision_function")
assert hasattr(clf, "score_samples")
assert not hasattr(clf, "fit_predict")
assert "fit_predict" not in dir(clf)

# when novelty=False
clf = neighbors.LocalOutlierFactor(novelty=False)
clf.fit(X)
assert hasattr(clf, "fit_predict")
assert not hasattr(clf, "predict")
assert "predict" not in dir(clf)
assert not hasattr(clf, "decision_function")
assert "decision_function" not in dir(clf)
assert not hasattr(clf, "score_samples")
assert "score_samples" not in dir(clf)


@parametrize_with_checks([neighbors.LocalOutlierFactor(novelty=True)])
3 changes: 2 additions & 1 deletion sklearn/neural_network/_multilayer_perceptron.py
@@ -754,7 +754,8 @@ def _check_solver(self):
if self.solver not in _STOCHASTIC_SOLVERS:
raise AttributeError(
"partial_fit is only available for stochastic"
" optimizers. %s is not stochastic." % self.solver
" optimizers. %s is not stochastic."
% self.solver
)
return True

4 changes: 3 additions & 1 deletion sklearn/neural_network/tests/test_mlp.py
@@ -500,6 +500,7 @@ def test_partial_fit_errors():

# lbfgs doesn't support partial_fit
assert not hasattr(MLPClassifier(solver="lbfgs"), "partial_fit")
assert "parital_fit" not in dir(MLPClassifier(solver="lbfgs"))


def test_nonfinite_params():
@@ -732,7 +733,8 @@ def test_warm_start():
message = (
"warm_start can only be used where `y` has the same "
"classes as in the previous call to fit."
" Previously got [0 1 2], `y` has %s" % np.unique(y_i)
" Previously got [0 1 2], `y` has %s"
% np.unique(y_i)
)
with pytest.raises(ValueError, match=re.escape(message)):
clf.fit(X, y_i)
2 changes: 1 addition & 1 deletion sklearn/preprocessing/tests/test_function_transformer.py
@@ -352,7 +352,7 @@ def test_function_transformer_feature_names_out_is_None():
transformer = FunctionTransformer()
X = np.random.rand(100, 2)
transformer.fit_transform(X)

assert "get_feature_names_out" not in dir(transformer)
msg = "This 'FunctionTransformer' has no attribute 'get_feature_names_out'"
with pytest.raises(AttributeError, match=msg):
transformer.get_feature_names_out()
5 changes: 5 additions & 0 deletions sklearn/semi_supervised/tests/test_self_training.py
@@ -311,6 +311,7 @@ def test_base_estimator_meta_estimator():
)

assert not hasattr(base_estimator, "predict_proba")
assert "predict_proba" not in dir(base_estimator)
clf = SelfTrainingClassifier(base_estimator=base_estimator)
with pytest.raises(AttributeError):
clf.fit(X_train, y_train_missing_labels)
@@ -337,6 +338,10 @@ def test_self_training_estimator_attribute_error():
# should raise an AttributeError
self_training = SelfTrainingClassifier(base_estimator=DecisionTreeClassifier())

assert "decision_function" not in dir(
self_training.fit(X_train, y_train_missing_labels)
)

outer_msg = "This 'SelfTrainingClassifier' has no attribute 'decision_function'"
inner_msg = "'DecisionTreeClassifier' object has no attribute 'decision_function'"
with pytest.raises(AttributeError, match=outer_msg) as exec_info:
6 changes: 4 additions & 2 deletions sklearn/svm/_base.py
@@ -297,7 +297,8 @@ def _warn_from_fit_status(self):
warnings.warn(
"Solver terminated early (max_iter=%i)."
" Consider pre-processing your data with"
" StandardScaler or MinMaxScaler." % self.max_iter,
" StandardScaler or MinMaxScaler."
% self.max_iter,
ConvergenceWarning,
)

@@ -1173,7 +1174,8 @@ def _fit_liblinear(
raise ValueError(
"This solver needs samples of at least 2 classes"
" in the data, but the data contains only one"
" class: %r" % classes_[0]
" class: %r"
% classes_[0]
)

class_weight_ = compute_class_weight(class_weight, classes=classes_, y=y)
2 changes: 2 additions & 0 deletions sklearn/svm/tests/test_svm.py
@@ -1119,8 +1119,10 @@ def test_hasattr_predict_proba():

G = svm.SVC(probability=False)
assert not hasattr(G, "predict_proba")
assert "predict_proba" not in dir(G)
G.fit(iris.data, iris.target)
assert not hasattr(G, "predict_proba")
assert "predict_proba" not in dir(G)

# Switching to `probability=True` after fitting should make
# predict_proba available, but calling it must not work:
9 changes: 9 additions & 0 deletions sklearn/tests/test_multiclass.py
@@ -133,7 +133,10 @@ def test_ovr_partial_fit():

# test partial_fit only exists if estimator has it:
ovr = OneVsRestClassifier(SVC())
# check __dir__ does not return partial_fit
assert not hasattr(ovr, "partial_fit")
assert "partial_fit" not in dir(ovr)


def test_ovr_partial_fit_exceptions():
@@ -385,12 +388,15 @@ def test_ovr_multilabel_predict_proba():
# Decision function only estimator.
decision_only = OneVsRestClassifier(svm.SVR()).fit(X_train, Y_train)
assert not hasattr(decision_only, "predict_proba")
assert "predict_proba" not in dir(decision_only)

# Estimator with predict_proba disabled, depending on parameters.
decision_only = OneVsRestClassifier(svm.SVC(probability=False))
assert not hasattr(decision_only, "predict_proba")
assert "predict_proba" not in dir(decision_only)
decision_only.fit(X_train, Y_train)
assert not hasattr(decision_only, "predict_proba")
assert "predict_proba" not in dir(decision_only)
assert hasattr(decision_only, "decision_function")

# Estimator which can get predict_proba enabled after fitting
@@ -399,6 +405,7 @@ def test_ovr_multilabel_predict_proba():
)
proba_after_fit = OneVsRestClassifier(gs)
assert not hasattr(proba_after_fit, "predict_proba")
assert "predict_proba" not in dir(proba_after_fit)
proba_after_fit.fit(X_train, Y_train)
assert hasattr(proba_after_fit, "predict_proba")

@@ -421,6 +428,7 @@ def test_ovr_single_label_predict_proba():
# Decision function only estimator.
decision_only = OneVsRestClassifier(svm.SVR()).fit(X_train, Y_train)
assert not hasattr(decision_only, "predict_proba")
assert "predict_proba" not in dir(decision_only)

Y_pred = clf.predict(X_test)
Y_proba = clf.predict_proba(X_test)
@@ -560,6 +568,7 @@ def test_ovo_partial_fit_predict():
# test partial_fit only exists if estimator has it:
ovr = OneVsOneClassifier(SVC())
assert not hasattr(ovr, "partial_fit")
assert "partial_fit" not in dir(ovr)


def test_ovo_decision_function():
9 changes: 8 additions & 1 deletion sklearn/tests/test_multioutput.py
@@ -97,6 +97,7 @@ def test_multi_target_regression_partial_fit():
y_pred = sgr.predict(X_test)
assert_almost_equal(references, y_pred)
assert not hasattr(MultiOutputRegressor(Lasso), "partial_fit")
assert "partial_fit" not in dir(MultiOutputRegressor(Lasso))


def test_multi_target_regression_one_target():
@@ -215,7 +216,9 @@ def test_hasattr_multi_output_predict_proba():
sgd_linear_clf = SGDClassifier(random_state=1, max_iter=5)
multi_target_linear = MultiOutputClassifier(sgd_linear_clf)
multi_target_linear.fit(X, y)

assert not hasattr(multi_target_linear, "predict_proba")
assert "predict_proba" not in dir(multi_target_linear)

# case where predict_proba attribute exists
sgd_linear_clf = SGDClassifier(loss="log_loss", random_state=1, max_iter=5)
@@ -478,8 +481,10 @@ def test_multi_output_delegate_predict_proba():
assert hasattr(moc, "predict_proba")

# A base estimator without `predict_proba` should raise an AttributeError
moc = MultiOutputClassifier(LinearSVC())
moc = MultiOutputClassifier(LinearSVC(dual="auto"))

assert not hasattr(moc, "predict_proba")
assert "predict_proba" not in dir(moc)

outer_msg = "'MultiOutputClassifier' has no attribute 'predict_proba'"
inner_msg = "'LinearSVC' object has no attribute 'predict_proba'"
@@ -490,6 +495,7 @@ def test_multi_output_delegate_predict_proba():

moc.fit(X, y)
assert not hasattr(moc, "predict_proba")
assert "predict_proba" not in dir(moc)
with pytest.raises(AttributeError, match=outer_msg) as exec_info:
moc.predict_proba(X)
assert isinstance(exec_info.value.__cause__, AttributeError)
@@ -525,6 +531,7 @@ def test_classifier_chain_fit_and_predict_with_linear_svc(chain_method):
Y_binary = Y_decision >= 0
assert_array_equal(Y_binary, Y_pred)
assert not hasattr(classifier_chain, "predict_proba")
assert "predict_proba" not in dir(classifier_chain)


@pytest.mark.parametrize("csr_container", CSR_CONTAINERS)
6 changes: 6 additions & 0 deletions sklearn/tests/test_pipeline.py
@@ -817,24 +817,30 @@ def test_pipeline_ducktyping():

pipeline = make_pipeline(Transf())
assert not hasattr(pipeline, "predict")
assert "predict" not in dir(pipeline)
pipeline.transform
pipeline.inverse_transform

pipeline = make_pipeline("passthrough")
assert pipeline.steps[0] == ("passthrough", "passthrough")
assert not hasattr(pipeline, "predict")
assert "predict" not in dir(pipeline)
pipeline.transform
pipeline.inverse_transform

pipeline = make_pipeline(Transf(), NoInvTransf())
assert not hasattr(pipeline, "predict")
assert "predict" not in dir(pipeline)
pipeline.transform
assert not hasattr(pipeline, "inverse_transform")
assert "inverse_transform" not in dir(pipeline)

pipeline = make_pipeline(NoInvTransf(), Transf())
assert not hasattr(pipeline, "predict")
assert "predict" not in dir(pipeline)
pipeline.transform
assert not hasattr(pipeline, "inverse_transform")
assert "inverse_transform" not in dir(pipeline)


def test_make_pipeline():
1 change: 1 addition & 0 deletions sklearn/utils/tests/test_mocking.py
@@ -203,3 +203,4 @@ def test_mock_estimator_on_off_prediction(iris, response_methods):
assert getattr(estimator, response)(X) == response
else:
assert not hasattr(estimator, response)
assert response not in dir(estimator)