Merge pull request #3935 from celestinoxp/fix_auc_metrics
Fix AUC metric
Yard1 committed Apr 28, 2024
2 parents c62c7d6 + cb59dc2 commit 9ee0cf4
Showing 5 changed files with 46 additions and 21 deletions.
11 changes: 8 additions & 3 deletions pycaret/containers/metrics/classification.py
@@ -110,16 +110,21 @@ def __init__(
if not isinstance(args, dict):
raise TypeError("args needs to be a dictionary.")

if target == "pred":
response_method = "predict"
elif target == "pred_proba":
response_method = "predict_proba"
else: # threshold
response_method = "decision_function"

scorer = (
scorer
if scorer
else pycaret.internal.metrics.make_scorer_with_error_score(
score_func,
needs_proba=target == "pred_proba",
needs_threshold=target == "threshold",
response_method=response_method,
greater_is_better=greater_is_better,
error_score=0.0,
**args,
)
)

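This is the core of the AUC fix: instead of passing the needs_proba / needs_threshold flags, which scikit-learn 1.4 deprecates in favor of a single response_method argument, the container now derives response_method from the metric's target and hands it to the scorer factory. A minimal sketch of the equivalent call on plain scikit-learn >= 1.4 (illustration only, not part of the commit):

# Illustrative sketch only; assumes scikit-learn >= 1.4.
from sklearn.metrics import make_scorer, roc_auc_score

auc_scorer = make_scorer(
    roc_auc_score,
    response_method="predict_proba",  # previously expressed as needs_proba=True
    greater_is_better=True,
)
# For metrics scored on "threshold" targets, response_method="decision_function"
# would be used instead, mirroring the if/elif/else added above.
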
10 changes: 9 additions & 1 deletion pycaret/internal/metrics.py
@@ -118,10 +118,13 @@ def __call__(self, y_true, y_pred, **kwargs):
class BinaryMulticlassScoreFunc:
"""Wrapper to replace call kwargs with preset values if target is binary."""

def __init__(self, score_func: Callable, kwargs_if_binary: dict):
def __init__(
self, score_func: Callable, kwargs_if_binary: dict, response_method=None
):
self.score_func = score_func
self.kwargs_if_binary = kwargs_if_binary
self.__name__ = score_func.__name__
self.response_method = response_method

def __call__(self, y_true, y_pred, **kwargs):
if self.kwargs_if_binary:
@@ -133,6 +136,11 @@ def __call__(self, y_true, y_pred, **kwargs):
)
if is_binary:
kwargs = {**kwargs, **self.kwargs_if_binary}

# Use the provided response_method if available
if self.response_method:
kwargs["response_method"] = self.response_method

return self.score_func(y_true, y_pred, **kwargs)


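BinaryMulticlassScoreFunc now also accepts an optional response_method and forwards it to the wrapped score function, in addition to swapping in the binary-only kwargs. A simplified standalone sketch of that binary-override idea (a hypothetical helper, not the PyCaret class itself):

# Hypothetical helper for illustration; preset kwargs win whenever the target is binary.
from sklearn.metrics import f1_score
from sklearn.utils.multiclass import type_of_target


def binary_aware_f1(y_true, y_pred, **kwargs):
    kwargs_if_binary = {"average": "binary"}  # preset values, like kwargs_if_binary above
    if type_of_target(y_true) == "binary":
        kwargs = {**kwargs, **kwargs_if_binary}
    return f1_score(y_true, y_pred, **kwargs)


# average="macro" is overridden because the target is binary
print(binary_aware_f1([0, 1, 1, 0], [0, 1, 0, 0], average="macro"))
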
22 changes: 5 additions & 17 deletions pycaret/internal/pipeline.py
@@ -17,7 +17,7 @@
import sklearn.pipeline
from sklearn.base import clone
from sklearn.utils import _print_elapsed_time
from sklearn.utils.metadata_routing import _routing_enabled, process_routing
from sklearn.utils.metadata_routing import _routing_enabled
from sklearn.utils.metaestimators import available_if
from sklearn.utils.validation import check_memory

@@ -118,6 +118,8 @@ def _noop_transform(pipeline: "Pipeline", X, y, **kwargs):

class Pipeline(imblearn.pipeline.Pipeline):
def __init__(self, steps, *, memory=None, verbose=False):
if _routing_enabled():
raise RuntimeError("PyCaret Pipeline does not support metadata routing.")
super().__init__(steps, memory=memory, verbose=verbose)
self._fit_vars = set()
self._feature_names_in = None
@@ -332,22 +334,8 @@ def predict(self, X, **params):

@available_if(_final_estimator_has("predict_proba"))
def predict_proba(self, X, **params):
# X, _ = self._memory_full_transform(self, X, None, with_final=False)

Xt = X

if not _routing_enabled():
for _, name, transform in self._iter(with_final=False):
Xt = transform.transform(Xt)
return self.steps[-1][1].predict_proba(Xt, **params)

# metadata routing enabled
routed_params = process_routing(self, "predict_proba", **params)

# return self.steps[-1][-1].predict_proba(X, **params)
return self.steps[-1][1].predict_proba(
Xt, **routed_params[self.steps[-1][0]].predict_proba
)
Xt, _ = self._memory_full_transform(self, X, None, with_final=False)
return self.steps[-1][1].predict_proba(Xt, **params)

@available_if(_final_estimator_has("predict_log_proba"))
def predict_log_proba(self, X, **params):
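
In pipeline.py the metadata-routing branch is dropped: the custom Pipeline now refuses to be constructed while scikit-learn metadata routing is enabled, and predict_proba goes back to the cached full transform followed by a plain predict_proba call on the final estimator. A hedged sketch of that simplified flow on a plain scikit-learn pipeline (PyCaret's caching via _memory_full_transform is not reproduced here):

# Sketch only; assumes a fitted pipeline whose final step exposes predict_proba.
import numpy as np
from sklearn.datasets import make_classification
from sklearn.linear_model import LogisticRegression
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import StandardScaler

X, y = make_classification(random_state=0)
pipe = Pipeline([("scale", StandardScaler()), ("clf", LogisticRegression())]).fit(X, y)

Xt = pipe[:-1].transform(X)                  # run every non-final step
proba = pipe.steps[-1][1].predict_proba(Xt)  # then score with the final estimator
assert np.allclose(proba, pipe.predict_proba(X))
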
12 changes: 12 additions & 0 deletions tests/test_classification.py
@@ -34,6 +34,18 @@ def test_classification(juice_dataframe, return_train_score):
# compare models
top3 = pycaret.classification.compare_models(errors="raise", n_select=100)[:3]
assert isinstance(top3, list)
metrics = pycaret.classification.pull()
# no metric should be 0
assert (
(
metrics.loc[[i for i in metrics.index if i not in ("dummy")]][
[c for c in metrics.columns if c not in ("Model", "TT (Sec)")]
]
!= 0
)
.all()
.all()
)

# tune model
tuned_top3 = [
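
The new assertion (mirrored in tests/test_regression.py below) pulls the compare_models grid and checks that no metric is exactly zero for any non-dummy model, which is how the broken AUC column originally surfaced. A hedged sketch of reproducing the same check interactively, on synthetic data rather than the juice dataset used by the test:

# Illustrative reproduction only; the dataset and setup arguments are assumptions, not test code.
import pandas as pd
import pycaret.classification as pc
from sklearn.datasets import make_classification

X, y = make_classification(n_samples=500, random_state=0)
data = pd.DataFrame(X, columns=[f"f{i}" for i in range(X.shape[1])])
data["target"] = y

pc.setup(data, target="target", session_id=123, verbose=False)
pc.compare_models(n_select=3)
grid = pc.pull()  # last printed scoring grid as a DataFrame
non_dummy = grid.drop(index=[i for i in grid.index if "dummy" in str(i)], errors="ignore")
assert (non_dummy["AUC"] != 0).all()
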
12 changes: 12 additions & 0 deletions tests/test_regression.py
@@ -39,6 +39,18 @@ def test_regression(boston_dataframe, return_train_score):
errors="raise",
)[:3]
assert isinstance(top3, list)
metrics = pycaret.regression.pull()
# no metric should be 0
assert (
(
metrics.loc[[i for i in metrics.index if i not in ("dummy")]][
[c for c in metrics.columns if c not in ("Model", "TT (Sec)")]
]
!= 0
)
.all()
.all()
)

# tune model
tuned_top3 = [
