FEA Callbacks base infrastructure + progress bars #27663

Status: Open. jeremiedbb wants to merge 57 commits into base: callbacks.

Changes from 36 commits

Commits (57)
272e75f
callback API
jeremiedbb Dec 14, 2021
584bdf7
cln nmf and test reconstruction attributes
jeremiedbb Dec 17, 2021
bb32ff3
cln snapshot + test snapshot + uuid for computation tree
jeremiedbb Dec 20, 2021
7a1825d
cln
jeremiedbb Dec 31, 2021
3e3b25f
black
jeremiedbb Dec 31, 2021
26dbb69
lint
jeremiedbb Dec 31, 2021
eb7b824
wip
jeremiedbb Feb 14, 2022
9b913fd
Merge branch 'master' into callback-api
jeremiedbb Feb 14, 2022
f78442e
class
jeremiedbb Feb 23, 2022
34bab15
more tests
jeremiedbb Feb 23, 2022
596a58e
cln
jeremiedbb Feb 23, 2022
4f9363c
wip
jeremiedbb Sep 12, 2022
030f68b
Merge remote-tracking branch 'upstream/main' into callback-api
jeremiedbb Sep 12, 2022
35c5284
wip
jeremiedbb Sep 16, 2022
115e184
wip
jeremiedbb Sep 16, 2022
bdb4990
wip
jeremiedbb Sep 21, 2022
d1bb5eb
Merge remote-tracking branch 'upstream/main' into callback-api
jeremiedbb Sep 23, 2022
7a43c30
wip
jeremiedbb Sep 23, 2022
573fd5d
Merge remote-tracking branch 'upstream/main' into callback-api
jeremiedbb Oct 11, 2022
a218068
wip
jeremiedbb Oct 13, 2022
f794694
update poor_score
jeremiedbb Oct 13, 2022
ab74f19
Merge remote-tracking branch 'upstream/main' into pr/jeremiedbb/22000
jeremiedbb Jun 19, 2023
37e569b
wip
jeremiedbb Jun 21, 2023
d7208fa
wip
jeremiedbb Jun 29, 2023
774ff69
Merge remote-tracking branch 'upstream/main' into pr/jeremiedbb/22000
jeremiedbb Oct 17, 2023
b8ac1a5
cln
jeremiedbb Oct 18, 2023
e544cc4
Merge remote-tracking branch 'upstream/main' into pr/jeremiedbb/22000
jeremiedbb Oct 20, 2023
b644430
wip
jeremiedbb Oct 25, 2023
3ab3d7f
wip
jeremiedbb Oct 25, 2023
39c04cc
wip
jeremiedbb Oct 25, 2023
73ecb31
wip
jeremiedbb Oct 25, 2023
9058919
mypy
jeremiedbb Oct 27, 2023
309f755
add test for progressbars
jeremiedbb Oct 27, 2023
3569329
can't guarantee same order of tasks
jeremiedbb Oct 27, 2023
2e28e4a
cln
jeremiedbb Oct 27, 2023
57b30b1
Merge branch 'callbacks' into base
jeremiedbb Oct 27, 2023
5270bad
address nitpicks
jeremiedbb Nov 21, 2023
ae5facc
make rich soft dependency
jeremiedbb Nov 22, 2023
df50ab3
missing arg
jeremiedbb Nov 22, 2023
aaa2dec
improve coverage
jeremiedbb Nov 23, 2023
a3e2b35
Merge branch 'callbacks' into base
jeremiedbb Nov 23, 2023
e13516d
Merge branch 'callbacks' into base
jeremiedbb Feb 9, 2024
a0667c4
mixin for callback propagation
jeremiedbb Feb 9, 2024
2fdbda3
rename _skl_callbacks
jeremiedbb Feb 17, 2024
aea9af7
clone callbacks
jeremiedbb Feb 19, 2024
44b615a
some renaming and cleanup
jeremiedbb Feb 20, 2024
fabe932
Merge branch 'callbacks' into base
jeremiedbb Feb 20, 2024
07a6875
Merge branch 'callbacks' into base (continued)
jeremiedbb Feb 20, 2024
02ecb2e
Merge branch 'callbacks' into base
jeremiedbb Feb 21, 2024
6433ba3
fix imports
jeremiedbb Feb 21, 2024
052f9d2
Merge remote-tracking branch 'upstream/callbacks' into base
jeremiedbb Feb 23, 2024
268d5cf
update lock files
jeremiedbb Feb 23, 2024
2381645
Merge remote-tracking branch 'upstream/callbacks' into base
jeremiedbb Mar 6, 2024
d392b63
debug ci
jeremiedbb Mar 6, 2024
9177757
iter
jeremiedbb Mar 6, 2024
436bcad
iter
jeremiedbb Mar 6, 2024
5bf6608
iter
jeremiedbb Mar 6, 2024
1 change: 1 addition & 0 deletions sklearn/__init__.py
@@ -89,6 +89,7 @@

__all__ = [
    "calibration",
    "callback",
    "cluster",
    "covariance",
    "cross_decomposition",
131 changes: 130 additions & 1 deletion sklearn/base.py
@@ -15,6 +15,7 @@

from . import __version__
from ._config import config_context, get_config
from .callback import BaseCallback, build_computation_tree
from .exceptions import InconsistentVersionWarning
from .utils import _IS_32BIT
from .utils._estimator_html_repr import _HTMLDocumentationLinkMixin, estimator_html_repr
@@ -115,6 +116,10 @@

    params_set = new_object.get_params(deep=False)

    # copy callbacks

Member: This is not really a copy here, is it?

    if hasattr(estimator, "_callbacks"):

Member: BTW, this might break something for https://github.com/microsoft/FLAML/blob/0415638dd1e1d3149fb17fb8760520af975d16f6/flaml/automl/model.py#L1587, which also adds this attribute to scikit-learn's base estimator in their library. But there are no reserved private namespaces, so they could probably adapt if it does.

Member: This can also break quite easily if the callback object keeps references to attributes of the old estimator. Why aren't we creating a copy here?

Member Author:

> BTW, this might break something for https://github.com/microsoft/FLAML/blob/0415638dd1e1d3149fb17fb8760520af975d16f6/flaml/automl/model.py#L1587 which also adds this attribute to scikit-learn's base estimator in their library

I can change the name to _sk_callbacks or any derivative of that.

Member Author:

> Why aren't we creating a copy here?

The motivation is a use case like this:

    monitoring = Monitoring()
    lr = LogisticRegression()._set_callbacks(monitoring)
    GridSearchCV(lr, param_grid).fit(X, y)
    monitoring.plot()

The monitoring callback gathers information across all the copies of the logistic regression made during the grid search. If we made a copy of the callback in clone, we couldn't retrieve any information once the grid search is finished.
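
For illustration, a sketch of what such a shared-state Monitoring callback could look like under the BaseCallback API introduced in this PR (the record structure and what plot would do are hypothetical):

    from sklearn.callback import BaseCallback

    class Monitoring(BaseCallback):
        """Hypothetical callback accumulating events across all clones sharing it."""

        def __init__(self):
            # Shared list: every clone holds a reference to this same instance,
            # so records from all the grid-search fits end up here.
            self.records = []

        def on_fit_begin(self, estimator, *, X=None, y=None):
            self.records.append(("fit_begin", estimator.__class__.__name__))

        def on_fit_iter_end(self, estimator, node, **kwargs):
            self.records.append(("iter_end", estimator.__class__.__name__))
            return False  # never request early stopping

        def on_fit_end(self):
            self.records.append(("fit_end", None))

As the follow-up comments note, this sharing pattern breaks down once clones happen in subprocesses, which is why the final design clones callbacks like any other param.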

Member: The object itself can disable copying by implementing __copy__ and __deepcopy__; then they would be in "we know what we're doing" territory and we wouldn't need to worry about it.

I think the kind of state you're referring to here is something that can live outside the callback object, like a file, a database, or an external singleton object; the callback method just writes into that storage, and at the end one can use that data to plot, investigate, etc.

Member Author:

> I think the kind of state you're referring to here is something that can live outside the callback object, like a file, a database, or an external singleton object

Good point. In my latest design of the callbacks I no longer rely on a shared state, so you can ignore my previous comment :) We can't get around copies anyway, since the clones can happen in subprocesses (in a grid search, for instance). I updated the code to clone the callbacks like any other param.

        new_object._callbacks = estimator._callbacks

    # quick sanity check of the parameters of the clone
    for name in new_object_params:
        param1 = new_object_params[name]

@@ -641,6 +646,127 @@
            caller_name=self.__class__.__name__,
        )

    def _set_callbacks(self, callbacks):
        """Set callbacks for the estimator.

        Parameters
        ----------
        callbacks : callback or list of callbacks
            The callbacks to set.

        Returns
        -------
        self : estimator instance
            The estimator instance itself.
        """
        if not isinstance(callbacks, list):
            callbacks = [callbacks]

        if not all(isinstance(callback, BaseCallback) for callback in callbacks):
            raise TypeError("callbacks must be subclasses of BaseCallback.")

        self._callbacks = callbacks

        return self
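
A minimal usage sketch of this new API, assuming the `ProgressBar` callback added later in this PR and an estimator already instrumented with the callback hooks (only a subset of estimators is wired up here):

    from sklearn.callback import ProgressBar
    from sklearn.datasets import make_classification
    from sklearn.linear_model import LogisticRegression

    X, y = make_classification(random_state=0)

    # _set_callbacks returns the estimator, so it chains with fit.
    est = LogisticRegression()._set_callbacks(ProgressBar())
    est.fit(X, y)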

    # XXX should be a method of MetaEstimatorMixin but this mixin can't handle all
    # meta-estimators.
    def _propagate_callbacks(self, sub_estimator, *, parent_node):
        """Propagate the auto-propagated callbacks to a sub-estimator.

        Parameters
        ----------
        sub_estimator : estimator instance
            The sub-estimator to propagate the callbacks to.

        parent_node : ComputationNode instance
            The computation node in this estimator to set as parent_node to the
            computation tree of the sub-estimator. It must be the node where the
            fit method of the sub-estimator is called.
        """
        if hasattr(sub_estimator, "_callbacks") and any(
            callback.auto_propagate for callback in sub_estimator._callbacks
        ):

Member: What happens if you call this method twice? Wouldn't this be raised on the second run? If so, it feels a bit fragile. What's the harm of not checking this?

Member Author: It should never be called twice; the meta-estimator only propagates its callbacks once to its sub-estimator(s).

This error matters so that we have an informative error message if a user tries something like

    lr = LogisticRegression()._set_callbacks(ProgressBar())
    GridSearchCV(lr, param_grid)

Without it, this would crash without telling the user the right way to do it.

            bad_callbacks = [
                callback.__class__.__name__
                for callback in sub_estimator._callbacks
                if callback.auto_propagate
            ]
            raise TypeError(
                f"The sub-estimators ({sub_estimator.__class__.__name__}) of a"
                f" meta-estimator ({self.__class__.__name__}) can't have"
                f" auto-propagated callbacks ({bad_callbacks})."
                " Set them directly on the meta-estimator."
            )

        if not hasattr(self, "_callbacks"):
            return

        propagated_callbacks = [
            callback for callback in self._callbacks if callback.auto_propagate
        ]

        if not propagated_callbacks:
            return

[Codecov warning: added line sklearn/base.py#L710 was not covered by tests]

        sub_estimator._parent_node = parent_node

        sub_estimator._set_callbacks(
            getattr(sub_estimator, "_callbacks", []) + propagated_callbacks
        )
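
To make the intended call site concrete, a sketch of how a meta-estimator's fit could use this method; the `children` attribute of `ComputationNode` is an assumption about the tree API, which is not shown in this diff:

    # Hypothetical snippet from a meta-estimator's fit. `root` is the root of
    # this meta-estimator's computation tree, as returned by
    # _eval_callbacks_on_fit_begin (defined just below), with one child node
    # assumed per sub-estimator fit.
    for node, sub_estimator in zip(root.children, self.estimators):
        self._propagate_callbacks(sub_estimator, parent_node=node)
        sub_estimator.fit(X, y)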

    def _eval_callbacks_on_fit_begin(self, *, levels, X=None, y=None):
        """Evaluate the on_fit_begin method of the callbacks.

        The computation tree is also built at this point.

        This method should be called after all data and parameter validation.

        Parameters
        ----------
        levels : list of dict
            A description of the nested levels of computation of the estimator,
            used to build the computation tree. It's a list of dicts with
            "descr" and "max_iter" keys.

        X : ndarray or sparse matrix, default=None
            The training data.

        y : ndarray, default=None
            The target.

        Returns
        -------
        root : ComputationNode instance
            The root of the computation tree.
        """
        self._computation_tree = build_computation_tree(
            estimator_name=self.__class__.__name__,
            levels=levels,
            parent=getattr(self, "_parent_node", None),
        )

        if not hasattr(self, "_callbacks"):
            return self._computation_tree

        # Only call the on_fit_begin method of callbacks that are not
        # propagated from a meta-estimator.
        for callback in self._callbacks:
            if not callback._is_propagated(estimator=self):
                callback.on_fit_begin(estimator=self, X=X, y=y)

        return self._computation_tree
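
To make the `levels` format concrete, a sketch for an estimator with a single training loop; the dict shape follows the docstring above:

    # Inside a hypothetical iterative estimator's fit, after validation:
    root = self._eval_callbacks_on_fit_begin(
        levels=[
            {"descr": "fit", "max_iter": None},  # outer level: the fit itself
            {"descr": "iter", "max_iter": self.max_iter},  # inner: iterations
        ],
        X=X,
        y=y,
    )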

    def _eval_callbacks_on_fit_end(self):
        """Evaluate the on_fit_end method of the callbacks."""
        if not hasattr(self, "_callbacks") or not hasattr(self, "_computation_tree"):
            return

        # Only call the on_fit_end method of callbacks that are not
        # propagated from a meta-estimator.
        for callback in self._callbacks:
            if not callback._is_propagated(estimator=self):
                callback.on_fit_end()

[Codecov warning: added line sklearn/base.py#L768 was not covered by tests]

    @property
    def _repr_html_(self):
        """HTML representation of estimator.
@@ -1212,7 +1338,10 @@
                    prefer_skip_nested_validation or global_skip_validation
                )
            ):
-               return fit_method(estimator, *args, **kwargs)
+               try:
+                   return fit_method(estimator, *args, **kwargs)
+               finally:
+                   estimator._eval_callbacks_on_fit_end()

Member (on lines +1558 to +1561): This is becoming larger than just a validation wrapper. We could simplify the debugging and the magic by having a BaseEstimator.fit which calls self._fit(...) and does all the common stuff before and after. That seems a lot easier to understand and debug.

Member Author: The motivation when I introduced _fit_context was not only validation, but to have a generic context manager that handles everything we need to do before and after fit; that's why I gave it this generic name.

Although having a consistent framework where we'd have a BaseEstimator.fit and every child estimator implements _fit is appealing, I think it goes far beyond the scope of this PR and would require rewriting a lot of estimators.

Member Author: By the way, do you know why BaseEstimator does not implement fit in the first place?

Also, note that _fit_context also handles partial_fit, and I don't think we want BaseEstimator to implement partial_fit.

Member: BaseEstimator doesn't implement fit because we generally don't have methods that raise NotImplementedError; they're simply not there. But now that we have all this work, we could certainly have fit in BaseEstimator, with children only implementing a __sklearn_fit__ kind of method instead.

Member Author: I still think it's outside the scope of this PR. Using the existing context manager is just a one-line addition, whereas implementing __sklearn_fit__ means countless PRs :)
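
A minimal sketch of the pattern discussed above, purely illustrative since neither a base `fit` nor `__sklearn_fit__` exists in this PR; the levels argument is a placeholder:

    class BaseEstimator:
        def fit(self, X, y=None):
            # Common pre-fit work would live here: validation, building the
            # computation tree, calling on_fit_begin on the callbacks.
            self._eval_callbacks_on_fit_begin(
                levels=[{"descr": "fit", "max_iter": None}], X=X, y=y
            )
            try:
                # Each child estimator would implement only __sklearn_fit__.
                return self.__sklearn_fit__(X, y)
            finally:
                # Mirrors the try/finally added to _fit_context in this diff.
                self._eval_callbacks_on_fit_end()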


        return wrapper

13 changes: 13 additions & 0 deletions sklearn/callback/__init__.py
@@ -0,0 +1,13 @@
# License: BSD 3 clause
# Authors: the scikit-learn developers

from ._base import BaseCallback
from ._computation_tree import ComputationNode, build_computation_tree
from ._progressbar import ProgressBar

__all__ = [
    "BaseCallback",
    "build_computation_tree",
    "ComputationNode",
    "ProgressBar",
]
138 changes: 138 additions & 0 deletions sklearn/callback/_base.py
@@ -0,0 +1,138 @@
# License: BSD 3 clause
# Authors: the scikit-learn developers

from abc import ABC, abstractmethod


# Not a method of BaseEstimator because it might not be directly called from fit,
# but by a non-method function called by fit.
def _eval_callbacks_on_fit_iter_end(**kwargs):
    """Evaluate the on_fit_iter_end method of the callbacks.

    This function must be called at the end of each computation node.

    Parameters
    ----------
    kwargs : dict
        Arguments passed to the callback.

    Returns
    -------
    stop : bool
        Whether or not to stop the fit at this node.
    """
    estimator = kwargs.get("estimator")
    node = kwargs.get("node")

[Codecov warning: added lines sklearn/callback/_base.py#L24-L25 were not covered by tests]

    if not hasattr(estimator, "_callbacks") or node is None:
        return False

[Codecov warning: added line sklearn/callback/_base.py#L28 was not covered by tests]

    # stopping_criterion and reconstruction_attributes can be costly to compute.
    # They are passed as lambdas for lazy evaluation. We only actually
    # compute them if a callback requests it.
    # TODO: This is not used yet but will be necessary for next callbacks.
    # Uncomment when needed.
    # if any(cb.request_stopping_criterion for cb in estimator._callbacks):
    #     kwarg = kwargs.pop("stopping_criterion", lambda: None)()
    #     kwargs["stopping_criterion"] = kwarg

    # if any(cb.request_from_reconstruction_attributes for cb in estimator._callbacks):
    #     kwarg = kwargs.pop("from_reconstruction_attributes", lambda: None)()
    #     kwargs["from_reconstruction_attributes"] = kwarg

    return any(callback.on_fit_iter_end(**kwargs) for callback in estimator._callbacks)
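
A sketch of the intended call site inside a solver loop; how the node for the current iteration is obtained (`root.children[i]`) is an assumption, since the ComputationNode API is not part of this file:

    # Hypothetical training loop of an iterative estimator. `root` is the tree
    # root returned by _eval_callbacks_on_fit_begin.
    for i in range(self.max_iter):
        self._do_one_iteration(X, y)  # hypothetical solver step
        if _eval_callbacks_on_fit_iter_end(estimator=self, node=root.children[i]):
            break  # a callback requested early stopping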


class BaseCallback(ABC):
    """Abstract class for the callbacks."""

    @abstractmethod
    def on_fit_begin(self, estimator, *, X=None, y=None):
        """Method called at the beginning of the fit method of the estimator.

        Only called for callbacks that are not propagated from a meta-estimator.

        Parameters
        ----------
        estimator : estimator instance
            The estimator the callback is set on.

        X : ndarray or sparse matrix, default=None
            The training data.

        y : ndarray or sparse matrix, default=None
            The target.
        """

    @abstractmethod
    def on_fit_end(self):
        """Method called at the end of the fit method of the estimator."""

    @abstractmethod
    def on_fit_iter_end(self, estimator, node, **kwargs):
        """Method called at the end of each computation node of the estimator.

        Parameters
        ----------
        estimator : estimator instance
            The caller estimator. It might differ from the estimator passed to the
            `on_fit_begin` method for auto-propagated callbacks.

        node : ComputationNode instance
            The caller computation node.

        **kwargs : dict
            Arguments passed to the callback. Possible keys are

            - stopping_criterion : float
                Usually iterations stop when `stopping_criterion <= tol`.
                This is only provided at the innermost level of iterations.

            - tol : float
                Tolerance for the stopping criterion.
                This is only provided at the innermost level of iterations.

            - from_reconstruction_attributes : estimator instance
                A ready-to-predict, transform, etc. estimator, as if the fit had
                stopped at this node. Usually it's a copy of the caller estimator
                with the necessary attributes set, but it can sometimes be an
                instance of another class (e.g. LogisticRegressionCV ->
                LogisticRegression).

            - fit_state : dict
                Model-specific quantities updated during fit. This is not meant
                to be used by generic callbacks, but by callbacks designed for a
                specific estimator instead.

        Returns
        -------
        stop : bool or None
            Whether or not to stop the current level of iterations at this node.
        """

    @property
    def auto_propagate(self):
        """Whether or not this callback should be propagated to sub-estimators.

        An auto-propagated callback (from a meta-estimator to its sub-estimators)
        must be set on the meta-estimator. Its `on_fit_begin` and `on_fit_end`
        methods will only be called at the beginning and end of the fit method
        of the meta-estimator, while its `on_fit_iter_end` method will be called
        at each computation node of the meta-estimator and its sub-estimators.
        """
        return False

    def _is_propagated(self, estimator):
        """Check if this callback attached to estimator has been propagated from
        a meta-estimator.
        """
        return self.auto_propagate and hasattr(estimator, "_parent_node")

    # TODO: This is not used yet but will be necessary for next callbacks.
    # Uncomment when needed.
    # @property
    # def request_stopping_criterion(self):
    #     return False

    # @property
    # def request_from_reconstruction_attributes(self):
    #     return False
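
Putting the pieces together, a minimal concrete callback against this base class; the logging behavior is illustrative, and `auto_propagate` is overridden only to demonstrate the property:

    from sklearn.callback import BaseCallback

    class ConvergenceLogger(BaseCallback):
        """Hypothetical callback printing the stopping criterion at each node."""

        @property
        def auto_propagate(self):
            # Opt in: a meta-estimator will forward this callback to its
            # sub-estimators when _propagate_callbacks is called.
            return True

        def on_fit_begin(self, estimator, *, X=None, y=None):
            print(f"start fitting {estimator.__class__.__name__}")

        def on_fit_iter_end(self, estimator, node, **kwargs):
            # stopping_criterion and tol are only provided at the innermost
            # level of iterations (and not wired up yet, per the TODO above).
            criterion = kwargs.get("stopping_criterion")
            if criterion is not None:
                print(f"stopping_criterion={criterion:.3e} (tol={kwargs.get('tol')})")
            return False  # never request early stopping

        def on_fit_end(self):
            print("fit done")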