Callbacks API #16925

Closed
wants to merge 14 commits into from
75 changes: 68 additions & 7 deletions doc/developers/develop.rst
@@ -5,9 +5,9 @@ Developing scikit-learn estimators
==================================

Whether you are proposing an estimator for inclusion in scikit-learn,
developing a separate package compatible with scikit-learn, or
implementing custom components for your own projects, this chapter
details how to develop objects that safely interact with scikit-learn
Pipelines and model selection tools.

.. currentmodule:: sklearn
@@ -576,10 +576,10 @@ closed-form solutions.
Coding guidelines
=================

The following are some guidelines on how new code should be written for
inclusion in scikit-learn, and which may be appropriate to adopt in external
projects. Of course, there are special cases and there will be exceptions to
these rules. However, following these rules when submitting new code makes
the review easier so new code can be integrated in less time.

Uniformly formatted code makes it easier to share code ownership. The
@@ -709,3 +709,64 @@ The reason for this setup is reproducibility:
when an estimator is ``fit`` twice to the same data,
it should produce an identical model both times,
hence the validation in ``fit``, not ``__init__``.

Estimator callbacks
===================

To add (optional) support for callbacks, for instance to display progress
bars or monitor convergence, the estimator must implement the following
points (a minimal sketch is given after the list):

- At the beginning of ``fit``, either explicitly call
  ``self._fit_callbacks(X, y)`` or use ``self._validate_data(X, y)``, which
  calls ``self._fit_callbacks`` internally.
- For iterative solvers, call ``self._eval_callbacks(n_iter=.., **kwargs)``
  at each iteration, where the ``kwargs`` keys must be among the supported
  callback arguments (cf. list below).
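
For illustration, a minimal iterative estimator wired for callbacks could
look like the sketch below (the gradient-descent solver, its parameter
names, and the class name are made up for this example and are not part of
scikit-learn):

.. code::

import numpy as np
from sklearn.base import BaseEstimator, RegressorMixin

class GradientDescentRegressor(RegressorMixin, BaseEstimator):
    def __init__(self, max_iter=100, learning_rate=0.01):
        self.max_iter = max_iter
        self.learning_rate = learning_rate

    def fit(self, X, y):
        # _validate_data calls self._fit_callbacks(X, y) internally
        X, y = self._validate_data(X, y)
        coef = np.zeros(X.shape[1])
        for n_iter in range(self.max_iter):
            grad = X.T @ (X @ coef - y) / X.shape[0]
            coef -= self.learning_rate * grad
            loss = 0.5 * np.mean((X @ coef - y) ** 2)
            # report progress to the registered callbacks
            self._eval_callbacks(n_iter=n_iter, max_iter=self.max_iter,
                                 loss=loss, coef=coef)
        self.coef_ = coef
        return self

    def predict(self, X):
        return X @ self.coef_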

User-defined callbacks must extend the ``sklearn._callbacks.BaseCallback``
abstract base class. For instance, some callbacks are implemented in the
`sklearn-callbacks <https://github.com/rth/sklearn-callbacks>`_ package
and can be used as follows:

.. code::

from sklearn.linear_model import LogisticRegression
from sklearn_callbacks import ProgressBar

est = LogisticRegression()
pbar = ProgressBar()
est._set_callbacks(pbar)

est.fit(X, y) # will display a progress bar
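
A user-defined callback only needs to implement the two abstract methods of
``BaseCallback``. As a rough sketch (the ``ConvergenceLogger`` class and its
logging behaviour are purely illustrative and not part of any existing
package):

.. code::

from sklearn.linear_model import LogisticRegression
from sklearn._callbacks import BaseCallback

class ConvergenceLogger(BaseCallback):
    def fit(self, estimator, X, y):
        # called once when fitting starts
        print(f"Fitting {estimator.__class__.__name__} on {X.shape[0]} samples")

    def __call__(self, **kwargs):
        # called at each iteration of an iterative solver
        if "loss" in kwargs:
            print(f"iteration {kwargs.get('n_iter')}: loss={kwargs['loss']:.4f}")

est = LogisticRegression()
est._set_callbacks(ConvergenceLogger())
est.fit(X, y)  # logs whatever the solver reports at each iteration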


**Callback arguments**

The following input parameters are supported:

n_iter, int
current iteration number for iterative solvers.

max_iter, int
maximum number of iterations for iterative solvers. If the estimator
has a ``max_iter`` init parameter, this will be inferred.

loss, float or ordered dict
cost function value or error at a given iteration. When ordered dict,
multiple loss functions can be given, with the default loss being the first
element. Lower is better.

score, float or ordered dict
same as ``loss`` parameter, but for evaluation metrics. Higher is better.

validation_loss, float or ordered dict
cost function value or error at a given iteration, evaluated on the
validation set.

validation_score, float or ordered dict
same as ``validation_loss`` parameter, but for evaluation metrics. Higher is
better.

coef, ndarray
coefficients of linear models.
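
Estimator and callback authors can check that only documented arguments
(with the expected types) are passed by using the ``_check_callback_params``
helper added in ``sklearn/_callbacks.py``; a quick sketch:

.. code::

from sklearn._callbacks import _check_callback_params

# valid: all keys and value types follow the list above
_check_callback_params(n_iter=3, max_iter=100, loss=0.5)

# raises ValueError: "learning_rate" is not a documented callback parameter
_check_callback_params(learning_rate=0.1)
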
3 changes: 3 additions & 0 deletions pyproject.toml
@@ -13,3 +13,6 @@ requires = [
"numpy==1.17.3; python_version>='3.8' and platform_system=='AIX'",
"scipy>=0.19.1",
]

[tool.black]
line-length = 79
Comment on lines +17 to +18

Member:

sneaky :p

Member Author:

Yeah, I don't see the point in manually formatting code anymore for new files. It shouldn't hurt even if we are not using it everywhere.

58 changes: 58 additions & 0 deletions sklearn/_callbacks.py
@@ -0,0 +1,58 @@
# License: BSD 3 clause
from typing import List, Callable
from abc import ABC, abstractmethod

import numpy as np

CALLBACK_PARAM_TYPES = {
"n_iter": int,
"max_iter": int,
"loss": (float, dict),
"score": (float, dict),
"validation_loss": (float, dict),
"validation_score": (float, dict),
"coef": np.ndarray,
"intercept": (np.ndarray, float)
}


def _check_callback_params(**kwargs):
Member:

Shouldn't we let each callback independently validate its data?

My question might not make sense, but I don't see this being used anywhere except in the tests.

Member Author:

Yes, absolutely, each callback validates its own data. But we also need to enforce in the tests that callbacks follow the documented API, for instance that no undocumented parameters are passed, which requires this function.

Third-party callbacks could also use this validation function, similarly to how we expose check_array.

invalid_params = []
invalid_types = []
for key, val in kwargs.items():
if key not in CALLBACK_PARAM_TYPES:
invalid_params.append(key)
else:
val_types = CALLBACK_PARAM_TYPES[key]
if not isinstance(val, val_types):
invalid_types.append(
f"{key}={val} is not of type {val_types}"
)
msg = ""
if invalid_params:
msg += ("Invalid callback parameters: {}, must be one of {}. ").format(
", ".join(invalid_params),
", ".join(CALLBACK_PARAM_TYPES.keys())
)
if invalid_types:
msg += "Invalid callback parameters: " + ", ".join(invalid_types)
if msg:
raise ValueError(msg)


def _eval_callbacks(callbacks: List[Callable], **kwargs) -> None:
if callbacks is None:
return
Comment on lines +43 to +44

Member:

Shouldn't we rely on callbacks being an empty list instead of special-casing None?

Or maybe you are anticipating a future where callbacks=None would be a default argument to estimators?

Member Author:

Yes, we switched to using callbacks=None when callbacks are missing.

for callback in callbacks:
callback(**kwargs)


class BaseCallback(ABC):
@abstractmethod
def fit(self, estimator, X, y) -> None:
pass

@abstractmethod
def __call__(self, **kwargs) -> None:
pass
44 changes: 44 additions & 0 deletions sklearn/base.py
@@ -84,6 +84,11 @@ def clone(estimator, *, safe=True):
new_object = klass(**new_object_params)
params_set = new_object.get_params(deep=False)

# copy callbacks
if hasattr(estimator, "_callbacks"):
# TODO: do we need to use the recursive setter here?
new_object._callbacks = estimator._callbacks

# quick sanity check of the parameters of the clone
for name in new_object_params:
param1 = new_object_params[name]
@@ -406,6 +411,7 @@ def _validate_data(self, X, y=None, reset=True,
out : {ndarray, sparse matrix} or tuple of these
The validated input. A tuple is returned if `y` is not None.
"""
self._fit_callbacks(X, y)

if y is None:
if self._get_tags()['requires_y']:
@@ -433,6 +439,44 @@

return out

def _set_callbacks(self, callbacks):
"""Set callbacks for the estimator.

In the case of meta-estimators, callbacks are also set recursively
for all child estimators.
Member:

Thoughts on doing this vs letting users set callbacks on sub-estimator instances?

What about e.g. early stopping when we ultimately support this?

Member Author:

I added a deep=True option to allow disabling recursion for meta-estimators, which can certainly be useful in some cases.

In most cases though, I don't see users manually setting callbacks for each individual estimator in a complex pipeline.

"""
from sklearn._callbacks import BaseCallback
if isinstance(callbacks, BaseCallback):
self._callbacks = [callbacks]
else:
self._callbacks = callbacks

for attr_name in getattr(self, "_required_parameters", []):
# likely a meta-estimator
if attr_name in ['steps', 'transformers']:
for attr in getattr(self, attr_name):
if isinstance(attr, BaseEstimator):
attr._set_callbacks(callbacks)
elif (hasattr(attr, '__len__')
and len(attr) >= 2
and isinstance(attr[1], BaseEstimator)):
attr[1]._set_callbacks(callbacks)

def _fit_callbacks(self, X, y):
"""Send the signal to callbacks that the estimator is being fitted"""
callbacks = getattr(self, '_callbacks', [])

for callback in callbacks:
callback.fit(self, X, y)

def _eval_callbacks(self, **kwargs):
"""Call callbacks, e.g. in each iteration of an iterative solver"""
from ._callbacks import _eval_callbacks
Member:

why lazy import?


callbacks = getattr(self, '_callbacks', [])

_eval_callbacks(callbacks, **kwargs)

@property
def _repr_html_(self):
"""HTML representation of estimator.
1 change: 1 addition & 0 deletions sklearn/compose/_column_transformer.py
@@ -516,6 +516,7 @@ def fit_transform(self, X, y=None):
sparse matrices.

"""
self._fit_callbacks(X, y)
# TODO: this should be `feature_names_in_` when we start having it
if hasattr(X, "columns"):
self._feature_names_in = np.asarray(X.columns)
1 change: 1 addition & 0 deletions sklearn/decomposition/_factor_analysis.py
@@ -236,6 +236,7 @@ def my_svd(X):
old_ll = ll

psi = np.maximum(var - np.sum(W ** 2, axis=0), SMALL)
self._eval_callbacks(n_iter=i)
else:
warnings.warn('FactorAnalysis did not converge.' +
' You might want' +
6 changes: 4 additions & 2 deletions sklearn/decomposition/_incremental_pca.py
@@ -206,12 +206,14 @@ def fit(self, X, y=None):
else:
self.batch_size_ = self.batch_size

for batch in gen_batches(n_samples, self.batch_size_,
min_batch_size=self.n_components or 0):
for n_batch, batch in enumerate(
gen_batches(n_samples, self.batch_size_,
min_batch_size=self.n_components or 0)):
X_batch = X[batch]
if sparse.issparse(X_batch):
X_batch = X_batch.toarray()
self.partial_fit(X_batch, check_input=False)
self._eval_callbacks(n_iter=n_batch)

return self

1 change: 1 addition & 0 deletions sklearn/decomposition/_lda.py
@@ -464,6 +464,7 @@ def _em_step(self, X, total_samples, batch_update, parallel=None):
self.exp_dirichlet_component_ = np.exp(
_dirichlet_expectation_2d(self.components_))
self.n_batch_iter_ += 1
self._eval_callbacks()
return

def _more_tags(self):
24 changes: 17 additions & 7 deletions sklearn/decomposition/_nmf.py
@@ -19,6 +19,7 @@
from ..utils import check_random_state, check_array
from ..utils.extmath import randomized_svd, safe_sparse_dot, squared_norm
from ..utils.validation import check_is_fitted, check_non_negative
from .._callbacks import _eval_callbacks
from ..utils.validation import _deprecate_positional_args

EPSILON = np.finfo(np.float32).eps
@@ -426,7 +427,8 @@ def _update_coordinate_descent(X, W, Ht, l1_reg, l2_reg, shuffle,

def _fit_coordinate_descent(X, W, H, tol=1e-4, max_iter=200, l1_reg_W=0,
l1_reg_H=0, l2_reg_W=0, l2_reg_H=0, update_H=True,
verbose=0, shuffle=False, random_state=None):
verbose=0, shuffle=False, random_state=None,
callbacks=None):
"""Compute Non-negative Matrix Factorization (NMF) with Coordinate Descent

The objective function is minimized with an alternating minimization of W
@@ -522,6 +524,10 @@ def _fit_coordinate_descent(X, W, H, tol=1e-4, max_iter=200, l1_reg_W=0,
if verbose:
print("violation:", violation / violation_init)

_eval_callbacks(callbacks, n_iter=n_iter,
tol=violation/violation_init,
error=violation)

if violation / violation_init <= tol:
if verbose:
print("Converged at iteration", n_iter + 1)
@@ -710,7 +716,7 @@ def _multiplicative_update_h(X, W, H, beta_loss, l1_reg_H, l2_reg_H, gamma):
def _fit_multiplicative_update(X, W, H, beta_loss='frobenius',
max_iter=200, tol=1e-4,
l1_reg_W=0, l1_reg_H=0, l2_reg_W=0, l2_reg_H=0,
update_H=True, verbose=0):
update_H=True, verbose=0, callbacks=None):
"""Compute Non-negative Matrix Factorization with Multiplicative Update

The objective function is _beta_divergence(X, WH) and is minimized with an
@@ -828,6 +834,9 @@ def _fit_multiplicative_update(X, W, H, beta_loss='frobenius',
print("Epoch %02d reached after %.3f seconds, error: %f" %
(n_iter, iter_time - start_time, error))

_eval_callbacks(callbacks, n_iter=n_iter, error=error,
tol=(previous_error - error) / error_at_init)

if (previous_error - error) / error_at_init < tol:
break
previous_error = error
@@ -847,7 +856,7 @@ def non_negative_factorization(X, W=None, H=None, n_components=None, *,
beta_loss='frobenius', tol=1e-4,
max_iter=200, alpha=0., l1_ratio=0.,
regularization=None, random_state=None,
verbose=0, shuffle=False):
verbose=0, shuffle=False, callbacks=None):
r"""Compute Non-negative Matrix Factorization (NMF)

Find two non-negative matrices (W, H) whose product approximates the non-
@@ -1062,12 +1071,13 @@ def non_negative_factorization(X, W=None, H=None, n_components=None, *,
update_H=update_H,
verbose=verbose,
shuffle=shuffle,
random_state=random_state)
random_state=random_state,
callbacks=callbacks)
elif solver == 'mu':
W, H, n_iter = _fit_multiplicative_update(X, W, H, beta_loss, max_iter,
tol, l1_reg_W, l1_reg_H,
l2_reg_W, l2_reg_H, update_H,
verbose)
verbose, callbacks=callbacks)

else:
raise ValueError("Invalid solver parameter '%s'." % solver)
@@ -1286,7 +1296,7 @@ def fit_transform(self, X, y=None, W=None, H=None):
tol=self.tol, max_iter=self.max_iter, alpha=self.alpha,
l1_ratio=self.l1_ratio, regularization='both',
random_state=self.random_state, verbose=self.verbose,
shuffle=self.shuffle)
shuffle=self.shuffle, callbacks=getattr(self, "_callbacks", []))

self.reconstruction_err_ = _beta_divergence(X, W, H, self.beta_loss,
square_root=True)
@@ -1335,7 +1345,7 @@ def transform(self, X):
beta_loss=self.beta_loss, tol=self.tol, max_iter=self.max_iter,
alpha=self.alpha, l1_ratio=self.l1_ratio, regularization='both',
random_state=self.random_state, verbose=self.verbose,
shuffle=self.shuffle)
shuffle=self.shuffle, callbacks=getattr(self, '_callbacks', []))
Member:

I feel like we should either decide that

  • no callbacks means an empty list
  • no callbacks means None and having callbacks means a non-empty list

but it seems that the code is mixing both right now?

If we ultimately plan on having callbacks=None as a default param then the latter would be more appropriate?

Member Author:

Yes, you are right, let's go with callbacks=None everywhere, and just let the eval function handle it.

Member Author:

Actually, here we would still need to provide the default option to getattr: getattr(self, '_callbacks', None), and that's not really much more readable than getattr(self, '_callbacks', []), so both would work.


return W

15 changes: 11 additions & 4 deletions sklearn/decomposition/_pca.py
@@ -491,6 +491,8 @@ def _fit_full(self, X, n_components):
explained_variance_ratio_[:n_components]
self.singular_values_ = singular_values_[:n_components]

self._eval_callbacks()

return U, S, Vt

def _fit_truncated(self, X, n_components, svd_solver):
Expand Down Expand Up @@ -537,12 +539,17 @@ def _fit_truncated(self, X, n_components, svd_solver):
# flip eigenvectors' sign to enforce deterministic output
U, Vt = svd_flip(U[:, ::-1], Vt[::-1])

self._eval_callbacks()

elif svd_solver == 'randomized':
# sign flipping is done inside
U, S, Vt = randomized_svd(X, n_components=n_components,
n_iter=self.iterated_power,
flip_sign=True,
random_state=random_state)
U, S, Vt = randomized_svd(
X, n_components=n_components,
n_iter=self.iterated_power,
flip_sign=True,
random_state=random_state,
callbacks=getattr(self, '_callbacks', [])
)

self.n_samples_, self.n_features_ = n_samples, n_features
self.components_ = Vt