[WIP] Change print statements to logging #17808

Closed · wants to merge 2 commits
28 changes: 15 additions & 13 deletions sklearn/cluster/_kmeans.py
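A note on the change as a whole: print writes to stdout unconditionally, while the logging module emits nothing below WARNING until the caller opts in. A minimal sketch of that opt-in (stdlib only; the logger name is the one implied by the getLogger(__name__) call added below):

import logging

# logging is silent below WARNING until configured, so the INFO-level
# messages in this diff need an explicit opt-in. Globally:
logging.basicConfig(level=logging.INFO)

# (A caller could instead scope the opt-in to the k-means module with
# logging.getLogger("sklearn.cluster._kmeans").setLevel(logging.INFO),
# since propagated records are filtered at each handler rather than
# re-checked against ancestor logger levels.)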
@@ -14,6 +14,7 @@
 import warnings
 
 import numpy as np
+import logging
 import scipy.sparse as sp
 from threadpoolctl import threadpool_limits

@@ -39,6 +40,7 @@
 from ._k_means_elkan import elkan_iter_chunked_dense
 from ._k_means_elkan import elkan_iter_chunked_sparse
 
+logger = logging.getLogger(__name__)
 
I think they want a global logger, like

logging.getLogger('sklearn')
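For what it's worth, the two spellings compose under the stdlib logger hierarchy: records from a module-level logger propagate up to the 'sklearn' parent, so a single package-wide handler still sees everything. A rough sketch of that behavior (pure stdlib; the module name is the one getLogger(__name__) would produce here):

import logging

# Package-wide logger, as suggested in this comment.
pkg = logging.getLogger("sklearn")
pkg.addHandler(logging.StreamHandler())
pkg.setLevel(logging.INFO)

# Module-level logger, as the diff currently creates it.
mod = logging.getLogger("sklearn.cluster._kmeans")

# The child inherits the INFO threshold and its records propagate to
# the 'sklearn' handler, so this line is emitted by that handler.
mod.info("Iteration 0, inertia 42.0")

So the naming question is mostly about which dotted prefix users configure, not about where the handler lives.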


 ###############################################################################
 # Initialization heuristic
@@ -406,7 +408,7 @@ def _kmeans_single_elkan(X, sample_weight, n_clusters, max_iter=300,
                               x_squared_norms=x_squared_norms)
 
     if verbose:
-        print('Initialization complete')
+        logger.info('Initialization complete')
 
     n_samples = X.shape[0]

@@ -446,12 +448,12 @@ def _kmeans_single_elkan(X, sample_weight, n_clusters, max_iter=300,

         if verbose:
             inertia = _inertia(X, sample_weight, centers, labels)
-            print("Iteration {0}, inertia {1}" .format(i, inertia))
+            logger.info("Iteration {0}, inertia {1}" .format(i, inertia))
 
         center_shift_tot = (center_shift**2).sum()
         if center_shift_tot <= tol:
             if verbose:
-                print("Converged at iteration {0}: "
+                logger.info("Converged at iteration {0}: "
                       "center shift {1} within tolerance {2}"
                       .format(i, center_shift_tot, tol))
             break
@@ -553,7 +555,7 @@ def _kmeans_single_lloyd(X, sample_weight, n_clusters, max_iter=300,
                               x_squared_norms=x_squared_norms)
 
     if verbose:
-        print("Initialization complete")
+        logger.info("Initialization complete")
 
     centers_new = np.zeros_like(centers)
     labels = np.full(X.shape[0], -1, dtype=np.int32)
@@ -576,12 +578,12 @@ def _kmeans_single_lloyd(X, sample_weight, n_clusters, max_iter=300,

         if verbose:
             inertia = _inertia(X, sample_weight, centers, labels)
-            print("Iteration {0}, inertia {1}" .format(i, inertia))
+            logger.info("Iteration {0}, inertia {1}" .format(i, inertia))
 
         center_shift_tot = (center_shift**2).sum()
         if center_shift_tot <= tol:
             if verbose:
-                print("Converged at iteration {0}: "
+                logger.info("Converged at iteration {0}: "
                       "center shift {1} within tolerance {2}"
                       .format(i, center_shift_tot, tol))
             break
@@ -1309,7 +1311,7 @@ def _mini_batch_step(X, sample_weight, x_squared_norms, centers, weight_sums,
             new_centers = random_state.choice(X.shape[0], replace=False,
                                               size=n_reassigns)
             if verbose:
-                print("[MiniBatchKMeans] Reassigning %i cluster centers."
+                logger.info("[MiniBatchKMeans] Reassigning %i cluster centers."
                       % n_reassigns)
 
         if sp.issparse(X) and not sp.issparse(centers):
@@ -1398,13 +1400,13 @@ def _mini_batch_convergence(model, iteration_idx, n_iter, tol,
             ' mean batch inertia: %f, ewa inertia: %f ' % (
                 iteration_idx + 1, n_iter, batch_inertia,
                 ewa_inertia))
-        print(progress_msg)
+        logger.info(progress_msg)
 
     # Early stopping based on absolute tolerance on squared change of
     # centers position (using EWA smoothing)
     if tol > 0.0 and ewa_diff <= tol:
         if verbose:
-            print('Converged (small centers change) at iteration %d/%d'
+            logger.info('Converged (small centers change) at iteration %d/%d'
                   % (iteration_idx + 1, n_iter))
         return True

@@ -1420,7 +1422,7 @@ def _mini_batch_convergence(model, iteration_idx, n_iter, tol,
     if (model.max_no_improvement is not None
             and no_improvement >= model.max_no_improvement):
         if verbose:
-            print('Converged (lack of improvement in inertia)'
+            logger.info('Converged (lack of improvement in inertia)'
                   ' at iteration %d/%d'
                   % (iteration_idx + 1, n_iter))
         return True
@@ -1668,7 +1670,7 @@ def fit(self, X, y=None, sample_weight=None):
         best_inertia = None
         for init_idx in range(n_init):
             if self.verbose:
-                print("Init %d/%d with method: %s"
+                logger.info("Init %d/%d with method: %s"
                       % (init_idx + 1, n_init, self.init))
             weight_sums = np.zeros(self.n_clusters, dtype=sample_weight.dtype)

@@ -1696,7 +1698,7 @@ def fit(self, X, y=None, sample_weight=None):
                                          x_squared_norms_valid,
                                          cluster_centers)
             if self.verbose:
-                print("Inertia for init %d/%d: %f"
+                logger.info("Inertia for init %d/%d: %f"
                       % (init_idx + 1, n_init, inertia))
             if best_inertia is None or inertia < best_inertia:
                 self.cluster_centers_ = cluster_centers
@@ -1768,7 +1770,7 @@ def _labels_inertia_minibatch(self, X, sample_weight):
             Sum of squared distances of points to nearest cluster.
         """
         if self.verbose:
-            print('Computing label assignment and total inertia')
+            logger.info('Computing label assignment and total inertia')
         sample_weight = _check_normalize_sample_weight(sample_weight, X)
         x_squared_norms = row_norms(X, squared=True)
         slices = gen_batches(X.shape[0], self.batch_size)
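One upside of the switch worth noting: log records are easy to capture in tests. A hypothetical pytest sketch (the test name and toy data are made up; caplog is pytest's built-in log-capture fixture, and the logger name assumes the diff's getLogger(__name__)):

import logging

import numpy as np
from sklearn.cluster import KMeans

def test_verbose_messages_go_through_logging(caplog):
    X = np.random.RandomState(0).rand(50, 2)
    # Capture INFO records from the module logger this PR introduces.
    with caplog.at_level(logging.INFO, logger="sklearn.cluster._kmeans"):
        KMeans(n_clusters=3, n_init=1, verbose=1).fit(X)
    assert any("Initialization complete" in m for m in caplog.messages)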