From acb810647233e40839203ac553429e8663169702 Mon Sep 17 00:00:00 2001 From: Roman Yurchak Date: Tue, 8 Jan 2019 00:32:34 +0200 Subject: [PATCH] MNT Use list and dict comprehension (#12668) --- .../plot_out_of_core_classification.py | 5 ++-- examples/neural_networks/plot_mlp_alpha.py | 4 +--- sklearn/cluster/_feature_agglomeration.py | 5 ++-- sklearn/datasets/samples_generator.py | 8 +++---- sklearn/decomposition/tests/test_pca.py | 5 +--- sklearn/ensemble/forest.py | 14 ++++------- sklearn/gaussian_process/kernels.py | 24 ++++++++----------- sklearn/linear_model/coordinate_descent.py | 13 ++++------ sklearn/model_selection/_validation.py | 6 ++--- sklearn/model_selection/tests/test_split.py | 14 +++-------- sklearn/multioutput.py | 6 ++--- sklearn/preprocessing/data.py | 8 ++----- sklearn/semi_supervised/label_propagation.py | 8 +++---- sklearn/tree/tests/test_reingold_tilford.py | 6 ++--- sklearn/utils/estimator_checks.py | 7 +++--- 15 files changed, 48 insertions(+), 85 deletions(-) diff --git a/examples/applications/plot_out_of_core_classification.py b/examples/applications/plot_out_of_core_classification.py index 52495e2d0a423..8dafc4fae120f 100644 --- a/examples/applications/plot_out_of_core_classification.py +++ b/examples/applications/plot_out_of_core_classification.py @@ -359,9 +359,8 @@ def plot_accuracy(x, y, x_legend): # Plot fitting times plt.figure() fig = plt.gcf() -cls_runtime = [] -for cls_name, stats in sorted(cls_stats.items()): - cls_runtime.append(stats['total_fit_time']) +cls_runtime = [stats['total_fit_time'] + for cls_name, stats in sorted(cls_stats.items())] cls_runtime.append(total_vect_time) cls_names.append('Vectorization') diff --git a/examples/neural_networks/plot_mlp_alpha.py b/examples/neural_networks/plot_mlp_alpha.py index 0f8adcf31e0fe..7077f9b2bba74 100644 --- a/examples/neural_networks/plot_mlp_alpha.py +++ b/examples/neural_networks/plot_mlp_alpha.py @@ -32,9 +32,7 @@ h = .02 # step size in the mesh alphas = np.logspace(-5, 3, 5) -names = [] -for i in alphas: - names.append('alpha ' + str(i)) +names = ['alpha ' + str(i) for i in alphas] classifiers = [] for i in alphas: diff --git a/sklearn/cluster/_feature_agglomeration.py b/sklearn/cluster/_feature_agglomeration.py index b2b28497aedfa..28be7e80174b5 100644 --- a/sklearn/cluster/_feature_agglomeration.py +++ b/sklearn/cluster/_feature_agglomeration.py @@ -52,9 +52,8 @@ def transform(self, X): nX = np.array([np.bincount(self.labels_, X[i, :]) / size for i in range(n_samples)]) else: - nX = [] - for l in np.unique(self.labels_): - nX.append(pooling_func(X[:, self.labels_ == l], axis=1)) + nX = [pooling_func(X[:, self.labels_ == l], axis=1) + for l in np.unique(self.labels_)] nX = np.array(nX).T return nX diff --git a/sklearn/datasets/samples_generator.py b/sklearn/datasets/samples_generator.py index 35d8ea6a05589..ea139fc0b8874 100644 --- a/sklearn/datasets/samples_generator.py +++ b/sklearn/datasets/samples_generator.py @@ -176,10 +176,10 @@ def make_classification(n_samples=100, n_features=20, n_informative=2, weights[-1] = 1.0 - sum(weights[:-1]) # Distribute samples among clusters by weight - n_samples_per_cluster = [] - for k in range(n_clusters): - n_samples_per_cluster.append(int(n_samples * weights[k % n_classes] - / n_clusters_per_class)) + n_samples_per_cluster = [ + int(n_samples * weights[k % n_classes] / n_clusters_per_class) + for k in range(n_clusters)] + for i in range(n_samples - sum(n_samples_per_cluster)): n_samples_per_cluster[i % n_clusters] += 1 diff --git a/sklearn/decomposition/tests/test_pca.py b/sklearn/decomposition/tests/test_pca.py index ff7f6b29d67f8..db562836cbab0 100644 --- a/sklearn/decomposition/tests/test_pca.py +++ b/sklearn/decomposition/tests/test_pca.py @@ -502,10 +502,7 @@ def test_infer_dim_1(): pca = PCA(n_components=p, svd_solver='full') pca.fit(X) spect = pca.explained_variance_ - ll = [] - for k in range(p): - ll.append(_assess_dimension_(spect, k, n, p)) - ll = np.array(ll) + ll = np.array([_assess_dimension_(spect, k, n, p) for k in range(p)]) assert_greater(ll[1], ll.max() - .01 * n) diff --git a/sklearn/ensemble/forest.py b/sklearn/ensemble/forest.py index c3bd7964b3b8a..6f6a9fa8e0cd8 100644 --- a/sklearn/ensemble/forest.py +++ b/sklearn/ensemble/forest.py @@ -312,11 +312,9 @@ def fit(self, X, y, sample_weight=None): # would have got if we hadn't used a warm_start. random_state.randint(MAX_INT, size=len(self.estimators_)) - trees = [] - for i in range(n_more_estimators): - tree = self._make_estimator(append=False, - random_state=random_state) - trees.append(tree) + trees = [self._make_estimator(append=False, + random_state=random_state) + for i in range(n_more_estimators)] # Parallel loop: we prefer the threading backend as the Cython code # for fitting the trees is internally releasing the Python GIL @@ -434,10 +432,8 @@ def _set_oob_score(self, X, y): oob_decision_function = [] oob_score = 0.0 - predictions = [] - - for k in range(self.n_outputs_): - predictions.append(np.zeros((n_samples, n_classes_[k]))) + predictions = [np.zeros((n_samples, n_classes_[k])) + for k in range(self.n_outputs_)] for estimator in self.estimators_: unsampled_indices = _generate_unsampled_indices( diff --git a/sklearn/gaussian_process/kernels.py b/sklearn/gaussian_process/kernels.py index 77811ef1a9bfd..b929b2f2f1b2e 100644 --- a/sklearn/gaussian_process/kernels.py +++ b/sklearn/gaussian_process/kernels.py @@ -217,10 +217,8 @@ def n_dims(self): @property def hyperparameters(self): """Returns a list of all hyperparameter specifications.""" - r = [] - for attr in dir(self): - if attr.startswith("hyperparameter_"): - r.append(getattr(self, attr)) + r = [getattr(self, attr) for attr in dir(self) + if attr.startswith("hyperparameter_")] return r @property @@ -285,10 +283,9 @@ def bounds(self): bounds : array, shape (n_dims, 2) The log-transformed bounds on the kernel's hyperparameters theta """ - bounds = [] - for hyperparameter in self.hyperparameters: - if not hyperparameter.fixed: - bounds.append(hyperparameter.bounds) + bounds = [hyperparameter.bounds + for hyperparameter in self.hyperparameters + if not hyperparameter.fixed] if len(bounds) > 0: return np.log(np.vstack(bounds)) else: @@ -573,12 +570,11 @@ def get_params(self, deep=True): @property def hyperparameters(self): """Returns a list of all hyperparameter.""" - r = [] - for hyperparameter in self.k1.hyperparameters: - r.append(Hyperparameter("k1__" + hyperparameter.name, - hyperparameter.value_type, - hyperparameter.bounds, - hyperparameter.n_elements)) + r = [Hyperparameter("k1__" + hyperparameter.name, + hyperparameter.value_type, + hyperparameter.bounds, hyperparameter.n_elements) + for hyperparameter in self.k1.hyperparameters] + for hyperparameter in self.k2.hyperparameters: r.append(Hyperparameter("k2__" + hyperparameter.name, hyperparameter.value_type, diff --git a/sklearn/linear_model/coordinate_descent.py b/sklearn/linear_model/coordinate_descent.py index c3685cb814b3d..3aac3b480169f 100644 --- a/sklearn/linear_model/coordinate_descent.py +++ b/sklearn/linear_model/coordinate_descent.py @@ -1166,14 +1166,11 @@ def fit(self, X, y): alphas = self.alphas n_l1_ratio = len(l1_ratios) if alphas is None: - alphas = [] - for l1_ratio in l1_ratios: - alphas.append(_alpha_grid( - X, y, l1_ratio=l1_ratio, - fit_intercept=self.fit_intercept, - eps=self.eps, n_alphas=self.n_alphas, - normalize=self.normalize, - copy_X=self.copy_X)) + alphas = [_alpha_grid(X, y, l1_ratio=l1_ratio, + fit_intercept=self.fit_intercept, + eps=self.eps, n_alphas=self.n_alphas, + normalize=self.normalize, copy_X=self.copy_X) + for l1_ratio in l1_ratios] else: # Making sure alphas is properly ordered. alphas = np.tile(np.sort(alphas)[::-1], (n_l1_ratio, 1)) diff --git a/sklearn/model_selection/_validation.py b/sklearn/model_selection/_validation.py index 78cf42854bd3c..e5c1f539914f6 100644 --- a/sklearn/model_selection/_validation.py +++ b/sklearn/model_selection/_validation.py @@ -1465,7 +1465,5 @@ def _aggregate_score_dicts(scores): {'a': array([1, 2, 3, 10]), 'b': array([10, 2, 3, 10])} """ - out = {} - for key in scores[0]: - out[key] = np.asarray([score[key] for score in scores]) - return out + return {key: np.asarray([score[key] for score in scores]) + for key in scores[0]} diff --git a/sklearn/model_selection/tests/test_split.py b/sklearn/model_selection/tests/test_split.py index 8cd71e34a4b0e..96a8341115134 100644 --- a/sklearn/model_selection/tests/test_split.py +++ b/sklearn/model_selection/tests/test_split.py @@ -402,9 +402,7 @@ def test_kfold_balance(): # Check that KFold returns folds with balanced sizes for i in range(11, 17): kf = KFold(5).split(X=np.ones(i)) - sizes = [] - for _, test in kf: - sizes.append(len(test)) + sizes = [len(test) for _, test in kf] assert (np.max(sizes) - np.min(sizes)) <= 1 assert_equal(np.sum(sizes), i) @@ -421,9 +419,7 @@ def test_stratifiedkfold_balance(): cv = StratifiedKFold(3, shuffle=shuffle) for i in range(11, 17): skf = cv.split(X[:i], y[:i]) - sizes = [] - for _, test in skf: - sizes.append(len(test)) + sizes = [len(test) for _, test in skf] assert (np.max(sizes) - np.min(sizes)) <= 1 assert_equal(np.sum(sizes), i) @@ -757,14 +753,10 @@ def test_predefinedsplit_with_kfold_split(): kf_train.append(train_ind) kf_test.append(test_ind) folds[test_ind] = i - ps_train = [] - ps_test = [] ps = PredefinedSplit(folds) # n_splits is simply the no of unique folds assert_equal(len(np.unique(folds)), ps.get_n_splits()) - for train_ind, test_ind in ps.split(): - ps_train.append(train_ind) - ps_test.append(test_ind) + ps_train, ps_test = zip(*ps.split()) assert_array_equal(ps_train, kf_train) assert_array_equal(ps_test, kf_test) diff --git a/sklearn/multioutput.py b/sklearn/multioutput.py index a3ec122140d68..f70b1ff805ba6 100644 --- a/sklearn/multioutput.py +++ b/sklearn/multioutput.py @@ -562,9 +562,9 @@ def fit(self, X, Y): self : object """ super(ClassifierChain, self).fit(X, Y) - self.classes_ = [] - for chain_idx, estimator in enumerate(self.estimators_): - self.classes_.append(estimator.classes_) + self.classes_ = [estimator.classes_ + for chain_idx, estimator + in enumerate(self.estimators_)] return self @if_delegate_has_method('base_estimator') diff --git a/sklearn/preprocessing/data.py b/sklearn/preprocessing/data.py index b78f1c11fab96..6abac04f5dd18 100644 --- a/sklearn/preprocessing/data.py +++ b/sklearn/preprocessing/data.py @@ -2594,12 +2594,8 @@ def _fit(self, X, y=None, force_transform=False): optim_function = {'box-cox': self._box_cox_optimize, 'yeo-johnson': self._yeo_johnson_optimize }[self.method] - self.lambdas_ = [] - for col in X.T: - with np.errstate(invalid='ignore'): # hide NaN warnings - lmbda = optim_function(col) - self.lambdas_.append(lmbda) - self.lambdas_ = np.array(self.lambdas_) + with np.errstate(invalid='ignore'): # hide NaN warnings + self.lambdas_ = np.array([optim_function(col) for col in X.T]) if self.standardize or force_transform: transform_function = {'box-cox': boxcox, diff --git a/sklearn/semi_supervised/label_propagation.py b/sklearn/semi_supervised/label_propagation.py index 6b04eb8256daa..cacf51af242a4 100644 --- a/sklearn/semi_supervised/label_propagation.py +++ b/sklearn/semi_supervised/label_propagation.py @@ -191,11 +191,9 @@ class labels 'bsr', 'lil', 'dia']) weight_matrices = self._get_kernel(self.X_, X_2d) if self.kernel == 'knn': - probabilities = [] - for weight_matrix in weight_matrices: - ine = np.sum(self.label_distributions_[weight_matrix], axis=0) - probabilities.append(ine) - probabilities = np.array(probabilities) + probabilities = np.array([ + np.sum(self.label_distributions_[weight_matrix], axis=0) + for weight_matrix in weight_matrices]) else: weight_matrices = weight_matrices.T probabilities = np.dot(weight_matrices, self.label_distributions_) diff --git a/sklearn/tree/tests/test_reingold_tilford.py b/sklearn/tree/tests/test_reingold_tilford.py index 4cb27ce6effb9..dfab29d0705c0 100644 --- a/sklearn/tree/tests/test_reingold_tilford.py +++ b/sklearn/tree/tests/test_reingold_tilford.py @@ -43,10 +43,8 @@ def walk_tree(draw_tree): # we could also do it quicker using defaultdicts.. depth = 0 while True: - x_at_this_depth = [] - for node in coordinates: - if coordinates[1] == depth: - x_at_this_depth.append(coordinates[0]) + x_at_this_depth = [coordinates[0] for node in coordinates + if coordinates[1] == depth] if not x_at_this_depth: # reached all leafs break diff --git a/sklearn/utils/estimator_checks.py b/sklearn/utils/estimator_checks.py index 88ee8f84027bc..02050071a0d0b 100644 --- a/sklearn/utils/estimator_checks.py +++ b/sklearn/utils/estimator_checks.py @@ -2367,10 +2367,9 @@ def check_fit_idempotent(name, estimator_orig): # Fit for the first time estimator.fit(X_train, y_train) - result = {} - for method in check_methods: - if hasattr(estimator, method): - result[method] = getattr(estimator, method)(X_test) + result = {method: getattr(estimator, method)(X_test) + for method in check_methods + if hasattr(estimator, method)} # Fit again estimator.fit(X_train, y_train)