Fix

Basic unit test coverage; test updates; got splitter/sampler tests passing; add automl test; changelog; Py3.7 mock handling; revert changes to balanced classification data splitter.
alteryx · Apr 30, 2021 · 5c2f2f4 · 5c2f2f4
1 parent 1347bef
commit 5c2f2f4
Show file tree

Hide file tree

Showing 4 changed files with 66 additions and 3 deletions.
diff --git a/docs/source/release_notes.rst b/docs/source/release_notes.rst
@@ -80,6 +80,7 @@ Release Notes
         * Fixed bug where Time Series Classification pipelines were not encoding targets in ``predict`` and ``predict_proba`` :pr:`2040`
         * Fixed data splitting errors if target is float for classification problems :pr:`2050`
         * Pinned ``docutils`` to <0.17 to fix ReadtheDocs warning issues :pr:`2088`
+        * Ensure pipelines receive an identical set of CV or TV splits :pr:`2034`
     * Changes
         * Removed lists as acceptable hyperparameter ranges in ``AutoMLSearch`` :pr:`2028`
         * Renamed "details" to "metadata" for data check actions :pr:`2008`

diff --git a/evalml/preprocessing/data_splitters/balanced_classification_sampler.py b/evalml/preprocessing/data_splitters/balanced_classification_sampler.py
@@ -36,7 +36,6 @@ def __init__(self, sampling_ratio=0.25, min_samples=100, min_percentage=0.1, ran
         self.sampling_ratio = sampling_ratio
         self.min_samples = min_samples
         self.min_percentage = min_percentage
-        self.random_state = np.random.RandomState(self.random_seed)
 
     def _find_ideal_samples(self, y):
         """Returns dictionary of examples to drop for each class if we need to resample.
@@ -78,6 +77,7 @@ def fit_resample(self, X, y):
         Returns:
             list: Indices to keep for training data
         """
+        random_state = np.random.RandomState(self.random_seed)
         y_ww = infer_feature_types(y)
         y = _convert_woodwork_types_wrapper(y_ww.to_series())
         result = self._find_ideal_samples(y)
@@ -86,7 +86,7 @@ def fit_resample(self, X, y):
             # iterate through the classes we need to undersample and remove the number of samples we need to remove
             for key, value in result.items():
                 indices = y.index[y == key].values
-                indices_to_remove = self.random_state.choice(indices, value, replace=False)
+                indices_to_remove = random_state.choice(indices, value, replace=False)
                 indices_to_drop.extend(indices_to_remove)
         # indices of the y datacolumn
         original_indices = list(set(y.index.values).difference(set(indices_to_drop)))

diff --git a/evalml/tests/automl_tests/test_automl.py b/evalml/tests/automl_tests/test_automl.py
@@ -9,7 +9,8 @@
 import pandas as pd
 import pytest
 import woodwork as ww
-from sklearn.model_selection import KFold, StratifiedKFold
+from joblib import hash as joblib_hash
+from sklearn.model_selection import KFold
 from skopt.space import Categorical, Integer, Real
 
 from evalml import AutoMLSearch
@@ -631,6 +632,52 @@ def test_data_splitter_shuffle():
     np.testing.assert_almost_equal(automl.results['pipeline_results'][0]['validation_score'], 0.0, decimal=4)
 
 
+@pytest.mark.parametrize("automl_type", [ProblemTypes.BINARY, ProblemTypes.MULTICLASS, ProblemTypes.REGRESSION])
+@patch('evalml.pipelines.RegressionPipeline.score')
+@patch('evalml.pipelines.RegressionPipeline.fit')
+@patch('evalml.pipelines.MulticlassClassificationPipeline.score')
+@patch('evalml.pipelines.MulticlassClassificationPipeline.fit')
+@patch('evalml.pipelines.BinaryClassificationPipeline.score')
+@patch('evalml.pipelines.BinaryClassificationPipeline.fit')
+def test_data_splitter_gives_pipelines_same_data(mock_fit_binary, mock_score_binary,
+                                                 mock_fit_multi, mock_score_multi,
+                                                 mock_fit_regression, mock_score_regression,
+                                                 automl_type, caplog,
+                                                 X_y_binary, X_y_multi, X_y_regression):
+    if automl_type == ProblemTypes.BINARY:
+        X, y = X_y_binary
+        mock_score_binary.return_value = {'Log Loss Binary': 1.0}
+        mock_fit = mock_fit_binary
+        mock_score = mock_score_binary
+    elif automl_type == ProblemTypes.MULTICLASS:
+        X, y = X_y_multi
+        mock_score_multi.return_value = {'Log Loss Multiclass': 1.0}
+        mock_fit = mock_fit_multi
+        mock_score = mock_score_multi
+    elif automl_type == ProblemTypes.REGRESSION:
+        X, y = X_y_regression
+        mock_score_regression.return_value = {'R2': 1.0}
+        mock_fit = mock_fit_regression
+        mock_score = mock_score_regression
+
+    automl = AutoMLSearch(X_train=X, y_train=y, problem_type=automl_type, max_batches=1, n_jobs=1)
+    automl.search()
+    n_pipelines_evaluated = len(automl.results['pipeline_results'])
+    assert n_pipelines_evaluated > 1
+    # current automl algo trains each pipeline using 3-fold CV for "small" datasets (i.e. test data above)
+    # therefore, each pipeline should receive an identical set of three training-validation splits
+    # we'll check the first couple to validate that the data splitter is being used correctly
+    for fold_num in range(3):
+        pipeline0_training_X, pipeline0_training_y = mock_fit.call_args_list[fold_num][0]
+        pipeline1_training_X, pipeline1_training_y = mock_fit.call_args_list[3 + fold_num][0]
+        pipeline0_validation_X, pipeline0_validation_y = mock_score.call_args_list[fold_num][0]
+        pipeline1_validation_X, pipeline1_validation_y = mock_score.call_args_list[3 + fold_num][0]
+        assert joblib_hash(pipeline0_training_X.to_dataframe()) == joblib_hash(pipeline1_training_X.to_dataframe())
+        assert joblib_hash(pipeline0_training_y.to_series()) == joblib_hash(pipeline1_training_y.to_series())
+        assert joblib_hash(pipeline0_validation_X.to_dataframe()) == joblib_hash(pipeline1_validation_X.to_dataframe())
+        assert joblib_hash(pipeline0_validation_y.to_series()) == joblib_hash(pipeline1_validation_y.to_series())
+
+
 def test_allowed_pipelines_with_incorrect_problem_type(dummy_binary_pipeline_class, X_y_binary):
     X, y = X_y_binary
     # checks that not setting allowed_pipelines does not error out

diff --git a/evalml/tests/preprocessing_tests/test_balanced_classification_sampler.py b/evalml/tests/preprocessing_tests/test_balanced_classification_sampler.py
@@ -385,3 +385,18 @@ def test_balance_ratio_value():
     indices = bcs.fit_resample(X, y)
     # make sure there was no resampling done
     assert len(indices) == 1000
+
+
+def test_classification_balanced_multirun():
+    X = pd.DataFrame({"a": [i for i in range(1000)]})
+    y = pd.Series([i % 3 for i in range(1000)])
+    bcs = BalancedClassificationSampler(random_seed=13117)
+    indices1 = bcs.fit_resample(X, y)
+    indices2 = bcs.fit_resample(X, y)
+    pd.testing.assert_series_equal(pd.Series(indices1), pd.Series(indices2))
+
+    bcs = BalancedClassificationSampler(random_seed=13117)
+    indices3 = bcs.fit_resample(X, y)
+    indices4 = bcs.fit_resample(X, y)
+    pd.testing.assert_series_equal(pd.Series(indices1), pd.Series(indices3))
+    pd.testing.assert_series_equal(pd.Series(indices1), pd.Series(indices4))