From 40f354e8aa4ae58a25f82d57125b208b9d1d35bf Mon Sep 17 00:00:00 2001
From: ivanmkc
Date: Thu, 29 Apr 2021 17:59:44 -0400
Subject: [PATCH] Added test for AutoMLTabularTrainingJob for no transformations

---
 google/cloud/aiplatform/training_jobs.py |   4 +-
 .../test_automl_tabular_training_jobs.py | 101 +++++++++++++++++-
 tests/unit/aiplatform/test_datasets.py   |   1 +
 3 files changed, 102 insertions(+), 4 deletions(-)

diff --git a/google/cloud/aiplatform/training_jobs.py b/google/cloud/aiplatform/training_jobs.py
index a7488f10e9..6488a8a6f6 100644
--- a/google/cloud/aiplatform/training_jobs.py
+++ b/google/cloud/aiplatform/training_jobs.py
@@ -2919,8 +2919,8 @@ def _run(
 
         if self._column_transformations is None:
             column_transformations = [
-                {"AUTO": {"column_name": column_name}}
-                for column_name in dataset.column_names
+                {"auto": {"column_name": column_name}}
+                for column_name in dataset.column_names()
             ]
         else:
             column_transformations = self._column_transformations
diff --git a/tests/unit/aiplatform/test_automl_tabular_training_jobs.py b/tests/unit/aiplatform/test_automl_tabular_training_jobs.py
index 62cab4b3c3..a2e21da8ea 100644
--- a/tests/unit/aiplatform/test_automl_tabular_training_jobs.py
+++ b/tests/unit/aiplatform/test_automl_tabular_training_jobs.py
@@ -34,10 +34,16 @@
 _TEST_DATASET_DISPLAY_NAME = "test-dataset-display-name"
 _TEST_DATASET_NAME = "test-dataset-name"
 _TEST_DISPLAY_NAME = "test-display-name"
-_TEST_TRAINING_CONTAINER_IMAGE = "gcr.io/test-training/container:image"
 _TEST_METADATA_SCHEMA_URI_TABULAR = schema.dataset.metadata.tabular
 _TEST_METADATA_SCHEMA_URI_NONTABULAR = schema.dataset.metadata.image
 
+_TEST_TRAINING_COLUMN_NAMES = [
+    "sepal_width",
+    "sepal_length",
+    "petal_length",
+    "petal_width",
+]
+
 _TEST_TRAINING_COLUMN_TRANSFORMATIONS = [
     {"auto": {"column_name": "sepal_width"}},
     {"auto": {"column_name": "sepal_length"}},
@@ -169,7 +175,23 @@ def mock_dataset_tabular():
         name=_TEST_DATASET_NAME,
         metadata={},
     )
-    return ds
+
+    # with mock.patch.object(
+    #     ds, "column_names", new_callable=mock.PropertyMock
+    # ) as mock_foo:
+    #     mock_foo.return_value = _TEST_TRAINING_COLUMN_NAMES
+    #     return mock_foo
+
+    yield ds
+
+
+@pytest.fixture
+def mock_dataset_tabular_column_names(mock_dataset_tabular):
+    with mock.patch.object(
+        mock_dataset_tabular, "column_names", new_callable=mock.PropertyMock
+    ) as mock_dataset_tabular_column_names:
+        mock_dataset_tabular_column_names.return_value = _TEST_TRAINING_COLUMN_NAMES
+        yield mock_dataset_tabular_column_names
 
 
 @pytest.fixture
@@ -347,6 +369,81 @@ def test_run_call_pipeline_if_no_model_display_name(
             training_pipeline=true_training_pipeline,
         )
 
+    @pytest.mark.parametrize("sync", [True, False])
+    def test_run_call_pipeline_service_create_if_no_column_transformations(
+        self,
+        mock_pipeline_service_create,
+        mock_pipeline_service_get,
+        mock_dataset_tabular,
+        mock_dataset_tabular_column_names,
+        mock_model_service_get,
+        sync,
+    ):
+        aiplatform.init(
+            project=_TEST_PROJECT,
+            staging_bucket=_TEST_BUCKET_NAME,
+            encryption_spec_key_name=_TEST_DEFAULT_ENCRYPTION_KEY_NAME,
+        )
+
+        job = training_jobs.AutoMLTabularTrainingJob(
+            display_name=_TEST_DISPLAY_NAME,
+            optimization_objective=_TEST_TRAINING_OPTIMIZATION_OBJECTIVE_NAME,
+            optimization_prediction_type=_TEST_TRAINING_OPTIMIZATION_PREDICTION_TYPE,
+            column_transformations=None,
+            optimization_objective_recall_value=None,
+            optimization_objective_precision_value=None,
+        )
+
+        model_from_job = job.run(
+            dataset=mock_dataset_tabular,
+            target_column=_TEST_TRAINING_TARGET_COLUMN,
+            model_display_name=_TEST_MODEL_DISPLAY_NAME,
+            training_fraction_split=_TEST_TRAINING_FRACTION_SPLIT,
+            validation_fraction_split=_TEST_VALIDATION_FRACTION_SPLIT,
+            test_fraction_split=_TEST_TEST_FRACTION_SPLIT,
+            predefined_split_column_name=_TEST_PREDEFINED_SPLIT_COLUMN_NAME,
+            weight_column=_TEST_TRAINING_WEIGHT_COLUMN,
+            budget_milli_node_hours=_TEST_TRAINING_BUDGET_MILLI_NODE_HOURS,
+            disable_early_stopping=_TEST_TRAINING_DISABLE_EARLY_STOPPING,
+            sync=sync,
+        )
+
+        if not sync:
+            model_from_job.wait()
+
+        true_fraction_split = gca_training_pipeline.FractionSplit(
+            training_fraction=_TEST_TRAINING_FRACTION_SPLIT,
+            validation_fraction=_TEST_VALIDATION_FRACTION_SPLIT,
+            test_fraction=_TEST_TEST_FRACTION_SPLIT,
+        )
+
+        true_managed_model = gca_model.Model(
+            display_name=_TEST_MODEL_DISPLAY_NAME,
+            encryption_spec=_TEST_DEFAULT_ENCRYPTION_SPEC,
+        )
+
+        true_input_data_config = gca_training_pipeline.InputDataConfig(
+            fraction_split=true_fraction_split,
+            predefined_split=gca_training_pipeline.PredefinedSplit(
+                key=_TEST_PREDEFINED_SPLIT_COLUMN_NAME
+            ),
+            dataset_id=mock_dataset_tabular.name,
+        )
+
+        true_training_pipeline = gca_training_pipeline.TrainingPipeline(
+            display_name=_TEST_DISPLAY_NAME,
+            training_task_definition=schema.training_job.definition.automl_tabular,
+            training_task_inputs=_TEST_TRAINING_TASK_INPUTS,
+            model_to_upload=true_managed_model,
+            input_data_config=true_input_data_config,
+            encryption_spec=_TEST_DEFAULT_ENCRYPTION_SPEC,
+        )
+
+        mock_pipeline_service_create.assert_called_once_with(
+            parent=initializer.global_config.common_location_path(),
+            training_pipeline=true_training_pipeline,
+        )
+
     @pytest.mark.usefixtures(
         "mock_pipeline_service_create",
         "mock_pipeline_service_get",
diff --git a/tests/unit/aiplatform/test_datasets.py b/tests/unit/aiplatform/test_datasets.py
index db4e19d073..de7d0093ab 100644
--- a/tests/unit/aiplatform/test_datasets.py
+++ b/tests/unit/aiplatform/test_datasets.py
@@ -1100,6 +1100,7 @@ def test_init_dataset_video(self, get_dataset_video_mock):
         datasets.VideoDataset(dataset_name=_TEST_NAME)
         get_dataset_video_mock.assert_called_once_with(name=_TEST_NAME)
 
+    @pytest.mark.usefixtures("get_dataset_tabular_mock")
     def test_init_dataset_non_video(self):
         aiplatform.init(project=_TEST_PROJECT)
         with pytest.raises(ValueError):