From 40f354e8aa4ae58a25f82d57125b208b9d1d35bf Mon Sep 17 00:00:00 2001
From: ivanmkc <ivans.mailbox@gmail.com>
Date: Thu, 29 Apr 2021 17:59:44 -0400
Subject: [PATCH] Added test for AutoMLTabularTrainingJob with no column
 transformations

---
 google/cloud/aiplatform/training_jobs.py      |   4 +-
 .../test_automl_tabular_training_jobs.py      | 101 +++++++++++++++++-
 tests/unit/aiplatform/test_datasets.py        |   1 +
 3 files changed, 102 insertions(+), 4 deletions(-)

diff --git a/google/cloud/aiplatform/training_jobs.py b/google/cloud/aiplatform/training_jobs.py
index a7488f10e9..6488a8a6f6 100644
--- a/google/cloud/aiplatform/training_jobs.py
+++ b/google/cloud/aiplatform/training_jobs.py
@@ -2919,8 +2919,8 @@ def _run(
 
         if self._column_transformations is None:
             column_transformations = [
-                {"AUTO": {"column_name": column_name}}
-                for column_name in dataset.column_names
+                {"auto": {"column_name": column_name}}
+                for column_name in dataset.column_names()
             ]
         else:
             column_transformations = self._column_transformations
diff --git a/tests/unit/aiplatform/test_automl_tabular_training_jobs.py b/tests/unit/aiplatform/test_automl_tabular_training_jobs.py
index 62cab4b3c3..a2e21da8ea 100644
--- a/tests/unit/aiplatform/test_automl_tabular_training_jobs.py
+++ b/tests/unit/aiplatform/test_automl_tabular_training_jobs.py
@@ -34,10 +34,16 @@
 _TEST_DATASET_DISPLAY_NAME = "test-dataset-display-name"
 _TEST_DATASET_NAME = "test-dataset-name"
 _TEST_DISPLAY_NAME = "test-display-name"
-_TEST_TRAINING_CONTAINER_IMAGE = "gcr.io/test-training/container:image"
 _TEST_METADATA_SCHEMA_URI_TABULAR = schema.dataset.metadata.tabular
 _TEST_METADATA_SCHEMA_URI_NONTABULAR = schema.dataset.metadata.image
 
+_TEST_TRAINING_COLUMN_NAMES = [  # returned by the stubbed dataset column_names
+    "sepal_width",
+    "sepal_length",
+    "petal_length",
+    "petal_width",
+]
+
 _TEST_TRAINING_COLUMN_TRANSFORMATIONS = [
     {"auto": {"column_name": "sepal_width"}},
     {"auto": {"column_name": "sepal_length"}},
@@ -169,7 +175,23 @@ def mock_dataset_tabular():
         name=_TEST_DATASET_NAME,
         metadata={},
     )
-    return ds
+
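+    # Tests that need `column_names` stubbed should also request the
+    # `mock_dataset_tabular_column_names` fixture. It patches that
+    # attribute on this same object, using _TEST_TRAINING_COLUMN_NAMES
+    # as the return value, and undoes the patch once the test has
+    # finished.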
+
+    yield ds
+
+
+@pytest.fixture
+def mock_dataset_tabular_column_names(mock_dataset_tabular):
+    """Patch `column_names()` on the dataset mock to return the test column names."""
+    with mock.patch.object(  # plain callable stub: _run calls column_names()
+        mock_dataset_tabular, "column_names", return_value=_TEST_TRAINING_COLUMN_NAMES
+    ) as column_names_mock:
+        yield column_names_mock
 
 
 @pytest.fixture
@@ -347,6 +369,81 @@ def test_run_call_pipeline_if_no_model_display_name(
             training_pipeline=true_training_pipeline,
         )
 
+    @pytest.mark.parametrize("sync", [True, False])
+    def test_run_call_pipeline_service_create_if_no_column_transformations(
+        self,
+        mock_pipeline_service_create,
+        mock_pipeline_service_get,
+        mock_dataset_tabular,
+        mock_dataset_tabular_column_names,
+        mock_model_service_get,
+        sync,
+    ):
+        """Check the pipeline create request when column_transformations is None."""
+        aiplatform.init(
+            project=_TEST_PROJECT,
+            staging_bucket=_TEST_BUCKET_NAME,
+            encryption_spec_key_name=_TEST_DEFAULT_ENCRYPTION_KEY_NAME,
+        )
+        # With column_transformations=None, _run derives "auto" entries per column.
+        job = training_jobs.AutoMLTabularTrainingJob(
+            display_name=_TEST_DISPLAY_NAME,
+            optimization_objective=_TEST_TRAINING_OPTIMIZATION_OBJECTIVE_NAME,
+            optimization_prediction_type=_TEST_TRAINING_OPTIMIZATION_PREDICTION_TYPE,
+            column_transformations=None,
+            optimization_objective_recall_value=None,
+            optimization_objective_precision_value=None,
+        )
+
+        model_from_job = job.run(
+            dataset=mock_dataset_tabular,
+            target_column=_TEST_TRAINING_TARGET_COLUMN,
+            model_display_name=_TEST_MODEL_DISPLAY_NAME,
+            training_fraction_split=_TEST_TRAINING_FRACTION_SPLIT,
+            validation_fraction_split=_TEST_VALIDATION_FRACTION_SPLIT,
+            test_fraction_split=_TEST_TEST_FRACTION_SPLIT,
+            predefined_split_column_name=_TEST_PREDEFINED_SPLIT_COLUMN_NAME,
+            weight_column=_TEST_TRAINING_WEIGHT_COLUMN,
+            budget_milli_node_hours=_TEST_TRAINING_BUDGET_MILLI_NODE_HOURS,
+            disable_early_stopping=_TEST_TRAINING_DISABLE_EARLY_STOPPING,
+            sync=sync,
+        )
+        if not sync:
+            model_from_job.wait()
+        # Assemble the request the job is expected to send.
+        true_fraction_split = gca_training_pipeline.FractionSplit(
+            training_fraction=_TEST_TRAINING_FRACTION_SPLIT,
+            validation_fraction=_TEST_VALIDATION_FRACTION_SPLIT,
+            test_fraction=_TEST_TEST_FRACTION_SPLIT,
+        )
+
+        true_managed_model = gca_model.Model(
+            display_name=_TEST_MODEL_DISPLAY_NAME,
+            encryption_spec=_TEST_DEFAULT_ENCRYPTION_SPEC,
+        )
+
+        true_input_data_config = gca_training_pipeline.InputDataConfig(
+            fraction_split=true_fraction_split,
+            predefined_split=gca_training_pipeline.PredefinedSplit(
+                key=_TEST_PREDEFINED_SPLIT_COLUMN_NAME
+            ),
+            dataset_id=mock_dataset_tabular.name,
+        )
+        # NOTE(review): presumably _TEST_TRAINING_TASK_INPUTS lists the "auto" entries.
+        true_training_pipeline = gca_training_pipeline.TrainingPipeline(
+            display_name=_TEST_DISPLAY_NAME,
+            training_task_definition=schema.training_job.definition.automl_tabular,
+            training_task_inputs=_TEST_TRAINING_TASK_INPUTS,
+            model_to_upload=true_managed_model,
+            input_data_config=true_input_data_config,
+            encryption_spec=_TEST_DEFAULT_ENCRYPTION_SPEC,
+        )
+
+        mock_pipeline_service_create.assert_called_once_with(
+            parent=initializer.global_config.common_location_path(),
+            training_pipeline=true_training_pipeline,
+        )
+
     @pytest.mark.usefixtures(
         "mock_pipeline_service_create",
         "mock_pipeline_service_get",
diff --git a/tests/unit/aiplatform/test_datasets.py b/tests/unit/aiplatform/test_datasets.py
index db4e19d073..de7d0093ab 100644
--- a/tests/unit/aiplatform/test_datasets.py
+++ b/tests/unit/aiplatform/test_datasets.py
@@ -1100,6 +1100,7 @@ def test_init_dataset_video(self, get_dataset_video_mock):
         datasets.VideoDataset(dataset_name=_TEST_NAME)
         get_dataset_video_mock.assert_called_once_with(name=_TEST_NAME)
 
+    @pytest.mark.usefixtures("get_dataset_tabular_mock")
     def test_init_dataset_non_video(self):
         aiplatform.init(project=_TEST_PROJECT)
         with pytest.raises(ValueError):